diff options
Diffstat (limited to 'include')
| -rw-r--r-- | include/hw/acpi/ghes.h | 16 | ||||
| -rw-r--r-- | include/hw/acpi/vmclock.h | 34 | ||||
| -rw-r--r-- | include/hw/i386/intel_iommu.h | 8 | ||||
| -rw-r--r-- | include/hw/intc/loongarch_extioi_common.h | 2 | ||||
| -rw-r--r-- | include/hw/intc/loongarch_ipi.h | 1 | ||||
| -rw-r--r-- | include/hw/intc/loongson_ipi_common.h | 5 | ||||
| -rw-r--r-- | include/hw/loader.h | 2 | ||||
| -rw-r--r-- | include/hw/misc/vmcoreinfo.h | 7 | ||||
| -rw-r--r-- | include/hw/pci/pci.h | 1 | ||||
| -rw-r--r-- | include/hw/pci/pci_device.h | 4 | ||||
| -rw-r--r-- | include/hw/qdev-core.h | 27 | ||||
| -rw-r--r-- | include/hw/s390x/css-bridge.h | 1 | ||||
| -rw-r--r-- | include/hw/s390x/s390-virtio-ccw.h | 6 | ||||
| -rw-r--r-- | include/hw/s390x/s390_flic.h | 2 | ||||
| -rw-r--r-- | include/hw/s390x/storage-keys.h | 2 | ||||
| -rw-r--r-- | include/hw/usb.h | 12 | ||||
| -rw-r--r-- | include/hw/virtio/vhost.h | 23 | ||||
| -rw-r--r-- | include/hw/virtio/virtio-gpu.h | 3 | ||||
| -rw-r--r-- | include/hw/xen/xen-bus-helper.h | 9 | ||||
| -rw-r--r-- | include/hw/xen/xen-bus.h | 1 | ||||
| -rw-r--r-- | include/migration/misc.h | 4 | ||||
| -rw-r--r-- | include/qom/object.h | 21 | ||||
| -rw-r--r-- | include/standard-headers/linux/vmclock-abi.h | 182 | ||||
| -rw-r--r-- | include/system/system.h | 1 |
24 files changed, 320 insertions, 54 deletions
diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h index 674f6958e9..39619a2457 100644 --- a/include/hw/acpi/ghes.h +++ b/include/hw/acpi/ghes.h @@ -23,6 +23,7 @@ #define ACPI_GHES_H #include "hw/acpi/bios-linker-loader.h" +#include "qapi/error.h" /* * Values for Hardware Error Notification Type field @@ -59,26 +60,29 @@ enum AcpiGhesNotifyType { enum { ACPI_HEST_SRC_ID_SEA = 0, /* future ids go here */ - ACPI_HEST_SRC_ID_RESERVED, + + ACPI_GHES_ERROR_SOURCE_COUNT }; typedef struct AcpiGhesState { - uint64_t ghes_addr_le; + uint64_t hw_error_le; bool present; /* True if GHES is present at all on this board */ } AcpiGhesState; -void build_ghes_error_table(GArray *hardware_errors, BIOSLinker *linker); -void acpi_build_hest(GArray *table_data, BIOSLinker *linker, +void acpi_build_hest(GArray *table_data, GArray *hardware_errors, + BIOSLinker *linker, const char *oem_id, const char *oem_table_id); void acpi_ghes_add_fw_cfg(AcpiGhesState *vms, FWCfgState *s, GArray *hardware_errors); -int acpi_ghes_record_errors(uint8_t notify, uint64_t error_physical_addr); +int acpi_ghes_memory_errors(uint16_t source_id, uint64_t error_physical_addr); +void ghes_record_cper_errors(const void *cper, size_t len, + uint16_t source_id, Error **errp); /** * acpi_ghes_present: Report whether ACPI GHES table is present * * Returns: true if the system has an ACPI GHES table and it is - * safe to call acpi_ghes_record_errors() to record a memory error. + * safe to call acpi_ghes_memory_errors() to record a memory error. */ bool acpi_ghes_present(void); #endif diff --git a/include/hw/acpi/vmclock.h b/include/hw/acpi/vmclock.h new file mode 100644 index 0000000000..5605605812 --- /dev/null +++ b/include/hw/acpi/vmclock.h @@ -0,0 +1,34 @@ +#ifndef ACPI_VMCLOCK_H +#define ACPI_VMCLOCK_H + +#include "hw/acpi/bios-linker-loader.h" +#include "hw/qdev-core.h" +#include "qemu/uuid.h" +#include "qom/object.h" + +#define TYPE_VMCLOCK "vmclock" + +#define VMCLOCK_ADDR 0xfeffb000 +#define VMCLOCK_SIZE 0x1000 + +OBJECT_DECLARE_SIMPLE_TYPE(VmclockState, VMCLOCK) + +struct vmclock_abi; + +struct VmclockState { + DeviceState parent_obj; + MemoryRegion clk_page; + uint64_t physaddr; + struct vmclock_abi *clk; +}; + +/* returns NULL unless there is exactly one device */ +static inline Object *find_vmclock_dev(void) +{ + return object_resolve_path_type("", TYPE_VMCLOCK, NULL); +} + +void vmclock_build_acpi(VmclockState *vms, GArray *table_data, + BIOSLinker *linker, const char *oem_id); + +#endif diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h index d372cd396b..e95477e855 100644 --- a/include/hw/i386/intel_iommu.h +++ b/include/hw/i386/intel_iommu.h @@ -45,8 +45,9 @@ OBJECT_DECLARE_SIMPLE_TYPE(IntelIOMMUState, INTEL_IOMMU_DEVICE) #define DMAR_REG_SIZE 0x230 #define VTD_HOST_AW_39BIT 39 #define VTD_HOST_AW_48BIT 48 -#define VTD_HOST_ADDRESS_WIDTH VTD_HOST_AW_39BIT +#define VTD_HOST_ADDRESS_WIDTH VTD_HOST_AW_48BIT #define VTD_HAW_MASK(aw) ((1ULL << (aw)) - 1) +#define VTD_MGAW_FROM_CAP(cap) ((cap >> 16) & 0x3fULL) #define DMAR_REPORT_F_INTR (1) @@ -152,9 +153,10 @@ struct VTDIOTLBEntry { uint64_t gfn; uint16_t domain_id; uint32_t pasid; - uint64_t slpte; + uint64_t pte; uint64_t mask; uint8_t access_flags; + uint8_t pgtt; }; /* VT-d Source-ID Qualifier types */ @@ -262,6 +264,7 @@ struct IntelIOMMUState { bool caching_mode; /* RO - is cap CM enabled? */ bool scalable_mode; /* RO - is Scalable Mode supported? */ + bool flts; /* RO - is stage-1 translation supported? */ bool snoop_control; /* RO - is SNP filed supported? */ dma_addr_t root; /* Current root table pointer */ @@ -305,6 +308,7 @@ struct IntelIOMMUState { bool dma_drain; /* Whether DMA r/w draining enabled */ bool dma_translation; /* Whether DMA translation supported */ bool pasid; /* Whether to support PASID */ + bool fs1gp; /* First Stage 1-GByte Page Support */ /* Transient Mapping, Reserved(0) since VTD spec revision 3.2 */ bool stale_tm; diff --git a/include/hw/intc/loongarch_extioi_common.h b/include/hw/intc/loongarch_extioi_common.h index f6bc778a85..22d7880977 100644 --- a/include/hw/intc/loongarch_extioi_common.h +++ b/include/hw/intc/loongarch_extioi_common.h @@ -65,6 +65,8 @@ typedef struct ExtIOICore { uint32_t coreisr[EXTIOI_IRQS_GROUP_COUNT]; DECLARE_BITMAP(sw_isr[LS3A_INTC_IP], EXTIOI_IRQS); qemu_irq parent_irq[LS3A_INTC_IP]; + uint64_t arch_id; + CPUState *cpu; } ExtIOICore; struct LoongArchExtIOICommonState { diff --git a/include/hw/intc/loongarch_ipi.h b/include/hw/intc/loongarch_ipi.h index 276b3040a3..923bf21ecb 100644 --- a/include/hw/intc/loongarch_ipi.h +++ b/include/hw/intc/loongarch_ipi.h @@ -20,6 +20,7 @@ struct LoongarchIPIState { struct LoongarchIPIClass { LoongsonIPICommonClass parent_class; + DeviceRealize parent_realize; }; #endif diff --git a/include/hw/intc/loongson_ipi_common.h b/include/hw/intc/loongson_ipi_common.h index df9d9c5168..b587f9c571 100644 --- a/include/hw/intc/loongson_ipi_common.h +++ b/include/hw/intc/loongson_ipi_common.h @@ -27,6 +27,8 @@ typedef struct IPICore { /* 64bit buf divide into 2 32-bit buf */ uint32_t buf[IPI_MBX_NUM * 2]; qemu_irq irq; + uint64_t arch_id; + CPUState *cpu; } IPICore; struct LoongsonIPICommonState { @@ -44,7 +46,8 @@ struct LoongsonIPICommonClass { DeviceRealize parent_realize; DeviceUnrealize parent_unrealize; AddressSpace *(*get_iocsr_as)(CPUState *cpu); - CPUState *(*cpu_by_arch_id)(int64_t id); + int (*cpu_by_arch_id)(LoongsonIPICommonState *lics, int64_t id, + int *index, CPUState **pcs); }; MemTxResult loongson_ipi_core_readl(void *opaque, hwaddr addr, uint64_t *data, diff --git a/include/hw/loader.h b/include/hw/loader.h index 7f6d06b956..8985046be4 100644 --- a/include/hw/loader.h +++ b/include/hw/loader.h @@ -101,7 +101,7 @@ ssize_t load_image_gzipped_buffer(const char *filename, uint64_t max_sz, * Returns the size of the decompressed payload if decompression was performed * successfully. */ -ssize_t unpack_efi_zboot_image(uint8_t **buffer, int *size); +ssize_t unpack_efi_zboot_image(uint8_t **buffer, ssize_t *size); #define ELF_LOAD_FAILED -1 #define ELF_LOAD_NOT_ELF -2 diff --git a/include/hw/misc/vmcoreinfo.h b/include/hw/misc/vmcoreinfo.h index 0b7b55d400..1aa4477163 100644 --- a/include/hw/misc/vmcoreinfo.h +++ b/include/hw/misc/vmcoreinfo.h @@ -16,10 +16,9 @@ #include "standard-headers/linux/qemu_fw_cfg.h" #include "qom/object.h" -#define VMCOREINFO_DEVICE "vmcoreinfo" +#define TYPE_VMCOREINFO "vmcoreinfo" typedef struct VMCoreInfoState VMCoreInfoState; -DECLARE_INSTANCE_CHECKER(VMCoreInfoState, VMCOREINFO, - VMCOREINFO_DEVICE) +DECLARE_INSTANCE_CHECKER(VMCoreInfoState, VMCOREINFO, TYPE_VMCOREINFO) typedef struct fw_cfg_vmcoreinfo FWCfgVMCoreInfo; @@ -33,7 +32,7 @@ struct VMCoreInfoState { /* returns NULL unless there is exactly one device */ static inline VMCoreInfoState *vmcoreinfo_find(void) { - Object *o = object_resolve_path_type("", VMCOREINFO_DEVICE, NULL); + Object *o = object_resolve_path_type("", TYPE_VMCOREINFO, NULL); return o ? VMCOREINFO(o) : NULL; } diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index cefeb388bd..4002bbeebd 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -674,6 +674,7 @@ static inline void pci_irq_deassert(PCIDevice *pci_dev) } MSIMessage pci_get_msi_message(PCIDevice *dev, int vector); +void pci_set_enabled(PCIDevice *pci_dev, bool state); void pci_set_power(PCIDevice *pci_dev, bool state); #endif diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h index 8eaf0d58bb..add208edfa 100644 --- a/include/hw/pci/pci_device.h +++ b/include/hw/pci/pci_device.h @@ -57,7 +57,7 @@ typedef struct PCIReqIDCache PCIReqIDCache; struct PCIDevice { DeviceState qdev; bool partially_hotplugged; - bool has_power; + bool enabled; /* PCI config space */ uint8_t *config; @@ -148,7 +148,7 @@ struct PCIDevice { uint32_t romsize; bool has_rom; MemoryRegion rom; - uint32_t rom_bar; + int32_t rom_bar; /* INTx routing notifier */ PCIINTxRoutingNotifier intx_routing_notifier; diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h index e6ef80b7fd..530f3da702 100644 --- a/include/hw/qdev-core.h +++ b/include/hw/qdev-core.h @@ -249,10 +249,6 @@ struct DeviceState { */ int64_t pending_deleted_expires_ms; /** - * @opts: QDict of options for the device - */ - QDict *opts; - /** * @hotplugged: was device added after PHASE_MACHINE_READY? */ int hotplugged; @@ -544,7 +540,8 @@ void qdev_set_legacy_instance_id(DeviceState *dev, int alias_id, int required_for_version); HotplugHandler *qdev_get_bus_hotplug_handler(DeviceState *dev); HotplugHandler *qdev_get_machine_hotplug_handler(DeviceState *dev); -bool qdev_hotplug_allowed(DeviceState *dev, Error **errp); +bool qdev_hotplug_allowed(DeviceState *dev, BusState *bus, Error **errp); +bool qdev_hotunplug_allowed(DeviceState *dev, Error **errp); /** * qdev_get_hotplug_handler() - Get handler responsible for device wiring @@ -1028,6 +1025,26 @@ void qdev_assert_realized_properly(void); Object *qdev_get_machine(void); /** + * qdev_create_fake_machine(): Create a fake machine container. + * + * .. note:: + * This function is a kludge for user emulation (USER_ONLY) + * because when thread (TYPE_CPU) are realized, qdev_realize() + * access a machine container. + */ +void qdev_create_fake_machine(void); + +/** + * machine_get_container: + * @name: The name of container to lookup + * + * Get a container of the machine (QOM path "/machine/NAME"). + * + * Returns: the machine container object. + */ +Object *machine_get_container(const char *name); + +/** * qdev_get_human_name() - Return a human-readable name for a device * @dev: The device. Must be a valid and non-NULL pointer. * diff --git a/include/hw/s390x/css-bridge.h b/include/hw/s390x/css-bridge.h index deb606d71f..4f874ed781 100644 --- a/include/hw/s390x/css-bridge.h +++ b/include/hw/s390x/css-bridge.h @@ -19,7 +19,6 @@ /* virtual css bridge */ struct VirtualCssBridge { SysBusDevice sysbus_dev; - bool css_dev_path; }; #define TYPE_VIRTUAL_CSS_BRIDGE "virtual-css-bridge" diff --git a/include/hw/s390x/s390-virtio-ccw.h b/include/hw/s390x/s390-virtio-ccw.h index 599740a998..686d9497d2 100644 --- a/include/hw/s390x/s390-virtio-ccw.h +++ b/include/hw/s390x/s390-virtio-ccw.h @@ -53,16 +53,10 @@ struct S390CcwMachineClass { MachineClass parent_class; /*< public >*/ - bool ri_allowed; - bool cpu_model_allowed; bool hpage_1m_allowed; int max_threads; }; -/* runtime-instrumentation allowed by the machine */ -bool ri_allowed(void); -/* cpu model allowed by the machine */ -bool cpu_model_allowed(void); /* 1M huge page mappings allowed by the machine */ bool hpage_1m_allowed(void); diff --git a/include/hw/s390x/s390_flic.h b/include/hw/s390x/s390_flic.h index 4d66c5e42e..85016d5ccc 100644 --- a/include/hw/s390x/s390_flic.h +++ b/include/hw/s390x/s390_flic.h @@ -41,8 +41,6 @@ OBJECT_DECLARE_TYPE(S390FLICState, S390FLICStateClass, struct S390FLICState { SysBusDevice parent_obj; - /* to limit AdapterRoutes.num_routes for compat */ - uint32_t adapter_routes_max_batch; bool ais_supported; bool migration_enabled; }; diff --git a/include/hw/s390x/storage-keys.h b/include/hw/s390x/storage-keys.h index 976ffb2039..408d2815d4 100644 --- a/include/hw/s390x/storage-keys.h +++ b/include/hw/s390x/storage-keys.h @@ -21,8 +21,6 @@ OBJECT_DECLARE_TYPE(S390SKeysState, S390SKeysClass, S390_SKEYS) struct S390SKeysState { DeviceState parent_obj; - bool migration_enabled; - }; diff --git a/include/hw/usb.h b/include/hw/usb.h index d46d96779a..e410693d0c 100644 --- a/include/hw/usb.h +++ b/include/hw/usb.h @@ -579,16 +579,6 @@ void usb_pcap_init(FILE *fp); void usb_pcap_ctrl(USBPacket *p, bool setup); void usb_pcap_data(USBPacket *p, bool setup); -static inline USBDevice *usb_new(const char *name) -{ - return USB_DEVICE(qdev_new(name)); -} - -static inline USBDevice *usb_try_new(const char *name) -{ - return USB_DEVICE(qdev_try_new(name)); -} - static inline bool usb_realize_and_unref(USBDevice *dev, USBBus *bus, Error **errp) { return qdev_realize_and_unref(&dev->qdev, &bus->qbus, errp); @@ -596,7 +586,7 @@ static inline bool usb_realize_and_unref(USBDevice *dev, USBBus *bus, Error **er static inline USBDevice *usb_create_simple(USBBus *bus, const char *name) { - USBDevice *dev = usb_new(name); + USBDevice *dev = USB_DEVICE(qdev_new(name)); usb_realize_and_unref(dev, bus, &error_abort); return dev; diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h index 461c168c37..a9469d50bc 100644 --- a/include/hw/virtio/vhost.h +++ b/include/hw/virtio/vhost.h @@ -365,7 +365,14 @@ static inline int vhost_reset_device(struct vhost_dev *hdev) * Returns true if the device supports these commands, and false if it * does not. */ +#ifdef CONFIG_VHOST bool vhost_supports_device_state(struct vhost_dev *dev); +#else +static inline bool vhost_supports_device_state(struct vhost_dev *dev) +{ + return false; +} +#endif /** * vhost_set_device_state_fd(): Begin transfer of internal state from/to @@ -448,7 +455,15 @@ int vhost_check_device_state(struct vhost_dev *dev, Error **errp); * * Returns 0 on success, and -errno otherwise. */ +#ifdef CONFIG_VHOST int vhost_save_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp); +#else +static inline int vhost_save_backend_state(struct vhost_dev *dev, QEMUFile *f, + Error **errp) +{ + return -ENOSYS; +} +#endif /** * vhost_load_backend_state(): High-level function to load a vhost @@ -465,6 +480,14 @@ int vhost_save_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp); * * Returns 0 on success, and -errno otherwise. */ +#ifdef CONFIG_VHOST int vhost_load_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp); +#else +static inline int vhost_load_backend_state(struct vhost_dev *dev, QEMUFile *f, + Error **errp) +{ + return -ENOSYS; +} +#endif #endif diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h index bd93672185..a42957c4e2 100644 --- a/include/hw/virtio/virtio-gpu.h +++ b/include/hw/virtio/virtio-gpu.h @@ -98,6 +98,7 @@ enum virtio_gpu_base_conf_flags { VIRTIO_GPU_FLAG_CONTEXT_INIT_ENABLED, VIRTIO_GPU_FLAG_RUTABAGA_ENABLED, VIRTIO_GPU_FLAG_VENUS_ENABLED, + VIRTIO_GPU_FLAG_RESOURCE_UUID_ENABLED, }; #define virtio_gpu_virgl_enabled(_cfg) \ @@ -114,6 +115,8 @@ enum virtio_gpu_base_conf_flags { (_cfg.flags & (1 << VIRTIO_GPU_FLAG_CONTEXT_INIT_ENABLED)) #define virtio_gpu_rutabaga_enabled(_cfg) \ (_cfg.flags & (1 << VIRTIO_GPU_FLAG_RUTABAGA_ENABLED)) +#define virtio_gpu_resource_uuid_enabled(_cfg) \ + (_cfg.flags & (1 << VIRTIO_GPU_FLAG_RESOURCE_UUID_ENABLED)) #define virtio_gpu_hostmem_enabled(_cfg) \ (_cfg.hostmem > 0) #define virtio_gpu_venus_enabled(_cfg) \ diff --git a/include/hw/xen/xen-bus-helper.h b/include/hw/xen/xen-bus-helper.h index d8dcc2f010..e9911115b3 100644 --- a/include/hw/xen/xen-bus-helper.h +++ b/include/hw/xen/xen-bus-helper.h @@ -38,6 +38,15 @@ int xs_node_scanf(struct qemu_xs_handle *h, xs_transaction_t tid, const char *fmt, ...) G_GNUC_SCANF(6, 7); +/* + * Unlike other functions here, the printf-formatted path_fmt is for + * the XenStore path, not the contents of the node. + */ +char *xs_node_read(struct qemu_xs_handle *h, xs_transaction_t tid, + unsigned int *len, Error **errp, + const char *path_fmt, ...) + G_GNUC_PRINTF(5, 6); + /* Watch node/key unless node is empty, in which case watch key */ struct qemu_xs_watch *xs_node_watch(struct qemu_xs_handle *h, const char *node, const char *key, xs_watch_fn fn, diff --git a/include/hw/xen/xen-bus.h b/include/hw/xen/xen-bus.h index 38d40afa37..2adb2af839 100644 --- a/include/hw/xen/xen-bus.h +++ b/include/hw/xen/xen-bus.h @@ -91,6 +91,7 @@ void xen_device_frontend_printf(XenDevice *xendev, const char *key, int xen_device_frontend_scanf(XenDevice *xendev, const char *key, const char *fmt, ...) G_GNUC_SCANF(3, 4); +char *xen_device_frontend_read(XenDevice *xendev, const char *key); void xen_device_set_max_grant_refs(XenDevice *xendev, unsigned int nr_refs, Error **errp); diff --git a/include/migration/misc.h b/include/migration/misc.h index c0e23fdac9..67f7ef7a0e 100644 --- a/include/migration/misc.h +++ b/include/migration/misc.h @@ -104,4 +104,8 @@ bool migration_incoming_postcopy_advised(void); /* True if background snapshot is active */ bool migration_in_bg_snapshot(void); +/* Wrapper for block active/inactive operations */ +bool migration_block_activate(Error **errp); +bool migration_block_inactivate(void); + #endif diff --git a/include/qom/object.h b/include/qom/object.h index f28ffea9a6..9192265db7 100644 --- a/include/qom/object.h +++ b/include/qom/object.h @@ -1513,6 +1513,16 @@ const char *object_property_get_type(Object *obj, const char *name, */ Object *object_get_root(void); +/** + * object_get_container: + * @name: the name of container to lookup + * + * Lookup a root level container. + * + * Returns: the container with @name. + */ +Object *object_get_container(const char *name); + /** * object_get_objects_root: @@ -2010,17 +2020,6 @@ int object_child_foreach(Object *obj, int (*fn)(Object *child, void *opaque), int object_child_foreach_recursive(Object *obj, int (*fn)(Object *child, void *opaque), void *opaque); -/** - * container_get: - * @root: root of the #path, e.g., object_get_root() - * @path: path to the container - * - * Return a container object whose path is @path. Create more containers - * along the path if necessary. - * - * Returns: the container object. - */ -Object *container_get(Object *root, const char *path); /** * object_property_add_new_container: diff --git a/include/standard-headers/linux/vmclock-abi.h b/include/standard-headers/linux/vmclock-abi.h new file mode 100644 index 0000000000..15b0316cb4 --- /dev/null +++ b/include/standard-headers/linux/vmclock-abi.h @@ -0,0 +1,182 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ + +/* + * This structure provides a vDSO-style clock to VM guests, exposing the + * relationship (or lack thereof) between the CPU clock (TSC, timebase, arch + * counter, etc.) and real time. It is designed to address the problem of + * live migration, which other clock enlightenments do not. + * + * When a guest is live migrated, this affects the clock in two ways. + * + * First, even between identical hosts the actual frequency of the underlying + * counter will change within the tolerances of its specification (typically + * ±50PPM, or 4 seconds a day). This frequency also varies over time on the + * same host, but can be tracked by NTP as it generally varies slowly. With + * live migration there is a step change in the frequency, with no warning. + * + * Second, there may be a step change in the value of the counter itself, as + * its accuracy is limited by the precision of the NTP synchronization on the + * source and destination hosts. + * + * So any calibration (NTP, PTP, etc.) which the guest has done on the source + * host before migration is invalid, and needs to be redone on the new host. + * + * In its most basic mode, this structure provides only an indication to the + * guest that live migration has occurred. This allows the guest to know that + * its clock is invalid and take remedial action. For applications that need + * reliable accurate timestamps (e.g. distributed databases), the structure + * can be mapped all the way to userspace. This allows the application to see + * directly for itself that the clock is disrupted and take appropriate + * action, even when using a vDSO-style method to get the time instead of a + * system call. + * + * In its more advanced mode. this structure can also be used to expose the + * precise relationship of the CPU counter to real time, as calibrated by the + * host. This means that userspace applications can have accurate time + * immediately after live migration, rather than having to pause operations + * and wait for NTP to recover. This mode does, of course, rely on the + * counter being reliable and consistent across CPUs. + * + * Note that this must be true UTC, never with smeared leap seconds. If a + * guest wishes to construct a smeared clock, it can do so. Presenting a + * smeared clock through this interface would be problematic because it + * actually messes with the apparent counter *period*. A linear smearing + * of 1 ms per second would effectively tweak the counter period by 1000PPM + * at the start/end of the smearing period, while a sinusoidal smear would + * basically be impossible to represent. + * + * This structure is offered with the intent that it be adopted into the + * nascent virtio-rtc standard, as a virtio-rtc that does not address the live + * migration problem seems a little less than fit for purpose. For that + * reason, certain fields use precisely the same numeric definitions as in + * the virtio-rtc proposal. The structure can also be exposed through an ACPI + * device with the CID "VMCLOCK", modelled on the "VMGENID" device except for + * the fact that it uses a real _CRS to convey the address of the structure + * (which should be a full page, to allow for mapping directly to userspace). + */ + +#ifndef __VMCLOCK_ABI_H__ +#define __VMCLOCK_ABI_H__ + +#include "standard-headers/linux/types.h" + +struct vmclock_abi { + /* CONSTANT FIELDS */ + uint32_t magic; +#define VMCLOCK_MAGIC 0x4b4c4356 /* "VCLK" */ + uint32_t size; /* Size of region containing this structure */ + uint16_t version; /* 1 */ + uint8_t counter_id; /* Matches VIRTIO_RTC_COUNTER_xxx except INVALID */ +#define VMCLOCK_COUNTER_ARM_VCNT 0 +#define VMCLOCK_COUNTER_X86_TSC 1 +#define VMCLOCK_COUNTER_INVALID 0xff + uint8_t time_type; /* Matches VIRTIO_RTC_TYPE_xxx */ +#define VMCLOCK_TIME_UTC 0 /* Since 1970-01-01 00:00:00z */ +#define VMCLOCK_TIME_TAI 1 /* Since 1970-01-01 00:00:00z */ +#define VMCLOCK_TIME_MONOTONIC 2 /* Since undefined epoch */ +#define VMCLOCK_TIME_INVALID_SMEARED 3 /* Not supported */ +#define VMCLOCK_TIME_INVALID_MAYBE_SMEARED 4 /* Not supported */ + + /* NON-CONSTANT FIELDS PROTECTED BY SEQCOUNT LOCK */ + uint32_t seq_count; /* Low bit means an update is in progress */ + /* + * This field changes to another non-repeating value when the CPU + * counter is disrupted, for example on live migration. This lets + * the guest know that it should discard any calibration it has + * performed of the counter against external sources (NTP/PTP/etc.). + */ + uint64_t disruption_marker; + uint64_t flags; + /* Indicates that the tai_offset_sec field is valid */ +#define VMCLOCK_FLAG_TAI_OFFSET_VALID (1 << 0) + /* + * Optionally used to notify guests of pending maintenance events. + * A guest which provides latency-sensitive services may wish to + * remove itself from service if an event is coming up. Two flags + * indicate the approximate imminence of the event. + */ +#define VMCLOCK_FLAG_DISRUPTION_SOON (1 << 1) /* About a day */ +#define VMCLOCK_FLAG_DISRUPTION_IMMINENT (1 << 2) /* About an hour */ +#define VMCLOCK_FLAG_PERIOD_ESTERROR_VALID (1 << 3) +#define VMCLOCK_FLAG_PERIOD_MAXERROR_VALID (1 << 4) +#define VMCLOCK_FLAG_TIME_ESTERROR_VALID (1 << 5) +#define VMCLOCK_FLAG_TIME_MAXERROR_VALID (1 << 6) + /* + * If the MONOTONIC flag is set then (other than leap seconds) it is + * guaranteed that the time calculated according this structure at + * any given moment shall never appear to be later than the time + * calculated via the structure at any *later* moment. + * + * In particular, a timestamp based on a counter reading taken + * immediately after setting the low bit of seq_count (and the + * associated memory barrier), using the previously-valid time and + * period fields, shall never be later than a timestamp based on + * a counter reading taken immediately before *clearing* the low + * bit again after the update, using the about-to-be-valid fields. + */ +#define VMCLOCK_FLAG_TIME_MONOTONIC (1 << 7) + + uint8_t pad[2]; + uint8_t clock_status; +#define VMCLOCK_STATUS_UNKNOWN 0 +#define VMCLOCK_STATUS_INITIALIZING 1 +#define VMCLOCK_STATUS_SYNCHRONIZED 2 +#define VMCLOCK_STATUS_FREERUNNING 3 +#define VMCLOCK_STATUS_UNRELIABLE 4 + + /* + * The time exposed through this device is never smeared. This field + * corresponds to the 'subtype' field in virtio-rtc, which indicates + * the smearing method. However in this case it provides a *hint* to + * the guest operating system, such that *if* the guest OS wants to + * provide its users with an alternative clock which does not follow + * UTC, it may do so in a fashion consistent with the other systems + * in the nearby environment. + */ + uint8_t leap_second_smearing_hint; /* Matches VIRTIO_RTC_SUBTYPE_xxx */ +#define VMCLOCK_SMEARING_STRICT 0 +#define VMCLOCK_SMEARING_NOON_LINEAR 1 +#define VMCLOCK_SMEARING_UTC_SLS 2 + uint16_t tai_offset_sec; /* Actually two's complement signed */ + uint8_t leap_indicator; + /* + * This field is based on the VIRTIO_RTC_LEAP_xxx values as defined + * in the current draft of virtio-rtc, but since smearing cannot be + * used with the shared memory device, some values are not used. + * + * The _POST_POS and _POST_NEG values allow the guest to perform + * its own smearing during the day or so after a leap second when + * such smearing may need to continue being applied for a leap + * second which is now theoretically "historical". + */ +#define VMCLOCK_LEAP_NONE 0x00 /* No known nearby leap second */ +#define VMCLOCK_LEAP_PRE_POS 0x01 /* Positive leap second at EOM */ +#define VMCLOCK_LEAP_PRE_NEG 0x02 /* Negative leap second at EOM */ +#define VMCLOCK_LEAP_POS 0x03 /* Set during 23:59:60 second */ +#define VMCLOCK_LEAP_POST_POS 0x04 +#define VMCLOCK_LEAP_POST_NEG 0x05 + + /* Bit shift for counter_period_frac_sec and its error rate */ + uint8_t counter_period_shift; + /* + * Paired values of counter and UTC at a given point in time. + */ + uint64_t counter_value; + /* + * Counter period, and error margin of same. The unit of these + * fields is 1/2^(64 + counter_period_shift) of a second. + */ + uint64_t counter_period_frac_sec; + uint64_t counter_period_esterror_rate_frac_sec; + uint64_t counter_period_maxerror_rate_frac_sec; + + /* + * Time according to time_type field above. + */ + uint64_t time_sec; /* Seconds since time_type epoch */ + uint64_t time_frac_sec; /* Units of 1/2^64 of a second */ + uint64_t time_esterror_nanosec; + uint64_t time_maxerror_nanosec; +}; + +#endif /* __VMCLOCK_ABI_H__ */ diff --git a/include/system/system.h b/include/system/system.h index 5364ad4f27..0cbb43ec30 100644 --- a/include/system/system.h +++ b/include/system/system.h @@ -15,6 +15,7 @@ extern bool qemu_uuid_set; const char *qemu_get_vm_name(void); +/* Exit notifiers will run with BQL held. */ void qemu_add_exit_notifier(Notifier *notify); void qemu_remove_exit_notifier(Notifier *notify); |