diff options
Diffstat (limited to 'hw')
83 files changed, 2081 insertions, 867 deletions
diff --git a/hw/acpi/Kconfig b/hw/acpi/Kconfig index e07d3204eb..1d4e9f0845 100644 --- a/hw/acpi/Kconfig +++ b/hw/acpi/Kconfig @@ -60,6 +60,11 @@ config ACPI_VMGENID default y depends on PC +config ACPI_VMCLOCK + bool + default y + depends on PC + config ACPI_VIOT bool depends on ACPI diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c index 9d530a24da..f70a2c045e 100644 --- a/hw/acpi/cpu.c +++ b/hw/acpi/cpu.c @@ -327,6 +327,7 @@ const VMStateDescription vmstate_cpu_hotplug = { #define CPU_EJECT_METHOD "CEJ0" #define CPU_OST_METHOD "COST" #define CPU_ADDED_LIST "CNEW" +#define CPU_EJ_LIST "CEJL" #define CPU_ENABLED "CPEN" #define CPU_SELECTOR "CSEL" @@ -488,7 +489,6 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, method = aml_method(CPU_SCAN_METHOD, 0, AML_SERIALIZED); { const uint8_t max_cpus_per_pass = 255; - Aml *else_ctx; Aml *while_ctx, *while_ctx2; Aml *has_event = aml_local(0); Aml *dev_chk = aml_int(1); @@ -499,6 +499,8 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, Aml *uid = aml_local(3); Aml *has_job = aml_local(4); Aml *new_cpus = aml_name(CPU_ADDED_LIST); + Aml *ej_cpus = aml_name(CPU_EJ_LIST); + Aml *num_ej_cpus = aml_local(5); aml_append(method, aml_acquire(ctrl_lock, 0xFFFF)); @@ -513,6 +515,8 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, */ aml_append(method, aml_name_decl(CPU_ADDED_LIST, aml_package(max_cpus_per_pass))); + aml_append(method, aml_name_decl(CPU_EJ_LIST, + aml_package(max_cpus_per_pass))); aml_append(method, aml_store(zero, uid)); aml_append(method, aml_store(one, has_job)); @@ -527,6 +531,7 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, aml_append(while_ctx2, aml_store(one, has_event)); aml_append(while_ctx2, aml_store(zero, num_added_cpus)); + aml_append(while_ctx2, aml_store(zero, num_ej_cpus)); /* * Scan CPUs, till there are CPUs with events or @@ -559,8 +564,10 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, * if CPU_ADDED_LIST is full, exit inner loop and process * collected CPUs */ - ifctx = aml_if( - aml_equal(num_added_cpus, aml_int(max_cpus_per_pass))); + ifctx = aml_if(aml_lor( + aml_equal(num_added_cpus, aml_int(max_cpus_per_pass)), + aml_equal(num_ej_cpus, aml_int(max_cpus_per_pass)) + )); { aml_append(ifctx, aml_store(one, has_job)); aml_append(ifctx, aml_break()); @@ -577,16 +584,16 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, aml_append(ifctx, aml_store(one, has_event)); } aml_append(while_ctx, ifctx); - else_ctx = aml_else(); + ifctx = aml_if(aml_equal(rm_evt, one)); { - aml_append(ifctx, - aml_call2(CPU_NOTIFY_METHOD, uid, eject_req)); - aml_append(ifctx, aml_store(one, rm_evt)); + /* cache to be removed CPUs to Notify later */ + aml_append(ifctx, aml_store(uid, + aml_index(ej_cpus, num_ej_cpus))); + aml_append(ifctx, aml_increment(num_ej_cpus)); aml_append(ifctx, aml_store(one, has_event)); } - aml_append(else_ctx, ifctx); - aml_append(while_ctx, else_ctx); + aml_append(while_ctx, ifctx); aml_append(while_ctx, aml_increment(uid)); } aml_append(while_ctx2, while_ctx); @@ -620,6 +627,24 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, aml_append(while_ctx, aml_increment(cpu_idx)); } aml_append(while_ctx2, while_ctx); + + /* + * Notify OSPM about to be removed CPUs and clear remove flag + */ + aml_append(while_ctx2, aml_store(zero, cpu_idx)); + while_ctx = aml_while(aml_lless(cpu_idx, num_ej_cpus)); + { + aml_append(while_ctx, + aml_store(aml_derefof(aml_index(ej_cpus, cpu_idx)), + uid)); + aml_append(while_ctx, + aml_call2(CPU_NOTIFY_METHOD, uid, eject_req)); + aml_append(while_ctx, aml_store(uid, cpu_selector)); + aml_append(while_ctx, aml_store(one, rm_evt)); + aml_append(while_ctx, aml_increment(cpu_idx)); + } + aml_append(while_ctx2, while_ctx); + /* * If another batch is needed, then it will resume scanning * exactly at -- and not after -- the last CPU that's currently diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c index 58540c0aaf..c85d97ca37 100644 --- a/hw/acpi/generic_event_device.c +++ b/hw/acpi/generic_event_device.c @@ -363,7 +363,7 @@ static const VMStateDescription vmstate_ghes = { .version_id = 1, .minimum_version_id = 1, .fields = (const VMStateField[]) { - VMSTATE_UINT64(ghes_addr_le, AcpiGhesState), + VMSTATE_UINT64(hw_error_le, AcpiGhesState), VMSTATE_END_OF_LIST() }, }; @@ -371,7 +371,7 @@ static const VMStateDescription vmstate_ghes = { static bool ghes_needed(void *opaque) { AcpiGedState *s = opaque; - return s->ghes_state.ghes_addr_le; + return s->ghes_state.hw_error_le; } static const VMStateDescription vmstate_ghes_state = { diff --git a/hw/acpi/ghes-stub.c b/hw/acpi/ghes-stub.c index c315de1802..7cec1812da 100644 --- a/hw/acpi/ghes-stub.c +++ b/hw/acpi/ghes-stub.c @@ -11,7 +11,7 @@ #include "qemu/osdep.h" #include "hw/acpi/ghes.h" -int acpi_ghes_record_errors(uint8_t source_id, uint64_t physical_address) +int acpi_ghes_memory_errors(uint16_t source_id, uint64_t physical_address) { return -1; } diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c index e9511d9b8f..b709c177cd 100644 --- a/hw/acpi/ghes.c +++ b/hw/acpi/ghes.c @@ -28,15 +28,12 @@ #include "hw/nvram/fw_cfg.h" #include "qemu/uuid.h" -#define ACPI_GHES_ERRORS_FW_CFG_FILE "etc/hardware_errors" -#define ACPI_GHES_DATA_ADDR_FW_CFG_FILE "etc/hardware_errors_addr" +#define ACPI_HW_ERROR_FW_CFG_FILE "etc/hardware_errors" +#define ACPI_HW_ERROR_ADDR_FW_CFG_FILE "etc/hardware_errors_addr" /* The max size in bytes for one error block */ #define ACPI_GHES_MAX_RAW_DATA_LENGTH (1 * KiB) -/* Now only support ARMv8 SEA notification type error source */ -#define ACPI_GHES_ERROR_SOURCE_COUNT 1 - /* Generic Hardware Error Source version 2 */ #define ACPI_GHES_SOURCE_GENERIC_ERROR_V2 10 @@ -184,51 +181,24 @@ static void acpi_ghes_build_append_mem_cper(GArray *table, build_append_int_noprefix(table, 0, 7); } -static int acpi_ghes_record_mem_error(uint64_t error_block_address, - uint64_t error_physical_addr) +static void +ghes_gen_err_data_uncorrectable_recoverable(GArray *block, + const uint8_t *section_type, + int data_length) { - GArray *block; - - /* Memory Error Section Type */ - const uint8_t uefi_cper_mem_sec[] = - UUID_LE(0xA5BC1114, 0x6F64, 0x4EDE, 0xB8, 0x63, 0x3E, 0x83, \ - 0xED, 0x7C, 0x83, 0xB1); - /* invalid fru id: ACPI 4.0: 17.3.2.6.1 Generic Error Data, * Table 17-13 Generic Error Data Entry */ QemuUUID fru_id = {}; - uint32_t data_length; - - block = g_array_new(false, true /* clear */, 1); - - /* This is the length if adding a new generic error data entry*/ - data_length = ACPI_GHES_DATA_LENGTH + ACPI_GHES_MEM_CPER_LENGTH; - /* - * It should not run out of the preallocated memory if adding a new generic - * error data entry - */ - assert((data_length + ACPI_GHES_GESB_SIZE) <= - ACPI_GHES_MAX_RAW_DATA_LENGTH); /* Build the new generic error status block header */ acpi_ghes_generic_error_status(block, ACPI_GEBS_UNCORRECTABLE, 0, 0, data_length, ACPI_CPER_SEV_RECOVERABLE); /* Build this new generic error data entry header */ - acpi_ghes_generic_error_data(block, uefi_cper_mem_sec, + acpi_ghes_generic_error_data(block, section_type, ACPI_CPER_SEV_RECOVERABLE, 0, 0, ACPI_GHES_MEM_CPER_LENGTH, fru_id, 0); - - /* Build the memory section CPER for above new generic error data entry */ - acpi_ghes_build_append_mem_cper(block, error_physical_addr); - - /* Write the generic error data entry into guest memory */ - cpu_physical_memory_write(error_block_address, block->data, block->len); - - g_array_free(block, true); - - return 0; } /* @@ -236,7 +206,7 @@ static int acpi_ghes_record_mem_error(uint64_t error_block_address, * Initialize "etc/hardware_errors" and "etc/hardware_errors_addr" fw_cfg blobs. * See docs/specs/acpi_hest_ghes.rst for blobs format. */ -void build_ghes_error_table(GArray *hardware_errors, BIOSLinker *linker) +static void build_ghes_error_table(GArray *hardware_errors, BIOSLinker *linker) { int i, error_status_block_offset; @@ -264,7 +234,7 @@ void build_ghes_error_table(GArray *hardware_errors, BIOSLinker *linker) ACPI_GHES_MAX_RAW_DATA_LENGTH * ACPI_GHES_ERROR_SOURCE_COUNT); /* Tell guest firmware to place hardware_errors blob into RAM */ - bios_linker_loader_alloc(linker, ACPI_GHES_ERRORS_FW_CFG_FILE, + bios_linker_loader_alloc(linker, ACPI_HW_ERROR_FW_CFG_FILE, hardware_errors, sizeof(uint64_t), false); for (i = 0; i < ACPI_GHES_ERROR_SOURCE_COUNT; i++) { @@ -273,23 +243,31 @@ void build_ghes_error_table(GArray *hardware_errors, BIOSLinker *linker) * corresponding "Generic Error Status Block" */ bios_linker_loader_add_pointer(linker, - ACPI_GHES_ERRORS_FW_CFG_FILE, sizeof(uint64_t) * i, - sizeof(uint64_t), ACPI_GHES_ERRORS_FW_CFG_FILE, - error_status_block_offset + i * ACPI_GHES_MAX_RAW_DATA_LENGTH); + ACPI_HW_ERROR_FW_CFG_FILE, + sizeof(uint64_t) * i, + sizeof(uint64_t), + ACPI_HW_ERROR_FW_CFG_FILE, + error_status_block_offset + + i * ACPI_GHES_MAX_RAW_DATA_LENGTH); } /* * tell firmware to write hardware_errors GPA into * hardware_errors_addr fw_cfg, once the former has been initialized. */ - bios_linker_loader_write_pointer(linker, ACPI_GHES_DATA_ADDR_FW_CFG_FILE, - 0, sizeof(uint64_t), ACPI_GHES_ERRORS_FW_CFG_FILE, 0); + bios_linker_loader_write_pointer(linker, ACPI_HW_ERROR_ADDR_FW_CFG_FILE, 0, + sizeof(uint64_t), + ACPI_HW_ERROR_FW_CFG_FILE, 0); } /* Build Generic Hardware Error Source version 2 (GHESv2) */ -static void build_ghes_v2(GArray *table_data, int source_id, BIOSLinker *linker) +static void build_ghes_v2(GArray *table_data, + BIOSLinker *linker, + enum AcpiGhesNotifyType notify, + uint16_t source_id) { uint64_t address_offset; + /* * Type: * Generic Hardware Error Source version 2(GHESv2 - Type 10) @@ -316,21 +294,13 @@ static void build_ghes_v2(GArray *table_data, int source_id, BIOSLinker *linker) build_append_gas(table_data, AML_AS_SYSTEM_MEMORY, 0x40, 0, 4 /* QWord access */, 0); bios_linker_loader_add_pointer(linker, ACPI_BUILD_TABLE_FILE, - address_offset + GAS_ADDR_OFFSET, sizeof(uint64_t), - ACPI_GHES_ERRORS_FW_CFG_FILE, source_id * sizeof(uint64_t)); + address_offset + GAS_ADDR_OFFSET, + sizeof(uint64_t), + ACPI_HW_ERROR_FW_CFG_FILE, + source_id * sizeof(uint64_t)); - switch (source_id) { - case ACPI_HEST_SRC_ID_SEA: - /* - * Notification Structure - * Now only enable ARMv8 SEA notification type - */ - build_ghes_hw_error_notification(table_data, ACPI_GHES_NOTIFY_SEA); - break; - default: - error_report("Not support this error source"); - abort(); - } + /* Notification Structure */ + build_ghes_hw_error_notification(table_data, notify); /* Error Status Block Length */ build_append_int_noprefix(table_data, ACPI_GHES_MAX_RAW_DATA_LENGTH, 4); @@ -344,9 +314,11 @@ static void build_ghes_v2(GArray *table_data, int source_id, BIOSLinker *linker) build_append_gas(table_data, AML_AS_SYSTEM_MEMORY, 0x40, 0, 4 /* QWord access */, 0); bios_linker_loader_add_pointer(linker, ACPI_BUILD_TABLE_FILE, - address_offset + GAS_ADDR_OFFSET, - sizeof(uint64_t), ACPI_GHES_ERRORS_FW_CFG_FILE, - (ACPI_GHES_ERROR_SOURCE_COUNT + source_id) * sizeof(uint64_t)); + address_offset + GAS_ADDR_OFFSET, + sizeof(uint64_t), + ACPI_HW_ERROR_FW_CFG_FILE, + (ACPI_GHES_ERROR_SOURCE_COUNT + source_id) + * sizeof(uint64_t)); /* * Read Ack Preserve field @@ -359,17 +331,21 @@ static void build_ghes_v2(GArray *table_data, int source_id, BIOSLinker *linker) } /* Build Hardware Error Source Table */ -void acpi_build_hest(GArray *table_data, BIOSLinker *linker, +void acpi_build_hest(GArray *table_data, GArray *hardware_errors, + BIOSLinker *linker, const char *oem_id, const char *oem_table_id) { AcpiTable table = { .sig = "HEST", .rev = 1, .oem_id = oem_id, .oem_table_id = oem_table_id }; + build_ghes_error_table(hardware_errors, linker); + acpi_table_begin(&table, table_data); /* Error Source Count */ build_append_int_noprefix(table_data, ACPI_GHES_ERROR_SOURCE_COUNT, 4); - build_ghes_v2(table_data, ACPI_HEST_SRC_ID_SEA, linker); + build_ghes_v2(table_data, linker, + ACPI_GHES_NOTIFY_SEA, ACPI_HEST_SRC_ID_SEA); acpi_table_end(linker, &table); } @@ -378,70 +354,132 @@ void acpi_ghes_add_fw_cfg(AcpiGhesState *ags, FWCfgState *s, GArray *hardware_error) { /* Create a read-only fw_cfg file for GHES */ - fw_cfg_add_file(s, ACPI_GHES_ERRORS_FW_CFG_FILE, hardware_error->data, + fw_cfg_add_file(s, ACPI_HW_ERROR_FW_CFG_FILE, hardware_error->data, hardware_error->len); /* Create a read-write fw_cfg file for Address */ - fw_cfg_add_file_callback(s, ACPI_GHES_DATA_ADDR_FW_CFG_FILE, NULL, NULL, - NULL, &(ags->ghes_addr_le), sizeof(ags->ghes_addr_le), false); + fw_cfg_add_file_callback(s, ACPI_HW_ERROR_ADDR_FW_CFG_FILE, NULL, NULL, + NULL, &(ags->hw_error_le), sizeof(ags->hw_error_le), false); ags->present = true; } -int acpi_ghes_record_errors(uint8_t source_id, uint64_t physical_address) +static void get_hw_error_offsets(uint64_t ghes_addr, + uint64_t *cper_addr, + uint64_t *read_ack_register_addr) { - uint64_t error_block_addr, read_ack_register_addr, read_ack_register = 0; - uint64_t start_addr; - bool ret = -1; + if (!ghes_addr) { + return; + } + + /* + * non-HEST version supports only one source, so no need to change + * the start offset based on the source ID. Also, we can't validate + * the source ID, as it is stored inside the HEST table. + */ + + cpu_physical_memory_read(ghes_addr, cper_addr, + sizeof(*cper_addr)); + + *cper_addr = le64_to_cpu(*cper_addr); + + /* + * As the current version supports only one source, the ack offset is + * just sizeof(uint64_t). + */ + *read_ack_register_addr = ghes_addr + sizeof(uint64_t); +} + +void ghes_record_cper_errors(const void *cper, size_t len, + uint16_t source_id, Error **errp) +{ + uint64_t cper_addr = 0, read_ack_register_addr = 0, read_ack_register; AcpiGedState *acpi_ged_state; AcpiGhesState *ags; - assert(source_id < ACPI_HEST_SRC_ID_RESERVED); + if (len > ACPI_GHES_MAX_RAW_DATA_LENGTH) { + error_setg(errp, "GHES CPER record is too big: %zd", len); + return; + } acpi_ged_state = ACPI_GED(object_resolve_path_type("", TYPE_ACPI_GED, NULL)); - g_assert(acpi_ged_state); + if (!acpi_ged_state) { + error_setg(errp, "Can't find ACPI_GED object"); + return; + } ags = &acpi_ged_state->ghes_state; - start_addr = le64_to_cpu(ags->ghes_addr_le); + assert(ACPI_GHES_ERROR_SOURCE_COUNT == 1); + get_hw_error_offsets(le64_to_cpu(ags->hw_error_le), + &cper_addr, &read_ack_register_addr); + + if (!cper_addr) { + error_setg(errp, "can not find Generic Error Status Block"); + return; + } + + cpu_physical_memory_read(read_ack_register_addr, + &read_ack_register, sizeof(read_ack_register)); + + /* zero means OSPM does not acknowledge the error */ + if (!read_ack_register) { + error_setg(errp, + "OSPM does not acknowledge previous error," + " so can not record CPER for current error anymore"); + return; + } + + read_ack_register = cpu_to_le64(0); + /* + * Clear the Read Ack Register, OSPM will write 1 to this register when + * it acknowledges the error. + */ + cpu_physical_memory_write(read_ack_register_addr, + &read_ack_register, sizeof(uint64_t)); + + /* Write the generic error data entry into guest memory */ + cpu_physical_memory_write(cper_addr, cper, len); + + return; +} - if (physical_address) { +int acpi_ghes_memory_errors(uint16_t source_id, uint64_t physical_address) +{ + /* Memory Error Section Type */ + const uint8_t guid[] = + UUID_LE(0xA5BC1114, 0x6F64, 0x4EDE, 0xB8, 0x63, 0x3E, 0x83, \ + 0xED, 0x7C, 0x83, 0xB1); + Error *errp = NULL; + int data_length; + GArray *block; - if (source_id < ACPI_HEST_SRC_ID_RESERVED) { - start_addr += source_id * sizeof(uint64_t); - } + block = g_array_new(false, true /* clear */, 1); - cpu_physical_memory_read(start_addr, &error_block_addr, - sizeof(error_block_addr)); + data_length = ACPI_GHES_DATA_LENGTH + ACPI_GHES_MEM_CPER_LENGTH; + /* + * It should not run out of the preallocated memory if adding a new generic + * error data entry + */ + assert((data_length + ACPI_GHES_GESB_SIZE) <= + ACPI_GHES_MAX_RAW_DATA_LENGTH); - error_block_addr = le64_to_cpu(error_block_addr); + ghes_gen_err_data_uncorrectable_recoverable(block, guid, data_length); - read_ack_register_addr = start_addr + - ACPI_GHES_ERROR_SOURCE_COUNT * sizeof(uint64_t); + /* Build the memory section CPER for above new generic error data entry */ + acpi_ghes_build_append_mem_cper(block, physical_address); - cpu_physical_memory_read(read_ack_register_addr, - &read_ack_register, sizeof(read_ack_register)); + /* Report the error */ + ghes_record_cper_errors(block->data, block->len, source_id, &errp); - /* zero means OSPM does not acknowledge the error */ - if (!read_ack_register) { - error_report("OSPM does not acknowledge previous error," - " so can not record CPER for current error anymore"); - } else if (error_block_addr) { - read_ack_register = cpu_to_le64(0); - /* - * Clear the Read Ack Register, OSPM will write it to 1 when - * it acknowledges this error. - */ - cpu_physical_memory_write(read_ack_register_addr, - &read_ack_register, sizeof(uint64_t)); + g_array_free(block, true); - ret = acpi_ghes_record_mem_error(error_block_addr, - physical_address); - } else - error_report("can not find Generic Error Status Block"); + if (errp) { + error_report_err(errp); + return -1; } - return ret; + return 0; } bool acpi_ghes_present(void) diff --git a/hw/acpi/meson.build b/hw/acpi/meson.build index c8854f4d48..73f02b9691 100644 --- a/hw/acpi/meson.build +++ b/hw/acpi/meson.build @@ -15,6 +15,7 @@ acpi_ss.add(when: 'CONFIG_ACPI_NVDIMM', if_false: files('acpi-nvdimm-stub.c')) acpi_ss.add(when: 'CONFIG_ACPI_PCI', if_true: files('pci.c')) acpi_ss.add(when: 'CONFIG_ACPI_CXL', if_true: files('cxl.c'), if_false: files('cxl-stub.c')) acpi_ss.add(when: 'CONFIG_ACPI_VMGENID', if_true: files('vmgenid.c')) +acpi_ss.add(when: 'CONFIG_ACPI_VMCLOCK', if_true: files('vmclock.c')) acpi_ss.add(when: 'CONFIG_ACPI_HW_REDUCED', if_true: files('generic_event_device.c')) acpi_ss.add(when: 'CONFIG_ACPI_HMAT', if_true: files('hmat.c')) acpi_ss.add(when: 'CONFIG_ACPI_APEI', if_true: files('ghes.c'), if_false: files('ghes-stub.c')) diff --git a/hw/acpi/vmclock.c b/hw/acpi/vmclock.c new file mode 100644 index 0000000000..7387e5c9ca --- /dev/null +++ b/hw/acpi/vmclock.c @@ -0,0 +1,179 @@ +/* + * Virtual Machine Clock Device + * + * Copyright © 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Authors: David Woodhouse <dwmw2@infradead.org> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/module.h" +#include "hw/i386/e820_memory_layout.h" +#include "hw/acpi/acpi.h" +#include "hw/acpi/aml-build.h" +#include "hw/acpi/vmclock.h" +#include "hw/nvram/fw_cfg.h" +#include "hw/qdev-properties.h" +#include "hw/qdev-properties-system.h" +#include "migration/vmstate.h" +#include "system/reset.h" + +#include "standard-headers/linux/vmclock-abi.h" + +void vmclock_build_acpi(VmclockState *vms, GArray *table_data, + BIOSLinker *linker, const char *oem_id) +{ + Aml *ssdt, *dev, *scope, *crs; + AcpiTable table = { .sig = "SSDT", .rev = 1, + .oem_id = oem_id, .oem_table_id = "VMCLOCK" }; + + /* Put VMCLOCK into a separate SSDT table */ + acpi_table_begin(&table, table_data); + ssdt = init_aml_allocator(); + + scope = aml_scope("\\_SB"); + dev = aml_device("VCLK"); + aml_append(dev, aml_name_decl("_HID", aml_string("AMZNC10C"))); + aml_append(dev, aml_name_decl("_CID", aml_string("VMCLOCK"))); + aml_append(dev, aml_name_decl("_DDN", aml_string("VMCLOCK"))); + + /* Simple status method */ + aml_append(dev, aml_name_decl("_STA", aml_int(0xf))); + + crs = aml_resource_template(); + aml_append(crs, aml_qword_memory(AML_POS_DECODE, + AML_MIN_FIXED, AML_MAX_FIXED, + AML_CACHEABLE, AML_READ_ONLY, + 0xffffffffffffffffULL, + vms->physaddr, + vms->physaddr + VMCLOCK_SIZE - 1, + 0, VMCLOCK_SIZE)); + aml_append(dev, aml_name_decl("_CRS", crs)); + aml_append(scope, dev); + aml_append(ssdt, scope); + + g_array_append_vals(table_data, ssdt->buf->data, ssdt->buf->len); + acpi_table_end(linker, &table); + free_aml_allocator(); +} + +static void vmclock_update_guest(VmclockState *vms) +{ + uint64_t disruption_marker; + uint32_t seq_count; + + if (!vms->clk) { + return; + } + + seq_count = le32_to_cpu(vms->clk->seq_count) | 1; + vms->clk->seq_count = cpu_to_le32(seq_count); + /* These barriers pair with read barriers in the guest */ + smp_wmb(); + + disruption_marker = le64_to_cpu(vms->clk->disruption_marker); + disruption_marker++; + vms->clk->disruption_marker = cpu_to_le64(disruption_marker); + + /* These barriers pair with read barriers in the guest */ + smp_wmb(); + vms->clk->seq_count = cpu_to_le32(seq_count + 1); +} + +/* + * After restoring an image, we need to update the guest memory to notify + * it of clock disruption. + */ +static int vmclock_post_load(void *opaque, int version_id) +{ + VmclockState *vms = opaque; + + vmclock_update_guest(vms); + return 0; +} + +static const VMStateDescription vmstate_vmclock = { + .name = "vmclock", + .version_id = 1, + .minimum_version_id = 1, + .post_load = vmclock_post_load, + .fields = (const VMStateField[]) { + VMSTATE_UINT64(physaddr, VmclockState), + VMSTATE_END_OF_LIST() + }, +}; + +static void vmclock_handle_reset(void *opaque) +{ + VmclockState *vms = VMCLOCK(opaque); + + if (!memory_region_is_mapped(&vms->clk_page)) { + memory_region_add_subregion_overlap(get_system_memory(), + vms->physaddr, + &vms->clk_page, 0); + } +} + +static void vmclock_realize(DeviceState *dev, Error **errp) +{ + VmclockState *vms = VMCLOCK(dev); + + /* + * Given that this function is executing, there is at least one VMCLOCK + * device. Check if there are several. + */ + if (!find_vmclock_dev()) { + error_setg(errp, "at most one %s device is permitted", TYPE_VMCLOCK); + return; + } + + vms->physaddr = VMCLOCK_ADDR; + + e820_add_entry(vms->physaddr, VMCLOCK_SIZE, E820_RESERVED); + + memory_region_init_ram(&vms->clk_page, OBJECT(dev), "vmclock_page", + VMCLOCK_SIZE, &error_abort); + memory_region_set_enabled(&vms->clk_page, true); + vms->clk = memory_region_get_ram_ptr(&vms->clk_page); + memset(vms->clk, 0, VMCLOCK_SIZE); + + vms->clk->magic = cpu_to_le32(VMCLOCK_MAGIC); + vms->clk->size = cpu_to_le16(VMCLOCK_SIZE); + vms->clk->version = cpu_to_le16(1); + + /* These are all zero and thus default, but be explicit */ + vms->clk->clock_status = VMCLOCK_STATUS_UNKNOWN; + vms->clk->counter_id = VMCLOCK_COUNTER_INVALID; + + qemu_register_reset(vmclock_handle_reset, vms); + + vmclock_update_guest(vms); +} + +static void vmclock_device_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->vmsd = &vmstate_vmclock; + dc->realize = vmclock_realize; + dc->hotpluggable = false; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); +} + +static const TypeInfo vmclock_device_info = { + .name = TYPE_VMCLOCK, + .parent = TYPE_DEVICE, + .instance_size = sizeof(VmclockState), + .class_init = vmclock_device_class_init, +}; + +static void vmclock_register_types(void) +{ + type_register_static(&vmclock_device_info); +} + +type_init(vmclock_register_types) diff --git a/hw/arm/boot.c b/hw/arm/boot.c index 68fe8654e6..b44bea8a82 100644 --- a/hw/arm/boot.c +++ b/hw/arm/boot.c @@ -857,7 +857,7 @@ static uint64_t load_aarch64_image(const char *filename, hwaddr mem_base, hwaddr kernel_load_offset = KERNEL64_LOAD_ADDR; uint64_t kernel_size = 0; uint8_t *buffer; - int size; + ssize_t size; /* On aarch64, it's the bootloader's job to uncompress the kernel. */ size = load_image_gzipped_buffer(filename, LOAD_IMAGE_MAX_GUNZIP_BYTES, diff --git a/hw/arm/musicpal.c b/hw/arm/musicpal.c index a712ff954b..48a32c2407 100644 --- a/hw/arm/musicpal.c +++ b/hw/arm/musicpal.c @@ -1238,7 +1238,7 @@ static void musicpal_init(MachineState *machine) qdev_get_gpio_in(pic, MP_TIMER4_IRQ), NULL); /* Logically OR both UART IRQs together */ - uart_orgate = DEVICE(object_new(TYPE_OR_IRQ)); + uart_orgate = qdev_new(TYPE_OR_IRQ); object_property_set_int(OBJECT(uart_orgate), "num-lines", 2, &error_fatal); qdev_realize_and_unref(uart_orgate, NULL, &error_fatal); qdev_connect_gpio_out(uart_orgate, 0, diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c index c9b13057a3..3ac8f8e178 100644 --- a/hw/arm/virt-acpi-build.c +++ b/hw/arm/virt-acpi-build.c @@ -946,10 +946,9 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables) build_dbg2(tables_blob, tables->linker, vms); if (vms->ras) { - build_ghes_error_table(tables->hardware_errors, tables->linker); acpi_add_table(table_offsets, tables_blob); - acpi_build_hest(tables_blob, tables->linker, vms->oem_id, - vms->oem_table_id); + acpi_build_hest(tables_blob, tables->hardware_errors, tables->linker, + vms->oem_id, vms->oem_table_id); } if (ms->numa_state->num_nodes > 0) { diff --git a/hw/block/xen-block.c b/hw/block/xen-block.c index 306d38927c..034a18b70e 100644 --- a/hw/block/xen-block.c +++ b/hw/block/xen-block.c @@ -239,7 +239,8 @@ static void xen_block_connect(XenDevice *xendev, Error **errp) return; } - if (xen_device_frontend_scanf(xendev, "protocol", "%ms", &str) != 1) { + str = xen_device_frontend_read(xendev, "protocol"); + if (!str) { /* x86 defaults to the 32-bit protocol even for 64-bit guests. */ if (object_dynamic_cast(OBJECT(qdev_get_machine()), "x86-machine")) { protocol = BLKIF_PROTOCOL_X86_32; diff --git a/hw/char/imx_serial.c b/hw/char/imx_serial.c index 12705a1337..7c353fde50 100644 --- a/hw/char/imx_serial.c +++ b/hw/char/imx_serial.c @@ -27,6 +27,7 @@ #include "qemu/log.h" #include "qemu/module.h" #include "qemu/fifo32.h" +#include "trace.h" #ifndef DEBUG_IMX_UART #define DEBUG_IMX_UART 0 @@ -184,10 +185,10 @@ static uint64_t imx_serial_read(void *opaque, hwaddr offset, unsigned size) { IMXSerialState *s = (IMXSerialState *)opaque; + Chardev *chr = qemu_chr_fe_get_driver(&s->chr); uint32_t c, rx_used; uint8_t rxtl = s->ufcr & TL_MASK; - - DPRINTF("read(offset=0x%" HWADDR_PRIx ")\n", offset); + uint64_t value; switch (offset >> 2) { case 0x0: /* URXD */ @@ -208,49 +209,67 @@ static uint64_t imx_serial_read(void *opaque, hwaddr offset, imx_serial_rx_fifo_ageing_timer_restart(s); qemu_chr_fe_accept_input(&s->chr); } - return c; + value = c; + break; case 0x20: /* UCR1 */ - return s->ucr1; + value = s->ucr1; + break; case 0x21: /* UCR2 */ - return s->ucr2; + value = s->ucr2; + break; case 0x25: /* USR1 */ - return s->usr1; + value = s->usr1; + break; case 0x26: /* USR2 */ - return s->usr2; + value = s->usr2; + break; case 0x2A: /* BRM Modulator */ - return s->ubmr; + value = s->ubmr; + break; case 0x2B: /* Baud Rate Count */ - return s->ubrc; + value = s->ubrc; + break; case 0x2d: /* Test register */ - return s->uts1; + value = s->uts1; + break; case 0x24: /* UFCR */ - return s->ufcr; + value = s->ufcr; + break; case 0x2c: - return s->onems; + value = s->onems; + break; case 0x22: /* UCR3 */ - return s->ucr3; + value = s->ucr3; + break; case 0x23: /* UCR4 */ - return s->ucr4; + value = s->ucr4; + break; case 0x29: /* BRM Incremental */ - return 0x0; /* TODO */ + value = 0x0; /* TODO */ + break; default: qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad register at offset 0x%" HWADDR_PRIx "\n", TYPE_IMX_SERIAL, __func__, offset); - return 0; + value = 0; + break; } + + trace_imx_serial_read(chr ? chr->label : "NODEV", offset, value); + + return value; } static void imx_serial_write(void *opaque, hwaddr offset, @@ -260,8 +279,7 @@ static void imx_serial_write(void *opaque, hwaddr offset, Chardev *chr = qemu_chr_fe_get_driver(&s->chr); unsigned char ch; - DPRINTF("write(offset=0x%" HWADDR_PRIx ", value = 0x%x) to %s\n", - offset, (unsigned int)value, chr ? chr->label : "NODEV"); + trace_imx_serial_write(chr ? chr->label : "NODEV", offset, value); switch (offset >> 2) { case 0x10: /* UTXD */ @@ -373,9 +391,11 @@ static int imx_can_receive(void *opaque) static void imx_put_data(void *opaque, uint32_t value) { IMXSerialState *s = (IMXSerialState *)opaque; + Chardev *chr = qemu_chr_fe_get_driver(&s->chr); uint8_t rxtl = s->ufcr & TL_MASK; - DPRINTF("received char\n"); + trace_imx_serial_put_data(chr ? chr->label : "NODEV", value); + imx_serial_rx_fifo_push(s, value); if (fifo32_num_used(&s->rx_fifo) >= rxtl) { s->usr1 |= USR1_RRDY; diff --git a/hw/char/stm32f2xx_usart.c b/hw/char/stm32f2xx_usart.c index ebcc510f4e..87882daa71 100644 --- a/hw/char/stm32f2xx_usart.c +++ b/hw/char/stm32f2xx_usart.c @@ -30,17 +30,7 @@ #include "qemu/log.h" #include "qemu/module.h" -#ifndef STM_USART_ERR_DEBUG -#define STM_USART_ERR_DEBUG 0 -#endif - -#define DB_PRINT_L(lvl, fmt, args...) do { \ - if (STM_USART_ERR_DEBUG >= lvl) { \ - qemu_log("%s: " fmt, __func__, ## args); \ - } \ -} while (0) - -#define DB_PRINT(fmt, args...) DB_PRINT_L(1, fmt, ## args) +#include "trace.h" static int stm32f2xx_usart_can_receive(void *opaque) { @@ -67,10 +57,11 @@ static void stm32f2xx_update_irq(STM32F2XXUsartState *s) static void stm32f2xx_usart_receive(void *opaque, const uint8_t *buf, int size) { STM32F2XXUsartState *s = opaque; + DeviceState *d = DEVICE(s); if (!(s->usart_cr1 & USART_CR1_UE && s->usart_cr1 & USART_CR1_RE)) { /* USART not enabled - drop the chars */ - DB_PRINT("Dropping the chars\n"); + trace_stm32f2xx_usart_drop(d->id); return; } @@ -79,7 +70,7 @@ static void stm32f2xx_usart_receive(void *opaque, const uint8_t *buf, int size) stm32f2xx_update_irq(s); - DB_PRINT("Receiving: %c\n", s->usart_dr); + trace_stm32f2xx_usart_receive(d->id, *buf); } static void stm32f2xx_usart_reset(DeviceState *dev) @@ -101,49 +92,55 @@ static uint64_t stm32f2xx_usart_read(void *opaque, hwaddr addr, unsigned int size) { STM32F2XXUsartState *s = opaque; - uint64_t retvalue; - - DB_PRINT("Read 0x%"HWADDR_PRIx"\n", addr); + DeviceState *d = DEVICE(s); + uint64_t retvalue = 0; switch (addr) { case USART_SR: retvalue = s->usart_sr; qemu_chr_fe_accept_input(&s->chr); - return retvalue; + break; case USART_DR: - DB_PRINT("Value: 0x%" PRIx32 ", %c\n", s->usart_dr, (char) s->usart_dr); retvalue = s->usart_dr & 0x3FF; s->usart_sr &= ~USART_SR_RXNE; qemu_chr_fe_accept_input(&s->chr); stm32f2xx_update_irq(s); - return retvalue; + break; case USART_BRR: - return s->usart_brr; + retvalue = s->usart_brr; + break; case USART_CR1: - return s->usart_cr1; + retvalue = s->usart_cr1; + break; case USART_CR2: - return s->usart_cr2; + retvalue = s->usart_cr2; + break; case USART_CR3: - return s->usart_cr3; + retvalue = s->usart_cr3; + break; case USART_GTPR: - return s->usart_gtpr; + retvalue = s->usart_gtpr; + break; default: qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset 0x%"HWADDR_PRIx"\n", __func__, addr); return 0; } - return 0; + trace_stm32f2xx_usart_read(d->id, size, addr, retvalue); + + return retvalue; } static void stm32f2xx_usart_write(void *opaque, hwaddr addr, uint64_t val64, unsigned int size) { STM32F2XXUsartState *s = opaque; + DeviceState *d = DEVICE(s); uint32_t value = val64; unsigned char ch; - DB_PRINT("Write 0x%" PRIx32 ", 0x%"HWADDR_PRIx"\n", value, addr); + trace_stm32f2xx_usart_write(d->id, size, addr, val64); switch (addr) { case USART_SR: diff --git a/hw/char/trace-events b/hw/char/trace-events index 59e1f734a7..3ee7cfcdff 100644 --- a/hw/char/trace-events +++ b/hw/char/trace-events @@ -52,6 +52,11 @@ escc_sunkbd_event_out(int ch) "Translated keycode 0x%2.2x" escc_kbd_command(int val) "Command %d" escc_sunmouse_event(int dx, int dy, int buttons_state) "dx=%d dy=%d buttons=0x%01x" +# imx_serial.c +imx_serial_read(const char *chrname, uint64_t addr, uint64_t value) "%s:[0x%03" PRIu64 "] -> 0x%08" PRIx64 +imx_serial_write(const char *chrname, uint64_t addr, uint64_t value) "%s:[0x%03" PRIu64 "] <- 0x%08" PRIx64 +imx_serial_put_data(const char *chrname, uint32_t value) "%s: 0x%" PRIx32 + # pl011.c pl011_irq_state(int level) "irq state %d" pl011_read(uint32_t addr, uint32_t value, const char *regname) "addr 0x%03x value 0x%08x reg %s" @@ -125,3 +130,9 @@ xen_console_unrealize(unsigned int idx) "idx %u" xen_console_realize(unsigned int idx, const char *chrdev) "idx %u chrdev %s" xen_console_device_create(unsigned int idx) "idx %u" xen_console_device_destroy(unsigned int idx) "idx %u" + +# stm32f2xx_usart.c +stm32f2xx_usart_read(char *id, unsigned size, uint64_t ofs, uint64_t val) " %s size %d ofs 0x%02" PRIx64 " -> 0x%02" PRIx64 +stm32f2xx_usart_write(char *id, unsigned size, uint64_t ofs, uint64_t val) "%s size %d ofs 0x%02" PRIx64 " <- 0x%02" PRIx64 +stm32f2xx_usart_drop(char *id) " %s dropping the chars" +stm32f2xx_usart_receive(char *id, uint8_t chr) " %s receiving '%c'" diff --git a/hw/char/xen_console.c b/hw/char/xen_console.c index ef0c2912ef..d03c188d1d 100644 --- a/hw/char/xen_console.c +++ b/hw/char/xen_console.c @@ -367,28 +367,28 @@ static char *xen_console_get_name(XenDevice *xendev, Error **errp) if (con->dev == -1) { XenBus *xenbus = XEN_BUS(qdev_get_parent_bus(DEVICE(xendev))); - char fe_path[XENSTORE_ABS_PATH_MAX + 1]; int idx = (xen_mode == XEN_EMULATE) ? 0 : 1; + Error *local_err = NULL; char *value; /* Theoretically we could go up to INT_MAX here but that's overkill */ while (idx < 100) { if (!idx) { - snprintf(fe_path, sizeof(fe_path), - "/local/domain/%u/console", xendev->frontend_id); + value = xs_node_read(xenbus->xsh, XBT_NULL, NULL, &local_err, + "/local/domain/%u/console", + xendev->frontend_id); } else { - snprintf(fe_path, sizeof(fe_path), - "/local/domain/%u/device/console/%u", - xendev->frontend_id, idx); + value = xs_node_read(xenbus->xsh, XBT_NULL, NULL, &local_err, + "/local/domain/%u/device/console/%u", + xendev->frontend_id, idx); } - value = qemu_xen_xs_read(xenbus->xsh, XBT_NULL, fe_path, NULL); if (!value) { if (errno == ENOENT) { con->dev = idx; + error_free(local_err); goto found; } - error_setg(errp, "cannot read %s: %s", fe_path, - strerror(errno)); + error_propagate(errp, local_err); return NULL; } free(value); @@ -550,7 +550,8 @@ static void xen_console_device_create(XenBackendInstance *backend, goto fail; } - if (xs_node_scanf(xsh, XBT_NULL, fe, "type", errp, "%ms", &type) != 1) { + type = xs_node_read(xsh, XBT_NULL, NULL, errp, "%s/%s", fe, "type"); + if (!type) { error_prepend(errp, "failed to read console device type: "); goto fail; } @@ -568,7 +569,8 @@ static void xen_console_device_create(XenBackendInstance *backend, snprintf(label, sizeof(label), "xencons%ld", number); - if (xs_node_scanf(xsh, XBT_NULL, fe, "output", NULL, "%ms", &output) == 1) { + output = xs_node_read(xsh, XBT_NULL, NULL, errp, "%s/%s", fe, "output"); + if (output) { /* * FIXME: sure we want to support implicit * muxed monitors here? @@ -579,19 +581,27 @@ static void xen_console_device_create(XenBackendInstance *backend, output); goto fail; } - } else if (number) { - cd = serial_hd(number); - if (!cd) { - error_prepend(errp, "console: No serial device #%ld found: ", - number); - goto fail; - } + } else if (errno != ENOENT) { + error_prepend(errp, "console: No valid chardev found: "); + goto fail; } else { - /* No 'output' node on primary console: use null. */ - cd = qemu_chr_new(label, "null", NULL); - if (!cd) { - error_setg(errp, "console: failed to create null device"); - goto fail; + error_free(*errp); + *errp = NULL; + + if (number) { + cd = serial_hd(number); + if (!cd) { + error_setg(errp, "console: No serial device #%ld found", + number); + goto fail; + } + } else { + /* No 'output' node on primary console: use null. */ + cd = qemu_chr_new(label, "null", NULL); + if (!cd) { + error_setg(errp, "console: failed to create null device"); + goto fail; + } } } diff --git a/hw/core/gpio.c b/hw/core/gpio.c index 80d07a6ec9..6e32a8eec6 100644 --- a/hw/core/gpio.c +++ b/hw/core/gpio.c @@ -121,8 +121,7 @@ void qdev_connect_gpio_out_named(DeviceState *dev, const char *name, int n, name ? name : "unnamed-gpio-out", n); if (input_pin && !OBJECT(input_pin)->parent) { /* We need a name for object_property_set_link to work */ - object_property_add_child(container_get(qdev_get_machine(), - "/unattached"), + object_property_add_child(machine_get_container("unattached"), "non-qdev-gpio[*]", OBJECT(input_pin)); } object_property_set_link(OBJECT(dev), propname, diff --git a/hw/core/loader.c b/hw/core/loader.c index c0407e2d0d..4dfdb027ee 100644 --- a/hw/core/loader.c +++ b/hw/core/loader.c @@ -886,11 +886,11 @@ struct linux_efi_zboot_header { * * If the image is not a Linux EFI zboot image, do nothing and return success. */ -ssize_t unpack_efi_zboot_image(uint8_t **buffer, int *size) +ssize_t unpack_efi_zboot_image(uint8_t **buffer, ssize_t *size) { const struct linux_efi_zboot_header *header; uint8_t *data = NULL; - int ploff, plsize; + ssize_t ploff, plsize; ssize_t bytes; /* ignore if this is too small to be a EFI zboot image */ diff --git a/hw/core/machine.c b/hw/core/machine.c index c949af9766..c23b399496 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -36,7 +36,9 @@ #include "hw/virtio/virtio-iommu.h" #include "audio/audio.h" -GlobalProperty hw_compat_9_2[] = {}; +GlobalProperty hw_compat_9_2[] = { + {"arm-cpu", "backcompat-pauth-default-use-qarma5", "true"}, +}; const size_t hw_compat_9_2_len = G_N_ELEMENTS(hw_compat_9_2); GlobalProperty hw_compat_9_1[] = { diff --git a/hw/core/meson.build b/hw/core/meson.build index ce9dfa3f4b..65a1698ed1 100644 --- a/hw/core/meson.build +++ b/hw/core/meson.build @@ -46,3 +46,4 @@ system_ss.add(files( 'vm-change-state-handler.c', 'clock-vmstate.c', )) +user_ss.add(files('qdev-user.c')) diff --git a/hw/core/qdev-hotplug.c b/hw/core/qdev-hotplug.c index d495d0e9c7..ff176dc1bb 100644 --- a/hw/core/qdev-hotplug.c +++ b/hw/core/qdev-hotplug.c @@ -12,6 +12,7 @@ #include "qemu/osdep.h" #include "hw/qdev-core.h" #include "hw/boards.h" +#include "qapi/error.h" HotplugHandler *qdev_get_machine_hotplug_handler(DeviceState *dev) { @@ -30,12 +31,48 @@ HotplugHandler *qdev_get_machine_hotplug_handler(DeviceState *dev) return NULL; } -bool qdev_hotplug_allowed(DeviceState *dev, Error **errp) +static bool qdev_hotplug_unplug_allowed_common(DeviceState *dev, BusState *bus, + Error **errp) +{ + DeviceClass *dc = DEVICE_GET_CLASS(dev); + + if (!dc->hotpluggable) { + error_setg(errp, "Device '%s' does not support hotplugging", + object_get_typename(OBJECT(dev))); + return false; + } + + if (bus) { + if (!qbus_is_hotpluggable(bus)) { + error_setg(errp, "Bus '%s' does not support hotplugging", + bus->name); + return false; + } + } else { + if (!qdev_get_machine_hotplug_handler(dev)) { + /* + * No bus, no machine hotplug handler --> device is not hotpluggable + */ + error_setg(errp, + "Device '%s' can not be hotplugged on this machine", + object_get_typename(OBJECT(dev))); + return false; + } + } + + return true; +} + +bool qdev_hotplug_allowed(DeviceState *dev, BusState *bus, Error **errp) { MachineState *machine; MachineClass *mc; Object *m_obj = qdev_get_machine(); + if (!qdev_hotplug_unplug_allowed_common(dev, bus, errp)) { + return false; + } + if (object_dynamic_cast(m_obj, TYPE_MACHINE)) { machine = MACHINE(m_obj); mc = MACHINE_GET_CLASS(machine); @@ -47,6 +84,12 @@ bool qdev_hotplug_allowed(DeviceState *dev, Error **errp) return true; } +bool qdev_hotunplug_allowed(DeviceState *dev, Error **errp) +{ + return !qdev_unplug_blocked(dev, errp) && + qdev_hotplug_unplug_allowed_common(dev, dev->parent_bus, errp); +} + HotplugHandler *qdev_get_bus_hotplug_handler(DeviceState *dev) { if (dev->parent_bus) { diff --git a/hw/core/qdev-user.c b/hw/core/qdev-user.c new file mode 100644 index 0000000000..3d421d8f4e --- /dev/null +++ b/hw/core/qdev-user.c @@ -0,0 +1,19 @@ +/* + * QDev helpers specific to user emulation. + * + * Copyright 2025 Linaro, Ltd. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ +#include "qemu/osdep.h" +#include "qom/object.h" +#include "hw/qdev-core.h" + +void qdev_create_fake_machine(void) +{ + Object *fake_machine_obj; + + fake_machine_obj = object_property_add_new_container(object_get_root(), + "machine"); + object_property_add_new_container(fake_machine_obj, "unattached"); +} diff --git a/hw/core/qdev.c b/hw/core/qdev.c index 57c1d9df3a..82bbdcb654 100644 --- a/hw/core/qdev.c +++ b/hw/core/qdev.c @@ -476,8 +476,7 @@ static void device_set_realized(Object *obj, bool value, Error **errp) if (!obj->parent) { gchar *name = g_strdup_printf("device[%d]", unattached_count++); - object_property_add_child(container_get(qdev_get_machine(), - "/unattached"), + object_property_add_child(machine_get_container("unattached"), name, obj); unattached_parent = true; g_free(name); @@ -691,7 +690,6 @@ static void device_finalize(Object *obj) dev->canonical_path = NULL; } - qobject_unref(dev->opts); g_free(dev->id); } @@ -818,12 +816,28 @@ Object *qdev_get_machine(void) static Object *dev; if (dev == NULL) { - dev = container_get(object_get_root(), "/machine"); + dev = object_resolve_path_component(object_get_root(), "machine"); + /* + * Any call to this function before machine is created is treated + * as a programming error as of now. + */ + assert(dev); } return dev; } +Object *machine_get_container(const char *name) +{ + Object *container, *machine; + + machine = qdev_get_machine(); + container = object_resolve_path_component(machine, name); + assert(object_dynamic_cast(container, TYPE_CONTAINER)); + + return container; +} + char *qdev_get_human_name(DeviceState *dev) { g_assert(dev != NULL); diff --git a/hw/core/sysbus.c b/hw/core/sysbus.c index e64d99c8ed..9355849ff0 100644 --- a/hw/core/sysbus.c +++ b/hw/core/sysbus.c @@ -65,9 +65,9 @@ void foreach_dynamic_sysbus_device(FindSysbusDeviceFunc *func, void *opaque) }; /* Loop through all sysbus devices that were spawned outside the machine */ - container = container_get(qdev_get_machine(), "/peripheral"); + container = machine_get_container("peripheral"); find_sysbus_device(container, &find); - container = container_get(qdev_get_machine(), "/peripheral-anon"); + container = machine_get_container("peripheral-anon"); find_sysbus_device(container, &find); } diff --git a/hw/display/vhost-user-gpu.c b/hw/display/vhost-user-gpu.c index 12d5c37ee5..2aed6243f6 100644 --- a/hw/display/vhost-user-gpu.c +++ b/hw/display/vhost-user-gpu.c @@ -631,6 +631,14 @@ vhost_user_gpu_device_realize(DeviceState *qdev, Error **errp) error_report("EDID requested but the backend doesn't support it."); g->parent_obj.conf.flags &= ~(1 << VIRTIO_GPU_FLAG_EDID_ENABLED); } + if (virtio_has_feature(g->vhost->dev.features, + VIRTIO_GPU_F_RESOURCE_UUID)) { + g->parent_obj.conf.flags |= 1 << VIRTIO_GPU_FLAG_RESOURCE_UUID_ENABLED; + } + if (virtio_has_feature(g->vhost->dev.features, + VIRTIO_GPU_F_RESOURCE_UUID)) { + g->parent_obj.conf.flags |= 1 << VIRTIO_GPU_FLAG_RESOURCE_UUID_ENABLED; + } if (!virtio_gpu_base_device_realize(qdev, NULL, NULL, errp)) { return; diff --git a/hw/display/virtio-gpu-base.c b/hw/display/virtio-gpu-base.c index 4fc7ef8896..7827536ac4 100644 --- a/hw/display/virtio-gpu-base.c +++ b/hw/display/virtio-gpu-base.c @@ -235,6 +235,9 @@ virtio_gpu_base_get_features(VirtIODevice *vdev, uint64_t features, if (virtio_gpu_context_init_enabled(g->conf)) { features |= (1 << VIRTIO_GPU_F_CONTEXT_INIT); } + if (virtio_gpu_resource_uuid_enabled(g->conf)) { + features |= (1 << VIRTIO_GPU_F_RESOURCE_UUID); + } return features; } diff --git a/hw/gpio/imx_gpio.c b/hw/gpio/imx_gpio.c index 898f80f8c8..549a281ed7 100644 --- a/hw/gpio/imx_gpio.c +++ b/hw/gpio/imx_gpio.c @@ -24,6 +24,7 @@ #include "migration/vmstate.h" #include "qemu/log.h" #include "qemu/module.h" +#include "trace.h" #ifndef DEBUG_IMX_GPIO #define DEBUG_IMX_GPIO 0 @@ -34,14 +35,6 @@ typedef enum IMXGPIOLevel { IMX_GPIO_LEVEL_HIGH = 1, } IMXGPIOLevel; -#define DPRINTF(fmt, args...) \ - do { \ - if (DEBUG_IMX_GPIO) { \ - fprintf(stderr, "[%s]%s: " fmt , TYPE_IMX_GPIO, \ - __func__, ##args); \ - } \ - } while (0) - static const char *imx_gpio_reg_name(uint32_t reg) { switch (reg) { @@ -111,6 +104,8 @@ static void imx_gpio_set(void *opaque, int line, int level) IMXGPIOState *s = IMX_GPIO(opaque); IMXGPIOLevel imx_level = level ? IMX_GPIO_LEVEL_HIGH : IMX_GPIO_LEVEL_LOW; + trace_imx_gpio_set(DEVICE(s)->canonical_path, line, imx_level); + imx_gpio_set_int_line(s, line, imx_level); /* this is an input signal, so set PSR */ @@ -200,7 +195,8 @@ static uint64_t imx_gpio_read(void *opaque, hwaddr offset, unsigned size) break; } - DPRINTF("(%s) = 0x%" PRIx32 "\n", imx_gpio_reg_name(offset), reg_value); + trace_imx_gpio_read(DEVICE(s)->canonical_path, imx_gpio_reg_name(offset), + reg_value); return reg_value; } @@ -210,8 +206,8 @@ static void imx_gpio_write(void *opaque, hwaddr offset, uint64_t value, { IMXGPIOState *s = IMX_GPIO(opaque); - DPRINTF("(%s, value = 0x%" PRIx32 ")\n", imx_gpio_reg_name(offset), - (uint32_t)value); + trace_imx_gpio_write(DEVICE(s)->canonical_path, imx_gpio_reg_name(offset), + value); switch (offset) { case DR_ADDR: diff --git a/hw/gpio/trace-events b/hw/gpio/trace-events index b91cc7e9a4..cea896b28f 100644 --- a/hw/gpio/trace-events +++ b/hw/gpio/trace-events @@ -1,5 +1,10 @@ # See docs/devel/tracing.rst for syntax documentation. +# imx_gpio.c +imx_gpio_read(const char *id, const char *reg, uint32_t value) "%s:[%s] -> 0x%" PRIx32 +imx_gpio_write(const char *id, const char *reg, uint32_t value) "%s:[%s] <- 0x%" PRIx32 +imx_gpio_set(const char *id, int line, int level) "%s:[%d] <- %d" + # npcm7xx_gpio.c npcm7xx_gpio_read(const char *id, uint64_t offset, uint64_t value) " %s offset: 0x%04" PRIx64 " value 0x%08" PRIx64 npcm7xx_gpio_write(const char *id, uint64_t offset, uint64_t value) "%s offset: 0x%04" PRIx64 " value 0x%08" PRIx64 diff --git a/hw/hppa/machine.c b/hw/hppa/machine.c index 65259308e2..8230f43e41 100644 --- a/hw/hppa/machine.c +++ b/hw/hppa/machine.c @@ -655,12 +655,12 @@ static void hppa_machine_reset(MachineState *ms, ResetType type) for (i = 0; i < smp_cpus; i++) { CPUState *cs = CPU(cpu[i]); + /* reset CPU */ + resettable_reset(OBJECT(cs), RESET_TYPE_COLD); + cpu_set_pc(cs, firmware_entry); cpu[i]->env.psw = PSW_Q; cpu[i]->env.gr[5] = CPU_HPA + i * 0x1000; - - cs->exception_index = -1; - cs->halted = 0; } /* already initialized by machine_hppa_init()? */ diff --git a/hw/i2c/imx_i2c.c b/hw/i2c/imx_i2c.c index c565fd5b8a..d62213b9e0 100644 --- a/hw/i2c/imx_i2c.c +++ b/hw/i2c/imx_i2c.c @@ -25,18 +25,7 @@ #include "hw/i2c/i2c.h" #include "qemu/log.h" #include "qemu/module.h" - -#ifndef DEBUG_IMX_I2C -#define DEBUG_IMX_I2C 0 -#endif - -#define DPRINTF(fmt, args...) \ - do { \ - if (DEBUG_IMX_I2C) { \ - fprintf(stderr, "[%s]%s: " fmt , TYPE_IMX_I2C, \ - __func__, ##args); \ - } \ - } while (0) +#include "trace.h" static const char *imx_i2c_get_regname(unsigned offset) { @@ -152,8 +141,8 @@ static uint64_t imx_i2c_read(void *opaque, hwaddr offset, break; } - DPRINTF("read %s [0x%" HWADDR_PRIx "] -> 0x%02x\n", - imx_i2c_get_regname(offset), offset, value); + trace_imx_i2c_read(DEVICE(s)->canonical_path, imx_i2c_get_regname(offset), + offset, value); return (uint64_t)value; } @@ -163,8 +152,8 @@ static void imx_i2c_write(void *opaque, hwaddr offset, { IMXI2CState *s = IMX_I2C(opaque); - DPRINTF("write %s [0x%" HWADDR_PRIx "] <- 0x%02x\n", - imx_i2c_get_regname(offset), offset, (int)value); + trace_imx_i2c_read(DEVICE(s)->canonical_path, imx_i2c_get_regname(offset), + offset, value); value &= 0xff; diff --git a/hw/i2c/trace-events b/hw/i2c/trace-events index f708a7ace1..1ad0e95c0e 100644 --- a/hw/i2c/trace-events +++ b/hw/i2c/trace-events @@ -56,3 +56,8 @@ npcm7xx_smbus_recv_fifo(const char *id, uint8_t received, uint8_t expected) "%s pca954x_write_bytes(uint8_t value) "PCA954X write data: 0x%02x" pca954x_read_data(uint8_t value) "PCA954X read data: 0x%02x" + +# imx_i2c.c + +imx_i2c_read(const char *id, const char *reg, uint64_t ofs, uint64_t value) "%s:[%s (0x%" PRIx64 ")] -> 0x%02" PRIx64 +imx_i2c_write(const char *id, const char *reg, uint64_t ofs, uint64_t value) "%s:[%s (0x%" PRIx64 ")] <- 0x%02" PRIx64 diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig index 32818480d2..d34ce07b21 100644 --- a/hw/i386/Kconfig +++ b/hw/i386/Kconfig @@ -43,6 +43,7 @@ config PC select SERIAL_ISA select ACPI_PCI select ACPI_VMGENID + select ACPI_VMCLOCK select VIRTIO_PMEM_SUPPORTED select VIRTIO_MEM_SUPPORTED select HV_BALLOON_SUPPORTED diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 733b8f0851..53b7306b43 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -43,6 +43,7 @@ #include "system/tpm.h" #include "hw/acpi/tpm.h" #include "hw/acpi/vmgenid.h" +#include "hw/acpi/vmclock.h" #include "hw/acpi/erst.h" #include "hw/acpi/piix4.h" #include "system/tpm_backend.h" @@ -654,6 +655,7 @@ static Aml *aml_pci_pdsm(void) Aml *acpi_index = aml_local(2); Aml *zero = aml_int(0); Aml *one = aml_int(1); + Aml *not_supp = aml_int(0xFFFFFFFF); Aml *func = aml_arg(2); Aml *params = aml_arg(4); Aml *bnum = aml_derefof(aml_index(params, aml_int(0))); @@ -678,7 +680,7 @@ static Aml *aml_pci_pdsm(void) */ ifctx1 = aml_if(aml_lnot( aml_or(aml_equal(acpi_index, zero), - aml_equal(acpi_index, aml_int(0xFFFFFFFF)), NULL) + aml_equal(acpi_index, not_supp), NULL) )); { /* have supported functions */ @@ -704,18 +706,30 @@ static Aml *aml_pci_pdsm(void) { Aml *pkg = aml_package(2); - aml_append(pkg, zero); - /* - * optional, if not impl. should return null string - */ - aml_append(pkg, aml_string("%s", "")); - aml_append(ifctx, aml_store(pkg, ret)); - aml_append(ifctx, aml_store(aml_call2("AIDX", bnum, sunum), acpi_index)); + aml_append(ifctx, aml_store(pkg, ret)); /* - * update acpi-index to actual value + * Windows calls func=7 without checking if it's available, + * as workaround Microsoft has suggested to return invalid for func7 + * Package, so return 2 elements package but only initialize elements + * when acpi_index is supported and leave them uninitialized, which + * leads elements to being Uninitialized ObjectType and should trip + * Windows into discarding result as an unexpected and prevent setting + * bogus 'PCI Label' on the device. */ - aml_append(ifctx, aml_store(acpi_index, aml_index(ret, zero))); + ifctx1 = aml_if(aml_lnot(aml_lor( + aml_equal(acpi_index, zero), aml_equal(acpi_index, not_supp) + ))); + { + aml_append(ifctx1, aml_store(acpi_index, aml_index(ret, zero))); + /* + * optional, if not impl. should return null string + */ + aml_append(ifctx1, aml_store(aml_string("%s", ""), + aml_index(ret, one))); + } + aml_append(ifctx, ifctx1); + aml_append(ifctx, aml_return(ret)); } @@ -2432,7 +2446,7 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) uint8_t *u; GArray *tables_blob = tables->table_data; AcpiSlicOem slic_oem = { .id = NULL, .table_id = NULL }; - Object *vmgenid_dev; + Object *vmgenid_dev, *vmclock_dev; char *oem_id; char *oem_table_id; @@ -2505,6 +2519,13 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) tables->vmgenid, tables->linker, x86ms->oem_id); } + vmclock_dev = find_vmclock_dev(); + if (vmclock_dev) { + acpi_add_table(table_offsets, tables_blob); + vmclock_build_acpi(VMCLOCK(vmclock_dev), tables_blob, tables->linker, + x86ms->oem_id); + } + if (misc.has_hpet) { acpi_add_table(table_offsets, tables_blob); build_hpet(tables_blob, tables->linker, x86ms->oem_id, diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index a8c275f9ce..f366c223d0 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -48,7 +48,10 @@ /* pe operations */ #define VTD_PE_GET_TYPE(pe) ((pe)->val[0] & VTD_SM_PASID_ENTRY_PGTT) -#define VTD_PE_GET_LEVEL(pe) (2 + (((pe)->val[0] >> 2) & VTD_SM_PASID_ENTRY_AW)) +#define VTD_PE_GET_FL_LEVEL(pe) \ + (4 + (((pe)->val[2] >> 2) & VTD_SM_PASID_ENTRY_FLPM)) +#define VTD_PE_GET_SL_LEVEL(pe) \ + (2 + (((pe)->val[0] >> 2) & VTD_SM_PASID_ENTRY_AW)) /* * PCI bus number (or SID) is not reliable since the device is usaully @@ -67,6 +70,11 @@ struct vtd_hiod_key { uint8_t devfn; }; +struct vtd_as_raw_key { + uint16_t sid; + uint32_t pasid; +}; + struct vtd_iotlb_key { uint64_t gfn; uint32_t pasid; @@ -284,15 +292,15 @@ static gboolean vtd_hash_remove_by_domain(gpointer key, gpointer value, } /* The shift of an addr for a certain level of paging structure */ -static inline uint32_t vtd_slpt_level_shift(uint32_t level) +static inline uint32_t vtd_pt_level_shift(uint32_t level) { assert(level != 0); - return VTD_PAGE_SHIFT_4K + (level - 1) * VTD_SL_LEVEL_BITS; + return VTD_PAGE_SHIFT_4K + (level - 1) * VTD_LEVEL_BITS; } -static inline uint64_t vtd_slpt_level_page_mask(uint32_t level) +static inline uint64_t vtd_pt_level_page_mask(uint32_t level) { - return ~((1ULL << vtd_slpt_level_shift(level)) - 1); + return ~((1ULL << vtd_pt_level_shift(level)) - 1); } static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value, @@ -302,9 +310,43 @@ static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value, VTDIOTLBPageInvInfo *info = (VTDIOTLBPageInvInfo *)user_data; uint64_t gfn = (info->addr >> VTD_PAGE_SHIFT_4K) & info->mask; uint64_t gfn_tlb = (info->addr & entry->mask) >> VTD_PAGE_SHIFT_4K; - return (entry->domain_id == info->domain_id) && - (((entry->gfn & info->mask) == gfn) || - (entry->gfn == gfn_tlb)); + + if (entry->domain_id != info->domain_id) { + return false; + } + + /* + * According to spec, IOTLB entries caching first-stage (PGTT=001b) or + * nested (PGTT=011b) mapping associated with specified domain-id are + * invalidated. Nested isn't supported yet, so only need to check 001b. + */ + if (entry->pgtt == VTD_SM_PASID_ENTRY_FLT) { + return true; + } + + return (entry->gfn & info->mask) == gfn || entry->gfn == gfn_tlb; +} + +static gboolean vtd_hash_remove_by_page_piotlb(gpointer key, gpointer value, + gpointer user_data) +{ + VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value; + VTDIOTLBPageInvInfo *info = (VTDIOTLBPageInvInfo *)user_data; + uint64_t gfn = (info->addr >> VTD_PAGE_SHIFT_4K) & info->mask; + uint64_t gfn_tlb = (info->addr & entry->mask) >> VTD_PAGE_SHIFT_4K; + + /* + * According to spec, PASID-based-IOTLB Invalidation in page granularity + * doesn't invalidate IOTLB entries caching second-stage (PGTT=010b) + * or pass-through (PGTT=100b) mappings. Nested isn't supported yet, + * so only need to check first-stage (PGTT=001b) mappings. + */ + if (entry->pgtt != VTD_SM_PASID_ENTRY_FLT) { + return false; + } + + return entry->domain_id == info->domain_id && entry->pasid == info->pasid && + ((entry->gfn & info->mask) == gfn || entry->gfn == gfn_tlb); } /* Reset all the gen of VTDAddressSpace to zero and set the gen of @@ -349,7 +391,7 @@ static void vtd_reset_caches(IntelIOMMUState *s) static uint64_t vtd_get_iotlb_gfn(hwaddr addr, uint32_t level) { - return (addr & vtd_slpt_level_page_mask(level)) >> VTD_PAGE_SHIFT_4K; + return (addr & vtd_pt_level_page_mask(level)) >> VTD_PAGE_SHIFT_4K; } /* Must be called with IOMMU lock held */ @@ -360,7 +402,7 @@ static VTDIOTLBEntry *vtd_lookup_iotlb(IntelIOMMUState *s, uint16_t source_id, VTDIOTLBEntry *entry; unsigned level; - for (level = VTD_SL_PT_LEVEL; level < VTD_SL_PML4_LEVEL; level++) { + for (level = VTD_PT_LEVEL; level < VTD_PML4_LEVEL; level++) { key.gfn = vtd_get_iotlb_gfn(addr, level); key.level = level; key.sid = source_id; @@ -377,15 +419,15 @@ out: /* Must be with IOMMU lock held */ static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id, - uint16_t domain_id, hwaddr addr, uint64_t slpte, + uint16_t domain_id, hwaddr addr, uint64_t pte, uint8_t access_flags, uint32_t level, - uint32_t pasid) + uint32_t pasid, uint8_t pgtt) { VTDIOTLBEntry *entry = g_malloc(sizeof(*entry)); struct vtd_iotlb_key *key = g_malloc(sizeof(*key)); uint64_t gfn = vtd_get_iotlb_gfn(addr, level); - trace_vtd_iotlb_page_update(source_id, addr, slpte, domain_id); + trace_vtd_iotlb_page_update(source_id, addr, pte, domain_id); if (g_hash_table_size(s->iotlb) >= VTD_IOTLB_MAX_SIZE) { trace_vtd_iotlb_reset("iotlb exceeds size limit"); vtd_reset_iotlb_locked(s); @@ -393,10 +435,11 @@ static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id, entry->gfn = gfn; entry->domain_id = domain_id; - entry->slpte = slpte; + entry->pte = pte; entry->access_flags = access_flags; - entry->mask = vtd_slpt_level_page_mask(level); + entry->mask = vtd_pt_level_page_mask(level); entry->pasid = pasid; + entry->pgtt = pgtt; key->gfn = gfn; key->sid = source_id; @@ -710,32 +753,32 @@ static inline dma_addr_t vtd_ce_get_slpt_base(VTDContextEntry *ce) return ce->lo & VTD_CONTEXT_ENTRY_SLPTPTR; } -static inline uint64_t vtd_get_slpte_addr(uint64_t slpte, uint8_t aw) +static inline uint64_t vtd_get_pte_addr(uint64_t pte, uint8_t aw) { - return slpte & VTD_SL_PT_BASE_ADDR_MASK(aw); + return pte & VTD_PT_BASE_ADDR_MASK(aw); } /* Whether the pte indicates the address of the page frame */ -static inline bool vtd_is_last_slpte(uint64_t slpte, uint32_t level) +static inline bool vtd_is_last_pte(uint64_t pte, uint32_t level) { - return level == VTD_SL_PT_LEVEL || (slpte & VTD_SL_PT_PAGE_SIZE_MASK); + return level == VTD_PT_LEVEL || (pte & VTD_PT_PAGE_SIZE_MASK); } -/* Get the content of a spte located in @base_addr[@index] */ -static uint64_t vtd_get_slpte(dma_addr_t base_addr, uint32_t index) +/* Get the content of a pte located in @base_addr[@index] */ +static uint64_t vtd_get_pte(dma_addr_t base_addr, uint32_t index) { - uint64_t slpte; + uint64_t pte; - assert(index < VTD_SL_PT_ENTRY_NR); + assert(index < VTD_PT_ENTRY_NR); if (dma_memory_read(&address_space_memory, - base_addr + index * sizeof(slpte), - &slpte, sizeof(slpte), MEMTXATTRS_UNSPECIFIED)) { - slpte = (uint64_t)-1; - return slpte; + base_addr + index * sizeof(pte), + &pte, sizeof(pte), MEMTXATTRS_UNSPECIFIED)) { + pte = (uint64_t)-1; + return pte; } - slpte = le64_to_cpu(slpte); - return slpte; + pte = le64_to_cpu(pte); + return pte; } /* Given an iova and the level of paging structure, return the offset @@ -743,36 +786,39 @@ static uint64_t vtd_get_slpte(dma_addr_t base_addr, uint32_t index) */ static inline uint32_t vtd_iova_level_offset(uint64_t iova, uint32_t level) { - return (iova >> vtd_slpt_level_shift(level)) & - ((1ULL << VTD_SL_LEVEL_BITS) - 1); + return (iova >> vtd_pt_level_shift(level)) & + ((1ULL << VTD_LEVEL_BITS) - 1); } /* Check Capability Register to see if the @level of page-table is supported */ -static inline bool vtd_is_level_supported(IntelIOMMUState *s, uint32_t level) +static inline bool vtd_is_sl_level_supported(IntelIOMMUState *s, uint32_t level) { return VTD_CAP_SAGAW_MASK & s->cap & (1ULL << (level - 2 + VTD_CAP_SAGAW_SHIFT)); } +static inline bool vtd_is_fl_level_supported(IntelIOMMUState *s, uint32_t level) +{ + return level == VTD_PML4_LEVEL; +} + /* Return true if check passed, otherwise false */ -static inline bool vtd_pe_type_check(X86IOMMUState *x86_iommu, - VTDPASIDEntry *pe) +static inline bool vtd_pe_type_check(IntelIOMMUState *s, VTDPASIDEntry *pe) { switch (VTD_PE_GET_TYPE(pe)) { case VTD_SM_PASID_ENTRY_FLT: + return !!(s->ecap & VTD_ECAP_FLTS); case VTD_SM_PASID_ENTRY_SLT: + return !!(s->ecap & VTD_ECAP_SLTS); case VTD_SM_PASID_ENTRY_NESTED: - break; + /* Not support NESTED page table type yet */ + return false; case VTD_SM_PASID_ENTRY_PT: - if (!x86_iommu->pt_supported) { - return false; - } - break; + return !!(s->ecap & VTD_ECAP_PT); default: /* Unknown type */ return false; } - return true; } static inline bool vtd_pdire_present(VTDPASIDDirEntry *pdire) @@ -796,7 +842,7 @@ static int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base, addr = pasid_dir_base + index * entry_size; if (dma_memory_read(&address_space_memory, addr, pdire, entry_size, MEMTXATTRS_UNSPECIFIED)) { - return -VTD_FR_PASID_TABLE_INV; + return -VTD_FR_PASID_DIR_ACCESS_ERR; } pdire->val = le64_to_cpu(pdire->val); @@ -814,28 +860,35 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s, dma_addr_t addr, VTDPASIDEntry *pe) { + uint8_t pgtt; uint32_t index; dma_addr_t entry_size; - X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s); index = VTD_PASID_TABLE_INDEX(pasid); entry_size = VTD_PASID_ENTRY_SIZE; addr = addr + index * entry_size; if (dma_memory_read(&address_space_memory, addr, pe, entry_size, MEMTXATTRS_UNSPECIFIED)) { - return -VTD_FR_PASID_TABLE_INV; + return -VTD_FR_PASID_TABLE_ACCESS_ERR; } for (size_t i = 0; i < ARRAY_SIZE(pe->val); i++) { pe->val[i] = le64_to_cpu(pe->val[i]); } /* Do translation type check */ - if (!vtd_pe_type_check(x86_iommu, pe)) { - return -VTD_FR_PASID_TABLE_INV; + if (!vtd_pe_type_check(s, pe)) { + return -VTD_FR_PASID_TABLE_ENTRY_INV; } - if (!vtd_is_level_supported(s, VTD_PE_GET_LEVEL(pe))) { - return -VTD_FR_PASID_TABLE_INV; + pgtt = VTD_PE_GET_TYPE(pe); + if (pgtt == VTD_SM_PASID_ENTRY_SLT && + !vtd_is_sl_level_supported(s, VTD_PE_GET_SL_LEVEL(pe))) { + return -VTD_FR_PASID_TABLE_ENTRY_INV; + } + + if (pgtt == VTD_SM_PASID_ENTRY_FLT && + !vtd_is_fl_level_supported(s, VTD_PE_GET_FL_LEVEL(pe))) { + return -VTD_FR_PASID_TABLE_ENTRY_INV; } return 0; @@ -876,7 +929,7 @@ static int vtd_get_pe_from_pasid_table(IntelIOMMUState *s, } if (!vtd_pdire_present(&pdire)) { - return -VTD_FR_PASID_TABLE_INV; + return -VTD_FR_PASID_DIR_ENTRY_P; } ret = vtd_get_pe_from_pdire(s, pasid, &pdire, pe); @@ -885,7 +938,7 @@ static int vtd_get_pe_from_pasid_table(IntelIOMMUState *s, } if (!vtd_pe_present(pe)) { - return -VTD_FR_PASID_TABLE_INV; + return -VTD_FR_PASID_ENTRY_P; } return 0; @@ -938,7 +991,7 @@ static int vtd_ce_get_pasid_fpd(IntelIOMMUState *s, } if (!vtd_pdire_present(&pdire)) { - return -VTD_FR_PASID_TABLE_INV; + return -VTD_FR_PASID_DIR_ENTRY_P; } /* @@ -973,7 +1026,11 @@ static uint32_t vtd_get_iova_level(IntelIOMMUState *s, if (s->root_scalable) { vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid); - return VTD_PE_GET_LEVEL(&pe); + if (s->flts) { + return VTD_PE_GET_FL_LEVEL(&pe); + } else { + return VTD_PE_GET_SL_LEVEL(&pe); + } } return vtd_ce_get_level(ce); @@ -1041,9 +1098,9 @@ static inline uint64_t vtd_iova_limit(IntelIOMMUState *s, } /* Return true if IOVA passes range check, otherwise false. */ -static inline bool vtd_iova_range_check(IntelIOMMUState *s, - uint64_t iova, VTDContextEntry *ce, - uint8_t aw, uint32_t pasid) +static inline bool vtd_iova_sl_range_check(IntelIOMMUState *s, + uint64_t iova, VTDContextEntry *ce, + uint8_t aw, uint32_t pasid) { /* * Check if @iova is above 2^X-1, where X is the minimum of MGAW @@ -1060,7 +1117,11 @@ static dma_addr_t vtd_get_iova_pgtbl_base(IntelIOMMUState *s, if (s->root_scalable) { vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid); - return pe.val[0] & VTD_SM_PASID_ENTRY_SLPTPTR; + if (s->flts) { + return pe.val[2] & VTD_SM_PASID_ENTRY_FLPTPTR; + } else { + return pe.val[0] & VTD_SM_PASID_ENTRY_SLPTPTR; + } } return vtd_ce_get_slpt_base(ce); @@ -1084,17 +1145,17 @@ static bool vtd_slpte_nonzero_rsvd(uint64_t slpte, uint32_t level) /* * We should have caught a guest-mis-programmed level earlier, - * via vtd_is_level_supported. + * via vtd_is_sl_level_supported. */ assert(level < VTD_SPTE_RSVD_LEN); /* - * Zero level doesn't exist. The smallest level is VTD_SL_PT_LEVEL=1 and - * checked by vtd_is_last_slpte(). + * Zero level doesn't exist. The smallest level is VTD_PT_LEVEL=1 and + * checked by vtd_is_last_pte(). */ assert(level); - if ((level == VTD_SL_PD_LEVEL || level == VTD_SL_PDP_LEVEL) && - (slpte & VTD_SL_PT_PAGE_SIZE_MASK)) { + if ((level == VTD_PD_LEVEL || level == VTD_PDP_LEVEL) && + (slpte & VTD_PT_PAGE_SIZE_MASK)) { /* large page */ rsvd_mask = vtd_spte_rsvd_large[level]; } else { @@ -1118,9 +1179,8 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce, uint32_t offset; uint64_t slpte; uint64_t access_right_check; - uint64_t xlat, size; - if (!vtd_iova_range_check(s, iova, ce, aw_bits, pasid)) { + if (!vtd_iova_sl_range_check(s, iova, ce, aw_bits, pasid)) { error_report_once("%s: detected IOVA overflow (iova=0x%" PRIx64 "," "pasid=0x%" PRIx32 ")", __func__, iova, pasid); return -VTD_FR_ADDR_BEYOND_MGAW; @@ -1131,7 +1191,7 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce, while (true) { offset = vtd_iova_level_offset(iova, level); - slpte = vtd_get_slpte(addr, offset); + slpte = vtd_get_pte(addr, offset); if (slpte == (uint64_t)-1) { error_report_once("%s: detected read error on DMAR slpte " @@ -1162,37 +1222,16 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce, return -VTD_FR_PAGING_ENTRY_RSVD; } - if (vtd_is_last_slpte(slpte, level)) { + if (vtd_is_last_pte(slpte, level)) { *slptep = slpte; *slpte_level = level; break; } - addr = vtd_get_slpte_addr(slpte, aw_bits); + addr = vtd_get_pte_addr(slpte, aw_bits); level--; } - xlat = vtd_get_slpte_addr(*slptep, aw_bits); - size = ~vtd_slpt_level_page_mask(level) + 1; - - /* - * From VT-d spec 3.14: Untranslated requests and translation - * requests that result in an address in the interrupt range will be - * blocked with condition code LGN.4 or SGN.8. - */ - if ((xlat > VTD_INTERRUPT_ADDR_LAST || - xlat + size - 1 < VTD_INTERRUPT_ADDR_FIRST)) { - return 0; - } else { - error_report_once("%s: xlat address is in interrupt range " - "(iova=0x%" PRIx64 ", level=0x%" PRIx32 ", " - "slpte=0x%" PRIx64 ", write=%d, " - "xlat=0x%" PRIx64 ", size=0x%" PRIx64 ", " - "pasid=0x%" PRIx32 ")", - __func__, iova, level, slpte, is_write, - xlat, size, pasid); - return s->scalable_mode ? -VTD_FR_SM_INTERRUPT_ADDR : - -VTD_FR_INTERRUPT_ADDR; - } + return 0; } typedef int (*vtd_page_walk_hook)(const IOMMUTLBEvent *event, void *private); @@ -1323,14 +1362,14 @@ static int vtd_page_walk_level(dma_addr_t addr, uint64_t start, trace_vtd_page_walk_level(addr, level, start, end); - subpage_size = 1ULL << vtd_slpt_level_shift(level); - subpage_mask = vtd_slpt_level_page_mask(level); + subpage_size = 1ULL << vtd_pt_level_shift(level); + subpage_mask = vtd_pt_level_page_mask(level); while (iova < end) { iova_next = (iova & subpage_mask) + subpage_size; offset = vtd_iova_level_offset(iova, level); - slpte = vtd_get_slpte(addr, offset); + slpte = vtd_get_pte(addr, offset); if (slpte == (uint64_t)-1) { trace_vtd_page_walk_skip_read(iova, iova_next); @@ -1353,12 +1392,12 @@ static int vtd_page_walk_level(dma_addr_t addr, uint64_t start, */ entry_valid = read_cur | write_cur; - if (!vtd_is_last_slpte(slpte, level) && entry_valid) { + if (!vtd_is_last_pte(slpte, level) && entry_valid) { /* * This is a valid PDE (or even bigger than PDE). We need * to walk one further level. */ - ret = vtd_page_walk_level(vtd_get_slpte_addr(slpte, info->aw), + ret = vtd_page_walk_level(vtd_get_pte_addr(slpte, info->aw), iova, MIN(iova_next, end), level - 1, read_cur, write_cur, info); } else { @@ -1375,7 +1414,7 @@ static int vtd_page_walk_level(dma_addr_t addr, uint64_t start, event.entry.perm = IOMMU_ACCESS_FLAG(read_cur, write_cur); event.entry.addr_mask = ~subpage_mask; /* NOTE: this is only meaningful if entry_valid == true */ - event.entry.translated_addr = vtd_get_slpte_addr(slpte, info->aw); + event.entry.translated_addr = vtd_get_pte_addr(slpte, info->aw); event.type = event.entry.perm ? IOMMU_NOTIFIER_MAP : IOMMU_NOTIFIER_UNMAP; ret = vtd_page_walk_one(&event, info); @@ -1409,11 +1448,11 @@ static int vtd_page_walk(IntelIOMMUState *s, VTDContextEntry *ce, dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce, pasid); uint32_t level = vtd_get_iova_level(s, ce, pasid); - if (!vtd_iova_range_check(s, start, ce, info->aw, pasid)) { + if (!vtd_iova_sl_range_check(s, start, ce, info->aw, pasid)) { return -VTD_FR_ADDR_BEYOND_MGAW; } - if (!vtd_iova_range_check(s, end, ce, info->aw, pasid)) { + if (!vtd_iova_sl_range_check(s, end, ce, info->aw, pasid)) { /* Fix end so that it reaches the maximum */ end = vtd_iova_limit(s, ce, info->aw, pasid); } @@ -1528,7 +1567,7 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num, /* Check if the programming of context-entry is valid */ if (!s->root_scalable && - !vtd_is_level_supported(s, vtd_ce_get_level(ce))) { + !vtd_is_sl_level_supported(s, vtd_ce_get_level(ce))) { error_report_once("%s: invalid context entry: hi=%"PRIx64 ", lo=%"PRIx64" (level %d not supported)", __func__, ce->hi, ce->lo, @@ -1795,8 +1834,20 @@ static const bool vtd_qualified_faults[] = { [VTD_FR_ROOT_ENTRY_RSVD] = false, [VTD_FR_PAGING_ENTRY_RSVD] = true, [VTD_FR_CONTEXT_ENTRY_TT] = true, - [VTD_FR_PASID_TABLE_INV] = false, + [VTD_FR_PASID_DIR_ACCESS_ERR] = false, + [VTD_FR_PASID_DIR_ENTRY_P] = true, + [VTD_FR_PASID_TABLE_ACCESS_ERR] = false, + [VTD_FR_PASID_ENTRY_P] = true, + [VTD_FR_PASID_TABLE_ENTRY_INV] = true, + [VTD_FR_FS_PAGING_ENTRY_INV] = true, + [VTD_FR_FS_PAGING_ENTRY_P] = true, + [VTD_FR_FS_PAGING_ENTRY_RSVD] = true, + [VTD_FR_PASID_ENTRY_FSPTPTR_INV] = true, + [VTD_FR_FS_NON_CANONICAL] = true, + [VTD_FR_FS_PAGING_ENTRY_US] = true, + [VTD_FR_SM_WRITE] = true, [VTD_FR_SM_INTERRUPT_ADDR] = true, + [VTD_FR_FS_BIT_UPDATE_FAILED] = true, [VTD_FR_MAX] = false, }; @@ -1814,29 +1865,32 @@ static inline bool vtd_is_interrupt_addr(hwaddr addr) return VTD_INTERRUPT_ADDR_FIRST <= addr && addr <= VTD_INTERRUPT_ADDR_LAST; } -static gboolean vtd_find_as_by_sid(gpointer key, gpointer value, - gpointer user_data) +static gboolean vtd_find_as_by_sid_and_pasid(gpointer key, gpointer value, + gpointer user_data) { struct vtd_as_key *as_key = (struct vtd_as_key *)key; - uint16_t target_sid = *(uint16_t *)user_data; + struct vtd_as_raw_key *target = (struct vtd_as_raw_key *)user_data; uint16_t sid = PCI_BUILD_BDF(pci_bus_num(as_key->bus), as_key->devfn); - return sid == target_sid; + + return (as_key->pasid == target->pasid) && (sid == target->sid); } -static VTDAddressSpace *vtd_get_as_by_sid(IntelIOMMUState *s, uint16_t sid) +static VTDAddressSpace *vtd_get_as_by_sid_and_pasid(IntelIOMMUState *s, + uint16_t sid, + uint32_t pasid) { - uint8_t bus_num = PCI_BUS_NUM(sid); - VTDAddressSpace *vtd_as = s->vtd_as_cache[bus_num]; - - if (vtd_as && - (sid == PCI_BUILD_BDF(pci_bus_num(vtd_as->bus), vtd_as->devfn))) { - return vtd_as; - } + struct vtd_as_raw_key key = { + .sid = sid, + .pasid = pasid + }; - vtd_as = g_hash_table_find(s->vtd_address_spaces, vtd_find_as_by_sid, &sid); - s->vtd_as_cache[bus_num] = vtd_as; + return g_hash_table_find(s->vtd_address_spaces, + vtd_find_as_by_sid_and_pasid, &key); +} - return vtd_as; +static VTDAddressSpace *vtd_get_as_by_sid(IntelIOMMUState *s, uint16_t sid) +{ + return vtd_get_as_by_sid_and_pasid(s, sid, PCI_NO_PASID); } static void vtd_pt_enable_fast_path(IntelIOMMUState *s, uint16_t source_id) @@ -1858,6 +1912,157 @@ out: trace_vtd_pt_enable_fast_path(source_id, success); } +/* + * Rsvd field masks for fpte: + * vtd_fpte_rsvd 4k pages + * vtd_fpte_rsvd_large large pages + * + * We support only 4-level page tables. + */ +#define VTD_FPTE_RSVD_LEN 5 +static uint64_t vtd_fpte_rsvd[VTD_FPTE_RSVD_LEN]; +static uint64_t vtd_fpte_rsvd_large[VTD_FPTE_RSVD_LEN]; + +static bool vtd_flpte_nonzero_rsvd(uint64_t flpte, uint32_t level) +{ + uint64_t rsvd_mask; + + /* + * We should have caught a guest-mis-programmed level earlier, + * via vtd_is_fl_level_supported. + */ + assert(level < VTD_FPTE_RSVD_LEN); + /* + * Zero level doesn't exist. The smallest level is VTD_PT_LEVEL=1 and + * checked by vtd_is_last_pte(). + */ + assert(level); + + if ((level == VTD_PD_LEVEL || level == VTD_PDP_LEVEL) && + (flpte & VTD_PT_PAGE_SIZE_MASK)) { + /* large page */ + rsvd_mask = vtd_fpte_rsvd_large[level]; + } else { + rsvd_mask = vtd_fpte_rsvd[level]; + } + + return flpte & rsvd_mask; +} + +static inline bool vtd_flpte_present(uint64_t flpte) +{ + return !!(flpte & VTD_FL_P); +} + +/* Return true if IOVA is canonical, otherwise false. */ +static bool vtd_iova_fl_check_canonical(IntelIOMMUState *s, uint64_t iova, + VTDContextEntry *ce, uint32_t pasid) +{ + uint64_t iova_limit = vtd_iova_limit(s, ce, s->aw_bits, pasid); + uint64_t upper_bits_mask = ~(iova_limit - 1); + uint64_t upper_bits = iova & upper_bits_mask; + bool msb = ((iova & (iova_limit >> 1)) != 0); + + if (msb) { + return upper_bits == upper_bits_mask; + } else { + return !upper_bits; + } +} + +static MemTxResult vtd_set_flag_in_pte(dma_addr_t base_addr, uint32_t index, + uint64_t pte, uint64_t flag) +{ + if (pte & flag) { + return MEMTX_OK; + } + pte |= flag; + pte = cpu_to_le64(pte); + return dma_memory_write(&address_space_memory, + base_addr + index * sizeof(pte), + &pte, sizeof(pte), + MEMTXATTRS_UNSPECIFIED); +} + +/* + * Given the @iova, get relevant @flptep. @flpte_level will be the last level + * of the translation, can be used for deciding the size of large page. + */ +static int vtd_iova_to_flpte(IntelIOMMUState *s, VTDContextEntry *ce, + uint64_t iova, bool is_write, + uint64_t *flptep, uint32_t *flpte_level, + bool *reads, bool *writes, uint8_t aw_bits, + uint32_t pasid) +{ + dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce, pasid); + uint32_t level = vtd_get_iova_level(s, ce, pasid); + uint32_t offset; + uint64_t flpte, flag_ad = VTD_FL_A; + + if (!vtd_iova_fl_check_canonical(s, iova, ce, pasid)) { + error_report_once("%s: detected non canonical IOVA (iova=0x%" PRIx64 "," + "pasid=0x%" PRIx32 ")", __func__, iova, pasid); + return -VTD_FR_FS_NON_CANONICAL; + } + + while (true) { + offset = vtd_iova_level_offset(iova, level); + flpte = vtd_get_pte(addr, offset); + + if (flpte == (uint64_t)-1) { + if (level == vtd_get_iova_level(s, ce, pasid)) { + /* Invalid programming of pasid-entry */ + return -VTD_FR_PASID_ENTRY_FSPTPTR_INV; + } else { + return -VTD_FR_FS_PAGING_ENTRY_INV; + } + } + + if (!vtd_flpte_present(flpte)) { + *reads = false; + *writes = false; + return -VTD_FR_FS_PAGING_ENTRY_P; + } + + /* No emulated device supports supervisor privilege request yet */ + if (!(flpte & VTD_FL_US)) { + *reads = false; + *writes = false; + return -VTD_FR_FS_PAGING_ENTRY_US; + } + + *reads = true; + *writes = (*writes) && (flpte & VTD_FL_RW); + if (is_write && !(flpte & VTD_FL_RW)) { + return -VTD_FR_SM_WRITE; + } + if (vtd_flpte_nonzero_rsvd(flpte, level)) { + error_report_once("%s: detected flpte reserved non-zero " + "iova=0x%" PRIx64 ", level=0x%" PRIx32 + "flpte=0x%" PRIx64 ", pasid=0x%" PRIX32 ")", + __func__, iova, level, flpte, pasid); + return -VTD_FR_FS_PAGING_ENTRY_RSVD; + } + + if (vtd_is_last_pte(flpte, level) && is_write) { + flag_ad |= VTD_FL_D; + } + + if (vtd_set_flag_in_pte(addr, offset, flpte, flag_ad) != MEMTX_OK) { + return -VTD_FR_FS_BIT_UPDATE_FAILED; + } + + if (vtd_is_last_pte(flpte, level)) { + *flptep = flpte; + *flpte_level = level; + return 0; + } + + addr = vtd_get_pte_addr(flpte, aw_bits); + level--; + } +} + static void vtd_report_fault(IntelIOMMUState *s, int err, bool is_fpd_set, uint16_t source_id, @@ -1894,16 +2099,17 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, VTDContextEntry ce; uint8_t bus_num = pci_bus_num(bus); VTDContextCacheEntry *cc_entry; - uint64_t slpte, page_mask; + uint64_t pte, page_mask; uint32_t level, pasid = vtd_as->pasid; uint16_t source_id = PCI_BUILD_BDF(bus_num, devfn); int ret_fr; bool is_fpd_set = false; bool reads = true; bool writes = true; - uint8_t access_flags; + uint8_t access_flags, pgtt; bool rid2pasid = (pasid == PCI_NO_PASID) && s->root_scalable; VTDIOTLBEntry *iotlb_entry; + uint64_t xlat, size; /* * We have standalone memory region for interrupt addresses, we @@ -1915,13 +2121,13 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, cc_entry = &vtd_as->context_cache_entry; - /* Try to fetch slpte form IOTLB, we don't need RID2PASID logic */ + /* Try to fetch pte from IOTLB, we don't need RID2PASID logic */ if (!rid2pasid) { iotlb_entry = vtd_lookup_iotlb(s, source_id, pasid, addr); if (iotlb_entry) { - trace_vtd_iotlb_page_hit(source_id, addr, iotlb_entry->slpte, + trace_vtd_iotlb_page_hit(source_id, addr, iotlb_entry->pte, iotlb_entry->domain_id); - slpte = iotlb_entry->slpte; + pte = iotlb_entry->pte; access_flags = iotlb_entry->access_flags; page_mask = iotlb_entry->mask; goto out; @@ -1993,35 +2199,65 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, return true; } - /* Try to fetch slpte form IOTLB for RID2PASID slow path */ + /* Try to fetch pte from IOTLB for RID2PASID slow path */ if (rid2pasid) { iotlb_entry = vtd_lookup_iotlb(s, source_id, pasid, addr); if (iotlb_entry) { - trace_vtd_iotlb_page_hit(source_id, addr, iotlb_entry->slpte, + trace_vtd_iotlb_page_hit(source_id, addr, iotlb_entry->pte, iotlb_entry->domain_id); - slpte = iotlb_entry->slpte; + pte = iotlb_entry->pte; access_flags = iotlb_entry->access_flags; page_mask = iotlb_entry->mask; goto out; } } - ret_fr = vtd_iova_to_slpte(s, &ce, addr, is_write, &slpte, &level, - &reads, &writes, s->aw_bits, pasid); + if (s->flts && s->root_scalable) { + ret_fr = vtd_iova_to_flpte(s, &ce, addr, is_write, &pte, &level, + &reads, &writes, s->aw_bits, pasid); + pgtt = VTD_SM_PASID_ENTRY_FLT; + } else { + ret_fr = vtd_iova_to_slpte(s, &ce, addr, is_write, &pte, &level, + &reads, &writes, s->aw_bits, pasid); + pgtt = VTD_SM_PASID_ENTRY_SLT; + } + if (!ret_fr) { + xlat = vtd_get_pte_addr(pte, s->aw_bits); + size = ~vtd_pt_level_page_mask(level) + 1; + + /* + * Per VT-d spec 4.1 section 3.15: Untranslated requests and translation + * requests that result in an address in the interrupt range will be + * blocked with condition code LGN.4 or SGN.8. + */ + if ((xlat <= VTD_INTERRUPT_ADDR_LAST && + xlat + size - 1 >= VTD_INTERRUPT_ADDR_FIRST)) { + error_report_once("%s: xlat address is in interrupt range " + "(iova=0x%" PRIx64 ", level=0x%" PRIx32 ", " + "pte=0x%" PRIx64 ", write=%d, " + "xlat=0x%" PRIx64 ", size=0x%" PRIx64 ", " + "pasid=0x%" PRIx32 ")", + __func__, addr, level, pte, is_write, + xlat, size, pasid); + ret_fr = s->scalable_mode ? -VTD_FR_SM_INTERRUPT_ADDR : + -VTD_FR_INTERRUPT_ADDR; + } + } + if (ret_fr) { vtd_report_fault(s, -ret_fr, is_fpd_set, source_id, addr, is_write, pasid != PCI_NO_PASID, pasid); goto error; } - page_mask = vtd_slpt_level_page_mask(level); + page_mask = vtd_pt_level_page_mask(level); access_flags = IOMMU_ACCESS_FLAG(reads, writes); vtd_update_iotlb(s, source_id, vtd_get_domain_id(s, &ce, pasid), - addr, slpte, access_flags, level, pasid); + addr, pte, access_flags, level, pasid, pgtt); out: vtd_iommu_unlock(s); entry->iova = addr & page_mask; - entry->translated_addr = vtd_get_slpte_addr(slpte, s->aw_bits) & page_mask; + entry->translated_addr = vtd_get_pte_addr(pte, s->aw_bits) & page_mask; entry->addr_mask = ~page_mask; entry->perm = access_flags; return true; @@ -2215,8 +2451,13 @@ static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id) } } +/* + * There is no pasid field in iotlb invalidation descriptor, so PCI_NO_PASID + * is passed as parameter. Piotlb invalidation supports pasid, pasid in its + * descriptor is passed which should not be PCI_NO_PASID. + */ static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s, - uint16_t domain_id, hwaddr addr, + uint16_t domain_id, hwaddr addr, uint8_t am, uint32_t pasid) { VTDAddressSpace *vtd_as; @@ -2225,19 +2466,37 @@ static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s, hwaddr size = (1 << am) * VTD_PAGE_SIZE; QLIST_FOREACH(vtd_as, &(s->vtd_as_with_notifiers), next) { - if (pasid != PCI_NO_PASID && pasid != vtd_as->pasid) { - continue; - } ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus), vtd_as->devfn, &ce); if (!ret && domain_id == vtd_get_domain_id(s, &ce, vtd_as->pasid)) { + uint32_t rid2pasid = PCI_NO_PASID; + + if (s->root_scalable) { + rid2pasid = VTD_CE_GET_RID2PASID(&ce); + } + + /* + * In legacy mode, vtd_as->pasid == pasid is always true. + * In scalable mode, for vtd address space backing a PCI + * device without pasid, needs to compare pasid with + * rid2pasid of this device. + */ + if (!(vtd_as->pasid == pasid || + (vtd_as->pasid == PCI_NO_PASID && pasid == rid2pasid))) { + continue; + } + if (vtd_as_has_map_notifier(vtd_as)) { /* - * As long as we have MAP notifications registered in - * any of our IOMMU notifiers, we need to sync the - * shadow page table. + * When stage-1 translation is off, as long as we have MAP + * notifications registered in any of our IOMMU notifiers, + * we need to sync the shadow page table. Otherwise VFIO + * device attaches to nested page table instead of shadow + * page table, so no need to sync. */ - vtd_sync_shadow_page_table_range(vtd_as, &ce, addr, size); + if (!s->flts || !s->root_scalable) { + vtd_sync_shadow_page_table_range(vtd_as, &ce, addr, size); + } } else { /* * For UNMAP-only notifiers, we don't need to walk the @@ -2689,6 +2948,106 @@ static bool vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc) return true; } +static gboolean vtd_hash_remove_by_pasid(gpointer key, gpointer value, + gpointer user_data) +{ + VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value; + VTDIOTLBPageInvInfo *info = (VTDIOTLBPageInvInfo *)user_data; + + return ((entry->domain_id == info->domain_id) && + (entry->pasid == info->pasid)); +} + +static void vtd_piotlb_pasid_invalidate(IntelIOMMUState *s, + uint16_t domain_id, uint32_t pasid) +{ + VTDIOTLBPageInvInfo info; + VTDAddressSpace *vtd_as; + VTDContextEntry ce; + + info.domain_id = domain_id; + info.pasid = pasid; + + vtd_iommu_lock(s); + g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_pasid, + &info); + vtd_iommu_unlock(s); + + QLIST_FOREACH(vtd_as, &s->vtd_as_with_notifiers, next) { + if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus), + vtd_as->devfn, &ce) && + domain_id == vtd_get_domain_id(s, &ce, vtd_as->pasid)) { + uint32_t rid2pasid = VTD_CE_GET_RID2PASID(&ce); + + if ((vtd_as->pasid != PCI_NO_PASID || pasid != rid2pasid) && + vtd_as->pasid != pasid) { + continue; + } + + if (!s->flts || !vtd_as_has_map_notifier(vtd_as)) { + vtd_address_space_sync(vtd_as); + } + } + } +} + +static void vtd_piotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id, + uint32_t pasid, hwaddr addr, uint8_t am) +{ + VTDIOTLBPageInvInfo info; + + info.domain_id = domain_id; + info.pasid = pasid; + info.addr = addr; + info.mask = ~((1 << am) - 1); + + vtd_iommu_lock(s); + g_hash_table_foreach_remove(s->iotlb, + vtd_hash_remove_by_page_piotlb, &info); + vtd_iommu_unlock(s); + + vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am, pasid); +} + +static bool vtd_process_piotlb_desc(IntelIOMMUState *s, + VTDInvDesc *inv_desc) +{ + uint16_t domain_id; + uint32_t pasid; + hwaddr addr; + uint8_t am; + uint64_t mask[4] = {VTD_INV_DESC_PIOTLB_RSVD_VAL0, + VTD_INV_DESC_PIOTLB_RSVD_VAL1, + VTD_INV_DESC_ALL_ONE, VTD_INV_DESC_ALL_ONE}; + + if (!vtd_inv_desc_reserved_check(s, inv_desc, mask, true, + __func__, "piotlb inv")) { + return false; + } + + domain_id = VTD_INV_DESC_PIOTLB_DID(inv_desc->val[0]); + pasid = VTD_INV_DESC_PIOTLB_PASID(inv_desc->val[0]); + switch (inv_desc->val[0] & VTD_INV_DESC_PIOTLB_G) { + case VTD_INV_DESC_PIOTLB_ALL_IN_PASID: + vtd_piotlb_pasid_invalidate(s, domain_id, pasid); + break; + + case VTD_INV_DESC_PIOTLB_PSI_IN_PASID: + am = VTD_INV_DESC_PIOTLB_AM(inv_desc->val[1]); + addr = (hwaddr) VTD_INV_DESC_PIOTLB_ADDR(inv_desc->val[1]); + vtd_piotlb_page_invalidate(s, domain_id, pasid, addr, am); + break; + + default: + error_report_once("%s: invalid piotlb inv desc: hi=0x%"PRIx64 + ", lo=0x%"PRIx64" (type mismatch: 0x%llx)", + __func__, inv_desc->val[1], inv_desc->val[0], + inv_desc->val[0] & VTD_INV_DESC_IOTLB_G); + return false; + } + return true; +} + static bool vtd_process_inv_iec_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc) { @@ -2742,6 +3101,49 @@ static void do_invalidate_device_tlb(VTDAddressSpace *vtd_dev_as, memory_region_notify_iommu(&vtd_dev_as->iommu, 0, event); } +static bool vtd_process_device_piotlb_desc(IntelIOMMUState *s, + VTDInvDesc *inv_desc) +{ + uint16_t sid; + VTDAddressSpace *vtd_dev_as; + bool size; + bool global; + hwaddr addr; + uint32_t pasid; + uint64_t mask[4] = {VTD_INV_DESC_PASID_DEVICE_IOTLB_RSVD_VAL0, + VTD_INV_DESC_PASID_DEVICE_IOTLB_RSVD_VAL1, + VTD_INV_DESC_ALL_ONE, VTD_INV_DESC_ALL_ONE}; + + if (!vtd_inv_desc_reserved_check(s, inv_desc, mask, true, + __func__, "device piotlb inv")) { + return false; + } + + global = VTD_INV_DESC_PASID_DEVICE_IOTLB_GLOBAL(inv_desc->hi); + size = VTD_INV_DESC_PASID_DEVICE_IOTLB_SIZE(inv_desc->hi); + addr = VTD_INV_DESC_PASID_DEVICE_IOTLB_ADDR(inv_desc->hi); + sid = VTD_INV_DESC_PASID_DEVICE_IOTLB_SID(inv_desc->lo); + if (global) { + QLIST_FOREACH(vtd_dev_as, &s->vtd_as_with_notifiers, next) { + if ((vtd_dev_as->pasid != PCI_NO_PASID) && + (PCI_BUILD_BDF(pci_bus_num(vtd_dev_as->bus), + vtd_dev_as->devfn) == sid)) { + do_invalidate_device_tlb(vtd_dev_as, size, addr); + } + } + } else { + pasid = VTD_INV_DESC_PASID_DEVICE_IOTLB_PASID(inv_desc->lo); + vtd_dev_as = vtd_get_as_by_sid_and_pasid(s, sid, pasid); + if (!vtd_dev_as) { + return true; + } + + do_invalidate_device_tlb(vtd_dev_as, size, addr); + } + + return true; +} + static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc) { @@ -2807,6 +3209,13 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s) } break; + case VTD_INV_DESC_PIOTLB: + trace_vtd_inv_desc("p-iotlb", inv_desc.val[1], inv_desc.val[0]); + if (!vtd_process_piotlb_desc(s, &inv_desc)) { + return false; + } + break; + case VTD_INV_DESC_WAIT: trace_vtd_inv_desc("wait", inv_desc.hi, inv_desc.lo); if (!vtd_process_wait_desc(s, &inv_desc)) { @@ -2821,6 +3230,13 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s) } break; + case VTD_INV_DESC_DEV_PIOTLB: + trace_vtd_inv_desc("device-piotlb", inv_desc.hi, inv_desc.lo); + if (!vtd_process_device_piotlb_desc(s, &inv_desc)) { + return false; + } + break; + case VTD_INV_DESC_DEVICE: trace_vtd_inv_desc("device", inv_desc.hi, inv_desc.lo); if (!vtd_process_device_iotlb_desc(s, &inv_desc)) { @@ -2834,7 +3250,6 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s) * iommu driver) work, just return true is enough so far. */ case VTD_INV_DESC_PC: - case VTD_INV_DESC_PIOTLB: if (s->scalable_mode) { break; } @@ -3413,11 +3828,13 @@ static const Property vtd_properties[] = { VTD_HOST_ADDRESS_WIDTH), DEFINE_PROP_BOOL("caching-mode", IntelIOMMUState, caching_mode, FALSE), DEFINE_PROP_BOOL("x-scalable-mode", IntelIOMMUState, scalable_mode, FALSE), + DEFINE_PROP_BOOL("x-flts", IntelIOMMUState, flts, FALSE), DEFINE_PROP_BOOL("snoop-control", IntelIOMMUState, snoop_control, false), DEFINE_PROP_BOOL("x-pasid-mode", IntelIOMMUState, pasid, false), DEFINE_PROP_BOOL("dma-drain", IntelIOMMUState, dma_drain, true), DEFINE_PROP_BOOL("dma-translation", IntelIOMMUState, dma_translation, true), DEFINE_PROP_BOOL("stale-tm", IntelIOMMUState, stale_tm, false), + DEFINE_PROP_BOOL("fs1gp", IntelIOMMUState, fs1gp, true), }; /* Read IRTE entry with specific index */ @@ -3914,7 +4331,13 @@ static bool vtd_check_hiod(IntelIOMMUState *s, HostIOMMUDevice *hiod, return false; } - return true; + if (!s->flts) { + /* All checks requested by VTD stage-2 translation pass */ + return true; + } + + error_setg(errp, "host device is uncompatible with stage-1 translation"); + return false; } static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn, @@ -4137,7 +4560,12 @@ static void vtd_cap_init(IntelIOMMUState *s) } /* TODO: read cap/ecap from host to decide which cap to be exposed. */ - if (s->scalable_mode) { + if (s->flts) { + s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_FLTS; + if (s->fs1gp) { + s->cap |= VTD_CAP_FS1GP; + } + } else if (s->scalable_mode) { s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_SRS | VTD_ECAP_SLTS; } @@ -4193,6 +4621,18 @@ static void vtd_init(IntelIOMMUState *s) vtd_spte_rsvd_large[3] = VTD_SPTE_LPAGE_L3_RSVD_MASK(s->aw_bits, x86_iommu->dt_supported && s->stale_tm); + /* + * Rsvd field masks for fpte + */ + vtd_fpte_rsvd[0] = ~0ULL; + vtd_fpte_rsvd[1] = VTD_FPTE_PAGE_L1_RSVD_MASK(s->aw_bits); + vtd_fpte_rsvd[2] = VTD_FPTE_PAGE_L2_RSVD_MASK(s->aw_bits); + vtd_fpte_rsvd[3] = VTD_FPTE_PAGE_L3_RSVD_MASK(s->aw_bits); + vtd_fpte_rsvd[4] = VTD_FPTE_PAGE_L4_RSVD_MASK(s->aw_bits); + + vtd_fpte_rsvd_large[2] = VTD_FPTE_LPAGE_L2_RSVD_MASK(s->aw_bits); + vtd_fpte_rsvd_large[3] = VTD_FPTE_LPAGE_L3_RSVD_MASK(s->aw_bits); + if (s->scalable_mode || s->snoop_control) { vtd_spte_rsvd[1] &= ~VTD_SPTE_SNP; vtd_spte_rsvd_large[2] &= ~VTD_SPTE_SNP; @@ -4304,14 +4744,26 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp) } } - /* Currently only address widths supported are 39 and 48 bits */ - if ((s->aw_bits != VTD_HOST_AW_39BIT) && - (s->aw_bits != VTD_HOST_AW_48BIT)) { - error_setg(errp, "Supported values for aw-bits are: %d, %d", + if (!s->scalable_mode && s->flts) { + error_setg(errp, "x-flts is only available in scalable mode"); + return false; + } + + if (!s->flts && s->aw_bits != VTD_HOST_AW_39BIT && + s->aw_bits != VTD_HOST_AW_48BIT) { + error_setg(errp, "%s: supported values for aw-bits are: %d, %d", + s->scalable_mode ? "Scalable mode(flts=off)" : "Legacy mode", VTD_HOST_AW_39BIT, VTD_HOST_AW_48BIT); return false; } + if (s->flts && s->aw_bits != VTD_HOST_AW_48BIT) { + error_setg(errp, + "Scalable mode(flts=on): supported value for aw-bits is: %d", + VTD_HOST_AW_48BIT); + return false; + } + if (s->scalable_mode && !s->dma_drain) { error_setg(errp, "Need to set dma_drain for scalable mode"); return false; diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index 4323fc5d6d..e8b211e8b0 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -195,6 +195,7 @@ #define VTD_ECAP_PASID (1ULL << 40) #define VTD_ECAP_SMTS (1ULL << 43) #define VTD_ECAP_SLTS (1ULL << 46) +#define VTD_ECAP_FLTS (1ULL << 47) /* CAP_REG */ /* (offset >> 4) << 24 */ @@ -211,6 +212,7 @@ #define VTD_CAP_SLLPS ((1ULL << 34) | (1ULL << 35)) #define VTD_CAP_DRAIN_WRITE (1ULL << 54) #define VTD_CAP_DRAIN_READ (1ULL << 55) +#define VTD_CAP_FS1GP (1ULL << 56) #define VTD_CAP_DRAIN (VTD_CAP_DRAIN_READ | VTD_CAP_DRAIN_WRITE) #define VTD_CAP_CM (1ULL << 7) #define VTD_PASID_ID_SHIFT 20 @@ -311,10 +313,28 @@ typedef enum VTDFaultReason { * request while disabled */ VTD_FR_IR_SID_ERR = 0x26, /* Invalid Source-ID */ - VTD_FR_PASID_TABLE_INV = 0x58, /*Invalid PASID table entry */ + /* PASID directory entry access failure */ + VTD_FR_PASID_DIR_ACCESS_ERR = 0x50, + /* The Present(P) field of pasid directory entry is 0 */ + VTD_FR_PASID_DIR_ENTRY_P = 0x51, + VTD_FR_PASID_TABLE_ACCESS_ERR = 0x58, /* PASID table entry access failure */ + /* The Present(P) field of pasid table entry is 0 */ + VTD_FR_PASID_ENTRY_P = 0x59, + VTD_FR_PASID_TABLE_ENTRY_INV = 0x5b, /*Invalid PASID table entry */ + + /* Fail to access a first-level paging entry (not FS_PML4E) */ + VTD_FR_FS_PAGING_ENTRY_INV = 0x70, + VTD_FR_FS_PAGING_ENTRY_P = 0x71, + /* Non-zero reserved field in present first-stage paging entry */ + VTD_FR_FS_PAGING_ENTRY_RSVD = 0x72, + VTD_FR_PASID_ENTRY_FSPTPTR_INV = 0x73, /* Invalid FSPTPTR in PASID entry */ + VTD_FR_FS_NON_CANONICAL = 0x80, /* SNG.1 : Address for FS not canonical.*/ + VTD_FR_FS_PAGING_ENTRY_US = 0x81, /* Privilege violation */ + VTD_FR_SM_WRITE = 0x85, /* No write permission */ /* Output address in the interrupt address range for scalable mode */ VTD_FR_SM_INTERRUPT_ADDR = 0x87, + VTD_FR_FS_BIT_UPDATE_FAILED = 0x91, /* SFS.10 */ VTD_FR_MAX, /* Guard */ } VTDFaultReason; @@ -367,6 +387,7 @@ typedef union VTDInvDesc VTDInvDesc; #define VTD_INV_DESC_WAIT 0x5 /* Invalidation Wait Descriptor */ #define VTD_INV_DESC_PIOTLB 0x6 /* PASID-IOTLB Invalidate Desc */ #define VTD_INV_DESC_PC 0x7 /* PASID-cache Invalidate Desc */ +#define VTD_INV_DESC_DEV_PIOTLB 0x8 /* PASID-based-DIOTLB inv_desc*/ #define VTD_INV_DESC_NONE 0 /* Not an Invalidate Descriptor */ /* Masks for Invalidation Wait Descriptor*/ @@ -397,11 +418,6 @@ typedef union VTDInvDesc VTDInvDesc; #define VTD_INV_DESC_IOTLB_AM(val) ((val) & 0x3fULL) #define VTD_INV_DESC_IOTLB_RSVD_LO 0xffffffff0000f100ULL #define VTD_INV_DESC_IOTLB_RSVD_HI 0xf80ULL -#define VTD_INV_DESC_IOTLB_PASID_PASID (2ULL << 4) -#define VTD_INV_DESC_IOTLB_PASID_PAGE (3ULL << 4) -#define VTD_INV_DESC_IOTLB_PASID(val) (((val) >> 32) & VTD_PASID_ID_MASK) -#define VTD_INV_DESC_IOTLB_PASID_RSVD_LO 0xfff00000000001c0ULL -#define VTD_INV_DESC_IOTLB_PASID_RSVD_HI 0xf80ULL /* Mask for Device IOTLB Invalidate Descriptor */ #define VTD_INV_DESC_DEVICE_IOTLB_ADDR(val) ((val) & 0xfffffffffffff000ULL) @@ -413,6 +429,16 @@ typedef union VTDInvDesc VTDInvDesc; /* Masks for Interrupt Entry Invalidate Descriptor */ #define VTD_INV_DESC_IEC_RSVD 0xffff000007fff1e0ULL +/* Masks for PASID based Device IOTLB Invalidate Descriptor */ +#define VTD_INV_DESC_PASID_DEVICE_IOTLB_ADDR(val) ((val) & \ + 0xfffffffffffff000ULL) +#define VTD_INV_DESC_PASID_DEVICE_IOTLB_SIZE(val) ((val >> 11) & 0x1) +#define VTD_INV_DESC_PASID_DEVICE_IOTLB_GLOBAL(val) ((val) & 0x1) +#define VTD_INV_DESC_PASID_DEVICE_IOTLB_SID(val) (((val) >> 16) & 0xffffULL) +#define VTD_INV_DESC_PASID_DEVICE_IOTLB_PASID(val) ((val >> 32) & 0xfffffULL) +#define VTD_INV_DESC_PASID_DEVICE_IOTLB_RSVD_VAL0 0xfff000000000f000ULL +#define VTD_INV_DESC_PASID_DEVICE_IOTLB_RSVD_VAL1 0x7feULL + /* Rsvd field masks for spte */ #define VTD_SPTE_SNP 0x800ULL @@ -436,6 +462,34 @@ typedef union VTDInvDesc VTDInvDesc; (0x3ffff800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM | VTD_SL_TM)) : \ (0x3ffff800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM)) +/* Rsvd field masks for fpte */ +#define VTD_FS_UPPER_IGNORED 0xfff0000000000000ULL +#define VTD_FPTE_PAGE_L1_RSVD_MASK(aw) \ + (~(VTD_HAW_MASK(aw) | VTD_FS_UPPER_IGNORED)) +#define VTD_FPTE_PAGE_L2_RSVD_MASK(aw) \ + (~(VTD_HAW_MASK(aw) | VTD_FS_UPPER_IGNORED)) +#define VTD_FPTE_PAGE_L3_RSVD_MASK(aw) \ + (~(VTD_HAW_MASK(aw) | VTD_FS_UPPER_IGNORED)) +#define VTD_FPTE_PAGE_L4_RSVD_MASK(aw) \ + (0x80ULL | ~(VTD_HAW_MASK(aw) | VTD_FS_UPPER_IGNORED)) + +#define VTD_FPTE_LPAGE_L2_RSVD_MASK(aw) \ + (0x1fe000ULL | ~(VTD_HAW_MASK(aw) | VTD_FS_UPPER_IGNORED)) +#define VTD_FPTE_LPAGE_L3_RSVD_MASK(aw) \ + (0x3fffe000ULL | ~(VTD_HAW_MASK(aw) | VTD_FS_UPPER_IGNORED)) + +/* Masks for PIOTLB Invalidate Descriptor */ +#define VTD_INV_DESC_PIOTLB_G (3ULL << 4) +#define VTD_INV_DESC_PIOTLB_ALL_IN_PASID (2ULL << 4) +#define VTD_INV_DESC_PIOTLB_PSI_IN_PASID (3ULL << 4) +#define VTD_INV_DESC_PIOTLB_DID(val) (((val) >> 16) & VTD_DOMAIN_ID_MASK) +#define VTD_INV_DESC_PIOTLB_PASID(val) (((val) >> 32) & 0xfffffULL) +#define VTD_INV_DESC_PIOTLB_AM(val) ((val) & 0x3fULL) +#define VTD_INV_DESC_PIOTLB_IH(val) (((val) >> 6) & 0x1) +#define VTD_INV_DESC_PIOTLB_ADDR(val) ((val) & ~0xfffULL) +#define VTD_INV_DESC_PIOTLB_RSVD_VAL0 0xfff000000000f1c0ULL +#define VTD_INV_DESC_PIOTLB_RSVD_VAL1 0xf80ULL + /* Information about page-selective IOTLB invalidate */ struct VTDIOTLBPageInvInfo { uint16_t domain_id; @@ -519,27 +573,38 @@ typedef struct VTDRootEntry VTDRootEntry; #define VTD_SM_PASID_ENTRY_AW 7ULL /* Adjusted guest-address-width */ #define VTD_SM_PASID_ENTRY_DID(val) ((val) & VTD_DOMAIN_ID_MASK) +#define VTD_SM_PASID_ENTRY_FLPM 3ULL +#define VTD_SM_PASID_ENTRY_FLPTPTR (~0xfffULL) + +/* First Level Paging Structure */ +/* Masks for First Level Paging Entry */ +#define VTD_FL_P 1ULL +#define VTD_FL_RW (1ULL << 1) +#define VTD_FL_US (1ULL << 2) +#define VTD_FL_A (1ULL << 5) +#define VTD_FL_D (1ULL << 6) + /* Second Level Page Translation Pointer*/ #define VTD_SM_PASID_ENTRY_SLPTPTR (~0xfffULL) -/* Paging Structure common */ -#define VTD_SL_PT_PAGE_SIZE_MASK (1ULL << 7) -/* Bits to decide the offset for each level */ -#define VTD_SL_LEVEL_BITS 9 - /* Second Level Paging Structure */ -#define VTD_SL_PML4_LEVEL 4 -#define VTD_SL_PDP_LEVEL 3 -#define VTD_SL_PD_LEVEL 2 -#define VTD_SL_PT_LEVEL 1 -#define VTD_SL_PT_ENTRY_NR 512 - /* Masks for Second Level Paging Entry */ #define VTD_SL_RW_MASK 3ULL #define VTD_SL_R 1ULL #define VTD_SL_W (1ULL << 1) -#define VTD_SL_PT_BASE_ADDR_MASK(aw) (~(VTD_PAGE_SIZE - 1) & VTD_HAW_MASK(aw)) #define VTD_SL_IGN_COM 0xbff0000000000000ULL #define VTD_SL_TM (1ULL << 62) +/* Common for both First Level and Second Level */ +#define VTD_PML4_LEVEL 4 +#define VTD_PDP_LEVEL 3 +#define VTD_PD_LEVEL 2 +#define VTD_PT_LEVEL 1 +#define VTD_PT_ENTRY_NR 512 +#define VTD_PT_PAGE_SIZE_MASK (1ULL << 7) +#define VTD_PT_BASE_ADDR_MASK(aw) (~(VTD_PAGE_SIZE - 1) & VTD_HAW_MASK(aw)) + +/* Bits to decide the offset for each level */ +#define VTD_LEVEL_BITS 9 + #endif diff --git a/hw/i386/kvm/xen_evtchn.c b/hw/i386/kvm/xen_evtchn.c index bd2a3cbee0..58484f308e 100644 --- a/hw/i386/kvm/xen_evtchn.c +++ b/hw/i386/kvm/xen_evtchn.c @@ -140,6 +140,8 @@ struct XenEvtchnState { uint64_t callback_param; bool evtchn_in_kernel; + bool setting_callback_gsi; + int extern_gsi_level; uint32_t callback_gsi; QEMUBH *gsi_bh; @@ -431,9 +433,22 @@ void xen_evtchn_set_callback_level(int level) } if (s->callback_gsi && s->callback_gsi < s->nr_callback_gsis) { - qemu_set_irq(s->callback_gsis[s->callback_gsi], level); - if (level) { - /* Ensure the vCPU polls for deassertion */ + /* + * Ugly, but since we hold the BQL we can set this flag so that + * xen_evtchn_set_gsi() can tell the difference between this code + * setting the GSI, and an external device (PCI INTx) doing so. + */ + s->setting_callback_gsi = true; + /* Do not deassert the line if an external device is asserting it. */ + qemu_set_irq(s->callback_gsis[s->callback_gsi], + level || s->extern_gsi_level); + s->setting_callback_gsi = false; + + /* + * If the callback GSI is the only one asserted, ensure the status + * is polled for deassertion in kvm_arch_post_run(). + */ + if (level && !s->extern_gsi_level) { kvm_xen_set_callback_asserted(); } } @@ -1596,7 +1611,7 @@ static int allocate_pirq(XenEvtchnState *s, int type, int gsi) return pirq; } -bool xen_evtchn_set_gsi(int gsi, int level) +bool xen_evtchn_set_gsi(int gsi, int *level) { XenEvtchnState *s = xen_evtchn_singleton; int pirq; @@ -1608,16 +1623,35 @@ bool xen_evtchn_set_gsi(int gsi, int level) } /* - * Check that that it *isn't* the event channel GSI, and thus - * that we are not recursing and it's safe to take s->port_lock. - * - * Locking aside, it's perfectly sane to bail out early for that - * special case, as it would make no sense for the event channel - * GSI to be routed back to event channels, when the delivery - * method is to raise the GSI... that recursion wouldn't *just* - * be a locking issue. + * For the callback_gsi we need to implement a logical OR of the event + * channel GSI and the external input (e.g. from PCI INTx), because + * QEMU itself doesn't support shared level interrupts via demux or + * resamplers. */ if (gsi && gsi == s->callback_gsi) { + /* Remember the external state of the GSI pin (e.g. from PCI INTx) */ + if (!s->setting_callback_gsi) { + s->extern_gsi_level = *level; + + /* + * Don't allow the external device to deassert the line if the + * eveht channel GSI should still be asserted. + */ + if (!s->extern_gsi_level) { + struct vcpu_info *vi = kvm_xen_get_vcpu_info_hva(0); + if (vi && vi->evtchn_upcall_pending) { + /* Need to poll for deassertion */ + kvm_xen_set_callback_asserted(); + *level = 1; + } + } + } + + /* + * The event channel GSI cannot be routed to PIRQ, as that would make + * no sense. It could also deadlock on s->port_lock, if we proceed. + * So bail out now. + */ return false; } @@ -1628,7 +1662,7 @@ bool xen_evtchn_set_gsi(int gsi, int level) return false; } - if (level) { + if (*level) { int port = s->pirq[pirq].port; s->pirq_gsi_set |= (1U << gsi); diff --git a/hw/i386/kvm/xen_evtchn.h b/hw/i386/kvm/xen_evtchn.h index b740acfc0d..0521ebc092 100644 --- a/hw/i386/kvm/xen_evtchn.h +++ b/hw/i386/kvm/xen_evtchn.h @@ -23,7 +23,7 @@ void xen_evtchn_set_callback_level(int level); int xen_evtchn_set_port(uint16_t port); -bool xen_evtchn_set_gsi(int gsi, int level); +bool xen_evtchn_set_gsi(int gsi, int *level); void xen_evtchn_snoop_msi(PCIDevice *dev, bool is_msix, unsigned int vector, uint64_t addr, uint32_t data, bool is_masked); void xen_evtchn_remove_pci_device(PCIDevice *dev); diff --git a/hw/i386/kvm/xen_xenstore.c b/hw/i386/kvm/xen_xenstore.c index 5969105667..17802aa33d 100644 --- a/hw/i386/kvm/xen_xenstore.c +++ b/hw/i386/kvm/xen_xenstore.c @@ -532,6 +532,10 @@ static void xs_read(XenXenstoreState *s, unsigned int req_id, return; } + if (!len) { + return; + } + memcpy(&rsp_data[rsp->len], data->data, len); rsp->len += len; } diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 7111876588..b46975c8a4 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -86,6 +86,7 @@ GlobalProperty pc_compat_9_1[] = { { "ICH9-LPC", "x-smi-swsmi-timer", "off" }, { "ICH9-LPC", "x-smi-periodic-timer", "off" }, { TYPE_INTEL_IOMMU_DEVICE, "stale-tm", "on" }, + { TYPE_INTEL_IOMMU_DEVICE, "aw-bits", "39" }, }; const size_t pc_compat_9_1_len = G_N_ELEMENTS(pc_compat_9_1); @@ -463,7 +464,7 @@ static int check_fdc(Object *obj, void *opaque) } static const char * const fdc_container_path[] = { - "/unattached", "/peripheral", "/peripheral-anon" + "unattached", "peripheral", "peripheral-anon" }; /* @@ -477,7 +478,7 @@ static ISADevice *pc_find_fdc0(void) CheckFdcState state = { 0 }; for (i = 0; i < ARRAY_SIZE(fdc_container_path); i++) { - container = container_get(qdev_get_machine(), fdc_container_path[i]); + container = machine_get_container(fdc_container_path[i]); object_child_foreach(container, check_fdc, &state); } diff --git a/hw/i386/x86-common.c b/hw/i386/x86-common.c index d5a44af243..008496b5b8 100644 --- a/hw/i386/x86-common.c +++ b/hw/i386/x86-common.c @@ -446,8 +446,27 @@ static long get_file_size(FILE *f) void gsi_handler(void *opaque, int n, int level) { GSIState *s = opaque; + bool bypass_ioapic = false; trace_x86_gsi_interrupt(n, level); + +#ifdef CONFIG_XEN_EMU + /* + * Xen delivers the GSI to the Legacy PIC (not that Legacy PIC + * routing actually works properly under Xen). And then to + * *either* the PIRQ handling or the I/OAPIC depending on whether + * the former wants it. + * + * Additionally, this hook allows the Xen event channel GSI to + * work around QEMU's lack of support for shared level interrupts, + * by keeping track of the externally driven state of the pin and + * implementing a logical OR with the state of the evtchn GSI. + */ + if (xen_mode == XEN_EMULATE) { + bypass_ioapic = xen_evtchn_set_gsi(n, &level); + } +#endif + switch (n) { case 0 ... ISA_NUM_IRQS - 1: if (s->i8259_irq[n]) { @@ -456,18 +475,9 @@ void gsi_handler(void *opaque, int n, int level) } /* fall through */ case ISA_NUM_IRQS ... IOAPIC_NUM_PINS - 1: -#ifdef CONFIG_XEN_EMU - /* - * Xen delivers the GSI to the Legacy PIC (not that Legacy PIC - * routing actually works properly under Xen). And then to - * *either* the PIRQ handling or the I/OAPIC depending on - * whether the former wants it. - */ - if (xen_mode == XEN_EMULATE && xen_evtchn_set_gsi(n, level)) { - break; + if (!bypass_ioapic) { + qemu_set_irq(s->ioapic_irq[n], level); } -#endif - qemu_set_irq(s->ioapic_irq[n], level); break; case IO_APIC_SECONDARY_IRQBASE ... IO_APIC_SECONDARY_IRQBASE + IOAPIC_NUM_PINS - 1: diff --git a/hw/intc/loongarch_extioi.c b/hw/intc/loongarch_extioi.c index 4a1a7c357c..f3055ec4d2 100644 --- a/hw/intc/loongarch_extioi.c +++ b/hw/intc/loongarch_extioi.c @@ -15,6 +15,23 @@ #include "hw/intc/loongarch_extioi.h" #include "trace.h" +static int extioi_get_index_from_archid(LoongArchExtIOICommonState *s, + uint64_t arch_id) +{ + int i; + + for (i = 0; i < s->num_cpu; i++) { + if (s->cpu[i].arch_id == arch_id) { + break; + } + } + + if ((i < s->num_cpu) && s->cpu[i].cpu) { + return i; + } + + return -1; +} static void extioi_update_irq(LoongArchExtIOICommonState *s, int irq, int level) { @@ -125,7 +142,7 @@ static inline void extioi_enable_irq(LoongArchExtIOICommonState *s, int index,\ static inline void extioi_update_sw_coremap(LoongArchExtIOICommonState *s, int irq, uint64_t val, bool notify) { - int i, cpu; + int i, cpu, cpuid; /* * loongarch only support little endian, @@ -134,12 +151,17 @@ static inline void extioi_update_sw_coremap(LoongArchExtIOICommonState *s, val = cpu_to_le64(val); for (i = 0; i < 4; i++) { - cpu = val & 0xff; + cpuid = val & 0xff; val = val >> 8; if (!(s->status & BIT(EXTIOI_ENABLE_CPU_ENCODE))) { - cpu = ctz32(cpu); - cpu = (cpu >= 4) ? 0 : cpu; + cpuid = ctz32(cpuid); + cpuid = (cpuid >= 4) ? 0 : cpuid; + } + + cpu = extioi_get_index_from_archid(s, cpuid); + if (cpu < 0) { + continue; } if (s->sw_coremap[irq + i] == cpu) { @@ -347,12 +369,6 @@ static void loongarch_extioi_realize(DeviceState *dev, Error **errp) s->status |= BIT(EXTIOI_ENABLE); } - s->cpu = g_new0(ExtIOICore, s->num_cpu); - if (s->cpu == NULL) { - error_setg(errp, "Memory allocation for ExtIOICore faile"); - return; - } - for (i = 0; i < s->num_cpu; i++) { for (pin = 0; pin < LS3A_INTC_IP; pin++) { qdev_init_gpio_out(dev, &s->cpu[i].parent_irq[pin], 1); diff --git a/hw/intc/loongarch_extioi_common.c b/hw/intc/loongarch_extioi_common.c index e4c1cc3c98..fd56253d10 100644 --- a/hw/intc/loongarch_extioi_common.c +++ b/hw/intc/loongarch_extioi_common.c @@ -13,11 +13,24 @@ static void loongarch_extioi_common_realize(DeviceState *dev, Error **errp) { LoongArchExtIOICommonState *s = (LoongArchExtIOICommonState *)dev; + MachineState *machine = MACHINE(qdev_get_machine()); + MachineClass *mc = MACHINE_GET_CLASS(machine); + const CPUArchIdList *id_list; + int i; - if (s->num_cpu == 0) { - error_setg(errp, "num-cpu must be at least 1"); + assert(mc->possible_cpu_arch_ids); + id_list = mc->possible_cpu_arch_ids(machine); + s->num_cpu = id_list->len; + s->cpu = g_new0(ExtIOICore, s->num_cpu); + if (s->cpu == NULL) { + error_setg(errp, "Memory allocation for ExtIOICore faile"); return; } + + for (i = 0; i < s->num_cpu; i++) { + s->cpu[i].arch_id = id_list->cpus[i].arch_id; + s->cpu[i].cpu = CPU(id_list->cpus[i].cpu); + } } static int loongarch_extioi_common_pre_save(void *opaque) @@ -82,7 +95,6 @@ static const VMStateDescription vmstate_loongarch_extioi = { }; static const Property extioi_properties[] = { - DEFINE_PROP_UINT32("num-cpu", LoongArchExtIOICommonState, num_cpu, 1), DEFINE_PROP_BIT("has-virtualization-extension", LoongArchExtIOICommonState, features, EXTIOI_HAS_VIRT_EXTENSION, 0), }; diff --git a/hw/intc/loongarch_ipi.c b/hw/intc/loongarch_ipi.c index 2ae1a42c46..5376f1e084 100644 --- a/hw/intc/loongarch_ipi.c +++ b/hw/intc/loongarch_ipi.c @@ -7,7 +7,9 @@ #include "qemu/osdep.h" #include "hw/boards.h" +#include "qapi/error.h" #include "hw/intc/loongarch_ipi.h" +#include "hw/qdev-properties.h" #include "target/loongarch/cpu.h" static AddressSpace *get_iocsr_as(CPUState *cpu) @@ -15,44 +17,73 @@ static AddressSpace *get_iocsr_as(CPUState *cpu) return LOONGARCH_CPU(cpu)->env.address_space_iocsr; } -static int archid_cmp(const void *a, const void *b) +static int loongarch_ipi_cmp(const void *a, const void *b) { - CPUArchId *archid_a = (CPUArchId *)a; - CPUArchId *archid_b = (CPUArchId *)b; + IPICore *ipi_a = (IPICore *)a; + IPICore *ipi_b = (IPICore *)b; - return archid_a->arch_id - archid_b->arch_id; + return ipi_a->arch_id - ipi_b->arch_id; } -static CPUArchId *find_cpu_by_archid(MachineState *ms, uint32_t id) +static int loongarch_cpu_by_arch_id(LoongsonIPICommonState *lics, + int64_t arch_id, int *index, CPUState **pcs) { - CPUArchId apic_id, *found_cpu; + IPICore ipi, *found; - apic_id.arch_id = id; - found_cpu = bsearch(&apic_id, ms->possible_cpus->cpus, - ms->possible_cpus->len, - sizeof(*ms->possible_cpus->cpus), - archid_cmp); + ipi.arch_id = arch_id; + found = bsearch(&ipi, lics->cpu, lics->num_cpu, sizeof(IPICore), + loongarch_ipi_cmp); + if (found && found->cpu) { + if (index) { + *index = found - lics->cpu; + } - return found_cpu; + if (pcs) { + *pcs = found->cpu; + } + + return MEMTX_OK; + } + + return MEMTX_ERROR; } -static CPUState *loongarch_cpu_by_arch_id(int64_t arch_id) +static void loongarch_ipi_realize(DeviceState *dev, Error **errp) { + LoongsonIPICommonState *lics = LOONGSON_IPI_COMMON(dev); + LoongarchIPIClass *lic = LOONGARCH_IPI_GET_CLASS(dev); MachineState *machine = MACHINE(qdev_get_machine()); - CPUArchId *archid; + MachineClass *mc = MACHINE_GET_CLASS(machine); + const CPUArchIdList *id_list; + Error *local_err = NULL; + int i; - archid = find_cpu_by_archid(machine, arch_id); - if (archid) { - return CPU(archid->cpu); + lic->parent_realize(dev, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; } - return NULL; + assert(mc->possible_cpu_arch_ids); + id_list = mc->possible_cpu_arch_ids(machine); + lics->num_cpu = id_list->len; + lics->cpu = g_new0(IPICore, lics->num_cpu); + for (i = 0; i < lics->num_cpu; i++) { + lics->cpu[i].arch_id = id_list->cpus[i].arch_id; + lics->cpu[i].cpu = CPU(id_list->cpus[i].cpu); + lics->cpu[i].ipi = lics; + qdev_init_gpio_out(dev, &lics->cpu[i].irq, 1); + } } static void loongarch_ipi_class_init(ObjectClass *klass, void *data) { LoongsonIPICommonClass *licc = LOONGSON_IPI_COMMON_CLASS(klass); + LoongarchIPIClass *lic = LOONGARCH_IPI_CLASS(klass); + DeviceClass *dc = DEVICE_CLASS(klass); + device_class_set_parent_realize(dc, loongarch_ipi_realize, + &lic->parent_realize); licc->get_iocsr_as = get_iocsr_as; licc->cpu_by_arch_id = loongarch_cpu_by_arch_id; } @@ -61,6 +92,8 @@ static const TypeInfo loongarch_ipi_types[] = { { .name = TYPE_LOONGARCH_IPI, .parent = TYPE_LOONGSON_IPI_COMMON, + .instance_size = sizeof(LoongarchIPIState), + .class_size = sizeof(LoongarchIPIClass), .class_init = loongarch_ipi_class_init, } }; diff --git a/hw/intc/loongson_ipi.c b/hw/intc/loongson_ipi.c index 4e08f03510..d2268a27f8 100644 --- a/hw/intc/loongson_ipi.c +++ b/hw/intc/loongson_ipi.c @@ -7,6 +7,7 @@ #include "qemu/osdep.h" #include "hw/intc/loongson_ipi.h" +#include "hw/qdev-properties.h" #include "qapi/error.h" #include "target/mips/cpu.h" @@ -19,6 +20,27 @@ static AddressSpace *get_iocsr_as(CPUState *cpu) return NULL; } +static int loongson_cpu_by_arch_id(LoongsonIPICommonState *lics, + int64_t arch_id, int *index, CPUState **pcs) +{ + CPUState *cs; + + cs = cpu_by_arch_id(arch_id); + if (cs == NULL) { + return MEMTX_ERROR; + } + + if (index) { + *index = cs->cpu_index; + } + + if (pcs) { + *pcs = cs; + } + + return MEMTX_OK; +} + static const MemoryRegionOps loongson_ipi_core_ops = { .read_with_attrs = loongson_ipi_core_readl, .write_with_attrs = loongson_ipi_core_writel, @@ -36,6 +58,7 @@ static void loongson_ipi_realize(DeviceState *dev, Error **errp) LoongsonIPIClass *lic = LOONGSON_IPI_GET_CLASS(dev); SysBusDevice *sbd = SYS_BUS_DEVICE(dev); Error *local_err = NULL; + int i; lic->parent_realize(dev, &local_err); if (local_err) { @@ -43,8 +66,19 @@ static void loongson_ipi_realize(DeviceState *dev, Error **errp) return; } + if (sc->num_cpu == 0) { + error_setg(errp, "num-cpu must be at least 1"); + return; + } + + sc->cpu = g_new0(IPICore, sc->num_cpu); + for (i = 0; i < sc->num_cpu; i++) { + sc->cpu[i].ipi = sc; + qdev_init_gpio_out(dev, &sc->cpu[i].irq, 1); + } + s->ipi_mmio_mem = g_new0(MemoryRegion, sc->num_cpu); - for (unsigned i = 0; i < sc->num_cpu; i++) { + for (i = 0; i < sc->num_cpu; i++) { g_autofree char *name = g_strdup_printf("loongson_ipi_cpu%d_mmio", i); memory_region_init_io(&s->ipi_mmio_mem[i], OBJECT(dev), @@ -63,6 +97,10 @@ static void loongson_ipi_unrealize(DeviceState *dev) k->parent_unrealize(dev); } +static const Property loongson_ipi_properties[] = { + DEFINE_PROP_UINT32("num-cpu", LoongsonIPICommonState, num_cpu, 1), +}; + static void loongson_ipi_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -73,8 +111,9 @@ static void loongson_ipi_class_init(ObjectClass *klass, void *data) &lic->parent_realize); device_class_set_parent_unrealize(dc, loongson_ipi_unrealize, &lic->parent_unrealize); + device_class_set_props(dc, loongson_ipi_properties); licc->get_iocsr_as = get_iocsr_as; - licc->cpu_by_arch_id = cpu_by_arch_id; + licc->cpu_by_arch_id = loongson_cpu_by_arch_id; } static const TypeInfo loongson_ipi_types[] = { diff --git a/hw/intc/loongson_ipi_common.c b/hw/intc/loongson_ipi_common.c index 9a081565f5..f5ab5024c0 100644 --- a/hw/intc/loongson_ipi_common.c +++ b/hw/intc/loongson_ipi_common.c @@ -9,8 +9,6 @@ #include "hw/sysbus.h" #include "hw/intc/loongson_ipi_common.h" #include "hw/irq.h" -#include "hw/qdev-properties.h" -#include "qapi/error.h" #include "qemu/log.h" #include "migration/vmstate.h" #include "trace.h" @@ -105,16 +103,17 @@ static MemTxResult mail_send(LoongsonIPICommonState *ipi, uint32_t cpuid; hwaddr addr; CPUState *cs; + int cpu, ret; cpuid = extract32(val, 16, 10); - cs = licc->cpu_by_arch_id(cpuid); - if (cs == NULL) { + ret = licc->cpu_by_arch_id(ipi, cpuid, &cpu, &cs); + if (ret != MEMTX_OK) { return MEMTX_DECODE_ERROR; } /* override requester_id */ addr = SMP_IPI_MAILBOX + CORE_BUF_20 + (val & 0x1c); - attrs.requester_id = cs->cpu_index; + attrs.requester_id = cpu; return send_ipi_data(ipi, cs, val, addr, attrs); } @@ -125,16 +124,17 @@ static MemTxResult any_send(LoongsonIPICommonState *ipi, uint32_t cpuid; hwaddr addr; CPUState *cs; + int cpu, ret; cpuid = extract32(val, 16, 10); - cs = licc->cpu_by_arch_id(cpuid); - if (cs == NULL) { + ret = licc->cpu_by_arch_id(ipi, cpuid, &cpu, &cs); + if (ret != MEMTX_OK) { return MEMTX_DECODE_ERROR; } /* override requester_id */ addr = val & 0xffff; - attrs.requester_id = cs->cpu_index; + attrs.requester_id = cpu; return send_ipi_data(ipi, cs, val, addr, attrs); } @@ -148,6 +148,7 @@ MemTxResult loongson_ipi_core_writel(void *opaque, hwaddr addr, uint64_t val, uint32_t cpuid; uint8_t vector; CPUState *cs; + int cpu, ret; addr &= 0xff; trace_loongson_ipi_write(size, (uint64_t)addr, val); @@ -178,11 +179,11 @@ MemTxResult loongson_ipi_core_writel(void *opaque, hwaddr addr, uint64_t val, cpuid = extract32(val, 16, 10); /* IPI status vector */ vector = extract8(val, 0, 5); - cs = licc->cpu_by_arch_id(cpuid); - if (cs == NULL || cs->cpu_index >= ipi->num_cpu) { + ret = licc->cpu_by_arch_id(ipi, cpuid, &cpu, &cs); + if (ret != MEMTX_OK || cpu >= ipi->num_cpu) { return MEMTX_DECODE_ERROR; } - loongson_ipi_core_writel(&ipi->cpu[cs->cpu_index], CORE_SET_OFF, + loongson_ipi_core_writel(&ipi->cpu[cpu], CORE_SET_OFF, BIT(vector), 4, attrs); break; default: @@ -253,12 +254,6 @@ static void loongson_ipi_common_realize(DeviceState *dev, Error **errp) { LoongsonIPICommonState *s = LOONGSON_IPI_COMMON(dev); SysBusDevice *sbd = SYS_BUS_DEVICE(dev); - int i; - - if (s->num_cpu == 0) { - error_setg(errp, "num-cpu must be at least 1"); - return; - } memory_region_init_io(&s->ipi_iocsr_mem, OBJECT(dev), &loongson_ipi_iocsr_ops, @@ -273,13 +268,6 @@ static void loongson_ipi_common_realize(DeviceState *dev, Error **errp) &loongson_ipi64_ops, s, "loongson_ipi64_iocsr", 0x118); sysbus_init_mmio(sbd, &s->ipi64_iocsr_mem); - - s->cpu = g_new0(IPICore, s->num_cpu); - for (i = 0; i < s->num_cpu; i++) { - s->cpu[i].ipi = s; - - qdev_init_gpio_out(dev, &s->cpu[i].irq, 1); - } } static void loongson_ipi_common_unrealize(DeviceState *dev) @@ -315,10 +303,6 @@ static const VMStateDescription vmstate_loongson_ipi_common = { } }; -static const Property ipi_common_properties[] = { - DEFINE_PROP_UINT32("num-cpu", LoongsonIPICommonState, num_cpu, 1), -}; - static void loongson_ipi_common_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -328,7 +312,6 @@ static void loongson_ipi_common_class_init(ObjectClass *klass, void *data) &licc->parent_realize); device_class_set_parent_unrealize(dc, loongson_ipi_common_unrealize, &licc->parent_unrealize); - device_class_set_props(dc, ipi_common_properties); dc->vmsd = &vmstate_loongson_ipi_common; } diff --git a/hw/intc/s390_flic.c b/hw/intc/s390_flic.c index 3f3fa939d3..c20f4c1075 100644 --- a/hw/intc/s390_flic.c +++ b/hw/intc/s390_flic.c @@ -471,8 +471,6 @@ static void qemu_s390_flic_class_init(ObjectClass *oc, void *data) } static const Property s390_flic_common_properties[] = { - DEFINE_PROP_UINT32("adapter_routes_max_batch", S390FLICState, - adapter_routes_max_batch, ADAPTER_ROUTES_MAX_GSI), DEFINE_PROP_BOOL("migration-enabled", S390FLICState, migration_enabled, true), }; @@ -480,13 +478,6 @@ static const Property s390_flic_common_properties[] = { static void s390_flic_common_realize(DeviceState *dev, Error **errp) { S390FLICState *fs = S390_FLIC_COMMON(dev); - uint32_t max_batch = fs->adapter_routes_max_batch; - - if (max_batch > ADAPTER_ROUTES_MAX_GSI) { - error_setg(errp, "flic property adapter_routes_max_batch too big" - " (%d > %d)", max_batch, ADAPTER_ROUTES_MAX_GSI); - return; - } fs->ais_supported = s390_has_feat(S390_FEAT_ADAPTER_INT_SUPPRESSION); } diff --git a/hw/intc/xilinx_intc.c b/hw/intc/xilinx_intc.c index d99cf567ae..6930f83907 100644 --- a/hw/intc/xilinx_intc.c +++ b/hw/intc/xilinx_intc.c @@ -144,6 +144,10 @@ static const MemoryRegionOps pic_ops = { .read = pic_read, .write = pic_write, .endianness = DEVICE_NATIVE_ENDIAN, + .impl = { + .min_access_size = 4, + .max_access_size = 4, + }, .valid = { .min_access_size = 4, .max_access_size = 4 diff --git a/hw/loongarch/acpi-build.c b/hw/loongarch/acpi-build.c index 9eb5fb68bf..fdd62acf7e 100644 --- a/hw/loongarch/acpi-build.c +++ b/hw/loongarch/acpi-build.c @@ -456,8 +456,9 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, MachineState *machine) acpi_table_begin(&table, table_data); dsdt = init_aml_allocator(); - for (i = 0; i < VIRT_UART_COUNT; i++) + for (i = 0; i < VIRT_UART_COUNT; i++) { build_uart_device_aml(dsdt, i); + } build_pci_device_aml(dsdt, lvms); build_la_ged_aml(dsdt, machine); build_flash_aml(dsdt, lvms); diff --git a/hw/loongarch/boot.c b/hw/loongarch/boot.c index 48154cdce6..bd8763c61c 100644 --- a/hw/loongarch/boot.c +++ b/hw/loongarch/boot.c @@ -15,6 +15,26 @@ #include "system/reset.h" #include "system/qtest.h" +/* + * Linux Image Format + * https://docs.kernel.org/arch/loongarch/booting.html + */ +#define LINUX_PE_MAGIC 0x818223cd +#define MZ_MAGIC 0x5a4d /* "MZ" */ + +struct loongarch_linux_hdr { + uint32_t mz_magic; + uint32_t res0; + uint64_t kernel_entry; + uint64_t kernel_size; + uint64_t load_offset; + uint64_t res1; + uint64_t res2; + uint64_t res3; + uint32_t linux_pe_magic; + uint32_t pe_header_offset; +} QEMU_PACKED; + struct memmap_entry *memmap_table; unsigned memmap_entries; @@ -171,6 +191,50 @@ static uint64_t cpu_loongarch_virt_to_phys(void *opaque, uint64_t addr) return addr & MAKE_64BIT_MASK(0, TARGET_PHYS_ADDR_SPACE_BITS); } +static int64_t load_loongarch_linux_image(const char *filename, + uint64_t *kernel_entry, + uint64_t *kernel_low, + uint64_t *kernel_high) +{ + gsize len; + ssize_t size; + uint8_t *buffer; + struct loongarch_linux_hdr *hdr; + + /* Load as raw file otherwise */ + if (!g_file_get_contents(filename, (char **)&buffer, &len, NULL)) { + return -1; + } + size = len; + + /* Unpack the image if it is a EFI zboot image */ + if (unpack_efi_zboot_image(&buffer, &size) < 0) { + g_free(buffer); + return -1; + } + + hdr = (struct loongarch_linux_hdr *)buffer; + + if (extract32(le32_to_cpu(hdr->mz_magic), 0, 16) != MZ_MAGIC || + le32_to_cpu(hdr->linux_pe_magic) != LINUX_PE_MAGIC) { + g_free(buffer); + return -1; + } + + /* Early kernel versions may have those fields in virtual address */ + *kernel_entry = extract64(le64_to_cpu(hdr->kernel_entry), + 0, TARGET_PHYS_ADDR_SPACE_BITS); + *kernel_low = extract64(le64_to_cpu(hdr->load_offset), + 0, TARGET_PHYS_ADDR_SPACE_BITS); + *kernel_high = *kernel_low + size; + + rom_add_blob_fixed(filename, buffer, size, *kernel_low); + + g_free(buffer); + + return size; +} + static int64_t load_kernel_info(struct loongarch_boot_info *info) { uint64_t kernel_entry, kernel_low, kernel_high; @@ -181,6 +245,11 @@ static int64_t load_kernel_info(struct loongarch_boot_info *info) &kernel_entry, &kernel_low, &kernel_high, NULL, 0, EM_LOONGARCH, 1, 0); + if (kernel_size < 0) { + kernel_size = load_loongarch_linux_image(info->kernel_filename, + &kernel_entry, &kernel_low, + &kernel_high); + } if (kernel_size < 0) { error_report("could not load kernel '%s': %s", @@ -223,7 +292,7 @@ static void reset_load_elf(void *opaque) cpu_reset(CPU(cpu)); if (env->load_elf) { - if (cpu == LOONGARCH_CPU(first_cpu)) { + if (cpu == LOONGARCH_CPU(first_cpu)) { env->gpr[4] = env->boot_info->a0; env->gpr[5] = env->boot_info->a1; env->gpr[6] = env->boot_info->a2; @@ -285,7 +354,7 @@ static void loongarch_direct_kernel_boot(struct loongarch_boot_info *info) if (info->kernel_filename) { kernel_addr = load_kernel_info(info); } else { - if(!qtest_enabled()) { + if (!qtest_enabled()) { warn_report("No kernel provided, booting from flash drive."); } } diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c index 60bd4dc9d3..63fa0f4e32 100644 --- a/hw/loongarch/virt.c +++ b/hw/loongarch/virt.c @@ -331,8 +331,9 @@ static void fdt_add_uart_node(LoongArchVirtMachineState *lvms, qemu_fdt_setprop_string(ms->fdt, nodename, "compatible", "ns16550a"); qemu_fdt_setprop_cells(ms->fdt, nodename, "reg", 0x0, base, 0x0, size); qemu_fdt_setprop_cell(ms->fdt, nodename, "clock-frequency", 100000000); - if (chosen) + if (chosen) { qemu_fdt_setprop_string(ms->fdt, "/chosen", "stdout-path", nodename); + } qemu_fdt_setprop_cells(ms->fdt, nodename, "interrupts", irq, 0x4); qemu_fdt_setprop_cell(ms->fdt, nodename, "interrupt-parent", *pch_pic_phandle); @@ -815,7 +816,7 @@ static void virt_devices_init(DeviceState *pch_pic, * Create uart fdt node in reverse order so that they appear * in the finished device tree lowest address first */ - for (i = VIRT_UART_COUNT; i --> 0;) { + for (i = VIRT_UART_COUNT; i-- > 0;) { hwaddr base = VIRT_UART_BASE + i * VIRT_UART_SIZE; int irq = VIRT_UART_IRQ + i - VIRT_GSI_BASE; serial_mm_init(get_system_memory(), base, 0, @@ -898,7 +899,6 @@ static void virt_irq_init(LoongArchVirtMachineState *lvms) /* Create IPI device */ ipi = qdev_new(TYPE_LOONGARCH_IPI); - qdev_prop_set_uint32(ipi, "num-cpu", ms->smp.cpus); sysbus_realize_and_unref(SYS_BUS_DEVICE(ipi), &error_fatal); /* IPI iocsr memory region */ @@ -921,7 +921,6 @@ static void virt_irq_init(LoongArchVirtMachineState *lvms) /* Create EXTIOI device */ extioi = qdev_new(TYPE_LOONGARCH_EXTIOI); - qdev_prop_set_uint32(extioi, "num-cpu", ms->smp.cpus); if (virt_is_veiointc_enabled(lvms)) { qdev_prop_set_bit(extioi, "has-virtualization-extension", true); } @@ -1176,8 +1175,9 @@ static void fw_cfg_add_memory(MachineState *ms) size = ram_size - numa_info[0].node_mem; } - if (size) + if (size) { memmap_add_entry(base, size, 1); + } } static void virt_init(MachineState *machine) diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c index bd7652740f..0ae1704a34 100644 --- a/hw/mem/cxl_type3.c +++ b/hw/mem/cxl_type3.c @@ -843,7 +843,7 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp) ComponentRegisters *regs = &cxl_cstate->crb; MemoryRegion *mr = ®s->component_registers; uint8_t *pci_conf = pci_dev->config; - unsigned short msix_num = 6; + unsigned short msix_num = 10; int i, rc; uint16_t count; diff --git a/hw/misc/arm_sysctl.c b/hw/misc/arm_sysctl.c index 016a302e67..01663407ec 100644 --- a/hw/misc/arm_sysctl.c +++ b/hw/misc/arm_sysctl.c @@ -520,7 +520,7 @@ static void arm_sysctl_write(void *opaque, hwaddr offset, * as zero. */ s->sys_cfgctrl = val & ~((3 << 18) | (1 << 31)); - if (val & (1 << 31)) { + if (extract64(val, 31, 1)) { /* Start bit set -- actually do something */ unsigned int dcc = extract32(s->sys_cfgctrl, 26, 4); unsigned int function = extract32(s->sys_cfgctrl, 20, 6); diff --git a/hw/misc/imx6_src.c b/hw/misc/imx6_src.c index dc6a2b92ba..06cc46292e 100644 --- a/hw/misc/imx6_src.c +++ b/hw/misc/imx6_src.c @@ -17,18 +17,7 @@ #include "qemu/module.h" #include "target/arm/arm-powerctl.h" #include "hw/core/cpu.h" - -#ifndef DEBUG_IMX6_SRC -#define DEBUG_IMX6_SRC 0 -#endif - -#define DPRINTF(fmt, args...) \ - do { \ - if (DEBUG_IMX6_SRC) { \ - fprintf(stderr, "[%s]%s: " fmt , TYPE_IMX6_SRC, \ - __func__, ##args); \ - } \ - } while (0) +#include "trace.h" static const char *imx6_src_reg_name(uint32_t reg) { @@ -87,7 +76,7 @@ static void imx6_src_reset(DeviceState *dev) { IMX6SRCState *s = IMX6_SRC(dev); - DPRINTF("\n"); + trace_imx6_src_reset(); memset(s->regs, 0, sizeof(s->regs)); @@ -111,7 +100,7 @@ static uint64_t imx6_src_read(void *opaque, hwaddr offset, unsigned size) } - DPRINTF("reg[%s] => 0x%" PRIx32 "\n", imx6_src_reg_name(index), value); + trace_imx6_src_read(imx6_src_reg_name(index), value); return value; } @@ -134,8 +123,7 @@ static void imx6_clear_reset_bit(CPUState *cpu, run_on_cpu_data data) assert(bql_locked()); s->regs[SRC_SCR] = deposit32(s->regs[SRC_SCR], ri->reset_bit, 1, 0); - DPRINTF("reg[%s] <= 0x%" PRIx32 "\n", - imx6_src_reg_name(SRC_SCR), s->regs[SRC_SCR]); + trace_imx6_clear_reset_bit(imx6_src_reg_name(SRC_SCR), s->regs[SRC_SCR]); g_free(ri); } @@ -173,8 +161,7 @@ static void imx6_src_write(void *opaque, hwaddr offset, uint64_t value, return; } - DPRINTF("reg[%s] <= 0x%" PRIx32 "\n", imx6_src_reg_name(index), - (uint32_t)current_value); + trace_imx6_src_write(imx6_src_reg_name(index), value); change_mask = s->regs[index] ^ (uint32_t)current_value; diff --git a/hw/misc/npcm7xx_mft.c b/hw/misc/npcm7xx_mft.c index 9fcc69fe5c..e565cac05d 100644 --- a/hw/misc/npcm7xx_mft.c +++ b/hw/misc/npcm7xx_mft.c @@ -172,8 +172,9 @@ static NPCM7xxMFTCaptureState npcm7xx_mft_compute_cnt( * RPM = revolution/min. The time for one revlution (in ns) is * MINUTE_TO_NANOSECOND / RPM. */ - count = clock_ns_to_ticks(clock, (60 * NANOSECONDS_PER_SECOND) / - (rpm * NPCM7XX_MFT_PULSE_PER_REVOLUTION)); + count = clock_ns_to_ticks(clock, + (uint64_t)(60 * NANOSECONDS_PER_SECOND) / + ((uint64_t)rpm * NPCM7XX_MFT_PULSE_PER_REVOLUTION)); } if (count > NPCM7XX_MFT_MAX_CNT) { diff --git a/hw/misc/trace-events b/hw/misc/trace-events index 0f5d2b5666..cf1abe6928 100644 --- a/hw/misc/trace-events +++ b/hw/misc/trace-events @@ -253,6 +253,12 @@ ccm_clock_freq(uint32_t clock, uint32_t freq) "(Clock = %d) = %d" ccm_read_reg(const char *reg_name, uint32_t value) "reg[%s] <= 0x%" PRIx32 ccm_write_reg(const char *reg_name, uint32_t value) "reg[%s] => 0x%" PRIx32 +# imx6_src.c +imx6_src_read(const char *reg_name, uint32_t value) "reg[%s] => 0x%" PRIx32 +imx6_src_write(const char *reg_name, uint64_t value) "reg[%s] <= 0x%" PRIx64 +imx6_clear_reset_bit(const char *reg_name, uint32_t value) "reg[%s] <= 0x%" PRIx32 +imx6_src_reset(void) "" + # imx7_src.c imx7_src_read(const char *reg_name, uint32_t value) "reg[%s] => 0x%" PRIx32 imx7_src_write(const char *reg_name, uint32_t value) "reg[%s] <= 0x%" PRIx32 diff --git a/hw/misc/vmcoreinfo.c b/hw/misc/vmcoreinfo.c index b1fcc22e92..b0145fa504 100644 --- a/hw/misc/vmcoreinfo.c +++ b/hw/misc/vmcoreinfo.c @@ -26,9 +26,9 @@ static void fw_cfg_vmci_write(void *opaque, off_t offset, size_t len) && s->vmcoreinfo.guest_format != FW_CFG_VMCOREINFO_FORMAT_NONE; } -static void vmcoreinfo_reset(void *opaque) +static void vmcoreinfo_reset_hold(Object *obj, ResetType type) { - VMCoreInfoState *s = opaque; + VMCoreInfoState *s = VMCOREINFO(obj); s->has_vmcoreinfo = false; memset(&s->vmcoreinfo, 0, sizeof(s->vmcoreinfo)); @@ -47,13 +47,13 @@ static void vmcoreinfo_realize(DeviceState *dev, Error **errp) */ if (!vmcoreinfo_find()) { error_setg(errp, "at most one %s device is permitted", - VMCOREINFO_DEVICE); + TYPE_VMCOREINFO); return; } if (!fw_cfg || !fw_cfg->dma_enabled) { error_setg(errp, "%s device requires fw_cfg with DMA", - VMCOREINFO_DEVICE); + TYPE_VMCOREINFO); return; } @@ -65,7 +65,7 @@ static void vmcoreinfo_realize(DeviceState *dev, Error **errp) * This device requires to register a global reset because it is * not plugged to a bus (which, as its QOM parent, would reset it). */ - qemu_register_reset(vmcoreinfo_reset, s); + qemu_register_resettable(OBJECT(s)); vmcoreinfo_state = s; } @@ -86,16 +86,18 @@ static const VMStateDescription vmstate_vmcoreinfo = { static void vmcoreinfo_device_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + ResettableClass *rc = RESETTABLE_CLASS(klass); dc->vmsd = &vmstate_vmcoreinfo; dc->realize = vmcoreinfo_realize; dc->hotpluggable = false; set_bit(DEVICE_CATEGORY_MISC, dc->categories); + rc->phases.hold = vmcoreinfo_reset_hold; } static const TypeInfo vmcoreinfo_types[] = { { - .name = VMCOREINFO_DEVICE, + .name = TYPE_VMCOREINFO, .parent = TYPE_DEVICE, .instance_size = sizeof(VMCoreInfoState), .class_init = vmcoreinfo_device_class_init, diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 06f096abf6..85e14b788c 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -3337,6 +3337,117 @@ static const VMStateDescription vmstate_virtio_net_rss = { }, }; +static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev) +{ + VirtIONet *n = VIRTIO_NET(vdev); + NetClientState *nc; + struct vhost_net *net; + + if (!n->nic) { + return NULL; + } + + nc = qemu_get_queue(n->nic); + if (!nc) { + return NULL; + } + + net = get_vhost_net(nc->peer); + if (!net) { + return NULL; + } + + return &net->dev; +} + +static int vhost_user_net_save_state(QEMUFile *f, void *pv, size_t size, + const VMStateField *field, + JSONWriter *vmdesc) +{ + VirtIONet *n = pv; + VirtIODevice *vdev = VIRTIO_DEVICE(n); + struct vhost_dev *vhdev; + Error *local_error = NULL; + int ret; + + vhdev = virtio_net_get_vhost(vdev); + if (vhdev == NULL) { + error_reportf_err(local_error, + "Error getting vhost back-end of %s device %s: ", + vdev->name, vdev->parent_obj.canonical_path); + return -1; + } + + ret = vhost_save_backend_state(vhdev, f, &local_error); + if (ret < 0) { + error_reportf_err(local_error, + "Error saving back-end state of %s device %s: ", + vdev->name, vdev->parent_obj.canonical_path); + return ret; + } + + return 0; +} + +static int vhost_user_net_load_state(QEMUFile *f, void *pv, size_t size, + const VMStateField *field) +{ + VirtIONet *n = pv; + VirtIODevice *vdev = VIRTIO_DEVICE(n); + struct vhost_dev *vhdev; + Error *local_error = NULL; + int ret; + + vhdev = virtio_net_get_vhost(vdev); + if (vhdev == NULL) { + error_reportf_err(local_error, + "Error getting vhost back-end of %s device %s: ", + vdev->name, vdev->parent_obj.canonical_path); + return -1; + } + + ret = vhost_load_backend_state(vhdev, f, &local_error); + if (ret < 0) { + error_reportf_err(local_error, + "Error loading back-end state of %s device %s: ", + vdev->name, vdev->parent_obj.canonical_path); + return ret; + } + + return 0; +} + +static bool vhost_user_net_is_internal_migration(void *opaque) +{ + VirtIONet *n = opaque; + VirtIODevice *vdev = VIRTIO_DEVICE(n); + struct vhost_dev *vhdev; + + vhdev = virtio_net_get_vhost(vdev); + if (vhdev == NULL) { + return false; + } + + return vhost_supports_device_state(vhdev); +} + +static const VMStateDescription vhost_user_net_backend_state = { + .name = "virtio-net-device/backend", + .version_id = 0, + .needed = vhost_user_net_is_internal_migration, + .fields = (const VMStateField[]) { + { + .name = "backend", + .info = &(const VMStateInfo) { + .name = "virtio-net vhost-user backend state", + .get = vhost_user_net_load_state, + .put = vhost_user_net_save_state, + }, + }, + VMSTATE_END_OF_LIST() + } +}; + static const VMStateDescription vmstate_virtio_net_device = { .name = "virtio-net-device", .version_id = VIRTIO_NET_VM_VERSION, @@ -3389,6 +3500,7 @@ static const VMStateDescription vmstate_virtio_net_device = { }, .subsections = (const VMStateDescription * const []) { &vmstate_virtio_net_rss, + &vhost_user_net_backend_state, NULL } }; @@ -3950,29 +4062,6 @@ static bool dev_unplug_pending(void *opaque) return vdc->primary_unplug_pending(dev); } -static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev) -{ - VirtIONet *n = VIRTIO_NET(vdev); - NetClientState *nc; - struct vhost_net *net; - - if (!n->nic) { - return NULL; - } - - nc = qemu_get_queue(n->nic); - if (!nc) { - return NULL; - } - - net = get_vhost_net(nc->peer); - if (!net) { - return NULL; - } - - return &net->dev; -} - static const VMStateDescription vmstate_virtio_net = { .name = "virtio-net", .minimum_version_id = VIRTIO_NET_VM_VERSION, diff --git a/hw/net/xen_nic.c b/hw/net/xen_nic.c index 97ebd9fa30..5410039490 100644 --- a/hw/net/xen_nic.c +++ b/hw/net/xen_nic.c @@ -510,23 +510,22 @@ static char *xen_netdev_get_name(XenDevice *xendev, Error **errp) if (netdev->dev == -1) { XenBus *xenbus = XEN_BUS(qdev_get_parent_bus(DEVICE(xendev))); - char fe_path[XENSTORE_ABS_PATH_MAX + 1]; int idx = (xen_mode == XEN_EMULATE) ? 0 : 1; + Error *local_err = NULL; char *value; /* Theoretically we could go up to INT_MAX here but that's overkill */ while (idx < 100) { - snprintf(fe_path, sizeof(fe_path), - "/local/domain/%u/device/vif/%u", - xendev->frontend_id, idx); - value = qemu_xen_xs_read(xenbus->xsh, XBT_NULL, fe_path, NULL); + value = xs_node_read(xenbus->xsh, XBT_NULL, NULL, &local_err, + "/local/domain/%u/device/vif/%u", + xendev->frontend_id, idx); if (!value) { if (errno == ENOENT) { netdev->dev = idx; + error_free(local_err); goto found; } - error_setg(errp, "cannot read %s: %s", fe_path, - strerror(errno)); + error_propagate(errp, local_err); return NULL; } free(value); diff --git a/hw/net/xilinx_ethlite.c b/hw/net/xilinx_ethlite.c index 4c0c7fcae3..14bf2b2e17 100644 --- a/hw/net/xilinx_ethlite.c +++ b/hw/net/xilinx_ethlite.c @@ -2,6 +2,7 @@ * QEMU model of the Xilinx Ethernet Lite MAC. * * Copyright (c) 2009 Edgar E. Iglesias. + * Copyright (c) 2024 Linaro, Ltd * * DS580: https://docs.amd.com/v/u/en-US/xps_ethernetlite * LogiCORE IP XPS Ethernet Lite Media Access Controller @@ -27,28 +28,34 @@ #include "qemu/osdep.h" #include "qemu/module.h" +#include "qemu/bitops.h" #include "qom/object.h" -#include "exec/tswap.h" +#include "qapi/error.h" #include "hw/sysbus.h" #include "hw/irq.h" #include "hw/qdev-properties.h" +#include "hw/misc/unimp.h" #include "net/net.h" #include "trace.h" -#define R_TX_BUF0 0 #define BUFSZ_MAX 0x07e4 -#define R_TX_LEN0 (0x07f4 / 4) -#define R_TX_GIE0 (0x07f8 / 4) -#define R_TX_CTRL0 (0x07fc / 4) -#define R_TX_BUF1 (0x0800 / 4) -#define R_TX_LEN1 (0x0ff4 / 4) -#define R_TX_CTRL1 (0x0ffc / 4) - -#define R_RX_BUF0 (0x1000 / 4) -#define R_RX_CTRL0 (0x17fc / 4) -#define R_RX_BUF1 (0x1800 / 4) -#define R_RX_CTRL1 (0x1ffc / 4) -#define R_MAX (0x2000 / 4) +#define A_MDIO_BASE 0x07e4 +#define A_TX_BASE0 0x07f4 +#define A_TX_BASE1 0x0ff4 +#define A_RX_BASE0 0x17fc +#define A_RX_BASE1 0x1ffc + +enum { + TX_LEN = 0, + TX_GIE = 1, + TX_CTRL = 2, + TX_MAX +}; + +enum { + RX_CTRL = 0, + RX_MAX +}; #define GIE_GIE 0x80000000 @@ -56,6 +63,21 @@ #define CTRL_P 0x2 #define CTRL_S 0x1 +typedef struct XlnxXpsEthLitePort { + MemoryRegion txio; + MemoryRegion rxio; + MemoryRegion txbuf; + MemoryRegion rxbuf; + + struct { + uint32_t tx_len; + uint32_t tx_gie; + uint32_t tx_ctrl; + + uint32_t rx_ctrl; + } reg; +} XlnxXpsEthLitePort; + #define TYPE_XILINX_ETHLITE "xlnx.xps-ethernetlite" OBJECT_DECLARE_SIMPLE_TYPE(XlnxXpsEthLite, XILINX_ETHLITE) @@ -63,7 +85,7 @@ struct XlnxXpsEthLite { SysBusDevice parent_obj; - MemoryRegion mmio; + MemoryRegion container; qemu_irq irq; NICState *nic; NICConf conf; @@ -72,125 +94,176 @@ struct XlnxXpsEthLite uint32_t c_rx_pingpong; unsigned int port_index; /* dual port RAM index */ - uint32_t regs[R_MAX]; + UnimplementedDeviceState rsvd; + UnimplementedDeviceState mdio; + XlnxXpsEthLitePort port[2]; }; static inline void eth_pulse_irq(XlnxXpsEthLite *s) { /* Only the first gie reg is active. */ - if (s->regs[R_TX_GIE0] & GIE_GIE) { + if (s->port[0].reg.tx_gie & GIE_GIE) { qemu_irq_pulse(s->irq); } } -static uint64_t -eth_read(void *opaque, hwaddr addr, unsigned int size) +static unsigned addr_to_port_index(hwaddr addr) +{ + return extract64(addr, 11, 1); +} + +static void *txbuf_ptr(XlnxXpsEthLite *s, unsigned port_index) +{ + return memory_region_get_ram_ptr(&s->port[port_index].txbuf); +} + +static void *rxbuf_ptr(XlnxXpsEthLite *s, unsigned port_index) +{ + return memory_region_get_ram_ptr(&s->port[port_index].rxbuf); +} + +static uint64_t port_tx_read(void *opaque, hwaddr addr, unsigned int size) { XlnxXpsEthLite *s = opaque; + unsigned port_index = addr_to_port_index(addr); uint32_t r = 0; - addr >>= 2; - - switch (addr) - { - case R_TX_GIE0: - case R_TX_LEN0: - case R_TX_LEN1: - case R_TX_CTRL1: - case R_TX_CTRL0: - case R_RX_CTRL1: - case R_RX_CTRL0: - r = s->regs[addr]; - break; - - default: - r = tswap32(s->regs[addr]); - break; + switch (addr >> 2) { + case TX_LEN: + r = s->port[port_index].reg.tx_len; + break; + case TX_GIE: + r = s->port[port_index].reg.tx_gie; + break; + case TX_CTRL: + r = s->port[port_index].reg.tx_ctrl; + break; + default: + g_assert_not_reached(); } + return r; } -static void -eth_write(void *opaque, hwaddr addr, - uint64_t val64, unsigned int size) +static void port_tx_write(void *opaque, hwaddr addr, uint64_t value, + unsigned int size) { XlnxXpsEthLite *s = opaque; - unsigned int base = 0; - uint32_t value = val64; - - addr >>= 2; - switch (addr) - { - case R_TX_CTRL0: - case R_TX_CTRL1: - if (addr == R_TX_CTRL1) - base = 0x800 / 4; - - if ((value & (CTRL_P | CTRL_S)) == CTRL_S) { - qemu_send_packet(qemu_get_queue(s->nic), - (void *) &s->regs[base], - s->regs[base + R_TX_LEN0]); - if (s->regs[base + R_TX_CTRL0] & CTRL_I) - eth_pulse_irq(s); - } else if ((value & (CTRL_P | CTRL_S)) == (CTRL_P | CTRL_S)) { - memcpy(&s->conf.macaddr.a[0], &s->regs[base], 6); - if (s->regs[base + R_TX_CTRL0] & CTRL_I) - eth_pulse_irq(s); + unsigned port_index = addr_to_port_index(addr); + + switch (addr >> 2) { + case TX_LEN: + s->port[port_index].reg.tx_len = value; + break; + case TX_GIE: + s->port[port_index].reg.tx_gie = value; + break; + case TX_CTRL: + if ((value & (CTRL_P | CTRL_S)) == CTRL_S) { + qemu_send_packet(qemu_get_queue(s->nic), + txbuf_ptr(s, port_index), + s->port[port_index].reg.tx_len); + if (s->port[port_index].reg.tx_ctrl & CTRL_I) { + eth_pulse_irq(s); + } + } else if ((value & (CTRL_P | CTRL_S)) == (CTRL_P | CTRL_S)) { + memcpy(&s->conf.macaddr.a[0], txbuf_ptr(s, port_index), 6); + if (s->port[port_index].reg.tx_ctrl & CTRL_I) { + eth_pulse_irq(s); } + } + /* + * We are fast and get ready pretty much immediately + * so we actually never flip the S nor P bits to one. + */ + s->port[port_index].reg.tx_ctrl = value & ~(CTRL_P | CTRL_S); + break; + default: + g_assert_not_reached(); + } +} + +static const MemoryRegionOps eth_porttx_ops = { + .read = port_tx_read, + .write = port_tx_write, + .endianness = DEVICE_NATIVE_ENDIAN, + .impl = { + .min_access_size = 4, + .max_access_size = 4, + }, + .valid = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; - /* We are fast and get ready pretty much immediately so - we actually never flip the S nor P bits to one. */ - s->regs[addr] = value & ~(CTRL_P | CTRL_S); - break; +static uint64_t port_rx_read(void *opaque, hwaddr addr, unsigned int size) +{ + XlnxXpsEthLite *s = opaque; + unsigned port_index = addr_to_port_index(addr); + uint32_t r = 0; - /* Keep these native. */ - case R_RX_CTRL0: - case R_RX_CTRL1: - if (!(value & CTRL_S)) { - qemu_flush_queued_packets(qemu_get_queue(s->nic)); - } - /* fall through */ - case R_TX_LEN0: - case R_TX_LEN1: - case R_TX_GIE0: - s->regs[addr] = value; - break; - - default: - s->regs[addr] = tswap32(value); - break; + switch (addr >> 2) { + case RX_CTRL: + r = s->port[port_index].reg.rx_ctrl; + break; + default: + g_assert_not_reached(); } + + return r; } -static const MemoryRegionOps eth_ops = { - .read = eth_read, - .write = eth_write, - .endianness = DEVICE_NATIVE_ENDIAN, - .valid = { - .min_access_size = 4, - .max_access_size = 4 +static void port_rx_write(void *opaque, hwaddr addr, uint64_t value, + unsigned int size) +{ + XlnxXpsEthLite *s = opaque; + unsigned port_index = addr_to_port_index(addr); + + switch (addr >> 2) { + case RX_CTRL: + if (!(value & CTRL_S)) { + qemu_flush_queued_packets(qemu_get_queue(s->nic)); + } + s->port[port_index].reg.rx_ctrl = value; + break; + default: + g_assert_not_reached(); } +} + +static const MemoryRegionOps eth_portrx_ops = { + .read = port_rx_read, + .write = port_rx_write, + .endianness = DEVICE_NATIVE_ENDIAN, + .impl = { + .min_access_size = 4, + .max_access_size = 4, + }, + .valid = { + .min_access_size = 4, + .max_access_size = 4, + }, }; static bool eth_can_rx(NetClientState *nc) { XlnxXpsEthLite *s = qemu_get_nic_opaque(nc); - unsigned int rxbase = s->port_index * (0x800 / 4); - return !(s->regs[rxbase + R_RX_CTRL0] & CTRL_S); + return !(s->port[s->port_index].reg.rx_ctrl & CTRL_S); } static ssize_t eth_rx(NetClientState *nc, const uint8_t *buf, size_t size) { XlnxXpsEthLite *s = qemu_get_nic_opaque(nc); - unsigned int rxbase = s->port_index * (0x800 / 4); + unsigned int port_index = s->port_index; /* DA filter. */ if (!(buf[0] & 0x80) && memcmp(&s->conf.macaddr.a[0], buf, 6)) return size; - if (s->regs[rxbase + R_RX_CTRL0] & CTRL_S) { - trace_ethlite_pkt_lost(s->regs[R_RX_CTRL0]); + if (s->port[port_index].reg.rx_ctrl & CTRL_S) { + trace_ethlite_pkt_lost(s->port[port_index].reg.rx_ctrl); return -1; } @@ -198,10 +271,10 @@ static ssize_t eth_rx(NetClientState *nc, const uint8_t *buf, size_t size) trace_ethlite_pkt_size_too_big(size); return -1; } - memcpy(&s->regs[rxbase + R_RX_BUF0], buf, size); + memcpy(rxbuf_ptr(s, port_index), buf, size); - s->regs[rxbase + R_RX_CTRL0] |= CTRL_S; - if (s->regs[R_RX_CTRL0] & CTRL_I) { + s->port[port_index].reg.rx_ctrl |= CTRL_S; + if (s->port[port_index].reg.rx_ctrl & CTRL_I) { eth_pulse_irq(s); } @@ -228,6 +301,52 @@ static void xilinx_ethlite_realize(DeviceState *dev, Error **errp) { XlnxXpsEthLite *s = XILINX_ETHLITE(dev); + memory_region_init(&s->container, OBJECT(dev), + "xlnx.xps-ethernetlite", 0x2000); + + object_initialize_child(OBJECT(dev), "ethlite.reserved", &s->rsvd, + TYPE_UNIMPLEMENTED_DEVICE); + qdev_prop_set_string(DEVICE(&s->rsvd), "name", "ethlite.reserved"); + qdev_prop_set_uint64(DEVICE(&s->rsvd), "size", + memory_region_size(&s->container)); + sysbus_realize(SYS_BUS_DEVICE(&s->rsvd), &error_fatal); + memory_region_add_subregion_overlap(&s->container, 0, + sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->rsvd), 0), + -1); + + object_initialize_child(OBJECT(dev), "ethlite.mdio", &s->mdio, + TYPE_UNIMPLEMENTED_DEVICE); + qdev_prop_set_string(DEVICE(&s->mdio), "name", "ethlite.mdio"); + qdev_prop_set_uint64(DEVICE(&s->mdio), "size", 4 * 4); + sysbus_realize(SYS_BUS_DEVICE(&s->mdio), &error_fatal); + memory_region_add_subregion(&s->container, A_MDIO_BASE, + sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->mdio), 0)); + + for (unsigned i = 0; i < 2; i++) { + memory_region_init_ram(&s->port[i].txbuf, OBJECT(dev), + i ? "ethlite.tx[1]buf" : "ethlite.tx[0]buf", + BUFSZ_MAX, &error_abort); + memory_region_add_subregion(&s->container, 0x0800 * i, &s->port[i].txbuf); + memory_region_init_io(&s->port[i].txio, OBJECT(dev), + ð_porttx_ops, s, + i ? "ethlite.tx[1]io" : "ethlite.tx[0]io", + 4 * TX_MAX); + memory_region_add_subregion(&s->container, i ? A_TX_BASE1 : A_TX_BASE0, + &s->port[i].txio); + + memory_region_init_ram(&s->port[i].rxbuf, OBJECT(dev), + i ? "ethlite.rx[1]buf" : "ethlite.rx[0]buf", + BUFSZ_MAX, &error_abort); + memory_region_add_subregion(&s->container, 0x1000 + 0x0800 * i, + &s->port[i].rxbuf); + memory_region_init_io(&s->port[i].rxio, OBJECT(dev), + ð_portrx_ops, s, + i ? "ethlite.rx[1]io" : "ethlite.rx[0]io", + 4 * RX_MAX); + memory_region_add_subregion(&s->container, i ? A_RX_BASE1 : A_RX_BASE0, + &s->port[i].rxio); + } + qemu_macaddr_default_if_unset(&s->conf.macaddr); s->nic = qemu_new_nic(&net_xilinx_ethlite_info, &s->conf, object_get_typename(OBJECT(dev)), dev->id, @@ -240,10 +359,7 @@ static void xilinx_ethlite_init(Object *obj) XlnxXpsEthLite *s = XILINX_ETHLITE(obj); sysbus_init_irq(SYS_BUS_DEVICE(obj), &s->irq); - - memory_region_init_io(&s->mmio, obj, ð_ops, s, - "xlnx.xps-ethernetlite", R_MAX * 4); - sysbus_init_mmio(SYS_BUS_DEVICE(obj), &s->mmio); + sysbus_init_mmio(SYS_BUS_DEVICE(obj), &s->container); } static const Property xilinx_ethlite_properties[] = { diff --git a/hw/pci/msix.c b/hw/pci/msix.c index d8a55a6474..57ec7084a4 100644 --- a/hw/pci/msix.c +++ b/hw/pci/msix.c @@ -250,7 +250,7 @@ static uint64_t msix_pba_mmio_read(void *opaque, hwaddr addr, PCIDevice *dev = opaque; if (dev->msix_vector_poll_notifier) { unsigned vector_start = addr * 8; - unsigned vector_end = MIN(addr + size * 8, dev->msix_entries_nr); + unsigned vector_end = MIN((addr + size) * 8, dev->msix_entries_nr); dev->msix_vector_poll_notifier(dev, vector_start, vector_end); } diff --git a/hw/pci/pci.c b/hw/pci/pci.c index b6c630c323..2afa423925 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -85,7 +85,7 @@ static const Property pci_props[] = { DEFINE_PROP_PCI_DEVFN("addr", PCIDevice, devfn, -1), DEFINE_PROP_STRING("romfile", PCIDevice, romfile), DEFINE_PROP_UINT32("romsize", PCIDevice, romsize, UINT32_MAX), - DEFINE_PROP_UINT32("rombar", PCIDevice, rom_bar, 1), + DEFINE_PROP_INT32("rombar", PCIDevice, rom_bar, -1), DEFINE_PROP_BIT("multifunction", PCIDevice, cap_present, QEMU_PCI_CAP_MULTIFUNCTION_BITNR, false), DEFINE_PROP_BIT("x-pcie-lnksta-dllla", PCIDevice, cap_present, @@ -1598,7 +1598,7 @@ static void pci_update_mappings(PCIDevice *d) continue; new_addr = pci_bar_address(d, i, r->type, r->size); - if (!d->has_power) { + if (!d->enabled) { new_addr = PCI_BAR_UNMAPPED; } @@ -1686,7 +1686,7 @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int pci_update_irq_disabled(d, was_irq_disabled); memory_region_set_enabled(&d->bus_master_enable_region, (pci_get_word(d->config + PCI_COMMAND) - & PCI_COMMAND_MASTER) && d->has_power); + & PCI_COMMAND_MASTER) && d->enabled); } msi_write_config(d, addr, val_in, l); @@ -2963,16 +2963,21 @@ MSIMessage pci_get_msi_message(PCIDevice *dev, int vector) void pci_set_power(PCIDevice *d, bool state) { - if (d->has_power == state) { + pci_set_enabled(d, state); +} + +void pci_set_enabled(PCIDevice *d, bool state) +{ + if (d->enabled == state) { return; } - d->has_power = state; + d->enabled = state; pci_update_mappings(d); memory_region_set_enabled(&d->bus_master_enable_region, (pci_get_word(d->config + PCI_COMMAND) - & PCI_COMMAND_MASTER) && d->has_power); - if (!d->has_power) { + & PCI_COMMAND_MASTER) && d->enabled); + if (!d->enabled) { pci_device_reset(d); } } diff --git a/hw/pci/pci_host.c b/hw/pci/pci_host.c index 4510890dfc..80f91f409f 100644 --- a/hw/pci/pci_host.c +++ b/hw/pci/pci_host.c @@ -86,7 +86,7 @@ void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr, * allowing direct removal of unexposed functions. */ if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) || - !pci_dev->has_power || is_pci_dev_ejected(pci_dev)) { + !pci_dev->enabled || is_pci_dev_ejected(pci_dev)) { return; } @@ -111,7 +111,7 @@ uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr, * allowing direct removal of unexposed functions. */ if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) || - !pci_dev->has_power || is_pci_dev_ejected(pci_dev)) { + !pci_dev->enabled || is_pci_dev_ejected(pci_dev)) { return ~0x0; } diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index 0b455c8654..1b12db6fa2 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -1113,18 +1113,22 @@ void pcie_sync_bridge_lnk(PCIDevice *bridge_dev) if ((lnksta & PCI_EXP_LNKSTA_NLW) > (lnkcap & PCI_EXP_LNKCAP_MLW)) { lnksta &= ~PCI_EXP_LNKSTA_NLW; lnksta |= lnkcap & PCI_EXP_LNKCAP_MLW; - } else if (!(lnksta & PCI_EXP_LNKSTA_NLW)) { - lnksta |= QEMU_PCI_EXP_LNKSTA_NLW(QEMU_PCI_EXP_LNK_X1); } if ((lnksta & PCI_EXP_LNKSTA_CLS) > (lnkcap & PCI_EXP_LNKCAP_SLS)) { lnksta &= ~PCI_EXP_LNKSTA_CLS; lnksta |= lnkcap & PCI_EXP_LNKCAP_SLS; - } else if (!(lnksta & PCI_EXP_LNKSTA_CLS)) { - lnksta |= QEMU_PCI_EXP_LNKSTA_CLS(QEMU_PCI_EXP_LNK_2_5GT); } } + if (!(lnksta & PCI_EXP_LNKSTA_NLW)) { + lnksta |= QEMU_PCI_EXP_LNKSTA_NLW(QEMU_PCI_EXP_LNK_X1); + } + + if (!(lnksta & PCI_EXP_LNKSTA_CLS)) { + lnksta |= QEMU_PCI_EXP_LNKSTA_CLS(QEMU_PCI_EXP_LNK_2_5GT); + } + pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA, PCI_EXP_LNKSTA_CLS | PCI_EXP_LNKSTA_NLW); pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA, lnksta & diff --git a/hw/s390x/css-bridge.c b/hw/s390x/css-bridge.c index 04ab1f6402..c48d5571b5 100644 --- a/hw/s390x/css-bridge.c +++ b/hw/s390x/css-bridge.c @@ -66,16 +66,8 @@ static char *virtual_css_bus_get_dev_path(DeviceState *dev) { CcwDevice *ccw_dev = CCW_DEVICE(dev); SubchDev *sch = ccw_dev->sch; - VirtualCssBridge *bridge = - VIRTUAL_CSS_BRIDGE(qdev_get_parent_bus(dev)->parent); - /* - * We can't provide a dev path for backward compatibility on - * older machines, as it is visible in the migration stream. - */ - return bridge->css_dev_path ? - g_strdup_printf("/%02x.%1x.%04x", sch->cssid, sch->ssid, sch->devno) : - NULL; + return g_strdup_printf("/%02x.%1x.%04x", sch->cssid, sch->ssid, sch->devno); } static void virtual_css_bus_class_init(ObjectClass *klass, void *data) @@ -120,11 +112,6 @@ VirtualCssBus *virtual_css_bus_init(void) /***************** Virtual-css Bus Bridge Device ********************/ -static const Property virtual_css_bridge_properties[] = { - DEFINE_PROP_BOOL("css_dev_path", VirtualCssBridge, css_dev_path, - true), -}; - static bool prop_get_true(Object *obj, Error **errp) { return true; @@ -137,7 +124,6 @@ static void virtual_css_bridge_class_init(ObjectClass *klass, void *data) hc->unplug = ccw_device_unplug; set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); - device_class_set_props(dc, virtual_css_bridge_properties); object_class_property_add_bool(klass, "cssid-unrestricted", prop_get_true, NULL); object_class_property_set_description(klass, "cssid-unrestricted", diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c index 3a946be7a5..4aa21c91fc 100644 --- a/hw/s390x/ipl.c +++ b/hw/s390x/ipl.c @@ -49,13 +49,6 @@ #define BIOS_MAX_SIZE 0x300000UL #define IPL_PSW_MASK (PSW_MASK_32 | PSW_MASK_64) -static bool iplb_extended_needed(void *opaque) -{ - S390IPLState *ipl = S390_IPL(object_resolve_path(TYPE_S390_IPL, NULL)); - - return ipl->iplbext_migration; -} - /* Place the IPLB chain immediately before the BIOS in memory */ static uint64_t find_iplb_chain_addr(uint64_t bios_addr, uint16_t count) { @@ -67,7 +60,6 @@ static const VMStateDescription vmstate_iplb_extended = { .name = "ipl/iplb_extended", .version_id = 0, .minimum_version_id = 0, - .needed = iplb_extended_needed, .fields = (const VMStateField[]) { VMSTATE_UINT8_ARRAY(reserved_ext, IplParameterBlock, 4096 - 200), VMSTATE_END_OF_LIST() @@ -297,8 +289,6 @@ static const Property s390_ipl_properties[] = { DEFINE_PROP_STRING("cmdline", S390IPLState, cmdline), DEFINE_PROP_STRING("firmware", S390IPLState, firmware), DEFINE_PROP_BOOL("enforce_bios", S390IPLState, enforce_bios, false), - DEFINE_PROP_BOOL("iplbext_migration", S390IPLState, iplbext_migration, - true), }; static void s390_ipl_set_boot_menu(S390IPLState *ipl) diff --git a/hw/s390x/ipl.h b/hw/s390x/ipl.h index d7d0b7bfd2..8e3882d506 100644 --- a/hw/s390x/ipl.h +++ b/hw/s390x/ipl.h @@ -80,7 +80,6 @@ struct S390IPLState { uint8_t cssid; uint8_t ssid; uint16_t devno; - bool iplbext_migration; }; QEMU_BUILD_BUG_MSG(offsetof(S390IPLState, iplb) & 3, "alignment of iplb wrong"); diff --git a/hw/s390x/s390-skeys.c b/hw/s390x/s390-skeys.c index dda96ea32a..995817f4a3 100644 --- a/hw/s390x/s390-skeys.c +++ b/hw/s390x/s390-skeys.c @@ -469,23 +469,15 @@ static void s390_skeys_realize(DeviceState *dev, Error **errp) { S390SKeysState *ss = S390_SKEYS(dev); - if (ss->migration_enabled) { - register_savevm_live(TYPE_S390_SKEYS, 0, 1, - &savevm_s390_storage_keys, ss); - } + register_savevm_live(TYPE_S390_SKEYS, 0, 1, &savevm_s390_storage_keys, ss); } -static const Property s390_skeys_props[] = { - DEFINE_PROP_BOOL("migration-enabled", S390SKeysState, migration_enabled, true), -}; - static void s390_skeys_class_init(ObjectClass *oc, void *data) { DeviceClass *dc = DEVICE_CLASS(oc); dc->hotpluggable = false; dc->realize = s390_skeys_realize; - device_class_set_props(dc, s390_skeys_props); set_bit(DEVICE_CATEGORY_MISC, dc->categories); } diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c index 2be8da2913..38aeba14ee 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -725,10 +725,9 @@ static S390CcwMachineClass *current_mc; * various "*_allowed" variables are enabled, so that the *_allowed() wrappers * below return the correct default value for the "none" machine. * - * Attention! Do *not* add additional new wrappers for CPU features (e.g. like - * the ri_allowed() wrapper) via this mechanism anymore. CPU features should - * be handled via the CPU models, i.e. checking with cpu_model_allowed() during - * CPU initialization and s390_has_feat() later should be sufficient. + * Attention! Do *not* add additional new wrappers for CPU features via this + * mechanism anymore. CPU features should be handled via the CPU models, + * i.e. checking with s390_has_feat() should be sufficient. */ static S390CcwMachineClass *get_machine_class(void) { @@ -744,16 +743,6 @@ static S390CcwMachineClass *get_machine_class(void) return current_mc; } -bool ri_allowed(void) -{ - return get_machine_class()->ri_allowed; -} - -bool cpu_model_allowed(void) -{ - return get_machine_class()->cpu_model_allowed; -} - bool hpage_1m_allowed(void) { return get_machine_class()->hpage_1m_allowed; @@ -791,8 +780,6 @@ static void ccw_machine_class_init(ObjectClass *oc, void *data) HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); S390CcwMachineClass *s390mc = S390_CCW_MACHINE_CLASS(mc); - s390mc->ri_allowed = true; - s390mc->cpu_model_allowed = true; s390mc->hpage_1m_allowed = true; s390mc->max_threads = 1; mc->init = ccw_init; @@ -1257,6 +1244,7 @@ static void ccw_machine_2_9_instance_options(MachineState *machine) s390_cpudef_featoff_greater(12, 1, S390_FEAT_ZPCI); s390_cpudef_featoff_greater(12, 1, S390_FEAT_ADAPTER_INT_SUPPRESSION); s390_cpudef_featoff_greater(12, 1, S390_FEAT_ADAPTER_EVENT_NOTIFICATION); + css_migration_enabled = false; } static void ccw_machine_2_9_class_options(MachineClass *mc) @@ -1269,99 +1257,9 @@ static void ccw_machine_2_9_class_options(MachineClass *mc) ccw_machine_2_10_class_options(mc); compat_props_add(mc->compat_props, hw_compat_2_9, hw_compat_2_9_len); compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); - css_migration_enabled = false; } DEFINE_CCW_MACHINE(2, 9); -static void ccw_machine_2_8_instance_options(MachineState *machine) -{ - ccw_machine_2_9_instance_options(machine); -} - -static void ccw_machine_2_8_class_options(MachineClass *mc) -{ - static GlobalProperty compat[] = { - { TYPE_S390_FLIC_COMMON, "adapter_routes_max_batch", "64", }, - }; - - ccw_machine_2_9_class_options(mc); - compat_props_add(mc->compat_props, hw_compat_2_8, hw_compat_2_8_len); - compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); -} -DEFINE_CCW_MACHINE(2, 8); - -static void ccw_machine_2_7_instance_options(MachineState *machine) -{ - ccw_machine_2_8_instance_options(machine); -} - -static void ccw_machine_2_7_class_options(MachineClass *mc) -{ - S390CcwMachineClass *s390mc = S390_CCW_MACHINE_CLASS(mc); - - s390mc->cpu_model_allowed = false; - ccw_machine_2_8_class_options(mc); - compat_props_add(mc->compat_props, hw_compat_2_7, hw_compat_2_7_len); -} -DEFINE_CCW_MACHINE(2, 7); - -static void ccw_machine_2_6_instance_options(MachineState *machine) -{ - ccw_machine_2_7_instance_options(machine); -} - -static void ccw_machine_2_6_class_options(MachineClass *mc) -{ - S390CcwMachineClass *s390mc = S390_CCW_MACHINE_CLASS(mc); - static GlobalProperty compat[] = { - { TYPE_S390_IPL, "iplbext_migration", "off", }, - { TYPE_VIRTUAL_CSS_BRIDGE, "css_dev_path", "off", }, - }; - - s390mc->ri_allowed = false; - ccw_machine_2_7_class_options(mc); - compat_props_add(mc->compat_props, hw_compat_2_6, hw_compat_2_6_len); - compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); -} -DEFINE_CCW_MACHINE(2, 6); - -static void ccw_machine_2_5_instance_options(MachineState *machine) -{ - ccw_machine_2_6_instance_options(machine); -} - -static void ccw_machine_2_5_class_options(MachineClass *mc) -{ - ccw_machine_2_6_class_options(mc); - compat_props_add(mc->compat_props, hw_compat_2_5, hw_compat_2_5_len); -} -DEFINE_CCW_MACHINE(2, 5); - -static void ccw_machine_2_4_instance_options(MachineState *machine) -{ - ccw_machine_2_5_instance_options(machine); -} - -static void ccw_machine_2_4_class_options(MachineClass *mc) -{ - static GlobalProperty compat[] = { - { TYPE_S390_SKEYS, "migration-enabled", "off", }, - { "virtio-blk-ccw", "max_revision", "0", }, - { "virtio-balloon-ccw", "max_revision", "0", }, - { "virtio-serial-ccw", "max_revision", "0", }, - { "virtio-9p-ccw", "max_revision", "0", }, - { "virtio-rng-ccw", "max_revision", "0", }, - { "virtio-net-ccw", "max_revision", "0", }, - { "virtio-scsi-ccw", "max_revision", "0", }, - { "vhost-scsi-ccw", "max_revision", "0", }, - }; - - ccw_machine_2_5_class_options(mc); - compat_props_add(mc->compat_props, hw_compat_2_4, hw_compat_2_4_len); - compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); -} -DEFINE_CCW_MACHINE(2, 4); - #endif static void ccw_machine_register_types(void) diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c index 7cbce4766a..43f3b162c8 100644 --- a/hw/s390x/virtio-ccw.c +++ b/hw/s390x/virtio-ccw.c @@ -1157,7 +1157,6 @@ static void virtio_ccw_device_plugged(DeviceState *d, Error **errp) CcwDevice *ccw_dev = CCW_DEVICE(d); SubchDev *sch = ccw_dev->sch; int n = virtio_get_num_queues(vdev); - S390FLICState *flic = s390_get_flic(); if (!virtio_has_feature(vdev->host_features, VIRTIO_F_VERSION_1)) { dev->max_rev = 0; @@ -1184,10 +1183,10 @@ static void virtio_ccw_device_plugged(DeviceState *d, Error **errp) VIRTIO_QUEUE_MAX); return; } - if (virtio_get_num_queues(vdev) > flic->adapter_routes_max_batch) { + if (virtio_get_num_queues(vdev) > ADAPTER_ROUTES_MAX_GSI) { error_setg(errp, "The number of virtqueues %d " "exceeds flic adapter route limit %d", n, - flic->adapter_routes_max_batch); + ADAPTER_ROUTES_MAX_GSI); return; } diff --git a/hw/sd/sdhci.c b/hw/sd/sdhci.c index 299cd4bc1b..318587ff57 100644 --- a/hw/sd/sdhci.c +++ b/hw/sd/sdhci.c @@ -665,12 +665,13 @@ static void sdhci_sdma_transfer_multi_blocks(SDHCIState *s) } } + if (s->norintstsen & SDHC_NISEN_DMA) { + s->norintsts |= SDHC_NIS_DMA; + } + if (s->blkcnt == 0) { sdhci_end_transfer(s); } else { - if (s->norintstsen & SDHC_NISEN_DMA) { - s->norintsts |= SDHC_NIS_DMA; - } sdhci_update_irq(s); } } @@ -691,9 +692,22 @@ static void sdhci_sdma_transfer_single_block(SDHCIState *s) } s->blkcnt--; + if (s->norintstsen & SDHC_NISEN_DMA) { + s->norintsts |= SDHC_NIS_DMA; + } + sdhci_end_transfer(s); } +static void sdhci_sdma_transfer(SDHCIState *s) +{ + if ((s->blkcnt == 1) || !(s->trnmod & SDHC_TRNS_MULTI)) { + sdhci_sdma_transfer_single_block(s); + } else { + sdhci_sdma_transfer_multi_blocks(s); + } +} + typedef struct ADMADescr { hwaddr addr; uint16_t length; @@ -925,12 +939,7 @@ static void sdhci_data_transfer(void *opaque) if (s->trnmod & SDHC_TRNS_DMA) { switch (SDHC_DMA_TYPE(s->hostctl1)) { case SDHC_CTRL_SDMA: - if ((s->blkcnt == 1) || !(s->trnmod & SDHC_TRNS_MULTI)) { - sdhci_sdma_transfer_single_block(s); - } else { - sdhci_sdma_transfer_multi_blocks(s); - } - + sdhci_sdma_transfer(s); break; case SDHC_CTRL_ADMA1_32: if (!(s->capareg & R_SDHC_CAPAB_ADMA1_MASK)) { @@ -1174,11 +1183,7 @@ sdhci_write(void *opaque, hwaddr offset, uint64_t val, unsigned size) if (!(mask & 0xFF000000) && s->blkcnt && (s->blksize & BLOCK_SIZE_MASK) && SDHC_DMA_TYPE(s->hostctl1) == SDHC_CTRL_SDMA) { - if (s->trnmod & SDHC_TRNS_MULTI) { - sdhci_sdma_transfer_multi_blocks(s); - } else { - sdhci_sdma_transfer_single_block(s); - } + sdhci_sdma_transfer(s); } } break; diff --git a/hw/sparc/sun4m.c b/hw/sparc/sun4m.c index 217a69e4d5..e070360a2c 100644 --- a/hw/sparc/sun4m.c +++ b/hw/sparc/sun4m.c @@ -974,7 +974,7 @@ static void sun4m_hw_init(MachineState *machine) sysbus_mmio_map(s, 0, hwdef->ms_kb_base); /* Logically OR both its IRQs together */ - ms_kb_orgate = DEVICE(object_new(TYPE_OR_IRQ)); + ms_kb_orgate = qdev_new(TYPE_OR_IRQ); object_property_set_int(OBJECT(ms_kb_orgate), "num-lines", 2, &error_fatal); qdev_realize_and_unref(ms_kb_orgate, NULL, &error_fatal); sysbus_connect_irq(s, 0, qdev_get_gpio_in(ms_kb_orgate, 0)); @@ -995,7 +995,7 @@ static void sun4m_hw_init(MachineState *machine) sysbus_mmio_map(s, 0, hwdef->serial_base); /* Logically OR both its IRQs together */ - serial_orgate = DEVICE(object_new(TYPE_OR_IRQ)); + serial_orgate = qdev_new(TYPE_OR_IRQ); object_property_set_int(OBJECT(serial_orgate), "num-lines", 2, &error_fatal); qdev_realize_and_unref(serial_orgate, NULL, &error_fatal); diff --git a/hw/timer/imx_gpt.c b/hw/timer/imx_gpt.c index 2663a9d9ef..11eca9fa4d 100644 --- a/hw/timer/imx_gpt.c +++ b/hw/timer/imx_gpt.c @@ -20,10 +20,6 @@ #include "qemu/log.h" #include "trace.h" -#ifndef DEBUG_IMX_GPT -#define DEBUG_IMX_GPT 0 -#endif - static const char *imx_gpt_reg_name(uint32_t reg) { switch (reg) { diff --git a/hw/timer/xilinx_timer.c b/hw/timer/xilinx_timer.c index 4955fe1b01..6595cf5f51 100644 --- a/hw/timer/xilinx_timer.c +++ b/hw/timer/xilinx_timer.c @@ -193,6 +193,10 @@ static const MemoryRegionOps timer_ops = { .read = timer_read, .write = timer_write, .endianness = DEVICE_NATIVE_ENDIAN, + .impl = { + .min_access_size = 4, + .max_access_size = 4, + }, .valid = { .min_access_size = 4, .max_access_size = 4 diff --git a/hw/tricore/triboard.c b/hw/tricore/triboard.c index 4dba0259cd..9cc8d282ff 100644 --- a/hw/tricore/triboard.c +++ b/hw/tricore/triboard.c @@ -31,11 +31,10 @@ #include "hw/tricore/triboard.h" #include "hw/tricore/tc27x_soc.h" -static void tricore_load_kernel(const char *kernel_filename) +static void tricore_load_kernel(TriCoreCPU *cpu, const char *kernel_filename) { uint64_t entry; long kernel_size; - TriCoreCPU *cpu; CPUTriCoreState *env; kernel_size = load_elf(kernel_filename, NULL, @@ -46,7 +45,6 @@ static void tricore_load_kernel(const char *kernel_filename) error_report("no kernel file '%s'", kernel_filename); exit(1); } - cpu = TRICORE_CPU(first_cpu); env = &cpu->env; env->PC = entry; } @@ -62,7 +60,7 @@ static void triboard_machine_init(MachineState *machine) sysbus_realize(SYS_BUS_DEVICE(&ms->tc27x_soc), &error_fatal); if (machine->kernel_filename) { - tricore_load_kernel(machine->kernel_filename); + tricore_load_kernel(&ms->tc27x_soc.cpu, machine->kernel_filename); } } diff --git a/hw/ufs/ufs.c b/hw/ufs/ufs.c index 8d26d13791..428fe927ad 100644 --- a/hw/ufs/ufs.c +++ b/hw/ufs/ufs.c @@ -1164,7 +1164,7 @@ static QueryRespCode ufs_exec_query_attr(UfsRequest *req, int op) value = ufs_read_attr_value(u, idn); ret = UFS_QUERY_RESULT_SUCCESS; } else { - value = req->req_upiu.qr.value; + value = be32_to_cpu(req->req_upiu.qr.value); ret = ufs_write_attr_value(u, idn, value); } req->rsp_upiu.qr.value = cpu_to_be32(value); diff --git a/hw/usb/bus.c b/hw/usb/bus.c index b19b0b13eb..f45b82c776 100644 --- a/hw/usb/bus.c +++ b/hw/usb/bus.c @@ -411,7 +411,7 @@ void usb_claim_port(USBDevice *dev, Error **errp) } else { if (bus->nfree == 1 && strcmp(object_get_typename(OBJECT(dev)), "usb-hub") != 0) { /* Create a new hub and chain it on */ - hub = usb_try_new("usb-hub"); + hub = USB_DEVICE(qdev_try_new("usb-hub")); if (hub) { usb_realize_and_unref(hub, bus, NULL); } @@ -662,7 +662,8 @@ USBDevice *usbdevice_create(const char *driver) return NULL; } - dev = f->usbdevice_init ? f->usbdevice_init() : usb_new(f->name); + dev = f->usbdevice_init ? f->usbdevice_init() + : USB_DEVICE(qdev_new(f->name)); if (!dev) { error_report("Failed to create USB device '%s'", f->name); return NULL; diff --git a/hw/usb/dev-serial.c b/hw/usb/dev-serial.c index a0821db902..aa50a92e26 100644 --- a/hw/usb/dev-serial.c +++ b/hw/usb/dev-serial.c @@ -624,7 +624,7 @@ static USBDevice *usb_braille_init(void) return NULL; } - dev = usb_new("usb-braille"); + dev = USB_DEVICE(qdev_new("usb-braille")); qdev_prop_set_chr(&dev->qdev, "chardev", cdrv); return dev; } diff --git a/hw/usb/hcd-xhci-pci.c b/hw/usb/hcd-xhci-pci.c index a069b42338..49642aab58 100644 --- a/hw/usb/hcd-xhci-pci.c +++ b/hw/usb/hcd-xhci-pci.c @@ -74,6 +74,7 @@ static bool xhci_pci_intr_raise(XHCIState *xhci, int n, bool level) } if (msi_enabled(pci_dev) && level) { + n %= msi_nr_vectors_allocated(pci_dev); msi_notify(pci_dev, n); return true; } diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c index 7dc0994c89..00d5bc3779 100644 --- a/hw/usb/hcd-xhci.c +++ b/hw/usb/hcd-xhci.c @@ -644,6 +644,10 @@ static void xhci_event(XHCIState *xhci, XHCIEvent *event, int v) dma_addr_t erdp; unsigned int dp_idx; + if (xhci->numintrs == 1) { + v = 0; + } + if (v >= xhci->numintrs) { DPRINTF("intr nr out of range (%d >= %d)\n", v, xhci->numintrs); return; diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 17080b9dc0..ab17a98ee5 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -1012,7 +1012,6 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev) { uint32_t orig, size = cpu_to_le32((uint32_t)PCI_ROM_ADDRESS_MASK); off_t offset = vdev->config_offset + PCI_ROM_ADDRESS; - DeviceState *dev = DEVICE(vdev); char *name; int fd = vdev->vbasedev.fd; @@ -1046,12 +1045,12 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev) } if (vfio_opt_rom_in_denylist(vdev)) { - if (dev->opts && qdict_haskey(dev->opts, "rombar")) { + if (vdev->pdev.rom_bar > 0) { warn_report("Device at %s is known to cause system instability" " issues during option rom execution", vdev->vbasedev.name); error_printf("Proceeding anyway since user specified" - " non zero value for rombar\n"); + " positive value for rombar\n"); } else { warn_report("Rom loading for device at %s has been disabled" " due to system instability issues", diff --git a/hw/xen/trace-events b/hw/xen/trace-events index a07fe41c6d..b67942d07b 100644 --- a/hw/xen/trace-events +++ b/hw/xen/trace-events @@ -38,7 +38,7 @@ xen_device_remove_watch(const char *type, char *name, const char *node, const ch xs_node_create(const char *node) "%s" xs_node_destroy(const char *node) "%s" xs_node_vprintf(char *path, char *value) "%s %s" -xs_node_vscanf(char *path, char *value) "%s %s" +xs_node_read(const char *path, const char *value) "%s %s" xs_node_watch(char *path) "%s" xs_node_unwatch(char *path) "%s" diff --git a/hw/xen/xen-bus-helper.c b/hw/xen/xen-bus-helper.c index b2b2cc9c5d..288fad422b 100644 --- a/hw/xen/xen-bus-helper.c +++ b/hw/xen/xen-bus-helper.c @@ -105,25 +105,22 @@ int xs_node_vscanf(struct qemu_xs_handle *h, xs_transaction_t tid, const char *node, const char *key, Error **errp, const char *fmt, va_list ap) { - char *path, *value; + char *value; int rc; - path = (strlen(node) != 0) ? g_strdup_printf("%s/%s", node, key) : - g_strdup(key); - value = qemu_xen_xs_read(h, tid, path, NULL); - - trace_xs_node_vscanf(path, value); + if (node && strlen(node) != 0) { + value = xs_node_read(h, tid, NULL, errp, "%s/%s", node, key); + } else { + value = xs_node_read(h, tid, NULL, errp, "%s", key); + } if (value) { rc = vsscanf(value, fmt, ap); } else { - error_setg_errno(errp, errno, "failed to read from '%s'", - path); rc = EOF; } free(value); - g_free(path); return rc; } @@ -142,6 +139,28 @@ int xs_node_scanf(struct qemu_xs_handle *h, xs_transaction_t tid, return rc; } +char *xs_node_read(struct qemu_xs_handle *h, xs_transaction_t tid, + unsigned int *len, Error **errp, + const char *path_fmt, ...) +{ + char *path, *value; + va_list ap; + + va_start(ap, path_fmt); + path = g_strdup_vprintf(path_fmt, ap); + va_end(ap); + + value = qemu_xen_xs_read(h, tid, path, len); + trace_xs_node_read(path, value); + if (!value) { + error_setg_errno(errp, errno, "failed to read from '%s'", path); + } + + g_free(path); + + return value; +} + struct qemu_xs_watch *xs_node_watch(struct qemu_xs_handle *h, const char *node, const char *key, xs_watch_fn fn, void *opaque, Error **errp) diff --git a/hw/xen/xen-bus.c b/hw/xen/xen-bus.c index adfc4efad0..feeb612681 100644 --- a/hw/xen/xen-bus.c +++ b/hw/xen/xen-bus.c @@ -156,8 +156,8 @@ again: !strcmp(key[i], "hotplug-status")) continue; - if (xs_node_scanf(xenbus->xsh, tid, path, key[i], NULL, "%ms", - &val) == 1) { + val = xs_node_read(xenbus->xsh, tid, NULL, NULL, "%s/%s", path, key[i]); + if (val) { qdict_put_str(opts, key[i], val); free(val); } @@ -650,6 +650,16 @@ int xen_device_frontend_scanf(XenDevice *xendev, const char *key, return rc; } +char *xen_device_frontend_read(XenDevice *xendev, const char *key) +{ + XenBus *xenbus = XEN_BUS(qdev_get_parent_bus(DEVICE(xendev))); + + g_assert(xenbus->xsh); + + return xs_node_read(xenbus->xsh, XBT_NULL, NULL, NULL, "%s/%s", + xendev->frontend_path, key); +} + static void xen_device_frontend_set_state(XenDevice *xendev, enum xenbus_state state, bool publish) diff --git a/hw/xen/xen_pvdev.c b/hw/xen/xen_pvdev.c index c5ad71e8dc..c9143ba259 100644 --- a/hw/xen/xen_pvdev.c +++ b/hw/xen/xen_pvdev.c @@ -22,6 +22,7 @@ #include "qemu/main-loop.h" #include "hw/qdev-core.h" #include "hw/xen/xen-legacy-backend.h" +#include "hw/xen/xen-bus-helper.h" #include "hw/xen/xen_pvdev.h" /* private */ @@ -81,12 +82,9 @@ int xenstore_write_str(const char *base, const char *node, const char *val) char *xenstore_read_str(const char *base, const char *node) { - char abspath[XEN_BUFSIZE]; - unsigned int len; char *str, *ret = NULL; - snprintf(abspath, sizeof(abspath), "%s/%s", base, node); - str = qemu_xen_xs_read(xenstore, 0, abspath, &len); + str = xs_node_read(xenstore, 0, NULL, NULL, "%s/%s", base, node); if (str != NULL) { /* move to qemu-allocated memory to make sure * callers can safely g_free() stuff. */ |