Diffstat (limited to 'hw')
54 files changed, 2156 insertions, 544 deletions
diff --git a/hw/acpi/acpi-cpu-hotplug-stub.c b/hw/acpi/acpi-cpu-hotplug-stub.c index 3fc4b14c26..c6c61bb9cd 100644 --- a/hw/acpi/acpi-cpu-hotplug-stub.c +++ b/hw/acpi/acpi-cpu-hotplug-stub.c @@ -19,6 +19,12 @@ void legacy_acpi_cpu_hotplug_init(MemoryRegion *parent, Object *owner, return; } +void cpu_hotplug_hw_init(MemoryRegion *as, Object *owner, + CPUHotplugState *state, hwaddr base_addr) +{ + return; +} + void acpi_cpu_ospm_status(CPUHotplugState *cpu_st, ACPIOSTInfoList ***list) { return; diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c index 2d81c1e790..5cb60ca8bc 100644 --- a/hw/acpi/cpu.c +++ b/hw/acpi/cpu.c @@ -7,7 +7,6 @@ #include "trace.h" #include "sysemu/numa.h" -#define ACPI_CPU_HOTPLUG_REG_LEN 12 #define ACPI_CPU_SELECTOR_OFFSET_WR 0 #define ACPI_CPU_FLAGS_OFFSET_RW 4 #define ACPI_CPU_CMD_OFFSET_WR 5 @@ -339,9 +338,10 @@ const VMStateDescription vmstate_cpu_hotplug = { #define CPU_FW_EJECT_EVENT "CEJF" void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, - build_madt_cpu_fn build_madt_cpu, hwaddr io_base, + build_madt_cpu_fn build_madt_cpu, hwaddr base_addr, const char *res_root, - const char *event_handler_method) + const char *event_handler_method, + AmlRegionSpace rs) { Aml *ifctx; Aml *field; @@ -365,14 +365,22 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, aml_name_decl("_UID", aml_string("CPU Hotplug resources"))); aml_append(cpu_ctrl_dev, aml_mutex(CPU_LOCK, 0)); + assert((rs == AML_SYSTEM_IO) || (rs == AML_SYSTEM_MEMORY)); + crs = aml_resource_template(); - aml_append(crs, aml_io(AML_DECODE16, io_base, io_base, 1, + if (rs == AML_SYSTEM_IO) { + aml_append(crs, aml_io(AML_DECODE16, base_addr, base_addr, 1, ACPI_CPU_HOTPLUG_REG_LEN)); + } else if (rs == AML_SYSTEM_MEMORY) { + aml_append(crs, aml_memory32_fixed(base_addr, + ACPI_CPU_HOTPLUG_REG_LEN, AML_READ_WRITE)); + } + aml_append(cpu_ctrl_dev, aml_name_decl("_CRS", crs)); /* declare CPU hotplug MMIO region with related access fields */ aml_append(cpu_ctrl_dev, - aml_operation_region("PRST", AML_SYSTEM_IO, aml_int(io_base), + aml_operation_region("PRST", rs, aml_int(base_addr), ACPI_CPU_HOTPLUG_REG_LEN)); field = aml_field("PRST", AML_BYTE_ACC, AML_NOLOCK, diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c index 2d6e91b124..15b4c3ebbf 100644 --- a/hw/acpi/generic_event_device.c +++ b/hw/acpi/generic_event_device.c @@ -25,6 +25,7 @@ static const uint32_t ged_supported_events[] = { ACPI_GED_MEM_HOTPLUG_EVT, ACPI_GED_PWR_DOWN_EVT, ACPI_GED_NVDIMM_HOTPLUG_EVT, + ACPI_GED_CPU_HOTPLUG_EVT, }; /* @@ -107,6 +108,9 @@ void build_ged_aml(Aml *table, const char *name, HotplugHandler *hotplug_dev, aml_append(if_ctx, aml_call0(MEMORY_DEVICES_CONTAINER "." 
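A minimal sketch of the _CRS split that the build_cpus_aml() change above introduces: the same ACPI_CPU_HOTPLUG_REG_LEN window (12 bytes, per the #define the hunk drops from cpu.c) is described either as an I/O port range or as a fixed 32-bit memory range, depending on the AmlRegionSpace the caller now passes. The helper name is hypothetical; the aml_* constructors are the ones used in the hunk (hw/acpi/aml-build.h), and ACPI_CPU_HOTPLUG_REG_LEN is assumed to now live in hw/acpi/cpu.h.

#include "qemu/osdep.h"
#include "hw/acpi/aml-build.h"
#include "hw/acpi/cpu.h"

/* hypothetical helper mirroring the two branches added above */
static Aml *build_cpuhp_crs(hwaddr base_addr, AmlRegionSpace rs)
{
    Aml *crs = aml_resource_template();

    if (rs == AML_SYSTEM_IO) {
        aml_append(crs, aml_io(AML_DECODE16, base_addr, base_addr, 1,
                               ACPI_CPU_HOTPLUG_REG_LEN));
    } else {
        /* AML_SYSTEM_MEMORY: MMIO window, e.g. for GED-based machines */
        aml_append(crs, aml_memory32_fixed(base_addr,
                                           ACPI_CPU_HOTPLUG_REG_LEN,
                                           AML_READ_WRITE));
    }
    return crs;
}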
MEMORY_SLOT_SCAN_METHOD)); break; + case ACPI_GED_CPU_HOTPLUG_EVT: + aml_append(if_ctx, aml_call0(AML_GED_EVT_CPU_SCAN_METHOD)); + break; case ACPI_GED_PWR_DOWN_EVT: aml_append(if_ctx, aml_notify(aml_name(ACPI_POWER_BUTTON_DEVICE), @@ -234,6 +238,8 @@ static void acpi_ged_device_plug_cb(HotplugHandler *hotplug_dev, } else { acpi_memory_plug_cb(hotplug_dev, &s->memhp_state, dev, errp); } + } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { + acpi_cpu_plug_cb(hotplug_dev, &s->cpuhp_state, dev, errp); } else { error_setg(errp, "virt: device plug request for unsupported device" " type: %s", object_get_typename(OBJECT(dev))); @@ -248,6 +254,8 @@ static void acpi_ged_unplug_request_cb(HotplugHandler *hotplug_dev, if ((object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) && !(object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM)))) { acpi_memory_unplug_request_cb(hotplug_dev, &s->memhp_state, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { + acpi_cpu_unplug_request_cb(hotplug_dev, &s->cpuhp_state, dev, errp); } else { error_setg(errp, "acpi: device unplug request for unsupported device" " type: %s", object_get_typename(OBJECT(dev))); @@ -261,6 +269,8 @@ static void acpi_ged_unplug_cb(HotplugHandler *hotplug_dev, if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { acpi_memory_unplug_cb(&s->memhp_state, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { + acpi_cpu_unplug_cb(&s->cpuhp_state, dev, errp); } else { error_setg(errp, "acpi: device unplug for unsupported device" " type: %s", object_get_typename(OBJECT(dev))); @@ -272,6 +282,7 @@ static void acpi_ged_ospm_status(AcpiDeviceIf *adev, ACPIOSTInfoList ***list) AcpiGedState *s = ACPI_GED(adev); acpi_memory_ospm_status(&s->memhp_state, list); + acpi_cpu_ospm_status(&s->cpuhp_state, list); } static void acpi_ged_send_event(AcpiDeviceIf *adev, AcpiEventStatusBits ev) @@ -286,6 +297,8 @@ static void acpi_ged_send_event(AcpiDeviceIf *adev, AcpiEventStatusBits ev) sel = ACPI_GED_PWR_DOWN_EVT; } else if (ev & ACPI_NVDIMM_HOTPLUG_STATUS) { sel = ACPI_GED_NVDIMM_HOTPLUG_EVT; + } else if (ev & ACPI_CPU_HOTPLUG_STATUS) { + sel = ACPI_GED_CPU_HOTPLUG_EVT; } else { /* Unknown event. Return without generating interrupt. */ warn_report("GED: Unsupported event %d. 
No irq injected", ev); @@ -371,6 +384,42 @@ static const VMStateDescription vmstate_acpi_ged = { } }; +static void acpi_ged_realize(DeviceState *dev, Error **errp) +{ + SysBusDevice *sbd = SYS_BUS_DEVICE(dev); + AcpiGedState *s = ACPI_GED(dev); + uint32_t ged_events; + int i; + + ged_events = ctpop32(s->ged_event_bitmap); + + for (i = 0; i < ARRAY_SIZE(ged_supported_events) && ged_events; i++) { + uint32_t event = s->ged_event_bitmap & ged_supported_events[i]; + + if (!event) { + continue; + } + + switch (event) { + case ACPI_GED_CPU_HOTPLUG_EVT: + /* initialize CPU Hotplug related regions */ + memory_region_init(&s->container_cpuhp, OBJECT(dev), + "cpuhp container", + ACPI_CPU_HOTPLUG_REG_LEN); + sysbus_init_mmio(sbd, &s->container_cpuhp); + cpu_hotplug_hw_init(&s->container_cpuhp, OBJECT(dev), + &s->cpuhp_state, 0); + break; + } + ged_events--; + } + + if (ged_events) { + error_report("Unsupported events specified"); + abort(); + } +} + static void acpi_ged_initfn(Object *obj) { DeviceState *dev = DEVICE(obj); @@ -411,6 +460,7 @@ static void acpi_ged_class_init(ObjectClass *class, void *data) dc->desc = "ACPI Generic Event Device"; device_class_set_props(dc, acpi_ged_properties); dc->vmsd = &vmstate_acpi_ged; + dc->realize = acpi_ged_realize; hc->plug = acpi_ged_device_plug_cb; hc->unplug_request = acpi_ged_unplug_request_cb; diff --git a/hw/arm/virt.c b/hw/arm/virt.c index b0c68d66a3..719e83e6a1 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -3308,6 +3308,7 @@ DEFINE_VIRT_MACHINE_AS_LATEST(9, 1) static void virt_machine_9_0_options(MachineClass *mc) { virt_machine_9_1_options(mc); + mc->smbios_memory_device_size = 16 * GiB; compat_props_add(mc->compat_props, hw_compat_9_0, hw_compat_9_0_len); } DEFINE_VIRT_MACHINE(9, 0) diff --git a/hw/audio/virtio-snd.c b/hw/audio/virtio-snd.c index 5993f4f040..e5196aa4bb 100644 --- a/hw/audio/virtio-snd.c +++ b/hw/audio/virtio-snd.c @@ -282,11 +282,13 @@ uint32_t virtio_snd_set_pcm_params(VirtIOSound *s, error_report("Number of channels is not supported."); return cpu_to_le32(VIRTIO_SND_S_NOT_SUPP); } - if (!(supported_formats & BIT(params->format))) { + if (BIT(params->format) > sizeof(supported_formats) || + !(supported_formats & BIT(params->format))) { error_report("Stream format is not supported."); return cpu_to_le32(VIRTIO_SND_S_NOT_SUPP); } - if (!(supported_rates & BIT(params->rate))) { + if (BIT(params->rate) > sizeof(supported_rates) || + !(supported_rates & BIT(params->rate))) { error_report("Stream rate is not supported."); return cpu_to_le32(VIRTIO_SND_S_NOT_SUPP); } @@ -1261,7 +1263,7 @@ static void virtio_snd_pcm_in_cb(void *data, int available) { VirtIOSoundPCMStream *stream = data; VirtIOSoundPCMBuffer *buffer; - size_t size; + size_t size, max_size; WITH_QEMU_LOCK_GUARD(&stream->queue_mutex) { while (!QSIMPLEQ_EMPTY(&stream->queue)) { @@ -1275,7 +1277,12 @@ static void virtio_snd_pcm_in_cb(void *data, int available) continue; } + max_size = iov_size(buffer->elem->in_sg, buffer->elem->in_num); for (;;) { + if (buffer->size >= max_size) { + return_rx_buffer(stream, buffer); + break; + } size = AUD_read(stream->voice.in, buffer->data + buffer->size, MIN(available, (stream->params.period_bytes - diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c index fdbc30b9ce..5b7f46bbb0 100644 --- a/hw/block/vhost-user-blk.c +++ b/hw/block/vhost-user-blk.c @@ -51,6 +51,7 @@ static const int user_feature_bits[] = { VIRTIO_F_RING_PACKED, VIRTIO_F_IOMMU_PLATFORM, VIRTIO_F_RING_RESET, + VIRTIO_F_IN_ORDER, VIRTIO_F_NOTIFICATION_DATA, 
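The virtio_snd_pcm_in_cb() fix above stops AUD_read() from writing past a buffer element: iov_size() yields the element's total scatter-gather capacity, and the loop hands the buffer back once buffer->size reaches it. A standalone sketch of that bound, with a hypothetical helper name:

#include "qemu/osdep.h"
#include "qemu/iov.h"

/* how many bytes may still be appended before the
 * return_rx_buffer()/break path above must trigger */
static size_t rx_room(const struct iovec *in_sg, unsigned int in_num,
                      size_t filled)
{
    size_t max_size = iov_size(in_sg, in_num);

    return filled < max_size ? max_size - filled : 0;
}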
VHOST_INVALID_FEATURE_BIT }; diff --git a/hw/char/goldfish_tty.c b/hw/char/goldfish_tty.c index f8ff043c39..c2e1f6537f 100644 --- a/hw/char/goldfish_tty.c +++ b/hw/char/goldfish_tty.c @@ -16,6 +16,7 @@ #include "qemu/log.h" #include "trace.h" #include "exec/address-spaces.h" +#include "sysemu/dma.h" #include "hw/char/goldfish_tty.h" #define GOLDFISH_TTY_VERSION 1 @@ -69,7 +70,6 @@ static uint64_t goldfish_tty_read(void *opaque, hwaddr addr, static void goldfish_tty_cmd(GoldfishTTYState *s, uint32_t cmd) { uint32_t to_copy; - uint8_t *buf; uint8_t data_out[GOLFISH_TTY_BUFFER_SIZE]; int len; uint64_t ptr; @@ -97,8 +97,8 @@ static void goldfish_tty_cmd(GoldfishTTYState *s, uint32_t cmd) while (len) { to_copy = MIN(GOLFISH_TTY_BUFFER_SIZE, len); - address_space_rw(&address_space_memory, ptr, - MEMTXATTRS_UNSPECIFIED, data_out, to_copy, 0); + dma_memory_read_relaxed(&address_space_memory, ptr, + data_out, to_copy); qemu_chr_fe_write_all(&s->chr, data_out, to_copy); len -= to_copy; @@ -109,9 +109,9 @@ static void goldfish_tty_cmd(GoldfishTTYState *s, uint32_t cmd) len = s->data_len; ptr = s->data_ptr; while (len && !fifo8_is_empty(&s->rx_fifo)) { - buf = (uint8_t *)fifo8_pop_buf(&s->rx_fifo, len, &to_copy); - address_space_rw(&address_space_memory, ptr, - MEMTXATTRS_UNSPECIFIED, buf, to_copy, 1); + const uint8_t *buf = fifo8_pop_bufptr(&s->rx_fifo, len, &to_copy); + + dma_memory_write_relaxed(&address_space_memory, ptr, buf, to_copy); len -= to_copy; ptr += to_copy; diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c index d2e3e4570a..7982ecd39a 100644 --- a/hw/core/cpu-common.c +++ b/hw/core/cpu-common.c @@ -282,7 +282,10 @@ static void cpu_common_finalize(Object *obj) } #endif free_queued_cpu_work(cpu); - g_array_free(cpu->gdb_regs, TRUE); + /* If cleanup didn't happen in context to gdb_unregister_coprocessor_all */ + if (cpu->gdb_regs) { + g_array_free(cpu->gdb_regs, TRUE); + } qemu_lockcnt_destroy(&cpu->in_ioctl_lock); qemu_mutex_destroy(&cpu->work_mutex); qemu_cond_destroy(cpu->halt_cond); diff --git a/hw/core/machine.c b/hw/core/machine.c index 8a878f84d7..27dcda0248 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -1005,6 +1005,12 @@ static void machine_class_init(ObjectClass *oc, void *data) /* Default 128 MB as guest ram size */ mc->default_ram_size = 128 * MiB; mc->rom_file_has_mr = true; + /* + * SMBIOS 3.1.0 7.18.5 Memory Device — Extended Size + * use max possible value that could be encoded into + * 'Extended Size' field (2047Tb). + */ + mc->smbios_memory_device_size = 2047 * TiB; /* numa node memory size aligned on 8MB by default. 
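A worked check of the new smbios_memory_device_size default set in machine_class_init() above, assuming the SMBIOS 3.1.0 (7.18.5) layout in which Extended Size is a 31-bit count of MiB: the largest encodable value is 2^31 - 1 MiB, one MiB short of 2048 TiB, so 2047 TiB is the largest whole-TiB size that still fits (2047 * 2^20 = 2146435072 MiB < 2147483647). The 16 GiB value in the virt/pc 9.0 compat hunks elsewhere in this diff pins the previous behaviour for older machine types.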
* On Linux, each node's border has to be 8MB aligned diff --git a/hw/cxl/cxl-events.c b/hw/cxl/cxl-events.c index d397718b1b..12dee2e467 100644 --- a/hw/cxl/cxl-events.c +++ b/hw/cxl/cxl-events.c @@ -139,6 +139,19 @@ bool cxl_event_insert(CXLDeviceState *cxlds, CXLEventLogType log_type, return cxl_event_count(log) == 1; } +void cxl_discard_all_event_records(CXLDeviceState *cxlds) +{ + CXLEventLogType log_type; + CXLEventLog *log; + + for (log_type = 0; log_type < CXL_EVENT_TYPE_MAX; log_type++) { + log = &cxlds->event_logs[log_type]; + while (!cxl_event_empty(log)) { + cxl_event_delete_head(cxlds, log_type, log); + } + } +} + CXLRetCode cxl_event_get_records(CXLDeviceState *cxlds, CXLGetEventPayload *pl, uint8_t log_type, int max_recs, size_t *len) diff --git a/hw/cxl/cxl-host.c b/hw/cxl/cxl-host.c index c5f5fcfd64..e9f2543c43 100644 --- a/hw/cxl/cxl-host.c +++ b/hw/cxl/cxl-host.c @@ -315,7 +315,8 @@ static void machine_set_cxl(Object *obj, Visitor *v, const char *name, static void machine_get_cfmw(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) { - CXLFixedMemoryWindowOptionsList **list = opaque; + CXLState *state = opaque; + CXLFixedMemoryWindowOptionsList **list = &state->cfmw_list; visit_type_CXLFixedMemoryWindowOptionsList(v, name, list, errp); } diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c index 74eeb6fde7..3ebbd32e10 100644 --- a/hw/cxl/cxl-mailbox-utils.c +++ b/hw/cxl/cxl-mailbox-utils.c @@ -12,6 +12,7 @@ #include "hw/pci/msix.h" #include "hw/cxl/cxl.h" #include "hw/cxl/cxl_events.h" +#include "hw/cxl/cxl_mailbox.h" #include "hw/pci/pci.h" #include "hw/pci-bridge/cxl_upstream_port.h" #include "qemu/cutils.h" @@ -62,12 +63,18 @@ enum { #define SET_INTERRUPT_POLICY 0x3 FIRMWARE_UPDATE = 0x02, #define GET_INFO 0x0 + #define TRANSFER 0x1 + #define ACTIVATE 0x2 TIMESTAMP = 0x03, #define GET 0x0 #define SET 0x1 LOGS = 0x04, #define GET_SUPPORTED 0x0 #define GET_LOG 0x1 + FEATURES = 0x05, + #define GET_SUPPORTED 0x0 + #define GET_FEATURE 0x1 + #define SET_FEATURE 0x2 IDENTIFY = 0x40, #define MEMORY_DEVICE 0x0 CCLS = 0x41, @@ -83,6 +90,9 @@ enum { #define GET_POISON_LIST 0x0 #define INJECT_POISON 0x1 #define CLEAR_POISON 0x2 + #define GET_SCAN_MEDIA_CAPABILITIES 0x3 + #define SCAN_MEDIA 0x4 + #define GET_SCAN_MEDIA_RESULTS 0x5 DCD_CONFIG = 0x48, #define GET_DC_CONFIG 0x0 #define GET_DYN_CAP_EXT_LIST 0x1 @@ -235,7 +245,6 @@ static CXLRetCode cmd_events_get_records(const struct cxl_cmd *cmd, log_type = payload_in[0]; pl = (CXLGetEventPayload *)payload_out; - memset(pl, 0, sizeof(*pl)); max_recs = (cxlds->payload_size - CXL_EVENT_PAYLOAD_HDR_SIZE) / CXL_EVENT_RECORD_SIZE; @@ -273,7 +282,6 @@ static CXLRetCode cmd_events_get_interrupt_policy(const struct cxl_cmd *cmd, CXLEventLog *log; policy = (CXLEventInterruptPolicy *)payload_out; - memset(policy, 0, sizeof(*policy)); log = &cxlds->event_logs[CXL_EVENT_TYPE_INFO]; if (log->irq_enabled) { @@ -372,7 +380,6 @@ static CXLRetCode cmd_infostat_identify(const struct cxl_cmd *cmd, QEMU_BUILD_BUG_ON(sizeof(*is_identify) != 18); is_identify = (void *)payload_out; - memset(is_identify, 0, sizeof(*is_identify)); is_identify->pcie_vid = class->vendor_id; is_identify->pcie_did = class->device_id; if (object_dynamic_cast(OBJECT(cci->d), TYPE_CXL_USP)) { @@ -606,7 +613,6 @@ static CXLRetCode cmd_infostat_bg_op_sts(const struct cxl_cmd *cmd, QEMU_BUILD_BUG_ON(sizeof(*bg_op_status) != 8); bg_op_status = (void *)payload_out; - memset(bg_op_status, 0, sizeof(*bg_op_status)); bg_op_status->status = 
cci->bg.complete_pct << 1; if (cci->bg.runtime > 0) { bg_op_status->status |= 1U << 0; @@ -618,6 +624,9 @@ static CXLRetCode cmd_infostat_bg_op_sts(const struct cxl_cmd *cmd, return CXL_MBOX_SUCCESS; } +#define CXL_FW_SLOTS 2 +#define CXL_FW_SIZE 0x02000000 /* 32 mb */ + /* CXL r3.1 Section 8.2.9.3.1: Get FW Info (Opcode 0200h) */ static CXLRetCode cmd_firmware_update_get_info(const struct cxl_cmd *cmd, uint8_t *payload_in, @@ -647,17 +656,193 @@ static CXLRetCode cmd_firmware_update_get_info(const struct cxl_cmd *cmd, } fw_info = (void *)payload_out; - memset(fw_info, 0, sizeof(*fw_info)); - fw_info->slots_supported = 2; - fw_info->slot_info = BIT(0) | BIT(3); - fw_info->caps = 0; - pstrcpy(fw_info->fw_rev1, sizeof(fw_info->fw_rev1), "BWFW VERSION 0"); + fw_info->slots_supported = CXL_FW_SLOTS; + fw_info->slot_info = (cci->fw.active_slot & 0x7) | + ((cci->fw.staged_slot & 0x7) << 3); + fw_info->caps = BIT(0); /* online update supported */ + + if (cci->fw.slot[0]) { + pstrcpy(fw_info->fw_rev1, sizeof(fw_info->fw_rev1), "BWFW VERSION 0"); + } + if (cci->fw.slot[1]) { + pstrcpy(fw_info->fw_rev2, sizeof(fw_info->fw_rev2), "BWFW VERSION 1"); + } *len_out = sizeof(*fw_info); return CXL_MBOX_SUCCESS; } +/* CXL r3.1 section 8.2.9.3.2: Transfer FW (Opcode 0201h) */ +#define CXL_FW_XFER_ALIGNMENT 128 + +#define CXL_FW_XFER_ACTION_FULL 0x0 +#define CXL_FW_XFER_ACTION_INIT 0x1 +#define CXL_FW_XFER_ACTION_CONTINUE 0x2 +#define CXL_FW_XFER_ACTION_END 0x3 +#define CXL_FW_XFER_ACTION_ABORT 0x4 + +static CXLRetCode cmd_firmware_update_transfer(const struct cxl_cmd *cmd, + uint8_t *payload_in, + size_t len, + uint8_t *payload_out, + size_t *len_out, + CXLCCI *cci) +{ + struct { + uint8_t action; + uint8_t slot; + uint8_t rsvd1[2]; + uint32_t offset; + uint8_t rsvd2[0x78]; + uint8_t data[]; + } QEMU_PACKED *fw_transfer = (void *)payload_in; + size_t offset, length; + + if (fw_transfer->action == CXL_FW_XFER_ACTION_ABORT) { + /* + * At this point there aren't any on-going transfers + * running in the bg - this is serialized before this + * call altogether. Just mark the state machine and + * disregard any other input. + */ + cci->fw.transferring = false; + return CXL_MBOX_SUCCESS; + } + + offset = fw_transfer->offset * CXL_FW_XFER_ALIGNMENT; + length = len - sizeof(*fw_transfer); + if (offset + length > CXL_FW_SIZE) { + return CXL_MBOX_INVALID_INPUT; + } + + if (cci->fw.transferring) { + if (fw_transfer->action == CXL_FW_XFER_ACTION_FULL || + fw_transfer->action == CXL_FW_XFER_ACTION_INIT) { + return CXL_MBOX_FW_XFER_IN_PROGRESS; + } + /* + * Abort partitioned package transfer if over 30 secs + * between parts. As opposed to the explicit ABORT action, + * semantically treat this condition as an error - as + * if a part action were passed without a previous INIT. 
+ */ + if (difftime(time(NULL), cci->fw.last_partxfer) > 30.0) { + cci->fw.transferring = false; + return CXL_MBOX_INVALID_INPUT; + } + } else if (fw_transfer->action == CXL_FW_XFER_ACTION_CONTINUE || + fw_transfer->action == CXL_FW_XFER_ACTION_END) { + return CXL_MBOX_INVALID_INPUT; + } + + /* allow back-to-back retransmission */ + if ((offset != cci->fw.prev_offset || length != cci->fw.prev_len) && + (fw_transfer->action == CXL_FW_XFER_ACTION_CONTINUE || + fw_transfer->action == CXL_FW_XFER_ACTION_END)) { + /* verify no overlaps */ + if (offset < cci->fw.prev_offset + cci->fw.prev_len) { + return CXL_MBOX_FW_XFER_OUT_OF_ORDER; + } + } + + switch (fw_transfer->action) { + case CXL_FW_XFER_ACTION_FULL: /* ignores offset */ + case CXL_FW_XFER_ACTION_END: + if (fw_transfer->slot == 0 || + fw_transfer->slot == cci->fw.active_slot || + fw_transfer->slot > CXL_FW_SLOTS) { + return CXL_MBOX_FW_INVALID_SLOT; + } + + /* mark the slot used upon bg completion */ + break; + case CXL_FW_XFER_ACTION_INIT: + if (offset != 0) { + return CXL_MBOX_INVALID_INPUT; + } + + cci->fw.transferring = true; + cci->fw.prev_offset = offset; + cci->fw.prev_len = length; + break; + case CXL_FW_XFER_ACTION_CONTINUE: + cci->fw.prev_offset = offset; + cci->fw.prev_len = length; + break; + default: + return CXL_MBOX_INVALID_INPUT; + } + + if (fw_transfer->action == CXL_FW_XFER_ACTION_FULL) { + cci->bg.runtime = 10 * 1000UL; + } else { + cci->bg.runtime = 2 * 1000UL; + } + /* keep relevant context for bg completion */ + cci->fw.curr_action = fw_transfer->action; + cci->fw.curr_slot = fw_transfer->slot; + *len_out = 0; + + return CXL_MBOX_BG_STARTED; +} + +static void __do_firmware_xfer(CXLCCI *cci) +{ + switch (cci->fw.curr_action) { + case CXL_FW_XFER_ACTION_FULL: + case CXL_FW_XFER_ACTION_END: + cci->fw.slot[cci->fw.curr_slot - 1] = true; + cci->fw.transferring = false; + break; + case CXL_FW_XFER_ACTION_INIT: + case CXL_FW_XFER_ACTION_CONTINUE: + time(&cci->fw.last_partxfer); + break; + default: + break; + } +} + +/* CXL r3.1 section 8.2.9.3.3: Activate FW (Opcode 0202h) */ +static CXLRetCode cmd_firmware_update_activate(const struct cxl_cmd *cmd, + uint8_t *payload_in, + size_t len, + uint8_t *payload_out, + size_t *len_out, + CXLCCI *cci) +{ + struct { + uint8_t action; + uint8_t slot; + } QEMU_PACKED *fw_activate = (void *)payload_in; + QEMU_BUILD_BUG_ON(sizeof(*fw_activate) != 0x2); + + if (fw_activate->slot == 0 || + fw_activate->slot == cci->fw.active_slot || + fw_activate->slot > CXL_FW_SLOTS) { + return CXL_MBOX_FW_INVALID_SLOT; + } + + /* ensure that an actual fw package is there */ + if (!cci->fw.slot[fw_activate->slot - 1]) { + return CXL_MBOX_FW_INVALID_SLOT; + } + + switch (fw_activate->action) { + case 0: /* online */ + cci->fw.active_slot = fw_activate->slot; + break; + case 1: /* reset */ + cci->fw.staged_slot = fw_activate->slot; + break; + default: + return CXL_MBOX_INVALID_INPUT; + } + + return CXL_MBOX_SUCCESS; +} + /* CXL r3.1 Section 8.2.9.4.1: Get Timestamp (Opcode 0300h) */ static CXLRetCode cmd_timestamp_get(const struct cxl_cmd *cmd, uint8_t *payload_in, @@ -768,6 +953,388 @@ static CXLRetCode cmd_logs_get_log(const struct cxl_cmd *cmd, return CXL_MBOX_SUCCESS; } +/* CXL r3.1 section 8.2.9.6: Features */ +/* + * Get Supported Features output payload + * CXL r3.1 section 8.2.9.6.1 Table 8-96 + */ +typedef struct CXLSupportedFeatureHeader { + uint16_t entries; + uint16_t nsuppfeats_dev; + uint32_t reserved; +} QEMU_PACKED CXLSupportedFeatureHeader; + +/* + * Get Supported Features Supported 
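Two small sketches of the firmware-update bookkeeping added above, under the constants the hunk defines (CXL_FW_SLOTS = 2, CXL_FW_SIZE = 32 MiB, 128-byte transfer units); the demo_* names are hypothetical:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#define DEMO_FW_SLOTS 2
#define DEMO_FW_SIZE  0x02000000u /* 32 MiB */
#define DEMO_FW_ALIGN 128u

/* Get FW Info packs both slots into one byte: active in bits 2:0,
 * staged in bits 5:3. The old hard-coded BIT(0) | BIT(3) encoded
 * "slot 1 active, slot 1 staged". */
static uint8_t demo_slot_info(uint8_t active, uint8_t staged)
{
    return (active & 0x7) | ((staged & 0x7) << 3);
}

/* Transfer FW expresses offsets in 128-byte units; every part must
 * land inside the fixed 32 MiB image, as the hunk's bounds test does */
static bool demo_part_fits(uint32_t offset_units, size_t data_len)
{
    uint64_t offset = (uint64_t)offset_units * DEMO_FW_ALIGN;

    return offset + data_len <= DEMO_FW_SIZE;
}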
Feature Entry + * CXL r3.1 section 8.2.9.6.1 Table 8-97 + */ +typedef struct CXLSupportedFeatureEntry { + QemuUUID uuid; + uint16_t feat_index; + uint16_t get_feat_size; + uint16_t set_feat_size; + uint32_t attr_flags; + uint8_t get_feat_version; + uint8_t set_feat_version; + uint16_t set_feat_effects; + uint8_t rsvd[18]; +} QEMU_PACKED CXLSupportedFeatureEntry; + +/* + * Get Supported Features Supported Feature Entry + * CXL rev 3.1 section 8.2.9.6.1 Table 8-97 + */ +/* Supported Feature Entry : attribute flags */ +#define CXL_FEAT_ENTRY_ATTR_FLAG_CHANGABLE BIT(0) +#define CXL_FEAT_ENTRY_ATTR_FLAG_DEEPEST_RESET_PERSISTENCE_MASK GENMASK(3, 1) +#define CXL_FEAT_ENTRY_ATTR_FLAG_PERSIST_ACROSS_FIRMWARE_UPDATE BIT(4) +#define CXL_FEAT_ENTRY_ATTR_FLAG_SUPPORT_DEFAULT_SELECTION BIT(5) +#define CXL_FEAT_ENTRY_ATTR_FLAG_SUPPORT_SAVED_SELECTION BIT(6) + +/* Supported Feature Entry : set feature effects */ +#define CXL_FEAT_ENTRY_SFE_CONFIG_CHANGE_COLD_RESET BIT(0) +#define CXL_FEAT_ENTRY_SFE_IMMEDIATE_CONFIG_CHANGE BIT(1) +#define CXL_FEAT_ENTRY_SFE_IMMEDIATE_DATA_CHANGE BIT(2) +#define CXL_FEAT_ENTRY_SFE_IMMEDIATE_POLICY_CHANGE BIT(3) +#define CXL_FEAT_ENTRY_SFE_IMMEDIATE_LOG_CHANGE BIT(4) +#define CXL_FEAT_ENTRY_SFE_SECURITY_STATE_CHANGE BIT(5) +#define CXL_FEAT_ENTRY_SFE_BACKGROUND_OPERATION BIT(6) +#define CXL_FEAT_ENTRY_SFE_SUPPORT_SECONDARY_MAILBOX BIT(7) +#define CXL_FEAT_ENTRY_SFE_SUPPORT_ABORT_BACKGROUND_OPERATION BIT(8) +#define CXL_FEAT_ENTRY_SFE_CEL_VALID BIT(9) +#define CXL_FEAT_ENTRY_SFE_CONFIG_CHANGE_CONV_RESET BIT(10) +#define CXL_FEAT_ENTRY_SFE_CONFIG_CHANGE_CXL_RESET BIT(11) + +enum CXL_SUPPORTED_FEATURES_LIST { + CXL_FEATURE_PATROL_SCRUB = 0, + CXL_FEATURE_ECS, + CXL_FEATURE_MAX +}; + +/* Get Feature CXL 3.1 Spec 8.2.9.6.2 */ +/* + * Get Feature input payload + * CXL r3.1 section 8.2.9.6.2 Table 8-99 + */ +/* Get Feature : Payload in selection */ +enum CXL_GET_FEATURE_SELECTION { + CXL_GET_FEATURE_SEL_CURRENT_VALUE, + CXL_GET_FEATURE_SEL_DEFAULT_VALUE, + CXL_GET_FEATURE_SEL_SAVED_VALUE, + CXL_GET_FEATURE_SEL_MAX +}; + +/* Set Feature CXL 3.1 Spec 8.2.9.6.3 */ +/* + * Set Feature input payload + * CXL r3.1 section 8.2.9.6.3 Table 8-101 + */ +typedef struct CXLSetFeatureInHeader { + QemuUUID uuid; + uint32_t flags; + uint16_t offset; + uint8_t version; + uint8_t rsvd[9]; +} QEMU_PACKED QEMU_ALIGNED(16) CXLSetFeatureInHeader; + +/* Set Feature : Payload in flags */ +#define CXL_SET_FEATURE_FLAG_DATA_TRANSFER_MASK 0x7 +enum CXL_SET_FEATURE_FLAG_DATA_TRANSFER { + CXL_SET_FEATURE_FLAG_FULL_DATA_TRANSFER, + CXL_SET_FEATURE_FLAG_INITIATE_DATA_TRANSFER, + CXL_SET_FEATURE_FLAG_CONTINUE_DATA_TRANSFER, + CXL_SET_FEATURE_FLAG_FINISH_DATA_TRANSFER, + CXL_SET_FEATURE_FLAG_ABORT_DATA_TRANSFER, + CXL_SET_FEATURE_FLAG_DATA_TRANSFER_MAX +}; +#define CXL_SET_FEAT_DATA_SAVED_ACROSS_RESET BIT(3) + +/* CXL r3.1 section 8.2.9.9.11.1: Device Patrol Scrub Control Feature */ +static const QemuUUID patrol_scrub_uuid = { + .data = UUID(0x96dad7d6, 0xfde8, 0x482b, 0xa7, 0x33, + 0x75, 0x77, 0x4e, 0x06, 0xdb, 0x8a) +}; + +typedef struct CXLMemPatrolScrubSetFeature { + CXLSetFeatureInHeader hdr; + CXLMemPatrolScrubWriteAttrs feat_data; +} QEMU_PACKED QEMU_ALIGNED(16) CXLMemPatrolScrubSetFeature; + +/* + * CXL r3.1 section 8.2.9.9.11.2: + * DDR5 Error Check Scrub (ECS) Control Feature + */ +static const QemuUUID ecs_uuid = { + .data = UUID(0xe5b13f22, 0x2328, 0x4a14, 0xb8, 0xba, + 0xb9, 0x69, 0x1e, 0x89, 0x33, 0x86) +}; + +typedef struct CXLMemECSSetFeature { + CXLSetFeatureInHeader hdr; + CXLMemECSWriteAttrs 
feat_data[]; +} QEMU_PACKED QEMU_ALIGNED(16) CXLMemECSSetFeature; + +/* CXL r3.1 section 8.2.9.6.1: Get Supported Features (Opcode 0500h) */ +static CXLRetCode cmd_features_get_supported(const struct cxl_cmd *cmd, + uint8_t *payload_in, + size_t len_in, + uint8_t *payload_out, + size_t *len_out, + CXLCCI *cci) +{ + struct { + uint32_t count; + uint16_t start_index; + uint16_t reserved; + } QEMU_PACKED QEMU_ALIGNED(16) * get_feats_in = (void *)payload_in; + + struct { + CXLSupportedFeatureHeader hdr; + CXLSupportedFeatureEntry feat_entries[]; + } QEMU_PACKED QEMU_ALIGNED(16) * get_feats_out = (void *)payload_out; + uint16_t index, req_entries; + uint16_t entry; + + if (!object_dynamic_cast(OBJECT(cci->d), TYPE_CXL_TYPE3)) { + return CXL_MBOX_UNSUPPORTED; + } + if (get_feats_in->count < sizeof(CXLSupportedFeatureHeader) || + get_feats_in->start_index >= CXL_FEATURE_MAX) { + return CXL_MBOX_INVALID_INPUT; + } + + req_entries = (get_feats_in->count - + sizeof(CXLSupportedFeatureHeader)) / + sizeof(CXLSupportedFeatureEntry); + req_entries = MIN(req_entries, + (CXL_FEATURE_MAX - get_feats_in->start_index)); + + for (entry = 0, index = get_feats_in->start_index; + entry < req_entries; index++) { + switch (index) { + case CXL_FEATURE_PATROL_SCRUB: + /* Fill supported feature entry for device patrol scrub control */ + get_feats_out->feat_entries[entry++] = + (struct CXLSupportedFeatureEntry) { + .uuid = patrol_scrub_uuid, + .feat_index = index, + .get_feat_size = sizeof(CXLMemPatrolScrubReadAttrs), + .set_feat_size = sizeof(CXLMemPatrolScrubWriteAttrs), + .attr_flags = CXL_FEAT_ENTRY_ATTR_FLAG_CHANGABLE, + .get_feat_version = CXL_MEMDEV_PS_GET_FEATURE_VERSION, + .set_feat_version = CXL_MEMDEV_PS_SET_FEATURE_VERSION, + .set_feat_effects = CXL_FEAT_ENTRY_SFE_IMMEDIATE_CONFIG_CHANGE | + CXL_FEAT_ENTRY_SFE_CEL_VALID, + }; + break; + case CXL_FEATURE_ECS: + /* Fill supported feature entry for device DDR5 ECS control */ + get_feats_out->feat_entries[entry++] = + (struct CXLSupportedFeatureEntry) { + .uuid = ecs_uuid, + .feat_index = index, + .get_feat_size = CXL_ECS_NUM_MEDIA_FRUS * + sizeof(CXLMemECSReadAttrs), + .set_feat_size = CXL_ECS_NUM_MEDIA_FRUS * + sizeof(CXLMemECSWriteAttrs), + .attr_flags = CXL_FEAT_ENTRY_ATTR_FLAG_CHANGABLE, + .get_feat_version = CXL_ECS_GET_FEATURE_VERSION, + .set_feat_version = CXL_ECS_SET_FEATURE_VERSION, + .set_feat_effects = CXL_FEAT_ENTRY_SFE_IMMEDIATE_CONFIG_CHANGE | + CXL_FEAT_ENTRY_SFE_CEL_VALID, + }; + break; + default: + __builtin_unreachable(); + } + } + get_feats_out->hdr.nsuppfeats_dev = CXL_FEATURE_MAX; + get_feats_out->hdr.entries = req_entries; + *len_out = sizeof(CXLSupportedFeatureHeader) + + req_entries * sizeof(CXLSupportedFeatureEntry); + + return CXL_MBOX_SUCCESS; +} + +/* CXL r3.1 section 8.2.9.6.2: Get Feature (Opcode 0501h) */ +static CXLRetCode cmd_features_get_feature(const struct cxl_cmd *cmd, + uint8_t *payload_in, + size_t len_in, + uint8_t *payload_out, + size_t *len_out, + CXLCCI *cci) +{ + struct { + QemuUUID uuid; + uint16_t offset; + uint16_t count; + uint8_t selection; + } QEMU_PACKED QEMU_ALIGNED(16) * get_feature; + uint16_t bytes_to_copy = 0; + CXLType3Dev *ct3d; + CXLSetFeatureInfo *set_feat_info; + + if (!object_dynamic_cast(OBJECT(cci->d), TYPE_CXL_TYPE3)) { + return CXL_MBOX_UNSUPPORTED; + } + + ct3d = CXL_TYPE3(cci->d); + get_feature = (void *)payload_in; + + set_feat_info = &ct3d->set_feat_info; + if (qemu_uuid_is_equal(&get_feature->uuid, &set_feat_info->uuid)) { + return CXL_MBOX_FEATURE_TRANSFER_IN_PROGRESS; + } + + if 
(get_feature->selection != CXL_GET_FEATURE_SEL_CURRENT_VALUE) { + return CXL_MBOX_UNSUPPORTED; + } + if (get_feature->offset + get_feature->count > cci->payload_max) { + return CXL_MBOX_INVALID_INPUT; + } + + if (qemu_uuid_is_equal(&get_feature->uuid, &patrol_scrub_uuid)) { + if (get_feature->offset >= sizeof(CXLMemPatrolScrubReadAttrs)) { + return CXL_MBOX_INVALID_INPUT; + } + bytes_to_copy = sizeof(CXLMemPatrolScrubReadAttrs) - + get_feature->offset; + bytes_to_copy = MIN(bytes_to_copy, get_feature->count); + memcpy(payload_out, + (uint8_t *)&ct3d->patrol_scrub_attrs + get_feature->offset, + bytes_to_copy); + } else if (qemu_uuid_is_equal(&get_feature->uuid, &ecs_uuid)) { + if (get_feature->offset >= CXL_ECS_NUM_MEDIA_FRUS * + sizeof(CXLMemECSReadAttrs)) { + return CXL_MBOX_INVALID_INPUT; + } + bytes_to_copy = CXL_ECS_NUM_MEDIA_FRUS * + sizeof(CXLMemECSReadAttrs) - + get_feature->offset; + bytes_to_copy = MIN(bytes_to_copy, get_feature->count); + memcpy(payload_out, + (uint8_t *)&ct3d->ecs_attrs + get_feature->offset, + bytes_to_copy); + } else { + return CXL_MBOX_UNSUPPORTED; + } + + *len_out = bytes_to_copy; + + return CXL_MBOX_SUCCESS; +} + +/* CXL r3.1 section 8.2.9.6.3: Set Feature (Opcode 0502h) */ +static CXLRetCode cmd_features_set_feature(const struct cxl_cmd *cmd, + uint8_t *payload_in, + size_t len_in, + uint8_t *payload_out, + size_t *len_out, + CXLCCI *cci) +{ + CXLSetFeatureInHeader *hdr = (void *)payload_in; + CXLMemPatrolScrubWriteAttrs *ps_write_attrs; + CXLMemPatrolScrubSetFeature *ps_set_feature; + CXLMemECSWriteAttrs *ecs_write_attrs; + CXLMemECSSetFeature *ecs_set_feature; + CXLSetFeatureInfo *set_feat_info; + uint16_t bytes_to_copy = 0; + uint8_t data_transfer_flag; + CXLType3Dev *ct3d; + uint16_t count; + + + if (!object_dynamic_cast(OBJECT(cci->d), TYPE_CXL_TYPE3)) { + return CXL_MBOX_UNSUPPORTED; + } + ct3d = CXL_TYPE3(cci->d); + set_feat_info = &ct3d->set_feat_info; + + if (!qemu_uuid_is_null(&set_feat_info->uuid) && + !qemu_uuid_is_equal(&hdr->uuid, &set_feat_info->uuid)) { + return CXL_MBOX_FEATURE_TRANSFER_IN_PROGRESS; + } + if (hdr->flags & CXL_SET_FEAT_DATA_SAVED_ACROSS_RESET) { + set_feat_info->data_saved_across_reset = true; + } else { + set_feat_info->data_saved_across_reset = false; + } + + data_transfer_flag = + hdr->flags & CXL_SET_FEATURE_FLAG_DATA_TRANSFER_MASK; + if (data_transfer_flag == CXL_SET_FEATURE_FLAG_INITIATE_DATA_TRANSFER) { + set_feat_info->uuid = hdr->uuid; + set_feat_info->data_size = 0; + } + set_feat_info->data_transfer_flag = data_transfer_flag; + set_feat_info->data_offset = hdr->offset; + bytes_to_copy = len_in - sizeof(CXLSetFeatureInHeader); + + if (qemu_uuid_is_equal(&hdr->uuid, &patrol_scrub_uuid)) { + if (hdr->version != CXL_MEMDEV_PS_SET_FEATURE_VERSION) { + return CXL_MBOX_UNSUPPORTED; + } + + ps_set_feature = (void *)payload_in; + ps_write_attrs = &ps_set_feature->feat_data; + memcpy((uint8_t *)&ct3d->patrol_scrub_wr_attrs + hdr->offset, + ps_write_attrs, + bytes_to_copy); + set_feat_info->data_size += bytes_to_copy; + + if (data_transfer_flag == CXL_SET_FEATURE_FLAG_FULL_DATA_TRANSFER || + data_transfer_flag == CXL_SET_FEATURE_FLAG_FINISH_DATA_TRANSFER) { + ct3d->patrol_scrub_attrs.scrub_cycle &= ~0xFF; + ct3d->patrol_scrub_attrs.scrub_cycle |= + ct3d->patrol_scrub_wr_attrs.scrub_cycle_hr & 0xFF; + ct3d->patrol_scrub_attrs.scrub_flags &= ~0x1; + ct3d->patrol_scrub_attrs.scrub_flags |= + ct3d->patrol_scrub_wr_attrs.scrub_flags & 0x1; + } + } else if (qemu_uuid_is_equal(&hdr->uuid, + &ecs_uuid)) { + if (hdr->version != 
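cmd_features_get_feature() above serves partial reads: the request's (offset, count) window is validated against the attribute blob and then clamped, so a caller can page through a feature larger than one mailbox payload. A self-contained sketch of that clamp (hypothetical name):

#include <stddef.h>

/* bytes of a blob_size-byte attribute blob that (offset, count)
 * may copy; the handler above rejects offset >= blob_size with
 * CXL_MBOX_INVALID_INPUT before clamping */
static size_t demo_feature_window(size_t blob_size, size_t offset,
                                  size_t count)
{
    if (offset >= blob_size) {
        return 0;
    }
    return count < blob_size - offset ? count : blob_size - offset;
}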
CXL_ECS_SET_FEATURE_VERSION) { + return CXL_MBOX_UNSUPPORTED; + } + + ecs_set_feature = (void *)payload_in; + ecs_write_attrs = ecs_set_feature->feat_data; + memcpy((uint8_t *)ct3d->ecs_wr_attrs + hdr->offset, + ecs_write_attrs, + bytes_to_copy); + set_feat_info->data_size += bytes_to_copy; + + if (data_transfer_flag == CXL_SET_FEATURE_FLAG_FULL_DATA_TRANSFER || + data_transfer_flag == CXL_SET_FEATURE_FLAG_FINISH_DATA_TRANSFER) { + for (count = 0; count < CXL_ECS_NUM_MEDIA_FRUS; count++) { + ct3d->ecs_attrs[count].ecs_log_cap = + ct3d->ecs_wr_attrs[count].ecs_log_cap; + ct3d->ecs_attrs[count].ecs_config = + ct3d->ecs_wr_attrs[count].ecs_config & 0x1F; + } + } + } else { + return CXL_MBOX_UNSUPPORTED; + } + + if (data_transfer_flag == CXL_SET_FEATURE_FLAG_FULL_DATA_TRANSFER || + data_transfer_flag == CXL_SET_FEATURE_FLAG_FINISH_DATA_TRANSFER || + data_transfer_flag == CXL_SET_FEATURE_FLAG_ABORT_DATA_TRANSFER) { + memset(&set_feat_info->uuid, 0, sizeof(QemuUUID)); + if (qemu_uuid_is_equal(&hdr->uuid, &patrol_scrub_uuid)) { + memset(&ct3d->patrol_scrub_wr_attrs, 0, set_feat_info->data_size); + } else if (qemu_uuid_is_equal(&hdr->uuid, &ecs_uuid)) { + memset(ct3d->ecs_wr_attrs, 0, set_feat_info->data_size); + } + set_feat_info->data_transfer_flag = 0; + set_feat_info->data_saved_across_reset = false; + set_feat_info->data_offset = 0; + set_feat_info->data_size = 0; + } + + return CXL_MBOX_SUCCESS; +} + /* CXL r3.1 Section 8.2.9.9.1.1: Identify Memory Device (Opcode 4000h) */ static CXLRetCode cmd_identify_memory_device(const struct cxl_cmd *cmd, uint8_t *payload_in, @@ -805,7 +1372,6 @@ static CXLRetCode cmd_identify_memory_device(const struct cxl_cmd *cmd, } id = (void *)payload_out; - memset(id, 0, sizeof(*id)); snprintf(id->fw_revision, 0x10, "BWFW VERSION %02d", 0); @@ -953,6 +1519,7 @@ static void __do_sanitization(CXLType3Dev *ct3d) memset(lsa, 0, memory_region_size(mr)); } } + cxl_discard_all_event_records(&ct3d->cxl_dstate); } /* @@ -1086,8 +1653,8 @@ static CXLRetCode cmd_media_get_poison_list(const struct cxl_cmd *cmd, QLIST_FOREACH(ent, poison_list, node) { /* Check for no overlap */ - if (ent->start >= query_start + query_length || - ent->start + ent->length <= query_start) { + if (!ranges_overlap(ent->start, ent->length, + query_start, query_length)) { continue; } record_count++; @@ -1095,13 +1662,12 @@ static CXLRetCode cmd_media_get_poison_list(const struct cxl_cmd *cmd, out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]); assert(out_pl_len <= CXL_MAILBOX_MAX_PAYLOAD_SIZE); - memset(out, 0, out_pl_len); QLIST_FOREACH(ent, poison_list, node) { uint64_t start, stop; /* Check for no overlap */ - if (ent->start >= query_start + query_length || - ent->start + ent->length <= query_start) { + if (!ranges_overlap(ent->start, ent->length, + query_start, query_length)) { continue; } @@ -1117,6 +1683,10 @@ static CXLRetCode cmd_media_get_poison_list(const struct cxl_cmd *cmd, out->flags = (1 << 1); stq_le_p(&out->overflow_timestamp, ct3d->poison_list_overflow_ts); } + if (scan_media_running(cci)) { + out->flags |= (1 << 2); + } + stw_le_p(&out->count, record_count); *len_out = out_pl_len; return CXL_MBOX_SUCCESS; @@ -1146,6 +1716,16 @@ static CXLRetCode cmd_media_inject_poison(const struct cxl_cmd *cmd, return CXL_MBOX_SUCCESS; } } + /* + * Freeze the list if there is an on-going scan media operation. + */ + if (scan_media_running(cci)) { + /* + * XXX: Spec is ambiguous - is this case considered + * a successful return despite not adding to the list? 
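The Get Poison List hunks above replace an open-coded interval test with ranges_overlap() from include/qemu/range.h. Restated as a minimal sketch: two half-open ranges [s, s + l) intersect iff each starts before the other ends.

#include <stdbool.h>
#include <stdint.h>

static bool demo_ranges_overlap(uint64_t s1, uint64_t l1,
                                uint64_t s2, uint64_t l2)
{
    /* equivalent to the removed !(s1 >= s2 + l2 || s1 + l1 <= s2) */
    return s1 < s2 + l2 && s2 < s1 + l1;
}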
+ */ + goto success; + } if (ct3d->poison_list_cnt == CXL_POISON_LIST_LIMIT) { return CXL_MBOX_INJECT_POISON_LIMIT; @@ -1161,6 +1741,7 @@ static CXLRetCode cmd_media_inject_poison(const struct cxl_cmd *cmd, */ QLIST_INSERT_HEAD(poison_list, p, node); ct3d->poison_list_cnt++; +success: *len_out = 0; return CXL_MBOX_SUCCESS; @@ -1200,6 +1781,17 @@ static CXLRetCode cmd_media_clear_poison(const struct cxl_cmd *cmd, } } + /* + * Freeze the list if there is an on-going scan media operation. + */ + if (scan_media_running(cci)) { + /* + * XXX: Spec is ambiguous - is this case considered + * a successful return despite not removing from the list? + */ + goto success; + } + QLIST_FOREACH(ent, poison_list, node) { /* * Test for contained in entry. Simpler than general case @@ -1210,7 +1802,7 @@ static CXLRetCode cmd_media_clear_poison(const struct cxl_cmd *cmd, } } if (!ent) { - return CXL_MBOX_SUCCESS; + goto success; } QLIST_REMOVE(ent, node); @@ -1247,12 +1839,262 @@ static CXLRetCode cmd_media_clear_poison(const struct cxl_cmd *cmd, } /* Any fragments have been added, free original entry */ g_free(ent); +success: *len_out = 0; return CXL_MBOX_SUCCESS; } /* + * CXL r3.1 section 8.2.9.9.4.4: Get Scan Media Capabilities + */ +static CXLRetCode +cmd_media_get_scan_media_capabilities(const struct cxl_cmd *cmd, + uint8_t *payload_in, + size_t len_in, + uint8_t *payload_out, + size_t *len_out, + CXLCCI *cci) +{ + struct get_scan_media_capabilities_pl { + uint64_t pa; + uint64_t length; + } QEMU_PACKED; + + struct get_scan_media_capabilities_out_pl { + uint32_t estimated_runtime_ms; + }; + + CXLType3Dev *ct3d = CXL_TYPE3(cci->d); + CXLDeviceState *cxl_dstate = &ct3d->cxl_dstate; + struct get_scan_media_capabilities_pl *in = (void *)payload_in; + struct get_scan_media_capabilities_out_pl *out = (void *)payload_out; + uint64_t query_start; + uint64_t query_length; + + query_start = ldq_le_p(&in->pa); + /* 64 byte alignment required */ + if (query_start & 0x3f) { + return CXL_MBOX_INVALID_INPUT; + } + query_length = ldq_le_p(&in->length) * CXL_CACHE_LINE_SIZE; + + if (query_start + query_length > cxl_dstate->static_mem_size) { + return CXL_MBOX_INVALID_PA; + } + + /* + * Just use 400 nanosecond access/read latency + 100 ns for + * the cost of updating the poison list. For small enough + * chunks return at least 1 ms. 
+ */ + stl_le_p(&out->estimated_runtime_ms, + MAX(1, query_length * (0.0005L / 64))); + + *len_out = sizeof(*out); + return CXL_MBOX_SUCCESS; +} + +static void __do_scan_media(CXLType3Dev *ct3d) +{ + CXLPoison *ent; + unsigned int results_cnt = 0; + + QLIST_FOREACH(ent, &ct3d->scan_media_results, node) { + results_cnt++; + } + + /* only scan media may clear the overflow */ + if (ct3d->poison_list_overflowed && + ct3d->poison_list_cnt == results_cnt) { + cxl_clear_poison_list_overflowed(ct3d); + } + /* scan media has run since last conventional reset */ + ct3d->scan_media_hasrun = true; +} + +/* + * CXL r3.1 section 8.2.9.9.4.5: Scan Media + */ +static CXLRetCode cmd_media_scan_media(const struct cxl_cmd *cmd, + uint8_t *payload_in, + size_t len_in, + uint8_t *payload_out, + size_t *len_out, + CXLCCI *cci) +{ + struct scan_media_pl { + uint64_t pa; + uint64_t length; + uint8_t flags; + } QEMU_PACKED; + + struct scan_media_pl *in = (void *)payload_in; + CXLType3Dev *ct3d = CXL_TYPE3(cci->d); + CXLDeviceState *cxl_dstate = &ct3d->cxl_dstate; + uint64_t query_start; + uint64_t query_length; + CXLPoison *ent, *next; + + query_start = ldq_le_p(&in->pa); + /* 64 byte alignment required */ + if (query_start & 0x3f) { + return CXL_MBOX_INVALID_INPUT; + } + query_length = ldq_le_p(&in->length) * CXL_CACHE_LINE_SIZE; + + if (query_start + query_length > cxl_dstate->static_mem_size) { + return CXL_MBOX_INVALID_PA; + } + if (ct3d->dc.num_regions && query_start + query_length >= + cxl_dstate->static_mem_size + ct3d->dc.total_capacity) { + return CXL_MBOX_INVALID_PA; + } + + if (in->flags == 0) { /* TODO */ + qemu_log_mask(LOG_UNIMP, + "Scan Media Event Log is unsupported\n"); + } + + /* any previous results are discarded upon a new Scan Media */ + QLIST_FOREACH_SAFE(ent, &ct3d->scan_media_results, node, next) { + QLIST_REMOVE(ent, node); + g_free(ent); + } + + /* kill the poison list - it will be recreated */ + if (ct3d->poison_list_overflowed) { + QLIST_FOREACH_SAFE(ent, &ct3d->poison_list, node, next) { + QLIST_REMOVE(ent, node); + g_free(ent); + ct3d->poison_list_cnt--; + } + } + + /* + * Scan the backup list and move corresponding entries + * into the results list, updating the poison list + * when possible. + */ + QLIST_FOREACH_SAFE(ent, &ct3d->poison_list_bkp, node, next) { + CXLPoison *res; + + if (ent->start >= query_start + query_length || + ent->start + ent->length <= query_start) { + continue; + } + + /* + * If a Get Poison List cmd comes in while this + * scan is being done, it will see the new complete + * list, while setting the respective flag. 
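A worked check of the runtime estimate in cmd_media_get_scan_media_capabilities() above: at 400 ns access plus 100 ns poison-list update per 64-byte line, query_length bytes take (query_length / 64) * 500 ns = query_length * (0.0005 / 64) ms, which is exactly the expression passed to stl_le_p(); the MAX(1, ...) keeps sub-millisecond scans from reporting an estimate of zero.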
+ */ + if (ct3d->poison_list_cnt < CXL_POISON_LIST_LIMIT) { + CXLPoison *p = g_new0(CXLPoison, 1); + + p->start = ent->start; + p->length = ent->length; + p->type = ent->type; + QLIST_INSERT_HEAD(&ct3d->poison_list, p, node); + ct3d->poison_list_cnt++; + } + + res = g_new0(CXLPoison, 1); + res->start = ent->start; + res->length = ent->length; + res->type = ent->type; + QLIST_INSERT_HEAD(&ct3d->scan_media_results, res, node); + + QLIST_REMOVE(ent, node); + g_free(ent); + } + + cci->bg.runtime = MAX(1, query_length * (0.0005L / 64)); + *len_out = 0; + + return CXL_MBOX_BG_STARTED; +} + +/* + * CXL r3.1 section 8.2.9.9.4.6: Get Scan Media Results + */ +static CXLRetCode cmd_media_get_scan_media_results(const struct cxl_cmd *cmd, + uint8_t *payload_in, + size_t len_in, + uint8_t *payload_out, + size_t *len_out, + CXLCCI *cci) +{ + struct get_scan_media_results_out_pl { + uint64_t dpa_restart; + uint64_t length; + uint8_t flags; + uint8_t rsvd1; + uint16_t count; + uint8_t rsvd2[0xc]; + struct { + uint64_t addr; + uint32_t length; + uint32_t resv; + } QEMU_PACKED records[]; + } QEMU_PACKED; + + struct get_scan_media_results_out_pl *out = (void *)payload_out; + CXLType3Dev *ct3d = CXL_TYPE3(cci->d); + CXLPoisonList *scan_media_results = &ct3d->scan_media_results; + CXLPoison *ent, *next; + uint16_t total_count = 0, record_count = 0, i = 0; + uint16_t out_pl_len; + + if (!ct3d->scan_media_hasrun) { + return CXL_MBOX_UNSUPPORTED; + } + + /* + * Calculate limits, all entries are within the same address range of the + * last scan media call. + */ + QLIST_FOREACH(ent, scan_media_results, node) { + size_t rec_size = record_count * sizeof(out->records[0]); + + if (sizeof(*out) + rec_size < CXL_MAILBOX_MAX_PAYLOAD_SIZE) { + record_count++; + } + total_count++; + } + + out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]); + assert(out_pl_len <= CXL_MAILBOX_MAX_PAYLOAD_SIZE); + + memset(out, 0, out_pl_len); + QLIST_FOREACH_SAFE(ent, scan_media_results, node, next) { + uint64_t start, stop; + + if (i == record_count) { + break; + } + + start = ROUND_DOWN(ent->start, 64ull); + stop = ROUND_DOWN(ent->start, 64ull) + ent->length; + stq_le_p(&out->records[i].addr, start | (ent->type & 0x7)); + stl_le_p(&out->records[i].length, (stop - start) / CXL_CACHE_LINE_SIZE); + i++; + + /* consume the returning entry */ + QLIST_REMOVE(ent, node); + g_free(ent); + } + + stw_le_p(&out->count, record_count); + if (total_count > record_count) { + out->flags = (1 << 0); /* More Media Error Records */ + } + + *len_out = out_pl_len; + return CXL_MBOX_SUCCESS; +} + +/* * CXL r3.1 section 8.2.9.9.9.1: Get Dynamic Capacity Configuration * (Opcode: 4800h) */ @@ -1822,40 +2664,51 @@ static CXLRetCode cmd_dcd_release_dyn_cap(const struct cxl_cmd *cmd, return CXL_MBOX_SUCCESS; } -#define IMMEDIATE_CONFIG_CHANGE (1 << 1) -#define IMMEDIATE_DATA_CHANGE (1 << 2) -#define IMMEDIATE_POLICY_CHANGE (1 << 3) -#define IMMEDIATE_LOG_CHANGE (1 << 4) -#define SECURITY_STATE_CHANGE (1 << 5) -#define BACKGROUND_OPERATION (1 << 6) - static const struct cxl_cmd cxl_cmd_set[256][256] = { [EVENTS][GET_RECORDS] = { "EVENTS_GET_RECORDS", cmd_events_get_records, 1, 0 }, [EVENTS][CLEAR_RECORDS] = { "EVENTS_CLEAR_RECORDS", - cmd_events_clear_records, ~0, IMMEDIATE_LOG_CHANGE }, + cmd_events_clear_records, ~0, CXL_MBOX_IMMEDIATE_LOG_CHANGE }, [EVENTS][GET_INTERRUPT_POLICY] = { "EVENTS_GET_INTERRUPT_POLICY", cmd_events_get_interrupt_policy, 0, 0 }, [EVENTS][SET_INTERRUPT_POLICY] = { "EVENTS_SET_INTERRUPT_POLICY", cmd_events_set_interrupt_policy, 
- ~0, IMMEDIATE_CONFIG_CHANGE }, + ~0, CXL_MBOX_IMMEDIATE_CONFIG_CHANGE }, [FIRMWARE_UPDATE][GET_INFO] = { "FIRMWARE_UPDATE_GET_INFO", cmd_firmware_update_get_info, 0, 0 }, + [FIRMWARE_UPDATE][TRANSFER] = { "FIRMWARE_UPDATE_TRANSFER", + cmd_firmware_update_transfer, ~0, CXL_MBOX_BACKGROUND_OPERATION }, + [FIRMWARE_UPDATE][ACTIVATE] = { "FIRMWARE_UPDATE_ACTIVATE", + cmd_firmware_update_activate, 2, CXL_MBOX_BACKGROUND_OPERATION }, [TIMESTAMP][GET] = { "TIMESTAMP_GET", cmd_timestamp_get, 0, 0 }, [TIMESTAMP][SET] = { "TIMESTAMP_SET", cmd_timestamp_set, - 8, IMMEDIATE_POLICY_CHANGE }, + 8, CXL_MBOX_IMMEDIATE_POLICY_CHANGE }, [LOGS][GET_SUPPORTED] = { "LOGS_GET_SUPPORTED", cmd_logs_get_supported, 0, 0 }, [LOGS][GET_LOG] = { "LOGS_GET_LOG", cmd_logs_get_log, 0x18, 0 }, + [FEATURES][GET_SUPPORTED] = { "FEATURES_GET_SUPPORTED", + cmd_features_get_supported, 0x8, 0 }, + [FEATURES][GET_FEATURE] = { "FEATURES_GET_FEATURE", + cmd_features_get_feature, 0x15, 0 }, + [FEATURES][SET_FEATURE] = { "FEATURES_SET_FEATURE", + cmd_features_set_feature, + ~0, + (CXL_MBOX_IMMEDIATE_CONFIG_CHANGE | + CXL_MBOX_IMMEDIATE_DATA_CHANGE | + CXL_MBOX_IMMEDIATE_POLICY_CHANGE | + CXL_MBOX_IMMEDIATE_LOG_CHANGE | + CXL_MBOX_SECURITY_STATE_CHANGE)}, [IDENTIFY][MEMORY_DEVICE] = { "IDENTIFY_MEMORY_DEVICE", cmd_identify_memory_device, 0, 0 }, [CCLS][GET_PARTITION_INFO] = { "CCLS_GET_PARTITION_INFO", cmd_ccls_get_partition_info, 0, 0 }, [CCLS][GET_LSA] = { "CCLS_GET_LSA", cmd_ccls_get_lsa, 8, 0 }, [CCLS][SET_LSA] = { "CCLS_SET_LSA", cmd_ccls_set_lsa, - ~0, IMMEDIATE_CONFIG_CHANGE | IMMEDIATE_DATA_CHANGE }, + ~0, CXL_MBOX_IMMEDIATE_CONFIG_CHANGE | CXL_MBOX_IMMEDIATE_DATA_CHANGE }, [SANITIZE][OVERWRITE] = { "SANITIZE_OVERWRITE", cmd_sanitize_overwrite, 0, - IMMEDIATE_DATA_CHANGE | SECURITY_STATE_CHANGE | BACKGROUND_OPERATION }, + (CXL_MBOX_IMMEDIATE_DATA_CHANGE | + CXL_MBOX_SECURITY_STATE_CHANGE | + CXL_MBOX_BACKGROUND_OPERATION)}, [PERSISTENT_MEM][GET_SECURITY_STATE] = { "GET_SECURITY_STATE", cmd_get_security_state, 0, 0 }, [MEDIA_AND_POISON][GET_POISON_LIST] = { "MEDIA_AND_POISON_GET_POISON_LIST", @@ -1864,6 +2717,14 @@ static const struct cxl_cmd cxl_cmd_set[256][256] = { cmd_media_inject_poison, 8, 0 }, [MEDIA_AND_POISON][CLEAR_POISON] = { "MEDIA_AND_POISON_CLEAR_POISON", cmd_media_clear_poison, 72, 0 }, + [MEDIA_AND_POISON][GET_SCAN_MEDIA_CAPABILITIES] = { + "MEDIA_AND_POISON_GET_SCAN_MEDIA_CAPABILITIES", + cmd_media_get_scan_media_capabilities, 16, 0 }, + [MEDIA_AND_POISON][SCAN_MEDIA] = { "MEDIA_AND_POISON_SCAN_MEDIA", + cmd_media_scan_media, 17, CXL_MBOX_BACKGROUND_OPERATION }, + [MEDIA_AND_POISON][GET_SCAN_MEDIA_RESULTS] = { + "MEDIA_AND_POISON_GET_SCAN_MEDIA_RESULTS", + cmd_media_get_scan_media_results, 0, 0 }, }; static const struct cxl_cmd cxl_cmd_set_dcd[256][256] = { @@ -1874,10 +2735,10 @@ static const struct cxl_cmd cxl_cmd_set_dcd[256][256] = { 8, 0 }, [DCD_CONFIG][ADD_DYN_CAP_RSP] = { "DCD_ADD_DYNAMIC_CAPACITY_RESPONSE", cmd_dcd_add_dyn_cap_rsp, - ~0, IMMEDIATE_DATA_CHANGE }, + ~0, CXL_MBOX_IMMEDIATE_DATA_CHANGE }, [DCD_CONFIG][RELEASE_DYN_CAP] = { "DCD_RELEASE_DYNAMIC_CAPACITY", cmd_dcd_release_dyn_cap, - ~0, IMMEDIATE_DATA_CHANGE }, + ~0, CXL_MBOX_IMMEDIATE_DATA_CHANGE }, }; static const struct cxl_cmd cxl_cmd_set_sw[256][256] = { @@ -1885,8 +2746,8 @@ static const struct cxl_cmd cxl_cmd_set_sw[256][256] = { [INFOSTAT][BACKGROUND_OPERATION_STATUS] = { "BACKGROUND_OPERATION_STATUS", cmd_infostat_bg_op_sts, 0, 0 }, [TIMESTAMP][GET] = { "TIMESTAMP_GET", cmd_timestamp_get, 0, 0 }, - [TIMESTAMP][SET] = { 
"TIMESTAMP_SET", cmd_timestamp_set, 0, - IMMEDIATE_POLICY_CHANGE }, + [TIMESTAMP][SET] = { "TIMESTAMP_SET", cmd_timestamp_set, 8, + CXL_MBOX_IMMEDIATE_POLICY_CHANGE }, [LOGS][GET_SUPPORTED] = { "LOGS_GET_SUPPORTED", cmd_logs_get_supported, 0, 0 }, [LOGS][GET_LOG] = { "LOGS_GET_LOG", cmd_logs_get_log, 0x18, 0 }, @@ -1913,6 +2774,7 @@ int cxl_process_cci_message(CXLCCI *cci, uint8_t set, uint8_t cmd, int ret; const struct cxl_cmd *cxl_cmd; opcode_handler h; + CXLDeviceState *cxl_dstate; *len_out = 0; cxl_cmd = &cci->cxl_cmd_set[set][cmd]; @@ -1928,28 +2790,34 @@ int cxl_process_cci_message(CXLCCI *cci, uint8_t set, uint8_t cmd, } /* Only one bg command at a time */ - if ((cxl_cmd->effect & BACKGROUND_OPERATION) && + if ((cxl_cmd->effect & CXL_MBOX_BACKGROUND_OPERATION) && cci->bg.runtime > 0) { return CXL_MBOX_BUSY; } - /* forbid any selected commands while overwriting */ - if (sanitize_running(cci)) { - if (h == cmd_events_get_records || - h == cmd_ccls_get_partition_info || - h == cmd_ccls_set_lsa || - h == cmd_ccls_get_lsa || - h == cmd_logs_get_log || - h == cmd_media_get_poison_list || - h == cmd_media_inject_poison || - h == cmd_media_clear_poison || - h == cmd_sanitize_overwrite) { - return CXL_MBOX_MEDIA_DISABLED; + /* forbid any selected commands while the media is disabled */ + if (object_dynamic_cast(OBJECT(cci->d), TYPE_CXL_TYPE3)) { + cxl_dstate = &CXL_TYPE3(cci->d)->cxl_dstate; + + if (cxl_dev_media_disabled(cxl_dstate)) { + if (h == cmd_events_get_records || + h == cmd_ccls_get_partition_info || + h == cmd_ccls_set_lsa || + h == cmd_ccls_get_lsa || + h == cmd_logs_get_log || + h == cmd_media_get_poison_list || + h == cmd_media_inject_poison || + h == cmd_media_clear_poison || + h == cmd_sanitize_overwrite || + h == cmd_firmware_update_transfer || + h == cmd_firmware_update_activate) { + return CXL_MBOX_MEDIA_DISABLED; + } } } ret = (*h)(cxl_cmd, pl_in, len_in, pl_out, len_out, cci); - if ((cxl_cmd->effect & BACKGROUND_OPERATION) && + if ((cxl_cmd->effect & CXL_MBOX_BACKGROUND_OPERATION) && ret == CXL_MBOX_BG_STARTED) { *bg_started = true; } else { @@ -1987,6 +2855,9 @@ static void bg_timercb(void *opaque) cci->bg.complete_pct = 100; cci->bg.ret_code = ret; switch (cci->bg.opcode) { + case 0x0201: /* fw transfer */ + __do_firmware_xfer(cci); + break; case 0x4400: /* sanitize */ { CXLType3Dev *ct3d = CXL_TYPE3(cci->d); @@ -1995,8 +2866,13 @@ static void bg_timercb(void *opaque) cxl_dev_enable_media(&ct3d->cxl_dstate); } break; - case 0x4304: /* TODO: scan media */ + case 0x4304: /* scan media */ + { + CXLType3Dev *ct3d = CXL_TYPE3(cci->d); + + __do_scan_media(ct3d); break; + } default: __builtin_unreachable(); break; @@ -2053,6 +2929,10 @@ void cxl_init_cci(CXLCCI *cci, size_t payload_max) cci->bg.runtime = 0; cci->bg.timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, bg_timercb, cci); + + memset(&cci->fw, 0, sizeof(cci->fw)); + cci->fw.active_slot = 1; + cci->fw.slot[cci->fw.active_slot - 1] = true; } static void cxl_copy_cci_commands(CXLCCI *cci, const struct cxl_cmd (*cxl_cmds)[256]) diff --git a/hw/i2c/mpc_i2c.c b/hw/i2c/mpc_i2c.c index cb051a520f..06d4ce7d68 100644 --- a/hw/i2c/mpc_i2c.c +++ b/hw/i2c/mpc_i2c.c @@ -82,7 +82,7 @@ struct MPCI2CState { uint8_t cr; uint8_t sr; uint8_t dr; - uint8_t dfssr; + uint8_t dfsrr; }; static bool mpc_i2c_is_enabled(MPCI2CState *s) @@ -293,7 +293,7 @@ static void mpc_i2c_write(void *opaque, hwaddr addr, } break; case MPC_I2C_DFSRR: - s->dfssr = value; + s->dfsrr = value; break; default: DPRINTF("ERROR: Bad write addr 0x%x\n", (unsigned int)addr); @@ 
-319,7 +319,7 @@ static const VMStateDescription mpc_i2c_vmstate = { VMSTATE_UINT8(cr, MPCI2CState), VMSTATE_UINT8(sr, MPCI2CState), VMSTATE_UINT8(dr, MPCI2CState), - VMSTATE_UINT8(dfssr, MPCI2CState), + VMSTATE_UINT8(dfsrr, MPCI2CState), VMSTATE_END_OF_LIST() } }; @@ -329,7 +329,7 @@ static void mpc_i2c_realize(DeviceState *dev, Error **errp) MPCI2CState *i2c = MPC_I2C(dev); sysbus_init_irq(SYS_BUS_DEVICE(dev), &i2c->irq); memory_region_init_io(&i2c->iomem, OBJECT(i2c), &i2c_ops, i2c, - "mpc-i2c", 0x14); + "mpc-i2c", 0x15); sysbus_init_mmio(SYS_BUS_DEVICE(dev), &i2c->iomem); i2c->bus = i2c_init_bus(dev, "i2c"); } diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index f4e366f64f..5d4bd2b710 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -1536,7 +1536,8 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, .fw_unplugs_cpu = pm->smi_on_cpu_unplug, }; build_cpus_aml(dsdt, machine, opts, pc_madt_cpu_entry, - pm->cpu_hp_io_base, "\\_SB.PCI0", "\\_GPE._E02"); + pm->cpu_hp_io_base, "\\_SB.PCI0", "\\_GPE._E02", + AML_SYSTEM_IO); } if (pcms->memhp_io_base && nr_mem) { diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 37c21a0aec..9a768f0b44 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -358,7 +358,7 @@ static VTDIOTLBEntry *vtd_lookup_iotlb(IntelIOMMUState *s, uint16_t source_id, { struct vtd_iotlb_key key; VTDIOTLBEntry *entry; - int level; + unsigned level; for (level = VTD_SL_PT_LEVEL; level < VTD_SL_PML4_LEVEL; level++) { key.gfn = vtd_get_iotlb_gfn(addr, level); @@ -2666,13 +2666,43 @@ static bool vtd_process_inv_iec_desc(IntelIOMMUState *s, return true; } +static void do_invalidate_device_tlb(VTDAddressSpace *vtd_dev_as, + bool size, hwaddr addr) +{ + /* + * According to ATS spec table 2.4: + * S = 0, bits 15:12 = xxxx range size: 4K + * S = 1, bits 15:12 = xxx0 range size: 8K + * S = 1, bits 15:12 = xx01 range size: 16K + * S = 1, bits 15:12 = x011 range size: 32K + * S = 1, bits 15:12 = 0111 range size: 64K + * ... + */ + + IOMMUTLBEvent event; + uint64_t sz; + + if (size) { + sz = (VTD_PAGE_SIZE * 2) << cto64(addr >> VTD_PAGE_SHIFT); + addr &= ~(sz - 1); + } else { + sz = VTD_PAGE_SIZE; + } + + event.type = IOMMU_NOTIFIER_DEVIOTLB_UNMAP; + event.entry.target_as = &vtd_dev_as->as; + event.entry.addr_mask = sz - 1; + event.entry.iova = addr; + event.entry.perm = IOMMU_NONE; + event.entry.translated_addr = 0; + memory_region_notify_iommu(&vtd_dev_as->iommu, 0, event); +} + static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc) { VTDAddressSpace *vtd_dev_as; - IOMMUTLBEvent event; hwaddr addr; - uint64_t sz; uint16_t sid; bool size; @@ -2697,28 +2727,7 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s, goto done; } - /* According to ATS spec table 2.4: - * S = 0, bits 15:12 = xxxx range size: 4K - * S = 1, bits 15:12 = xxx0 range size: 8K - * S = 1, bits 15:12 = xx01 range size: 16K - * S = 1, bits 15:12 = x011 range size: 32K - * S = 1, bits 15:12 = 0111 range size: 64K - * ... 
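The mpc_i2c changes above fix the dfssr -> dfsrr field-name typo and grow the MMIO region from 0x14 to 0x15 bytes; the size matters because a region must cover [0, last_register_offset + width). A sketch of the register map this implies (offsets assumed from the existing defines in mpc_i2c.c; demo_ prefix hypothetical):

/* MPC I2C byte-wide register offsets */
enum {
    DEMO_MPC_I2C_ADR   = 0x00,
    DEMO_MPC_I2C_FDR   = 0x04,
    DEMO_MPC_I2C_CR    = 0x08,
    DEMO_MPC_I2C_SR    = 0x0c,
    DEMO_MPC_I2C_DR    = 0x10,
    DEMO_MPC_I2C_DFSRR = 0x14, /* highest register: 0x14 + 1 = 0x15 */
};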
- */ - if (size) { - sz = (VTD_PAGE_SIZE * 2) << cto64(addr >> VTD_PAGE_SHIFT); - addr &= ~(sz - 1); - } else { - sz = VTD_PAGE_SIZE; - } - - event.type = IOMMU_NOTIFIER_DEVIOTLB_UNMAP; - event.entry.target_as = &vtd_dev_as->as; - event.entry.addr_mask = sz - 1; - event.entry.iova = addr; - event.entry.perm = IOMMU_NONE; - event.entry.translated_addr = 0; - memory_region_notify_iommu(&vtd_dev_as->iommu, 0, event); + do_invalidate_device_tlb(vtd_dev_as, size, addr); done: return true; diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index f8cf99bddf..5f32c36943 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -264,10 +264,10 @@ #define VTD_FRCD_FR(val) (((val) & 0xffULL) << 32) #define VTD_FRCD_SID_MASK 0xffffULL #define VTD_FRCD_SID(val) ((val) & VTD_FRCD_SID_MASK) +#define VTD_FRCD_PV(val) (((val) & 0xffffULL) << 40) +#define VTD_FRCD_PP(val) (((val) & 0x1ULL) << 31) /* For the low 64-bit of 128-bit */ #define VTD_FRCD_FI(val) ((val) & ~0xfffULL) -#define VTD_FRCD_PV(val) (((val) & 0xffffULL) << 40) -#define VTD_FRCD_PP(val) (((val) & 0x1) << 31) #define VTD_FRCD_IR_IDX(val) (((val) & 0xffffULL) << 48) /* DMA Remapping Fault Conditions */ @@ -436,7 +436,7 @@ struct VTDIOTLBPageInvInfo { uint16_t domain_id; uint32_t pasid; uint64_t addr; - uint8_t mask; + uint64_t mask; }; typedef struct VTDIOTLBPageInvInfo VTDIOTLBPageInvInfo; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 9445b07b4f..d9e69243b4 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -495,6 +495,7 @@ static void pc_i440fx_machine_9_0_options(MachineClass *m) pc_i440fx_machine_9_1_options(m); m->alias = NULL; m->is_default = false; + m->smbios_memory_device_size = 16 * GiB; compat_props_add(m->compat_props, hw_compat_9_0, hw_compat_9_0_len); compat_props_add(m->compat_props, pc_compat_9_0, pc_compat_9_0_len); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 71d3c6d122..9d108b194e 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -374,6 +374,7 @@ static void pc_q35_machine_9_0_options(MachineClass *m) PCMachineClass *pcmc = PC_MACHINE_CLASS(m); pc_q35_machine_9_1_options(m); m->alias = NULL; + m->smbios_memory_device_size = 16 * GiB; compat_props_add(m->compat_props, hw_compat_9_0, hw_compat_9_0_len); compat_props_add(m->compat_props, pc_compat_9_0, pc_compat_9_0_len); pcmc->isa_bios_alias = false; diff --git a/hw/i386/sgx.c b/hw/i386/sgx.c index a14a84bc6f..849472a128 100644 --- a/hw/i386/sgx.c +++ b/hw/i386/sgx.c @@ -268,10 +268,12 @@ void hmp_info_sgx(Monitor *mon, const QDict *qdict) bool sgx_epc_get_section(int section_nr, uint64_t *addr, uint64_t *size) { - PCMachineState *pcms = PC_MACHINE(qdev_get_machine()); + PCMachineState *pcms = + (PCMachineState *)object_dynamic_cast(qdev_get_machine(), + TYPE_PC_MACHINE); SGXEPCDevice *epc; - if (pcms->sgx_epc.size == 0 || pcms->sgx_epc.nr_sections <= section_nr) { + if (!pcms || pcms->sgx_epc.size == 0 || pcms->sgx_epc.nr_sections <= section_nr) { return true; } diff --git a/hw/intc/loongson_ipi.c b/hw/intc/loongson_ipi.c index e6a7142480..682cec96f3 100644 --- a/hw/intc/loongson_ipi.c +++ b/hw/intc/loongson_ipi.c @@ -14,6 +14,7 @@ #include "qapi/error.h" #include "qemu/log.h" #include "exec/address-spaces.h" +#include "exec/memory.h" #include "migration/vmstate.h" #ifdef TARGET_LOONGARCH64 #include "target/loongarch/cpu.h" @@ -102,7 +103,7 @@ static MemTxResult send_ipi_data(CPUState *cpu, uint64_t val, hwaddr addr, * if the mask is 0, we need not to do anything. 
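The do_invalidate_device_tlb() factor-out above decodes the ATS invalidation range: with the S bit set, the run of trailing 1s starting at address bit 12 selects the power-of-two size, which is what cto64(addr >> VTD_PAGE_SHIFT) computes. A standalone sketch of the decode (hypothetical name, trailing-ones count open-coded):

#include <stdint.h>

static uint64_t demo_ats_range_size(uint64_t addr)
{
    uint64_t sz = 0x2000;           /* 8 KiB minimum when S = 1 */
    uint64_t bits = addr >> 12;

    while (bits & 1) {              /* cto64() in the hunk above */
        sz <<= 1;
        bits >>= 1;
    }
    return sz;
}

/* e.g. bits 15:12 = 0111 -> three trailing ones -> 8K << 3 = 64 KiB,
 * matching the ATS spec table quoted in the comment */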
*/ if ((val >> 27) & 0xf) { - data = address_space_ldl(iocsr_as, addr, attrs, NULL); + data = address_space_ldl_le(iocsr_as, addr, attrs, NULL); for (i = 0; i < 4; i++) { /* get mask for byte writing */ if (val & (0x1 << (27 + i))) { @@ -113,7 +114,7 @@ static MemTxResult send_ipi_data(CPUState *cpu, uint64_t val, hwaddr addr, data &= mask; data |= (val >> 32) & ~mask; - address_space_stl(iocsr_as, addr, data, attrs, NULL); + address_space_stl_le(iocsr_as, addr, data, attrs, NULL); return MEMTX_OK; } @@ -317,6 +318,13 @@ static void loongson_ipi_realize(DeviceState *dev, Error **errp) } } +static void loongson_ipi_unrealize(DeviceState *dev) +{ + LoongsonIPI *s = LOONGSON_IPI(dev); + + g_free(s->cpu); +} + static const VMStateDescription vmstate_ipi_core = { .name = "ipi-single", .version_id = 2, @@ -352,28 +360,18 @@ static void loongson_ipi_class_init(ObjectClass *klass, void *data) DeviceClass *dc = DEVICE_CLASS(klass); dc->realize = loongson_ipi_realize; + dc->unrealize = loongson_ipi_unrealize; device_class_set_props(dc, ipi_properties); dc->vmsd = &vmstate_loongson_ipi; } -static void loongson_ipi_finalize(Object *obj) -{ - LoongsonIPI *s = LOONGSON_IPI(obj); - - g_free(s->cpu); -} - -static const TypeInfo loongson_ipi_info = { - .name = TYPE_LOONGSON_IPI, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(LoongsonIPI), - .class_init = loongson_ipi_class_init, - .instance_finalize = loongson_ipi_finalize, +static const TypeInfo loongson_ipi_types[] = { + { + .name = TYPE_LOONGSON_IPI, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(LoongsonIPI), + .class_init = loongson_ipi_class_init, + } }; -static void loongson_ipi_register_types(void) -{ - type_register_static(&loongson_ipi_info); -} - -type_init(loongson_ipi_register_types) +DEFINE_TYPES(loongson_ipi_types) diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c index 35ac59883a..d648192ab9 100644 --- a/hw/mem/cxl_type3.c +++ b/hw/mem/cxl_type3.c @@ -737,6 +737,11 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp) error_setg(errp, "volatile memdev must have backing device"); return false; } + if (host_memory_backend_is_mapped(ct3d->hostvmem)) { + error_setg(errp, "memory backend %s can't be used multiple times.", + object_get_canonical_path_component(OBJECT(ct3d->hostvmem))); + return false; + } memory_region_set_nonvolatile(vmr, false); memory_region_set_enabled(vmr, true); host_memory_backend_set_mapped(ct3d->hostvmem, true); @@ -760,6 +765,11 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp) error_setg(errp, "persistent memdev must have backing device"); return false; } + if (host_memory_backend_is_mapped(ct3d->hostpmem)) { + error_setg(errp, "memory backend %s can't be used multiple times.", + object_get_canonical_path_component(OBJECT(ct3d->hostpmem))); + return false; + } memory_region_set_nonvolatile(pmr, true); memory_region_set_enabled(pmr, true); host_memory_backend_set_mapped(ct3d->hostpmem, true); @@ -790,6 +800,11 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp) return false; } + if (host_memory_backend_is_mapped(ct3d->dc.host_dc)) { + error_setg(errp, "memory backend %s can't be used multiple times.", + object_get_canonical_path_component(OBJECT(ct3d->dc.host_dc))); + return false; + } /* * Set DC regions as volatile for now, non-volatile support can * be added in the future if needed. 
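Note: the three cxl_setup_memory() hunks above repeat one guard per backend (volatile, persistent, dynamic capacity). A condensed sketch of that pattern, assuming only the host_memory_backend_* helpers already used in the hunks; claim_memory_backend itself is a hypothetical name:

    #include "sysemu/hostmem.h"   /* host_memory_backend_is_mapped/_set_mapped */
    #include "qapi/error.h"       /* error_setg */

    /* Claim a backend exclusively: reject it if another device already
     * mapped it, otherwise mark it as mapped by this device. */
    static bool claim_memory_backend(HostMemoryBackend *backend, Error **errp)
    {
        if (host_memory_backend_is_mapped(backend)) {
            error_setg(errp, "memory backend %s can't be used multiple times.",
                       object_get_canonical_path_component(OBJECT(backend)));
            return false;
        }
        host_memory_backend_set_mapped(backend, true);
        return true;
    }

The exclusivity check matters because two devices sharing one backing region would silently alias each other's guest memory.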
@@ -829,6 +844,7 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp) uint8_t *pci_conf = pci_dev->config; unsigned short msix_num = 6; int i, rc; + uint16_t count; QTAILQ_INIT(&ct3d->error_list); @@ -893,6 +909,28 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp) } cxl_event_init(&ct3d->cxl_dstate, 2); + /* Set default value for patrol scrub attributes */ + ct3d->patrol_scrub_attrs.scrub_cycle_cap = + CXL_MEMDEV_PS_SCRUB_CYCLE_CHANGE_CAP_DEFAULT | + CXL_MEMDEV_PS_SCRUB_REALTIME_REPORT_CAP_DEFAULT; + ct3d->patrol_scrub_attrs.scrub_cycle = + CXL_MEMDEV_PS_CUR_SCRUB_CYCLE_DEFAULT | + (CXL_MEMDEV_PS_MIN_SCRUB_CYCLE_DEFAULT << 8); + ct3d->patrol_scrub_attrs.scrub_flags = CXL_MEMDEV_PS_ENABLE_DEFAULT; + + /* Set default value for DDR5 ECS read attributes */ + for (count = 0; count < CXL_ECS_NUM_MEDIA_FRUS; count++) { + ct3d->ecs_attrs[count].ecs_log_cap = + CXL_ECS_LOG_ENTRY_TYPE_DEFAULT; + ct3d->ecs_attrs[count].ecs_cap = + CXL_ECS_REALTIME_REPORT_CAP_DEFAULT; + ct3d->ecs_attrs[count].ecs_config = + CXL_ECS_THRESHOLD_COUNT_DEFAULT | + (CXL_ECS_MODE_DEFAULT << 3); + /* Reserved */ + ct3d->ecs_attrs[count].ecs_flags = 0; + } + return; err_release_cdat: @@ -1127,7 +1165,7 @@ MemTxResult cxl_type3_read(PCIDevice *d, hwaddr host_addr, uint64_t *data, return MEMTX_ERROR; } - if (sanitize_running(&ct3d->cci)) { + if (cxl_dev_media_disabled(&ct3d->cxl_dstate)) { qemu_guest_getrandom_nofail(data, size); return MEMTX_OK; } @@ -1149,7 +1187,7 @@ MemTxResult cxl_type3_write(PCIDevice *d, hwaddr host_addr, uint64_t data, return MEMTX_ERROR; } - if (sanitize_running(&ct3d->cci)) { + if (cxl_dev_media_disabled(&ct3d->cxl_dstate)) { return MEMTX_OK; } @@ -1304,6 +1342,12 @@ void cxl_set_poison_list_overflowed(CXLType3Dev *ct3d) cxl_device_get_timestamp(&ct3d->cxl_dstate); } +void cxl_clear_poison_list_overflowed(CXLType3Dev *ct3d) +{ + ct3d->poison_list_overflowed = false; + ct3d->poison_list_overflow_ts = 0; +} + void qmp_cxl_inject_poison(const char *path, uint64_t start, uint64_t length, Error **errp) { @@ -1340,19 +1384,21 @@ void qmp_cxl_inject_poison(const char *path, uint64_t start, uint64_t length, } } - if (ct3d->poison_list_cnt == CXL_POISON_LIST_LIMIT) { - cxl_set_poison_list_overflowed(ct3d); - return; - } - p = g_new0(CXLPoison, 1); p->length = length; p->start = start; /* Different from injected via the mbox */ p->type = CXL_POISON_TYPE_INTERNAL; - QLIST_INSERT_HEAD(&ct3d->poison_list, p, node); - ct3d->poison_list_cnt++; + if (ct3d->poison_list_cnt < CXL_POISON_LIST_LIMIT) { + QLIST_INSERT_HEAD(&ct3d->poison_list, p, node); + ct3d->poison_list_cnt++; + } else { + if (!ct3d->poison_list_overflowed) { + cxl_set_poison_list_overflowed(ct3d); + } + QLIST_INSERT_HEAD(&ct3d->poison_list_bkp, p, node); + } } /* For uncorrectable errors include support for multiple header recording */ diff --git a/hw/mips/loongson3_virt.c b/hw/mips/loongson3_virt.c index 4ad36f0c5b..408e3d7054 100644 --- a/hw/mips/loongson3_virt.c +++ b/hw/mips/loongson3_virt.c @@ -355,8 +355,8 @@ static uint64_t load_kernel(CPUMIPSState *env) kernel_size = load_elf(loaderparams.kernel_filename, NULL, cpu_mips_kseg0_to_phys, NULL, - (uint64_t *)&kernel_entry, - (uint64_t *)&kernel_low, (uint64_t *)&kernel_high, + &kernel_entry, + &kernel_low, &kernel_high, NULL, 0, EM_MIPS, 1, 0); if (kernel_size < 0) { error_report("could not load kernel '%s': %s", diff --git a/hw/net/allwinner_emac.c b/hw/net/allwinner_emac.c index 989839784a..d40ff37e99 100644 --- a/hw/net/allwinner_emac.c +++ b/hw/net/allwinner_emac.c @@ -349,7 
+349,7 @@ static void aw_emac_write(void *opaque, hwaddr offset, uint64_t value, "allwinner_emac: TX length > fifo data length\n"); } if (len > 0) { - data = fifo8_pop_buf(fifo, len, &ret); + data = fifo8_pop_bufptr(fifo, len, &ret); qemu_send_packet(nc, data, ret); aw_emac_tx_reset(s, chan); /* Raise TX interrupt */ diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index 18898afe81..a788e6937e 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -48,6 +48,7 @@ static const int kernel_feature_bits[] = { VIRTIO_F_IOMMU_PLATFORM, VIRTIO_F_RING_PACKED, VIRTIO_F_RING_RESET, + VIRTIO_F_IN_ORDER, VIRTIO_F_NOTIFICATION_DATA, VIRTIO_NET_F_HASH_REPORT, VHOST_INVALID_FEATURE_BIT @@ -78,6 +79,7 @@ static const int user_feature_bits[] = { VIRTIO_F_IOMMU_PLATFORM, VIRTIO_F_RING_PACKED, VIRTIO_F_RING_RESET, + VIRTIO_F_IN_ORDER, VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_GUEST_USO4, diff --git a/hw/nubus/nubus-virtio-mmio.c b/hw/nubus/nubus-virtio-mmio.c index 58a63c84d0..7a98731c45 100644 --- a/hw/nubus/nubus-virtio-mmio.c +++ b/hw/nubus/nubus-virtio-mmio.c @@ -7,6 +7,7 @@ */ #include "qemu/osdep.h" +#include "qapi/error.h" #include "hw/nubus/nubus-virtio-mmio.h" @@ -23,6 +24,7 @@ static void nubus_virtio_mmio_set_input_irq(void *opaque, int n, int level) static void nubus_virtio_mmio_realize(DeviceState *dev, Error **errp) { + ERRP_GUARD(); NubusVirtioMMIODeviceClass *nvmdc = NUBUS_VIRTIO_MMIO_GET_CLASS(dev); NubusVirtioMMIO *s = NUBUS_VIRTIO_MMIO(dev); NubusDevice *nd = NUBUS_DEVICE(dev); diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 8d25174d25..e86ea2e7ce 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -203,6 +203,7 @@ #include "sysemu/hostmem.h" #include "hw/pci/msix.h" #include "hw/pci/pcie_sriov.h" +#include "sysemu/spdm-socket.h" #include "migration/vmstate.h" #include "nvme.h" @@ -8315,6 +8316,27 @@ static int nvme_add_pm_capability(PCIDevice *pci_dev, uint8_t offset) return 0; } +static bool pcie_doe_spdm_rsp(DOECap *doe_cap) +{ + void *req = pcie_doe_get_write_mbox_ptr(doe_cap); + uint32_t req_len = pcie_doe_get_obj_len(req) * 4; + void *rsp = doe_cap->read_mbox; + uint32_t rsp_len = SPDM_SOCKET_MAX_MESSAGE_BUFFER_SIZE; + + uint32_t recvd = spdm_socket_rsp(doe_cap->spdm_socket, + SPDM_SOCKET_TRANSPORT_TYPE_PCI_DOE, + req, req_len, rsp, rsp_len); + doe_cap->read_mbox_len += DIV_ROUND_UP(recvd, 4); + + return recvd != 0; +} + +static DOEProtocol doe_spdm_prot[] = { + { PCI_VENDOR_ID_PCI_SIG, PCI_SIG_DOE_CMA, pcie_doe_spdm_rsp }, + { PCI_VENDOR_ID_PCI_SIG, PCI_SIG_DOE_SECURED_CMA, pcie_doe_spdm_rsp }, + { } +}; + static bool nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp) { ERRP_GUARD(); @@ -8402,6 +8424,25 @@ static bool nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp) nvme_update_msixcap_ts(pci_dev, n->conf_msix_qsize); + pcie_cap_deverr_init(pci_dev); + + /* DOE Initialisation */ + if (pci_dev->spdm_port) { + uint16_t doe_offset = n->params.sriov_max_vfs ? 
+ PCI_CONFIG_SPACE_SIZE + PCI_ARI_SIZEOF + : PCI_CONFIG_SPACE_SIZE; + + pcie_doe_init(pci_dev, &pci_dev->doe_spdm, doe_offset, + doe_spdm_prot, true, 0); + + pci_dev->doe_spdm.spdm_socket = spdm_socket_connect(pci_dev->spdm_port, + errp); + + if (pci_dev->doe_spdm.spdm_socket < 0) { + return false; + } + } + if (n->params.cmb_size_mb) { nvme_init_cmb(n, pci_dev); } @@ -8650,6 +8691,11 @@ static void nvme_exit(PCIDevice *pci_dev) g_free(n->cmb.buf); } + if (pci_dev->doe_spdm.spdm_socket > 0) { + spdm_socket_close(pci_dev->doe_spdm.spdm_socket, + SPDM_SOCKET_TRANSPORT_TYPE_PCI_DOE); + } + if (n->pmr.dev) { host_memory_backend_set_mapped(n->pmr.dev, false); } @@ -8695,6 +8741,7 @@ static Property nvme_props[] = { DEFINE_PROP_BOOL("msix-exclusive-bar", NvmeCtrl, params.msix_exclusive_bar, false), DEFINE_PROP_UINT16("mqes", NvmeCtrl, params.mqes, 0x7ff), + DEFINE_PROP_UINT16("spdm_port", PCIDevice, spdm_port, 0), DEFINE_PROP_END_OF_LIST(), }; @@ -8766,11 +8813,25 @@ static void nvme_pci_write_config(PCIDevice *dev, uint32_t address, { uint16_t old_num_vfs = pcie_sriov_num_vfs(dev); + if (pcie_find_capability(dev, PCI_EXT_CAP_ID_DOE)) { + pcie_doe_write_config(&dev->doe_spdm, address, val, len); + } pci_default_write_config(dev, address, val, len); pcie_cap_flr_write_config(dev, address, val, len); nvme_sriov_post_write_config(dev, old_num_vfs); } +static uint32_t nvme_pci_read_config(PCIDevice *dev, uint32_t address, int len) +{ + uint32_t val; + if (dev->spdm_port && pcie_find_capability(dev, PCI_EXT_CAP_ID_DOE)) { + if (pcie_doe_read_config(&dev->doe_spdm, address, len, &val)) { + return val; + } + } + return pci_default_read_config(dev, address, len); +} + static const VMStateDescription nvme_vmstate = { .name = "nvme", .unmigratable = 1, @@ -8783,6 +8844,7 @@ static void nvme_class_init(ObjectClass *oc, void *data) pc->realize = nvme_realize; pc->config_write = nvme_pci_write_config; + pc->config_read = nvme_pci_read_config; pc->exit = nvme_exit; pc->class_id = PCI_CLASS_STORAGE_EXPRESS; pc->revision = 2; diff --git a/hw/pci-host/gpex-acpi.c b/hw/pci-host/gpex-acpi.c index f69413ea2c..391fabb8a8 100644 --- a/hw/pci-host/gpex-acpi.c +++ b/hw/pci-host/gpex-acpi.c @@ -7,7 +7,8 @@ #include "hw/pci/pcie_host.h" #include "hw/acpi/cxl.h" -static void acpi_dsdt_add_pci_route_table(Aml *dev, uint32_t irq) +static void acpi_dsdt_add_pci_route_table(Aml *dev, uint32_t irq, + Aml *scope, uint8_t bus_num) { Aml *method, *crs; int i, slot_no; @@ -20,7 +21,7 @@ static void acpi_dsdt_add_pci_route_table(Aml *dev, uint32_t irq) Aml *pkg = aml_package(4); aml_append(pkg, aml_int((slot_no << 16) | 0xFFFF)); aml_append(pkg, aml_int(i)); - aml_append(pkg, aml_name("GSI%d", gsi)); + aml_append(pkg, aml_name("L%.02X%X", bus_num, gsi)); aml_append(pkg, aml_int(0)); aml_append(rt_pkg, pkg); } @@ -30,7 +31,7 @@ static void acpi_dsdt_add_pci_route_table(Aml *dev, uint32_t irq) /* Create GSI link device */ for (i = 0; i < PCI_NUM_PINS; i++) { uint32_t irqs = irq + i; - Aml *dev_gsi = aml_device("GSI%d", i); + Aml *dev_gsi = aml_device("L%.02X%X", bus_num, i); aml_append(dev_gsi, aml_name_decl("_HID", aml_string("PNP0C0F"))); aml_append(dev_gsi, aml_name_decl("_UID", aml_int(i))); crs = aml_resource_template(); @@ -45,7 +46,7 @@ static void acpi_dsdt_add_pci_route_table(Aml *dev, uint32_t irq) aml_append(dev_gsi, aml_name_decl("_CRS", crs)); method = aml_method("_SRS", 1, AML_NOTSERIALIZED); aml_append(dev_gsi, method); - aml_append(dev, dev_gsi); + aml_append(scope, dev_gsi); } } @@ -174,7 +175,7 @@ void 
acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg) aml_append(dev, aml_name_decl("_PXM", aml_int(numa_node))); } - acpi_dsdt_add_pci_route_table(dev, cfg->irq); + acpi_dsdt_add_pci_route_table(dev, cfg->irq, scope, bus_num); /* * Resources defined for PXBs are composed of the following parts: @@ -205,7 +206,7 @@ void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg) aml_append(dev, aml_name_decl("_STR", aml_unicode("PCIe 0 Device"))); aml_append(dev, aml_name_decl("_CCA", aml_int(1))); - acpi_dsdt_add_pci_route_table(dev, cfg->irq); + acpi_dsdt_add_pci_route_table(dev, cfg->irq, scope, 0); method = aml_method("_CBA", 0, AML_NOTSERIALIZED); aml_append(method, aml_return(aml_int(cfg->ecam.base))); diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 4c7be52951..8ad5d7e2d8 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -85,6 +85,7 @@ static Property pci_props[] = { QEMU_PCIE_ERR_UNC_MASK_BITNR, true), DEFINE_PROP_BIT("x-pcie-ari-nextfn-1", PCIDevice, cap_present, QEMU_PCIE_ARI_NEXTFN_1_BITNR, false), + DEFINE_PROP_STRING("sriov-pf", PCIDevice, sriov_pf), DEFINE_PROP_END_OF_LIST() }; @@ -959,13 +960,8 @@ static void pci_init_multifunction(PCIBus *bus, PCIDevice *dev, Error **errp) dev->config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION; } - /* - * With SR/IOV and ARI, a device at function 0 need not be a multifunction - * device, as it may just be a VF that ended up with function 0 in - * the legacy PCI interpretation. Avoid failing in such cases: - */ - if (pci_is_vf(dev) && - dev->exp.sriov_vf.pf->cap_present & QEMU_PCI_CAP_MULTIFUNCTION) { + /* SR/IOV is not handled here. */ + if (pci_is_vf(dev)) { return; } @@ -998,7 +994,8 @@ static void pci_init_multifunction(PCIBus *bus, PCIDevice *dev, Error **errp) } /* function 0 indicates single function, so function > 0 must be NULL */ for (func = 1; func < PCI_FUNC_MAX; ++func) { - if (bus->devices[PCI_DEVFN(slot, func)]) { + PCIDevice *device = bus->devices[PCI_DEVFN(slot, func)]; + if (device && !pci_is_vf(device)) { error_setg(errp, "PCI: %x.0 indicates single function, " "but %x.%x is already populated.", slot, slot, func); @@ -1283,6 +1280,7 @@ static void pci_qdev_unrealize(DeviceState *dev) pci_unregister_io_regions(pci_dev); pci_del_option_rom(pci_dev); + pcie_sriov_unregister_device(pci_dev); if (pc->exit) { pc->exit(pci_dev); @@ -1314,7 +1312,6 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num, pcibus_t size = memory_region_size(memory); uint8_t hdr_type; - assert(!pci_is_vf(pci_dev)); /* VFs must use pcie_sriov_vf_register_bar */ assert(region_num >= 0); assert(region_num < PCI_NUM_REGIONS); assert(is_power_of_2(size)); @@ -1325,7 +1322,6 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num, assert(hdr_type != PCI_HEADER_TYPE_BRIDGE || region_num < 2); r = &pci_dev->io_regions[region_num]; - r->addr = PCI_BAR_UNMAPPED; r->size = size; r->type = type; r->memory = memory; @@ -1333,22 +1329,35 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num, ? 
pci_get_bus(pci_dev)->address_space_io : pci_get_bus(pci_dev)->address_space_mem; - wmask = ~(size - 1); - if (region_num == PCI_ROM_SLOT) { - /* ROM enable bit is writable */ - wmask |= PCI_ROM_ADDRESS_ENABLE; - } - - addr = pci_bar(pci_dev, region_num); - pci_set_long(pci_dev->config + addr, type); + if (pci_is_vf(pci_dev)) { + PCIDevice *pf = pci_dev->exp.sriov_vf.pf; + assert(!pf || type == pf->exp.sriov_pf.vf_bar_type[region_num]); - if (!(r->type & PCI_BASE_ADDRESS_SPACE_IO) && - r->type & PCI_BASE_ADDRESS_MEM_TYPE_64) { - pci_set_quad(pci_dev->wmask + addr, wmask); - pci_set_quad(pci_dev->cmask + addr, ~0ULL); + r->addr = pci_bar_address(pci_dev, region_num, r->type, r->size); + if (r->addr != PCI_BAR_UNMAPPED) { + memory_region_add_subregion_overlap(r->address_space, + r->addr, r->memory, 1); + } } else { - pci_set_long(pci_dev->wmask + addr, wmask & 0xffffffff); - pci_set_long(pci_dev->cmask + addr, 0xffffffff); + r->addr = PCI_BAR_UNMAPPED; + + wmask = ~(size - 1); + if (region_num == PCI_ROM_SLOT) { + /* ROM enable bit is writable */ + wmask |= PCI_ROM_ADDRESS_ENABLE; + } + + addr = pci_bar(pci_dev, region_num); + pci_set_long(pci_dev->config + addr, type); + + if (!(r->type & PCI_BASE_ADDRESS_SPACE_IO) && + r->type & PCI_BASE_ADDRESS_MEM_TYPE_64) { + pci_set_quad(pci_dev->wmask + addr, wmask); + pci_set_quad(pci_dev->cmask + addr, ~0ULL); + } else { + pci_set_long(pci_dev->wmask + addr, wmask & 0xffffffff); + pci_set_long(pci_dev->cmask + addr, 0xffffffff); + } } } @@ -1437,7 +1446,11 @@ static pcibus_t pci_config_get_bar_addr(PCIDevice *d, int reg, pci_get_word(pf->config + sriov_cap + PCI_SRIOV_VF_OFFSET); uint16_t vf_stride = pci_get_word(pf->config + sriov_cap + PCI_SRIOV_VF_STRIDE); - uint32_t vf_num = (d->devfn - (pf->devfn + vf_offset)) / vf_stride; + uint32_t vf_num = d->devfn - (pf->devfn + vf_offset); + + if (vf_num) { + vf_num /= vf_stride; + } if (type & PCI_BASE_ADDRESS_MEM_TYPE_64) { new_addr = pci_get_quad(pf->config + bar); @@ -2105,6 +2118,11 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp) } } + if (!pcie_sriov_register_device(pci_dev, errp)) { + pci_qdev_unrealize(DEVICE(pci_dev)); + return; + } + /* * A PCIe Downstream Port that do not have ARI Forwarding enabled must * associate only Device 0 with the device attached to the bus diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c index 56523ab4e8..0fc9f810b9 100644 --- a/hw/pci/pcie_sriov.c +++ b/hw/pci/pcie_sriov.c @@ -20,6 +20,8 @@ #include "qapi/error.h" #include "trace.h" +static GHashTable *pfs; + static void unparent_vfs(PCIDevice *dev, uint16_t total_vfs) { for (uint16_t i = 0; i < total_vfs; i++) { @@ -31,17 +33,62 @@ static void unparent_vfs(PCIDevice *dev, uint16_t total_vfs) dev->exp.sriov_pf.vf = NULL; } -bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, - const char *vfname, uint16_t vf_dev_id, - uint16_t init_vfs, uint16_t total_vfs, - uint16_t vf_offset, uint16_t vf_stride, - Error **errp) +static void clear_ctrl_vfe(PCIDevice *dev) +{ + uint8_t *ctrl = dev->config + dev->exp.sriov_cap + PCI_SRIOV_CTRL; + pci_set_word(ctrl, pci_get_word(ctrl) & ~PCI_SRIOV_CTRL_VFE); +} + +static void register_vfs(PCIDevice *dev) +{ + uint16_t num_vfs; + uint16_t i; + uint16_t sriov_cap = dev->exp.sriov_cap; + + assert(sriov_cap > 0); + num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF); + if (num_vfs > pci_get_word(dev->config + sriov_cap + PCI_SRIOV_TOTAL_VF)) { + clear_ctrl_vfe(dev); + return; + } + + trace_sriov_register_vfs(dev->name, PCI_SLOT(dev->devfn), + 
PCI_FUNC(dev->devfn), num_vfs); + for (i = 0; i < num_vfs; i++) { + pci_set_enabled(dev->exp.sriov_pf.vf[i], true); + } +} + +static void unregister_vfs(PCIDevice *dev) +{ + uint16_t i; + uint8_t *cfg = dev->config + dev->exp.sriov_cap; + + trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn)); + for (i = 0; i < pci_get_word(cfg + PCI_SRIOV_TOTAL_VF); i++) { + pci_set_enabled(dev->exp.sriov_pf.vf[i], false); + } +} + +static bool pcie_sriov_pf_init_common(PCIDevice *dev, uint16_t offset, + uint16_t vf_dev_id, uint16_t init_vfs, + uint16_t total_vfs, uint16_t vf_offset, + uint16_t vf_stride, Error **errp) { - BusState *bus = qdev_get_parent_bus(&dev->qdev); - int32_t devfn = dev->devfn + vf_offset; uint8_t *cfg = dev->config + offset; uint8_t *wmask; + if (!pci_is_express(dev)) { + error_setg(errp, "PCI Express is required for SR-IOV PF"); + return false; + } + + if (pci_is_vf(dev)) { + error_setg(errp, "a device cannot be both an SR-IOV PF and a VF"); + return false; + } + if (total_vfs) { uint16_t ari_cap = pcie_find_capability(dev, PCI_EXT_CAP_ID_ARI); uint16_t first_vf_devfn = dev->devfn + vf_offset; @@ -90,6 +137,28 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, qdev_prop_set_bit(&dev->qdev, "multifunction", true); + return true; +} + +bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, + const char *vfname, uint16_t vf_dev_id, + uint16_t init_vfs, uint16_t total_vfs, + uint16_t vf_offset, uint16_t vf_stride, + Error **errp) +{ + BusState *bus = qdev_get_parent_bus(&dev->qdev); + int32_t devfn = dev->devfn + vf_offset; + + if (pfs && g_hash_table_contains(pfs, dev->qdev.id)) { + error_setg(errp, "attaching user-created SR-IOV VF unsupported"); + return false; + } + + if (!pcie_sriov_pf_init_common(dev, offset, vf_dev_id, init_vfs, + total_vfs, vf_offset, vf_stride, errp)) { + return false; + } + dev->exp.sriov_pf.vf = g_new(PCIDevice *, total_vfs); for (uint16_t i = 0; i < total_vfs; i++) { @@ -119,7 +188,24 @@ void pcie_sriov_pf_exit(PCIDevice *dev) { uint8_t *cfg = dev->config + dev->exp.sriov_cap; - unparent_vfs(dev, pci_get_word(cfg + PCI_SRIOV_TOTAL_VF)); + if (dev->exp.sriov_pf.vf_user_created) { + uint16_t ven_id = pci_get_word(dev->config + PCI_VENDOR_ID); + uint16_t total_vfs = pci_get_word(dev->config + PCI_SRIOV_TOTAL_VF); + uint16_t vf_dev_id = pci_get_word(dev->config + PCI_SRIOV_VF_DID); + + unregister_vfs(dev); + + for (uint16_t i = 0; i < total_vfs; i++) { + PCIDevice *vf = dev->exp.sriov_pf.vf[i]; + + vf->exp.sriov_vf.pf = NULL; + + pci_config_set_vendor_id(vf->config, ven_id); + pci_config_set_device_id(vf->config, vf_dev_id); + } + } else { + unparent_vfs(dev, pci_get_word(cfg + PCI_SRIOV_TOTAL_VF)); + } } void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num, @@ -152,74 +238,172 @@ void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num, void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num, MemoryRegion *memory) { - PCIIORegion *r; - PCIBus *bus = pci_get_bus(dev); uint8_t type; - pcibus_t size = memory_region_size(memory); - assert(pci_is_vf(dev)); /* PFs must use pci_register_bar */ - assert(region_num >= 0); - assert(region_num < PCI_NUM_REGIONS); + assert(dev->exp.sriov_vf.pf); type = dev->exp.sriov_vf.pf->exp.sriov_pf.vf_bar_type[region_num]; - if (!is_power_of_2(size)) { - error_report("%s: PCI region size must be a power" - " of two - type=0x%x, size=0x%"FMT_PCIBUS, - __func__, type, size); - exit(1); - } - - r = &dev->io_regions[region_num]; - r->memory = memory; - r->address_space = - 
type & PCI_BASE_ADDRESS_SPACE_IO - ? bus->address_space_io - : bus->address_space_mem; - r->size = size; - r->type = type; - - r->addr = pci_bar_address(dev, region_num, r->type, r->size); - if (r->addr != PCI_BAR_UNMAPPED) { - memory_region_add_subregion_overlap(r->address_space, - r->addr, r->memory, 1); - } + return pci_register_bar(dev, region_num, type, memory); } -static void clear_ctrl_vfe(PCIDevice *dev) +static gint compare_vf_devfns(gconstpointer a, gconstpointer b) { - uint8_t *ctrl = dev->config + dev->exp.sriov_cap + PCI_SRIOV_CTRL; - pci_set_word(ctrl, pci_get_word(ctrl) & ~PCI_SRIOV_CTRL_VFE); + return (*(PCIDevice **)a)->devfn - (*(PCIDevice **)b)->devfn; } -static void register_vfs(PCIDevice *dev) +int16_t pcie_sriov_pf_init_from_user_created_vfs(PCIDevice *dev, + uint16_t offset, + Error **errp) { - uint16_t num_vfs; + GPtrArray *pf; + PCIDevice **vfs; + BusState *bus = qdev_get_parent_bus(DEVICE(dev)); + uint16_t ven_id = pci_get_word(dev->config + PCI_VENDOR_ID); + uint16_t vf_dev_id; + uint16_t vf_offset; + uint16_t vf_stride; uint16_t i; - uint16_t sriov_cap = dev->exp.sriov_cap; - assert(sriov_cap > 0); - num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF); - if (num_vfs > pci_get_word(dev->config + sriov_cap + PCI_SRIOV_TOTAL_VF)) { - clear_ctrl_vfe(dev); - return; + if (!pfs || !dev->qdev.id) { + return 0; } - trace_sriov_register_vfs(dev->name, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), num_vfs); - for (i = 0; i < num_vfs; i++) { - pci_set_enabled(dev->exp.sriov_pf.vf[i], true); + pf = g_hash_table_lookup(pfs, dev->qdev.id); + if (!pf) { + return 0; } + + if (pf->len > UINT16_MAX) { + error_setg(errp, "too many VFs"); + return -1; + } + + g_ptr_array_sort(pf, compare_vf_devfns); + vfs = (void *)pf->pdata; + + if (vfs[0]->devfn <= dev->devfn) { + error_setg(errp, "a VF function number is less than the PF function number"); + return -1; + } + + vf_dev_id = pci_get_word(vfs[0]->config + PCI_DEVICE_ID); + vf_offset = vfs[0]->devfn - dev->devfn; + vf_stride = pf->len < 2 ? 
0 : vfs[1]->devfn - vfs[0]->devfn; + + for (i = 0; i < pf->len; i++) { + if (bus != qdev_get_parent_bus(&vfs[i]->qdev)) { + error_setg(errp, "SR-IOV VF parent bus mismatches with PF"); + return -1; + } + + if (ven_id != pci_get_word(vfs[i]->config + PCI_VENDOR_ID)) { + error_setg(errp, "SR-IOV VF vendor ID mismatches with PF"); + return -1; + } + + if (vf_dev_id != pci_get_word(vfs[i]->config + PCI_DEVICE_ID)) { + error_setg(errp, "inconsistent SR-IOV VF device IDs"); + return -1; + } + + for (size_t j = 0; j < PCI_NUM_REGIONS; j++) { + if (vfs[i]->io_regions[j].size != vfs[0]->io_regions[j].size || + vfs[i]->io_regions[j].type != vfs[0]->io_regions[j].type) { + error_setg(errp, "inconsistent SR-IOV BARs"); + return -1; + } + } + + if (vfs[i]->devfn - vfs[0]->devfn != vf_stride * i) { + error_setg(errp, "inconsistent SR-IOV stride"); + return -1; + } + } + + if (!pcie_sriov_pf_init_common(dev, offset, vf_dev_id, pf->len, + pf->len, vf_offset, vf_stride, errp)) { + return -1; + } + + for (i = 0; i < pf->len; i++) { + vfs[i]->exp.sriov_vf.pf = dev; + vfs[i]->exp.sriov_vf.vf_number = i; + + /* set vid/did according to sr/iov spec - they are not used */ + pci_config_set_vendor_id(vfs[i]->config, 0xffff); + pci_config_set_device_id(vfs[i]->config, 0xffff); + } + + dev->exp.sriov_pf.vf = vfs; + dev->exp.sriov_pf.vf_user_created = true; + + for (i = 0; i < PCI_NUM_REGIONS; i++) { + PCIIORegion *region = &vfs[0]->io_regions[i]; + + if (region->size) { + pcie_sriov_pf_init_vf_bar(dev, i, region->type, region->size); + } + } + + return PCI_EXT_CAP_SRIOV_SIZEOF; } -static void unregister_vfs(PCIDevice *dev) +bool pcie_sriov_register_device(PCIDevice *dev, Error **errp) { - uint16_t i; - uint8_t *cfg = dev->config + dev->exp.sriov_cap; + if (!dev->exp.sriov_pf.vf && dev->qdev.id && + pfs && g_hash_table_contains(pfs, dev->qdev.id)) { + error_setg(errp, "attaching user-created SR-IOV VF unsupported"); + return false; + } - trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn)); - for (i = 0; i < pci_get_word(cfg + PCI_SRIOV_TOTAL_VF); i++) { - pci_set_enabled(dev->exp.sriov_pf.vf[i], false); + if (dev->sriov_pf) { + PCIDevice *pci_pf; + GPtrArray *pf; + + if (!PCI_DEVICE_GET_CLASS(dev)->sriov_vf_user_creatable) { + error_setg(errp, "user cannot create SR-IOV VF with this device type"); + return false; + } + + if (!pci_is_express(dev)) { + error_setg(errp, "PCI Express is required for SR-IOV VF"); + return false; + } + + if (!pci_qdev_find_device(dev->sriov_pf, &pci_pf)) { + error_setg(errp, "PCI device specified as SR-IOV PF already exists"); + return false; + } + + if (!pfs) { + pfs = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, NULL); + } + + pf = g_hash_table_lookup(pfs, dev->sriov_pf); + if (!pf) { + pf = g_ptr_array_new(); + g_hash_table_insert(pfs, g_strdup(dev->sriov_pf), pf); + } + + g_ptr_array_add(pf, dev); + } + + return true; +} + +void pcie_sriov_unregister_device(PCIDevice *dev) +{ + if (dev->sriov_pf && pfs) { + GPtrArray *pf = g_hash_table_lookup(pfs, dev->sriov_pf); + + if (pf) { + g_ptr_array_remove_fast(pf, dev); + + if (!pf->len) { + g_hash_table_remove(pfs, dev->sriov_pf); + g_ptr_array_free(pf, FALSE); + } + } } } @@ -306,7 +490,7 @@ void pcie_sriov_pf_add_sup_pgsize(PCIDevice *dev, uint16_t opt_sup_pgsize) uint16_t pcie_sriov_vf_number(PCIDevice *dev) { - assert(pci_is_vf(dev)); + assert(dev->exp.sriov_vf.pf); return dev->exp.sriov_vf.vf_number; } diff --git a/hw/riscv/virt-acpi-build.c b/hw/riscv/virt-acpi-build.c index 
0925528160..36d6a3a412 100644 --- a/hw/riscv/virt-acpi-build.c +++ b/hw/riscv/virt-acpi-build.c @@ -141,12 +141,36 @@ static void acpi_dsdt_add_cpus(Aml *scope, RISCVVirtState *s) } } +static void acpi_dsdt_add_plic_aplic(Aml *scope, uint8_t socket_count, + uint64_t mmio_base, uint64_t mmio_size, + const char *hid) +{ + uint64_t plic_aplic_addr; + uint32_t gsi_base; + uint8_t socket; + + for (socket = 0; socket < socket_count; socket++) { + plic_aplic_addr = mmio_base + mmio_size * socket; + gsi_base = VIRT_IRQCHIP_NUM_SOURCES * socket; + Aml *dev = aml_device("IC%.02X", socket); + aml_append(dev, aml_name_decl("_HID", aml_string("%s", hid))); + aml_append(dev, aml_name_decl("_UID", aml_int(socket))); + aml_append(dev, aml_name_decl("_GSB", aml_int(gsi_base))); + + Aml *crs = aml_resource_template(); + aml_append(crs, aml_memory32_fixed(plic_aplic_addr, mmio_size, + AML_READ_WRITE)); + aml_append(dev, aml_name_decl("_CRS", crs)); + aml_append(scope, dev); + } +} + static void acpi_dsdt_add_uart(Aml *scope, const MemMapEntry *uart_memmap, uint32_t uart_irq) { Aml *dev = aml_device("COM0"); - aml_append(dev, aml_name_decl("_HID", aml_string("PNP0501"))); + aml_append(dev, aml_name_decl("_HID", aml_string("RSCV0003"))); aml_append(dev, aml_name_decl("_UID", aml_int(0))); Aml *crs = aml_resource_template(); @@ -411,6 +435,14 @@ static void build_dsdt(GArray *table_data, socket_count = riscv_socket_count(ms); + if (s->aia_type == VIRT_AIA_TYPE_NONE) { + acpi_dsdt_add_plic_aplic(scope, socket_count, memmap[VIRT_PLIC].base, + memmap[VIRT_PLIC].size, "RSCV0001"); + } else { + acpi_dsdt_add_plic_aplic(scope, socket_count, memmap[VIRT_APLIC_S].base, + memmap[VIRT_APLIC_S].size, "RSCV0002"); + } + acpi_dsdt_add_uart(scope, &memmap[VIRT_UART0], UART0_IRQ); if (socket_count == 1) { diff --git a/hw/rtc/ls7a_rtc.c b/hw/rtc/ls7a_rtc.c index 052201c2cd..3226b6105e 100644 --- a/hw/rtc/ls7a_rtc.c +++ b/hw/rtc/ls7a_rtc.c @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ /* - * Loongarch LS7A Real Time Clock emulation + * LoongArch LS7A Real Time Clock emulation * * Copyright (C) 2021 Loongson Technology Corporation Limited */ diff --git a/hw/scsi/esp.c b/hw/scsi/esp.c index 8504dd30a0..b7af825623 100644 --- a/hw/scsi/esp.c +++ b/hw/scsi/esp.c @@ -197,39 +197,9 @@ static uint8_t esp_fifo_pop(ESPState *s) return val; } -static uint32_t esp_fifo8_pop_buf(Fifo8 *fifo, uint8_t *dest, int maxlen) -{ - const uint8_t *buf; - uint32_t n, n2; - int len; - - if (maxlen == 0) { - return 0; - } - - len = maxlen; - buf = fifo8_pop_buf(fifo, len, &n); - if (dest) { - memcpy(dest, buf, n); - } - - /* Add FIFO wraparound if needed */ - len -= n; - len = MIN(len, fifo8_num_used(fifo)); - if (len) { - buf = fifo8_pop_buf(fifo, len, &n2); - if (dest) { - memcpy(&dest[n], buf, n2); - } - n += n2; - } - - return n; -} - static uint32_t esp_fifo_pop_buf(ESPState *s, uint8_t *dest, int maxlen) { - uint32_t len = esp_fifo8_pop_buf(&s->fifo, dest, maxlen); + uint32_t len = fifo8_pop_buf(&s->fifo, dest, maxlen); esp_update_drq(s); return len; @@ -335,7 +305,7 @@ static void do_command_phase(ESPState *s) if (!cmdlen || !s->current_dev) { return; } - esp_fifo8_pop_buf(&s->cmdfifo, buf, cmdlen); + fifo8_pop_buf(&s->cmdfifo, buf, cmdlen); current_lun = scsi_device_find(&s->bus, 0, s->current_dev->id, s->lun); if (!current_lun) { @@ -381,7 +351,7 @@ static void do_message_phase(ESPState *s) /* Ignore extended messages for now */ if (s->cmdfifo_cdb_offset) { int len = MIN(s->cmdfifo_cdb_offset, fifo8_num_used(&s->cmdfifo)); - 
esp_fifo8_pop_buf(&s->cmdfifo, NULL, len); + fifo8_drop(&s->cmdfifo, len); s->cmdfifo_cdb_offset = 0; } } @@ -486,7 +456,7 @@ static bool esp_cdb_ready(ESPState *s) return false; } - pbuf = fifo8_peek_buf(&s->cmdfifo, len, &n); + pbuf = fifo8_peek_bufptr(&s->cmdfifo, len, &n); if (n < len) { /* * In normal use the cmdfifo should never wrap, but include this check diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c index 3d5fe0994d..49cff2a0cb 100644 --- a/hw/scsi/vhost-scsi.c +++ b/hw/scsi/vhost-scsi.c @@ -38,6 +38,7 @@ static const int kernel_feature_bits[] = { VIRTIO_RING_F_EVENT_IDX, VIRTIO_SCSI_F_HOTPLUG, VIRTIO_F_RING_RESET, + VIRTIO_F_IN_ORDER, VIRTIO_F_NOTIFICATION_DATA, VHOST_INVALID_FEATURE_BIT }; diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c index cc91ade525..55e4be5b34 100644 --- a/hw/scsi/vhost-user-scsi.c +++ b/hw/scsi/vhost-user-scsi.c @@ -36,6 +36,7 @@ static const int user_feature_bits[] = { VIRTIO_RING_F_EVENT_IDX, VIRTIO_SCSI_F_HOTPLUG, VIRTIO_F_RING_RESET, + VIRTIO_F_IN_ORDER, VIRTIO_F_NOTIFICATION_DATA, VHOST_INVALID_FEATURE_BIT }; diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c index 3b7703489d..a394514264 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c @@ -1093,6 +1093,7 @@ static bool smbios_get_tables_ep(MachineState *ms, Error **errp) { unsigned i, dimm_cnt, offset; + MachineClass *mc = MACHINE_GET_CLASS(ms); ERRP_GUARD(); assert(ep_type == SMBIOS_ENTRY_POINT_TYPE_32 || @@ -1123,12 +1124,12 @@ static bool smbios_get_tables_ep(MachineState *ms, smbios_build_type_9_table(errp); smbios_build_type_11_table(); -#define MAX_DIMM_SZ (16 * GiB) -#define GET_DIMM_SZ ((i < dimm_cnt - 1) ? MAX_DIMM_SZ \ - : ((current_machine->ram_size - 1) % MAX_DIMM_SZ) + 1) +#define GET_DIMM_SZ ((i < dimm_cnt - 1) ? mc->smbios_memory_device_size \ + : ((current_machine->ram_size - 1) % mc->smbios_memory_device_size) + 1) - dimm_cnt = QEMU_ALIGN_UP(current_machine->ram_size, MAX_DIMM_SZ) / - MAX_DIMM_SZ; + dimm_cnt = QEMU_ALIGN_UP(current_machine->ram_size, + mc->smbios_memory_device_size) / + mc->smbios_memory_device_size; /* * The offset determines if we need to keep additional space between diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c index 4cb5393c0b..471950adef 100644 --- a/hw/timer/hpet.c +++ b/hw/timer/hpet.c @@ -54,10 +54,12 @@ typedef struct HPETTimer { /* timers */ uint64_t cmp; /* comparator */ uint64_t fsb; /* FSB route */ /* Hidden register state */ + uint64_t cmp64; /* comparator (extended to counter width) */ uint64_t period; /* Last value written to comparator */ uint8_t wrap_flag; /* timer pop will indicate wrap for one-shot 32-bit * mode. Next pop will be actual timer expiration. */ + uint64_t last; /* last value armed, to avoid timer storms */ } HPETTimer; struct HPETState { @@ -116,11 +118,6 @@ static uint32_t timer_enabled(HPETTimer *t) static uint32_t hpet_time_after(uint64_t a, uint64_t b) { - return ((int32_t)(b - a) < 0); -} - -static uint32_t hpet_time_after64(uint64_t a, uint64_t b) -{ return ((int64_t)(b - a) < 0); } @@ -156,29 +153,34 @@ static uint64_t hpet_get_ticks(HPETState *s) return ns_to_ticks(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + s->hpet_offset); } +static uint64_t hpet_get_ns(HPETState *s, uint64_t tick) +{ + return ticks_to_ns(tick) - s->hpet_offset; +} + /* - * calculate diff between comparator value and current ticks + * calculate next value of the general counter that matches the + * target (either entirely, or the low 32-bit only depending on + * the timer mode). 
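Note: for the 32-bit case this comment describes, hpet_calculate_cmp64() below picks the next counter value whose low 32 bits equal the comparator. A worked example; deposit64() is QEMU's bitfield helper from qemu/bitops.h, and the concrete numbers are illustrative only:

    #include "qemu/bitops.h"   /* deposit64() */

    static uint64_t next_cmp64_example(void)
    {
        uint64_t cur_tick = 0x280000000ULL; /* current 64-bit main counter */
        uint64_t target   = 0x10000000ULL;  /* 32-bit comparator value */
        /* Low 32 bits replaced: 0x210000000, already behind the counter... */
        uint64_t result = deposit64(cur_tick, 0, 32, target);

        if (result < cur_tick) {
            result += 0x100000000ULL;       /* ...so it matches at 0x310000000 */
        }
        return result;
    }

Extending the comparator to full counter width up front is what lets the rest of the file drop the separate 32-bit/64-bit diff arithmetic that the old hpet_calculate_diff() needed.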
*/ -static inline uint64_t hpet_calculate_diff(HPETTimer *t, uint64_t current) +static uint64_t hpet_calculate_cmp64(HPETTimer *t, uint64_t cur_tick, uint64_t target) { - if (t->config & HPET_TN_32BIT) { - uint32_t diff, cmp; - - cmp = (uint32_t)t->cmp; - diff = cmp - (uint32_t)current; - diff = (int32_t)diff > 0 ? diff : (uint32_t)1; - return (uint64_t)diff; + uint64_t result = deposit64(cur_tick, 0, 32, target); + if (result < cur_tick) { + result += 0x100000000ULL; + } + return result; } else { - uint64_t diff, cmp; - - cmp = t->cmp; - diff = cmp - current; - diff = (int64_t)diff > 0 ? diff : (uint64_t)1; - return diff; + return target; } } +static uint64_t hpet_next_wrap(uint64_t cur_tick) +{ + return (cur_tick | 0xffffffffU) + 1; +} + static void update_irq(struct HPETTimer *timer, int set) { uint64_t mask; @@ -196,21 +198,31 @@ static void update_irq(struct HPETTimer *timer, int set) } s = timer->state; mask = 1 << timer->tn; - if (!set || !timer_enabled(timer) || !hpet_enabled(timer->state)) { + + if (set && (timer->config & HPET_TN_TYPE_LEVEL)) { + /* + * If HPET_TN_ENABLE bit is 0, "the timer will still operate and + * generate appropriate status bits, but will not cause an interrupt" + */ + s->isr |= mask; + } else { s->isr &= ~mask; + } + + if (set && timer_enabled(timer) && hpet_enabled(s)) { + if (timer_fsb_route(timer)) { + address_space_stl_le(&address_space_memory, timer->fsb >> 32, + timer->fsb & 0xffffffff, MEMTXATTRS_UNSPECIFIED, + NULL); + } else if (timer->config & HPET_TN_TYPE_LEVEL) { + qemu_irq_raise(s->irqs[route]); + } else { + qemu_irq_pulse(s->irqs[route]); + } + } else { if (!timer_fsb_route(timer)) { qemu_irq_lower(s->irqs[route]); } - } else if (timer_fsb_route(timer)) { - address_space_stl_le(&address_space_memory, timer->fsb >> 32, - timer->fsb & 0xffffffff, MEMTXATTRS_UNSPECIFIED, - NULL); - } else if (timer->config & HPET_TN_TYPE_LEVEL) { - s->isr |= mask; - qemu_irq_raise(s->irqs[route]); - } else { - s->isr &= ~mask; - qemu_irq_pulse(s->irqs[route]); } } @@ -250,7 +262,13 @@ static bool hpet_validate_num_timers(void *opaque, int version_id) static int hpet_post_load(void *opaque, int version_id) { HPETState *s = opaque; + int i; + for (i = 0; i < s->num_timers; i++) { + HPETTimer *t = &s->timer[i]; + t->cmp64 = hpet_calculate_cmp64(t, s->hpet_counter, t->cmp); + t->last = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) - NANOSECONDS_PER_SECOND; + } /* Recalculate the offset between the main counter and guest time */ if (!s->hpet_offset_saved) { s->hpet_offset = ticks_to_ns(s->hpet_counter) @@ -346,14 +364,17 @@ static const VMStateDescription vmstate_hpet = { } }; -static void hpet_arm(HPETTimer *t, uint64_t ticks) +static void hpet_arm(HPETTimer *t, uint64_t tick) { - if (ticks < ns_to_ticks(INT64_MAX / 2)) { - timer_mod(t->qemu_timer, - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + ticks_to_ns(ticks)); - } else { - timer_del(t->qemu_timer); + uint64_t ns = hpet_get_ns(t->state, tick); + + /* Clamp period to reasonable min value (1 us) */ + if (timer_is_periodic(t) && ns - t->last < 1000) { + ns = t->last + 1000; } + + t->last = ns; + timer_mod(t->qemu_timer, ns); } /* @@ -362,72 +383,68 @@ static void hpet_arm(HPETTimer *t, uint64_t ticks) static void hpet_timer(void *opaque) { HPETTimer *t = opaque; - uint64_t diff; - uint64_t period = t->period; uint64_t cur_tick = hpet_get_ticks(t->state); if (timer_is_periodic(t) && period != 0) { + while (hpet_time_after(cur_tick, t->cmp64)) { + t->cmp64 += period; + } if (t->config & HPET_TN_32BIT) { - while 
(hpet_time_after(cur_tick, t->cmp)) { - t->cmp = (uint32_t)(t->cmp + t->period); - } + t->cmp = (uint32_t)t->cmp64; } else { - while (hpet_time_after64(cur_tick, t->cmp)) { - t->cmp += period; - } - } - diff = hpet_calculate_diff(t, cur_tick); - hpet_arm(t, diff); - } else if (t->config & HPET_TN_32BIT && !timer_is_periodic(t)) { - if (t->wrap_flag) { - diff = hpet_calculate_diff(t, cur_tick); - hpet_arm(t, diff); - t->wrap_flag = 0; + t->cmp = t->cmp64; } + hpet_arm(t, t->cmp64); + } else if (t->wrap_flag) { + t->wrap_flag = 0; + hpet_arm(t, t->cmp64); } update_irq(t, 1); } static void hpet_set_timer(HPETTimer *t) { - uint64_t diff; - uint32_t wrap_diff; /* how many ticks until we wrap? */ uint64_t cur_tick = hpet_get_ticks(t->state); - /* whenever new timer is being set up, make sure wrap_flag is 0 */ t->wrap_flag = 0; - diff = hpet_calculate_diff(t, cur_tick); - - /* hpet spec says in one-shot 32-bit mode, generate an interrupt when - * counter wraps in addition to an interrupt with comparator match. - */ - if (t->config & HPET_TN_32BIT && !timer_is_periodic(t)) { - wrap_diff = 0xffffffff - (uint32_t)cur_tick; - if (wrap_diff < (uint32_t)diff) { - diff = wrap_diff; + t->cmp64 = hpet_calculate_cmp64(t, cur_tick, t->cmp); + if (t->config & HPET_TN_32BIT) { + + /* hpet spec says in one-shot 32-bit mode, generate an interrupt when + * counter wraps in addition to an interrupt with comparator match. + */ + if (!timer_is_periodic(t) && t->cmp64 > hpet_next_wrap(cur_tick)) { t->wrap_flag = 1; + hpet_arm(t, hpet_next_wrap(cur_tick)); + return; } } - hpet_arm(t, diff); + hpet_arm(t, t->cmp64); } static void hpet_del_timer(HPETTimer *t) { + HPETState *s = t->state; timer_del(t->qemu_timer); - update_irq(t, 0); + + if (s->isr & (1 << t->tn)) { + /* For level-triggered interrupt, this leaves ISR set but lowers irq. 
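Note: in the update_irq() rework above, the ISR bit now latches only for level-triggered timers, independent of whether the interrupt is actually delivered; hpet_del_timer() relies on exactly that to keep ISR set while lowering the line. A sketch of that rule as read from the hunk; hpet_new_isr is a hypothetical name:

    #include <stdbool.h>
    #include <stdint.h>

    /* ISR latches on 'set' only for level-triggered timers; edge-triggered
     * timers (and any clear) drop the bit. Delivery is decided separately
     * from timer/HPET enable state. */
    static uint32_t hpet_new_isr(uint32_t isr, uint32_t mask,
                                 bool set, bool level_triggered)
    {
        return (set && level_triggered) ? (isr | mask) : (isr & ~mask);
    }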
*/ + update_irq(t, 1); + } } static uint64_t hpet_ram_read(void *opaque, hwaddr addr, unsigned size) { HPETState *s = opaque; - uint64_t cur_tick, index; + int shift = (addr & 4) * 8; + uint64_t cur_tick; trace_hpet_ram_read(addr); - index = addr; + /*address range of all TN regs*/ - if (index >= 0x100 && index <= 0x3ff) { + if (addr >= 0x100 && addr <= 0x3ff) { uint8_t timer_id = (addr - 0x100) / 0x20; HPETTimer *timer = &s->timer[timer_id]; @@ -436,52 +453,33 @@ static uint64_t hpet_ram_read(void *opaque, hwaddr addr, return 0; } - switch ((addr - 0x100) % 0x20) { - case HPET_TN_CFG: - return timer->config; - case HPET_TN_CFG + 4: // Interrupt capabilities - return timer->config >> 32; + switch (addr & 0x18) { + case HPET_TN_CFG: // including interrupt capabilities + return timer->config >> shift; case HPET_TN_CMP: // comparator register - return timer->cmp; - case HPET_TN_CMP + 4: - return timer->cmp >> 32; + return timer->cmp >> shift; case HPET_TN_ROUTE: - return timer->fsb; - case HPET_TN_ROUTE + 4: - return timer->fsb >> 32; + return timer->fsb >> shift; default: trace_hpet_ram_read_invalid(); break; } } else { - switch (index) { - case HPET_ID: - return s->capability; - case HPET_PERIOD: - return s->capability >> 32; + switch (addr & ~4) { + case HPET_ID: // including HPET_PERIOD + return s->capability >> shift; case HPET_CFG: - return s->config; - case HPET_CFG + 4: - trace_hpet_invalid_hpet_cfg(4); - return 0; + return s->config >> shift; case HPET_COUNTER: if (hpet_enabled(s)) { cur_tick = hpet_get_ticks(s); } else { cur_tick = s->hpet_counter; } - trace_hpet_ram_read_reading_counter(0, cur_tick); - return cur_tick; - case HPET_COUNTER + 4: - if (hpet_enabled(s)) { - cur_tick = hpet_get_ticks(s); - } else { - cur_tick = s->hpet_counter; - } - trace_hpet_ram_read_reading_counter(4, cur_tick); - return cur_tick >> 32; + trace_hpet_ram_read_reading_counter(addr & 4, cur_tick); + return cur_tick >> shift; case HPET_STATUS: - return s->isr; + return s->isr >> shift; default: trace_hpet_ram_read_invalid(); break; @@ -495,15 +493,14 @@ static void hpet_ram_write(void *opaque, hwaddr addr, { int i; HPETState *s = opaque; - uint64_t old_val, new_val, val, index; + int shift = (addr & 4) * 8; + int len = MIN(size * 8, 64 - shift); + uint64_t old_val, new_val, cleared; trace_hpet_ram_write(addr, value); - index = addr; - old_val = hpet_ram_read(opaque, addr, 4); - new_val = value; /*address range of all TN regs*/ - if (index >= 0x100 && index <= 0x3ff) { + if (addr >= 0x100 && addr <= 0x3ff) { uint8_t timer_id = (addr - 0x100) / 0x20; HPETTimer *timer = &s->timer[timer_id]; @@ -512,71 +509,49 @@ static void hpet_ram_write(void *opaque, hwaddr addr, trace_hpet_timer_id_out_of_range(timer_id); return; } - switch ((addr - 0x100) % 0x20) { + switch (addr & 0x18) { case HPET_TN_CFG: - trace_hpet_ram_write_tn_cfg(); - if (activating_bit(old_val, new_val, HPET_TN_FSB_ENABLE)) { + trace_hpet_ram_write_tn_cfg(addr & 4); + old_val = timer->config; + new_val = deposit64(old_val, shift, len, value); + new_val = hpet_fixup_reg(new_val, old_val, HPET_TN_CFG_WRITE_MASK); + if (deactivating_bit(old_val, new_val, HPET_TN_TYPE_LEVEL)) { + /* + * Do this before changing timer->config; otherwise, if + * HPET_TN_FSB is set, update_irq will not lower the qemu_irq. 
+ */ update_irq(timer, 0); } - val = hpet_fixup_reg(new_val, old_val, HPET_TN_CFG_WRITE_MASK); - timer->config = (timer->config & 0xffffffff00000000ULL) | val; + timer->config = new_val; + if (activating_bit(old_val, new_val, HPET_TN_ENABLE) + && (s->isr & (1 << timer_id))) { + update_irq(timer, 1); + } if (new_val & HPET_TN_32BIT) { timer->cmp = (uint32_t)timer->cmp; timer->period = (uint32_t)timer->period; } - if (activating_bit(old_val, new_val, HPET_TN_ENABLE) && - hpet_enabled(s)) { + if (hpet_enabled(s)) { hpet_set_timer(timer); - } else if (deactivating_bit(old_val, new_val, HPET_TN_ENABLE)) { - hpet_del_timer(timer); } break; - case HPET_TN_CFG + 4: // Interrupt capabilities - trace_hpet_ram_write_invalid_tn_cfg(4); - break; case HPET_TN_CMP: // comparator register - trace_hpet_ram_write_tn_cmp(0); if (timer->config & HPET_TN_32BIT) { - new_val = (uint32_t)new_val; - } - if (!timer_is_periodic(timer) - || (timer->config & HPET_TN_SETVAL)) { - timer->cmp = (timer->cmp & 0xffffffff00000000ULL) | new_val; - } - if (timer_is_periodic(timer)) { - /* - * FIXME: Clamp period to reasonable min value? - * Clamp period to reasonable max value - */ - if (timer->config & HPET_TN_32BIT) { - new_val = MIN(new_val, ~0u >> 1); + /* High 32-bits are zero, leave them untouched. */ + if (shift) { + trace_hpet_ram_write_invalid_tn_cmp(); + break; } - timer->period = - (timer->period & 0xffffffff00000000ULL) | new_val; - } - /* - * FIXME: on a 64-bit write, HPET_TN_SETVAL should apply to the - * high bits part as well. - */ - timer->config &= ~HPET_TN_SETVAL; - if (hpet_enabled(s)) { - hpet_set_timer(timer); + len = 64; + value = (uint32_t) value; } - break; - case HPET_TN_CMP + 4: // comparator register high order - trace_hpet_ram_write_tn_cmp(4); + trace_hpet_ram_write_tn_cmp(addr & 4); if (!timer_is_periodic(timer) || (timer->config & HPET_TN_SETVAL)) { - timer->cmp = (timer->cmp & 0xffffffffULL) | new_val << 32; + timer->cmp = deposit64(timer->cmp, shift, len, value); } if (timer_is_periodic(timer)) { - /* - * FIXME: Clamp period to reasonable min value? - * Clamp period to reasonable max value - */ - new_val = MIN(new_val, ~0u >> 1); - timer->period = - (timer->period & 0xffffffffULL) | new_val << 32; + timer->period = deposit64(timer->period, shift, len, value); } timer->config &= ~HPET_TN_SETVAL; if (hpet_enabled(s)) { @@ -584,10 +559,7 @@ static void hpet_ram_write(void *opaque, hwaddr addr, } break; case HPET_TN_ROUTE: - timer->fsb = (timer->fsb & 0xffffffff00000000ULL) | new_val; - break; - case HPET_TN_ROUTE + 4: - timer->fsb = (new_val << 32) | (timer->fsb & 0xffffffff); + timer->fsb = deposit64(timer->fsb, shift, len, value); break; default: trace_hpet_ram_write_invalid(); @@ -595,20 +567,23 @@ static void hpet_ram_write(void *opaque, hwaddr addr, } return; } else { - switch (index) { + switch (addr & ~4) { case HPET_ID: return; case HPET_CFG: - val = hpet_fixup_reg(new_val, old_val, HPET_CFG_WRITE_MASK); - s->config = (s->config & 0xffffffff00000000ULL) | val; + old_val = s->config; + new_val = deposit64(old_val, shift, len, value); + new_val = hpet_fixup_reg(new_val, old_val, HPET_CFG_WRITE_MASK); + s->config = new_val; if (activating_bit(old_val, new_val, HPET_CFG_ENABLE)) { /* Enable main counter and interrupt generation. 
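Note: the edge-detection helpers used throughout hpet_ram_write() above are not part of this diff; they are assumed to test 0-to-1 and 1-to-0 transitions of a masked bit between the old and new register values, roughly:

    #include <stdbool.h>
    #include <stdint.h>

    /* True when a 'mask' bit goes 0 -> 1 between old and new. */
    static bool activating_bit(uint64_t old, uint64_t new, uint64_t mask)
    {
        return !(old & mask) && (new & mask);
    }

    /* True when a 'mask' bit goes 1 -> 0. */
    static bool deactivating_bit(uint64_t old, uint64_t new, uint64_t mask)
    {
        return (old & mask) && !(new & mask);
    }

This is why the HPET_TN_CFG hunk must call update_irq(timer, 0) before storing the new config: once timer->config changes, the old FSB/level routing needed to lower the line is no longer visible.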
*/ s->hpet_offset = ticks_to_ns(s->hpet_counter) - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); for (i = 0; i < s->num_timers; i++) { - if ((&s->timer[i])->cmp != ~0ULL) { - hpet_set_timer(&s->timer[i]); + if (timer_enabled(&s->timer[i]) && (s->isr & (1 << i))) { + update_irq(&s->timer[i], 1); } + hpet_set_timer(&s->timer[i]); } } else if (deactivating_bit(old_val, new_val, HPET_CFG_ENABLE)) { /* Halt main counter and disable interrupt generation. */ @@ -629,13 +604,11 @@ static void hpet_ram_write(void *opaque, hwaddr addr, qemu_set_irq(s->irqs[RTC_ISA_IRQ], s->rtc_irq_level); } break; - case HPET_CFG + 4: - trace_hpet_invalid_hpet_cfg(4); - break; case HPET_STATUS: - val = new_val & s->isr; + new_val = value << shift; + cleared = new_val & s->isr; for (i = 0; i < s->num_timers; i++) { - if (val & (1 << i)) { + if (cleared & (1 << i)) { update_irq(&s->timer[i], 0); } } @@ -644,15 +617,7 @@ static void hpet_ram_write(void *opaque, hwaddr addr, if (hpet_enabled(s)) { trace_hpet_ram_write_counter_write_while_enabled(); } - s->hpet_counter = - (s->hpet_counter & 0xffffffff00000000ULL) | value; - trace_hpet_ram_write_counter_written(0, value, s->hpet_counter); - break; - case HPET_COUNTER + 4: - trace_hpet_ram_write_counter_write_while_enabled(); - s->hpet_counter = - (s->hpet_counter & 0xffffffffULL) | (((uint64_t)value) << 32); - trace_hpet_ram_write_counter_written(4, value, s->hpet_counter); + s->hpet_counter = deposit64(s->hpet_counter, shift, len, value); break; default: trace_hpet_ram_write_invalid(); @@ -666,7 +631,11 @@ static const MemoryRegionOps hpet_ram_ops = { .write = hpet_ram_write, .valid = { .min_access_size = 4, - .max_access_size = 4, + .max_access_size = 8, + }, + .impl = { + .min_access_size = 4, + .max_access_size = 8, }, .endianness = DEVICE_NATIVE_ENDIAN, }; diff --git a/hw/timer/trace-events b/hw/timer/trace-events index de769f4b71..f48a712801 100644 --- a/hw/timer/trace-events +++ b/hw/timer/trace-events @@ -108,9 +108,9 @@ hpet_ram_read_reading_counter(uint8_t reg_off, uint64_t cur_tick) "reading count hpet_ram_read_invalid(void) "invalid hpet_ram_readl" hpet_ram_write(uint64_t addr, uint64_t value) "enter hpet_ram_writel at 0x%" PRIx64 " = 0x%" PRIx64 hpet_ram_write_timer_id(uint64_t timer_id) "hpet_ram_writel timer_id = 0x%" PRIx64 -hpet_ram_write_tn_cfg(void) "hpet_ram_writel HPET_TN_CFG" -hpet_ram_write_invalid_tn_cfg(uint8_t reg_off) "invalid HPET_TN_CFG + %" PRIu8 " write" +hpet_ram_write_tn_cfg(uint8_t reg_off) "hpet_ram_writel HPET_TN_CFG + %" PRIu8 hpet_ram_write_tn_cmp(uint8_t reg_off) "hpet_ram_writel HPET_TN_CMP + %" PRIu8 +hpet_ram_write_invalid_tn_cmp(void) "invalid HPET_TN_CMP + 4 write" hpet_ram_write_invalid(void) "invalid hpet_ram_writel" hpet_ram_write_counter_write_while_enabled(void) "Writing counter while HPET enabled!" hpet_ram_write_counter_written(uint8_t reg_off, uint64_t value, uint64_t counter) "HPET counter + %" PRIu8 "written. 
crt = 0x%" PRIx64 " -> 0x%" PRIx64 diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c index 0c4354e3e7..71bf32b83c 100644 --- a/hw/vfio/ap.c +++ b/hw/vfio/ap.c @@ -230,6 +230,9 @@ static void vfio_ap_instance_init(Object *obj) */ vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_AP, &vfio_ap_ops, DEVICE(vapdev), true); + + /* AP device is mdev type device */ + vbasedev->mdev = true; } #ifdef CONFIG_IOMMUFD diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c index 1f8e1272c7..115862f430 100644 --- a/hw/vfio/ccw.c +++ b/hw/vfio/ccw.c @@ -675,6 +675,9 @@ static void vfio_ccw_instance_init(Object *obj) VFIOCCWDevice *vcdev = VFIO_CCW(obj); VFIODevice *vbasedev = &vcdev->vdev; + /* CCW device is mdev type device */ + vbasedev->mdev = true; + /* * All vfio-ccw devices are believed to operate in a way compatible with * discarding of memory in RAM blocks, ie. pages pinned in the host are diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 6d15b36e0b..36d0cf6585 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -199,6 +199,9 @@ bool vfio_devices_all_device_dirty_tracking(const VFIOContainerBase *bcontainer) VFIODevice *vbasedev; QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { + if (vbasedev->device_dirty_page_tracking == ON_OFF_AUTO_OFF) { + return false; + } if (!vbasedev->dirty_pages_supported) { return false; } @@ -599,7 +602,7 @@ static void vfio_listener_region_add(MemoryListener *listener, IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr); int iommu_idx; - trace_vfio_listener_region_add_iommu(iova, end); + trace_vfio_listener_region_add_iommu(section->mr->name, iova, end); /* * FIXME: For VFIO iommu types which have KVM acceleration to * avoid bouncing all map/unmaps through qemu this way, this @@ -725,6 +728,7 @@ static void vfio_listener_region_del(MemoryListener *listener, if (memory_region_is_iommu(section->mr)) { VFIOGuestIOMMU *giommu; + trace_vfio_listener_region_del_iommu(section->mr->name); QLIST_FOREACH(giommu, &bcontainer->giommu_list, giommu_next) { if (MEMORY_REGION(giommu->iommu_mr) == section->mr && giommu->n.start == section->offset_within_region) { @@ -1536,7 +1540,7 @@ bool vfio_attach_device(char *name, VFIODevice *vbasedev, { const VFIOIOMMUClass *ops = VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY)); - HostIOMMUDevice *hiod; + HostIOMMUDevice *hiod = NULL; if (vbasedev->iommufd) { ops = VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD)); @@ -1544,17 +1548,17 @@ bool vfio_attach_device(char *name, VFIODevice *vbasedev, assert(ops); - if (!ops->attach_device(name, vbasedev, as, errp)) { - return false; + + if (!vbasedev->mdev) { + hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename)); + vbasedev->hiod = hiod; } - hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename)); - if (!HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod, vbasedev, errp)) { + if (!ops->attach_device(name, vbasedev, as, errp)) { object_unref(hiod); - ops->detach_device(vbasedev); + vbasedev->hiod = NULL; return false; } - vbasedev->hiod = hiod; return true; } diff --git a/hw/vfio/container.c b/hw/vfio/container.c index 38a9df3496..9ccdb639ac 100644 --- a/hw/vfio/container.c +++ b/hw/vfio/container.c @@ -656,7 +656,6 @@ static bool vfio_connect_container(VFIOGroup *group, AddressSpace *as, return true; listener_release_exit: QLIST_REMOVE(group, container_next); - QLIST_REMOVE(bcontainer, next); vfio_kvm_device_del_group(group); memory_listener_unregister(&bcontainer->listener); if (vioc->release) { @@ -915,6 +914,10 @@ static bool 
vfio_legacy_attach_device(const char *name, VFIODevice *vbasedev, trace_vfio_attach_device(vbasedev->name, groupid); + if (!vfio_device_hiod_realize(vbasedev, errp)) { + return false; + } + group = vfio_get_group(groupid, as, errp); if (!group) { return false; @@ -1142,7 +1145,6 @@ static bool hiod_legacy_vfio_realize(HostIOMMUDevice *hiod, void *opaque, VFIODevice *vdev = opaque; hiod->name = g_strdup(vdev->name); - hiod->caps.aw_bits = vfio_device_get_aw_bits(vdev); hiod->agent = opaque; return true; @@ -1151,11 +1153,9 @@ static bool hiod_legacy_vfio_realize(HostIOMMUDevice *hiod, void *opaque, static int hiod_legacy_vfio_get_cap(HostIOMMUDevice *hiod, int cap, Error **errp) { - HostIOMMUDeviceCaps *caps = &hiod->caps; - switch (cap) { case HOST_IOMMU_DEVICE_CAP_AW_BITS: - return caps->aw_bits; + return vfio_device_get_aw_bits(hiod->agent); default: error_setg(errp, "%s: unsupported capability %x", hiod->name, cap); return -EINVAL; diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c index b14edd46ed..ea15c79db0 100644 --- a/hw/vfio/helpers.c +++ b/hw/vfio/helpers.c @@ -675,3 +675,28 @@ int vfio_device_get_aw_bits(VFIODevice *vdev) return HOST_IOMMU_DEVICE_CAP_AW_BITS_MAX; } + +bool vfio_device_is_mdev(VFIODevice *vbasedev) +{ + g_autofree char *subsys = NULL; + g_autofree char *tmp = NULL; + + if (!vbasedev->sysfsdev) { + return false; + } + + tmp = g_strdup_printf("%s/subsystem", vbasedev->sysfsdev); + subsys = realpath(tmp, NULL); + return subsys && (strcmp(subsys, "/sys/bus/mdev") == 0); +} + +bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp) +{ + HostIOMMUDevice *hiod = vbasedev->hiod; + + if (!hiod) { + return true; + } + + return HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod, vbasedev, errp); +} diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c index 7b5f87a148..e7bece4ea1 100644 --- a/hw/vfio/iommufd.c +++ b/hw/vfio/iommufd.c @@ -25,6 +25,7 @@ #include "qemu/cutils.h" #include "qemu/chardev_open.h" #include "pci.h" +#include "exec/ram_addr.h" static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, void *vaddr, bool readonly) @@ -110,6 +111,68 @@ static void iommufd_cdev_unbind_and_disconnect(VFIODevice *vbasedev) iommufd_backend_disconnect(vbasedev->iommufd); } +static bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt) +{ + return hwpt && hwpt->hwpt_flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING; +} + +static int iommufd_set_dirty_page_tracking(const VFIOContainerBase *bcontainer, + bool start, Error **errp) +{ + const VFIOIOMMUFDContainer *container = + container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); + VFIOIOASHwpt *hwpt; + + QLIST_FOREACH(hwpt, &container->hwpt_list, next) { + if (!iommufd_hwpt_dirty_tracking(hwpt)) { + continue; + } + + if (!iommufd_backend_set_dirty_tracking(container->be, + hwpt->hwpt_id, start, errp)) { + goto err; + } + } + + return 0; + +err: + QLIST_FOREACH(hwpt, &container->hwpt_list, next) { + if (!iommufd_hwpt_dirty_tracking(hwpt)) { + continue; + } + iommufd_backend_set_dirty_tracking(container->be, + hwpt->hwpt_id, !start, NULL); + } + return -EINVAL; +} + +static int iommufd_query_dirty_bitmap(const VFIOContainerBase *bcontainer, + VFIOBitmap *vbmap, hwaddr iova, + hwaddr size, Error **errp) +{ + VFIOIOMMUFDContainer *container = container_of(bcontainer, + VFIOIOMMUFDContainer, + bcontainer); + unsigned long page_size = qemu_real_host_page_size(); + VFIOIOASHwpt *hwpt; + + QLIST_FOREACH(hwpt, &container->hwpt_list, next) { + if (!iommufd_hwpt_dirty_tracking(hwpt)) { + continue; + } + + 
if (!iommufd_backend_get_dirty_bitmap(container->be, hwpt->hwpt_id, + iova, size, page_size, + (uint64_t *)vbmap->bitmap, + errp)) { + return -EINVAL; + } + } + + return 0; +} + static int iommufd_cdev_getfd(const char *sysfs_path, Error **errp) { ERRP_GUARD(); @@ -172,7 +235,7 @@ out: return ret; } -static bool iommufd_cdev_attach_ioas_hwpt(VFIODevice *vbasedev, uint32_t id, +static int iommufd_cdev_attach_ioas_hwpt(VFIODevice *vbasedev, uint32_t id, Error **errp) { int iommufd = vbasedev->iommufd->fd; @@ -187,12 +250,12 @@ static bool iommufd_cdev_attach_ioas_hwpt(VFIODevice *vbasedev, uint32_t id, error_setg_errno(errp, errno, "[iommufd=%d] error attach %s (%d) to id=%d", iommufd, vbasedev->name, vbasedev->fd, id); - return false; + return -errno; } trace_iommufd_cdev_attach_ioas_hwpt(iommufd, vbasedev->name, vbasedev->fd, id); - return true; + return 0; } static bool iommufd_cdev_detach_ioas_hwpt(VFIODevice *vbasedev, Error **errp) @@ -212,11 +275,111 @@ static bool iommufd_cdev_detach_ioas_hwpt(VFIODevice *vbasedev, Error **errp) return true; } +static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev, + VFIOIOMMUFDContainer *container, + Error **errp) +{ + ERRP_GUARD(); + IOMMUFDBackend *iommufd = vbasedev->iommufd; + uint32_t flags = 0; + VFIOIOASHwpt *hwpt; + uint32_t hwpt_id; + int ret; + + /* Try to find a domain */ + QLIST_FOREACH(hwpt, &container->hwpt_list, next) { + ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp); + if (ret) { + /* -EINVAL means the domain is incompatible with the device. */ + if (ret == -EINVAL) { + /* + * It is an expected failure and it just means we will try + * another domain, or create one if no existing compatible + * domain is found. Hence why the error is discarded below. + */ + error_free(*errp); + *errp = NULL; + continue; + } + + return false; + } else { + vbasedev->hwpt = hwpt; + QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next); + vbasedev->iommu_dirty_tracking = iommufd_hwpt_dirty_tracking(hwpt); + return true; + } + } + + /* + * This is quite early and VFIO Migration state isn't yet fully + * initialized, thus rely only on IOMMU hardware capabilities as to + * whether IOMMU dirty tracking is going to be requested. Later + * vfio_migration_realize() may decide to use VF dirty tracking + * instead. 
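iommufd_cdev_attach_ioas_hwpt() switches from bool to a negative-errno return precisely so the auto-domain loop can treat -EINVAL (device/domain incompatibility) as "try the next candidate" while everything else stays fatal. A condensed sketch of that calling convention; the attach_fn callback and attach_first_compatible() are hypothetical stand-ins, not QEMU functions:

#include <errno.h>
#include <stddef.h>

/* Stand-in for iommufd_cdev_attach_ioas_hwpt(): 0 on success, -errno on
 * failure, so callers can single out -EINVAL. */
typedef int (*attach_fn)(void *dev, unsigned hwpt_id);

/* Walk candidate hw page tables the way iommufd_cdev_autodomains_get()
 * does: -EINVAL only means "incompatible", so keep trying; any other
 * error is reported to the caller. */
static int attach_first_compatible(void *dev, const unsigned *ids,
                                   size_t n, attach_fn attach)
{
    int ret = -ENOENT;   /* no candidates at all */

    for (size_t i = 0; i < n; i++) {
        ret = attach(dev, ids[i]);
        if (ret != -EINVAL) {
            break;       /* success (0) or a hard failure */
        }
    }
    return ret;
}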
+ */ + if (vbasedev->hiod->caps.hw_caps & IOMMU_HW_CAP_DIRTY_TRACKING) { + flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING; + } + + if (!iommufd_backend_alloc_hwpt(iommufd, vbasedev->devid, + container->ioas_id, flags, + IOMMU_HWPT_DATA_NONE, 0, NULL, + &hwpt_id, errp)) { + return false; + } + + hwpt = g_malloc0(sizeof(*hwpt)); + hwpt->hwpt_id = hwpt_id; + hwpt->hwpt_flags = flags; + QLIST_INIT(&hwpt->device_list); + + ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp); + if (ret) { + iommufd_backend_free_id(container->be, hwpt->hwpt_id); + g_free(hwpt); + return false; + } + + vbasedev->hwpt = hwpt; + vbasedev->iommu_dirty_tracking = iommufd_hwpt_dirty_tracking(hwpt); + QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next); + QLIST_INSERT_HEAD(&container->hwpt_list, hwpt, next); + container->bcontainer.dirty_pages_supported |= + vbasedev->iommu_dirty_tracking; + if (container->bcontainer.dirty_pages_supported && + !vbasedev->iommu_dirty_tracking) { + warn_report("IOMMU instance for device %s doesn't support dirty tracking", + vbasedev->name); + } + return true; +} + +static void iommufd_cdev_autodomains_put(VFIODevice *vbasedev, + VFIOIOMMUFDContainer *container) +{ + VFIOIOASHwpt *hwpt = vbasedev->hwpt; + + QLIST_REMOVE(vbasedev, hwpt_next); + vbasedev->hwpt = NULL; + + if (QLIST_EMPTY(&hwpt->device_list)) { + QLIST_REMOVE(hwpt, next); + iommufd_backend_free_id(container->be, hwpt->hwpt_id); + g_free(hwpt); + } +} + static bool iommufd_cdev_attach_container(VFIODevice *vbasedev, VFIOIOMMUFDContainer *container, Error **errp) { - return iommufd_cdev_attach_ioas_hwpt(vbasedev, container->ioas_id, errp); + /* mdevs aren't physical devices and will fail with auto domains */ + if (!vbasedev->mdev) { + return iommufd_cdev_autodomains_get(vbasedev, container, errp); + } + + return !iommufd_cdev_attach_ioas_hwpt(vbasedev, container->ioas_id, errp); } static void iommufd_cdev_detach_container(VFIODevice *vbasedev, @@ -227,6 +390,11 @@ static void iommufd_cdev_detach_container(VFIODevice *vbasedev, if (!iommufd_cdev_detach_ioas_hwpt(vbasedev, &err)) { error_report_err(err); } + + if (vbasedev->hwpt) { + iommufd_cdev_autodomains_put(vbasedev, container); + } + } static void iommufd_cdev_container_destroy(VFIOIOMMUFDContainer *container) @@ -320,6 +488,17 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, space = vfio_get_address_space(as); + /* + * The HostIOMMUDevice data from legacy backend is static and doesn't need + * any information from the (type1-iommu) backend to be initialized. In + * contrast however, the IOMMUFD HostIOMMUDevice data requires the iommufd + * FD to be connected and having a devid to be able to successfully call + * iommufd_backend_get_device_info(). 
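When no compatible hwpt exists, the hunk above allocates one and reference-counts it through hwpt->device_list; iommufd_cdev_autodomains_put() tears it down once the last device leaves. A minimal sketch of that lifetime pattern, assuming BSD sys/queue.h lists (QEMU's QLIST macros behave the same) and illustrative struct names:

#include <stdlib.h>
#include <sys/queue.h>

struct hwpt;

struct device {
    struct hwpt *hwpt;
    LIST_ENTRY(device) hwpt_next;
};

struct hwpt {
    unsigned id;
    LIST_HEAD(, device) device_list;   /* devices attached to this hwpt */
};

/* Shape of iommufd_cdev_autodomains_put(): unlink the device and free
 * the hw page table once nothing references it any more. The real code
 * additionally releases the kernel object via iommufd_backend_free_id()
 * and unlinks the hwpt from the container's hwpt_list. */
static void hwpt_put(struct device *dev)
{
    struct hwpt *hwpt = dev->hwpt;

    LIST_REMOVE(dev, hwpt_next);
    dev->hwpt = NULL;

    if (LIST_EMPTY(&hwpt->device_list)) {
        free(hwpt);
    }
}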
+ */ + if (!vfio_device_hiod_realize(vbasedev, errp)) { + goto err_alloc_ioas; + } + /* try to attach to an existing container in this space */ QLIST_FOREACH(bcontainer, &space->containers, next) { container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); @@ -354,6 +533,7 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, container = VFIO_IOMMU_IOMMUFD(object_new(TYPE_VFIO_IOMMU_IOMMUFD)); container->be = vbasedev->iommufd; container->ioas_id = ioas_id; + QLIST_INIT(&container->hwpt_list); bcontainer = &container->bcontainer; vfio_address_space_insert(space, bcontainer); @@ -617,6 +797,8 @@ static void vfio_iommu_iommufd_class_init(ObjectClass *klass, void *data) vioc->attach_device = iommufd_cdev_attach; vioc->detach_device = iommufd_cdev_detach; vioc->pci_hot_reset = iommufd_cdev_pci_hot_reset; + vioc->set_dirty_page_tracking = iommufd_set_dirty_page_tracking; + vioc->query_dirty_bitmap = iommufd_query_dirty_bitmap; }; static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque, @@ -628,17 +810,19 @@ static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque, union { struct iommu_hw_info_vtd vtd; } data; + uint64_t hw_caps; hiod->agent = opaque; if (!iommufd_backend_get_device_info(vdev->iommufd, vdev->devid, - &type, &data, sizeof(data), errp)) { + &type, &data, sizeof(data), + &hw_caps, errp)) { return false; } hiod->name = g_strdup(vdev->name); caps->type = type; - caps->aw_bits = vfio_device_get_aw_bits(vdev); + caps->hw_caps = hw_caps; return true; } diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c index 34d4be2ce1..262d42a46e 100644 --- a/hw/vfio/migration.c +++ b/hw/vfio/migration.c @@ -1036,16 +1036,18 @@ bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp) return !vfio_block_migration(vbasedev, err, errp); } - if (!vbasedev->dirty_pages_supported) { + if ((!vbasedev->dirty_pages_supported || + vbasedev->device_dirty_page_tracking == ON_OFF_AUTO_OFF) && + !vbasedev->iommu_dirty_tracking) { if (vbasedev->enable_migration == ON_OFF_AUTO_AUTO) { error_setg(&err, - "%s: VFIO device doesn't support device dirty tracking", - vbasedev->name); + "%s: VFIO device doesn't support device and " + "IOMMU dirty tracking", vbasedev->name); goto add_blocker; } - warn_report("%s: VFIO device doesn't support device dirty tracking", - vbasedev->name); + warn_report("%s: VFIO device doesn't support device and " + "IOMMU dirty tracking", vbasedev->name); } ret = vfio_block_multiple_devices_migration(vbasedev, errp); diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index e03d9f3ba5..2407720c35 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -2963,12 +2963,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) ERRP_GUARD(); VFIOPCIDevice *vdev = VFIO_PCI(pdev); VFIODevice *vbasedev = &vdev->vbasedev; - char *subsys; int i, ret; - bool is_mdev; char uuid[UUID_STR_LEN]; g_autofree char *name = NULL; - g_autofree char *tmp = NULL; if (vbasedev->fd < 0 && !vbasedev->sysfsdev) { if (!(~vdev->host.domain || ~vdev->host.bus || @@ -2997,14 +2994,11 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) * stays in sync with the active working set of the guest driver. Prevent * the x-balloon-allowed option unless this is minimally an mdev device. 
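Because hiod_iommufd_vfio_realize() now hands back the raw hardware capability bits, the attach path can derive hwpt allocation flags from them before any migration state exists. A small sketch of that gating; the two constants stand in for IOMMU_HW_CAP_DIRTY_TRACKING and IOMMU_HWPT_ALLOC_DIRTY_TRACKING from the Linux iommufd UAPI, with illustrative values:

#include <stdint.h>

#define HW_CAP_DIRTY_TRACKING     (1ULL << 0)   /* illustrative bit values */
#define HWPT_ALLOC_DIRTY_TRACKING (1U << 0)

/* Pick hwpt allocation flags from the capabilities reported by
 * iommufd_backend_get_device_info(), as iommufd_cdev_autodomains_get()
 * does: request IOMMU dirty tracking only when the hardware offers it. */
static uint32_t hwpt_alloc_flags(uint64_t hw_caps)
{
    return (hw_caps & HW_CAP_DIRTY_TRACKING) ? HWPT_ALLOC_DIRTY_TRACKING : 0;
}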
*/ - tmp = g_strdup_printf("%s/subsystem", vbasedev->sysfsdev); - subsys = realpath(tmp, NULL); - is_mdev = subsys && (strcmp(subsys, "/sys/bus/mdev") == 0); - free(subsys); + vbasedev->mdev = vfio_device_is_mdev(vbasedev); - trace_vfio_mdev(vbasedev->name, is_mdev); + trace_vfio_mdev(vbasedev->name, vbasedev->mdev); - if (vbasedev->ram_block_discard_allowed && !is_mdev) { + if (vbasedev->ram_block_discard_allowed && !vbasedev->mdev) { error_setg(errp, "x-balloon-allowed only potentially compatible " "with mdev devices"); goto error; @@ -3121,7 +3115,8 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) vfio_bars_register(vdev); - if (!pci_device_set_iommu_device(pdev, vbasedev->hiod, errp)) { + if (!vbasedev->mdev && + !pci_device_set_iommu_device(pdev, vbasedev->hiod, errp)) { error_prepend(errp, "Failed to set iommu_device: "); goto out_teardown; } @@ -3244,7 +3239,9 @@ out_deregister: timer_free(vdev->intx.mmap_timer); } out_unset_idev: - pci_device_unset_iommu_device(pdev); + if (!vbasedev->mdev) { + pci_device_unset_iommu_device(pdev); + } out_teardown: vfio_teardown_msi(vdev); vfio_bars_exit(vdev); @@ -3289,7 +3286,9 @@ static void vfio_exitfn(PCIDevice *pdev) vfio_pci_disable_rp_atomics(vdev); vfio_bars_exit(vdev); vfio_migration_exit(vbasedev); - pci_device_unset_iommu_device(pdev); + if (!vbasedev->mdev) { + pci_device_unset_iommu_device(pdev); + } } static void vfio_pci_reset(DeviceState *dev) @@ -3362,6 +3361,9 @@ static Property vfio_pci_dev_properties[] = { DEFINE_PROP_ON_OFF_AUTO("x-pre-copy-dirty-page-tracking", VFIOPCIDevice, vbasedev.pre_copy_dirty_page_tracking, ON_OFF_AUTO_ON), + DEFINE_PROP_ON_OFF_AUTO("x-device-dirty-page-tracking", VFIOPCIDevice, + vbasedev.device_dirty_page_tracking, + ON_OFF_AUTO_ON), DEFINE_PROP_ON_OFF_AUTO("display", VFIOPCIDevice, display, ON_OFF_AUTO_OFF), DEFINE_PROP_UINT32("xres", VFIOPCIDevice, display_xres, 0), diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events index e16179b507..98bd4dccea 100644 --- a/hw/vfio/trace-events +++ b/hw/vfio/trace-events @@ -95,7 +95,8 @@ vfio_region_read(char *name, int index, uint64_t addr, unsigned size, uint64_t d vfio_iommu_map_notify(const char *op, uint64_t iova_start, uint64_t iova_end) "iommu %s @ 0x%"PRIx64" - 0x%"PRIx64 vfio_listener_region_skip(const char *name, uint64_t start, uint64_t end) "SKIPPING %s 0x%"PRIx64" - 0x%"PRIx64 vfio_spapr_group_attach(int groupfd, int tablefd) "Attached groupfd %d to liobn fd %d" -vfio_listener_region_add_iommu(uint64_t start, uint64_t end) "region_add [iommu] 0x%"PRIx64" - 0x%"PRIx64 +vfio_listener_region_add_iommu(const char* name, uint64_t start, uint64_t end) "region_add [iommu] %s 0x%"PRIx64" - 0x%"PRIx64 +vfio_listener_region_del_iommu(const char *name) "region_del [iommu] %s" vfio_listener_region_add_ram(uint64_t iova_start, uint64_t iova_end, void *vaddr) "region_add [ram] 0x%"PRIx64" - 0x%"PRIx64" [%p]" vfio_known_safe_misalignment(const char *name, uint64_t iova, uint64_t offset_within_region, uintptr_t page_size) "Region \"%s\" iova=0x%"PRIx64" offset_within_region=0x%"PRIx64" qemu_real_host_page_size=0x%"PRIxPTR vfio_listener_region_add_no_dma_map(const char *name, uint64_t iova, uint64_t size, uint64_t page_size) "Region \"%s\" 0x%"PRIx64" size=0x%"PRIx64" is not aligned to 0x%"PRIx64" and cannot be mapped for DMA" diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index b7c04f0856..04e36ae047 100644 --- a/hw/virtio/trace-events +++ b/hw/virtio/trace-events @@ -116,6 +116,7 @@ virtio_iommu_get_config(uint64_t page_size_mask, 
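The reworked check in vfio_migration_realize() only complains when both sources of dirty tracking are unavailable: the device's own tracker (which the new x-device-dirty-page-tracking property can force off) and the IOMMU-side tracker set up during attach. The predicate, condensed into a sketch:

#include <stdbool.h>

enum on_off_auto { SETTING_AUTO, SETTING_ON, SETTING_OFF };

/* Condensed form of the vfio_migration_realize() test above: dirty-page
 * tracking is available if the device supports it and it wasn't disabled
 * via x-device-dirty-page-tracking, or if the IOMMU tracks for us. */
static bool dirty_tracking_available(bool device_dirty_supported,
                                     enum on_off_auto device_dirty_setting,
                                     bool iommu_dirty_tracking)
{
    bool device_ok = device_dirty_supported &&
                     device_dirty_setting != SETTING_OFF;

    return device_ok || iommu_dirty_tracking;
}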
uint64_t start, uint64_t end, u virtio_iommu_set_config(uint8_t bypass) "bypass=0x%x" virtio_iommu_attach(uint32_t domain_id, uint32_t ep_id) "domain=%d endpoint=%d" virtio_iommu_detach(uint32_t domain_id, uint32_t ep_id) "domain=%d endpoint=%d" +virtio_iommu_detach_endpoint_from_domain(uint32_t domain_id, uint32_t ep_id) "domain=%d endpoint=%d" virtio_iommu_map(uint32_t domain_id, uint64_t virt_start, uint64_t virt_end, uint64_t phys_start, uint32_t flags) "domain=%d virt_start=0x%"PRIx64" virt_end=0x%"PRIx64 " phys_start=0x%"PRIx64" flags=%d" virtio_iommu_unmap(uint32_t domain_id, uint64_t virt_start, uint64_t virt_end) "domain=%d virt_start=0x%"PRIx64" virt_end=0x%"PRIx64 virtio_iommu_unmap_done(uint32_t domain_id, uint64_t virt_start, uint64_t virt_end) "domain=%d virt_start=0x%"PRIx64" virt_end=0x%"PRIx64 diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c index ae48cc1c96..32ee7f496d 100644 --- a/hw/virtio/vhost-user-fs.c +++ b/hw/virtio/vhost-user-fs.c @@ -33,6 +33,7 @@ static const int user_feature_bits[] = { VIRTIO_F_RING_PACKED, VIRTIO_F_IOMMU_PLATFORM, VIRTIO_F_RING_RESET, + VIRTIO_F_IN_ORDER, VIRTIO_F_NOTIFICATION_DATA, VHOST_INVALID_FEATURE_BIT }; diff --git a/hw/virtio/vhost-user-vsock.c b/hw/virtio/vhost-user-vsock.c index 802b44a07d..da3b0e0229 100644 --- a/hw/virtio/vhost-user-vsock.c +++ b/hw/virtio/vhost-user-vsock.c @@ -21,6 +21,7 @@ static const int user_feature_bits[] = { VIRTIO_RING_F_INDIRECT_DESC, VIRTIO_RING_F_EVENT_IDX, VIRTIO_F_NOTIFY_ON_EMPTY, + VIRTIO_F_IN_ORDER, VIRTIO_F_NOTIFICATION_DATA, VHOST_INVALID_FEATURE_BIT }; diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c index bbe8aa4b99..5034768bff 100644 --- a/hw/virtio/virtio-crypto.c +++ b/hw/virtio/virtio-crypto.c @@ -205,6 +205,7 @@ virtio_crypto_create_asym_session(VirtIOCrypto *vcrypto, int queue_index; uint32_t algo, keytype, keylen; + sreq->info.op_code = opcode; algo = ldl_le_p(&sess_req->para.algo); keytype = ldl_le_p(&sess_req->para.keytype); keylen = ldl_le_p(&sess_req->para.keylen); @@ -224,7 +225,6 @@ virtio_crypto_create_asym_session(VirtIOCrypto *vcrypto, iov_discard_front(&iov, &out_num, keylen); } - sreq->info.op_code = opcode; asym_info = &sreq->info.u.asym_sess_info; asym_info->algo = algo; asym_info->keytype = keytype; diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c index 33ae61c4a6..59ef4fb217 100644 --- a/hw/virtio/virtio-iommu.c +++ b/hw/virtio/virtio-iommu.c @@ -308,6 +308,7 @@ static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep) if (!ep->domain) { return; } + trace_virtio_iommu_detach_endpoint_from_domain(domain->id, ep->id); g_tree_foreach(domain->mappings, virtio_iommu_notify_unmap_cb, ep->iommu_mr); QLIST_REMOVE(ep, next); @@ -467,26 +468,6 @@ static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque, return &sdev->as; } -static void virtio_iommu_device_clear(VirtIOIOMMU *s, PCIBus *bus, int devfn) -{ - IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus); - IOMMUDevice *sdev; - - if (!sbus) { - return; - } - - sdev = sbus->pbdev[devfn]; - if (!sdev) { - return; - } - - g_list_free_full(sdev->resv_regions, g_free); - sdev->resv_regions = NULL; - g_free(sdev); - sbus->pbdev[devfn] = NULL; -} - static gboolean hiod_equal(gconstpointer v1, gconstpointer v2) { const struct hiod_key *key1 = v1; @@ -558,8 +539,6 @@ static int virtio_iommu_set_host_iova_ranges(VirtIOIOMMU *s, PCIBus *bus, { IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus); IOMMUDevice *sdev; - GList 
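Adding VIRTIO_F_IN_ORDER to the user_feature_bits[] tables is what lets vhost-user backends negotiate the bit at all: anything absent from such a table gets masked out of the features. A rough sketch of that clamping, with FEATURE_BITS_END standing in for VHOST_INVALID_FEATURE_BIT and an example table; bit 35 is VIRTIO_F_IN_ORDER per the virtio spec:

#include <stdint.h>

#define FEATURE_BITS_END 0xFFFFFFFFu   /* stand-in for VHOST_INVALID_FEATURE_BIT */

/* Keep only the feature bits whitelisted in a user_feature_bits[]-style
 * table, approximating how vhost clamps feature negotiation. */
static uint64_t clamp_features(uint64_t features, const uint32_t *bits)
{
    uint64_t mask = 0;

    for (const uint32_t *b = bits; *b != FEATURE_BITS_END; b++) {
        mask |= 1ULL << *b;
    }
    return features & mask;
}

/* Example table in the style of the hunks above. */
static const uint32_t example_feature_bits[] = {
    35,                 /* VIRTIO_F_IN_ORDER */
    FEATURE_BITS_END,
};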
*current_ranges; - GList *l, *tmp, *new_ranges = NULL; int ret = -EINVAL; if (!sbus) { @@ -573,35 +552,10 @@ static int virtio_iommu_set_host_iova_ranges(VirtIOIOMMU *s, PCIBus *bus, return ret; } - current_ranges = sdev->host_resv_ranges; - - g_assert(!sdev->probe_done); - - /* check that each new resv region is included in an existing one */ if (sdev->host_resv_ranges) { - range_inverse_array(iova_ranges, - &new_ranges, - 0, UINT64_MAX); - - for (tmp = new_ranges; tmp; tmp = tmp->next) { - Range *newr = (Range *)tmp->data; - bool included = false; - - for (l = current_ranges; l; l = l->next) { - Range * r = (Range *)l->data; - - if (range_contains_range(r, newr)) { - included = true; - break; - } - } - if (!included) { - goto error; - } - } - /* all new reserved ranges are included in existing ones */ - ret = 0; - goto out; + error_setg(errp, "%s virtio-iommu does not support aliased BDF", + __func__); + return ret; } range_inverse_array(iova_ranges, @@ -610,14 +564,31 @@ static int virtio_iommu_set_host_iova_ranges(VirtIOIOMMU *s, PCIBus *bus, rebuild_resv_regions(sdev); return 0; -error: - error_setg(errp, "%s Conflicting host reserved ranges set!", - __func__); -out: - g_list_free_full(new_ranges, g_free); - return ret; } +static void virtio_iommu_unset_host_iova_ranges(VirtIOIOMMU *s, PCIBus *bus, + int devfn) +{ + IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus); + IOMMUDevice *sdev; + + if (!sbus) { + return; + } + + sdev = sbus->pbdev[devfn]; + if (!sdev) { + return; + } + + g_list_free_full(g_steal_pointer(&sdev->host_resv_ranges), g_free); + g_list_free_full(sdev->resv_regions, g_free); + sdev->host_resv_ranges = NULL; + sdev->resv_regions = NULL; + add_prop_resv_regions(sdev); +} + + static bool check_page_size_mask(VirtIOIOMMU *viommu, uint64_t new_mask, Error **errp) { @@ -726,9 +697,10 @@ virtio_iommu_unset_iommu_device(PCIBus *bus, void *opaque, int devfn) if (!hiod) { return; } + virtio_iommu_unset_host_iova_ranges(viommu, hiod->aliased_bus, + hiod->aliased_devfn); g_hash_table_remove(viommu->host_iommu_devices, &key); - virtio_iommu_device_clear(viommu, bus, devfn); } static const PCIIOMMUOps virtio_iommu_ops = { @@ -815,6 +787,7 @@ static int virtio_iommu_detach(VirtIOIOMMU *s, if (QLIST_EMPTY(&domain->endpoint_list)) { g_tree_remove(s->domains, GUINT_TO_POINTER(domain->id)); } + g_tree_remove(s->endpoints, GUINT_TO_POINTER(ep_id)); return VIRTIO_IOMMU_S_OK; } @@ -977,7 +950,6 @@ static int virtio_iommu_probe(VirtIOIOMMU *s, } buf += count; free -= count; - sdev->probe_done = true; return VIRTIO_IOMMU_S_OK; } diff --git a/hw/virtio/virtio-net-pci.c b/hw/virtio/virtio-net-pci.c index e03543a70a..dba4987d6e 100644 --- a/hw/virtio/virtio-net-pci.c +++ b/hw/virtio/virtio-net-pci.c @@ -75,6 +75,7 @@ static void virtio_net_pci_class_init(ObjectClass *klass, void *data) k->device_id = PCI_DEVICE_ID_VIRTIO_NET; k->revision = VIRTIO_PCI_ABI_VERSION; k->class_id = PCI_CLASS_NETWORK_ETHERNET; + k->sriov_vf_user_creatable = true; set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); device_class_set_props(dc, virtio_net_properties); vpciklass->realize = virtio_net_pci_realize; diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 9534730bba..0c8fcc5627 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -1955,6 +1955,7 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp) uint8_t *config; uint32_t size; VirtIODevice *vdev = virtio_bus_get_device(bus); + int16_t res; /* * Virtio capabilities present without @@ -2100,6 +2101,14 @@ 
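virtio_iommu_unset_host_iova_ranges() leans on the g_steal_pointer() idiom: take ownership of the list and clear the field in one step, so reaching the free path twice degrades to freeing NULL. A two-line illustration:

#include <glib.h>

/* The ownership-transfer idiom from virtio_iommu_unset_host_iova_ranges():
 * g_steal_pointer() returns the list and NULLs the field, making the
 * g_list_free_full() safe even on a repeated unset. */
static void reset_ranges(GList **ranges)
{
    g_list_free_full(g_steal_pointer(ranges), g_free);
}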
static void virtio_pci_device_plugged(DeviceState *d, Error **errp) pci_register_bar(&proxy->pci_dev, proxy->legacy_io_bar_idx, PCI_BASE_ADDRESS_SPACE_IO, &proxy->bar); } + + res = pcie_sriov_pf_init_from_user_created_vfs(&proxy->pci_dev, + proxy->last_pcie_cap_offset, + errp); + if (res > 0) { + proxy->last_pcie_cap_offset += res; + virtio_add_feature(&vdev->host_features, VIRTIO_F_SR_IOV); + } } static void virtio_pci_device_unplugged(DeviceState *d) @@ -2187,7 +2196,7 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) if (pcie_port && pci_is_express(pci_dev)) { int pos; - uint16_t last_pcie_cap_offset = PCI_CONFIG_SPACE_SIZE; + proxy->last_pcie_cap_offset = PCI_CONFIG_SPACE_SIZE; pos = pcie_endpoint_cap_init(pci_dev, 0); assert(pos > 0); @@ -2207,9 +2216,9 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) pci_set_word(pci_dev->config + pos + PCI_PM_PMC, 0x3); if (proxy->flags & VIRTIO_PCI_FLAG_AER) { - pcie_aer_init(pci_dev, PCI_ERR_VER, last_pcie_cap_offset, + pcie_aer_init(pci_dev, PCI_ERR_VER, proxy->last_pcie_cap_offset, PCI_ERR_SIZEOF, NULL); - last_pcie_cap_offset += PCI_ERR_SIZEOF; + proxy->last_pcie_cap_offset += PCI_ERR_SIZEOF; } if (proxy->flags & VIRTIO_PCI_FLAG_INIT_DEVERR) { @@ -2234,9 +2243,9 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) } if (proxy->flags & VIRTIO_PCI_FLAG_ATS) { - pcie_ats_init(pci_dev, last_pcie_cap_offset, + pcie_ats_init(pci_dev, proxy->last_pcie_cap_offset, proxy->flags & VIRTIO_PCI_FLAG_ATS_PAGE_ALIGNED); - last_pcie_cap_offset += PCI_EXT_CAP_ATS_SIZEOF; + proxy->last_pcie_cap_offset += PCI_EXT_CAP_ATS_SIZEOF; } if (proxy->flags & VIRTIO_PCI_FLAG_INIT_FLR) { @@ -2263,6 +2272,7 @@ static void virtio_pci_exit(PCIDevice *pci_dev) bool pcie_port = pci_bus_is_express(pci_get_bus(pci_dev)) && !pci_bus_is_root(pci_get_bus(pci_dev)); + pcie_sriov_pf_exit(&proxy->pci_dev); msix_uninit_exclusive_bar(pci_dev); if (proxy->flags & VIRTIO_PCI_FLAG_AER && pcie_port && pci_is_express(pci_dev)) { diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 583a224163..397c261c3c 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -872,6 +872,46 @@ static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem, vq->used_elems[idx].ndescs = elem->ndescs; } +static void virtqueue_ordered_fill(VirtQueue *vq, const VirtQueueElement *elem, + unsigned int len) +{ + unsigned int i, steps, max_steps; + + i = vq->used_idx % vq->vring.num; + steps = 0; + /* + * We shouldn't need to increase 'i' by more than the distance + * between used_idx and last_avail_idx. + */ + max_steps = (vq->last_avail_idx - vq->used_idx) % vq->vring.num; + + /* Search for element in vq->used_elems */ + while (steps <= max_steps) { + /* Found element, set length and mark as filled */ + if (vq->used_elems[i].index == elem->index) { + vq->used_elems[i].len = len; + vq->used_elems[i].in_order_filled = true; + break; + } + + i += vq->used_elems[i].ndescs; + steps += vq->used_elems[i].ndescs; + + if (i >= vq->vring.num) { + i -= vq->vring.num; + } + } + + /* + * We should be able to find a matching VirtQueueElement in + * used_elems. If we don't, this is an error. 
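virtqueue_ordered_fill() walks used_elems element by element until it finds the completed head id. One subtlety worth flagging: the hunk advances i before adding ndescs to steps, so the bound is accounted with the next element's descriptor count; the sketch below reads ndescs once, before moving, which appears to be the intended accounting. It assumes ndescs >= 1 for every in-flight element and a fixed ring size:

#include <stdbool.h>

struct used_elem {
    unsigned index;          /* head descriptor id */
    unsigned len;            /* bytes written, set at fill time */
    unsigned ndescs;         /* descriptors occupied by the element */
    bool in_order_filled;    /* completed, awaiting flush */
};

/* Search used_elems from used_idx for the element with this head id and
 * mark it filled; a condensed take on virtqueue_ordered_fill(). Returns
 * false when nothing matched within the in-flight window, which the real
 * code reports as a guest error. */
static bool ordered_fill(struct used_elem *elems, unsigned num,
                         unsigned used_idx, unsigned last_avail_idx,
                         unsigned head_id, unsigned len)
{
    unsigned i = used_idx % num;
    unsigned steps = 0;
    unsigned max_steps = (last_avail_idx - used_idx) % num;

    while (steps <= max_steps) {
        if (elems[i].index == head_id) {
            elems[i].len = len;
            elems[i].in_order_filled = true;
            return true;
        }

        unsigned n = elems[i].ndescs;   /* element being stepped over */
        i = (i + n) % num;
        steps += n;
    }
    return false;
}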
+ */ + if (steps >= max_steps) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: %s cannot fill buffer id %u\n", + __func__, vq->vdev->name, elem->index); + } +} + static void virtqueue_packed_fill_desc(VirtQueue *vq, const VirtQueueElement *elem, unsigned int idx, @@ -922,7 +962,9 @@ void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, return; } - if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { + if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_IN_ORDER)) { + virtqueue_ordered_fill(vq, elem, len); + } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { virtqueue_packed_fill(vq, elem, len, idx); } else { virtqueue_split_fill(vq, elem, len, idx); @@ -981,6 +1023,73 @@ static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count) } } +static void virtqueue_ordered_flush(VirtQueue *vq) +{ + unsigned int i = vq->used_idx % vq->vring.num; + unsigned int ndescs = 0; + uint16_t old = vq->used_idx; + uint16_t new; + bool packed; + VRingUsedElem uelem; + + packed = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED); + + if (packed) { + if (unlikely(!vq->vring.desc)) { + return; + } + } else if (unlikely(!vq->vring.used)) { + return; + } + + /* First expected in-order element isn't ready, nothing to do */ + if (!vq->used_elems[i].in_order_filled) { + return; + } + + /* Search for filled elements in-order */ + while (vq->used_elems[i].in_order_filled) { + /* + * First entry for packed VQs is written last so the guest + * doesn't see invalid descriptors. + */ + if (packed && i != vq->used_idx) { + virtqueue_packed_fill_desc(vq, &vq->used_elems[i], ndescs, false); + } else if (!packed) { + uelem.id = vq->used_elems[i].index; + uelem.len = vq->used_elems[i].len; + vring_used_write(vq, &uelem, i); + } + + vq->used_elems[i].in_order_filled = false; + ndescs += vq->used_elems[i].ndescs; + i += vq->used_elems[i].ndescs; + if (i >= vq->vring.num) { + i -= vq->vring.num; + } + } + + if (packed) { + virtqueue_packed_fill_desc(vq, &vq->used_elems[vq->used_idx], 0, true); + vq->used_idx += ndescs; + if (vq->used_idx >= vq->vring.num) { + vq->used_idx -= vq->vring.num; + vq->used_wrap_counter ^= 1; + vq->signalled_used_valid = false; + } + } else { + /* Make sure buffer is written before we update index. 
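virtqueue_ordered_flush() then publishes every contiguous run of filled elements in one go. A condensed sketch of the split-ring half; the commented-out call marks where the real code writes the used ring entry, and the caller is expected to subtract the returned descriptor count from vq->inuse:

#include <stdbool.h>
#include <stdint.h>

struct used_elem {                    /* same shape as the previous sketch */
    unsigned index, len, ndescs;
    bool in_order_filled;
};

/* Split-ring half of virtqueue_ordered_flush(): write back each
 * contiguous filled element, then bump the free-running used index once. */
static unsigned ordered_flush_split(struct used_elem *elems, unsigned num,
                                    uint16_t *used_idx)
{
    unsigned i = *used_idx % num;
    unsigned ndescs = 0;

    while (elems[i].in_order_filled) {
        /* vring_used_write(vq, {.id = elems[i].index, .len = elems[i].len}, i); */
        elems[i].in_order_filled = false;
        ndescs += elems[i].ndescs;
        i = (i + elems[i].ndescs) % num;
    }

    /* the real code issues smp_wmb() here: buffers before index update */
    *used_idx += ndescs;
    return ndescs;
}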
*/ + smp_wmb(); + new = old + ndescs; + vring_used_idx_set(vq, new); + if (unlikely((int16_t)(new - vq->signalled_used) < + (uint16_t)(new - old))) { + vq->signalled_used_valid = false; + } + } + vq->inuse -= ndescs; +} + void virtqueue_flush(VirtQueue *vq, unsigned int count) { if (virtio_device_disabled(vq->vdev)) { @@ -988,7 +1097,9 @@ void virtqueue_flush(VirtQueue *vq, unsigned int count) return; } - if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { + if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_IN_ORDER)) { + virtqueue_ordered_flush(vq); + } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { virtqueue_packed_flush(vq, count); } else { virtqueue_split_flush(vq, count); @@ -1505,7 +1616,7 @@ static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_nu static void *virtqueue_split_pop(VirtQueue *vq, size_t sz) { - unsigned int i, head, max; + unsigned int i, head, max, idx; VRingMemoryRegionCaches *caches; MemoryRegionCache indirect_desc_cache; MemoryRegionCache *desc_cache; @@ -1629,6 +1740,13 @@ static void *virtqueue_split_pop(VirtQueue *vq, size_t sz) elem->in_sg[i] = iov[out_num + i]; } + if (virtio_vdev_has_feature(vdev, VIRTIO_F_IN_ORDER)) { + idx = (vq->last_avail_idx - 1) % vq->vring.num; + vq->used_elems[idx].index = elem->index; + vq->used_elems[idx].len = elem->len; + vq->used_elems[idx].ndescs = elem->ndescs; + } + vq->inuse++; trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num); @@ -1762,6 +1880,13 @@ static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz) elem->index = id; elem->ndescs = (desc_cache == &indirect_desc_cache) ? 1 : elem_entries; + + if (virtio_vdev_has_feature(vdev, VIRTIO_F_IN_ORDER)) { + vq->used_elems[vq->last_avail_idx].index = elem->index; + vq->used_elems[vq->last_avail_idx].len = elem->len; + vq->used_elems[vq->last_avail_idx].ndescs = elem->ndescs; + } + vq->last_avail_idx += elem->ndescs; vq->inuse += elem->ndescs; |
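Both pop paths pre-populate used_elems so the fill/flush machinery above has something to search: split rings record at (last_avail_idx - 1) because the index already advanced past the consumed head, packed rings at last_avail_idx before it advances by ndescs. A sketch of the split-ring side, assuming a power-of-two queue size so the uint16_t wraparound stays modulo-clean:

#include <stdbool.h>
#include <stdint.h>

struct used_elem {                    /* same shape as the previous sketches */
    unsigned index, len, ndescs;
    bool in_order_filled;
};

/* Record a just-popped element where the in-order machinery will look
 * for it, matching the virtqueue_split_pop() hunk; a split pop consumes
 * exactly one avail ring slot, hence the "- 1". */
static void record_popped_split(struct used_elem *elems, unsigned num,
                                uint16_t last_avail_idx, unsigned head_id,
                                unsigned len, unsigned ndescs)
{
    unsigned idx = (uint16_t)(last_avail_idx - 1) % num;

    elems[idx].index = head_id;
    elems[idx].len = len;
    elems[idx].ndescs = ndescs;
}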