diff options
Diffstat (limited to '')
119 files changed, 2067 insertions, 715 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 23174b4ca7..6dacd6d004 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -525,6 +525,7 @@ R: Phil Dennis-Jordan <phil@philjordan.eu> W: https://wiki.qemu.org/Features/HVF S: Maintained F: accel/hvf/ +F: accel/stubs/hvf-stub.c F: include/system/hvf.h F: include/system/hvf_int.h diff --git a/accel/hvf/hvf-all.c b/accel/hvf/hvf-all.c index 3fc65d6b23..8c387fda24 100644 --- a/accel/hvf/hvf-all.c +++ b/accel/hvf/hvf-all.c @@ -12,6 +12,7 @@ #include "qemu/error-report.h" #include "system/hvf.h" #include "system/hvf_int.h" +#include "hw/core/cpu.h" const char *hvf_return_string(hv_return_t ret) { diff --git a/accel/stubs/hvf-stub.c b/accel/stubs/hvf-stub.c new file mode 100644 index 0000000000..42eadc5ca9 --- /dev/null +++ b/accel/stubs/hvf-stub.c @@ -0,0 +1,12 @@ +/* + * HVF stubs for QEMU + * + * Copyright (c) Linaro + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "system/hvf.h" + +bool hvf_allowed; diff --git a/accel/stubs/meson.build b/accel/stubs/meson.build index 91a2d21925..8ca1a4529e 100644 --- a/accel/stubs/meson.build +++ b/accel/stubs/meson.build @@ -2,5 +2,6 @@ system_stubs_ss = ss.source_set() system_stubs_ss.add(when: 'CONFIG_XEN', if_false: files('xen-stub.c')) system_stubs_ss.add(when: 'CONFIG_KVM', if_false: files('kvm-stub.c')) system_stubs_ss.add(when: 'CONFIG_TCG', if_false: files('tcg-stub.c')) +system_stubs_ss.add(when: 'CONFIG_HVF', if_false: files('hvf-stub.c')) specific_ss.add_all(when: ['CONFIG_SYSTEM_ONLY'], if_true: system_stubs_ss) diff --git a/block.c b/block.c index 0ece805e41..f222e1a50a 100644 --- a/block.c +++ b/block.c @@ -3153,7 +3153,7 @@ bdrv_attach_child_common(BlockDriverState *child_bs, * stop new requests from coming in. This is fine, we don't care about the * old requests here, they are not for this child. If another place enters a * drain section for the same parent, but wants it to be fully quiesced, it - * will not run most of the the code in .drained_begin() again (which is not + * will not run most of the code in .drained_begin() again (which is not * a problem, we already did this), but it will still poll until the parent * is fully quiesced, so it will not be negatively affected either. */ diff --git a/block/nvme.c b/block/nvme.c index bbf7c23dcd..8df53ee4ca 100644 --- a/block/nvme.c +++ b/block/nvme.c @@ -18,6 +18,7 @@ #include "qobject/qstring.h" #include "qemu/defer-call.h" #include "qemu/error-report.h" +#include "qemu/host-pci-mmio.h" #include "qemu/main-loop.h" #include "qemu/module.h" #include "qemu/cutils.h" @@ -60,7 +61,7 @@ typedef struct { uint8_t *queue; uint64_t iova; /* Hardware MMIO register */ - volatile uint32_t *doorbell; + uint32_t *doorbell; } NVMeQueue; typedef struct { @@ -100,7 +101,7 @@ struct BDRVNVMeState { QEMUVFIOState *vfio; void *bar0_wo_map; /* Memory mapped registers */ - volatile struct { + struct { uint32_t sq_tail; uint32_t cq_head; } *doorbells; @@ -292,7 +293,7 @@ static void nvme_kick(NVMeQueuePair *q) assert(!(q->sq.tail & 0xFF00)); /* Fence the write to submission queue entry before notifying the device. */ smp_wmb(); - *q->sq.doorbell = cpu_to_le32(q->sq.tail); + host_pci_stl_le_p(q->sq.doorbell, q->sq.tail); q->inflight += q->need_kick; q->need_kick = 0; } @@ -441,7 +442,7 @@ static bool nvme_process_completion(NVMeQueuePair *q) if (progress) { /* Notify the device so it can post more completions. */ smp_mb_release(); - *q->cq.doorbell = cpu_to_le32(q->cq.head); + host_pci_stl_le_p(q->cq.doorbell, q->cq.head); nvme_wake_free_req_locked(q); } @@ -460,7 +461,7 @@ static void nvme_process_completion_bh(void *opaque) * so notify the device that it has space to fill in more completions now. */ smp_mb_release(); - *q->cq.doorbell = cpu_to_le32(q->cq.head); + host_pci_stl_le_p(q->cq.doorbell, q->cq.head); nvme_wake_free_req_locked(q); nvme_process_completion(q); @@ -749,9 +750,10 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace, int ret; uint64_t cap; uint32_t ver; + uint32_t cc; uint64_t timeout_ms; uint64_t deadline, now; - volatile NvmeBar *regs = NULL; + NvmeBar *regs = NULL; qemu_co_mutex_init(&s->dma_map_lock); qemu_co_queue_init(&s->dma_flush_queue); @@ -779,7 +781,7 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace, /* Perform initialize sequence as described in NVMe spec "7.6.1 * Initialization". */ - cap = le64_to_cpu(regs->cap); + cap = host_pci_ldq_le_p(®s->cap); trace_nvme_controller_capability_raw(cap); trace_nvme_controller_capability("Maximum Queue Entries Supported", 1 + NVME_CAP_MQES(cap)); @@ -805,16 +807,17 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace, bs->bl.request_alignment = s->page_size; timeout_ms = MIN(500 * NVME_CAP_TO(cap), 30000); - ver = le32_to_cpu(regs->vs); + ver = host_pci_ldl_le_p(®s->vs); trace_nvme_controller_spec_version(extract32(ver, 16, 16), extract32(ver, 8, 8), extract32(ver, 0, 8)); /* Reset device to get a clean state. */ - regs->cc = cpu_to_le32(le32_to_cpu(regs->cc) & 0xFE); + cc = host_pci_ldl_le_p(®s->cc); + host_pci_stl_le_p(®s->cc, cc & 0xFE); /* Wait for CSTS.RDY = 0. */ deadline = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + timeout_ms * SCALE_MS; - while (NVME_CSTS_RDY(le32_to_cpu(regs->csts))) { + while (NVME_CSTS_RDY(host_pci_ldl_le_p(®s->csts))) { if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) { error_setg(errp, "Timeout while waiting for device to reset (%" PRId64 " ms)", @@ -843,19 +846,21 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace, s->queues[INDEX_ADMIN] = q; s->queue_count = 1; QEMU_BUILD_BUG_ON((NVME_QUEUE_SIZE - 1) & 0xF000); - regs->aqa = cpu_to_le32(((NVME_QUEUE_SIZE - 1) << AQA_ACQS_SHIFT) | - ((NVME_QUEUE_SIZE - 1) << AQA_ASQS_SHIFT)); - regs->asq = cpu_to_le64(q->sq.iova); - regs->acq = cpu_to_le64(q->cq.iova); + host_pci_stl_le_p(®s->aqa, + ((NVME_QUEUE_SIZE - 1) << AQA_ACQS_SHIFT) | + ((NVME_QUEUE_SIZE - 1) << AQA_ASQS_SHIFT)); + host_pci_stq_le_p(®s->asq, q->sq.iova); + host_pci_stq_le_p(®s->acq, q->cq.iova); /* After setting up all control registers we can enable device now. */ - regs->cc = cpu_to_le32((ctz32(NVME_CQ_ENTRY_BYTES) << CC_IOCQES_SHIFT) | - (ctz32(NVME_SQ_ENTRY_BYTES) << CC_IOSQES_SHIFT) | - CC_EN_MASK); + host_pci_stl_le_p(®s->cc, + (ctz32(NVME_CQ_ENTRY_BYTES) << CC_IOCQES_SHIFT) | + (ctz32(NVME_SQ_ENTRY_BYTES) << CC_IOSQES_SHIFT) | + CC_EN_MASK); /* Wait for CSTS.RDY = 1. */ now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); deadline = now + timeout_ms * SCALE_MS; - while (!NVME_CSTS_RDY(le32_to_cpu(regs->csts))) { + while (!NVME_CSTS_RDY(host_pci_ldl_le_p(®s->csts))) { if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) { error_setg(errp, "Timeout while waiting for device to start (%" PRId64 " ms)", diff --git a/bsd-user/main.c b/bsd-user/main.c index 603fc80ba7..7c0a059c3b 100644 --- a/bsd-user/main.c +++ b/bsd-user/main.c @@ -175,6 +175,9 @@ static void usage(void) "-strace log system calls\n" "-trace [[enable=]<pattern>][,events=<file>][,file=<file>]\n" " specify tracing options\n" +#ifdef CONFIG_PLUGIN + "-plugin [file=]<file>[,<argname>=<argvalue>]\n" +#endif "\n" "Environment variables:\n" "QEMU_STRACE Print system calls and arguments similar to the\n" @@ -225,6 +228,8 @@ static void init_task_state(TaskState *ts) }; } +static QemuPluginList plugins = QTAILQ_HEAD_INITIALIZER(plugins); + void gemu_log(const char *fmt, ...) { va_list ap; @@ -307,6 +312,7 @@ int main(int argc, char **argv) cpu_model = NULL; qemu_add_opts(&qemu_trace_opts); + qemu_plugin_add_opts(); optind = 1; for (;;) { @@ -399,6 +405,11 @@ int main(int argc, char **argv) do_strace = 1; } else if (!strcmp(r, "trace")) { trace_opt_parse(optarg); +#ifdef CONFIG_PLUGIN + } else if (!strcmp(r, "plugin")) { + r = argv[optind++]; + qemu_plugin_opt_parse(r, &plugins); +#endif } else if (!strcmp(r, "0")) { argv0 = argv[optind++]; } else { @@ -433,6 +444,7 @@ int main(int argc, char **argv) exit(1); } trace_init_file(); + qemu_plugin_load_list(&plugins, &error_fatal); /* Zero out regs */ memset(regs, 0, sizeof(struct target_pt_regs)); diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst index 05381441a9..1a1b423030 100644 --- a/docs/about/deprecated.rst +++ b/docs/about/deprecated.rst @@ -278,6 +278,13 @@ CPU implementation for a while before removing all support. System emulator machines ------------------------ +Versioned machine types (aarch64, arm, i386, m68k, ppc64, s390x, x86_64) +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +In accordance with our versioned machine type deprecation policy, all machine +types with version |VER_MACHINE_DEPRECATION_VERSION|, or older, have been +deprecated. + Arm ``virt`` machine ``dtb-kaslr-seed`` property (since 7.1) '''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' diff --git a/docs/about/removed-features.rst b/docs/about/removed-features.rst index 790a5e481c..063284d4f8 100644 --- a/docs/about/removed-features.rst +++ b/docs/about/removed-features.rst @@ -981,10 +981,12 @@ from Linux in 2021, and is not supported anymore by QEMU either. System emulator machines ------------------------ -Note: Versioned machine types that have been introduced in a QEMU version -that has initially been released more than 6 years before are considered -obsolete and will be removed without further notice in this document. -Please use newer machine types instead. +Versioned machine types (aarch64, arm, i386, m68k, ppc64, s390x, x86_64) +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +In accordance with our versioned machine type deprecation policy, all machine +types with version |VER_MACHINE_DELETION_VERSION|, or older, have been +removed. ``s390-virtio`` (removed in 2.6) '''''''''''''''''''''''''''''''' diff --git a/docs/conf.py b/docs/conf.py index 7b5712e122..f892a6e1da 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -117,6 +117,32 @@ finally: else: version = release = "unknown version" +bits = version.split(".") + +major = int(bits[0]) +minor = int(bits[1]) +micro = int(bits[2]) + +# Check for a dev snapshot, so we can adjust to next +# predicted release version. +# +# This assumes we do 3 releases per year, so must bump +# major if minor == 2 +if micro >= 50: + micro = 0 + if minor == 2: + major += 1 + minor = 0 + else: + minor += 1 + +# These thresholds must match the constants +# MACHINE_VER_DELETION_MAJOR & MACHINE_VER_DEPRECATION_MAJOR +# defined in include/hw/boards.h and the introductory text in +# docs/about/deprecated.rst +ver_machine_deprecation_version = "%d.%d.0" % (major - 3, minor) +ver_machine_deletion_version = "%d.%d.0" % (major - 6, minor) + # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. # @@ -145,7 +171,18 @@ suppress_warnings = ["ref.option"] # environment variable is not set is for the benefit of readthedocs # style document building; our Makefile always sets the variable. confdir = os.getenv('CONFDIR', "/etc/qemu") -rst_epilog = ".. |CONFDIR| replace:: ``" + confdir + "``\n" + +vars = { + "CONFDIR": confdir, + "VER_MACHINE_DEPRECATION_VERSION": ver_machine_deprecation_version, + "VER_MACHINE_DELETION_VERSION": ver_machine_deletion_version, +} + +rst_epilog = "".join([ + ".. |" + key + "| replace:: ``" + vars[key] + "``\n" + for key in vars.keys() +]) + # We slurp in the defs.rst.inc and literally include it into rst_epilog, # because Sphinx's include:: directive doesn't work with absolute paths # and there isn't any one single relative path that will work for all diff --git a/docs/devel/testing/functional.rst b/docs/devel/testing/functional.rst index 8030cb4299..9e56dd1b11 100644 --- a/docs/devel/testing/functional.rst +++ b/docs/devel/testing/functional.rst @@ -274,7 +274,7 @@ speed mode in the meson.build file, while the "quick" speed mode is fine for functional tests that can be run without downloading files. ``make check`` then only runs the quick functional tests along with the other quick tests from the other test suites. If you choose to -run only run ``make check-functional``, the "thorough" tests will be +run only ``make check-functional``, the "thorough" tests will be executed, too. And to run all functional tests along with the others, you can use something like:: diff --git a/docs/igd-assign.txt b/docs/igd-assign.txt index 3aed7956d5..af4e8391fc 100644 --- a/docs/igd-assign.txt +++ b/docs/igd-assign.txt @@ -47,6 +47,7 @@ Intel document [1] shows how to dump VBIOS to file. For UEFI Option ROM, see QEMU also provides a "Legacy" mode that implicitly enables full functionality on IGD, it is automatically enabled when +* IGD generation is 6 to 9 (Sandy Bridge to Comet Lake) * Machine type is i440fx * IGD is assigned to guest BDF 00:02.0 * ROM BAR or romfile is present @@ -101,7 +102,7 @@ digital formats work well. Options ======= -* x-igd-opregion=[on|*off*] +* x-igd-opregion=[*on*|off] Copy host IGD OpRegion and expose it to guest with fw_cfg * x-igd-lpc=[on|*off*] @@ -123,7 +124,7 @@ Examples * Adding IGD with OpRegion and LPC ID hack, but without VGA ranges (For UEFI guests) - -device vfio-pci,host=00:02.0,id=hostdev0,addr=2.0,x-igd-legacy-mode=off,x-igd-opregion=on,x-igd-lpc=on,romfile=efi_oprom.rom + -device vfio-pci,host=00:02.0,id=hostdev0,addr=2.0,x-igd-legacy-mode=off,x-igd-lpc=on,romfile=efi_oprom.rom Guest firmware @@ -156,6 +157,12 @@ fw_cfg requirements on the VM firmware: it's expected that this fw_cfg file is only relevant to a single PCI class VGA device with Intel vendor ID, appearing at PCI bus address 00:02.0. + Starting from Meteor Lake, IGD devices access stolen memory via its MMIO + BAR2 (LMEMBAR) and removed the BDSM register in config space. There is + no need for guest firmware to allocate data stolen memory in guest address + space and write it to BDSM register. Value of this fw_cfg file is 0 in + such case. + Upstream Seabios has OpRegion and BDSM (pre-Gen11 device only) support. However, the support is not accepted by upstream EDK2/OVMF. A recommended solution is to create a virtual OpRom with following DXE drivers: diff --git a/docs/system/gdb.rst b/docs/system/gdb.rst index 4228cb56bb..d50470b135 100644 --- a/docs/system/gdb.rst +++ b/docs/system/gdb.rst @@ -20,7 +20,7 @@ connection, use the ``-gdb dev`` option instead of ``-s``. See .. parsed-literal:: - |qemu_system| -s -S -kernel bzImage -hda rootdisk.img -append "root=/dev/hda" + |qemu_system| -s -S -kernel bzImage -drive file=rootdisk.img,format=raw -append "root=/dev/sda" QEMU will launch but will silently wait for gdb to connect. diff --git a/docs/system/linuxboot.rst b/docs/system/linuxboot.rst index 5db2e560dc..2328b4a73d 100644 --- a/docs/system/linuxboot.rst +++ b/docs/system/linuxboot.rst @@ -11,7 +11,7 @@ The syntax is: .. parsed-literal:: - |qemu_system| -kernel bzImage -hda rootdisk.img -append "root=/dev/hda" + |qemu_system| -kernel bzImage -drive file=rootdisk.img,format=raw -append "root=/dev/sda" Use ``-kernel`` to provide the Linux kernel image and ``-append`` to give the kernel command line arguments. The ``-initrd`` option can be @@ -23,8 +23,8 @@ virtual serial port and the QEMU monitor to the console with the .. parsed-literal:: - |qemu_system| -kernel bzImage -hda rootdisk.img \ - -append "root=/dev/hda console=ttyS0" -nographic + |qemu_system| -kernel bzImage -drive file=rootdisk.img,format=raw \ + -append "root=/dev/sda console=ttyS0" -nographic Use Ctrl-a c to switch between the serial console and the monitor (see :ref:`GUI_keys`). diff --git a/docs/system/target-mips.rst b/docs/system/target-mips.rst index 83239fb9df..9028c3b304 100644 --- a/docs/system/target-mips.rst +++ b/docs/system/target-mips.rst @@ -112,5 +112,5 @@ https://mipsdistros.mips.com/LinuxDistro/nanomips/kernels/v4.15.18-432-gb2eb9a8b Start system emulation of Malta board with nanoMIPS I7200 CPU:: qemu-system-mipsel -cpu I7200 -kernel <kernel_image_file> \ - -M malta -serial stdio -m <memory_size> -hda <disk_image_file> \ + -M malta -serial stdio -m <memory_size> -drive file=<disk_image_file>,format=raw \ -append "mem=256m@0x0 rw console=ttyS0 vga=cirrus vesa=0x111 root=/dev/sda" diff --git a/hw/acpi/acpi-pci-hotplug-stub.c b/hw/acpi/acpi-pci-hotplug-stub.c index b67b4a92da..b7bc6e40a1 100644 --- a/hw/acpi/acpi-pci-hotplug-stub.c +++ b/hw/acpi/acpi-pci-hotplug-stub.c @@ -34,7 +34,7 @@ void acpi_pcihp_reset(AcpiPciHpState *s) { } -bool acpi_pcihp_is_hotpluggbale_bus(AcpiPciHpState *s, BusState *bus) +bool acpi_pcihp_is_hotpluggable_bus(AcpiPciHpState *s, BusState *bus) { return true; } diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c index d8adfea648..7a62f8d5bc 100644 --- a/hw/acpi/generic_event_device.c +++ b/hw/acpi/generic_event_device.c @@ -458,11 +458,11 @@ static void acpi_ged_initfn(Object *obj) * container for memory hotplug IO and expose it as GED sysbus * MMIO so that boards can map it separately. */ - memory_region_init(&s->container_memhp, OBJECT(dev), "memhp container", - MEMORY_HOTPLUG_IO_LEN); - sysbus_init_mmio(sbd, &s->container_memhp); - acpi_memory_hotplug_init(&s->container_memhp, OBJECT(dev), - &s->memhp_state, 0); + memory_region_init(&s->container_memhp, OBJECT(dev), "memhp container", + MEMORY_HOTPLUG_IO_LEN); + sysbus_init_mmio(sbd, &s->container_memhp); + acpi_memory_hotplug_init(&s->container_memhp, OBJECT(dev), + &s->memhp_state, 0); memory_region_init_io(&ged_st->regs, obj, &ged_regs_ops, ged_st, TYPE_ACPI_GED "-regs", ACPI_GED_REG_COUNT); diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c index c7a735bf64..967b67485e 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -41,15 +41,6 @@ #include "hw/mem/pc-dimm.h" #include "hw/mem/nvdimm.h" -//#define DEBUG - -#ifdef DEBUG -#define ICH9_DEBUG(fmt, ...) \ -do { printf("%s "fmt, __func__, ## __VA_ARGS__); } while (0) -#else -#define ICH9_DEBUG(fmt, ...) do { } while (0) -#endif - static void ich9_pm_update_sci_fn(ACPIREGS *regs) { ICH9LPCPMRegs *pm = container_of(regs, ICH9LPCPMRegs, acpi_regs); @@ -135,8 +126,6 @@ static const MemoryRegionOps ich9_smi_ops = { void ich9_pm_iospace_update(ICH9LPCPMRegs *pm, uint32_t pm_io_base) { - ICH9_DEBUG("to 0x%x\n", pm_io_base); - assert((pm_io_base & ICH9_PMIO_MASK) == 0); pm->pm_io_base = pm_io_base; @@ -570,7 +559,7 @@ void ich9_pm_device_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, bool ich9_pm_is_hotpluggable_bus(HotplugHandler *hotplug_dev, BusState *bus) { ICH9LPCState *lpc = ICH9_LPC_DEVICE(hotplug_dev); - return acpi_pcihp_is_hotpluggbale_bus(&lpc->pm.acpi_pci_hotplug, bus); + return acpi_pcihp_is_hotpluggable_bus(&lpc->pm.acpi_pci_hotplug, bus); } void ich9_pm_ospm_status(AcpiDeviceIf *adev, ACPIOSTInfoList ***list) diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c index 5f79c9016b..aac90013d4 100644 --- a/hw/acpi/pcihp.c +++ b/hw/acpi/pcihp.c @@ -371,7 +371,7 @@ void acpi_pcihp_device_unplug_request_cb(HotplugHandler *hotplug_dev, acpi_send_event(DEVICE(hotplug_dev), ACPI_PCI_HOTPLUG_STATUS); } -bool acpi_pcihp_is_hotpluggbale_bus(AcpiPciHpState *s, BusState *bus) +bool acpi_pcihp_is_hotpluggable_bus(AcpiPciHpState *s, BusState *bus) { Object *o = OBJECT(bus->parent); diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c index b16d45f03e..d98b80df6d 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -406,7 +406,7 @@ static bool piix4_is_hotpluggable_bus(HotplugHandler *hotplug_dev, BusState *bus) { PIIX4PMState *s = PIIX4_PM(hotplug_dev); - return acpi_pcihp_is_hotpluggbale_bus(&s->acpi_pci_hotplug, bus); + return acpi_pcihp_is_hotpluggable_bus(&s->acpi_pci_hotplug, bus); } static void piix4_pm_machine_ready(Notifier *n, void *opaque) diff --git a/hw/core/machine.c b/hw/core/machine.c index ed01798d37..b8ae155dfa 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -41,7 +41,7 @@ GlobalProperty hw_compat_10_0[] = {}; const size_t hw_compat_10_0_len = G_N_ELEMENTS(hw_compat_10_0); GlobalProperty hw_compat_9_2[] = { - {"arm-cpu", "backcompat-pauth-default-use-qarma5", "true"}, + { "arm-cpu", "backcompat-pauth-default-use-qarma5", "true"}, { "virtio-balloon-pci", "vectors", "0" }, { "virtio-balloon-pci-transitional", "vectors", "0" }, { "virtio-balloon-pci-non-transitional", "vectors", "0" }, @@ -58,12 +58,12 @@ GlobalProperty hw_compat_9_1[] = { const size_t hw_compat_9_1_len = G_N_ELEMENTS(hw_compat_9_1); GlobalProperty hw_compat_9_0[] = { - {"arm-cpu", "backcompat-cntfrq", "true" }, + { "arm-cpu", "backcompat-cntfrq", "true" }, { "scsi-hd", "migrate-emulated-scsi-request", "false" }, { "scsi-cd", "migrate-emulated-scsi-request", "false" }, - {"vfio-pci", "skip-vsc-check", "false" }, + { "vfio-pci", "skip-vsc-check", "false" }, { "virtio-pci", "x-pcie-pm-no-soft-reset", "off" }, - {"sd-card", "spec_version", "2" }, + { "sd-card", "spec_version", "2" }, }; const size_t hw_compat_9_0_len = G_N_ELEMENTS(hw_compat_9_0); diff --git a/hw/display/apple-gfx.m b/hw/display/apple-gfx.m index 2ff1c90df7..8dde1f138d 100644 --- a/hw/display/apple-gfx.m +++ b/hw/display/apple-gfx.m @@ -69,7 +69,7 @@ struct PGTask_s { mach_vm_address_t address; uint64_t len; /* - * All unique MemoryRegions for which a mapping has been created in in this + * All unique MemoryRegions for which a mapping has been created in this * task, and on which we have thus called memory_region_ref(). There are * usually very few regions of system RAM in total, so we expect this array * to be very short. Therefore, no need for sorting or fancy search diff --git a/hw/display/vhost-user-gpu.c b/hw/display/vhost-user-gpu.c index 06c4e7e190..43d4c08a2e 100644 --- a/hw/display/vhost-user-gpu.c +++ b/hw/display/vhost-user-gpu.c @@ -18,6 +18,7 @@ #include "chardev/char-fe.h" #include "qapi/error.h" #include "migration/blocker.h" +#include "standard-headers/drm/drm_fourcc.h" typedef enum VhostUserGpuRequest { VHOST_USER_GPU_NONE = 0, @@ -249,7 +250,9 @@ vhost_user_gpu_handle_display(VhostUserGPU *g, VhostUserGpuMsg *msg) case VHOST_USER_GPU_DMABUF_SCANOUT: { VhostUserGpuDMABUFScanout *m = &msg->payload.dmabuf_scanout; int fd = qemu_chr_fe_get_msgfd(&g->vhost_chr); - uint64_t modifier = 0; + uint32_t offset = 0; + uint32_t stride = m->fd_stride; + uint64_t modifier = DRM_FORMAT_MOD_INVALID; QemuDmaBuf *dmabuf; if (m->scanout_id >= g->parent_obj.conf.max_outputs) { @@ -282,10 +285,10 @@ vhost_user_gpu_handle_display(VhostUserGPU *g, VhostUserGpuMsg *msg) } dmabuf = qemu_dmabuf_new(m->width, m->height, - m->fd_stride, 0, 0, + &offset, &stride, 0, 0, m->fd_width, m->fd_height, m->fd_drm_fourcc, modifier, - fd, false, m->fd_flags & + &fd, 1, false, m->fd_flags & VIRTIO_GPU_RESOURCE_FLAG_Y_0_TOP); dpy_gl_scanout_dmabuf(con, dmabuf); diff --git a/hw/display/virtio-gpu-udmabuf.c b/hw/display/virtio-gpu-udmabuf.c index 0510577475..d804f321aa 100644 --- a/hw/display/virtio-gpu-udmabuf.c +++ b/hw/display/virtio-gpu-udmabuf.c @@ -25,6 +25,7 @@ #include <linux/memfd.h> #include "qemu/memfd.h" #include "standard-headers/linux/udmabuf.h" +#include "standard-headers/drm/drm_fourcc.h" static void virtio_gpu_create_udmabuf(struct virtio_gpu_simple_resource *res) { @@ -176,16 +177,19 @@ static VGPUDMABuf struct virtio_gpu_rect *r) { VGPUDMABuf *dmabuf; + uint32_t offset = 0; if (res->dmabuf_fd < 0) { return NULL; } dmabuf = g_new0(VGPUDMABuf, 1); - dmabuf->buf = qemu_dmabuf_new(r->width, r->height, fb->stride, + dmabuf->buf = qemu_dmabuf_new(r->width, r->height, + &offset, &fb->stride, r->x, r->y, fb->width, fb->height, qemu_pixman_to_drm_format(fb->format), - 0, res->dmabuf_fd, true, false); + DRM_FORMAT_MOD_INVALID, &res->dmabuf_fd, + 1, true, false); dmabuf->scanout_id = scanout_id; QTAILQ_INSERT_HEAD(&g->dmabuf.bufs, dmabuf, next); diff --git a/hw/hyperv/hyperv.c b/hw/hyperv/hyperv.c index 0271cfd271..e4d0688dbf 100644 --- a/hw/hyperv/hyperv.c +++ b/hw/hyperv/hyperv.c @@ -13,6 +13,8 @@ #include "qapi/error.h" #include "system/address-spaces.h" #include "system/memory.h" +#include "exec/target_page.h" +#include "linux/kvm.h" #include "system/kvm.h" #include "qemu/bitops.h" #include "qemu/error-report.h" @@ -23,7 +25,6 @@ #include "hw/hyperv/hyperv.h" #include "qom/object.h" #include "target/i386/kvm/hyperv-proto.h" -#include "target/i386/cpu.h" #include "exec/target_page.h" struct SynICState { diff --git a/hw/hyperv/meson.build b/hw/hyperv/meson.build index d3d2668c71..d1cf781f04 100644 --- a/hw/hyperv/meson.build +++ b/hw/hyperv/meson.build @@ -1,5 +1,6 @@ -specific_ss.add(when: 'CONFIG_HYPERV', if_true: files('hyperv.c')) -specific_ss.add(when: 'CONFIG_HYPERV_TESTDEV', if_true: files('hyperv_testdev.c')) -specific_ss.add(when: 'CONFIG_VMBUS', if_true: files('vmbus.c')) -specific_ss.add(when: 'CONFIG_SYNDBG', if_true: files('syndbg.c')) -specific_ss.add(when: 'CONFIG_HV_BALLOON', if_true: files('hv-balloon.c', 'hv-balloon-page_range_tree.c', 'hv-balloon-our_range_memslots.c'), if_false: files('hv-balloon-stub.c')) +system_ss.add(when: 'CONFIG_HYPERV', if_true: files('hyperv.c')) +system_ss.add(when: 'CONFIG_HYPERV_TESTDEV', if_true: files('hyperv_testdev.c')) +system_ss.add(when: 'CONFIG_VMBUS', if_true: files('vmbus.c')) +system_ss.add(when: 'CONFIG_SYNDBG', if_true: files('syndbg.c')) +system_ss.add(when: 'CONFIG_HV_BALLOON', if_true: files('hv-balloon.c', 'hv-balloon-page_range_tree.c', 'hv-balloon-our_range_memslots.c')) +system_ss.add(when: 'CONFIG_HV_BALLOON', if_false: files('hv-balloon-stub.c')) diff --git a/hw/hyperv/syndbg.c b/hw/hyperv/syndbg.c index ca291826a0..8b8a14750d 100644 --- a/hw/hyperv/syndbg.c +++ b/hw/hyperv/syndbg.c @@ -10,11 +10,11 @@ #include "qemu/error-report.h" #include "qemu/main-loop.h" #include "qemu/sockets.h" +#include "qemu/units.h" #include "qapi/error.h" #include "migration/vmstate.h" #include "hw/qdev-properties.h" #include "hw/loader.h" -#include "cpu.h" #include "exec/target_page.h" #include "hw/hyperv/hyperv.h" #include "hw/hyperv/vmbus-bridge.h" @@ -184,12 +184,15 @@ static bool create_udp_pkt(HvSynDbg *syndbg, void *pkt, uint32_t pkt_len, return true; } +#define MSG_BUFSZ (4 * KiB) + static uint16_t handle_recv_msg(HvSynDbg *syndbg, uint64_t outgpa, uint32_t count, bool is_raw, uint32_t options, uint64_t timeout, uint32_t *retrieved_count) { uint16_t ret; - uint8_t data_buf[TARGET_PAGE_SIZE - UDP_PKT_HEADER_SIZE]; + g_assert(MSG_BUFSZ >= qemu_target_page_size()); + uint8_t data_buf[MSG_BUFSZ]; hwaddr out_len; void *out_data; ssize_t recv_byte_count; @@ -202,7 +205,7 @@ static uint16_t handle_recv_msg(HvSynDbg *syndbg, uint64_t outgpa, recv_byte_count = 0; } else { recv_byte_count = recv(syndbg->socket, data_buf, - MIN(sizeof(data_buf), count), MSG_WAITALL); + MIN(MSG_BUFSZ, count), MSG_WAITALL); if (recv_byte_count == -1) { return HV_STATUS_INVALID_PARAMETER; } diff --git a/hw/hyperv/vmbus.c b/hw/hyperv/vmbus.c index b147ea06d8..961406cdd6 100644 --- a/hw/hyperv/vmbus.c +++ b/hw/hyperv/vmbus.c @@ -19,7 +19,7 @@ #include "hw/hyperv/vmbus.h" #include "hw/hyperv/vmbus-bridge.h" #include "hw/sysbus.h" -#include "cpu.h" +#include "exec/target_page.h" #include "trace.h" enum { diff --git a/hw/i2c/imx_i2c.c b/hw/i2c/imx_i2c.c index 91f84c2ad7..d26177c85d 100644 --- a/hw/i2c/imx_i2c.c +++ b/hw/i2c/imx_i2c.c @@ -79,13 +79,12 @@ static void imx_i2c_reset(DeviceState *dev) static inline void imx_i2c_raise_interrupt(IMXI2CState *s) { - /* - * raise an interrupt if the device is enabled and it is configured - * to generate some interrupts. - */ - if (imx_i2c_is_enabled(s) && imx_i2c_interrupt_is_enabled(s)) { + if (imx_i2c_is_enabled(s)) { s->i2sr |= I2SR_IIF; - qemu_irq_raise(s->irq); + + if (imx_i2c_interrupt_is_enabled(s)) { + qemu_irq_raise(s->irq); + } } } diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 3fffa4a332..f40ad062f9 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -139,7 +139,7 @@ static void init_common_fadt_data(MachineState *ms, Object *o, /* * "ICH9-LPC" or "PIIX4_PM" has "smm-compat" property to keep the old * behavior for compatibility irrelevant to smm_enabled, which doesn't - * comforms to ACPI spec. + * conform to the ACPI spec. */ bool smm_enabled = object_property_get_bool(o, "smm-compat", NULL) ? true : x86_machine_is_smm_enabled(x86ms); @@ -589,8 +589,8 @@ void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus) } } -static bool build_append_notfication_callback(Aml *parent_scope, - const PCIBus *bus) +static bool build_append_notification_callback(Aml *parent_scope, + const PCIBus *bus) { Aml *method; PCIBus *sec; @@ -604,7 +604,7 @@ static bool build_append_notfication_callback(Aml *parent_scope, continue; } nr_notifiers = nr_notifiers + - build_append_notfication_callback(br_scope, sec); + build_append_notification_callback(br_scope, sec); /* * add new child scope to parent * and keep track of bus that have PCNT, @@ -1773,7 +1773,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, PCIBus *b = PCI_HOST_BRIDGE(pci_host)->bus; scope = aml_scope("\\_SB.PCI0"); - has_pcnt = build_append_notfication_callback(scope, b); + has_pcnt = build_append_notification_callback(scope, b); if (has_pcnt) { aml_append(dsdt, scope); } diff --git a/hw/i386/acpi-build.h b/hw/i386/acpi-build.h index 0dce155c8c..275ec058a1 100644 --- a/hw/i386/acpi-build.h +++ b/hw/i386/acpi-build.h @@ -5,7 +5,7 @@ extern const struct AcpiGenericAddress x86_nvdimm_acpi_dsmio; -/* PCI Hot-plug registers bases. See docs/spec/acpi_pci_hotplug.txt */ +/* PCI Hot-plug registers' base. See docs/specs/acpi_pci_hotplug.rst */ #define ACPI_PCIHP_SEJ_BASE 0x8 #define ACPI_PCIHP_BNMR_BASE 0x10 diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c index 71afb45b63..304dffac32 100644 --- a/hw/isa/lpc_ich9.c +++ b/hw/isa/lpc_ich9.c @@ -182,7 +182,6 @@ static uint64_t ich9_cc_read(void *opaque, hwaddr addr, } /* IRQ routing */ -/* */ static void ich9_lpc_rout(uint8_t pirq_rout, int *pic_irq, int *pic_dis) { *pic_irq = pirq_rout & ICH9_LPC_PIRQ_ROUT_MASK; diff --git a/hw/net/e1000x_regs.h b/hw/net/e1000x_regs.h index cd896fc0ca..e9a74de6f4 100644 --- a/hw/net/e1000x_regs.h +++ b/hw/net/e1000x_regs.h @@ -900,7 +900,7 @@ struct e1000_context_desc { uint16_t tucse; /* TCP checksum end */ } tcp_fields; } upper_setup; - uint32_t cmd_and_length; /* */ + uint32_t cmd_and_length; union { uint32_t data; struct { diff --git a/hw/pci-host/gpex-acpi.c b/hw/pci-host/gpex-acpi.c index e8b4c64c5f..0aba47c71c 100644 --- a/hw/pci-host/gpex-acpi.c +++ b/hw/pci-host/gpex-acpi.c @@ -182,7 +182,7 @@ void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg) /* * Resources defined for PXBs are composed of the following parts: - * 1. The resources the pci-brige/pcie-root-port need. + * 1. The resources the pci-bridge/pcie-root-port need. * 2. The resources the devices behind pxb need. */ crs = build_crs(PCI_HOST_BRIDGE(BUS(bus)->parent), &crs_range_set, diff --git a/hw/pci/pci.c b/hw/pci/pci.c index fe38c4c028..352b3d12c8 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -54,13 +54,6 @@ #include "hw/xen/xen.h" #include "hw/i386/kvm/xen_evtchn.h" -//#define DEBUG_PCI -#ifdef DEBUG_PCI -# define PCI_DPRINTF(format, ...) printf(format, ## __VA_ARGS__) -#else -# define PCI_DPRINTF(format, ...) do { } while (0) -#endif - bool pci_available = true; static char *pcibus_get_dev_path(DeviceState *dev); @@ -2439,12 +2432,12 @@ static void pci_patch_ids(PCIDevice *pdev, uint8_t *ptr, uint32_t size) /* Only a valid rom will be patched. */ rom_magic = pci_get_word(ptr); if (rom_magic != 0xaa55) { - PCI_DPRINTF("Bad ROM magic %04x\n", rom_magic); + trace_pci_bad_rom_magic(rom_magic, 0xaa55); return; } pcir_offset = pci_get_word(ptr + 0x18); if (pcir_offset + 8 >= size || memcmp(ptr + pcir_offset, "PCIR", 4)) { - PCI_DPRINTF("Bad PCIR offset 0x%x or signature\n", pcir_offset); + trace_pci_bad_pcir_offset(pcir_offset); return; } @@ -2453,8 +2446,8 @@ static void pci_patch_ids(PCIDevice *pdev, uint8_t *ptr, uint32_t size) rom_vendor_id = pci_get_word(ptr + pcir_offset + 4); rom_device_id = pci_get_word(ptr + pcir_offset + 6); - PCI_DPRINTF("%s: ROM id %04x%04x / PCI id %04x%04x\n", pdev->romfile, - vendor_id, device_id, rom_vendor_id, rom_device_id); + trace_pci_rom_and_pci_ids(pdev->romfile, vendor_id, device_id, + rom_vendor_id, rom_device_id); checksum = ptr[6]; @@ -2462,7 +2455,7 @@ static void pci_patch_ids(PCIDevice *pdev, uint8_t *ptr, uint32_t size) /* Patch vendor id and checksum (at offset 6 for etherboot roms). */ checksum += (uint8_t)rom_vendor_id + (uint8_t)(rom_vendor_id >> 8); checksum -= (uint8_t)vendor_id + (uint8_t)(vendor_id >> 8); - PCI_DPRINTF("ROM checksum %02x / %02x\n", ptr[6], checksum); + trace_pci_rom_checksum_change(ptr[6], checksum); ptr[6] = checksum; pci_set_word(ptr + pcir_offset + 4, vendor_id); } @@ -2471,7 +2464,7 @@ static void pci_patch_ids(PCIDevice *pdev, uint8_t *ptr, uint32_t size) /* Patch device id and checksum (at offset 6 for etherboot roms). */ checksum += (uint8_t)rom_device_id + (uint8_t)(rom_device_id >> 8); checksum -= (uint8_t)device_id + (uint8_t)(device_id >> 8); - PCI_DPRINTF("ROM checksum %02x / %02x\n", ptr[6], checksum); + trace_pci_rom_checksum_change(ptr[6], checksum); ptr[6] = checksum; pci_set_word(ptr + pcir_offset + 6, device_id); } diff --git a/hw/pci/pcie_port.c b/hw/pci/pcie_port.c index 54f639e3d4..f3841a2656 100644 --- a/hw/pci/pcie_port.c +++ b/hw/pci/pcie_port.c @@ -188,7 +188,7 @@ int pcie_count_ds_ports(PCIBus *bus) return dsp_count; } -static bool pcie_slot_is_hotpluggbale_bus(HotplugHandler *plug_handler, +static bool pcie_slot_is_hotpluggable_bus(HotplugHandler *plug_handler, BusState *bus) { PCIESlot *s = PCIE_SLOT(bus->parent); @@ -221,7 +221,7 @@ static void pcie_slot_class_init(ObjectClass *oc, const void *data) hc->plug = pcie_cap_slot_plug_cb; hc->unplug = pcie_cap_slot_unplug_cb; hc->unplug_request = pcie_cap_slot_unplug_request_cb; - hc->is_hotpluggable_bus = pcie_slot_is_hotpluggbale_bus; + hc->is_hotpluggable_bus = pcie_slot_is_hotpluggable_bus; } static const TypeInfo pcie_slot_type_info = { diff --git a/hw/pci/trace-events b/hw/pci/trace-events index 6a9968962f..02c80d3ec8 100644 --- a/hw/pci/trace-events +++ b/hw/pci/trace-events @@ -6,6 +6,10 @@ pci_pm_transition(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, u pci_update_mappings_del(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, int bar, uint64_t addr, uint64_t size) "%s %02x:%02x.%x %d,0x%"PRIx64"+0x%"PRIx64 pci_update_mappings_add(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, int bar, uint64_t addr, uint64_t size) "%s %02x:%02x.%x %d,0x%"PRIx64"+0x%"PRIx64 pci_route_irq(int dev_irq, const char *dev_path, int parent_irq, const char *parent_path) "IRQ %d @%s -> IRQ %d @%s" +pci_bad_rom_magic(uint16_t bad_rom_magic, uint16_t good_rom_magic) "Bad ROM magic number: %04"PRIX16". Should be: %04"PRIX16 +pci_bad_pcir_offset(uint16_t pcir_offset) "Bad PCIR offset 0x%"PRIx16" or signature" +pci_rom_and_pci_ids(char *romfile, uint16_t vendor_id, uint16_t device_id, uint16_t rom_vendor_id, uint16_t rom_device_id) "%s: ROM ID %04"PRIx16":%04"PRIx16" | PCI ID %04"PRIx16":%04"PRIx16 +pci_rom_checksum_change(uint8_t old_checksum, uint8_t new_checksum) "ROM checksum changed from %02"PRIx8" to %02"PRIx8 # pci_host.c pci_cfg_read(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, unsigned offs, unsigned val) "%s %02x:%02x.%x @0x%x -> 0x%x" diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c index 1207c08d8d..785c0a0197 100644 --- a/hw/vfio/ap.c +++ b/hw/vfio/ap.c @@ -74,10 +74,10 @@ static bool vfio_ap_register_irq_notifier(VFIOAPDevice *vapdev, unsigned int irq, Error **errp) { int fd; - size_t argsz; + int ret; IOHandler *fd_read; EventNotifier *notifier; - g_autofree struct vfio_irq_info *irq_info = NULL; + struct vfio_irq_info irq_info; VFIODevice *vdev = &vapdev->vdev; switch (irq) { @@ -96,14 +96,15 @@ static bool vfio_ap_register_irq_notifier(VFIOAPDevice *vapdev, return false; } - argsz = sizeof(*irq_info); - irq_info = g_malloc0(argsz); - irq_info->index = irq; - irq_info->argsz = argsz; + ret = vfio_device_get_irq_info(vdev, irq, &irq_info); + + if (ret < 0) { + error_setg_errno(errp, -ret, "vfio: Error getting irq info"); + return false; + } - if (ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO, - irq_info) < 0 || irq_info->count < 1) { - error_setg_errno(errp, errno, "vfio: Error getting irq info"); + if (irq_info.count < 1) { + error_setg(errp, "vfio: Error getting irq info, count=0"); return false; } diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c index fde0c3fbef..cea9d6e005 100644 --- a/hw/vfio/ccw.c +++ b/hw/vfio/ccw.c @@ -376,8 +376,8 @@ static bool vfio_ccw_register_irq_notifier(VFIOCCWDevice *vcdev, Error **errp) { VFIODevice *vdev = &vcdev->vdev; - g_autofree struct vfio_irq_info *irq_info = NULL; - size_t argsz; + struct vfio_irq_info irq_info; + int ret; int fd; EventNotifier *notifier; IOHandler *fd_read; @@ -406,13 +406,15 @@ static bool vfio_ccw_register_irq_notifier(VFIOCCWDevice *vcdev, return false; } - argsz = sizeof(*irq_info); - irq_info = g_malloc0(argsz); - irq_info->index = irq; - irq_info->argsz = argsz; - if (ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO, - irq_info) < 0 || irq_info->count < 1) { - error_setg_errno(errp, errno, "vfio: Error getting irq info"); + ret = vfio_device_get_irq_info(vdev, irq, &irq_info); + + if (ret < 0) { + error_setg_errno(errp, -ret, "vfio: Error getting irq info"); + return false; + } + + if (irq_info.count < 1) { + error_setg(errp, "vfio: Error getting irq info, count=0"); return false; } @@ -502,7 +504,6 @@ static bool vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp) vcdev->io_region_offset = info->offset; vcdev->io_region = g_malloc0(info->size); - g_free(info); /* check for the optional async command region */ ret = vfio_device_get_region_info_type(vdev, VFIO_REGION_TYPE_CCW, @@ -515,7 +516,6 @@ static bool vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp) } vcdev->async_cmd_region_offset = info->offset; vcdev->async_cmd_region = g_malloc0(info->size); - g_free(info); } ret = vfio_device_get_region_info_type(vdev, VFIO_REGION_TYPE_CCW, @@ -528,7 +528,6 @@ static bool vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp) } vcdev->schib_region_offset = info->offset; vcdev->schib_region = g_malloc(info->size); - g_free(info); } ret = vfio_device_get_region_info_type(vdev, VFIO_REGION_TYPE_CCW, @@ -542,7 +541,6 @@ static bool vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp) } vcdev->crw_region_offset = info->offset; vcdev->crw_region = g_malloc(info->size); - g_free(info); } return true; @@ -552,7 +550,6 @@ out_err: g_free(vcdev->schib_region); g_free(vcdev->async_cmd_region); g_free(vcdev->io_region); - g_free(info); return false; } diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c index 09340fd97a..1c6ca94b60 100644 --- a/hw/vfio/container-base.c +++ b/hw/vfio/container-base.c @@ -85,12 +85,12 @@ int vfio_container_dma_map(VFIOContainerBase *bcontainer, int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, - IOMMUTLBEntry *iotlb) + IOMMUTLBEntry *iotlb, bool unmap_all) { VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); g_assert(vioc->dma_unmap); - return vioc->dma_unmap(bcontainer, iova, size, iotlb); + return vioc->dma_unmap(bcontainer, iova, size, iotlb, unmap_all); } bool vfio_container_add_section_window(VFIOContainerBase *bcontainer, @@ -198,11 +198,7 @@ static int vfio_device_dma_logging_report(VFIODevice *vbasedev, hwaddr iova, feature->flags = VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT; - if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) { - return -errno; - } - - return 0; + return vbasedev->io_ops->device_feature(vbasedev, feature); } static int vfio_container_iommu_query_dirty_bitmap(const VFIOContainerBase *bcontainer, diff --git a/hw/vfio/container.c b/hw/vfio/container.c index 77ff56b43f..a9f0dbaec4 100644 --- a/hw/vfio/container.c +++ b/hw/vfio/container.c @@ -119,12 +119,9 @@ unmap_exit: return ret; } -/* - * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86 - */ -static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer, - hwaddr iova, ram_addr_t size, - IOMMUTLBEntry *iotlb) +static int vfio_legacy_dma_unmap_one(const VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, + IOMMUTLBEntry *iotlb) { const VFIOContainer *container = container_of(bcontainer, VFIOContainer, bcontainer); @@ -181,6 +178,34 @@ static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer, return 0; } +/* + * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86 + */ +static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, + IOMMUTLBEntry *iotlb, bool unmap_all) +{ + int ret; + + if (unmap_all) { + /* The unmap ioctl doesn't accept a full 64-bit span. */ + Int128 llsize = int128_rshift(int128_2_64(), 1); + + ret = vfio_legacy_dma_unmap_one(bcontainer, 0, int128_get64(llsize), + iotlb); + + if (ret == 0) { + ret = vfio_legacy_dma_unmap_one(bcontainer, int128_get64(llsize), + int128_get64(llsize), iotlb); + } + + } else { + ret = vfio_legacy_dma_unmap_one(bcontainer, iova, size, iotlb); + } + + return ret; +} + static int vfio_legacy_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, void *vaddr, bool readonly) { @@ -205,7 +230,7 @@ static int vfio_legacy_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova, */ if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 || (errno == EBUSY && - vfio_legacy_dma_unmap(bcontainer, iova, size, NULL) == 0 && + vfio_legacy_dma_unmap(bcontainer, iova, size, NULL, false) == 0 && ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) { return 0; } @@ -511,16 +536,10 @@ static bool vfio_legacy_setup(VFIOContainerBase *bcontainer, Error **errp) return true; } -static bool vfio_container_connect(VFIOGroup *group, AddressSpace *as, - Error **errp) +static bool vfio_container_attach_discard_disable(VFIOContainer *container, + VFIOGroup *group, Error **errp) { - VFIOContainer *container; - VFIOContainerBase *bcontainer; - int ret, fd; - VFIOAddressSpace *space; - VFIOIOMMUClass *vioc; - - space = vfio_address_space_get(as); + int ret; /* * VFIO is currently incompatible with discarding of RAM insofar as the @@ -553,97 +572,118 @@ static bool vfio_container_connect(VFIOGroup *group, AddressSpace *as, * details once we know which type of IOMMU we are using. */ + ret = vfio_ram_block_discard_disable(container, true); + if (ret) { + error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken"); + if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER, &container->fd)) { + error_report("vfio: error disconnecting group %d from" + " container", group->groupid); + } + } + return !ret; +} + +static bool vfio_container_group_add(VFIOContainer *container, VFIOGroup *group, + Error **errp) +{ + if (!vfio_container_attach_discard_disable(container, group, errp)) { + return false; + } + group->container = container; + QLIST_INSERT_HEAD(&container->group_list, group, container_next); + vfio_group_add_kvm_device(group); + return true; +} + +static void vfio_container_group_del(VFIOContainer *container, VFIOGroup *group) +{ + QLIST_REMOVE(group, container_next); + group->container = NULL; + vfio_group_del_kvm_device(group); + vfio_ram_block_discard_disable(container, false); +} + +static bool vfio_container_connect(VFIOGroup *group, AddressSpace *as, + Error **errp) +{ + VFIOContainer *container; + VFIOContainerBase *bcontainer; + int ret, fd = -1; + VFIOAddressSpace *space; + VFIOIOMMUClass *vioc = NULL; + bool new_container = false; + bool group_was_added = false; + + space = vfio_address_space_get(as); + QLIST_FOREACH(bcontainer, &space->containers, next) { container = container_of(bcontainer, VFIOContainer, bcontainer); if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) { - ret = vfio_ram_block_discard_disable(container, true); - if (ret) { - error_setg_errno(errp, -ret, - "Cannot set discarding of RAM broken"); - if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER, - &container->fd)) { - error_report("vfio: error disconnecting group %d from" - " container", group->groupid); - } - return false; - } - group->container = container; - QLIST_INSERT_HEAD(&container->group_list, group, container_next); - vfio_group_add_kvm_device(group); - return true; + return vfio_container_group_add(container, group, errp); } } fd = qemu_open("/dev/vfio/vfio", O_RDWR, errp); if (fd < 0) { - goto put_space_exit; + goto fail; } ret = ioctl(fd, VFIO_GET_API_VERSION); if (ret != VFIO_API_VERSION) { error_setg(errp, "supported vfio version: %d, " "reported version: %d", VFIO_API_VERSION, ret); - goto close_fd_exit; + goto fail; } container = vfio_create_container(fd, group, errp); if (!container) { - goto close_fd_exit; + goto fail; } + new_container = true; bcontainer = &container->bcontainer; if (!vfio_cpr_register_container(bcontainer, errp)) { - goto free_container_exit; - } - - ret = vfio_ram_block_discard_disable(container, true); - if (ret) { - error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken"); - goto unregister_container_exit; + goto fail; } vioc = VFIO_IOMMU_GET_CLASS(bcontainer); assert(vioc->setup); if (!vioc->setup(bcontainer, errp)) { - goto enable_discards_exit; + goto fail; } - vfio_group_add_kvm_device(group); - vfio_address_space_insert(space, bcontainer); - group->container = container; - QLIST_INSERT_HEAD(&container->group_list, group, container_next); + if (!vfio_container_group_add(container, group, errp)) { + goto fail; + } + group_was_added = true; if (!vfio_listener_register(bcontainer, errp)) { - goto listener_release_exit; + goto fail; } bcontainer->initialized = true; return true; -listener_release_exit: - QLIST_REMOVE(group, container_next); - vfio_group_del_kvm_device(group); + +fail: vfio_listener_unregister(bcontainer); - if (vioc->release) { + + if (group_was_added) { + vfio_container_group_del(container, group); + } + if (vioc && vioc->release) { vioc->release(bcontainer); } - -enable_discards_exit: - vfio_ram_block_discard_disable(container, false); - -unregister_container_exit: - vfio_cpr_unregister_container(bcontainer); - -free_container_exit: - object_unref(container); - -close_fd_exit: - close(fd); - -put_space_exit: + if (new_container) { + vfio_cpr_unregister_container(bcontainer); + object_unref(container); + } + if (fd >= 0) { + close(fd); + } vfio_address_space_put(space); return false; @@ -811,18 +851,14 @@ static bool vfio_device_get(VFIOGroup *group, const char *name, } } + vfio_device_prepare(vbasedev, &group->container->bcontainer, info); + vbasedev->fd = fd; vbasedev->group = group; QLIST_INSERT_HEAD(&group->device_list, vbasedev, next); - vbasedev->num_irqs = info->num_irqs; - vbasedev->num_regions = info->num_regions; - vbasedev->flags = info->flags; - trace_vfio_device_get(name, info->flags, info->num_regions, info->num_irqs); - vbasedev->reset_works = !!(info->flags & VFIO_DEVICE_FLAGS_RESET); - return true; } @@ -875,7 +911,6 @@ static bool vfio_legacy_attach_device(const char *name, VFIODevice *vbasedev, int groupid = vfio_device_get_groupid(vbasedev, errp); VFIODevice *vbasedev_iter; VFIOGroup *group; - VFIOContainerBase *bcontainer; if (groupid < 0) { return false; @@ -904,11 +939,6 @@ static bool vfio_legacy_attach_device(const char *name, VFIODevice *vbasedev, goto device_put_exit; } - bcontainer = &group->container->bcontainer; - vbasedev->bcontainer = bcontainer; - QLIST_INSERT_HEAD(&bcontainer->device_list, vbasedev, container_next); - QLIST_INSERT_HEAD(&vfio_device_list, vbasedev, global_next); - return true; device_put_exit: @@ -922,10 +952,10 @@ static void vfio_legacy_detach_device(VFIODevice *vbasedev) { VFIOGroup *group = vbasedev->group; - QLIST_REMOVE(vbasedev, global_next); - QLIST_REMOVE(vbasedev, container_next); - vbasedev->bcontainer = NULL; trace_vfio_device_detach(vbasedev->name, group->groupid); + + vfio_device_unprepare(vbasedev); + object_unref(vbasedev->hiod); vfio_device_put(vbasedev); vfio_group_put(group); diff --git a/hw/vfio/device.c b/hw/vfio/device.c index d625a7c4db..9fba2c7272 100644 --- a/hw/vfio/device.c +++ b/hw/vfio/device.c @@ -82,7 +82,7 @@ void vfio_device_irq_disable(VFIODevice *vbasedev, int index) .count = 0, }; - ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); + vbasedev->io_ops->set_irqs(vbasedev, &irq_set); } void vfio_device_irq_unmask(VFIODevice *vbasedev, int index) @@ -95,7 +95,7 @@ void vfio_device_irq_unmask(VFIODevice *vbasedev, int index) .count = 1, }; - ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); + vbasedev->io_ops->set_irqs(vbasedev, &irq_set); } void vfio_device_irq_mask(VFIODevice *vbasedev, int index) @@ -108,7 +108,7 @@ void vfio_device_irq_mask(VFIODevice *vbasedev, int index) .count = 1, }; - ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); + vbasedev->io_ops->set_irqs(vbasedev, &irq_set); } static inline const char *action_to_str(int action) @@ -167,7 +167,7 @@ bool vfio_device_irq_set_signaling(VFIODevice *vbasedev, int index, int subindex pfd = (int32_t *)&irq_set->data; *pfd = fd; - if (!ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set)) { + if (!vbasedev->io_ops->set_irqs(vbasedev, irq_set)) { return true; } @@ -185,10 +185,28 @@ bool vfio_device_irq_set_signaling(VFIODevice *vbasedev, int index, int subindex return false; } +int vfio_device_get_irq_info(VFIODevice *vbasedev, int index, + struct vfio_irq_info *info) +{ + memset(info, 0, sizeof(*info)); + + info->argsz = sizeof(*info); + info->index = index; + + return vbasedev->io_ops->get_irq_info(vbasedev, info); +} + int vfio_device_get_region_info(VFIODevice *vbasedev, int index, struct vfio_region_info **info) { size_t argsz = sizeof(struct vfio_region_info); + int ret; + + /* check cache */ + if (vbasedev->reginfo[index] != NULL) { + *info = vbasedev->reginfo[index]; + return 0; + } *info = g_malloc0(argsz); @@ -196,10 +214,11 @@ int vfio_device_get_region_info(VFIODevice *vbasedev, int index, retry: (*info)->argsz = argsz; - if (ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, *info)) { + ret = vbasedev->io_ops->get_region_info(vbasedev, *info); + if (ret != 0) { g_free(*info); *info = NULL; - return -errno; + return ret; } if ((*info)->argsz > argsz) { @@ -209,6 +228,9 @@ retry: goto retry; } + /* fill cache */ + vbasedev->reginfo[index] = *info; + return 0; } @@ -227,7 +249,6 @@ int vfio_device_get_region_info_type(VFIODevice *vbasedev, uint32_t type, hdr = vfio_get_region_info_cap(*info, VFIO_REGION_INFO_CAP_TYPE); if (!hdr) { - g_free(*info); continue; } @@ -239,8 +260,6 @@ int vfio_device_get_region_info_type(VFIODevice *vbasedev, uint32_t type, if (cap_type->type == type && cap_type->subtype == subtype) { return 0; } - - g_free(*info); } *info = NULL; @@ -249,7 +268,7 @@ int vfio_device_get_region_info_type(VFIODevice *vbasedev, uint32_t type, bool vfio_device_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type) { - g_autofree struct vfio_region_info *info = NULL; + struct vfio_region_info *info = NULL; bool ret = false; if (!vfio_device_get_region_info(vbasedev, region, &info)) { @@ -305,11 +324,14 @@ void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp) vbasedev->fd = fd; } +static VFIODeviceIOOps vfio_device_io_ops_ioctl; + void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops, DeviceState *dev, bool ram_discard) { vbasedev->type = type; vbasedev->ops = ops; + vbasedev->io_ops = &vfio_device_io_ops_ioctl; vbasedev->dev = dev; vbasedev->fd = -1; @@ -370,27 +392,35 @@ bool vfio_device_hiod_create_and_realize(VFIODevice *vbasedev, VFIODevice *vfio_get_vfio_device(Object *obj) { if (object_dynamic_cast(obj, TYPE_VFIO_PCI)) { - return &VFIO_PCI(obj)->vbasedev; + return &VFIO_PCI_BASE(obj)->vbasedev; } else { return NULL; } } -bool vfio_device_attach(char *name, VFIODevice *vbasedev, - AddressSpace *as, Error **errp) +bool vfio_device_attach_by_iommu_type(const char *iommu_type, char *name, + VFIODevice *vbasedev, AddressSpace *as, + Error **errp) { const VFIOIOMMUClass *ops = - VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY)); - - if (vbasedev->iommufd) { - ops = VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD)); - } + VFIO_IOMMU_CLASS(object_class_by_name(iommu_type)); assert(ops); return ops->attach_device(name, vbasedev, as, errp); } +bool vfio_device_attach(char *name, VFIODevice *vbasedev, + AddressSpace *as, Error **errp) +{ + const char *iommu_type = vbasedev->iommufd ? + TYPE_VFIO_IOMMU_IOMMUFD : + TYPE_VFIO_IOMMU_LEGACY; + + return vfio_device_attach_by_iommu_type(iommu_type, name, vbasedev, + as, errp); +} + void vfio_device_detach(VFIODevice *vbasedev) { if (!vbasedev->bcontainer) { @@ -398,3 +428,120 @@ void vfio_device_detach(VFIODevice *vbasedev) } VFIO_IOMMU_GET_CLASS(vbasedev->bcontainer)->detach_device(vbasedev); } + +void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainerBase *bcontainer, + struct vfio_device_info *info) +{ + vbasedev->num_irqs = info->num_irqs; + vbasedev->num_regions = info->num_regions; + vbasedev->flags = info->flags; + vbasedev->reset_works = !!(info->flags & VFIO_DEVICE_FLAGS_RESET); + + vbasedev->bcontainer = bcontainer; + QLIST_INSERT_HEAD(&bcontainer->device_list, vbasedev, container_next); + + QLIST_INSERT_HEAD(&vfio_device_list, vbasedev, global_next); + + vbasedev->reginfo = g_new0(struct vfio_region_info *, + vbasedev->num_regions); +} + +void vfio_device_unprepare(VFIODevice *vbasedev) +{ + int i; + + for (i = 0; i < vbasedev->num_regions; i++) { + g_free(vbasedev->reginfo[i]); + } + g_free(vbasedev->reginfo); + vbasedev->reginfo = NULL; + + QLIST_REMOVE(vbasedev, container_next); + QLIST_REMOVE(vbasedev, global_next); + vbasedev->bcontainer = NULL; +} + +/* + * Traditional ioctl() based io + */ + +static int vfio_device_io_device_feature(VFIODevice *vbasedev, + struct vfio_device_feature *feature) +{ + int ret; + + ret = ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature); + + return ret < 0 ? -errno : ret; +} + +static int vfio_device_io_get_region_info(VFIODevice *vbasedev, + struct vfio_region_info *info) +{ + int ret; + + ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, info); + + return ret < 0 ? -errno : ret; +} + +static int vfio_device_io_get_irq_info(VFIODevice *vbasedev, + struct vfio_irq_info *info) +{ + int ret; + + ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_IRQ_INFO, info); + + return ret < 0 ? -errno : ret; +} + +static int vfio_device_io_set_irqs(VFIODevice *vbasedev, + struct vfio_irq_set *irqs) +{ + int ret; + + ret = ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irqs); + + return ret < 0 ? -errno : ret; +} + +static int vfio_device_io_region_read(VFIODevice *vbasedev, uint8_t index, + off_t off, uint32_t size, void *data) +{ + struct vfio_region_info *info; + int ret; + + ret = vfio_device_get_region_info(vbasedev, index, &info); + if (ret != 0) { + return ret; + } + + ret = pread(vbasedev->fd, data, size, info->offset + off); + + return ret < 0 ? -errno : ret; +} + +static int vfio_device_io_region_write(VFIODevice *vbasedev, uint8_t index, + off_t off, uint32_t size, void *data) +{ + struct vfio_region_info *info; + int ret; + + ret = vfio_device_get_region_info(vbasedev, index, &info); + if (ret != 0) { + return ret; + } + + ret = pwrite(vbasedev->fd, data, size, info->offset + off); + + return ret < 0 ? -errno : ret; +} + +static VFIODeviceIOOps vfio_device_io_ops_ioctl = { + .device_feature = vfio_device_io_device_feature, + .get_region_info = vfio_device_io_get_region_info, + .get_irq_info = vfio_device_io_get_irq_info, + .set_irqs = vfio_device_io_set_irqs, + .region_read = vfio_device_io_region_read, + .region_write = vfio_device_io_region_write, +}; diff --git a/hw/vfio/display.c b/hw/vfio/display.c index f3e6581f15..9c6f5aa265 100644 --- a/hw/vfio/display.c +++ b/hw/vfio/display.c @@ -213,6 +213,7 @@ static VFIODMABuf *vfio_display_get_dmabuf(VFIOPCIDevice *vdev, struct vfio_device_gfx_plane_info plane; VFIODMABuf *dmabuf; int fd, ret; + uint32_t offset = 0; memset(&plane, 0, sizeof(plane)); plane.argsz = sizeof(plane); @@ -245,10 +246,10 @@ static VFIODMABuf *vfio_display_get_dmabuf(VFIOPCIDevice *vdev, dmabuf = g_new0(VFIODMABuf, 1); dmabuf->dmabuf_id = plane.dmabuf_id; - dmabuf->buf = qemu_dmabuf_new(plane.width, plane.height, - plane.stride, 0, 0, plane.width, + dmabuf->buf = qemu_dmabuf_new(plane.width, plane.height, &offset, + &plane.stride, 0, 0, plane.width, plane.height, plane.drm_format, - plane.drm_format_mod, fd, false, false); + plane.drm_format_mod, &fd, 1, false, false); if (plane_type == DRM_PLANE_TYPE_CURSOR) { vfio_display_update_cursor(dmabuf, &plane); diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c index d7e4728fdc..e7952d15a0 100644 --- a/hw/vfio/igd.c +++ b/hw/vfio/igd.c @@ -103,6 +103,7 @@ static int igd_gen(VFIOPCIDevice *vdev) /* * Unfortunately, Intel changes it's specification quite often. This makes * it impossible to use a suitable default value for unknown devices. + * Return -1 for not applying any generation-specific quirks. */ return -1; } @@ -182,16 +183,13 @@ static bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev, trace_vfio_pci_igd_opregion_enabled(vdev->vbasedev.name); - pci_set_long(vdev->pdev.config + IGD_ASLS, 0); - pci_set_long(vdev->pdev.wmask + IGD_ASLS, ~0); - pci_set_long(vdev->emulated_config_bits + IGD_ASLS, ~0); - return true; } -static bool vfio_pci_igd_setup_opregion(VFIOPCIDevice *vdev, Error **errp) +static bool vfio_pci_igd_opregion_detect(VFIOPCIDevice *vdev, + struct vfio_region_info **opregion, + Error **errp) { - g_autofree struct vfio_region_info *opregion = NULL; int ret; /* Hotplugging is not supported for opregion access */ @@ -202,17 +200,13 @@ static bool vfio_pci_igd_setup_opregion(VFIOPCIDevice *vdev, Error **errp) ret = vfio_device_get_region_info_type(&vdev->vbasedev, VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL, - VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion); + VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, opregion); if (ret) { error_setg_errno(errp, -ret, "Device does not supports IGD OpRegion feature"); return false; } - if (!vfio_pci_igd_opregion_init(vdev, opregion, errp)) { - return false; - } - return true; } @@ -355,8 +349,8 @@ static int vfio_pci_igd_lpc_init(VFIOPCIDevice *vdev, static bool vfio_pci_igd_setup_lpc_bridge(VFIOPCIDevice *vdev, Error **errp) { - g_autofree struct vfio_region_info *host = NULL; - g_autofree struct vfio_region_info *lpc = NULL; + struct vfio_region_info *host = NULL; + struct vfio_region_info *lpc = NULL; PCIDevice *lpc_bridge; int ret; @@ -419,6 +413,44 @@ static bool vfio_pci_igd_setup_lpc_bridge(VFIOPCIDevice *vdev, Error **errp) return true; } +static bool vfio_pci_igd_override_gms(int gen, uint32_t gms, uint32_t *gmch) +{ + bool ret = false; + + if (gen == -1) { + error_report("x-igd-gms is not supported on this device"); + } else if (gen < 8) { + if (gms <= 0x10) { + *gmch &= ~(IGD_GMCH_GEN6_GMS_MASK << IGD_GMCH_GEN6_GMS_SHIFT); + *gmch |= gms << IGD_GMCH_GEN6_GMS_SHIFT; + ret = true; + } else { + error_report(QERR_INVALID_PARAMETER_VALUE, "x-igd-gms", "0~0x10"); + } + } else if (gen == 8) { + if (gms <= 0x40) { + *gmch &= ~(IGD_GMCH_GEN8_GMS_MASK << IGD_GMCH_GEN8_GMS_SHIFT); + *gmch |= gms << IGD_GMCH_GEN8_GMS_SHIFT; + ret = true; + } else { + error_report(QERR_INVALID_PARAMETER_VALUE, "x-igd-gms", "0~0x40"); + } + } else { + /* 0x0 to 0x40: 32MB increments starting at 0MB */ + /* 0xf0 to 0xfe: 4MB increments starting at 4MB */ + if ((gms <= 0x40) || (gms >= 0xf0 && gms <= 0xfe)) { + *gmch &= ~(IGD_GMCH_GEN8_GMS_MASK << IGD_GMCH_GEN8_GMS_SHIFT); + *gmch |= gms << IGD_GMCH_GEN8_GMS_SHIFT; + ret = true; + } else { + error_report(QERR_INVALID_PARAMETER_VALUE, + "x-igd-gms", "0~0x40 or 0xf0~0xfe"); + } + } + + return ret; +} + #define IGD_GGC_MMIO_OFFSET 0x108040 #define IGD_BDSM_MMIO_OFFSET 0x1080C0 @@ -428,41 +460,35 @@ void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr) VFIOConfigMirrorQuirk *ggc_mirror, *bdsm_mirror; int gen; - /* - * This must be an Intel VGA device at address 00:02.0 for us to even - * consider enabling legacy mode. Some driver have dependencies on the PCI - * bus address. - */ if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) || !vfio_is_vga(vdev) || nr != 0) { return; } - /* - * Only on IGD devices of gen 11 and above, the BDSM register is mirrored - * into MMIO space and read from MMIO space by the Windows driver. - */ + /* Only on IGD Gen6-12 device needs quirks in BAR 0 */ gen = igd_gen(vdev); if (gen < 6) { return; } - ggc_quirk = vfio_quirk_alloc(1); - ggc_mirror = ggc_quirk->data = g_malloc0(sizeof(*ggc_mirror)); - ggc_mirror->mem = ggc_quirk->mem; - ggc_mirror->vdev = vdev; - ggc_mirror->bar = nr; - ggc_mirror->offset = IGD_GGC_MMIO_OFFSET; - ggc_mirror->config_offset = IGD_GMCH; - - memory_region_init_io(ggc_mirror->mem, OBJECT(vdev), - &vfio_generic_mirror_quirk, ggc_mirror, - "vfio-igd-ggc-quirk", 2); - memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, - ggc_mirror->offset, ggc_mirror->mem, - 1); - - QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, ggc_quirk, next); + if (vdev->igd_gms) { + ggc_quirk = vfio_quirk_alloc(1); + ggc_mirror = ggc_quirk->data = g_malloc0(sizeof(*ggc_mirror)); + ggc_mirror->mem = ggc_quirk->mem; + ggc_mirror->vdev = vdev; + ggc_mirror->bar = nr; + ggc_mirror->offset = IGD_GGC_MMIO_OFFSET; + ggc_mirror->config_offset = IGD_GMCH; + + memory_region_init_io(ggc_mirror->mem, OBJECT(vdev), + &vfio_generic_mirror_quirk, ggc_mirror, + "vfio-igd-ggc-quirk", 2); + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, + ggc_mirror->offset, ggc_mirror->mem, + 1); + + QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, ggc_quirk, next); + } bdsm_quirk = vfio_quirk_alloc(1); bdsm_mirror = bdsm_quirk->data = g_malloc0(sizeof(*bdsm_mirror)); @@ -484,44 +510,37 @@ void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr) static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp) { + struct vfio_region_info *opregion = NULL; int ret, gen; - uint64_t gms_size; + uint64_t gms_size = 0; uint64_t *bdsm_size; uint32_t gmch; bool legacy_mode_enabled = false; Error *err = NULL; - /* - * This must be an Intel VGA device at address 00:02.0 for us to even - * consider enabling legacy mode. The vBIOS has dependencies on the - * PCI bus address. - */ if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) || !vfio_is_vga(vdev)) { return true; } - /* - * IGD is not a standard, they like to change their specs often. We - * only attempt to support back to SandBridge and we hope that newer - * devices maintain compatibility with generation 8. - */ - gen = igd_gen(vdev); - if (gen == -1) { - error_report("IGD device %s is unsupported in legacy mode, " - "try SandyBridge or newer", vdev->vbasedev.name); + /* IGD device always comes with OpRegion */ + if (!vfio_pci_igd_opregion_detect(vdev, &opregion, errp)) { return true; } + info_report("OpRegion detected on Intel display %x.", vdev->device_id); + gen = igd_gen(vdev); gmch = vfio_pci_read_config(&vdev->pdev, IGD_GMCH, 4); /* * For backward compatibility, enable legacy mode when + * - Device geneation is 6 to 9 (including both) * - Machine type is i440fx (pc_piix) * - IGD device is at guest BDF 00:02.0 * - Not manually disabled by x-igd-legacy-mode=off */ if ((vdev->igd_legacy_mode != ON_OFF_AUTO_OFF) && + (gen >= 6 && gen <= 9) && !strcmp(MACHINE_GET_CLASS(qdev_get_machine())->family, "pc_piix") && (&vdev->pdev == pci_find_device(pci_device_root_bus(&vdev->pdev), 0, PCI_DEVFN(0x2, 0)))) { @@ -532,7 +551,7 @@ static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp) * - OpRegion * - Same LPC bridge and Host bridge VID/DID/SVID/SSID as host */ - g_autofree struct vfio_region_info *rom = NULL; + struct vfio_region_info *rom = NULL; legacy_mode_enabled = true; info_report("IGD legacy mode enabled, " @@ -566,13 +585,15 @@ static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp) vdev->features |= VFIO_FEATURE_ENABLE_IGD_LPC; } else if (vdev->igd_legacy_mode == ON_OFF_AUTO_ON) { error_setg(&err, - "Machine is not i440fx or assigned BDF is not 00:02.0"); + "Machine is not i440fx, assigned BDF is not 00:02.0, " + "or device %04x (gen %d) doesn't support legacy mode", + vdev->device_id, gen); goto error; } /* Setup OpRegion access */ if ((vdev->features & VFIO_FEATURE_ENABLE_IGD_OPREGION) && - !vfio_pci_igd_setup_opregion(vdev, errp)) { + !vfio_pci_igd_opregion_init(vdev, opregion, errp)) { goto error; } @@ -580,7 +601,15 @@ static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp) if ((vdev->features & VFIO_FEATURE_ENABLE_IGD_LPC) && !vfio_pci_igd_setup_lpc_bridge(vdev, errp)) { goto error; - } + } + + /* + * ASLS (OpRegion address) is read-only, emulated + * It contains HPA, guest firmware need to reprogram it with GPA. + */ + pci_set_long(vdev->pdev.config + IGD_ASLS, 0); + pci_set_long(vdev->pdev.wmask + IGD_ASLS, ~0); + pci_set_long(vdev->emulated_config_bits + IGD_ASLS, ~0); /* * Allow user to override dsm size using x-igd-gms option, in multiples of @@ -588,56 +617,44 @@ static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp) * set from DVMT Pre-Allocated option in host BIOS. */ if (vdev->igd_gms) { - if (gen < 8) { - if (vdev->igd_gms <= 0x10) { - gmch &= ~(IGD_GMCH_GEN6_GMS_MASK << IGD_GMCH_GEN6_GMS_SHIFT); - gmch |= vdev->igd_gms << IGD_GMCH_GEN6_GMS_SHIFT; - } else { - error_report(QERR_INVALID_PARAMETER_VALUE, - "x-igd-gms", "0~0x10"); - } - } else { - if (vdev->igd_gms <= 0x40) { - gmch &= ~(IGD_GMCH_GEN8_GMS_MASK << IGD_GMCH_GEN8_GMS_SHIFT); - gmch |= vdev->igd_gms << IGD_GMCH_GEN8_GMS_SHIFT; - } else { - error_report(QERR_INVALID_PARAMETER_VALUE, - "x-igd-gms", "0~0x40"); - } + if (!vfio_pci_igd_override_gms(gen, vdev->igd_gms, &gmch)) { + return false; } + + /* GMCH is read-only, emulated */ + pci_set_long(vdev->pdev.config + IGD_GMCH, gmch); + pci_set_long(vdev->pdev.wmask + IGD_GMCH, 0); + pci_set_long(vdev->emulated_config_bits + IGD_GMCH, ~0); } - gms_size = igd_stolen_memory_size(gen, gmch); + if (gen > 0) { + gms_size = igd_stolen_memory_size(gen, gmch); + + /* BDSM is read-write, emulated. BIOS needs to be able to write it */ + if (gen < 11) { + pci_set_long(vdev->pdev.config + IGD_BDSM, 0); + pci_set_long(vdev->pdev.wmask + IGD_BDSM, ~0); + pci_set_long(vdev->emulated_config_bits + IGD_BDSM, ~0); + } else { + pci_set_quad(vdev->pdev.config + IGD_BDSM_GEN11, 0); + pci_set_quad(vdev->pdev.wmask + IGD_BDSM_GEN11, ~0); + pci_set_quad(vdev->emulated_config_bits + IGD_BDSM_GEN11, ~0); + } + } /* * Request reserved memory for stolen memory via fw_cfg. VM firmware * must allocate a 1MB aligned reserved memory region below 4GB with - * the requested size (in bytes) for use by the Intel PCI class VGA - * device at VM address 00:02.0. The base address of this reserved - * memory region must be written to the device BDSM register at PCI - * config offset 0x5C. + * the requested size (in bytes) for use by the IGD device. The base + * address of this reserved memory region must be written to the + * device BDSM register. + * For newer device without BDSM register, this fw_cfg item is 0. */ bdsm_size = g_malloc(sizeof(*bdsm_size)); *bdsm_size = cpu_to_le64(gms_size); fw_cfg_add_file(fw_cfg_find(), "etc/igd-bdsm-size", bdsm_size, sizeof(*bdsm_size)); - /* GMCH is read-only, emulated */ - pci_set_long(vdev->pdev.config + IGD_GMCH, gmch); - pci_set_long(vdev->pdev.wmask + IGD_GMCH, 0); - pci_set_long(vdev->emulated_config_bits + IGD_GMCH, ~0); - - /* BDSM is read-write, emulated. The BIOS needs to be able to write it */ - if (gen < 11) { - pci_set_long(vdev->pdev.config + IGD_BDSM, 0); - pci_set_long(vdev->pdev.wmask + IGD_BDSM, ~0); - pci_set_long(vdev->emulated_config_bits + IGD_BDSM, ~0); - } else { - pci_set_quad(vdev->pdev.config + IGD_BDSM_GEN11, 0); - pci_set_quad(vdev->pdev.wmask + IGD_BDSM_GEN11, ~0); - pci_set_quad(vdev->emulated_config_bits + IGD_BDSM_GEN11, ~0); - } - trace_vfio_pci_igd_bdsm_enabled(vdev->vbasedev.name, (gms_size / MiB)); return true; @@ -664,8 +681,27 @@ error: */ static bool vfio_pci_kvmgt_config_quirk(VFIOPCIDevice *vdev, Error **errp) { + struct vfio_region_info *opregion = NULL; + int gen; + + if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) || + !vfio_is_vga(vdev)) { + return true; + } + + /* FIXME: Cherryview is Gen8, but don't support GVT-g */ + gen = igd_gen(vdev); + if (gen != 8 && gen != 9) { + return true; + } + + if (!vfio_pci_igd_opregion_detect(vdev, &opregion, errp)) { + /* Should never reach here, KVMGT always emulates OpRegion */ + return false; + } + if ((vdev->features & VFIO_FEATURE_ENABLE_IGD_OPREGION) && - !vfio_pci_igd_setup_opregion(vdev, errp)) { + !vfio_pci_igd_opregion_init(vdev, opregion, errp)) { return false; } diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c index 232c06dd15..af1c7ab10a 100644 --- a/hw/vfio/iommufd.c +++ b/hw/vfio/iommufd.c @@ -46,11 +46,28 @@ static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova, static int iommufd_cdev_unmap(const VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, - IOMMUTLBEntry *iotlb) + IOMMUTLBEntry *iotlb, bool unmap_all) { const VFIOIOMMUFDContainer *container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); + /* unmap in halves */ + if (unmap_all) { + Int128 llsize = int128_rshift(int128_2_64(), 1); + int ret; + + ret = iommufd_backend_unmap_dma(container->be, container->ioas_id, + 0, int128_get64(llsize)); + + if (ret == 0) { + ret = iommufd_backend_unmap_dma(container->be, container->ioas_id, + int128_get64(llsize), + int128_get64(llsize)); + } + + return ret; + } + /* TODO: Handle dma_unmap_bitmap with iotlb args (migration) */ return iommufd_backend_unmap_dma(container->be, container->ioas_id, iova, size); @@ -588,14 +605,7 @@ found_container: iommufd_cdev_ram_block_discard_disable(false); } - vbasedev->group = 0; - vbasedev->num_irqs = dev_info.num_irqs; - vbasedev->num_regions = dev_info.num_regions; - vbasedev->flags = dev_info.flags; - vbasedev->reset_works = !!(dev_info.flags & VFIO_DEVICE_FLAGS_RESET); - vbasedev->bcontainer = bcontainer; - QLIST_INSERT_HEAD(&bcontainer->device_list, vbasedev, container_next); - QLIST_INSERT_HEAD(&vfio_device_list, vbasedev, global_next); + vfio_device_prepare(vbasedev, bcontainer, &dev_info); trace_iommufd_cdev_device_info(vbasedev->name, devfd, vbasedev->num_irqs, vbasedev->num_regions, vbasedev->flags); @@ -622,9 +632,7 @@ static void iommufd_cdev_detach(VFIODevice *vbasedev) VFIOIOMMUFDContainer *container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); - QLIST_REMOVE(vbasedev, global_next); - QLIST_REMOVE(vbasedev, container_next); - vbasedev->bcontainer = NULL; + vfio_device_unprepare(vbasedev); if (!vbasedev->ram_block_discard_allowed) { iommufd_cdev_ram_block_discard_disable(false); diff --git a/hw/vfio/listener.c b/hw/vfio/listener.c index 6f77e18a7a..bfacb3d8d9 100644 --- a/hw/vfio/listener.c +++ b/hw/vfio/listener.c @@ -172,7 +172,7 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) } } else { ret = vfio_container_dma_unmap(bcontainer, iova, - iotlb->addr_mask + 1, iotlb); + iotlb->addr_mask + 1, iotlb, false); if (ret) { error_setg(&local_err, "vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", " @@ -201,7 +201,7 @@ static void vfio_ram_discard_notify_discard(RamDiscardListener *rdl, int ret; /* Unmap with a single call. */ - ret = vfio_container_dma_unmap(bcontainer, iova, size , NULL); + ret = vfio_container_dma_unmap(bcontainer, iova, size , NULL, false); if (ret) { error_report("%s: vfio_container_dma_unmap() failed: %s", __func__, strerror(-ret)); @@ -411,6 +411,32 @@ static bool vfio_get_section_iova_range(VFIOContainerBase *bcontainer, return true; } +static void vfio_listener_begin(MemoryListener *listener) +{ + VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase, + listener); + void (*listener_begin)(VFIOContainerBase *bcontainer); + + listener_begin = VFIO_IOMMU_GET_CLASS(bcontainer)->listener_begin; + + if (listener_begin) { + listener_begin(bcontainer); + } +} + +static void vfio_listener_commit(MemoryListener *listener) +{ + VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase, + listener); + void (*listener_commit)(VFIOContainerBase *bcontainer); + + listener_commit = VFIO_IOMMU_GET_CLASS(bcontainer)->listener_begin; + + if (listener_commit) { + listener_commit(bcontainer); + } +} + static void vfio_device_error_append(VFIODevice *vbasedev, Error **errp) { /* @@ -634,21 +660,14 @@ static void vfio_listener_region_del(MemoryListener *listener, } if (try_unmap) { + bool unmap_all = false; + if (int128_eq(llsize, int128_2_64())) { - /* The unmap ioctl doesn't accept a full 64-bit span. */ - llsize = int128_rshift(llsize, 1); - ret = vfio_container_dma_unmap(bcontainer, iova, - int128_get64(llsize), NULL); - if (ret) { - error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", " - "0x%"HWADDR_PRIx") = %d (%s)", - bcontainer, iova, int128_get64(llsize), ret, - strerror(-ret)); - } - iova += int128_get64(llsize); + unmap_all = true; + llsize = int128_zero(); } - ret = vfio_container_dma_unmap(bcontainer, iova, - int128_get64(llsize), NULL); + ret = vfio_container_dma_unmap(bcontainer, iova, int128_get64(llsize), + NULL, unmap_all); if (ret) { error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", " "0x%"HWADDR_PRIx") = %d (%s)", @@ -801,13 +820,17 @@ static void vfio_devices_dma_logging_stop(VFIOContainerBase *bcontainer) VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP; QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { + int ret; + if (!vbasedev->dirty_tracking) { continue; } - if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) { + ret = vbasedev->io_ops->device_feature(vbasedev, feature); + + if (ret != 0) { warn_report("%s: Failed to stop DMA logging, err %d (%s)", - vbasedev->name, -errno, strerror(errno)); + vbasedev->name, -ret, strerror(-ret)); } vbasedev->dirty_tracking = false; } @@ -908,10 +931,9 @@ static bool vfio_devices_dma_logging_start(VFIOContainerBase *bcontainer, continue; } - ret = ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature); + ret = vbasedev->io_ops->device_feature(vbasedev, feature); if (ret) { - ret = -errno; - error_setg_errno(errp, errno, "%s: Failed to start DMA logging", + error_setg_errno(errp, -ret, "%s: Failed to start DMA logging", vbasedev->name); goto out; } @@ -1165,6 +1187,8 @@ static void vfio_listener_log_sync(MemoryListener *listener, static const MemoryListener vfio_memory_listener = { .name = "vfio", + .begin = vfio_listener_begin, + .commit = vfio_listener_commit, .region_add = vfio_listener_region_add, .region_del = vfio_listener_region_del, .log_global_start = vfio_listener_log_global_start, diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 6908bcc0d3..a1bfdfe375 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -241,7 +241,7 @@ static void vfio_intx_update(VFIOPCIDevice *vdev, PCIINTxRoute *route) static void vfio_intx_routing_notifier(PCIDevice *pdev) { - VFIOPCIDevice *vdev = VFIO_PCI(pdev); + VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev); PCIINTxRoute route; if (vdev->interrupt != VFIO_INT_INTx) { @@ -381,7 +381,7 @@ static void vfio_msi_interrupt(void *opaque) static int vfio_enable_msix_no_vec(VFIOPCIDevice *vdev) { g_autofree struct vfio_irq_set *irq_set = NULL; - int ret = 0, argsz; + int argsz; int32_t *fd; argsz = sizeof(*irq_set) + sizeof(*fd); @@ -396,9 +396,7 @@ static int vfio_enable_msix_no_vec(VFIOPCIDevice *vdev) fd = (int32_t *)&irq_set->data; *fd = -1; - ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set); - - return ret; + return vdev->vbasedev.io_ops->set_irqs(&vdev->vbasedev, irq_set); } static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix) @@ -455,7 +453,7 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix) fds[i] = fd; } - ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set); + ret = vdev->vbasedev.io_ops->set_irqs(&vdev->vbasedev, irq_set); g_free(irq_set); @@ -516,7 +514,7 @@ static void vfio_update_kvm_msi_virq(VFIOMSIVector *vector, MSIMessage msg, static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr, MSIMessage *msg, IOHandler *handler) { - VFIOPCIDevice *vdev = VFIO_PCI(pdev); + VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev); VFIOMSIVector *vector; int ret; bool resizing = !!(vdev->nr_vectors < nr + 1); @@ -581,7 +579,8 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr, vfio_device_irq_disable(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX); ret = vfio_enable_vectors(vdev, true); if (ret) { - error_report("vfio: failed to enable vectors, %d", ret); + error_report("vfio: failed to enable vectors, %s", + strerror(-ret)); } } else { Error *err = NULL; @@ -621,7 +620,7 @@ static int vfio_msix_vector_use(PCIDevice *pdev, static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr) { - VFIOPCIDevice *vdev = VFIO_PCI(pdev); + VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev); VFIOMSIVector *vector = &vdev->msi_vectors[nr]; trace_vfio_msix_vector_release(vdev->vbasedev.name, nr); @@ -695,7 +694,8 @@ static void vfio_msix_enable(VFIOPCIDevice *vdev) if (vdev->nr_vectors) { ret = vfio_enable_vectors(vdev, true); if (ret) { - error_report("vfio: failed to enable vectors, %d", ret); + error_report("vfio: failed to enable vectors, %s", + strerror(-ret)); } } else { /* @@ -712,7 +712,8 @@ static void vfio_msix_enable(VFIOPCIDevice *vdev) */ ret = vfio_enable_msix_no_vec(vdev); if (ret) { - error_report("vfio: failed to enable MSI-X, %d", ret); + error_report("vfio: failed to enable MSI-X, %s", + strerror(-ret)); } } @@ -765,7 +766,8 @@ retry: ret = vfio_enable_vectors(vdev, false); if (ret) { if (ret < 0) { - error_report("vfio: Error: Failed to setup MSI fds: %m"); + error_report("vfio: Error: Failed to setup MSI fds: %s", + strerror(-ret)); } else { error_report("vfio: Error: Failed to enable %d " "MSI vectors, retry with %d", vdev->nr_vectors, ret); @@ -881,18 +883,22 @@ static void vfio_update_msi(VFIOPCIDevice *vdev) static void vfio_pci_load_rom(VFIOPCIDevice *vdev) { - g_autofree struct vfio_region_info *reg_info = NULL; + VFIODevice *vbasedev = &vdev->vbasedev; + struct vfio_region_info *reg_info = NULL; uint64_t size; off_t off = 0; ssize_t bytes; + int ret; - if (vfio_device_get_region_info(&vdev->vbasedev, - VFIO_PCI_ROM_REGION_INDEX, ®_info)) { - error_report("vfio: Error getting ROM info: %m"); + ret = vfio_device_get_region_info(vbasedev, VFIO_PCI_ROM_REGION_INDEX, + ®_info); + + if (ret != 0) { + error_report("vfio: Error getting ROM info: %s", strerror(-ret)); return; } - trace_vfio_pci_load_rom(vdev->vbasedev.name, (unsigned long)reg_info->size, + trace_vfio_pci_load_rom(vbasedev->name, (unsigned long)reg_info->size, (unsigned long)reg_info->offset, (unsigned long)reg_info->flags); @@ -901,8 +907,7 @@ static void vfio_pci_load_rom(VFIOPCIDevice *vdev) if (!vdev->rom_size) { vdev->rom_read_failed = true; - error_report("vfio-pci: Cannot read device rom at " - "%s", vdev->vbasedev.name); + error_report("vfio-pci: Cannot read device rom at %s", vbasedev->name); error_printf("Device option ROM contents are probably invalid " "(check dmesg).\nSkip option ROM probe with rombar=0, " "or load from file with romfile=\n"); @@ -913,18 +918,22 @@ static void vfio_pci_load_rom(VFIOPCIDevice *vdev) memset(vdev->rom, 0xff, size); while (size) { - bytes = pread(vdev->vbasedev.fd, vdev->rom + off, - size, vdev->rom_offset + off); + bytes = vbasedev->io_ops->region_read(vbasedev, + VFIO_PCI_ROM_REGION_INDEX, + off, size, vdev->rom + off); + if (bytes == 0) { break; } else if (bytes > 0) { off += bytes; size -= bytes; } else { - if (errno == EINTR || errno == EAGAIN) { + if (bytes == -EINTR || bytes == -EAGAIN) { continue; } - error_report("vfio: Error reading device ROM: %m"); + error_report("vfio: Error reading device ROM: %s", + strreaderror(bytes)); + break; } } @@ -960,6 +969,24 @@ static void vfio_pci_load_rom(VFIOPCIDevice *vdev) } } +/* "Raw" read of underlying config space. */ +static int vfio_pci_config_space_read(VFIOPCIDevice *vdev, off_t offset, + uint32_t size, void *data) +{ + return vdev->vbasedev.io_ops->region_read(&vdev->vbasedev, + VFIO_PCI_CONFIG_REGION_INDEX, + offset, size, data); +} + +/* "Raw" write of underlying config space. */ +static int vfio_pci_config_space_write(VFIOPCIDevice *vdev, off_t offset, + uint32_t size, void *data) +{ + return vdev->vbasedev.io_ops->region_write(&vdev->vbasedev, + VFIO_PCI_CONFIG_REGION_INDEX, + offset, size, data); +} + static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size) { VFIOPCIDevice *vdev = opaque; @@ -1012,10 +1039,9 @@ static const MemoryRegionOps vfio_rom_ops = { static void vfio_pci_size_rom(VFIOPCIDevice *vdev) { + VFIODevice *vbasedev = &vdev->vbasedev; uint32_t orig, size = cpu_to_le32((uint32_t)PCI_ROM_ADDRESS_MASK); - off_t offset = vdev->config_offset + PCI_ROM_ADDRESS; char *name; - int fd = vdev->vbasedev.fd; if (vdev->pdev.romfile || !vdev->pdev.rom_bar) { /* Since pci handles romfile, just print a message and return */ @@ -1032,11 +1058,12 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev) * Use the same size ROM BAR as the physical device. The contents * will get filled in later when the guest tries to read it. */ - if (pread(fd, &orig, 4, offset) != 4 || - pwrite(fd, &size, 4, offset) != 4 || - pread(fd, &size, 4, offset) != 4 || - pwrite(fd, &orig, 4, offset) != 4) { - error_report("%s(%s) failed: %m", __func__, vdev->vbasedev.name); + if (vfio_pci_config_space_read(vdev, PCI_ROM_ADDRESS, 4, &orig) != 4 || + vfio_pci_config_space_write(vdev, PCI_ROM_ADDRESS, 4, &size) != 4 || + vfio_pci_config_space_read(vdev, PCI_ROM_ADDRESS, 4, &size) != 4 || + vfio_pci_config_space_write(vdev, PCI_ROM_ADDRESS, 4, &orig) != 4) { + + error_report("%s(%s) ROM access failed", __func__, vbasedev->name); return; } @@ -1169,7 +1196,7 @@ static const MemoryRegionOps vfio_vga_ops = { */ static void vfio_sub_page_bar_update_mapping(PCIDevice *pdev, int bar) { - VFIOPCIDevice *vdev = VFIO_PCI(pdev); + VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev); VFIORegion *region = &vdev->bars[bar].region; MemoryRegion *mmap_mr, *region_mr, *base_mr; PCIIORegion *r; @@ -1215,7 +1242,8 @@ static void vfio_sub_page_bar_update_mapping(PCIDevice *pdev, int bar) */ uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len) { - VFIOPCIDevice *vdev = VFIO_PCI(pdev); + VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev); + VFIODevice *vbasedev = &vdev->vbasedev; uint32_t emu_bits = 0, emu_val = 0, phys_val = 0, val; memcpy(&emu_bits, vdev->emulated_config_bits + addr, len); @@ -1228,12 +1256,12 @@ uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len) if (~emu_bits & (0xffffffffU >> (32 - len * 8))) { ssize_t ret; - ret = pread(vdev->vbasedev.fd, &phys_val, len, - vdev->config_offset + addr); + ret = vfio_pci_config_space_read(vdev, addr, len, &phys_val); if (ret != len) { - error_report("%s(%s, 0x%x, 0x%x) failed: %m", - __func__, vdev->vbasedev.name, addr, len); - return -errno; + error_report("%s(%s, 0x%x, 0x%x) failed: %s", + __func__, vbasedev->name, addr, len, + strreaderror(ret)); + return -1; } phys_val = le32_to_cpu(phys_val); } @@ -1248,16 +1276,19 @@ uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len) void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr, uint32_t val, int len) { - VFIOPCIDevice *vdev = VFIO_PCI(pdev); + VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev); + VFIODevice *vbasedev = &vdev->vbasedev; uint32_t val_le = cpu_to_le32(val); + int ret; trace_vfio_pci_write_config(vdev->vbasedev.name, addr, val, len); /* Write everything to VFIO, let it filter out what we can't write */ - if (pwrite(vdev->vbasedev.fd, &val_le, len, vdev->config_offset + addr) - != len) { - error_report("%s(%s, 0x%x, 0x%x, 0x%x) failed: %m", - __func__, vdev->vbasedev.name, addr, val, len); + ret = vfio_pci_config_space_write(vdev, addr, len, &val_le); + if (ret != len) { + error_report("%s(%s, 0x%x, 0x%x, 0x%x) failed: %s", + __func__, vbasedev->name, addr, val, len, + strwriteerror(ret)); } /* MSI/MSI-X Enabling/Disabling */ @@ -1345,9 +1376,11 @@ static bool vfio_msi_setup(VFIOPCIDevice *vdev, int pos, Error **errp) int ret, entries; Error *err = NULL; - if (pread(vdev->vbasedev.fd, &ctrl, sizeof(ctrl), - vdev->config_offset + pos + PCI_CAP_FLAGS) != sizeof(ctrl)) { - error_setg_errno(errp, errno, "failed reading MSI PCI_CAP_FLAGS"); + ret = vfio_pci_config_space_read(vdev, pos + PCI_CAP_FLAGS, + sizeof(ctrl), &ctrl); + if (ret != sizeof(ctrl)) { + error_setg(errp, "failed reading MSI PCI_CAP_FLAGS: %s", + strreaderror(ret)); return false; } ctrl = le16_to_cpu(ctrl); @@ -1554,31 +1587,35 @@ static bool vfio_msix_early_setup(VFIOPCIDevice *vdev, Error **errp) uint8_t pos; uint16_t ctrl; uint32_t table, pba; - int ret, fd = vdev->vbasedev.fd; - struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info), - .index = VFIO_PCI_MSIX_IRQ_INDEX }; + struct vfio_irq_info irq_info; VFIOMSIXInfo *msix; + int ret; pos = pci_find_capability(&vdev->pdev, PCI_CAP_ID_MSIX); if (!pos) { return true; } - if (pread(fd, &ctrl, sizeof(ctrl), - vdev->config_offset + pos + PCI_MSIX_FLAGS) != sizeof(ctrl)) { - error_setg_errno(errp, errno, "failed to read PCI MSIX FLAGS"); + ret = vfio_pci_config_space_read(vdev, pos + PCI_MSIX_FLAGS, + sizeof(ctrl), &ctrl); + if (ret != sizeof(ctrl)) { + error_setg(errp, "failed to read PCI MSIX FLAGS: %s", + strreaderror(ret)); return false; } - if (pread(fd, &table, sizeof(table), - vdev->config_offset + pos + PCI_MSIX_TABLE) != sizeof(table)) { - error_setg_errno(errp, errno, "failed to read PCI MSIX TABLE"); + ret = vfio_pci_config_space_read(vdev, pos + PCI_MSIX_TABLE, + sizeof(table), &table); + if (ret != sizeof(table)) { + error_setg(errp, "failed to read PCI MSIX TABLE: %s", + strreaderror(ret)); return false; } - if (pread(fd, &pba, sizeof(pba), - vdev->config_offset + pos + PCI_MSIX_PBA) != sizeof(pba)) { - error_setg_errno(errp, errno, "failed to read PCI MSIX PBA"); + ret = vfio_pci_config_space_read(vdev, pos + PCI_MSIX_PBA, + sizeof(pba), &pba); + if (ret != sizeof(pba)) { + error_setg(errp, "failed to read PCI MSIX PBA: %s", strreaderror(ret)); return false; } @@ -1593,7 +1630,8 @@ static bool vfio_msix_early_setup(VFIOPCIDevice *vdev, Error **errp) msix->pba_offset = pba & ~PCI_MSIX_FLAGS_BIRMASK; msix->entries = (ctrl & PCI_MSIX_FLAGS_QSIZE) + 1; - ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info); + ret = vfio_device_get_irq_info(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX, + &irq_info); if (ret < 0) { error_setg_errno(errp, -ret, "failed to get MSI-X irq info"); g_free(msix); @@ -1737,10 +1775,10 @@ static void vfio_bar_prepare(VFIOPCIDevice *vdev, int nr) } /* Determine what type of BAR this is for registration */ - ret = pread(vdev->vbasedev.fd, &pci_bar, sizeof(pci_bar), - vdev->config_offset + PCI_BASE_ADDRESS_0 + (4 * nr)); + ret = vfio_pci_config_space_read(vdev, PCI_BASE_ADDRESS_0 + (4 * nr), + sizeof(pci_bar), &pci_bar); if (ret != sizeof(pci_bar)) { - error_report("vfio: Failed to read BAR %d (%m)", nr); + error_report("vfio: Failed to read BAR %d: %s", nr, strreaderror(ret)); return; } @@ -2443,21 +2481,23 @@ void vfio_pci_pre_reset(VFIOPCIDevice *vdev) void vfio_pci_post_reset(VFIOPCIDevice *vdev) { + VFIODevice *vbasedev = &vdev->vbasedev; Error *err = NULL; - int nr; + int ret, nr; if (!vfio_intx_enable(vdev, &err)) { error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); } for (nr = 0; nr < PCI_NUM_REGIONS - 1; ++nr) { - off_t addr = vdev->config_offset + PCI_BASE_ADDRESS_0 + (4 * nr); + off_t addr = PCI_BASE_ADDRESS_0 + (4 * nr); uint32_t val = 0; uint32_t len = sizeof(val); - if (pwrite(vdev->vbasedev.fd, &val, len, addr) != len) { - error_report("%s(%s) reset bar %d failed: %m", __func__, - vdev->vbasedev.name, nr); + ret = vfio_pci_config_space_write(vdev, addr, len, &val); + if (ret != len) { + error_report("%s(%s) reset bar %d failed: %s", __func__, + vbasedev->name, nr, strwriteerror(ret)); } } @@ -2670,7 +2710,7 @@ static VFIODeviceOps vfio_pci_ops = { bool vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp) { VFIODevice *vbasedev = &vdev->vbasedev; - g_autofree struct vfio_region_info *reg_info = NULL; + struct vfio_region_info *reg_info = NULL; int ret; ret = vfio_device_get_region_info(vbasedev, VFIO_PCI_VGA_REGION_INDEX, ®_info); @@ -2735,8 +2775,8 @@ bool vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp) static bool vfio_populate_device(VFIOPCIDevice *vdev, Error **errp) { VFIODevice *vbasedev = &vdev->vbasedev; - g_autofree struct vfio_region_info *reg_info = NULL; - struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) }; + struct vfio_region_info *reg_info = NULL; + struct vfio_irq_info irq_info; int i, ret = -1; /* Sanity check device */ @@ -2797,12 +2837,10 @@ static bool vfio_populate_device(VFIOPCIDevice *vdev, Error **errp) } } - irq_info.index = VFIO_PCI_ERR_IRQ_INDEX; - - ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info); + ret = vfio_device_get_irq_info(vbasedev, VFIO_PCI_ERR_IRQ_INDEX, &irq_info); if (ret) { /* This can fail for an old kernel or legacy PCI dev */ - trace_vfio_populate_device_get_irq_info_failure(strerror(errno)); + trace_vfio_populate_device_get_irq_info_failure(strerror(-ret)); } else if (irq_info.count == 1) { vdev->pci_aer = true; } else { @@ -2911,17 +2949,18 @@ static void vfio_req_notifier_handler(void *opaque) static void vfio_register_req_notifier(VFIOPCIDevice *vdev) { - struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info), - .index = VFIO_PCI_REQ_IRQ_INDEX }; + struct vfio_irq_info irq_info; Error *err = NULL; int32_t fd; + int ret; if (!(vdev->features & VFIO_FEATURE_ENABLE_REQ)) { return; } - if (ioctl(vdev->vbasedev.fd, - VFIO_DEVICE_GET_IRQ_INFO, &irq_info) < 0 || irq_info.count < 1) { + ret = vfio_device_get_irq_info(&vdev->vbasedev, VFIO_PCI_REQ_IRQ_INDEX, + &irq_info); + if (ret < 0 || irq_info.count < 1) { return; } @@ -3090,11 +3129,12 @@ static bool vfio_interrupt_setup(VFIOPCIDevice *vdev, Error **errp) static void vfio_realize(PCIDevice *pdev, Error **errp) { ERRP_GUARD(); - VFIOPCIDevice *vdev = VFIO_PCI(pdev); + VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev); VFIODevice *vbasedev = &vdev->vbasedev; int i, ret; char uuid[UUID_STR_LEN]; g_autofree char *name = NULL; + uint32_t config_space_size; if (vbasedev->fd < 0 && !vbasedev->sysfsdev) { if (!(~vdev->host.domain || ~vdev->host.bus || @@ -3149,13 +3189,14 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) goto error; } + config_space_size = MIN(pci_config_size(&vdev->pdev), vdev->config_size); + /* Get a copy of config space */ - ret = pread(vbasedev->fd, vdev->pdev.config, - MIN(pci_config_size(&vdev->pdev), vdev->config_size), - vdev->config_offset); - if (ret < (int)MIN(pci_config_size(&vdev->pdev), vdev->config_size)) { - ret = ret < 0 ? -errno : -EFAULT; - error_setg_errno(errp, -ret, "failed to read device config space"); + ret = vfio_pci_config_space_read(vdev, 0, config_space_size, + vdev->pdev.config); + if (ret < (int)config_space_size) { + ret = ret < 0 ? -ret : EFAULT; + error_setg_errno(errp, ret, "failed to read device config space"); goto error; } @@ -3259,7 +3300,7 @@ error: static void vfio_instance_finalize(Object *obj) { - VFIOPCIDevice *vdev = VFIO_PCI(obj); + VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj); vfio_display_finalize(vdev); vfio_bars_finalize(vdev); @@ -3277,7 +3318,7 @@ static void vfio_instance_finalize(Object *obj) static void vfio_exitfn(PCIDevice *pdev) { - VFIOPCIDevice *vdev = VFIO_PCI(pdev); + VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev); VFIODevice *vbasedev = &vdev->vbasedev; vfio_unregister_req_notifier(vdev); @@ -3301,7 +3342,7 @@ static void vfio_exitfn(PCIDevice *pdev) static void vfio_pci_reset(DeviceState *dev) { - VFIOPCIDevice *vdev = VFIO_PCI(dev); + VFIOPCIDevice *vdev = VFIO_PCI_BASE(dev); trace_vfio_pci_reset(vdev->vbasedev.name); @@ -3341,7 +3382,7 @@ post_reset: static void vfio_instance_init(Object *obj) { PCIDevice *pci_dev = PCI_DEVICE(obj); - VFIOPCIDevice *vdev = VFIO_PCI(obj); + VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj); VFIODevice *vbasedev = &vdev->vbasedev; device_add_bootindex_property(obj, &vdev->bootindex, @@ -3362,6 +3403,31 @@ static void vfio_instance_init(Object *obj) pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS; } +static void vfio_pci_base_dev_class_init(ObjectClass *klass, const void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PCIDeviceClass *pdc = PCI_DEVICE_CLASS(klass); + + dc->desc = "VFIO PCI base device"; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + pdc->exit = vfio_exitfn; + pdc->config_read = vfio_pci_read_config; + pdc->config_write = vfio_pci_write_config; +} + +static const TypeInfo vfio_pci_base_dev_info = { + .name = TYPE_VFIO_PCI_BASE, + .parent = TYPE_PCI_DEVICE, + .instance_size = 0, + .abstract = true, + .class_init = vfio_pci_base_dev_class_init, + .interfaces = (const InterfaceInfo[]) { + { INTERFACE_PCIE_DEVICE }, + { INTERFACE_CONVENTIONAL_PCI_DEVICE }, + { } + }, +}; + static PropertyInfo vfio_pci_migration_multifd_transfer_prop; static const Property vfio_pci_dev_properties[] = { @@ -3385,7 +3451,7 @@ static const Property vfio_pci_dev_properties[] = { DEFINE_PROP_BIT("x-req", VFIOPCIDevice, features, VFIO_FEATURE_ENABLE_REQ_BIT, true), DEFINE_PROP_BIT("x-igd-opregion", VFIOPCIDevice, features, - VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false), + VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, true), DEFINE_PROP_BIT("x-igd-lpc", VFIOPCIDevice, features, VFIO_FEATURE_ENABLE_IGD_LPC_BIT, false), DEFINE_PROP_ON_OFF_AUTO("x-igd-legacy-mode", VFIOPCIDevice, @@ -3432,7 +3498,8 @@ static const Property vfio_pci_dev_properties[] = { #ifdef CONFIG_IOMMUFD static void vfio_pci_set_fd(Object *obj, const char *str, Error **errp) { - vfio_device_set_fd(&VFIO_PCI(obj)->vbasedev, str, errp); + VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj); + vfio_device_set_fd(&vdev->vbasedev, str, errp); } #endif @@ -3447,11 +3514,7 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, const void *data) object_class_property_add_str(klass, "fd", NULL, vfio_pci_set_fd); #endif dc->desc = "VFIO-based PCI device assignment"; - set_bit(DEVICE_CATEGORY_MISC, dc->categories); pdc->realize = vfio_realize; - pdc->exit = vfio_exitfn; - pdc->config_read = vfio_pci_read_config; - pdc->config_write = vfio_pci_write_config; object_class_property_set_description(klass, /* 1.3 */ "host", @@ -3576,16 +3639,11 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, const void *data) static const TypeInfo vfio_pci_dev_info = { .name = TYPE_VFIO_PCI, - .parent = TYPE_PCI_DEVICE, + .parent = TYPE_VFIO_PCI_BASE, .instance_size = sizeof(VFIOPCIDevice), .class_init = vfio_pci_dev_class_init, .instance_init = vfio_instance_init, .instance_finalize = vfio_instance_finalize, - .interfaces = (const InterfaceInfo[]) { - { INTERFACE_PCIE_DEVICE }, - { INTERFACE_CONVENTIONAL_PCI_DEVICE }, - { } - }, }; static const Property vfio_pci_dev_nohotplug_properties[] = { @@ -3632,6 +3690,7 @@ static void register_vfio_pci_dev_type(void) vfio_pci_migration_multifd_transfer_prop = qdev_prop_on_off_auto; vfio_pci_migration_multifd_transfer_prop.realized_set_allowed = true; + type_register_static(&vfio_pci_base_dev_info); type_register_static(&vfio_pci_dev_info); type_register_static(&vfio_pci_nohotplug_dev_info); } diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h index f835b1dbc2..5ce0fb916f 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -118,8 +118,16 @@ typedef struct VFIOMSIXInfo { bool noresize; } VFIOMSIXInfo; +/* + * TYPE_VFIO_PCI_BASE is an abstract type used to share code + * between VFIO implementations that use a kernel driver + * with those that use user sockets. + */ +#define TYPE_VFIO_PCI_BASE "vfio-pci-base" +OBJECT_DECLARE_SIMPLE_TYPE(VFIOPCIDevice, VFIO_PCI_BASE) + #define TYPE_VFIO_PCI "vfio-pci" -OBJECT_DECLARE_SIMPLE_TYPE(VFIOPCIDevice, VFIO_PCI) +/* TYPE_VFIO_PCI shares struct VFIOPCIDevice. */ struct VFIOPCIDevice { PCIDevice pdev; diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c index ffb3681607..9a21f2e50a 100644 --- a/hw/vfio/platform.c +++ b/hw/vfio/platform.c @@ -474,10 +474,10 @@ static bool vfio_populate_device(VFIODevice *vbasedev, Error **errp) QSIMPLEQ_INIT(&vdev->pending_intp_queue); for (i = 0; i < vbasedev->num_irqs; i++) { - struct vfio_irq_info irq = { .argsz = sizeof(irq) }; + struct vfio_irq_info irq; + + ret = vfio_device_get_irq_info(vbasedev, i, &irq); - irq.index = i; - ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_IRQ_INFO, &irq); if (ret) { error_setg_errno(errp, -ret, "failed to get device irq info"); goto irq_err; diff --git a/hw/vfio/region.c b/hw/vfio/region.c index 04bf9eb098..34752c3f65 100644 --- a/hw/vfio/region.c +++ b/hw/vfio/region.c @@ -45,6 +45,7 @@ void vfio_region_write(void *opaque, hwaddr addr, uint32_t dword; uint64_t qword; } buf; + int ret; switch (size) { case 1: @@ -64,11 +65,13 @@ void vfio_region_write(void *opaque, hwaddr addr, break; } - if (pwrite(vbasedev->fd, &buf, size, region->fd_offset + addr) != size) { + ret = vbasedev->io_ops->region_write(vbasedev, region->nr, + addr, size, &buf); + if (ret != size) { error_report("%s(%s:region%d+0x%"HWADDR_PRIx", 0x%"PRIx64 - ",%d) failed: %m", + ",%d) failed: %s", __func__, vbasedev->name, region->nr, - addr, data, size); + addr, data, size, strwriteerror(ret)); } trace_vfio_region_write(vbasedev->name, region->nr, addr, data, size); @@ -96,11 +99,13 @@ uint64_t vfio_region_read(void *opaque, uint64_t qword; } buf; uint64_t data = 0; + int ret; - if (pread(vbasedev->fd, &buf, size, region->fd_offset + addr) != size) { - error_report("%s(%s:region%d+0x%"HWADDR_PRIx", %d) failed: %m", + ret = vbasedev->io_ops->region_read(vbasedev, region->nr, addr, size, &buf); + if (ret != size) { + error_report("%s(%s:region%d+0x%"HWADDR_PRIx", %d) failed: %s", __func__, vbasedev->name, region->nr, - addr, size); + addr, size, strreaderror(ret)); return (uint64_t)-1; } switch (size) { @@ -182,7 +187,7 @@ static int vfio_setup_region_sparse_mmaps(VFIORegion *region, int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region, int index, const char *name) { - g_autofree struct vfio_region_info *info = NULL; + struct vfio_region_info *info = NULL; int ret; ret = vfio_device_get_region_info(vbasedev, index, &info); diff --git a/hw/xen/xen-hvm-common.c b/hw/xen/xen-hvm-common.c index 9a677e8ed7..78e0bc8f64 100644 --- a/hw/xen/xen-hvm-common.c +++ b/hw/xen/xen-hvm-common.c @@ -711,7 +711,7 @@ static int xen_map_ioreq_server(XenIOState *state) /* * If we fail to map the shared page with xenforeignmemory_map_resource() * or if we're using buffered ioreqs, we need xen_get_ioreq_server_info() - * to provide the the addresses to map the shared page and/or to get the + * to provide the addresses to map the shared page and/or to get the * event-channel port for buffered ioreqs. */ if (state->shared_page == NULL || state->has_bufioreq) { diff --git a/hw/xen/xen-mapcache.c b/hw/xen/xen-mapcache.c index 698b5c53ed..e31d379702 100644 --- a/hw/xen/xen-mapcache.c +++ b/hw/xen/xen-mapcache.c @@ -75,7 +75,8 @@ typedef struct MapCache { } MapCache; static MapCache *mapcache; -static MapCache *mapcache_grants; +static MapCache *mapcache_grants_ro; +static MapCache *mapcache_grants_rw; static xengnttab_handle *xen_region_gnttabdev; static inline void mapcache_lock(MapCache *mc) @@ -176,9 +177,12 @@ void xen_map_cache_init(phys_offset_to_gaddr_t f, void *opaque) * Grant mappings must use XC_PAGE_SIZE granularity since we can't * map anything beyond the number of pages granted to us. */ - mapcache_grants = xen_map_cache_init_single(f, opaque, - XC_PAGE_SHIFT, - max_mcache_size); + mapcache_grants_ro = xen_map_cache_init_single(f, opaque, + XC_PAGE_SHIFT, + max_mcache_size); + mapcache_grants_rw = xen_map_cache_init_single(f, opaque, + XC_PAGE_SHIFT, + max_mcache_size); setrlimit(RLIMIT_AS, &rlimit_as); } @@ -376,12 +380,12 @@ tryagain: entry = &mc->entry[address_index % mc->nr_buckets]; - while (entry && (lock || entry->lock) && entry->vaddr_base && - (entry->paddr_index != address_index || entry->size != cache_size || + while (entry && (!entry->vaddr_base || + entry->paddr_index != address_index || entry->size != cache_size || !test_bits(address_offset >> XC_PAGE_SHIFT, test_bit_size >> XC_PAGE_SHIFT, entry->valid_mapping))) { - if (!free_entry && !entry->lock) { + if (!free_entry && (!entry->lock || !entry->vaddr_base)) { free_entry = entry; free_pentry = pentry; } @@ -456,9 +460,13 @@ uint8_t *xen_map_cache(MemoryRegion *mr, bool is_write) { bool grant = xen_mr_is_grants(mr); - MapCache *mc = grant ? mapcache_grants : mapcache; + MapCache *mc = mapcache; uint8_t *p; + if (grant) { + mc = is_write ? mapcache_grants_rw : mapcache_grants_ro; + } + if (grant && !lock) { /* * Grants are only supported via address_space_map(). Anything @@ -523,7 +531,10 @@ ram_addr_t xen_ram_addr_from_mapcache(void *ptr) addr = xen_ram_addr_from_mapcache_single(mapcache, ptr); if (addr == RAM_ADDR_INVALID) { - addr = xen_ram_addr_from_mapcache_single(mapcache_grants, ptr); + addr = xen_ram_addr_from_mapcache_single(mapcache_grants_ro, ptr); + } + if (addr == RAM_ADDR_INVALID) { + addr = xen_ram_addr_from_mapcache_single(mapcache_grants_rw, ptr); } return addr; @@ -626,7 +637,8 @@ static void xen_invalidate_map_cache_entry_single(MapCache *mc, uint8_t *buffer) static void xen_invalidate_map_cache_entry_all(uint8_t *buffer) { xen_invalidate_map_cache_entry_single(mapcache, buffer); - xen_invalidate_map_cache_entry_single(mapcache_grants, buffer); + xen_invalidate_map_cache_entry_single(mapcache_grants_ro, buffer); + xen_invalidate_map_cache_entry_single(mapcache_grants_rw, buffer); } static void xen_invalidate_map_cache_entry_bh(void *opaque) diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index dab1e7e580..a68485547d 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -182,7 +182,7 @@ void list_cpus(void); * @host_pc: the host pc within the translation * @data: output data * - * Attempt to load the the unwind state for a host pc occurring in + * Attempt to load the unwind state for a host pc occurring in * translated code. If @host_pc is not in translated code, the * function returns false; otherwise @data is loaded. * This is the same unwind info as given to restore_state_to_opc. diff --git a/include/hw/acpi/pcihp.h b/include/hw/acpi/pcihp.h index ac21a95913..a97904bada 100644 --- a/include/hw/acpi/pcihp.h +++ b/include/hw/acpi/pcihp.h @@ -58,7 +58,7 @@ typedef struct AcpiPciHpState { void acpi_pcihp_init(Object *owner, AcpiPciHpState *, PCIBus *root, MemoryRegion *io, uint16_t io_base); -bool acpi_pcihp_is_hotpluggbale_bus(AcpiPciHpState *s, BusState *bus); +bool acpi_pcihp_is_hotpluggable_bus(AcpiPciHpState *s, BusState *bus); void acpi_pcihp_device_pre_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp); void acpi_pcihp_device_plug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s, diff --git a/include/hw/boards.h b/include/hw/boards.h index 765dc8dd35..a7b1fcffae 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -636,7 +636,11 @@ struct MachineState { /* * How many years/major releases for each phase * of the life cycle. Assumes use of versioning - * scheme where major is bumped each year + * scheme where major is bumped each year. + * + * These values must match the ver_machine_deprecation_version + * and ver_machine_deletion_version logic in docs/conf.py and + * the text in docs/about/deprecated.rst */ #define MACHINE_VER_DELETION_MAJOR 6 #define MACHINE_VER_DEPRECATION_MAJOR 3 @@ -650,11 +654,42 @@ struct MachineState { " years old are subject to deletion after " \ stringify(MACHINE_VER_DELETION_MAJOR) " years" -#define _MACHINE_VER_IS_EXPIRED_IMPL(cutoff, major, minor) \ +#define _MACHINE_VER_IS_CURRENT_EXPIRED(cutoff, major, minor) \ (((QEMU_VERSION_MAJOR - major) > cutoff) || \ (((QEMU_VERSION_MAJOR - major) == cutoff) && \ (QEMU_VERSION_MINOR - minor) >= 0)) +#define _MACHINE_VER_IS_NEXT_MINOR_EXPIRED(cutoff, major, minor) \ + (((QEMU_VERSION_MAJOR - major) > cutoff) || \ + (((QEMU_VERSION_MAJOR - major) == cutoff) && \ + ((QEMU_VERSION_MINOR + 1) - minor) >= 0)) + +#define _MACHINE_VER_IS_NEXT_MAJOR_EXPIRED(cutoff, major, minor) \ + ((((QEMU_VERSION_MAJOR + 1) - major) > cutoff) || \ + ((((QEMU_VERSION_MAJOR + 1) - major) == cutoff) && \ + (0 - minor) >= 0)) + +/* + * - The first check applies to formal releases + * - The second check applies to dev snapshots / release candidates + * where the next major version is the same. + * e.g. 9.0.50, 9.1.50, 9.0.90, 9.1.90 + * - The third check applies to dev snapshots / release candidates + * where the next major version will change. + * e.g. 9.2.50, 9.2.90 + * + * NB: this assumes we do 3 minor releases per year, before bumping major, + * and dev snapshots / release candidates are numbered with micro >= 50 + * If this ever changes the logic below will need modifying.... + */ +#define _MACHINE_VER_IS_EXPIRED_IMPL(cutoff, major, minor) \ + ((QEMU_VERSION_MICRO < 50 && \ + _MACHINE_VER_IS_CURRENT_EXPIRED(cutoff, major, minor)) || \ + (QEMU_VERSION_MICRO >= 50 && QEMU_VERSION_MINOR < 2 && \ + _MACHINE_VER_IS_NEXT_MINOR_EXPIRED(cutoff, major, minor)) || \ + (QEMU_VERSION_MICRO >= 50 && QEMU_VERSION_MINOR == 2 && \ + _MACHINE_VER_IS_NEXT_MAJOR_EXPIRED(cutoff, major, minor))) + #define _MACHINE_VER_IS_EXPIRED2(cutoff, major, minor) \ _MACHINE_VER_IS_EXPIRED_IMPL(cutoff, major, minor) #define _MACHINE_VER_IS_EXPIRED3(cutoff, major, minor, micro) \ @@ -719,28 +754,11 @@ struct MachineState { * suitable period of time has passed, it will cause * execution of the method to return, avoiding registration * of the machine - * - * The new deprecation and deletion policy for versioned - * machine types was introduced in QEMU 9.1.0. - * - * Under the new policy a number of old machine types (any - * prior to 2.12) would be liable for immediate deletion - * which would be a violation of our historical deprecation - * and removal policy - * - * Thus deletions are temporarily gated on existance of - * the env variable "QEMU_DELETE_MACHINES" / QEMU version - * number >= 10.1.0. This gate can be deleted in the 10.1.0 - * dev cycle */ #define MACHINE_VER_DELETION(...) \ do { \ if (MACHINE_VER_SHOULD_DELETE(__VA_ARGS__)) { \ - if (getenv("QEMU_DELETE_MACHINES") || \ - QEMU_VERSION_MAJOR > 10 || (QEMU_VERSION_MAJOR == 10 && \ - QEMU_VERSION_MINOR >= 1)) { \ - return; \ - } \ + return; \ } \ } while (0) diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h index 12b2ff1f7d..1e87f7d393 100644 --- a/include/hw/core/cpu.h +++ b/include/hw/core/cpu.h @@ -1121,20 +1121,8 @@ bool cpu_exec_realizefn(CPUState *cpu, Error **errp); void cpu_exec_unrealizefn(CPUState *cpu); void cpu_exec_reset_hold(CPUState *cpu); -#ifdef COMPILING_PER_TARGET - extern const VMStateDescription vmstate_cpu_common; -#define VMSTATE_CPU() { \ - .name = "parent_obj", \ - .size = sizeof(CPUState), \ - .vmsd = &vmstate_cpu_common, \ - .flags = VMS_STRUCT, \ - .offset = 0, \ -} - -#endif /* COMPILING_PER_TARGET */ - #define UNASSIGNED_CPU_INDEX -1 #define UNASSIGNED_CLUSTER_INDEX -1 diff --git a/include/hw/hyperv/hyperv.h b/include/hw/hyperv/hyperv.h index d717b4e13d..63a8b65278 100644 --- a/include/hw/hyperv/hyperv.h +++ b/include/hw/hyperv/hyperv.h @@ -10,7 +10,8 @@ #ifndef HW_HYPERV_HYPERV_H #define HW_HYPERV_HYPERV_H -#include "cpu-qom.h" +#include "exec/hwaddr.h" +#include "hw/core/cpu.h" #include "hw/hyperv/hyperv-proto.h" typedef struct HvSintRoute HvSintRoute; diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h index 5527e02722..3d392b0fd8 100644 --- a/include/hw/vfio/vfio-container-base.h +++ b/include/hw/vfio/vfio-container-base.h @@ -81,7 +81,7 @@ int vfio_container_dma_map(VFIOContainerBase *bcontainer, void *vaddr, bool readonly); int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, - IOMMUTLBEntry *iotlb); + IOMMUTLBEntry *iotlb, bool unmap_all); bool vfio_container_add_section_window(VFIOContainerBase *bcontainer, MemoryRegionSection *section, Error **errp); @@ -117,12 +117,25 @@ struct VFIOIOMMUClass { /* basic feature */ bool (*setup)(VFIOContainerBase *bcontainer, Error **errp); + void (*listener_begin)(VFIOContainerBase *bcontainer); + void (*listener_commit)(VFIOContainerBase *bcontainer); int (*dma_map)(const VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, void *vaddr, bool readonly); + /** + * @dma_unmap + * + * Unmap an address range from the container. + * + * @bcontainer: #VFIOContainerBase to use for unmap + * @iova: start address to unmap + * @size: size of the range to unmap + * @iotlb: The IOMMU TLB mapping entry (or NULL) + * @unmap_all: if set, unmap the entire address space + */ int (*dma_unmap)(const VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, - IOMMUTLBEntry *iotlb); + IOMMUTLBEntry *iotlb, bool unmap_all); bool (*attach_device)(const char *name, VFIODevice *vbasedev, AddressSpace *as, Error **errp); void (*detach_device)(VFIODevice *vbasedev); diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h index 81c95bb51e..8bcb3c19f6 100644 --- a/include/hw/vfio/vfio-device.h +++ b/include/hw/vfio/vfio-device.h @@ -41,6 +41,7 @@ enum { }; typedef struct VFIODeviceOps VFIODeviceOps; +typedef struct VFIODeviceIOOps VFIODeviceIOOps; typedef struct VFIOMigration VFIOMigration; typedef struct IOMMUFDBackend IOMMUFDBackend; @@ -66,6 +67,7 @@ typedef struct VFIODevice { OnOffAuto migration_multifd_transfer; bool migration_events; VFIODeviceOps *ops; + VFIODeviceIOOps *io_ops; unsigned int num_irqs; unsigned int num_regions; unsigned int flags; @@ -81,6 +83,7 @@ typedef struct VFIODevice { IOMMUFDBackend *iommufd; VFIOIOASHwpt *hwpt; QLIST_ENTRY(VFIODevice) hwpt_next; + struct vfio_region_info **reginfo; } VFIODevice; struct VFIODeviceOps { @@ -115,6 +118,20 @@ struct VFIODeviceOps { int (*vfio_load_config)(VFIODevice *vdev, QEMUFile *f); }; +/* + * Given a return value of either a short number of bytes read or -errno, + * construct a meaningful error message. + */ +#define strreaderror(ret) \ + (ret < 0 ? strerror(-ret) : "short read") + +/* + * Given a return value of either a short number of bytes written or -errno, + * construct a meaningful error message. + */ +#define strwriteerror(ret) \ + (ret < 0 ? strerror(-ret) : "short write") + void vfio_device_irq_disable(VFIODevice *vbasedev, int index); void vfio_device_irq_unmask(VFIODevice *vbasedev, int index); void vfio_device_irq_mask(VFIODevice *vbasedev, int index); @@ -127,6 +144,9 @@ bool vfio_device_hiod_create_and_realize(VFIODevice *vbasedev, const char *typename, Error **errp); bool vfio_device_attach(char *name, VFIODevice *vbasedev, AddressSpace *as, Error **errp); +bool vfio_device_attach_by_iommu_type(const char *iommu_type, char *name, + VFIODevice *vbasedev, AddressSpace *as, + Error **errp); void vfio_device_detach(VFIODevice *vbasedev); VFIODevice *vfio_get_vfio_device(Object *obj); @@ -134,11 +154,73 @@ typedef QLIST_HEAD(VFIODeviceList, VFIODevice) VFIODeviceList; extern VFIODeviceList vfio_device_list; #ifdef CONFIG_LINUX +/* + * How devices communicate with the server. The default option is through + * ioctl() to the kernel VFIO driver, but vfio-user can use a socket to a remote + * process. + */ +struct VFIODeviceIOOps { + /** + * @device_feature + * + * Fill in feature info for the given device. + */ + int (*device_feature)(VFIODevice *vdev, struct vfio_device_feature *); + + /** + * @get_region_info + * + * Fill in @info with information on the region given by @info->index. + */ + int (*get_region_info)(VFIODevice *vdev, + struct vfio_region_info *info); + + /** + * @get_irq_info + * + * Fill in @irq with information on the IRQ given by @info->index. + */ + int (*get_irq_info)(VFIODevice *vdev, struct vfio_irq_info *irq); + + /** + * @set_irqs + * + * Configure IRQs as defined by @irqs. + */ + int (*set_irqs)(VFIODevice *vdev, struct vfio_irq_set *irqs); + + /** + * @region_read + * + * Read @size bytes from the region @nr at offset @off into the buffer + * @data. + */ + int (*region_read)(VFIODevice *vdev, uint8_t nr, off_t off, uint32_t size, + void *data); + + /** + * @region_write + * + * Write @size bytes to the region @nr at offset @off from the buffer + * @data. + */ + int (*region_write)(VFIODevice *vdev, uint8_t nr, off_t off, uint32_t size, + void *data); +}; + +void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainerBase *bcontainer, + struct vfio_device_info *info); + +void vfio_device_unprepare(VFIODevice *vbasedev); + int vfio_device_get_region_info(VFIODevice *vbasedev, int index, struct vfio_region_info **info); int vfio_device_get_region_info_type(VFIODevice *vbasedev, uint32_t type, uint32_t subtype, struct vfio_region_info **info); bool vfio_device_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type); + +int vfio_device_get_irq_info(VFIODevice *vbasedev, int index, + struct vfio_irq_info *info); #endif /* Returns 0 on success, or a negative errno. */ diff --git a/include/hw/xen/interface/io/blkif.h b/include/hw/xen/interface/io/blkif.h index 22f1eef0c0..c5527999d1 100644 --- a/include/hw/xen/interface/io/blkif.h +++ b/include/hw/xen/interface/io/blkif.h @@ -324,7 +324,7 @@ * access (even when it should be read-only). If the frontend hits the * maximum number of allowed persistently mapped grants, it can fallback * to non persistent mode. This will cause a performance degradation, - * since the the backend driver will still try to map those grants + * since the backend driver will still try to map those grants * persistently. Since the persistent grants protocol is compatible with * the previous protocol, a frontend driver can choose to work in * persistent mode even when the backend doesn't support it. diff --git a/include/qemu/host-pci-mmio.h b/include/qemu/host-pci-mmio.h new file mode 100644 index 0000000000..a8ed9938ac --- /dev/null +++ b/include/qemu/host-pci-mmio.h @@ -0,0 +1,136 @@ +/* + * API for host PCI MMIO accesses (e.g. Linux VFIO BARs) + * + * Copyright 2025 IBM Corp. + * Author(s): Farhan Ali <alifm@linux.ibm.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef HOST_PCI_MMIO_H +#define HOST_PCI_MMIO_H + +#include "qemu/bswap.h" +#include "qemu/s390x_pci_mmio.h" + +static inline uint8_t host_pci_ldub_p(const void *ioaddr) +{ + uint8_t ret = 0; +#ifdef __s390x__ + ret = s390x_pci_mmio_read_8(ioaddr); +#else + ret = ldub_p(ioaddr); +#endif + + return ret; +} + +static inline uint16_t host_pci_lduw_le_p(const void *ioaddr) +{ + uint16_t ret = 0; +#ifdef __s390x__ + ret = le16_to_cpu(s390x_pci_mmio_read_16(ioaddr)); +#else + ret = lduw_le_p(ioaddr); +#endif + + return ret; +} + +static inline uint32_t host_pci_ldl_le_p(const void *ioaddr) +{ + uint32_t ret = 0; +#ifdef __s390x__ + ret = le32_to_cpu(s390x_pci_mmio_read_32(ioaddr)); +#else + ret = ldl_le_p(ioaddr); +#endif + + return ret; +} + +static inline uint64_t host_pci_ldq_le_p(const void *ioaddr) +{ + uint64_t ret = 0; +#ifdef __s390x__ + ret = le64_to_cpu(s390x_pci_mmio_read_64(ioaddr)); +#else + ret = ldq_le_p(ioaddr); +#endif + + return ret; +} + +static inline void host_pci_stb_p(void *ioaddr, uint8_t val) +{ +#ifdef __s390x__ + s390x_pci_mmio_write_8(ioaddr, val); +#else + stb_p(ioaddr, val); +#endif +} + +static inline void host_pci_stw_le_p(void *ioaddr, uint16_t val) +{ +#ifdef __s390x__ + s390x_pci_mmio_write_16(ioaddr, cpu_to_le16(val)); +#else + stw_le_p(ioaddr, val); +#endif +} + +static inline void host_pci_stl_le_p(void *ioaddr, uint32_t val) +{ +#ifdef __s390x__ + s390x_pci_mmio_write_32(ioaddr, cpu_to_le32(val)); +#else + stl_le_p(ioaddr, val); +#endif +} + +static inline void host_pci_stq_le_p(void *ioaddr, uint64_t val) +{ +#ifdef __s390x__ + s390x_pci_mmio_write_64(ioaddr, cpu_to_le64(val)); +#else + stq_le_p(ioaddr, val); +#endif +} + +static inline uint64_t host_pci_ldn_le_p(const void *ioaddr, int sz) +{ + switch (sz) { + case 1: + return host_pci_ldub_p(ioaddr); + case 2: + return host_pci_lduw_le_p(ioaddr); + case 4: + return host_pci_ldl_le_p(ioaddr); + case 8: + return host_pci_ldq_le_p(ioaddr); + default: + g_assert_not_reached(); + } +} + +static inline void host_pci_stn_le_p(void *ioaddr, int sz, uint64_t v) +{ + switch (sz) { + case 1: + host_pci_stb_p(ioaddr, v); + break; + case 2: + host_pci_stw_le_p(ioaddr, v); + break; + case 4: + host_pci_stl_le_p(ioaddr, v); + break; + case 8: + host_pci_stq_le_p(ioaddr, v); + break; + default: + g_assert_not_reached(); + } +} + +#endif diff --git a/include/qemu/s390x_pci_mmio.h b/include/qemu/s390x_pci_mmio.h new file mode 100644 index 0000000000..c5f63ecefa --- /dev/null +++ b/include/qemu/s390x_pci_mmio.h @@ -0,0 +1,24 @@ +/* + * s390x PCI MMIO definitions + * + * Copyright 2025 IBM Corp. + * Author(s): Farhan Ali <alifm@linux.ibm.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ +#ifndef S390X_PCI_MMIO_H +#define S390X_PCI_MMIO_H + +#ifdef __s390x__ +uint8_t s390x_pci_mmio_read_8(const void *ioaddr); +uint16_t s390x_pci_mmio_read_16(const void *ioaddr); +uint32_t s390x_pci_mmio_read_32(const void *ioaddr); +uint64_t s390x_pci_mmio_read_64(const void *ioaddr); + +void s390x_pci_mmio_write_8(void *ioaddr, uint8_t val); +void s390x_pci_mmio_write_16(void *ioaddr, uint16_t val); +void s390x_pci_mmio_write_32(void *ioaddr, uint32_t val); +void s390x_pci_mmio_write_64(void *ioaddr, uint64_t val); +#endif /* __s390x__ */ + +#endif /* S390X_PCI_MMIO_H */ diff --git a/include/standard-headers/asm-x86/setup_data.h b/include/standard-headers/asm-x86/setup_data.h index 09355f54c5..a483d72f42 100644 --- a/include/standard-headers/asm-x86/setup_data.h +++ b/include/standard-headers/asm-x86/setup_data.h @@ -18,7 +18,7 @@ #define SETUP_INDIRECT (1<<31) #define SETUP_TYPE_MAX (SETUP_ENUM_MAX | SETUP_INDIRECT) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include "standard-headers/linux/types.h" @@ -78,6 +78,6 @@ struct ima_setup_data { uint64_t size; } QEMU_PACKED; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_SETUP_DATA_H */ diff --git a/include/standard-headers/drm/drm_fourcc.h b/include/standard-headers/drm/drm_fourcc.h index 708647776f..a8b759dcbc 100644 --- a/include/standard-headers/drm/drm_fourcc.h +++ b/include/standard-headers/drm/drm_fourcc.h @@ -420,6 +420,7 @@ extern "C" { #define DRM_FORMAT_MOD_VENDOR_ARM 0x08 #define DRM_FORMAT_MOD_VENDOR_ALLWINNER 0x09 #define DRM_FORMAT_MOD_VENDOR_AMLOGIC 0x0a +#define DRM_FORMAT_MOD_VENDOR_MTK 0x0b /* add more to the end as needed */ @@ -1452,6 +1453,46 @@ drm_fourcc_canonicalize_nvidia_format_mod(uint64_t modifier) */ #define AMLOGIC_FBC_OPTION_MEM_SAVING (1ULL << 0) +/* MediaTek modifiers + * Bits Parameter Notes + * ----- ------------------------ --------------------------------------------- + * 7: 0 TILE LAYOUT Values are MTK_FMT_MOD_TILE_* + * 15: 8 COMPRESSION Values are MTK_FMT_MOD_COMPRESS_* + * 23:16 10 BIT LAYOUT Values are MTK_FMT_MOD_10BIT_LAYOUT_* + * + */ + +#define DRM_FORMAT_MOD_MTK(__flags) fourcc_mod_code(MTK, __flags) + +/* + * MediaTek Tiled Modifier + * The lowest 8 bits of the modifier is used to specify the tiling + * layout. Only the 16L_32S tiling is used for now, but we define an + * "untiled" version and leave room for future expansion. + */ +#define MTK_FMT_MOD_TILE_MASK 0xf +#define MTK_FMT_MOD_TILE_NONE 0x0 +#define MTK_FMT_MOD_TILE_16L32S 0x1 + +/* + * Bits 8-15 specify compression options + */ +#define MTK_FMT_MOD_COMPRESS_MASK (0xf << 8) +#define MTK_FMT_MOD_COMPRESS_NONE (0x0 << 8) +#define MTK_FMT_MOD_COMPRESS_V1 (0x1 << 8) + +/* + * Bits 16-23 specify how the bits of 10 bit formats are + * stored out in memory + */ +#define MTK_FMT_MOD_10BIT_LAYOUT_MASK (0xf << 16) +#define MTK_FMT_MOD_10BIT_LAYOUT_PACKED (0x0 << 16) +#define MTK_FMT_MOD_10BIT_LAYOUT_LSBTILED (0x1 << 16) +#define MTK_FMT_MOD_10BIT_LAYOUT_LSBRASTER (0x2 << 16) + +/* alias for the most common tiling format */ +#define DRM_FORMAT_MOD_MTK_16L_32S_TILE DRM_FORMAT_MOD_MTK(MTK_FMT_MOD_TILE_16L32S) + /* * AMD modifiers * diff --git a/include/standard-headers/linux/const.h b/include/standard-headers/linux/const.h index 2122610de7..95ede23342 100644 --- a/include/standard-headers/linux/const.h +++ b/include/standard-headers/linux/const.h @@ -33,7 +33,7 @@ * Missing __asm__ support * * __BIT128() would not work in the __asm__ code, as it shifts an - * 'unsigned __init128' data type as direct representation of + * 'unsigned __int128' data type as direct representation of * 128 bit constants is not supported in the gcc compiler, as * they get silently truncated. * diff --git a/include/standard-headers/linux/ethtool.h b/include/standard-headers/linux/ethtool.h index e83382531c..5d1ad5fdea 100644 --- a/include/standard-headers/linux/ethtool.h +++ b/include/standard-headers/linux/ethtool.h @@ -2059,6 +2059,24 @@ enum ethtool_link_mode_bit_indices { ETHTOOL_LINK_MODE_10baseT1S_Half_BIT = 100, ETHTOOL_LINK_MODE_10baseT1S_P2MP_Half_BIT = 101, ETHTOOL_LINK_MODE_10baseT1BRR_Full_BIT = 102, + ETHTOOL_LINK_MODE_200000baseCR_Full_BIT = 103, + ETHTOOL_LINK_MODE_200000baseKR_Full_BIT = 104, + ETHTOOL_LINK_MODE_200000baseDR_Full_BIT = 105, + ETHTOOL_LINK_MODE_200000baseDR_2_Full_BIT = 106, + ETHTOOL_LINK_MODE_200000baseSR_Full_BIT = 107, + ETHTOOL_LINK_MODE_200000baseVR_Full_BIT = 108, + ETHTOOL_LINK_MODE_400000baseCR2_Full_BIT = 109, + ETHTOOL_LINK_MODE_400000baseKR2_Full_BIT = 110, + ETHTOOL_LINK_MODE_400000baseDR2_Full_BIT = 111, + ETHTOOL_LINK_MODE_400000baseDR2_2_Full_BIT = 112, + ETHTOOL_LINK_MODE_400000baseSR2_Full_BIT = 113, + ETHTOOL_LINK_MODE_400000baseVR2_Full_BIT = 114, + ETHTOOL_LINK_MODE_800000baseCR4_Full_BIT = 115, + ETHTOOL_LINK_MODE_800000baseKR4_Full_BIT = 116, + ETHTOOL_LINK_MODE_800000baseDR4_Full_BIT = 117, + ETHTOOL_LINK_MODE_800000baseDR4_2_Full_BIT = 118, + ETHTOOL_LINK_MODE_800000baseSR4_Full_BIT = 119, + ETHTOOL_LINK_MODE_800000baseVR4_Full_BIT = 120, /* must be last entry */ __ETHTOOL_LINK_MODE_MASK_NBITS @@ -2271,6 +2289,10 @@ static inline int ethtool_validate_duplex(uint8_t duplex) * be exploited to reduce the RSS queue spread. */ #define RXH_XFRM_SYM_XOR (1 << 0) +/* Similar to SYM_XOR, except that one copy of the XOR'ed fields is replaced by + * an OR of the same fields + */ +#define RXH_XFRM_SYM_OR_XOR (1 << 1) #define RXH_XFRM_NO_CHANGE 0xff /* L2-L4 network traffic flow types */ diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h index d303effb2a..a2b5815d89 100644 --- a/include/standard-headers/linux/fuse.h +++ b/include/standard-headers/linux/fuse.h @@ -229,6 +229,9 @@ * - FUSE_URING_IN_OUT_HEADER_SZ * - FUSE_URING_OP_IN_OUT_SZ * - enum fuse_uring_cmd + * + * 7.43 + * - add FUSE_REQUEST_TIMEOUT */ #ifndef _LINUX_FUSE_H @@ -260,7 +263,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 42 +#define FUSE_KERNEL_MINOR_VERSION 43 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -431,6 +434,8 @@ struct fuse_file_lock { * of the request ID indicates resend requests * FUSE_ALLOW_IDMAP: allow creation of idmapped mounts * FUSE_OVER_IO_URING: Indicate that client supports io-uring + * FUSE_REQUEST_TIMEOUT: kernel supports timing out requests. + * init_out.request_timeout contains the timeout (in secs) */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -473,11 +478,11 @@ struct fuse_file_lock { #define FUSE_PASSTHROUGH (1ULL << 37) #define FUSE_NO_EXPORT_SUPPORT (1ULL << 38) #define FUSE_HAS_RESEND (1ULL << 39) - /* Obsolete alias for FUSE_DIRECT_IO_ALLOW_MMAP */ #define FUSE_DIRECT_IO_RELAX FUSE_DIRECT_IO_ALLOW_MMAP #define FUSE_ALLOW_IDMAP (1ULL << 40) #define FUSE_OVER_IO_URING (1ULL << 41) +#define FUSE_REQUEST_TIMEOUT (1ULL << 42) /** * CUSE INIT request/reply flags @@ -905,7 +910,8 @@ struct fuse_init_out { uint16_t map_alignment; uint32_t flags2; uint32_t max_stack_depth; - uint32_t unused[6]; + uint16_t request_timeout; + uint16_t unused[11]; }; #define CUSE_INIT_INFO_MAX 4096 diff --git a/include/standard-headers/linux/pci_regs.h b/include/standard-headers/linux/pci_regs.h index 3445c4970e..ba326710f9 100644 --- a/include/standard-headers/linux/pci_regs.h +++ b/include/standard-headers/linux/pci_regs.h @@ -486,6 +486,7 @@ #define PCI_EXP_TYPE_RC_EC 0xa /* Root Complex Event Collector */ #define PCI_EXP_FLAGS_SLOT 0x0100 /* Slot implemented */ #define PCI_EXP_FLAGS_IRQ 0x3e00 /* Interrupt message number */ +#define PCI_EXP_FLAGS_FLIT 0x8000 /* Flit Mode Supported */ #define PCI_EXP_DEVCAP 0x04 /* Device capabilities */ #define PCI_EXP_DEVCAP_PAYLOAD 0x00000007 /* Max_Payload_Size */ #define PCI_EXP_DEVCAP_PHANTOM 0x00000018 /* Phantom functions */ @@ -795,6 +796,8 @@ #define PCI_ERR_CAP_ECRC_CHKC 0x00000080 /* ECRC Check Capable */ #define PCI_ERR_CAP_ECRC_CHKE 0x00000100 /* ECRC Check Enable */ #define PCI_ERR_CAP_PREFIX_LOG_PRESENT 0x00000800 /* TLP Prefix Log Present */ +#define PCI_ERR_CAP_TLP_LOG_FLIT 0x00040000 /* TLP was logged in Flit Mode */ +#define PCI_ERR_CAP_TLP_LOG_SIZE 0x00f80000 /* Logged TLP Size (only in Flit mode) */ #define PCI_ERR_HEADER_LOG 0x1c /* Header Log Register (16 bytes) */ #define PCI_ERR_ROOT_COMMAND 0x2c /* Root Error Command */ #define PCI_ERR_ROOT_CMD_COR_EN 0x00000001 /* Correctable Err Reporting Enable */ @@ -1013,7 +1016,7 @@ /* Resizable BARs */ #define PCI_REBAR_CAP 4 /* capability register */ -#define PCI_REBAR_CAP_SIZES 0x00FFFFF0 /* supported BAR sizes */ +#define PCI_REBAR_CAP_SIZES 0xFFFFFFF0 /* supported BAR sizes */ #define PCI_REBAR_CTRL 8 /* control register */ #define PCI_REBAR_CTRL_BAR_IDX 0x00000007 /* BAR index */ #define PCI_REBAR_CTRL_NBAR_MASK 0x000000E0 /* # of resizable BARs */ @@ -1061,8 +1064,9 @@ #define PCI_EXP_DPC_CAP_RP_EXT 0x0020 /* Root Port Extensions */ #define PCI_EXP_DPC_CAP_POISONED_TLP 0x0040 /* Poisoned TLP Egress Blocking Supported */ #define PCI_EXP_DPC_CAP_SW_TRIGGER 0x0080 /* Software Triggering Supported */ -#define PCI_EXP_DPC_RP_PIO_LOG_SIZE 0x0F00 /* RP PIO Log Size */ +#define PCI_EXP_DPC_RP_PIO_LOG_SIZE 0x0F00 /* RP PIO Log Size [3:0] */ #define PCI_EXP_DPC_CAP_DL_ACTIVE 0x1000 /* ERR_COR signal on DL_Active supported */ +#define PCI_EXP_DPC_RP_PIO_LOG_SIZE4 0x2000 /* RP PIO Log Size [4] */ #define PCI_EXP_DPC_CTL 0x06 /* DPC control */ #define PCI_EXP_DPC_CTL_EN_FATAL 0x0001 /* Enable trigger on ERR_FATAL message */ @@ -1205,9 +1209,12 @@ #define PCI_DOE_DATA_OBJECT_DISC_REQ_3_INDEX 0x000000ff #define PCI_DOE_DATA_OBJECT_DISC_REQ_3_VER 0x0000ff00 #define PCI_DOE_DATA_OBJECT_DISC_RSP_3_VID 0x0000ffff -#define PCI_DOE_DATA_OBJECT_DISC_RSP_3_PROTOCOL 0x00ff0000 +#define PCI_DOE_DATA_OBJECT_DISC_RSP_3_TYPE 0x00ff0000 #define PCI_DOE_DATA_OBJECT_DISC_RSP_3_NEXT_INDEX 0xff000000 +/* Deprecated old name, replaced with PCI_DOE_DATA_OBJECT_DISC_RSP_3_TYPE */ +#define PCI_DOE_DATA_OBJECT_DISC_RSP_3_PROTOCOL PCI_DOE_DATA_OBJECT_DISC_RSP_3_TYPE + /* Compute Express Link (CXL r3.1, sec 8.1.5) */ #define PCI_DVSEC_CXL_PORT 3 #define PCI_DVSEC_CXL_PORT_CTL 0x0c diff --git a/include/standard-headers/linux/virtio_net.h b/include/standard-headers/linux/virtio_net.h index fc594fe5fc..982e854f14 100644 --- a/include/standard-headers/linux/virtio_net.h +++ b/include/standard-headers/linux/virtio_net.h @@ -327,6 +327,19 @@ struct virtio_net_rss_config { uint8_t hash_key_data[/* hash_key_length */]; }; +struct virtio_net_rss_config_hdr { + uint32_t hash_types; + uint16_t indirection_table_mask; + uint16_t unclassified_queue; + uint16_t indirection_table[/* 1 + indirection_table_mask */]; +}; + +struct virtio_net_rss_config_trailer { + uint16_t max_tx_vq; + uint8_t hash_key_length; + uint8_t hash_key_data[/* hash_key_length */]; +}; + #define VIRTIO_NET_CTRL_MQ_RSS_CONFIG 1 /* diff --git a/include/standard-headers/linux/virtio_snd.h b/include/standard-headers/linux/virtio_snd.h index 860f12e0a4..160d57899f 100644 --- a/include/standard-headers/linux/virtio_snd.h +++ b/include/standard-headers/linux/virtio_snd.h @@ -25,7 +25,7 @@ struct virtio_snd_config { uint32_t streams; /* # of available channel maps */ uint32_t chmaps; - /* # of available control elements */ + /* # of available control elements (if VIRTIO_SND_F_CTLS) */ uint32_t controls; }; diff --git a/include/system/hvf.h b/include/system/hvf.h index 730f927f03..7b45a2e198 100644 --- a/include/system/hvf.h +++ b/include/system/hvf.h @@ -14,19 +14,24 @@ #define HVF_H #include "qemu/accel.h" +#include "qemu/queue.h" +#include "exec/vaddr.h" #include "qom/object.h" #ifdef COMPILING_PER_TARGET -#include "cpu.h" +# ifdef CONFIG_HVF +# define CONFIG_HVF_IS_POSSIBLE +# endif /* !CONFIG_HVF */ +#else +# define CONFIG_HVF_IS_POSSIBLE +#endif /* COMPILING_PER_TARGET */ -#ifdef CONFIG_HVF +#ifdef CONFIG_HVF_IS_POSSIBLE extern bool hvf_allowed; #define hvf_enabled() (hvf_allowed) -#else /* !CONFIG_HVF */ +#else /* !CONFIG_HVF_IS_POSSIBLE */ #define hvf_enabled() 0 -#endif /* !CONFIG_HVF */ - -#endif /* COMPILING_PER_TARGET */ +#endif /* !CONFIG_HVF_IS_POSSIBLE */ #define TYPE_HVF_ACCEL ACCEL_CLASS_NAME("hvf") diff --git a/include/system/hvf_int.h b/include/system/hvf_int.h index 42ae18433f..8c8b84012d 100644 --- a/include/system/hvf_int.h +++ b/include/system/hvf_int.h @@ -11,6 +11,8 @@ #ifndef HVF_INT_H #define HVF_INT_H +#include "qemu/queue.h" + #ifdef __aarch64__ #include <Hypervisor/Hypervisor.h> typedef hv_vcpu_t hvf_vcpuid; diff --git a/include/ui/dmabuf.h b/include/ui/dmabuf.h index dc74ba895a..3decdca497 100644 --- a/include/ui/dmabuf.h +++ b/include/ui/dmabuf.h @@ -10,24 +10,29 @@ #ifndef DMABUF_H #define DMABUF_H +#define DMABUF_MAX_PLANES 4 + typedef struct QemuDmaBuf QemuDmaBuf; QemuDmaBuf *qemu_dmabuf_new(uint32_t width, uint32_t height, - uint32_t stride, uint32_t x, - uint32_t y, uint32_t backing_width, - uint32_t backing_height, uint32_t fourcc, - uint64_t modifier, int dmabuf_fd, + const uint32_t *offset, const uint32_t *stride, + uint32_t x, uint32_t y, + uint32_t backing_width, uint32_t backing_height, + uint32_t fourcc, uint64_t modifier, + const int32_t *dmabuf_fd, uint32_t num_planes, bool allow_fences, bool y0_top); void qemu_dmabuf_free(QemuDmaBuf *dmabuf); G_DEFINE_AUTOPTR_CLEANUP_FUNC(QemuDmaBuf, qemu_dmabuf_free); -int qemu_dmabuf_get_fd(QemuDmaBuf *dmabuf); -int qemu_dmabuf_dup_fd(QemuDmaBuf *dmabuf); +const int *qemu_dmabuf_get_fds(QemuDmaBuf *dmabuf, int *nfds); +void qemu_dmabuf_dup_fds(QemuDmaBuf *dmabuf, int *fds, int nfds); void qemu_dmabuf_close(QemuDmaBuf *dmabuf); uint32_t qemu_dmabuf_get_width(QemuDmaBuf *dmabuf); uint32_t qemu_dmabuf_get_height(QemuDmaBuf *dmabuf); -uint32_t qemu_dmabuf_get_stride(QemuDmaBuf *dmabuf); +const uint32_t *qemu_dmabuf_get_offsets(QemuDmaBuf *dmabuf, int *noffsets); +const uint32_t *qemu_dmabuf_get_strides(QemuDmaBuf *dmabuf, int *nstrides); +uint32_t qemu_dmabuf_get_num_planes(QemuDmaBuf *dmabuf); uint32_t qemu_dmabuf_get_fourcc(QemuDmaBuf *dmabuf); uint64_t qemu_dmabuf_get_modifier(QemuDmaBuf *dmabuf); uint32_t qemu_dmabuf_get_texture(QemuDmaBuf *dmabuf); @@ -44,6 +49,5 @@ void qemu_dmabuf_set_texture(QemuDmaBuf *dmabuf, uint32_t texture); void qemu_dmabuf_set_fence_fd(QemuDmaBuf *dmabuf, int32_t fence_fd); void qemu_dmabuf_set_sync(QemuDmaBuf *dmabuf, void *sync); void qemu_dmabuf_set_draw_submitted(QemuDmaBuf *dmabuf, bool draw_submitted); -void qemu_dmabuf_set_fd(QemuDmaBuf *dmabuf, int32_t fd); #endif diff --git a/include/ui/egl-helpers.h b/include/ui/egl-helpers.h index 4b8c0d2281..fb80e15142 100644 --- a/include/ui/egl-helpers.h +++ b/include/ui/egl-helpers.h @@ -46,8 +46,9 @@ extern int qemu_egl_rn_fd; extern struct gbm_device *qemu_egl_rn_gbm_dev; int egl_rendernode_init(const char *rendernode, DisplayGLMode mode); -int egl_get_fd_for_texture(uint32_t tex_id, EGLint *stride, EGLint *fourcc, - EGLuint64KHR *modifier); +bool egl_dmabuf_export_texture(uint32_t tex_id, int *fd, EGLint *offset, + EGLint *stride, EGLint *fourcc, int *num_planes, + EGLuint64KHR *modifier); void egl_dmabuf_import_texture(QemuDmaBuf *dmabuf); void egl_dmabuf_release_texture(QemuDmaBuf *dmabuf); diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h index ec1e82bdc8..4e6aff08df 100644 --- a/linux-headers/asm-arm64/kvm.h +++ b/linux-headers/asm-arm64/kvm.h @@ -105,6 +105,7 @@ struct kvm_regs { #define KVM_ARM_VCPU_PTRAUTH_ADDRESS 5 /* VCPU uses address authentication */ #define KVM_ARM_VCPU_PTRAUTH_GENERIC 6 /* VCPU uses generic authentication */ #define KVM_ARM_VCPU_HAS_EL2 7 /* Support nested virtualization */ +#define KVM_ARM_VCPU_HAS_EL2_E2H0 8 /* Limit NV support to E2H RES0 */ struct kvm_vcpu_init { __u32 target; @@ -365,6 +366,7 @@ enum { KVM_REG_ARM_STD_HYP_BIT_PV_TIME = 0, }; +/* Vendor hyper call function numbers 0-63 */ #define KVM_REG_ARM_VENDOR_HYP_BMAP KVM_REG_ARM_FW_FEAT_BMAP_REG(2) enum { @@ -372,6 +374,14 @@ enum { KVM_REG_ARM_VENDOR_HYP_BIT_PTP = 1, }; +/* Vendor hyper call function numbers 64-127 */ +#define KVM_REG_ARM_VENDOR_HYP_BMAP_2 KVM_REG_ARM_FW_FEAT_BMAP_REG(3) + +enum { + KVM_REG_ARM_VENDOR_HYP_BIT_DISCOVER_IMPL_VER = 0, + KVM_REG_ARM_VENDOR_HYP_BIT_DISCOVER_IMPL_CPUS = 1, +}; + /* Device Control API on vm fd */ #define KVM_ARM_VM_SMCCC_CTRL 0 #define KVM_ARM_VM_SMCCC_FILTER 0 @@ -394,6 +404,7 @@ enum { #define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6 #define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7 #define KVM_DEV_ARM_VGIC_GRP_ITS_REGS 8 +#define KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ 9 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \ (0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT) diff --git a/linux-headers/asm-arm64/unistd_64.h b/linux-headers/asm-arm64/unistd_64.h index d4e90fff76..ee9aaebdf3 100644 --- a/linux-headers/asm-arm64/unistd_64.h +++ b/linux-headers/asm-arm64/unistd_64.h @@ -323,6 +323,7 @@ #define __NR_getxattrat 464 #define __NR_listxattrat 465 #define __NR_removexattrat 466 +#define __NR_open_tree_attr 467 #endif /* _ASM_UNISTD_64_H */ diff --git a/linux-headers/asm-generic/mman-common.h b/linux-headers/asm-generic/mman-common.h index 1ea2c4c33b..ef1c27fa3c 100644 --- a/linux-headers/asm-generic/mman-common.h +++ b/linux-headers/asm-generic/mman-common.h @@ -85,6 +85,7 @@ /* compatibility flags */ #define MAP_FILE 0 +#define PKEY_UNRESTRICTED 0x0 #define PKEY_DISABLE_ACCESS 0x1 #define PKEY_DISABLE_WRITE 0x2 #define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\ diff --git a/linux-headers/asm-generic/unistd.h b/linux-headers/asm-generic/unistd.h index 88dc393c2b..2892a45023 100644 --- a/linux-headers/asm-generic/unistd.h +++ b/linux-headers/asm-generic/unistd.h @@ -849,9 +849,11 @@ __SYSCALL(__NR_getxattrat, sys_getxattrat) __SYSCALL(__NR_listxattrat, sys_listxattrat) #define __NR_removexattrat 466 __SYSCALL(__NR_removexattrat, sys_removexattrat) +#define __NR_open_tree_attr 467 +__SYSCALL(__NR_open_tree_attr, sys_open_tree_attr) #undef __NR_syscalls -#define __NR_syscalls 467 +#define __NR_syscalls 468 /* * 32 bit systems traditionally used different diff --git a/linux-headers/asm-loongarch/unistd_64.h b/linux-headers/asm-loongarch/unistd_64.h index 23fb96a8a7..50d22df8f7 100644 --- a/linux-headers/asm-loongarch/unistd_64.h +++ b/linux-headers/asm-loongarch/unistd_64.h @@ -319,6 +319,7 @@ #define __NR_getxattrat 464 #define __NR_listxattrat 465 #define __NR_removexattrat 466 +#define __NR_open_tree_attr 467 #endif /* _ASM_UNISTD_64_H */ diff --git a/linux-headers/asm-mips/unistd_n32.h b/linux-headers/asm-mips/unistd_n32.h index 9a75719644..bdcc2f460b 100644 --- a/linux-headers/asm-mips/unistd_n32.h +++ b/linux-headers/asm-mips/unistd_n32.h @@ -395,5 +395,6 @@ #define __NR_getxattrat (__NR_Linux + 464) #define __NR_listxattrat (__NR_Linux + 465) #define __NR_removexattrat (__NR_Linux + 466) +#define __NR_open_tree_attr (__NR_Linux + 467) #endif /* _ASM_UNISTD_N32_H */ diff --git a/linux-headers/asm-mips/unistd_n64.h b/linux-headers/asm-mips/unistd_n64.h index 7086783b0c..3b6b0193b6 100644 --- a/linux-headers/asm-mips/unistd_n64.h +++ b/linux-headers/asm-mips/unistd_n64.h @@ -371,5 +371,6 @@ #define __NR_getxattrat (__NR_Linux + 464) #define __NR_listxattrat (__NR_Linux + 465) #define __NR_removexattrat (__NR_Linux + 466) +#define __NR_open_tree_attr (__NR_Linux + 467) #endif /* _ASM_UNISTD_N64_H */ diff --git a/linux-headers/asm-mips/unistd_o32.h b/linux-headers/asm-mips/unistd_o32.h index b3825823e4..4609a4b4d3 100644 --- a/linux-headers/asm-mips/unistd_o32.h +++ b/linux-headers/asm-mips/unistd_o32.h @@ -441,5 +441,6 @@ #define __NR_getxattrat (__NR_Linux + 464) #define __NR_listxattrat (__NR_Linux + 465) #define __NR_removexattrat (__NR_Linux + 466) +#define __NR_open_tree_attr (__NR_Linux + 467) #endif /* _ASM_UNISTD_O32_H */ diff --git a/linux-headers/asm-powerpc/unistd_32.h b/linux-headers/asm-powerpc/unistd_32.h index 38ee4dc35d..5d38a427e0 100644 --- a/linux-headers/asm-powerpc/unistd_32.h +++ b/linux-headers/asm-powerpc/unistd_32.h @@ -448,6 +448,7 @@ #define __NR_getxattrat 464 #define __NR_listxattrat 465 #define __NR_removexattrat 466 +#define __NR_open_tree_attr 467 #endif /* _ASM_UNISTD_32_H */ diff --git a/linux-headers/asm-powerpc/unistd_64.h b/linux-headers/asm-powerpc/unistd_64.h index 5e5f156834..860a488e4d 100644 --- a/linux-headers/asm-powerpc/unistd_64.h +++ b/linux-headers/asm-powerpc/unistd_64.h @@ -420,6 +420,7 @@ #define __NR_getxattrat 464 #define __NR_listxattrat 465 #define __NR_removexattrat 466 +#define __NR_open_tree_attr 467 #endif /* _ASM_UNISTD_64_H */ diff --git a/linux-headers/asm-riscv/kvm.h b/linux-headers/asm-riscv/kvm.h index f06bc5efcd..5f59fd226c 100644 --- a/linux-headers/asm-riscv/kvm.h +++ b/linux-headers/asm-riscv/kvm.h @@ -182,6 +182,8 @@ enum KVM_RISCV_ISA_EXT_ID { KVM_RISCV_ISA_EXT_SVVPTC, KVM_RISCV_ISA_EXT_ZABHA, KVM_RISCV_ISA_EXT_ZICCRSE, + KVM_RISCV_ISA_EXT_ZAAMO, + KVM_RISCV_ISA_EXT_ZALRSC, KVM_RISCV_ISA_EXT_MAX, }; diff --git a/linux-headers/asm-riscv/unistd_32.h b/linux-headers/asm-riscv/unistd_32.h index 74f6127aed..a5e769f1d9 100644 --- a/linux-headers/asm-riscv/unistd_32.h +++ b/linux-headers/asm-riscv/unistd_32.h @@ -314,6 +314,7 @@ #define __NR_getxattrat 464 #define __NR_listxattrat 465 #define __NR_removexattrat 466 +#define __NR_open_tree_attr 467 #endif /* _ASM_UNISTD_32_H */ diff --git a/linux-headers/asm-riscv/unistd_64.h b/linux-headers/asm-riscv/unistd_64.h index bb6a15a2ec..8df4d64841 100644 --- a/linux-headers/asm-riscv/unistd_64.h +++ b/linux-headers/asm-riscv/unistd_64.h @@ -324,6 +324,7 @@ #define __NR_getxattrat 464 #define __NR_listxattrat 465 #define __NR_removexattrat 466 +#define __NR_open_tree_attr 467 #endif /* _ASM_UNISTD_64_H */ diff --git a/linux-headers/asm-s390/unistd_32.h b/linux-headers/asm-s390/unistd_32.h index 620201cb36..85eedbd18e 100644 --- a/linux-headers/asm-s390/unistd_32.h +++ b/linux-headers/asm-s390/unistd_32.h @@ -439,5 +439,6 @@ #define __NR_getxattrat 464 #define __NR_listxattrat 465 #define __NR_removexattrat 466 +#define __NR_open_tree_attr 467 #endif /* _ASM_S390_UNISTD_32_H */ diff --git a/linux-headers/asm-s390/unistd_64.h b/linux-headers/asm-s390/unistd_64.h index e7e4a10aaf..c03b1b9701 100644 --- a/linux-headers/asm-s390/unistd_64.h +++ b/linux-headers/asm-s390/unistd_64.h @@ -387,5 +387,6 @@ #define __NR_getxattrat 464 #define __NR_listxattrat 465 #define __NR_removexattrat 466 +#define __NR_open_tree_attr 467 #endif /* _ASM_S390_UNISTD_64_H */ diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h index 86f2c34e7a..dc591fb17e 100644 --- a/linux-headers/asm-x86/kvm.h +++ b/linux-headers/asm-x86/kvm.h @@ -557,6 +557,9 @@ struct kvm_x86_mce { #define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7) #define KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA (1 << 8) +#define KVM_XEN_MSR_MIN_INDEX 0x40000000u +#define KVM_XEN_MSR_MAX_INDEX 0x4fffffffu + struct kvm_xen_hvm_config { __u32 flags; __u32 msr; diff --git a/linux-headers/asm-x86/unistd_32.h b/linux-headers/asm-x86/unistd_32.h index a2eb492a75..491d6b4eb6 100644 --- a/linux-headers/asm-x86/unistd_32.h +++ b/linux-headers/asm-x86/unistd_32.h @@ -457,6 +457,7 @@ #define __NR_getxattrat 464 #define __NR_listxattrat 465 #define __NR_removexattrat 466 +#define __NR_open_tree_attr 467 #endif /* _ASM_UNISTD_32_H */ diff --git a/linux-headers/asm-x86/unistd_64.h b/linux-headers/asm-x86/unistd_64.h index 2f5fc400f5..7cf88bf9bd 100644 --- a/linux-headers/asm-x86/unistd_64.h +++ b/linux-headers/asm-x86/unistd_64.h @@ -380,6 +380,7 @@ #define __NR_getxattrat 464 #define __NR_listxattrat 465 #define __NR_removexattrat 466 +#define __NR_open_tree_attr 467 #endif /* _ASM_UNISTD_64_H */ diff --git a/linux-headers/asm-x86/unistd_x32.h b/linux-headers/asm-x86/unistd_x32.h index fecd832e7f..82959111e6 100644 --- a/linux-headers/asm-x86/unistd_x32.h +++ b/linux-headers/asm-x86/unistd_x32.h @@ -333,6 +333,7 @@ #define __NR_getxattrat (__X32_SYSCALL_BIT + 464) #define __NR_listxattrat (__X32_SYSCALL_BIT + 465) #define __NR_removexattrat (__X32_SYSCALL_BIT + 466) +#define __NR_open_tree_attr (__X32_SYSCALL_BIT + 467) #define __NR_rt_sigaction (__X32_SYSCALL_BIT + 512) #define __NR_rt_sigreturn (__X32_SYSCALL_BIT + 513) #define __NR_ioctl (__X32_SYSCALL_BIT + 514) diff --git a/linux-headers/linux/bits.h b/linux-headers/linux/bits.h index c0d00c0a98..58596d18f4 100644 --- a/linux-headers/linux/bits.h +++ b/linux-headers/linux/bits.h @@ -4,13 +4,9 @@ #ifndef _LINUX_BITS_H #define _LINUX_BITS_H -#define __GENMASK(h, l) \ - (((~_UL(0)) - (_UL(1) << (l)) + 1) & \ - (~_UL(0) >> (__BITS_PER_LONG - 1 - (h)))) +#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (BITS_PER_LONG - 1 - (h)))) -#define __GENMASK_ULL(h, l) \ - (((~_ULL(0)) - (_ULL(1) << (l)) + 1) & \ - (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h)))) +#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h)))) #define __GENMASK_U128(h, l) \ ((_BIT128((h)) << 1) - (_BIT128(l))) diff --git a/linux-headers/linux/const.h b/linux-headers/linux/const.h index 2122610de7..95ede23342 100644 --- a/linux-headers/linux/const.h +++ b/linux-headers/linux/const.h @@ -33,7 +33,7 @@ * Missing __asm__ support * * __BIT128() would not work in the __asm__ code, as it shifts an - * 'unsigned __init128' data type as direct representation of + * 'unsigned __int128' data type as direct representation of * 128 bit constants is not supported in the gcc compiler, as * they get silently truncated. * diff --git a/linux-headers/linux/iommufd.h b/linux-headers/linux/iommufd.h index ccbdca5e11..cb0f7d6b4d 100644 --- a/linux-headers/linux/iommufd.h +++ b/linux-headers/linux/iommufd.h @@ -55,6 +55,7 @@ enum { IOMMUFD_CMD_VIOMMU_ALLOC = 0x90, IOMMUFD_CMD_VDEVICE_ALLOC = 0x91, IOMMUFD_CMD_IOAS_CHANGE_PROCESS = 0x92, + IOMMUFD_CMD_VEVENTQ_ALLOC = 0x93, }; /** @@ -392,6 +393,9 @@ struct iommu_vfio_ioas { * Any domain attached to the non-PASID part of the * device must also be flagged, otherwise attaching a * PASID will blocked. + * For the user that wants to attach PASID, ioas is + * not recommended for both the non-PASID part + * and PASID part of the device. * If IOMMU does not support PASID it will return * error (-EOPNOTSUPP). */ @@ -608,9 +612,17 @@ enum iommu_hw_info_type { * IOMMU_HWPT_GET_DIRTY_BITMAP * IOMMU_HWPT_SET_DIRTY_TRACKING * + * @IOMMU_HW_CAP_PCI_PASID_EXEC: Execute Permission Supported, user ignores it + * when the struct + * iommu_hw_info::out_max_pasid_log2 is zero. + * @IOMMU_HW_CAP_PCI_PASID_PRIV: Privileged Mode Supported, user ignores it + * when the struct + * iommu_hw_info::out_max_pasid_log2 is zero. */ enum iommufd_hw_capabilities { IOMMU_HW_CAP_DIRTY_TRACKING = 1 << 0, + IOMMU_HW_CAP_PCI_PASID_EXEC = 1 << 1, + IOMMU_HW_CAP_PCI_PASID_PRIV = 1 << 2, }; /** @@ -626,6 +638,9 @@ enum iommufd_hw_capabilities { * iommu_hw_info_type. * @out_capabilities: Output the generic iommu capability info type as defined * in the enum iommu_hw_capabilities. + * @out_max_pasid_log2: Output the width of PASIDs. 0 means no PASID support. + * PCI devices turn to out_capabilities to check if the + * specific capabilities is supported or not. * @__reserved: Must be 0 * * Query an iommu type specific hardware information data from an iommu behind @@ -649,7 +664,8 @@ struct iommu_hw_info { __u32 data_len; __aligned_u64 data_uptr; __u32 out_data_type; - __u32 __reserved; + __u8 out_max_pasid_log2; + __u8 __reserved[3]; __aligned_u64 out_capabilities; }; #define IOMMU_GET_HW_INFO _IO(IOMMUFD_TYPE, IOMMUFD_CMD_GET_HW_INFO) @@ -1014,4 +1030,115 @@ struct iommu_ioas_change_process { #define IOMMU_IOAS_CHANGE_PROCESS \ _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_CHANGE_PROCESS) +/** + * enum iommu_veventq_flag - flag for struct iommufd_vevent_header + * @IOMMU_VEVENTQ_FLAG_LOST_EVENTS: vEVENTQ has lost vEVENTs + */ +enum iommu_veventq_flag { + IOMMU_VEVENTQ_FLAG_LOST_EVENTS = (1U << 0), +}; + +/** + * struct iommufd_vevent_header - Virtual Event Header for a vEVENTQ Status + * @flags: Combination of enum iommu_veventq_flag + * @sequence: The sequence index of a vEVENT in the vEVENTQ, with a range of + * [0, INT_MAX] where the following index of INT_MAX is 0 + * + * Each iommufd_vevent_header reports a sequence index of the following vEVENT: + * + * +----------------------+-------+----------------------+-------+---+-------+ + * | header0 {sequence=0} | data0 | header1 {sequence=1} | data1 |...| dataN | + * +----------------------+-------+----------------------+-------+---+-------+ + * + * And this sequence index is expected to be monotonic to the sequence index of + * the previous vEVENT. If two adjacent sequence indexes has a delta larger than + * 1, it means that delta - 1 number of vEVENTs has lost, e.g. two lost vEVENTs: + * + * +-----+----------------------+-------+----------------------+-------+-----+ + * | ... | header3 {sequence=3} | data3 | header6 {sequence=6} | data6 | ... | + * +-----+----------------------+-------+----------------------+-------+-----+ + * + * If a vEVENT lost at the tail of the vEVENTQ and there is no following vEVENT + * providing the next sequence index, an IOMMU_VEVENTQ_FLAG_LOST_EVENTS header + * would be added to the tail, and no data would follow this header: + * + * +--+----------------------+-------+-----------------------------------------+ + * |..| header3 {sequence=3} | data3 | header4 {flags=LOST_EVENTS, sequence=4} | + * +--+----------------------+-------+-----------------------------------------+ + */ +struct iommufd_vevent_header { + __u32 flags; + __u32 sequence; +}; + +/** + * enum iommu_veventq_type - Virtual Event Queue Type + * @IOMMU_VEVENTQ_TYPE_DEFAULT: Reserved for future use + * @IOMMU_VEVENTQ_TYPE_ARM_SMMUV3: ARM SMMUv3 Virtual Event Queue + */ +enum iommu_veventq_type { + IOMMU_VEVENTQ_TYPE_DEFAULT = 0, + IOMMU_VEVENTQ_TYPE_ARM_SMMUV3 = 1, +}; + +/** + * struct iommu_vevent_arm_smmuv3 - ARM SMMUv3 Virtual Event + * (IOMMU_VEVENTQ_TYPE_ARM_SMMUV3) + * @evt: 256-bit ARM SMMUv3 Event record, little-endian. + * Reported event records: (Refer to "7.3 Event records" in SMMUv3 HW Spec) + * - 0x04 C_BAD_STE + * - 0x06 F_STREAM_DISABLED + * - 0x08 C_BAD_SUBSTREAMID + * - 0x0a C_BAD_CD + * - 0x10 F_TRANSLATION + * - 0x11 F_ADDR_SIZE + * - 0x12 F_ACCESS + * - 0x13 F_PERMISSION + * + * StreamID field reports a virtual device ID. To receive a virtual event for a + * device, a vDEVICE must be allocated via IOMMU_VDEVICE_ALLOC. + */ +struct iommu_vevent_arm_smmuv3 { + __aligned_le64 evt[4]; +}; + +/** + * struct iommu_veventq_alloc - ioctl(IOMMU_VEVENTQ_ALLOC) + * @size: sizeof(struct iommu_veventq_alloc) + * @flags: Must be 0 + * @viommu_id: virtual IOMMU ID to associate the vEVENTQ with + * @type: Type of the vEVENTQ. Must be defined in enum iommu_veventq_type + * @veventq_depth: Maximum number of events in the vEVENTQ + * @out_veventq_id: The ID of the new vEVENTQ + * @out_veventq_fd: The fd of the new vEVENTQ. User space must close the + * successfully returned fd after using it + * @__reserved: Must be 0 + * + * Explicitly allocate a virtual event queue interface for a vIOMMU. A vIOMMU + * can have multiple FDs for different types, but is confined to one per @type. + * User space should open the @out_veventq_fd to read vEVENTs out of a vEVENTQ, + * if there are vEVENTs available. A vEVENTQ will lose events due to overflow, + * if the number of the vEVENTs hits @veventq_depth. + * + * Each vEVENT in a vEVENTQ encloses a struct iommufd_vevent_header followed by + * a type-specific data structure, in a normal case: + * + * +-+---------+-------+---------+-------+-----+---------+-------+-+ + * | | header0 | data0 | header1 | data1 | ... | headerN | dataN | | + * +-+---------+-------+---------+-------+-----+---------+-------+-+ + * + * unless a tailing IOMMU_VEVENTQ_FLAG_LOST_EVENTS header is logged (refer to + * struct iommufd_vevent_header). + */ +struct iommu_veventq_alloc { + __u32 size; + __u32 flags; + __u32 viommu_id; + __u32 type; + __u32 veventq_depth; + __u32 out_veventq_id; + __u32 out_veventq_fd; + __u32 __reserved; +}; +#define IOMMU_VEVENTQ_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VEVENTQ_ALLOC) #endif diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index 27181b3dd8..e5f3e8b5a0 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -921,6 +921,7 @@ struct kvm_enable_cap { #define KVM_CAP_PRE_FAULT_MEMORY 236 #define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237 #define KVM_CAP_X86_GUEST_MODE 238 +#define KVM_CAP_ARM_WRITABLE_IMP_ID_REGS 239 struct kvm_irq_routing_irqchip { __u32 irqchip; diff --git a/linux-headers/linux/psp-sev.h b/linux-headers/linux/psp-sev.h index 17bf191573..113c4ceb78 100644 --- a/linux-headers/linux/psp-sev.h +++ b/linux-headers/linux/psp-sev.h @@ -73,13 +73,20 @@ typedef enum { SEV_RET_INVALID_PARAM, SEV_RET_RESOURCE_LIMIT, SEV_RET_SECURE_DATA_INVALID, - SEV_RET_INVALID_KEY = 0x27, - SEV_RET_INVALID_PAGE_SIZE, - SEV_RET_INVALID_PAGE_STATE, - SEV_RET_INVALID_MDATA_ENTRY, - SEV_RET_INVALID_PAGE_OWNER, - SEV_RET_INVALID_PAGE_AEAD_OFLOW, - SEV_RET_RMP_INIT_REQUIRED, + SEV_RET_INVALID_PAGE_SIZE = 0x0019, + SEV_RET_INVALID_PAGE_STATE = 0x001A, + SEV_RET_INVALID_MDATA_ENTRY = 0x001B, + SEV_RET_INVALID_PAGE_OWNER = 0x001C, + SEV_RET_AEAD_OFLOW = 0x001D, + SEV_RET_EXIT_RING_BUFFER = 0x001F, + SEV_RET_RMP_INIT_REQUIRED = 0x0020, + SEV_RET_BAD_SVN = 0x0021, + SEV_RET_BAD_VERSION = 0x0022, + SEV_RET_SHUTDOWN_REQUIRED = 0x0023, + SEV_RET_UPDATE_FAILED = 0x0024, + SEV_RET_RESTORE_REQUIRED = 0x0025, + SEV_RET_RMP_INITIALIZATION_FAILED = 0x0026, + SEV_RET_INVALID_KEY = 0x0027, SEV_RET_MAX, } sev_ret_code; diff --git a/linux-headers/linux/stddef.h b/linux-headers/linux/stddef.h index e1416f7937..e1fcfcf3b3 100644 --- a/linux-headers/linux/stddef.h +++ b/linux-headers/linux/stddef.h @@ -70,4 +70,6 @@ #define __counted_by_be(m) #endif +#define __kernel_nonstring + #endif /* _LINUX_STDDEF_H */ diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h index 1b5e254d6a..79bf8c0cc5 100644 --- a/linux-headers/linux/vfio.h +++ b/linux-headers/linux/vfio.h @@ -671,6 +671,7 @@ enum { */ enum { VFIO_AP_REQ_IRQ_INDEX, + VFIO_AP_CFG_CHG_IRQ_INDEX, VFIO_AP_NUM_IRQS }; @@ -931,29 +932,34 @@ struct vfio_device_bind_iommufd { * VFIO_DEVICE_ATTACH_IOMMUFD_PT - _IOW(VFIO_TYPE, VFIO_BASE + 19, * struct vfio_device_attach_iommufd_pt) * @argsz: User filled size of this data. - * @flags: Must be 0. + * @flags: Flags for attach. * @pt_id: Input the target id which can represent an ioas or a hwpt * allocated via iommufd subsystem. * Output the input ioas id or the attached hwpt id which could * be the specified hwpt itself or a hwpt automatically created * for the specified ioas by kernel during the attachment. + * @pasid: The pasid to be attached, only meaningful when + * VFIO_DEVICE_ATTACH_PASID is set in @flags * * Associate the device with an address space within the bound iommufd. * Undo by VFIO_DEVICE_DETACH_IOMMUFD_PT or device fd close. This is only * allowed on cdev fds. * - * If a vfio device is currently attached to a valid hw_pagetable, without doing - * a VFIO_DEVICE_DETACH_IOMMUFD_PT, a second VFIO_DEVICE_ATTACH_IOMMUFD_PT ioctl - * passing in another hw_pagetable (hwpt) id is allowed. This action, also known - * as a hw_pagetable replacement, will replace the device's currently attached - * hw_pagetable with a new hw_pagetable corresponding to the given pt_id. + * If a vfio device or a pasid of this device is currently attached to a valid + * hw_pagetable (hwpt), without doing a VFIO_DEVICE_DETACH_IOMMUFD_PT, a second + * VFIO_DEVICE_ATTACH_IOMMUFD_PT ioctl passing in another hwpt id is allowed. + * This action, also known as a hw_pagetable replacement, will replace the + * currently attached hwpt of the device or the pasid of this device with a new + * hwpt corresponding to the given pt_id. * * Return: 0 on success, -errno on failure. */ struct vfio_device_attach_iommufd_pt { __u32 argsz; __u32 flags; +#define VFIO_DEVICE_ATTACH_PASID (1 << 0) __u32 pt_id; + __u32 pasid; }; #define VFIO_DEVICE_ATTACH_IOMMUFD_PT _IO(VFIO_TYPE, VFIO_BASE + 19) @@ -962,17 +968,21 @@ struct vfio_device_attach_iommufd_pt { * VFIO_DEVICE_DETACH_IOMMUFD_PT - _IOW(VFIO_TYPE, VFIO_BASE + 20, * struct vfio_device_detach_iommufd_pt) * @argsz: User filled size of this data. - * @flags: Must be 0. + * @flags: Flags for detach. + * @pasid: The pasid to be detached, only meaningful when + * VFIO_DEVICE_DETACH_PASID is set in @flags * - * Remove the association of the device and its current associated address - * space. After it, the device should be in a blocking DMA state. This is only - * allowed on cdev fds. + * Remove the association of the device or a pasid of the device and its current + * associated address space. After it, the device or the pasid should be in a + * blocking DMA state. This is only allowed on cdev fds. * * Return: 0 on success, -errno on failure. */ struct vfio_device_detach_iommufd_pt { __u32 argsz; __u32 flags; +#define VFIO_DEVICE_DETACH_PASID (1 << 0) + __u32 pasid; }; #define VFIO_DEVICE_DETACH_IOMMUFD_PT _IO(VFIO_TYPE, VFIO_BASE + 20) diff --git a/linux-user/mmap.c b/linux-user/mmap.c index f88a80c31e..002e1e668e 100644 --- a/linux-user/mmap.c +++ b/linux-user/mmap.c @@ -645,7 +645,7 @@ static abi_long mmap_h_eq_g(abi_ulong start, abi_ulong len, * * However, this case is rather common with executable images, * so the workaround is important for even trivial tests, whereas - * the mmap of of a file being extended is less common. + * the mmap of a file being extended is less common. */ static abi_long mmap_h_lt_g(abi_ulong start, abi_ulong len, int host_prot, int mmap_flags, int page_flags, int fd, diff --git a/meson.build b/meson.build index 27f1150152..e819a7084c 100644 --- a/meson.build +++ b/meson.build @@ -3191,6 +3191,11 @@ if host_os == 'windows' }''', name: '_lock_file and _unlock_file')) endif +if spice.found() + config_host_data.set('HAVE_SPICE_QXL_GL_SCANOUT2', + cc.has_function('spice_qxl_gl_scanout2', dependencies: spice)) +endif + if host_os == 'windows' mingw_has_setjmp_longjmp = cc.links(''' #include <setjmp.h> diff --git a/qapi/machine-target.json b/qapi/machine-target.json index 541f93eeb7..426ce4ee82 100644 --- a/qapi/machine-target.json +++ b/qapi/machine-target.json @@ -350,7 +350,7 @@ # migration-safe in the future (since 4.1) # # @deprecated: If true, this CPU model is deprecated and may be -# removed in in some future version of QEMU according to the QEMU +# removed in some future version of QEMU according to the QEMU # deprecation policy. (since 5.2) # # @unavailable-features is a list of QOM property names that represent diff --git a/qapi/qom.json b/qapi/qom.json index 28ce24cd8d..04c118e4d6 100644 --- a/qapi/qom.json +++ b/qapi/qom.json @@ -871,7 +871,7 @@ # link characteristics read from PCIe Configuration space. # To get the full path latency from CPU to CXL attached DRAM # CXL device: Add the latency from CPU to Generic Port (from -# HMAT indexed via the the node ID in this SRAT structure) to +# HMAT indexed via the node ID in this SRAT structure) to # that for CXL bus links, the latency across intermediate switches # and from the EP port to the actual memory. Bandwidth is more # complex as there may be interleaving across multiple devices diff --git a/qom/object.c b/qom/object.c index 664f0f24ae..7b013f40a0 100644 --- a/qom/object.c +++ b/qom/object.c @@ -485,7 +485,7 @@ bool object_apply_global_props(Object *obj, const GPtrArray *props, * Slot 0: accelerator's global property defaults * Slot 1: machine's global property defaults * Slot 2: global properties from legacy command line option - * Each is a GPtrArray of of GlobalProperty. + * Each is a GPtrArray of GlobalProperty. * Applied in order, later entries override earlier ones. */ static GPtrArray *object_compat_props[3]; diff --git a/qom/qom-qmp-cmds.c b/qom/qom-qmp-cmds.c index e866547618..293755f409 100644 --- a/qom/qom-qmp-cmds.c +++ b/qom/qom-qmp-cmds.c @@ -28,15 +28,11 @@ #include "qom/object_interfaces.h" #include "qom/qom-qobject.h" -ObjectPropertyInfoList *qmp_qom_list(const char *path, Error **errp) +static Object *qom_resolve_path(const char *path, Error **errp) { - Object *obj; bool ambiguous = false; - ObjectPropertyInfoList *props = NULL; - ObjectProperty *prop; - ObjectPropertyIterator iter; + Object *obj = object_resolve_path(path, &ambiguous); - obj = object_resolve_path(path, &ambiguous); if (obj == NULL) { if (ambiguous) { error_setg(errp, "Path '%s' is ambiguous", path); @@ -44,6 +40,19 @@ ObjectPropertyInfoList *qmp_qom_list(const char *path, Error **errp) error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, "Device '%s' not found", path); } + } + return obj; +} + +ObjectPropertyInfoList *qmp_qom_list(const char *path, Error **errp) +{ + Object *obj; + ObjectPropertyInfoList *props = NULL; + ObjectProperty *prop; + ObjectPropertyIterator iter; + + obj = qom_resolve_path(path, errp); + if (obj == NULL) { return NULL; } diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh index 8913e4fb99..b43b8ef75a 100755 --- a/scripts/update-linux-headers.sh +++ b/scripts/update-linux-headers.sh @@ -177,7 +177,7 @@ EOF # Remove everything except the macros from bootparam.h avoiding the # unnecessary import of several video/ist/etc headers - sed -e '/__ASSEMBLY__/,/__ASSEMBLY__/d' \ + sed -e '/__ASSEMBLER__/,/__ASSEMBLER__/d' \ "$hdrdir/include/asm/bootparam.h" > "$hdrdir/bootparam.h" cp_portable "$hdrdir/bootparam.h" \ "$output/include/standard-headers/asm-$arch" diff --git a/target/alpha/machine.c b/target/alpha/machine.c index f09834f635..5f302b166d 100644 --- a/target/alpha/machine.c +++ b/target/alpha/machine.c @@ -74,7 +74,7 @@ static const VMStateDescription vmstate_env = { }; static const VMStateField vmstate_cpu_fields[] = { - VMSTATE_CPU(), + VMSTATE_STRUCT(parent_obj, AlphaCPU, 0, vmstate_cpu_common, CPUState), VMSTATE_STRUCT(env, AlphaCPU, 1, vmstate_env, CPUAlphaState), VMSTATE_END_OF_LIST() }; diff --git a/target/hppa/machine.c b/target/hppa/machine.c index bb47a2e689..13e555151a 100644 --- a/target/hppa/machine.c +++ b/target/hppa/machine.c @@ -216,7 +216,7 @@ static const VMStateDescription vmstate_env = { }; static const VMStateField vmstate_cpu_fields[] = { - VMSTATE_CPU(), + VMSTATE_STRUCT(parent_obj, HPPACPU, 0, vmstate_cpu_common, CPUState), VMSTATE_STRUCT(env, HPPACPU, 1, vmstate_env, CPUHPPAState), VMSTATE_END_OF_LIST() }; diff --git a/target/microblaze/machine.c b/target/microblaze/machine.c index 51705e4f5c..a4cf38dc89 100644 --- a/target/microblaze/machine.c +++ b/target/microblaze/machine.c @@ -93,7 +93,7 @@ static const VMStateDescription vmstate_env = { }; static const VMStateField vmstate_cpu_fields[] = { - VMSTATE_CPU(), + VMSTATE_STRUCT(parent_obj, MicroBlazeCPU, 0, vmstate_cpu_common, CPUState), VMSTATE_STRUCT(env, MicroBlazeCPU, 1, vmstate_env, CPUMBState), VMSTATE_END_OF_LIST() }; diff --git a/target/openrisc/machine.c b/target/openrisc/machine.c index 3574e571cb..081c706d02 100644 --- a/target/openrisc/machine.c +++ b/target/openrisc/machine.c @@ -136,7 +136,7 @@ const VMStateDescription vmstate_openrisc_cpu = { .minimum_version_id = 1, .post_load = cpu_post_load, .fields = (const VMStateField[]) { - VMSTATE_CPU(), + VMSTATE_STRUCT(parent_obj, OpenRISCCPU, 0, vmstate_cpu_common, CPUState), VMSTATE_STRUCT(env, OpenRISCCPU, 1, vmstate_env, CPUOpenRISCState), VMSTATE_END_OF_LIST() } diff --git a/tests/qtest/q35-test.c b/tests/qtest/q35-test.c index 75d4078b79..62fff49fc8 100644 --- a/tests/qtest/q35-test.c +++ b/tests/qtest/q35-test.c @@ -246,41 +246,6 @@ static void test_smram_smbase_lock(void) qtest_quit(qts); } -static void test_without_smram_base(void) -{ - QPCIBus *pcibus; - QPCIDevice *pcidev; - QTestState *qts; - int i; - - qts = qtest_init("-M pc-q35-4.1"); - - pcibus = qpci_new_pc(qts, NULL); - g_assert(pcibus != NULL); - - pcidev = qpci_device_find(pcibus, 0); - g_assert(pcidev != NULL); - - /* check that RAM is accessible */ - qtest_writeb(qts, SMBASE, SMRAM_TEST_PATTERN); - g_assert_cmpint(qtest_readb(qts, SMBASE), ==, SMRAM_TEST_PATTERN); - - /* check that writing to 0x9c succeeds */ - for (i = 0; i <= 0xff; i++) { - qpci_config_writeb(pcidev, MCH_HOST_BRIDGE_F_SMBASE, i); - g_assert(qpci_config_readb(pcidev, MCH_HOST_BRIDGE_F_SMBASE) == i); - } - - /* check that RAM is still accessible */ - qtest_writeb(qts, SMBASE, SMRAM_TEST_PATTERN + 1); - g_assert_cmpint(qtest_readb(qts, SMBASE), ==, (SMRAM_TEST_PATTERN + 1)); - - g_free(pcidev); - qpci_free_pc(pcibus); - - qtest_quit(qts); -} - int main(int argc, char **argv) { g_test_init(&argc, &argv, NULL); @@ -293,6 +258,6 @@ int main(int argc, char **argv) qtest_add_data_func("/q35/tseg-size/ext/16mb", &tseg_ext_16mb, test_tseg_size); qtest_add_func("/q35/smram/smbase_lock", test_smram_smbase_lock); - qtest_add_func("/q35/smram/legacy_smbase", test_without_smram_base); + return g_test_run(); } diff --git a/ui/dbus-display1.xml b/ui/dbus-display1.xml index 72deefa455..4a41a7e0f3 100644 --- a/ui/dbus-display1.xml +++ b/ui/dbus-display1.xml @@ -615,6 +615,51 @@ </interface> <!-- + org.qemu.Display1.Listener.Unix.ScanoutDMABUF2: + + This optional client-side interface can complement + org.qemu.Display1.Listener on ``/org/qemu/Display1/Listener`` for + Unix-specific DMABUF scanout setup which support multi plane. + --> + <?if $(env.HOST_OS) != windows?> + <interface name="org.qemu.Display1.Listener.Unix.ScanoutDMABUF2"> + <!-- + ScanoutDMABUF2: + @dmabuf: DMABUF file descriptor of each plane. + @x: display x offset, in pixels + @y: display y offset, in pixels + @width: display width, in pixels. + @height: display height, in pixels. + @offset: offset of each plane, in bytes. + @stride: stride of each plane, in bytes. + @num_planes: plane number. + @fourcc: DMABUF fourcc. + @backing_width: backing framebuffer width, in pixels + @backing_height: backing framebuffer height, in pixels + @modifier: DMABUF modifier. + @y0_top: whether Y position 0 is the top or not. + + Resize and update the display content with DMABUF. + --> + <method name="ScanoutDMABUF2"> + <arg type="ah" name="dmabuf" direction="in"/> + <arg type="u" name="x" direction="in"/> + <arg type="u" name="y" direction="in"/> + <arg type="u" name="width" direction="in"/> + <arg type="u" name="height" direction="in"/> + <arg type="au" name="offset" direction="in"/> + <arg type="au" name="stride" direction="in"/> + <arg type="u" name="num_planes" direction="in"/> + <arg type="u" name="fourcc" direction="in"/> + <arg type="u" name="backing_width" direction="in"/> + <arg type="u" name="backing_height" direction="in"/> + <arg type="t" name="modifier" direction="in"/> + <arg type="b" name="y0_top" direction="in"/> + </method> + </interface> + <?endif?> + + <!-- org.qemu.Display1.Clipboard: This interface must be implemented by both the client and the server on diff --git a/ui/dbus-listener.c b/ui/dbus-listener.c index 51244c9240..42875b8eed 100644 --- a/ui/dbus-listener.c +++ b/ui/dbus-listener.c @@ -85,6 +85,7 @@ struct _DBusDisplayListener { #endif #else /* !WIN32 */ QemuDBusDisplay1ListenerUnixMap *map_proxy; + QemuDBusDisplay1ListenerUnixScanoutDMABUF2 *scanout_dmabuf_v2_proxy; #endif guint dbus_filter; @@ -288,10 +289,9 @@ static void dbus_call_update_gl(DisplayChangeListener *dcl, } #ifdef CONFIG_GBM -static void dbus_scanout_dmabuf(DisplayChangeListener *dcl, - QemuDmaBuf *dmabuf) +static void dbus_scanout_dmabuf_v1(DBusDisplayListener *ddl, + QemuDmaBuf *dmabuf) { - DBusDisplayListener *ddl = container_of(dcl, DBusDisplayListener, dcl); g_autoptr(GError) err = NULL; g_autoptr(GUnixFDList) fd_list = NULL; int fd; @@ -299,7 +299,7 @@ static void dbus_scanout_dmabuf(DisplayChangeListener *dcl, uint64_t modifier; bool y0_top; - fd = qemu_dmabuf_get_fd(dmabuf); + fd = qemu_dmabuf_get_fds(dmabuf, NULL)[0]; fd_list = g_unix_fd_list_new(); if (g_unix_fd_list_append(fd_list, fd, &err) != 0) { error_report("Failed to setup dmabuf fdlist: %s", err->message); @@ -310,7 +310,7 @@ static void dbus_scanout_dmabuf(DisplayChangeListener *dcl, width = qemu_dmabuf_get_width(dmabuf); height = qemu_dmabuf_get_height(dmabuf); - stride = qemu_dmabuf_get_stride(dmabuf); + stride = qemu_dmabuf_get_strides(dmabuf, NULL)[0]; fourcc = qemu_dmabuf_get_fourcc(dmabuf); modifier = qemu_dmabuf_get_modifier(dmabuf); y0_top = qemu_dmabuf_get_y0_top(dmabuf); @@ -322,6 +322,87 @@ static void dbus_scanout_dmabuf(DisplayChangeListener *dcl, y0_top, G_DBUS_CALL_FLAGS_NONE, -1, fd_list, NULL, NULL, NULL); } + +static void dbus_scanout_dmabuf_v2(DBusDisplayListener *ddl, + QemuDmaBuf *dmabuf) +{ + g_autoptr(GError) err = NULL; + g_autoptr(GUnixFDList) fd_list = NULL; + int i, fd_index[DMABUF_MAX_PLANES], num_fds; + uint32_t x, y, width, height, fourcc, backing_width, backing_height; + GVariant *fd, *offset, *stride, *fd_handles[DMABUF_MAX_PLANES]; + uint64_t modifier; + bool y0_top; + int nfds, noffsets, nstrides; + const int *fds = qemu_dmabuf_get_fds(dmabuf, &nfds); + const uint32_t *offsets = qemu_dmabuf_get_offsets(dmabuf, &noffsets); + const uint32_t *strides = qemu_dmabuf_get_strides(dmabuf, &nstrides); + uint32_t num_planes = qemu_dmabuf_get_num_planes(dmabuf); + + assert(nfds >= num_planes); + assert(noffsets >= num_planes); + assert(nstrides >= num_planes); + + fd_list = g_unix_fd_list_new(); + + for (num_fds = 0; num_fds < num_planes; num_fds++) { + int plane_fd = fds[num_fds]; + + if (plane_fd < 0) { + break; + } + + fd_index[num_fds] = g_unix_fd_list_append(fd_list, plane_fd, &err); + if (fd_index[num_fds] < 0) { + error_report("Failed to setup dmabuf fdlist: %s", err->message); + return; + } + } + + ddl_discard_display_messages(ddl); + + x = qemu_dmabuf_get_x(dmabuf); + y = qemu_dmabuf_get_y(dmabuf); + width = qemu_dmabuf_get_width(dmabuf); + height = qemu_dmabuf_get_height(dmabuf); + fourcc = qemu_dmabuf_get_fourcc(dmabuf); + backing_width = qemu_dmabuf_get_backing_width(dmabuf); + backing_height = qemu_dmabuf_get_backing_height(dmabuf); + modifier = qemu_dmabuf_get_modifier(dmabuf); + y0_top = qemu_dmabuf_get_y0_top(dmabuf); + + offset = g_variant_new_fixed_array(G_VARIANT_TYPE_UINT32, + offsets, num_planes, sizeof(uint32_t)); + stride = g_variant_new_fixed_array(G_VARIANT_TYPE_UINT32, + strides, num_planes, sizeof(uint32_t)); + + for (i = 0; i < num_fds; i++) { + fd_handles[i] = g_variant_new_handle(fd_index[i]); + } + fd = g_variant_new_array(G_VARIANT_TYPE_HANDLE, fd_handles, num_fds); + + qemu_dbus_display1_listener_unix_scanout_dmabuf2_call_scanout_dmabuf2( + ddl->scanout_dmabuf_v2_proxy, fd, x, y, width, height, offset, stride, + num_planes, fourcc, backing_width, backing_height, modifier, y0_top, + G_DBUS_CALL_FLAGS_NONE, -1, fd_list, NULL, NULL, NULL); +} + +static void dbus_scanout_dmabuf(DisplayChangeListener *dcl, + QemuDmaBuf *dmabuf) +{ + DBusDisplayListener *ddl = container_of(dcl, DBusDisplayListener, dcl); + + if (ddl->scanout_dmabuf_v2_proxy) { + dbus_scanout_dmabuf_v2(ddl, dmabuf); + } else { + if (qemu_dmabuf_get_num_planes(dmabuf) > 1) { + g_debug("org.qemu.Display1.Listener.ScanoutDMABUF " + "does not support mutli plane"); + return; + } + dbus_scanout_dmabuf_v1(ddl, dmabuf); + } +} #endif /* GBM */ #endif /* OPENGL */ @@ -504,19 +585,18 @@ static void dbus_scanout_texture(DisplayChangeListener *dcl, backing_width, backing_height, x, y, w, h); #ifdef CONFIG_GBM g_autoptr(QemuDmaBuf) dmabuf = NULL; - int fd; - uint32_t stride, fourcc; + int fd[DMABUF_MAX_PLANES], num_planes; + uint32_t offset[DMABUF_MAX_PLANES], stride[DMABUF_MAX_PLANES], fourcc; uint64_t modifier; assert(tex_id); - fd = egl_get_fd_for_texture(tex_id, (EGLint *)&stride, (EGLint *)&fourcc, - &modifier); - if (fd < 0) { - error_report("%s: failed to get fd for texture", __func__); + if (!egl_dmabuf_export_texture(tex_id, fd, (EGLint *)offset, (EGLint *)stride, + (EGLint *)&fourcc, &num_planes, &modifier)) { + error_report("%s: failed to export dmabuf for texture", __func__); return; } - dmabuf = qemu_dmabuf_new(w, h, stride, x, y, backing_width, - backing_height, fourcc, modifier, fd, + dmabuf = qemu_dmabuf_new(w, h, offset, stride, x, y, backing_width, + backing_height, fourcc, modifier, fd, num_planes, false, backing_y_0_top); dbus_scanout_dmabuf(dcl, dmabuf); @@ -883,6 +963,8 @@ dbus_display_listener_dispose(GObject *object) #ifdef CONFIG_OPENGL egl_fb_destroy(&ddl->fb); #endif +#else /* !WIN32 */ + g_clear_object(&ddl->scanout_dmabuf_v2_proxy); #endif G_OBJECT_CLASS(dbus_display_listener_parent_class)->dispose(object); @@ -1071,6 +1153,26 @@ dbus_display_listener_setup_shared_map(DBusDisplayListener *ddl) #endif } +static void dbus_display_listener_setup_scanout_dmabuf_v2(DBusDisplayListener *ddl) +{ +#ifndef WIN32 + g_autoptr(GError) err = NULL; + + if (!dbus_display_listener_implements( + ddl, "org.qemu.Display1.Listener.Unix.ScanoutDMABUF2")) { + return; + } + ddl->scanout_dmabuf_v2_proxy = + qemu_dbus_display1_listener_unix_scanout_dmabuf2_proxy_new_sync( + ddl->conn, G_DBUS_PROXY_FLAGS_DO_NOT_AUTO_START, NULL, + "/org/qemu/Display1/Listener", NULL, &err); + if (!ddl->scanout_dmabuf_v2_proxy) { + g_debug("Failed to setup Unix scanout dmabuf v2 proxy: %s", err->message); + return; + } +#endif +} + static GDBusMessage * dbus_filter(GDBusConnection *connection, GDBusMessage *message, @@ -1159,6 +1261,7 @@ dbus_display_listener_new(const char *bus_name, dbus_display_listener_setup_shared_map(ddl); trace_dbus_can_share_map(ddl->can_share_map); dbus_display_listener_setup_d3d11(ddl); + dbus_display_listener_setup_scanout_dmabuf_v2(ddl); con = qemu_console_lookup_by_index(dbus_display_console_get_index(console)); assert(con); diff --git a/ui/dmabuf.c b/ui/dmabuf.c index df7a09703f..7433a268f0 100644 --- a/ui/dmabuf.c +++ b/ui/dmabuf.c @@ -11,10 +11,12 @@ #include "ui/dmabuf.h" struct QemuDmaBuf { - int fd; + int fd[DMABUF_MAX_PLANES]; uint32_t width; uint32_t height; - uint32_t stride; + uint32_t offset[DMABUF_MAX_PLANES]; + uint32_t stride[DMABUF_MAX_PLANES]; + uint32_t num_planes; uint32_t fourcc; uint64_t modifier; uint32_t texture; @@ -30,28 +32,33 @@ struct QemuDmaBuf { }; QemuDmaBuf *qemu_dmabuf_new(uint32_t width, uint32_t height, - uint32_t stride, uint32_t x, - uint32_t y, uint32_t backing_width, - uint32_t backing_height, uint32_t fourcc, - uint64_t modifier, int32_t dmabuf_fd, + const uint32_t *offset, const uint32_t *stride, + uint32_t x, uint32_t y, + uint32_t backing_width, uint32_t backing_height, + uint32_t fourcc, uint64_t modifier, + const int32_t *dmabuf_fd, uint32_t num_planes, bool allow_fences, bool y0_top) { QemuDmaBuf *dmabuf; + assert(num_planes > 0 && num_planes <= DMABUF_MAX_PLANES); + dmabuf = g_new0(QemuDmaBuf, 1); dmabuf->width = width; dmabuf->height = height; - dmabuf->stride = stride; + memcpy(dmabuf->offset, offset, num_planes * sizeof(*offset)); + memcpy(dmabuf->stride, stride, num_planes * sizeof(*stride)); dmabuf->x = x; dmabuf->y = y; dmabuf->backing_width = backing_width; dmabuf->backing_height = backing_height; dmabuf->fourcc = fourcc; dmabuf->modifier = modifier; - dmabuf->fd = dmabuf_fd; + memcpy(dmabuf->fd, dmabuf_fd, num_planes * sizeof(*dmabuf_fd)); dmabuf->allow_fences = allow_fences; dmabuf->y0_top = y0_top; dmabuf->fence_fd = -1; + dmabuf->num_planes = num_planes; return dmabuf; } @@ -65,31 +72,40 @@ void qemu_dmabuf_free(QemuDmaBuf *dmabuf) g_free(dmabuf); } -int qemu_dmabuf_get_fd(QemuDmaBuf *dmabuf) +const int *qemu_dmabuf_get_fds(QemuDmaBuf *dmabuf, int *nfds) { assert(dmabuf != NULL); + if (nfds) { + *nfds = ARRAY_SIZE(dmabuf->fd); + } + return dmabuf->fd; } -int qemu_dmabuf_dup_fd(QemuDmaBuf *dmabuf) +void qemu_dmabuf_dup_fds(QemuDmaBuf *dmabuf, int *fds, int nfds) { + int i; + assert(dmabuf != NULL); + assert(nfds >= dmabuf->num_planes); - if (dmabuf->fd >= 0) { - return dup(dmabuf->fd); - } else { - return -1; + for (i = 0; i < dmabuf->num_planes; i++) { + fds[i] = dmabuf->fd[i] >= 0 ? dup(dmabuf->fd[i]) : -1; } } void qemu_dmabuf_close(QemuDmaBuf *dmabuf) { + int i; + assert(dmabuf != NULL); - if (dmabuf->fd >= 0) { - close(dmabuf->fd); - dmabuf->fd = -1; + for (i = 0; i < dmabuf->num_planes; i++) { + if (dmabuf->fd[i] >= 0) { + close(dmabuf->fd[i]); + dmabuf->fd[i] = -1; + } } } @@ -107,13 +123,35 @@ uint32_t qemu_dmabuf_get_height(QemuDmaBuf *dmabuf) return dmabuf->height; } -uint32_t qemu_dmabuf_get_stride(QemuDmaBuf *dmabuf) +const uint32_t *qemu_dmabuf_get_offsets(QemuDmaBuf *dmabuf, int *noffsets) +{ + assert(dmabuf != NULL); + + if (noffsets) { + *noffsets = ARRAY_SIZE(dmabuf->offset); + } + + return dmabuf->offset; +} + +const uint32_t *qemu_dmabuf_get_strides(QemuDmaBuf *dmabuf, int *nstrides) { assert(dmabuf != NULL); + if (nstrides) { + *nstrides = ARRAY_SIZE(dmabuf->stride); + } + return dmabuf->stride; } +uint32_t qemu_dmabuf_get_num_planes(QemuDmaBuf *dmabuf) +{ + assert(dmabuf != NULL); + + return dmabuf->num_planes; +} + uint32_t qemu_dmabuf_get_fourcc(QemuDmaBuf *dmabuf) { assert(dmabuf != NULL); @@ -221,9 +259,3 @@ void qemu_dmabuf_set_draw_submitted(QemuDmaBuf *dmabuf, bool draw_submitted) assert(dmabuf != NULL); dmabuf->draw_submitted = draw_submitted; } - -void qemu_dmabuf_set_fd(QemuDmaBuf *dmabuf, int32_t fd) -{ - assert(dmabuf != NULL); - dmabuf->fd = fd; -} diff --git a/ui/egl-helpers.c b/ui/egl-helpers.c index d591159480..9cda2bbbee 100644 --- a/ui/egl-helpers.c +++ b/ui/egl-helpers.c @@ -23,6 +23,7 @@ #include "system/system.h" #include "qapi/error.h" #include "trace.h" +#include "standard-headers/drm/drm_fourcc.h" EGLDisplay *qemu_egl_display; EGLConfig qemu_egl_config; @@ -257,6 +258,11 @@ int egl_rendernode_init(const char *rendernode, DisplayGLMode mode) error_report("egl: EGL_MESA_image_dma_buf_export not supported"); goto err; } + if (!epoxy_has_egl_extension(qemu_egl_display, + "EGL_EXT_image_dma_buf_import_modifiers")) { + error_report("egl: EGL_EXT_image_dma_buf_import_modifiers not supported"); + goto err; + } qemu_egl_rn_ctx = qemu_egl_init_ctx(); if (!qemu_egl_rn_ctx) { @@ -277,44 +283,86 @@ err: return -1; } -int egl_get_fd_for_texture(uint32_t tex_id, EGLint *stride, EGLint *fourcc, - EGLuint64KHR *modifier) +bool egl_dmabuf_export_texture(uint32_t tex_id, int *fd, EGLint *offset, + EGLint *stride, EGLint *fourcc, int *num_planes, + EGLuint64KHR *modifier) { EGLImageKHR image; - EGLint num_planes, fd; + EGLuint64KHR modifiers[DMABUF_MAX_PLANES]; image = eglCreateImageKHR(qemu_egl_display, eglGetCurrentContext(), EGL_GL_TEXTURE_2D_KHR, (EGLClientBuffer)(unsigned long)tex_id, NULL); if (!image) { - return -1; + return false; } eglExportDMABUFImageQueryMESA(qemu_egl_display, image, fourcc, - &num_planes, modifier); - if (num_planes != 1) { - eglDestroyImageKHR(qemu_egl_display, image); - return -1; - } - eglExportDMABUFImageMESA(qemu_egl_display, image, &fd, stride, NULL); + num_planes, modifiers); + eglExportDMABUFImageMESA(qemu_egl_display, image, fd, stride, offset); eglDestroyImageKHR(qemu_egl_display, image); - return fd; + /* Only first modifier matters. */ + if (modifier) { + *modifier = modifiers[0]; + } + + return true; } void egl_dmabuf_import_texture(QemuDmaBuf *dmabuf) { EGLImageKHR image = EGL_NO_IMAGE_KHR; EGLint attrs[64]; - int i = 0; - uint64_t modifier; + int i = 0, j; + uint64_t modifier = qemu_dmabuf_get_modifier(dmabuf); uint32_t texture = qemu_dmabuf_get_texture(dmabuf); + int nfds, noffsets, nstrides; + const int *fds = qemu_dmabuf_get_fds(dmabuf, &nfds); + const uint32_t *offsets = qemu_dmabuf_get_offsets(dmabuf, &noffsets); + const uint32_t *strides = qemu_dmabuf_get_strides(dmabuf, &nstrides); + uint32_t num_planes = qemu_dmabuf_get_num_planes(dmabuf); + + EGLint fd_attrs[] = { + EGL_DMA_BUF_PLANE0_FD_EXT, + EGL_DMA_BUF_PLANE1_FD_EXT, + EGL_DMA_BUF_PLANE2_FD_EXT, + EGL_DMA_BUF_PLANE3_FD_EXT, + }; + EGLint offset_attrs[] = { + EGL_DMA_BUF_PLANE0_OFFSET_EXT, + EGL_DMA_BUF_PLANE1_OFFSET_EXT, + EGL_DMA_BUF_PLANE2_OFFSET_EXT, + EGL_DMA_BUF_PLANE3_OFFSET_EXT, + }; + EGLint stride_attrs[] = { + EGL_DMA_BUF_PLANE0_PITCH_EXT, + EGL_DMA_BUF_PLANE1_PITCH_EXT, + EGL_DMA_BUF_PLANE2_PITCH_EXT, + EGL_DMA_BUF_PLANE3_PITCH_EXT, + }; + EGLint modifier_lo_attrs[] = { + EGL_DMA_BUF_PLANE0_MODIFIER_LO_EXT, + EGL_DMA_BUF_PLANE1_MODIFIER_LO_EXT, + EGL_DMA_BUF_PLANE2_MODIFIER_LO_EXT, + EGL_DMA_BUF_PLANE3_MODIFIER_LO_EXT, + }; + EGLint modifier_hi_attrs[] = { + EGL_DMA_BUF_PLANE0_MODIFIER_HI_EXT, + EGL_DMA_BUF_PLANE1_MODIFIER_HI_EXT, + EGL_DMA_BUF_PLANE2_MODIFIER_HI_EXT, + EGL_DMA_BUF_PLANE3_MODIFIER_HI_EXT, + }; if (texture != 0) { return; } + assert(nfds >= num_planes); + assert(noffsets >= num_planes); + assert(nstrides >= num_planes); + attrs[i++] = EGL_WIDTH; attrs[i++] = qemu_dmabuf_get_backing_width(dmabuf); attrs[i++] = EGL_HEIGHT; @@ -322,21 +370,22 @@ void egl_dmabuf_import_texture(QemuDmaBuf *dmabuf) attrs[i++] = EGL_LINUX_DRM_FOURCC_EXT; attrs[i++] = qemu_dmabuf_get_fourcc(dmabuf); - attrs[i++] = EGL_DMA_BUF_PLANE0_FD_EXT; - attrs[i++] = qemu_dmabuf_get_fd(dmabuf); - attrs[i++] = EGL_DMA_BUF_PLANE0_PITCH_EXT; - attrs[i++] = qemu_dmabuf_get_stride(dmabuf); - attrs[i++] = EGL_DMA_BUF_PLANE0_OFFSET_EXT; - attrs[i++] = 0; -#ifdef EGL_DMA_BUF_PLANE0_MODIFIER_LO_EXT - modifier = qemu_dmabuf_get_modifier(dmabuf); - if (modifier) { - attrs[i++] = EGL_DMA_BUF_PLANE0_MODIFIER_LO_EXT; - attrs[i++] = (modifier >> 0) & 0xffffffff; - attrs[i++] = EGL_DMA_BUF_PLANE0_MODIFIER_HI_EXT; - attrs[i++] = (modifier >> 32) & 0xffffffff; + for (j = 0; j < num_planes; j++) { + attrs[i++] = fd_attrs[j]; + /* fd[1-3] may be -1 if using a joint buffer for all planes */ + attrs[i++] = fds[j] >= 0 ? fds[j] : fds[0]; + attrs[i++] = stride_attrs[j]; + attrs[i++] = strides[j]; + attrs[i++] = offset_attrs[j]; + attrs[i++] = offsets[j]; + if (modifier != DRM_FORMAT_MOD_INVALID) { + attrs[i++] = modifier_lo_attrs[j]; + attrs[i++] = (modifier >> 0) & 0xffffffff; + attrs[i++] = modifier_hi_attrs[j]; + attrs[i++] = (modifier >> 32) & 0xffffffff; + } } -#endif + attrs[i++] = EGL_NONE; image = eglCreateImageKHR(qemu_egl_display, diff --git a/ui/spice-display.c b/ui/spice-display.c index c794ae0649..9c39d2c5c8 100644 --- a/ui/spice-display.c +++ b/ui/spice-display.c @@ -28,6 +28,8 @@ #include "ui/spice-display.h" +#include "standard-headers/drm/drm_fourcc.h" + bool spice_opengl; int qemu_spice_rect_is_empty(const QXLRect* r) @@ -872,23 +874,48 @@ static void spice_gl_update(DisplayChangeListener *dcl, ssd->gl_updates++; } +static void spice_server_gl_scanout(QXLInstance *qxl, + const int *fd, + uint32_t width, uint32_t height, + const uint32_t *offset, + const uint32_t *stride, + uint32_t num_planes, uint32_t format, + uint64_t modifier, int y_0_top) +{ +#ifdef HAVE_SPICE_QXL_GL_SCANOUT2 + spice_qxl_gl_scanout2(qxl, fd, width, height, offset, stride, + num_planes, format, modifier, y_0_top); +#else + if (num_planes <= 1) { + spice_qxl_gl_scanout(qxl, fd[0], width, height, stride[0], format, y_0_top); + } else { + error_report("SPICE server does not support multi plane GL scanout"); + } +#endif +} + static void spice_gl_switch(DisplayChangeListener *dcl, struct DisplaySurface *new_surface) { SimpleSpiceDisplay *ssd = container_of(dcl, SimpleSpiceDisplay, dcl); - EGLint stride, fourcc; - int fd; if (ssd->ds) { surface_gl_destroy_texture(ssd->gls, ssd->ds); } ssd->ds = new_surface; if (ssd->ds) { + uint32_t offset[DMABUF_MAX_PLANES], stride[DMABUF_MAX_PLANES]; + int fd[DMABUF_MAX_PLANES], num_planes, fourcc; + uint64_t modifier; + surface_gl_create_texture(ssd->gls, ssd->ds); - fd = egl_get_fd_for_texture(ssd->ds->texture, - &stride, &fourcc, - NULL); - if (fd < 0) { + if (!egl_dmabuf_export_texture(ssd->ds->texture, + fd, + (EGLint *)offset, + (EGLint *)stride, + &fourcc, + &num_planes, + &modifier)) { surface_gl_destroy_texture(ssd->gls, ssd->ds); return; } @@ -899,10 +926,11 @@ static void spice_gl_switch(DisplayChangeListener *dcl, fourcc); /* note: spice server will close the fd */ - spice_qxl_gl_scanout(&ssd->qxl, fd, - surface_width(ssd->ds), - surface_height(ssd->ds), - stride, fourcc, false); + spice_server_gl_scanout(&ssd->qxl, fd, + surface_width(ssd->ds), + surface_height(ssd->ds), + offset, stride, num_planes, + fourcc, modifier, false); ssd->have_surface = true; ssd->have_scanout = false; @@ -925,7 +953,8 @@ static void qemu_spice_gl_scanout_disable(DisplayChangeListener *dcl) SimpleSpiceDisplay *ssd = container_of(dcl, SimpleSpiceDisplay, dcl); trace_qemu_spice_gl_scanout_disable(ssd->qxl.id); - spice_qxl_gl_scanout(&ssd->qxl, -1, 0, 0, 0, 0, false); + spice_server_gl_scanout(&ssd->qxl, NULL, 0, 0, NULL, NULL, 0, DRM_FORMAT_INVALID, + DRM_FORMAT_MOD_INVALID, false); qemu_spice_gl_monitor_config(ssd, 0, 0, 0, 0); ssd->have_surface = false; ssd->have_scanout = false; @@ -941,20 +970,23 @@ static void qemu_spice_gl_scanout_texture(DisplayChangeListener *dcl, void *d3d_tex2d) { SimpleSpiceDisplay *ssd = container_of(dcl, SimpleSpiceDisplay, dcl); - EGLint stride = 0, fourcc = 0; - int fd = -1; + EGLint offset[DMABUF_MAX_PLANES], stride[DMABUF_MAX_PLANES], fourcc = 0; + int fd[DMABUF_MAX_PLANES], num_planes; + uint64_t modifier; assert(tex_id); - fd = egl_get_fd_for_texture(tex_id, &stride, &fourcc, NULL); - if (fd < 0) { - fprintf(stderr, "%s: failed to get fd for texture\n", __func__); + if (!egl_dmabuf_export_texture(tex_id, fd, offset, stride, &fourcc, + &num_planes, &modifier)) { + fprintf(stderr, "%s: failed to export dmabuf for texture\n", __func__); return; } + trace_qemu_spice_gl_scanout_texture(ssd->qxl.id, w, h, fourcc); /* note: spice server will close the fd */ - spice_qxl_gl_scanout(&ssd->qxl, fd, backing_width, backing_height, - stride, fourcc, y_0_top); + spice_server_gl_scanout(&ssd->qxl, fd, backing_width, backing_height, + (uint32_t *)offset, (uint32_t *)stride, num_planes, + fourcc, modifier, y_0_top); qemu_spice_gl_monitor_config(ssd, x, y, w, h); ssd->have_surface = false; ssd->have_scanout = true; @@ -1025,11 +1057,10 @@ static void qemu_spice_gl_update(DisplayChangeListener *dcl, uint32_t x, uint32_t y, uint32_t w, uint32_t h) { SimpleSpiceDisplay *ssd = container_of(dcl, SimpleSpiceDisplay, dcl); - EGLint stride = 0, fourcc = 0; + EGLint fourcc = 0; bool render_cursor = false; bool y_0_top = false; /* FIXME */ uint64_t cookie; - int fd; uint32_t width, height, texture; if (!ssd->have_scanout) { @@ -1064,26 +1095,47 @@ static void qemu_spice_gl_update(DisplayChangeListener *dcl, /* dest framebuffer */ if (ssd->blit_fb.width != width || ssd->blit_fb.height != height) { + int fds[DMABUF_MAX_PLANES], num_planes; + uint32_t offsets[DMABUF_MAX_PLANES], strides[DMABUF_MAX_PLANES]; + uint64_t modifier; + trace_qemu_spice_gl_render_dmabuf(ssd->qxl.id, width, height); egl_fb_destroy(&ssd->blit_fb); egl_fb_setup_new_tex(&ssd->blit_fb, width, height); - fd = egl_get_fd_for_texture(ssd->blit_fb.texture, - &stride, &fourcc, NULL); - spice_qxl_gl_scanout(&ssd->qxl, fd, width, height, - stride, fourcc, false); + if (!egl_dmabuf_export_texture(ssd->blit_fb.texture, fds, + (EGLint *)offsets, (EGLint *)strides, + &fourcc, &num_planes, &modifier)) { + fprintf(stderr, + "%s: failed to export dmabuf for texture\n", __func__); + return; + } + + spice_server_gl_scanout(&ssd->qxl, fds, width, height, offsets, strides, + num_planes, fourcc, modifier, false); } } else { - stride = qemu_dmabuf_get_stride(dmabuf); + int fds[DMABUF_MAX_PLANES]; + int noffsets, nstrides; + const uint32_t *offsets = qemu_dmabuf_get_offsets(dmabuf, &noffsets); + const uint32_t *strides = qemu_dmabuf_get_strides(dmabuf, &nstrides); + uint32_t num_planes = qemu_dmabuf_get_num_planes(dmabuf); + + assert(noffsets >= num_planes); + assert(nstrides >= num_planes); + fourcc = qemu_dmabuf_get_fourcc(dmabuf); y_0_top = qemu_dmabuf_get_y0_top(dmabuf); - fd = qemu_dmabuf_dup_fd(dmabuf); + qemu_dmabuf_dup_fds(dmabuf, fds, DMABUF_MAX_PLANES); trace_qemu_spice_gl_forward_dmabuf(ssd->qxl.id, width, height); /* note: spice server will close the fd, so hand over a dup */ - spice_qxl_gl_scanout(&ssd->qxl, fd, width, height, - stride, fourcc, y_0_top); + spice_server_gl_scanout(&ssd->qxl, fds, width, height, + offsets, strides, num_planes, + fourcc, + qemu_dmabuf_get_modifier(dmabuf), + y_0_top); } qemu_spice_gl_monitor_config(ssd, 0, 0, width, height); ssd->guest_dmabuf_refresh = false; diff --git a/util/meson.build b/util/meson.build index e5cd327e27..1adff96ebd 100644 --- a/util/meson.build +++ b/util/meson.build @@ -133,4 +133,6 @@ elif cpu in ['ppc', 'ppc64'] util_ss.add(files('cpuinfo-ppc.c')) elif cpu in ['riscv32', 'riscv64'] util_ss.add(files('cpuinfo-riscv.c')) +elif cpu == 's390x' + util_ss.add(files('s390x_pci_mmio.c')) endif diff --git a/util/s390x_pci_mmio.c b/util/s390x_pci_mmio.c new file mode 100644 index 0000000000..5ab24fa474 --- /dev/null +++ b/util/s390x_pci_mmio.c @@ -0,0 +1,146 @@ +/* + * s390x PCI MMIO definitions + * + * Copyright 2025 IBM Corp. + * Author(s): Farhan Ali <alifm@linux.ibm.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include <sys/syscall.h> +#include "qemu/s390x_pci_mmio.h" +#include "elf.h" + +union register_pair { + unsigned __int128 pair; + struct { + uint64_t even; + uint64_t odd; + }; +}; + +static bool is_mio_supported; + +static __attribute__((constructor)) void check_is_mio_supported(void) +{ + is_mio_supported = !!(qemu_getauxval(AT_HWCAP) & HWCAP_S390_PCI_MIO); +} + +static uint64_t s390x_pcilgi(const void *ioaddr, size_t len) +{ + union register_pair ioaddr_len = { .even = (uint64_t)ioaddr, + .odd = len }; + uint64_t val; + int cc; + + asm volatile( + /* pcilgi */ + ".insn rre,0xb9d60000,%[val],%[ioaddr_len]\n" + "ipm %[cc]\n" + "srl %[cc],28\n" + : [cc] "=d"(cc), [val] "=d"(val), + [ioaddr_len] "+d"(ioaddr_len.pair) :: "cc"); + + if (cc) { + val = -1ULL; + } + + return val; +} + +static void s390x_pcistgi(void *ioaddr, uint64_t val, size_t len) +{ + union register_pair ioaddr_len = {.even = (uint64_t)ioaddr, .odd = len}; + + asm volatile ( + /* pcistgi */ + ".insn rre,0xb9d40000,%[val],%[ioaddr_len]\n" + : [ioaddr_len] "+d" (ioaddr_len.pair) + : [val] "d" (val) + : "cc", "memory"); +} + +uint8_t s390x_pci_mmio_read_8(const void *ioaddr) +{ + uint8_t val = 0; + + if (is_mio_supported) { + val = s390x_pcilgi(ioaddr, sizeof(val)); + } else { + syscall(__NR_s390_pci_mmio_read, ioaddr, &val, sizeof(val)); + } + return val; +} + +uint16_t s390x_pci_mmio_read_16(const void *ioaddr) +{ + uint16_t val = 0; + + if (is_mio_supported) { + val = s390x_pcilgi(ioaddr, sizeof(val)); + } else { + syscall(__NR_s390_pci_mmio_read, ioaddr, &val, sizeof(val)); + } + return val; +} + +uint32_t s390x_pci_mmio_read_32(const void *ioaddr) +{ + uint32_t val = 0; + + if (is_mio_supported) { + val = s390x_pcilgi(ioaddr, sizeof(val)); + } else { + syscall(__NR_s390_pci_mmio_read, ioaddr, &val, sizeof(val)); + } + return val; +} + +uint64_t s390x_pci_mmio_read_64(const void *ioaddr) +{ + uint64_t val = 0; + + if (is_mio_supported) { + val = s390x_pcilgi(ioaddr, sizeof(val)); + } else { + syscall(__NR_s390_pci_mmio_read, ioaddr, &val, sizeof(val)); + } + return val; +} + +void s390x_pci_mmio_write_8(void *ioaddr, uint8_t val) +{ + if (is_mio_supported) { + s390x_pcistgi(ioaddr, val, sizeof(val)); + } else { + syscall(__NR_s390_pci_mmio_write, ioaddr, &val, sizeof(val)); + } +} + +void s390x_pci_mmio_write_16(void *ioaddr, uint16_t val) +{ + if (is_mio_supported) { + s390x_pcistgi(ioaddr, val, sizeof(val)); + } else { + syscall(__NR_s390_pci_mmio_write, ioaddr, &val, sizeof(val)); + } +} + +void s390x_pci_mmio_write_32(void *ioaddr, uint32_t val) +{ + if (is_mio_supported) { + s390x_pcistgi(ioaddr, val, sizeof(val)); + } else { + syscall(__NR_s390_pci_mmio_write, ioaddr, &val, sizeof(val)); + } +} + +void s390x_pci_mmio_write_64(void *ioaddr, uint64_t val) +{ + if (is_mio_supported) { + s390x_pcistgi(ioaddr, val, sizeof(val)); + } else { + syscall(__NR_s390_pci_mmio_write, ioaddr, &val, sizeof(val)); + } +} |