86 files changed, 3690 insertions, 340 deletions
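The change that recurs across the MSI-related hunks below is a new KVMRouteChange API: kvm_irqchip_add_msi_route() no longer takes a KVMState and no longer commits the irq routing table itself (the old kvm_irqchip_commit_routes() call is replaced by c->changes++); instead, callers open a route-change transaction, add one or more routes, and commit once. A minimal sketch of the converted caller pattern, assuming the usual kvm_state global; the function name is hypothetical, but the calls mirror the ivshmem, vfio and virtio-pci conversions in this diff:

    #include "qemu/osdep.h"
    #include "sysemu/kvm.h"
    #include "hw/pci/pci.h"

    /* Hypothetical caller showing the batched route-change pattern. */
    static int example_add_msi_route(PCIDevice *pdev, int vector)
    {
        KVMRouteChange c = kvm_irqchip_begin_route_changes(kvm_state);
        int virq = kvm_irqchip_add_msi_route(&c, vector, pdev);

        if (virq < 0) {
            return virq;            /* nothing was committed on failure */
        }
        kvm_irqchip_commit_route_changes(&c);  /* flush the deferred updates */
        return virq;
    }

Deferring the commit lets a device that configures many vectors pay for a single routing-table update instead of one per kvm_irqchip_add_msi_route() call.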
diff --git a/.gitlab-ci.d/buildtest.yml b/.gitlab-ci.d/buildtest.yml index 0aa70213fb..0aea7ab84c 100644 --- a/.gitlab-ci.d/buildtest.yml +++ b/.gitlab-ci.d/buildtest.yml @@ -515,8 +515,6 @@ build-oss-fuzz: echo Testing ${fuzzer} ... ; "${fuzzer}" -runs=1 -seed=1 || exit 1 ; done - # Unrelated to fuzzer: run some tests with -fsanitize=address - - cd build-oss-fuzz && make check-qtest-i386 check-unit build-tci: extends: .native_build_job_template diff --git a/.gitlab-ci.d/edk2/Dockerfile b/.gitlab-ci.d/edk2/Dockerfile index 13029310f6..bbe50ff832 100644 --- a/.gitlab-ci.d/edk2/Dockerfile +++ b/.gitlab-ci.d/edk2/Dockerfile @@ -1,7 +1,7 @@ # # Docker image to cross-compile EDK2 firmware binaries # -FROM ubuntu:16.04 +FROM ubuntu:18.04 MAINTAINER Philippe Mathieu-Daudé <f4bug@amsat.org> @@ -20,7 +20,7 @@ RUN apt update \ iasl \ make \ nasm \ - python \ + python3 \ uuid-dev \ && \ \ diff --git a/MAINTAINERS b/MAINTAINERS index f2e9ce1da2..b976a942dc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3159,7 +3159,7 @@ F: docs/interop/firmware.json EDK2 Firmware M: Philippe Mathieu-Daudé <f4bug@amsat.org> -R: Gerd Hoffmann <kraxel@redhat.com> +M: Gerd Hoffmann <kraxel@redhat.com> S: Supported F: hw/i386/*ovmf* F: pc-bios/descriptors/??-edk2-*.json diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index 0e66ebb497..27864dfaea 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -1961,10 +1961,11 @@ int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg) return kvm_set_irq(s, route->kroute.gsi, 1); } -int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev) +int kvm_irqchip_add_msi_route(KVMRouteChange *c, int vector, PCIDevice *dev) { struct kvm_irq_routing_entry kroute = {}; int virq; + KVMState *s = c->s; MSIMessage msg = {0, 0}; if (pci_available && dev) { @@ -2004,7 +2005,7 @@ int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev) kvm_add_routing_entry(s, &kroute); kvm_arch_add_msi_route_post(&kroute, vector, dev); - kvm_irqchip_commit_routes(s); + c->changes++; return virq; } @@ -2162,7 +2163,7 @@ int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg) abort(); } -int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev) +int kvm_irqchip_add_msi_route(KVMRouteChange *c, int vector, PCIDevice *dev) { return -ENOSYS; } diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c index 7e0fb884b9..3345882d85 100644 --- a/accel/stubs/kvm-stub.c +++ b/accel/stubs/kvm-stub.c @@ -77,7 +77,7 @@ int kvm_on_sigbus(int code, void *addr) return 1; } -int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev) +int kvm_irqchip_add_msi_route(KVMRouteChange *c, int vector, PCIDevice *dev) { return -ENOSYS; } diff --git a/hw/intc/xive2.c b/hw/intc/xive2.c index b6452f1478..3aff42a69e 100644 --- a/hw/intc/xive2.c +++ b/hw/intc/xive2.c @@ -1000,6 +1000,7 @@ static void xive2_end_source_class_init(ObjectClass *klass, void *data) dc->desc = "XIVE END Source"; device_class_set_props(dc, xive2_end_source_properties); dc->realize = xive2_end_source_realize; + dc->user_creatable = false; } static const TypeInfo xive2_end_source_info = { diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index 299837e5c1..2307f4a513 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -424,16 +424,19 @@ static void ivshmem_add_kvm_msi_virq(IVShmemState *s, int vector, Error **errp) { PCIDevice *pdev = PCI_DEVICE(s); + KVMRouteChange c; int ret; IVSHMEM_DPRINTF("ivshmem_add_kvm_msi_virq vector:%d\n", vector); assert(!s->msi_vectors[vector].pdev); - ret = 
kvm_irqchip_add_msi_route(kvm_state, vector, pdev); + c = kvm_irqchip_begin_route_changes(kvm_state); + ret = kvm_irqchip_add_msi_route(&c, vector, pdev); if (ret < 0) { error_setg(errp, "kvm_irqchip_add_msi_route failed"); return; } + kvm_irqchip_commit_route_changes(&c); s->msi_vectors[vector].virq = ret; s->msi_vectors[vector].pdev = pdev; diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index b02a0632df..2087516253 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -1870,6 +1870,7 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, err: for (j = 0; j < i; j++) { + virtqueue_detach_element(q->rx_vq, elems[j], lens[j]); g_free(elems[j]); } diff --git a/hw/nvram/eeprom_at24c.c b/hw/nvram/eeprom_at24c.c index da435500ba..01a3093600 100644 --- a/hw/nvram/eeprom_at24c.c +++ b/hw/nvram/eeprom_at24c.c @@ -58,9 +58,10 @@ int at24c_eeprom_event(I2CSlave *s, enum i2c_event event) switch (event) { case I2C_START_SEND: - case I2C_START_RECV: case I2C_FINISH: ee->haveaddr = 0; + /* fallthrough */ + case I2C_START_RECV: DPRINTK("clear\n"); if (ee->blk && ee->changed) { int len = blk_pwrite(ee->blk, 0, ee->mem, ee->rsize, 0); @@ -84,6 +85,10 @@ uint8_t at24c_eeprom_recv(I2CSlave *s) EEPROMState *ee = AT24C_EE(s); uint8_t ret; + if (ee->haveaddr == 1) { + return 0xff; + } + ret = ee->mem[ee->cur]; ee->cur = (ee->cur + 1u) % ee->rsize; diff --git a/hw/pci-host/pnv_phb3.c b/hw/pci-host/pnv_phb3.c index aafd46b635..ac801ac835 100644 --- a/hw/pci-host/pnv_phb3.c +++ b/hw/pci-host/pnv_phb3.c @@ -994,30 +994,6 @@ static void pnv_phb3_realize(DeviceState *dev, Error **errp) PnvMachineState *pnv = PNV_MACHINE(qdev_get_machine()); int i; - /* User created devices */ - if (!phb->chip) { - Error *local_err = NULL; - BusState *s; - - phb->chip = pnv_get_chip(pnv, phb->chip_id); - if (!phb->chip) { - error_setg(errp, "invalid chip id: %d", phb->chip_id); - return; - } - - /* - * Reparent user created devices to the chip to build - * correctly the device tree. 
- */ - pnv_chip_parent_fixup(phb->chip, OBJECT(phb), phb->phb_id); - - s = qdev_get_parent_bus(DEVICE(phb->chip)); - if (!qdev_set_parent_bus(DEVICE(phb), s, &local_err)) { - error_propagate(errp, local_err); - return; - } - } - if (phb->phb_id >= PNV_CHIP_GET_CLASS(phb->chip)->num_phbs) { error_setg(errp, "invalid PHB index: %d", phb->phb_id); return; @@ -1077,10 +1053,7 @@ static void pnv_phb3_realize(DeviceState *dev, Error **errp) pci_setup_iommu(pci->bus, pnv_phb3_dma_iommu, phb); - if (defaults_enabled()) { - pnv_phb_attach_root_port(PCI_HOST_BRIDGE(phb), - TYPE_PNV_PHB3_ROOT_PORT); - } + pnv_phb_attach_root_port(PCI_HOST_BRIDGE(phb), TYPE_PNV_PHB3_ROOT_PORT); } void pnv_phb3_update_regions(PnvPHB3 *phb) @@ -1131,7 +1104,7 @@ static void pnv_phb3_class_init(ObjectClass *klass, void *data) dc->realize = pnv_phb3_realize; device_class_set_props(dc, pnv_phb3_properties); set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); - dc->user_creatable = true; + dc->user_creatable = false; } static const TypeInfo pnv_phb3_type_info = { @@ -1201,7 +1174,7 @@ static void pnv_phb3_root_port_class_init(ObjectClass *klass, void *data) device_class_set_parent_realize(dc, pnv_phb3_root_port_realize, &rpc->parent_realize); - dc->user_creatable = true; + dc->user_creatable = false; k->vendor_id = PCI_VENDOR_ID_IBM; k->device_id = 0x03dc; diff --git a/hw/pci-host/pnv_phb4.c b/hw/pci-host/pnv_phb4.c index b5b384e9ee..b301762093 100644 --- a/hw/pci-host/pnv_phb4.c +++ b/hw/pci-host/pnv_phb4.c @@ -1545,70 +1545,14 @@ static void pnv_phb4_instance_init(Object *obj) object_initialize_child(obj, "source", &phb->xsrc, TYPE_XIVE_SOURCE); } -static PnvPhb4PecState *pnv_phb4_get_pec(PnvChip *chip, PnvPHB4 *phb, - Error **errp) -{ - Pnv9Chip *chip9 = PNV9_CHIP(chip); - int chip_id = phb->chip_id; - int index = phb->phb_id; - int i, j; - - for (i = 0; i < chip->num_pecs; i++) { - /* - * For each PEC, check the amount of phbs it supports - * and see if the given phb4 index matches an index. 
- */ - PnvPhb4PecState *pec = &chip9->pecs[i]; - - for (j = 0; j < pec->num_phbs; j++) { - if (index == pnv_phb4_pec_get_phb_id(pec, j)) { - return pec; - } - } - } - - error_setg(errp, - "pnv-phb4 chip-id %d index %d didn't match any existing PEC", - chip_id, index); - - return NULL; -} - static void pnv_phb4_realize(DeviceState *dev, Error **errp) { PnvPHB4 *phb = PNV_PHB4(dev); - PnvMachineState *pnv = PNV_MACHINE(qdev_get_machine()); - PnvChip *chip = pnv_get_chip(pnv, phb->chip_id); PCIHostState *pci = PCI_HOST_BRIDGE(dev); XiveSource *xsrc = &phb->xsrc; - BusState *s; - Error *local_err = NULL; int nr_irqs; char name[32]; - if (!chip) { - error_setg(errp, "invalid chip id: %d", phb->chip_id); - return; - } - - /* User created PHBs need to be assigned to a PEC */ - if (!phb->pec) { - phb->pec = pnv_phb4_get_pec(chip, phb, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } - } - - /* Reparent the PHB to the chip to build the device tree */ - pnv_chip_parent_fixup(chip, OBJECT(phb), phb->phb_id); - - s = qdev_get_parent_bus(DEVICE(chip)); - if (!qdev_set_parent_bus(DEVICE(phb), s, &local_err)) { - error_propagate(errp, local_err); - return; - } - /* Set the "big_phb" flag */ phb->big_phb = phb->phb_id == 0 || phb->phb_id == 3; @@ -1766,7 +1710,7 @@ static void pnv_phb4_class_init(ObjectClass *klass, void *data) dc->realize = pnv_phb4_realize; device_class_set_props(dc, pnv_phb4_properties); set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); - dc->user_creatable = true; + dc->user_creatable = false; xfc->notify = pnv_phb4_xive_notify; } @@ -1783,6 +1727,12 @@ static const TypeInfo pnv_phb4_type_info = { } }; +static const TypeInfo pnv_phb5_type_info = { + .name = TYPE_PNV_PHB5, + .parent = TYPE_PNV_PHB4, + .instance_size = sizeof(PnvPHB4), +}; + static void pnv_phb4_root_bus_class_init(ObjectClass *klass, void *data) { BusClass *k = BUS_CLASS(klass); @@ -1858,7 +1808,7 @@ static void pnv_phb4_root_port_class_init(ObjectClass *klass, void *data) PCIERootPortClass *rpc = PCIE_ROOT_PORT_CLASS(klass); dc->desc = "IBM PHB4 PCIE Root Port"; - dc->user_creatable = true; + dc->user_creatable = false; device_class_set_parent_realize(dc, pnv_phb4_root_port_realize, &rpc->parent_realize); @@ -1888,7 +1838,7 @@ static void pnv_phb5_root_port_class_init(ObjectClass *klass, void *data) PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); dc->desc = "IBM PHB5 PCIE Root Port"; - dc->user_creatable = true; + dc->user_creatable = false; k->vendor_id = PCI_VENDOR_ID_IBM; k->device_id = PNV_PHB5_DEVICE_ID; @@ -1907,6 +1857,7 @@ static void pnv_phb4_register_types(void) type_register_static(&pnv_phb5_root_port_info); type_register_static(&pnv_phb4_root_port_info); type_register_static(&pnv_phb4_type_info); + type_register_static(&pnv_phb5_type_info); type_register_static(&pnv_phb4_iommu_memory_region_info); } diff --git a/hw/pci-host/pnv_phb4_pec.c b/hw/pci-host/pnv_phb4_pec.c index 0ab36e9c8f..6f1121a948 100644 --- a/hw/pci-host/pnv_phb4_pec.c +++ b/hw/pci-host/pnv_phb4_pec.c @@ -116,9 +116,11 @@ static void pnv_pec_default_phb_realize(PnvPhb4PecState *pec, int stack_no, Error **errp) { - PnvPHB4 *phb = PNV_PHB4(qdev_new(TYPE_PNV_PHB4)); + PnvPhb4PecClass *pecc = PNV_PHB4_PEC_GET_CLASS(pec); + PnvPHB4 *phb = PNV_PHB4(qdev_new(pecc->phb_type)); int phb_id = pnv_phb4_pec_get_phb_id(pec, stack_no); + object_property_add_child(OBJECT(pec), "phb[*]", OBJECT(phb)); object_property_set_link(OBJECT(phb), "pec", OBJECT(pec), &error_abort); object_property_set_int(OBJECT(phb), "chip-id", 
pec->chip_id, @@ -131,9 +133,7 @@ static void pnv_pec_default_phb_realize(PnvPhb4PecState *pec, } /* Add a single Root port if running with defaults */ - pnv_phb_attach_root_port(PCI_HOST_BRIDGE(phb), - PNV_PHB4_PEC_GET_CLASS(pec)->rp_model); - + pnv_phb_attach_root_port(PCI_HOST_BRIDGE(phb), pecc->rp_model); } static void pnv_pec_realize(DeviceState *dev, Error **errp) @@ -151,10 +151,8 @@ static void pnv_pec_realize(DeviceState *dev, Error **errp) pec->num_phbs = pecc->num_phbs[pec->index]; /* Create PHBs if running with defaults */ - if (defaults_enabled()) { - for (i = 0; i < pec->num_phbs; i++) { - pnv_pec_default_phb_realize(pec, i, errp); - } + for (i = 0; i < pec->num_phbs; i++) { + pnv_pec_default_phb_realize(pec, i, errp); } /* Initialize the XSCOM regions for the PEC registers */ @@ -265,6 +263,7 @@ static void pnv_pec_class_init(ObjectClass *klass, void *data) pecc->stk_compat = stk_compat; pecc->stk_compat_size = sizeof(stk_compat); pecc->version = PNV_PHB4_VERSION; + pecc->phb_type = TYPE_PNV_PHB4; pecc->num_phbs = pnv_pec_num_phbs; pecc->rp_model = TYPE_PNV_PHB4_ROOT_PORT; } @@ -317,6 +316,7 @@ static void pnv_phb5_pec_class_init(ObjectClass *klass, void *data) pecc->stk_compat = stk_compat; pecc->stk_compat_size = sizeof(stk_compat); pecc->version = PNV_PHB5_VERSION; + pecc->phb_type = TYPE_PNV_PHB5; pecc->num_phbs = pnv_phb5_pec_num_stacks; pecc->rp_model = TYPE_PNV_PHB5_ROOT_PORT; } diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index 0ac86e104f..00f57c9678 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -1141,9 +1141,7 @@ static void pnv_chip_power8_instance_init(Object *obj) object_initialize_child(obj, "homer", &chip8->homer, TYPE_PNV8_HOMER); - if (defaults_enabled()) { - chip8->num_phbs = pcc->num_phbs; - } + chip8->num_phbs = pcc->num_phbs; for (i = 0; i < chip8->num_phbs; i++) { object_initialize_child(obj, "phb[*]", &chip8->phbs[i], TYPE_PNV_PHB3); @@ -1600,9 +1598,7 @@ static void pnv_chip_power10_instance_init(Object *obj) object_initialize_child(obj, "occ", &chip10->occ, TYPE_PNV10_OCC); object_initialize_child(obj, "homer", &chip10->homer, TYPE_PNV10_HOMER); - if (defaults_enabled()) { - chip->num_pecs = pcc->num_pecs; - } + chip->num_pecs = pcc->num_pecs; for (i = 0; i < chip->num_pecs; i++) { object_initialize_child(obj, "pec[*]", &chip10->pecs[i], @@ -1976,23 +1972,6 @@ static ICSState *pnv_ics_get(XICSFabric *xi, int irq) return NULL; } -void pnv_chip_parent_fixup(PnvChip *chip, Object *obj, int index) -{ - Object *parent = OBJECT(chip); - g_autofree char *default_id = - g_strdup_printf("%s[%d]", object_get_typename(obj), index); - - if (obj->parent == parent) { - return; - } - - object_ref(obj); - object_unparent(obj); - object_property_add_child( - parent, DEVICE(obj)->id ? 
DEVICE(obj)->id : default_id, obj); - object_unref(obj); -} - PnvChip *pnv_get_chip(PnvMachineState *pnv, uint32_t chip_id) { int i; @@ -2132,8 +2111,6 @@ static void pnv_machine_power8_class_init(ObjectClass *oc, void *data) pmc->compat = compat; pmc->compat_size = sizeof(compat); - - machine_class_allow_dynamic_sysbus_dev(mc, TYPE_PNV_PHB3); } static void pnv_machine_power9_class_init(ObjectClass *oc, void *data) @@ -2152,8 +2129,6 @@ static void pnv_machine_power9_class_init(ObjectClass *oc, void *data) pmc->compat = compat; pmc->compat_size = sizeof(compat); pmc->dt_power_mgt = pnv_dt_power_mgt; - - machine_class_allow_dynamic_sysbus_dev(mc, TYPE_PNV_PHB4); } static void pnv_machine_power10_class_init(ObjectClass *oc, void *data) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 7b45353ce2..d07a4e99b1 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -412,6 +412,7 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix) static void vfio_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector, int vector_n, bool msix) { + KVMRouteChange c; int virq; if ((msix && vdev->no_kvm_msix) || (!msix && vdev->no_kvm_msi)) { @@ -422,11 +423,13 @@ static void vfio_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector, return; } - virq = kvm_irqchip_add_msi_route(kvm_state, vector_n, &vdev->pdev); + c = kvm_irqchip_begin_route_changes(kvm_state); + virq = kvm_irqchip_add_msi_route(&c, vector_n, &vdev->pdev); if (virq < 0) { event_notifier_cleanup(&vector->kvm_interrupt); return; } + kvm_irqchip_commit_route_changes(&c); if (kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, &vector->kvm_interrupt, NULL, virq) < 0) { diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build index 521f7d64a8..6047670804 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build @@ -11,7 +11,7 @@ softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-stub.c')) virtio_ss = ss.source_set() virtio_ss.add(files('virtio.c')) -virtio_ss.add(when: 'CONFIG_VHOST', if_true: files('vhost.c', 'vhost-backend.c')) +virtio_ss.add(when: 'CONFIG_VHOST', if_true: files('vhost.c', 'vhost-backend.c', 'vhost-shadow-virtqueue.c', 'vhost-iova-tree.c')) virtio_ss.add(when: 'CONFIG_VHOST_USER', if_true: files('vhost-user.c')) virtio_ss.add(when: 'CONFIG_VHOST_VDPA', if_true: files('vhost-vdpa.c')) virtio_ss.add(when: 'CONFIG_VIRTIO_BALLOON', if_true: files('virtio-balloon.c')) diff --git a/hw/virtio/vhost-iova-tree.c b/hw/virtio/vhost-iova-tree.c new file mode 100644 index 0000000000..55fed1fefb --- /dev/null +++ b/hw/virtio/vhost-iova-tree.c @@ -0,0 +1,110 @@ +/* + * vhost software live migration iova tree + * + * SPDX-FileCopyrightText: Red Hat, Inc. 2021 + * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/iova-tree.h" +#include "vhost-iova-tree.h" + +#define iova_min_addr qemu_real_host_page_size + +/** + * VhostIOVATree, able to: + * - Translate iova address + * - Reverse translate iova address (from translated to iova) + * - Allocate IOVA regions for translated range (linear operation) + */ +struct VhostIOVATree { + /* First addressable iova address in the device */ + uint64_t iova_first; + + /* Last addressable iova address in the device */ + uint64_t iova_last; + + /* IOVA address to qemu memory maps. 
*/ + IOVATree *iova_taddr_map; +}; + +/** + * Create a new IOVA tree + * + * Returns the new IOVA tree + */ +VhostIOVATree *vhost_iova_tree_new(hwaddr iova_first, hwaddr iova_last) +{ + VhostIOVATree *tree = g_new(VhostIOVATree, 1); + + /* Some devices do not like 0 addresses */ + tree->iova_first = MAX(iova_first, iova_min_addr); + tree->iova_last = iova_last; + + tree->iova_taddr_map = iova_tree_new(); + return tree; +} + +/** + * Delete an iova tree + */ +void vhost_iova_tree_delete(VhostIOVATree *iova_tree) +{ + iova_tree_destroy(iova_tree->iova_taddr_map); + g_free(iova_tree); +} + +/** + * Find the IOVA address stored from a memory address + * + * @tree: The iova tree + * @map: The map with the memory address + * + * Return the stored mapping, or NULL if not found. + */ +const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *tree, + const DMAMap *map) +{ + return iova_tree_find_iova(tree->iova_taddr_map, map); +} + +/** + * Allocate a new mapping + * + * @tree: The iova tree + * @map: The iova map + * + * Returns: + * - IOVA_OK if the map fits in the container + * - IOVA_ERR_INVALID if the map does not make sense (like size overflow) + * - IOVA_ERR_NOMEM if tree cannot allocate more space. + * + * It returns assignated iova in map->iova if return value is VHOST_DMA_MAP_OK. + */ +int vhost_iova_tree_map_alloc(VhostIOVATree *tree, DMAMap *map) +{ + /* Some vhost devices do not like addr 0. Skip first page */ + hwaddr iova_first = tree->iova_first ?: qemu_real_host_page_size; + + if (map->translated_addr + map->size < map->translated_addr || + map->perm == IOMMU_NONE) { + return IOVA_ERR_INVALID; + } + + /* Allocate a node in IOVA address */ + return iova_tree_alloc_map(tree->iova_taddr_map, map, iova_first, + tree->iova_last); +} + +/** + * Remove existing mappings from iova tree + * + * @iova_tree: The vhost iova tree + * @map: The map to remove + */ +void vhost_iova_tree_remove(VhostIOVATree *iova_tree, const DMAMap *map) +{ + iova_tree_remove(iova_tree->iova_taddr_map, map); +} diff --git a/hw/virtio/vhost-iova-tree.h b/hw/virtio/vhost-iova-tree.h new file mode 100644 index 0000000000..6a4f24e0f9 --- /dev/null +++ b/hw/virtio/vhost-iova-tree.h @@ -0,0 +1,27 @@ +/* + * vhost software live migration iova tree + * + * SPDX-FileCopyrightText: Red Hat, Inc. 2021 + * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef HW_VIRTIO_VHOST_IOVA_TREE_H +#define HW_VIRTIO_VHOST_IOVA_TREE_H + +#include "qemu/iova-tree.h" +#include "exec/memory.h" + +typedef struct VhostIOVATree VhostIOVATree; + +VhostIOVATree *vhost_iova_tree_new(uint64_t iova_first, uint64_t iova_last); +void vhost_iova_tree_delete(VhostIOVATree *iova_tree); +G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostIOVATree, vhost_iova_tree_delete); + +const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *iova_tree, + const DMAMap *map); +int vhost_iova_tree_map_alloc(VhostIOVATree *iova_tree, DMAMap *map); +void vhost_iova_tree_remove(VhostIOVATree *iova_tree, const DMAMap *map); + +#endif diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c new file mode 100644 index 0000000000..b232803d1b --- /dev/null +++ b/hw/virtio/vhost-shadow-virtqueue.c @@ -0,0 +1,636 @@ +/* + * vhost shadow virtqueue + * + * SPDX-FileCopyrightText: Red Hat, Inc. 
2021 + * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "hw/virtio/vhost-shadow-virtqueue.h" + +#include "qemu/error-report.h" +#include "qapi/error.h" +#include "qemu/main-loop.h" +#include "qemu/log.h" +#include "qemu/memalign.h" +#include "linux-headers/linux/vhost.h" + +/** + * Validate the transport device features that both guests can use with the SVQ + * and SVQs can use with the device. + * + * @dev_features: The features + * @errp: Error pointer + */ +bool vhost_svq_valid_features(uint64_t features, Error **errp) +{ + bool ok = true; + uint64_t svq_features = features; + + for (uint64_t b = VIRTIO_TRANSPORT_F_START; b <= VIRTIO_TRANSPORT_F_END; + ++b) { + switch (b) { + case VIRTIO_F_ANY_LAYOUT: + continue; + + case VIRTIO_F_ACCESS_PLATFORM: + /* SVQ trust in the host's IOMMU to translate addresses */ + case VIRTIO_F_VERSION_1: + /* SVQ trust that the guest vring is little endian */ + if (!(svq_features & BIT_ULL(b))) { + svq_features |= BIT_ULL(b); + ok = false; + } + continue; + + default: + if (svq_features & BIT_ULL(b)) { + svq_features &= ~BIT_ULL(b); + ok = false; + } + } + } + + if (!ok) { + error_setg(errp, "SVQ Invalid device feature flags, offer: 0x%"PRIx64 + ", ok: 0x%"PRIx64, features, svq_features); + } + return ok; +} + +/** + * Number of descriptors that the SVQ can make available from the guest. + * + * @svq: The svq + */ +static uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq) +{ + return svq->vring.num - (svq->shadow_avail_idx - svq->shadow_used_idx); +} + +/** + * Translate addresses between the qemu's virtual address and the SVQ IOVA + * + * @svq: Shadow VirtQueue + * @vaddr: Translated IOVA addresses + * @iovec: Source qemu's VA addresses + * @num: Length of iovec and minimum length of vaddr + */ +static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq, + hwaddr *addrs, const struct iovec *iovec, + size_t num) +{ + if (num == 0) { + return true; + } + + for (size_t i = 0; i < num; ++i) { + DMAMap needle = { + .translated_addr = (hwaddr)(uintptr_t)iovec[i].iov_base, + .size = iovec[i].iov_len, + }; + Int128 needle_last, map_last; + size_t off; + + const DMAMap *map = vhost_iova_tree_find_iova(svq->iova_tree, &needle); + /* + * Map cannot be NULL since iova map contains all guest space and + * qemu already has a physical address mapped + */ + if (unlikely(!map)) { + qemu_log_mask(LOG_GUEST_ERROR, + "Invalid address 0x%"HWADDR_PRIx" given by guest", + needle.translated_addr); + return false; + } + + off = needle.translated_addr - map->translated_addr; + addrs[i] = map->iova + off; + + needle_last = int128_add(int128_make64(needle.translated_addr), + int128_make64(iovec[i].iov_len)); + map_last = int128_make64(map->translated_addr + map->size); + if (unlikely(int128_gt(needle_last, map_last))) { + qemu_log_mask(LOG_GUEST_ERROR, + "Guest buffer expands over iova range"); + return false; + } + } + + return true; +} + +static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, + const struct iovec *iovec, size_t num, + bool more_descs, bool write) +{ + uint16_t i = svq->free_head, last = svq->free_head; + unsigned n; + uint16_t flags = write ? 
cpu_to_le16(VRING_DESC_F_WRITE) : 0; + vring_desc_t *descs = svq->vring.desc; + + if (num == 0) { + return; + } + + for (n = 0; n < num; n++) { + if (more_descs || (n + 1 < num)) { + descs[i].flags = flags | cpu_to_le16(VRING_DESC_F_NEXT); + } else { + descs[i].flags = flags; + } + descs[i].addr = cpu_to_le64(sg[n]); + descs[i].len = cpu_to_le32(iovec[n].iov_len); + + last = i; + i = cpu_to_le16(descs[i].next); + } + + svq->free_head = le16_to_cpu(descs[last].next); +} + +static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, + VirtQueueElement *elem, unsigned *head) +{ + unsigned avail_idx; + vring_avail_t *avail = svq->vring.avail; + bool ok; + g_autofree hwaddr *sgs = g_new(hwaddr, MAX(elem->out_num, elem->in_num)); + + *head = svq->free_head; + + /* We need some descriptors here */ + if (unlikely(!elem->out_num && !elem->in_num)) { + qemu_log_mask(LOG_GUEST_ERROR, + "Guest provided element with no descriptors"); + return false; + } + + ok = vhost_svq_translate_addr(svq, sgs, elem->out_sg, elem->out_num); + if (unlikely(!ok)) { + return false; + } + vhost_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num, + elem->in_num > 0, false); + + + ok = vhost_svq_translate_addr(svq, sgs, elem->in_sg, elem->in_num); + if (unlikely(!ok)) { + return false; + } + + vhost_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, false, true); + + /* + * Put the entry in the available array (but don't update avail->idx until + * they do sync). + */ + avail_idx = svq->shadow_avail_idx & (svq->vring.num - 1); + avail->ring[avail_idx] = cpu_to_le16(*head); + svq->shadow_avail_idx++; + + /* Update the avail index after write the descriptor */ + smp_wmb(); + avail->idx = cpu_to_le16(svq->shadow_avail_idx); + + return true; +} + +static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem) +{ + unsigned qemu_head; + bool ok = vhost_svq_add_split(svq, elem, &qemu_head); + if (unlikely(!ok)) { + return false; + } + + svq->ring_id_maps[qemu_head] = elem; + return true; +} + +static void vhost_svq_kick(VhostShadowVirtqueue *svq) +{ + /* + * We need to expose the available array entries before checking the used + * flags + */ + smp_mb(); + if (svq->vring.used->flags & VRING_USED_F_NO_NOTIFY) { + return; + } + + event_notifier_set(&svq->hdev_kick); +} + +/** + * Forward available buffers. + * + * @svq: Shadow VirtQueue + * + * Note that this function does not guarantee that all guest's available + * buffers are available to the device in SVQ avail ring. The guest may have + * exposed a GPA / GIOVA contiguous buffer, but it may not be contiguous in + * qemu vaddr. + * + * If that happens, guest's kick notifications will be disabled until the + * device uses some buffers. + */ +static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq) +{ + /* Clear event notifier */ + event_notifier_test_and_clear(&svq->svq_kick); + + /* Forward to the device as many available buffers as possible */ + do { + virtio_queue_set_notification(svq->vq, false); + + while (true) { + VirtQueueElement *elem; + bool ok; + + if (svq->next_guest_avail_elem) { + elem = g_steal_pointer(&svq->next_guest_avail_elem); + } else { + elem = virtqueue_pop(svq->vq, sizeof(*elem)); + } + + if (!elem) { + break; + } + + if (elem->out_num + elem->in_num > vhost_svq_available_slots(svq)) { + /* + * This condition is possible since a contiguous buffer in GPA + * does not imply a contiguous buffer in qemu's VA + * scatter-gather segments. 
If that happens, the buffer exposed + * to the device needs to be a chain of descriptors at this + * moment. + * + * SVQ cannot hold more available buffers if we are here: + * queue the current guest descriptor and ignore further kicks + * until some elements are used. + */ + svq->next_guest_avail_elem = elem; + return; + } + + ok = vhost_svq_add(svq, elem); + if (unlikely(!ok)) { + /* VQ is broken, just return and ignore any other kicks */ + return; + } + vhost_svq_kick(svq); + } + + virtio_queue_set_notification(svq->vq, true); + } while (!virtio_queue_empty(svq->vq)); +} + +/** + * Handle guest's kick. + * + * @n: guest kick event notifier, the one that guest set to notify svq. + */ +static void vhost_handle_guest_kick_notifier(EventNotifier *n) +{ + VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue, svq_kick); + event_notifier_test_and_clear(n); + vhost_handle_guest_kick(svq); +} + +static bool vhost_svq_more_used(VhostShadowVirtqueue *svq) +{ + if (svq->last_used_idx != svq->shadow_used_idx) { + return true; + } + + svq->shadow_used_idx = cpu_to_le16(svq->vring.used->idx); + + return svq->last_used_idx != svq->shadow_used_idx; +} + +/** + * Enable vhost device calls after disable them. + * + * @svq: The svq + * + * It returns false if there are pending used buffers from the vhost device, + * avoiding the possible races between SVQ checking for more work and enabling + * callbacks. True if SVQ used vring has no more pending buffers. + */ +static bool vhost_svq_enable_notification(VhostShadowVirtqueue *svq) +{ + svq->vring.avail->flags &= ~cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT); + /* Make sure the flag is written before the read of used_idx */ + smp_mb(); + return !vhost_svq_more_used(svq); +} + +static void vhost_svq_disable_notification(VhostShadowVirtqueue *svq) +{ + svq->vring.avail->flags |= cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT); +} + +static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, + uint32_t *len) +{ + vring_desc_t *descs = svq->vring.desc; + const vring_used_t *used = svq->vring.used; + vring_used_elem_t used_elem; + uint16_t last_used; + + if (!vhost_svq_more_used(svq)) { + return NULL; + } + + /* Only get used array entries after they have been exposed by dev */ + smp_rmb(); + last_used = svq->last_used_idx & (svq->vring.num - 1); + used_elem.id = le32_to_cpu(used->ring[last_used].id); + used_elem.len = le32_to_cpu(used->ring[last_used].len); + + svq->last_used_idx++; + if (unlikely(used_elem.id >= svq->vring.num)) { + qemu_log_mask(LOG_GUEST_ERROR, "Device %s says index %u is used", + svq->vdev->name, used_elem.id); + return NULL; + } + + if (unlikely(!svq->ring_id_maps[used_elem.id])) { + qemu_log_mask(LOG_GUEST_ERROR, + "Device %s says index %u is used, but it was not available", + svq->vdev->name, used_elem.id); + return NULL; + } + + descs[used_elem.id].next = svq->free_head; + svq->free_head = used_elem.id; + + *len = used_elem.len; + return g_steal_pointer(&svq->ring_id_maps[used_elem.id]); +} + +static void vhost_svq_flush(VhostShadowVirtqueue *svq, + bool check_for_avail_queue) +{ + VirtQueue *vq = svq->vq; + + /* Forward as many used buffers as possible. 
*/ + do { + unsigned i = 0; + + vhost_svq_disable_notification(svq); + while (true) { + uint32_t len; + g_autofree VirtQueueElement *elem = vhost_svq_get_buf(svq, &len); + if (!elem) { + break; + } + + if (unlikely(i >= svq->vring.num)) { + qemu_log_mask(LOG_GUEST_ERROR, + "More than %u used buffers obtained in a %u size SVQ", + i, svq->vring.num); + virtqueue_fill(vq, elem, len, i); + virtqueue_flush(vq, i); + return; + } + virtqueue_fill(vq, elem, len, i++); + } + + virtqueue_flush(vq, i); + event_notifier_set(&svq->svq_call); + + if (check_for_avail_queue && svq->next_guest_avail_elem) { + /* + * Avail ring was full when vhost_svq_flush was called, so it's a + * good moment to make more descriptors available if possible. + */ + vhost_handle_guest_kick(svq); + } + } while (!vhost_svq_enable_notification(svq)); +} + +/** + * Forward used buffers. + * + * @n: hdev call event notifier, the one that device set to notify svq. + * + * Note that we are not making any buffers available in the loop, there is no + * way that it runs more than virtqueue size times. + */ +static void vhost_svq_handle_call(EventNotifier *n) +{ + VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue, + hdev_call); + event_notifier_test_and_clear(n); + vhost_svq_flush(svq, true); +} + +/** + * Set the call notifier for the SVQ to call the guest + * + * @svq: Shadow virtqueue + * @call_fd: call notifier + * + * Called on BQL context. + */ +void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd) +{ + if (call_fd == VHOST_FILE_UNBIND) { + /* + * Fail event_notifier_set if called handling device call. + * + * SVQ still needs device notifications, since it needs to keep + * forwarding used buffers even with the unbind. + */ + memset(&svq->svq_call, 0, sizeof(svq->svq_call)); + } else { + event_notifier_init_fd(&svq->svq_call, call_fd); + } +} + +/** + * Get the shadow vq vring address. + * @svq: Shadow virtqueue + * @addr: Destination to store address + */ +void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq, + struct vhost_vring_addr *addr) +{ + addr->desc_user_addr = (uint64_t)(uintptr_t)svq->vring.desc; + addr->avail_user_addr = (uint64_t)(uintptr_t)svq->vring.avail; + addr->used_user_addr = (uint64_t)(uintptr_t)svq->vring.used; +} + +size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq) +{ + size_t desc_size = sizeof(vring_desc_t) * svq->vring.num; + size_t avail_size = offsetof(vring_avail_t, ring) + + sizeof(uint16_t) * svq->vring.num; + + return ROUND_UP(desc_size + avail_size, qemu_real_host_page_size); +} + +size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq) +{ + size_t used_size = offsetof(vring_used_t, ring) + + sizeof(vring_used_elem_t) * svq->vring.num; + return ROUND_UP(used_size, qemu_real_host_page_size); +} + +/** + * Set a new file descriptor for the guest to kick the SVQ and notify for avail + * + * @svq: The svq + * @svq_kick_fd: The svq kick fd + * + * Note that the SVQ will never close the old file descriptor. + */ +void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd) +{ + EventNotifier *svq_kick = &svq->svq_kick; + bool poll_stop = VHOST_FILE_UNBIND != event_notifier_get_fd(svq_kick); + bool poll_start = svq_kick_fd != VHOST_FILE_UNBIND; + + if (poll_stop) { + event_notifier_set_handler(svq_kick, NULL); + } + + /* + * event_notifier_set_handler already checks for guest's notifications if + * they arrive at the new file descriptor in the switch, so there is no + * need to explicitly check for them. 
+ */ + if (poll_start) { + event_notifier_init_fd(svq_kick, svq_kick_fd); + event_notifier_set(svq_kick); + event_notifier_set_handler(svq_kick, vhost_handle_guest_kick_notifier); + } +} + +/** + * Start the shadow virtqueue operation. + * + * @svq: Shadow Virtqueue + * @vdev: VirtIO device + * @vq: Virtqueue to shadow + */ +void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, + VirtQueue *vq) +{ + size_t desc_size, driver_size, device_size; + + svq->next_guest_avail_elem = NULL; + svq->shadow_avail_idx = 0; + svq->shadow_used_idx = 0; + svq->last_used_idx = 0; + svq->vdev = vdev; + svq->vq = vq; + + svq->vring.num = virtio_queue_get_num(vdev, virtio_get_queue_index(vq)); + driver_size = vhost_svq_driver_area_size(svq); + device_size = vhost_svq_device_area_size(svq); + svq->vring.desc = qemu_memalign(qemu_real_host_page_size, driver_size); + desc_size = sizeof(vring_desc_t) * svq->vring.num; + svq->vring.avail = (void *)((char *)svq->vring.desc + desc_size); + memset(svq->vring.desc, 0, driver_size); + svq->vring.used = qemu_memalign(qemu_real_host_page_size, device_size); + memset(svq->vring.used, 0, device_size); + svq->ring_id_maps = g_new0(VirtQueueElement *, svq->vring.num); + for (unsigned i = 0; i < svq->vring.num - 1; i++) { + svq->vring.desc[i].next = cpu_to_le16(i + 1); + } +} + +/** + * Stop the shadow virtqueue operation. + * @svq: Shadow Virtqueue + */ +void vhost_svq_stop(VhostShadowVirtqueue *svq) +{ + event_notifier_set_handler(&svq->svq_kick, NULL); + g_autofree VirtQueueElement *next_avail_elem = NULL; + + if (!svq->vq) { + return; + } + + /* Send all pending used descriptors to guest */ + vhost_svq_flush(svq, false); + + for (unsigned i = 0; i < svq->vring.num; ++i) { + g_autofree VirtQueueElement *elem = NULL; + elem = g_steal_pointer(&svq->ring_id_maps[i]); + if (elem) { + virtqueue_detach_element(svq->vq, elem, 0); + } + } + + next_avail_elem = g_steal_pointer(&svq->next_guest_avail_elem); + if (next_avail_elem) { + virtqueue_detach_element(svq->vq, next_avail_elem, 0); + } + svq->vq = NULL; + g_free(svq->ring_id_maps); + qemu_vfree(svq->vring.desc); + qemu_vfree(svq->vring.used); +} + +/** + * Creates vhost shadow virtqueue, and instructs the vhost device to use the + * shadow methods and file descriptors. + * + * @iova_tree: Tree to perform descriptors translations + * + * Returns the new virtqueue or NULL. + * + * In case of error, reason is reported through error_report. + */ +VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree) +{ + g_autofree VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1); + int r; + + r = event_notifier_init(&svq->hdev_kick, 0); + if (r != 0) { + error_report("Couldn't create kick event notifier: %s (%d)", + g_strerror(errno), errno); + goto err_init_hdev_kick; + } + + r = event_notifier_init(&svq->hdev_call, 0); + if (r != 0) { + error_report("Couldn't create call event notifier: %s (%d)", + g_strerror(errno), errno); + goto err_init_hdev_call; + } + + event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND); + event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call); + svq->iova_tree = iova_tree; + return g_steal_pointer(&svq); + +err_init_hdev_call: + event_notifier_cleanup(&svq->hdev_kick); + +err_init_hdev_kick: + return NULL; +} + +/** + * Free the resources of the shadow virtqueue. + * + * @pvq: gpointer to SVQ so it can be used by autofree functions. 
+ */ +void vhost_svq_free(gpointer pvq) +{ + VhostShadowVirtqueue *vq = pvq; + vhost_svq_stop(vq); + event_notifier_cleanup(&vq->hdev_kick); + event_notifier_set_handler(&vq->hdev_call, NULL); + event_notifier_cleanup(&vq->hdev_call); + g_free(vq); +} diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h new file mode 100644 index 0000000000..e5e24c536d --- /dev/null +++ b/hw/virtio/vhost-shadow-virtqueue.h @@ -0,0 +1,87 @@ +/* + * vhost shadow virtqueue + * + * SPDX-FileCopyrightText: Red Hat, Inc. 2021 + * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef VHOST_SHADOW_VIRTQUEUE_H +#define VHOST_SHADOW_VIRTQUEUE_H + +#include "qemu/event_notifier.h" +#include "hw/virtio/virtio.h" +#include "standard-headers/linux/vhost_types.h" +#include "hw/virtio/vhost-iova-tree.h" + +/* Shadow virtqueue to relay notifications */ +typedef struct VhostShadowVirtqueue { + /* Shadow vring */ + struct vring vring; + + /* Shadow kick notifier, sent to vhost */ + EventNotifier hdev_kick; + /* Shadow call notifier, sent to vhost */ + EventNotifier hdev_call; + + /* + * Borrowed virtqueue's guest to host notifier. To borrow it in this event + * notifier allows to recover the VhostShadowVirtqueue from the event loop + * easily. If we use the VirtQueue's one, we don't have an easy way to + * retrieve VhostShadowVirtqueue. + * + * So shadow virtqueue must not clean it, or we would lose VirtQueue one. + */ + EventNotifier svq_kick; + + /* Guest's call notifier, where the SVQ calls guest. */ + EventNotifier svq_call; + + /* Virtio queue shadowing */ + VirtQueue *vq; + + /* Virtio device */ + VirtIODevice *vdev; + + /* IOVA mapping */ + VhostIOVATree *iova_tree; + + /* Map for use the guest's descriptors */ + VirtQueueElement **ring_id_maps; + + /* Next VirtQueue element that guest made available */ + VirtQueueElement *next_guest_avail_elem; + + /* Next head to expose to the device */ + uint16_t shadow_avail_idx; + + /* Next free descriptor */ + uint16_t free_head; + + /* Last seen used idx */ + uint16_t shadow_used_idx; + + /* Next head to consume from the device */ + uint16_t last_used_idx; +} VhostShadowVirtqueue; + +bool vhost_svq_valid_features(uint64_t features, Error **errp); + +void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd); +void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd); +void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq, + struct vhost_vring_addr *addr); +size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq); +size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq); + +void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, + VirtQueue *vq); +void vhost_svq_stop(VhostShadowVirtqueue *svq); + +VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree); + +void vhost_svq_free(gpointer vq); +G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostShadowVirtqueue, vhost_svq_free); + +#endif diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index 6c67d5f034..c5ed7a3779 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -17,12 +17,14 @@ #include "hw/virtio/vhost.h" #include "hw/virtio/vhost-backend.h" #include "hw/virtio/virtio-net.h" +#include "hw/virtio/vhost-shadow-virtqueue.h" #include "hw/virtio/vhost-vdpa.h" #include "exec/address-spaces.h" #include "qemu/main-loop.h" #include "cpu.h" #include "trace.h" #include "qemu-common.h" +#include "qapi/error.h" /* * Return one past the end of the 
end of section. Be careful with uint64_t @@ -207,6 +209,21 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, vaddr, section->readonly); llsize = int128_sub(llend, int128_make64(iova)); + if (v->shadow_vqs_enabled) { + DMAMap mem_region = { + .translated_addr = (hwaddr)(uintptr_t)vaddr, + .size = int128_get64(llsize) - 1, + .perm = IOMMU_ACCESS_FLAG(true, section->readonly), + }; + + int r = vhost_iova_tree_map_alloc(v->iova_tree, &mem_region); + if (unlikely(r != IOVA_OK)) { + error_report("Can't allocate a mapping (%d)", r); + goto fail; + } + + iova = mem_region.iova; + } vhost_vdpa_iotlb_batch_begin_once(v); ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize), @@ -259,6 +276,20 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, llsize = int128_sub(llend, int128_make64(iova)); + if (v->shadow_vqs_enabled) { + const DMAMap *result; + const void *vaddr = memory_region_get_ram_ptr(section->mr) + + section->offset_within_region + + (iova - section->offset_within_address_space); + DMAMap mem_region = { + .translated_addr = (hwaddr)(uintptr_t)vaddr, + .size = int128_get64(llsize) - 1, + }; + + result = vhost_iova_tree_find_iova(v->iova_tree, &mem_region); + iova = result->iova; + vhost_iova_tree_remove(v->iova_tree, &mem_region); + } vhost_vdpa_iotlb_batch_begin_once(v); ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize)); if (ret) { @@ -342,6 +373,55 @@ static bool vhost_vdpa_one_time_request(struct vhost_dev *dev) return v->index != 0; } +static int vhost_vdpa_get_dev_features(struct vhost_dev *dev, + uint64_t *features) +{ + int ret; + + ret = vhost_vdpa_call(dev, VHOST_GET_FEATURES, features); + trace_vhost_vdpa_get_features(dev, *features); + return ret; +} + +static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v, + Error **errp) +{ + g_autoptr(GPtrArray) shadow_vqs = NULL; + uint64_t dev_features, svq_features; + int r; + bool ok; + + if (!v->shadow_vqs_enabled) { + return 0; + } + + r = vhost_vdpa_get_dev_features(hdev, &dev_features); + if (r != 0) { + error_setg_errno(errp, -r, "Can't get vdpa device features"); + return r; + } + + svq_features = dev_features; + ok = vhost_svq_valid_features(svq_features, errp); + if (unlikely(!ok)) { + return -1; + } + + shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free); + for (unsigned n = 0; n < hdev->nvqs; ++n) { + g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new(v->iova_tree); + + if (unlikely(!svq)) { + error_setg(errp, "Cannot create svq %u", n); + return -1; + } + g_ptr_array_add(shadow_vqs, g_steal_pointer(&svq)); + } + + v->shadow_vqs = g_steal_pointer(&shadow_vqs); + return 0; +} + static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) { struct vhost_vdpa *v; @@ -364,6 +444,10 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) dev->opaque = opaque ; v->listener = vhost_vdpa_memory_listener; v->msg_type = VHOST_IOTLB_MSG_V2; + ret = vhost_vdpa_init_svq(dev, v, errp); + if (ret) { + goto err; + } vhost_vdpa_get_iova_range(v); @@ -375,6 +459,10 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) VIRTIO_CONFIG_S_DRIVER); return 0; + +err: + ram_block_discard_disable(false); + return ret; } static void vhost_vdpa_host_notifier_uninit(struct vhost_dev *dev, @@ -445,8 +533,14 @@ static void vhost_vdpa_host_notifiers_uninit(struct vhost_dev *dev, int n) static void vhost_vdpa_host_notifiers_init(struct vhost_dev *dev) { + struct vhost_vdpa *v = dev->opaque; int i; + if 
(v->shadow_vqs_enabled) { + /* FIXME SVQ is not compatible with host notifiers mr */ + return; + } + for (i = dev->vq_index; i < dev->vq_index + dev->nvqs; i++) { if (vhost_vdpa_host_notifier_init(dev, i)) { goto err; @@ -460,6 +554,21 @@ err: return; } +static void vhost_vdpa_svq_cleanup(struct vhost_dev *dev) +{ + struct vhost_vdpa *v = dev->opaque; + size_t idx; + + if (!v->shadow_vqs) { + return; + } + + for (idx = 0; idx < v->shadow_vqs->len; ++idx) { + vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, idx)); + } + g_ptr_array_free(v->shadow_vqs, true); +} + static int vhost_vdpa_cleanup(struct vhost_dev *dev) { struct vhost_vdpa *v; @@ -468,6 +577,7 @@ static int vhost_vdpa_cleanup(struct vhost_dev *dev) trace_vhost_vdpa_cleanup(dev, v); vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs); memory_listener_unregister(&v->listener); + vhost_vdpa_svq_cleanup(dev); dev->opaque = NULL; ram_block_discard_disable(false); @@ -510,12 +620,29 @@ static int vhost_vdpa_set_mem_table(struct vhost_dev *dev, static int vhost_vdpa_set_features(struct vhost_dev *dev, uint64_t features) { + struct vhost_vdpa *v = dev->opaque; int ret; if (vhost_vdpa_one_time_request(dev)) { return 0; } + if (v->shadow_vqs_enabled) { + if ((v->acked_features ^ features) == BIT_ULL(VHOST_F_LOG_ALL)) { + /* + * QEMU is just trying to enable or disable logging. SVQ handles + * this sepparately, so no need to forward this. + */ + v->acked_features = features; + return 0; + } + + v->acked_features = features; + + /* We must not ack _F_LOG if SVQ is enabled */ + features &= ~BIT_ULL(VHOST_F_LOG_ALL); + } + trace_vhost_vdpa_set_features(dev, features); ret = vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features); if (ret) { @@ -559,11 +686,26 @@ static int vhost_vdpa_get_device_id(struct vhost_dev *dev, return ret; } +static void vhost_vdpa_reset_svq(struct vhost_vdpa *v) +{ + if (!v->shadow_vqs_enabled) { + return; + } + + for (unsigned i = 0; i < v->shadow_vqs->len; ++i) { + VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i); + vhost_svq_stop(svq); + } +} + static int vhost_vdpa_reset_device(struct vhost_dev *dev) { + struct vhost_vdpa *v = dev->opaque; int ret; uint8_t status = 0; + vhost_vdpa_reset_svq(v); + ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status); trace_vhost_vdpa_reset_device(dev, status); return ret; @@ -647,15 +789,311 @@ static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config, return ret; } +static int vhost_vdpa_set_dev_vring_base(struct vhost_dev *dev, + struct vhost_vring_state *ring) +{ + trace_vhost_vdpa_set_vring_base(dev, ring->index, ring->num); + return vhost_vdpa_call(dev, VHOST_SET_VRING_BASE, ring); +} + +static int vhost_vdpa_set_vring_dev_kick(struct vhost_dev *dev, + struct vhost_vring_file *file) +{ + trace_vhost_vdpa_set_vring_kick(dev, file->index, file->fd); + return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file); +} + +static int vhost_vdpa_set_vring_dev_call(struct vhost_dev *dev, + struct vhost_vring_file *file) +{ + trace_vhost_vdpa_set_vring_call(dev, file->index, file->fd); + return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file); +} + +static int vhost_vdpa_set_vring_dev_addr(struct vhost_dev *dev, + struct vhost_vring_addr *addr) +{ + trace_vhost_vdpa_set_vring_addr(dev, addr->index, addr->flags, + addr->desc_user_addr, addr->used_user_addr, + addr->avail_user_addr, + addr->log_guest_addr); + + return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr); + +} + +/** + * Set the shadow virtqueue descriptors to the device + * + * @dev: The vhost device 
model + * @svq: The shadow virtqueue + * @idx: The index of the virtqueue in the vhost device + * @errp: Error + * + * Note that this function does not rewind kick file descriptor if cannot set + * call one. + */ +static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev, + VhostShadowVirtqueue *svq, unsigned idx, + Error **errp) +{ + struct vhost_vring_file file = { + .index = dev->vq_index + idx, + }; + const EventNotifier *event_notifier = &svq->hdev_kick; + int r; + + file.fd = event_notifier_get_fd(event_notifier); + r = vhost_vdpa_set_vring_dev_kick(dev, &file); + if (unlikely(r != 0)) { + error_setg_errno(errp, -r, "Can't set device kick fd"); + return r; + } + + event_notifier = &svq->hdev_call; + file.fd = event_notifier_get_fd(event_notifier); + r = vhost_vdpa_set_vring_dev_call(dev, &file); + if (unlikely(r != 0)) { + error_setg_errno(errp, -r, "Can't set device call fd"); + } + + return r; +} + +/** + * Unmap a SVQ area in the device + */ +static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, + const DMAMap *needle) +{ + const DMAMap *result = vhost_iova_tree_find_iova(v->iova_tree, needle); + hwaddr size; + int r; + + if (unlikely(!result)) { + error_report("Unable to find SVQ address to unmap"); + return false; + } + + size = ROUND_UP(result->size, qemu_real_host_page_size); + r = vhost_vdpa_dma_unmap(v, result->iova, size); + return r == 0; +} + +static bool vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev, + const VhostShadowVirtqueue *svq) +{ + DMAMap needle = {}; + struct vhost_vdpa *v = dev->opaque; + struct vhost_vring_addr svq_addr; + bool ok; + + vhost_svq_get_vring_addr(svq, &svq_addr); + + needle.translated_addr = svq_addr.desc_user_addr; + ok = vhost_vdpa_svq_unmap_ring(v, &needle); + if (unlikely(!ok)) { + return false; + } + + needle.translated_addr = svq_addr.used_user_addr; + return vhost_vdpa_svq_unmap_ring(v, &needle); +} + +/** + * Map the SVQ area in the device + * + * @v: Vhost-vdpa device + * @needle: The area to search iova + * @errorp: Error pointer + */ +static bool vhost_vdpa_svq_map_ring(struct vhost_vdpa *v, DMAMap *needle, + Error **errp) +{ + int r; + + r = vhost_iova_tree_map_alloc(v->iova_tree, needle); + if (unlikely(r != IOVA_OK)) { + error_setg(errp, "Cannot allocate iova (%d)", r); + return false; + } + + r = vhost_vdpa_dma_map(v, needle->iova, needle->size + 1, + (void *)(uintptr_t)needle->translated_addr, + needle->perm == IOMMU_RO); + if (unlikely(r != 0)) { + error_setg_errno(errp, -r, "Cannot map region to device"); + vhost_iova_tree_remove(v->iova_tree, needle); + } + + return r == 0; +} + +/** + * Map the shadow virtqueue rings in the device + * + * @dev: The vhost device + * @svq: The shadow virtqueue + * @addr: Assigned IOVA addresses + * @errp: Error pointer + */ +static bool vhost_vdpa_svq_map_rings(struct vhost_dev *dev, + const VhostShadowVirtqueue *svq, + struct vhost_vring_addr *addr, + Error **errp) +{ + DMAMap device_region, driver_region; + struct vhost_vring_addr svq_addr; + struct vhost_vdpa *v = dev->opaque; + size_t device_size = vhost_svq_device_area_size(svq); + size_t driver_size = vhost_svq_driver_area_size(svq); + size_t avail_offset; + bool ok; + + ERRP_GUARD(); + vhost_svq_get_vring_addr(svq, &svq_addr); + + driver_region = (DMAMap) { + .translated_addr = svq_addr.desc_user_addr, + .size = driver_size - 1, + .perm = IOMMU_RO, + }; + ok = vhost_vdpa_svq_map_ring(v, &driver_region, errp); + if (unlikely(!ok)) { + error_prepend(errp, "Cannot create vq driver region: "); + return false; + } + addr->desc_user_addr 
= driver_region.iova; + avail_offset = svq_addr.avail_user_addr - svq_addr.desc_user_addr; + addr->avail_user_addr = driver_region.iova + avail_offset; + + device_region = (DMAMap) { + .translated_addr = svq_addr.used_user_addr, + .size = device_size - 1, + .perm = IOMMU_RW, + }; + ok = vhost_vdpa_svq_map_ring(v, &device_region, errp); + if (unlikely(!ok)) { + error_prepend(errp, "Cannot create vq device region: "); + vhost_vdpa_svq_unmap_ring(v, &driver_region); + } + addr->used_user_addr = device_region.iova; + + return ok; +} + +static bool vhost_vdpa_svq_setup(struct vhost_dev *dev, + VhostShadowVirtqueue *svq, unsigned idx, + Error **errp) +{ + uint16_t vq_index = dev->vq_index + idx; + struct vhost_vring_state s = { + .index = vq_index, + }; + int r; + + r = vhost_vdpa_set_dev_vring_base(dev, &s); + if (unlikely(r)) { + error_setg_errno(errp, -r, "Cannot set vring base"); + return false; + } + + r = vhost_vdpa_svq_set_fds(dev, svq, idx, errp); + return r == 0; +} + +static bool vhost_vdpa_svqs_start(struct vhost_dev *dev) +{ + struct vhost_vdpa *v = dev->opaque; + Error *err = NULL; + unsigned i; + + if (!v->shadow_vqs) { + return true; + } + + for (i = 0; i < v->shadow_vqs->len; ++i) { + VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i); + VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i); + struct vhost_vring_addr addr = { + .index = i, + }; + int r; + bool ok = vhost_vdpa_svq_setup(dev, svq, i, &err); + if (unlikely(!ok)) { + goto err; + } + + vhost_svq_start(svq, dev->vdev, vq); + ok = vhost_vdpa_svq_map_rings(dev, svq, &addr, &err); + if (unlikely(!ok)) { + goto err_map; + } + + /* Override vring GPA set by vhost subsystem */ + r = vhost_vdpa_set_vring_dev_addr(dev, &addr); + if (unlikely(r != 0)) { + error_setg_errno(&err, -r, "Cannot set device address"); + goto err_set_addr; + } + } + + return true; + +err_set_addr: + vhost_vdpa_svq_unmap_rings(dev, g_ptr_array_index(v->shadow_vqs, i)); + +err_map: + vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, i)); + +err: + error_reportf_err(err, "Cannot setup SVQ %u: ", i); + for (unsigned j = 0; j < i; ++j) { + VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, j); + vhost_vdpa_svq_unmap_rings(dev, svq); + vhost_svq_stop(svq); + } + + return false; +} + +static bool vhost_vdpa_svqs_stop(struct vhost_dev *dev) +{ + struct vhost_vdpa *v = dev->opaque; + + if (!v->shadow_vqs) { + return true; + } + + for (unsigned i = 0; i < v->shadow_vqs->len; ++i) { + VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i); + bool ok = vhost_vdpa_svq_unmap_rings(dev, svq); + if (unlikely(!ok)) { + return false; + } + } + + return true; +} + static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) { struct vhost_vdpa *v = dev->opaque; + bool ok; trace_vhost_vdpa_dev_start(dev, started); if (started) { vhost_vdpa_host_notifiers_init(dev); + ok = vhost_vdpa_svqs_start(dev); + if (unlikely(!ok)) { + return -1; + } vhost_vdpa_set_vring_ready(dev); } else { + ok = vhost_vdpa_svqs_stop(dev); + if (unlikely(!ok)) { + return -1; + } vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs); } @@ -679,7 +1117,8 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base, struct vhost_log *log) { - if (vhost_vdpa_one_time_request(dev)) { + struct vhost_vdpa *v = dev->opaque; + if (v->shadow_vqs_enabled || vhost_vdpa_one_time_request(dev)) { return 0; } @@ -691,11 +1130,17 @@ static int vhost_vdpa_set_log_base(struct vhost_dev *dev, 
uint64_t base, static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev, struct vhost_vring_addr *addr) { - trace_vhost_vdpa_set_vring_addr(dev, addr->index, addr->flags, - addr->desc_user_addr, addr->used_user_addr, - addr->avail_user_addr, - addr->log_guest_addr); - return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr); + struct vhost_vdpa *v = dev->opaque; + + if (v->shadow_vqs_enabled) { + /* + * Device vring addr was set at device start. SVQ base is handled by + * VirtQueue code. + */ + return 0; + } + + return vhost_vdpa_set_vring_dev_addr(dev, addr); } static int vhost_vdpa_set_vring_num(struct vhost_dev *dev, @@ -708,15 +1153,41 @@ static int vhost_vdpa_set_vring_num(struct vhost_dev *dev, static int vhost_vdpa_set_vring_base(struct vhost_dev *dev, struct vhost_vring_state *ring) { - trace_vhost_vdpa_set_vring_base(dev, ring->index, ring->num); - return vhost_vdpa_call(dev, VHOST_SET_VRING_BASE, ring); + struct vhost_vdpa *v = dev->opaque; + + if (v->shadow_vqs_enabled) { + /* + * Device vring base was set at device start. SVQ base is handled by + * VirtQueue code. + */ + return 0; + } + + return vhost_vdpa_set_dev_vring_base(dev, ring); } static int vhost_vdpa_get_vring_base(struct vhost_dev *dev, struct vhost_vring_state *ring) { + struct vhost_vdpa *v = dev->opaque; int ret; + if (v->shadow_vqs_enabled) { + VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, + ring->index); + + /* + * Setting base as last used idx, so destination will see as available + * all the entries that the device did not use, including the in-flight + * processing ones. + * + * TODO: This is ok for networking, but other kinds of devices might + * have problems with these retransmissions. + */ + ring->num = svq->last_used_idx; + return 0; + } + ret = vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring); trace_vhost_vdpa_get_vring_base(dev, ring->index, ring->num); return ret; @@ -725,24 +1196,45 @@ static int vhost_vdpa_get_vring_base(struct vhost_dev *dev, static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev, struct vhost_vring_file *file) { - trace_vhost_vdpa_set_vring_kick(dev, file->index, file->fd); - return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file); + struct vhost_vdpa *v = dev->opaque; + int vdpa_idx = file->index - dev->vq_index; + + if (v->shadow_vqs_enabled) { + VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx); + vhost_svq_set_svq_kick_fd(svq, file->fd); + return 0; + } else { + return vhost_vdpa_set_vring_dev_kick(dev, file); + } } static int vhost_vdpa_set_vring_call(struct vhost_dev *dev, struct vhost_vring_file *file) { - trace_vhost_vdpa_set_vring_call(dev, file->index, file->fd); - return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file); + struct vhost_vdpa *v = dev->opaque; + + if (v->shadow_vqs_enabled) { + int vdpa_idx = file->index - dev->vq_index; + VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx); + + vhost_svq_set_svq_call_fd(svq, file->fd); + return 0; + } else { + return vhost_vdpa_set_vring_dev_call(dev, file); + } } static int vhost_vdpa_get_features(struct vhost_dev *dev, uint64_t *features) { - int ret; + struct vhost_vdpa *v = dev->opaque; + int ret = vhost_vdpa_get_dev_features(dev, features); + + if (ret == 0 && v->shadow_vqs_enabled) { + /* Add SVQ logging capabilities */ + *features |= BIT_ULL(VHOST_F_LOG_ALL); + } - ret = vhost_vdpa_call(dev, VHOST_GET_FEATURES, features); - trace_vhost_vdpa_get_features(dev, *features); return ret; } diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 
f9cf9592fd..7cf1231c1c 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -683,10 +683,12 @@ static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy, int ret; if (irqfd->users == 0) { - ret = kvm_irqchip_add_msi_route(kvm_state, vector, &proxy->pci_dev); + KVMRouteChange c = kvm_irqchip_begin_route_changes(kvm_state); + ret = kvm_irqchip_add_msi_route(&c, vector, &proxy->pci_dev); if (ret < 0) { return ret; } + kvm_irqchip_commit_route_changes(&c); irqfd->virq = ret; } irqfd->users++; diff --git a/include/hw/pci-host/pnv_phb4.h b/include/hw/pci-host/pnv_phb4.h index fbcf5bfb55..b02ecdceaa 100644 --- a/include/hw/pci-host/pnv_phb4.h +++ b/include/hw/pci-host/pnv_phb4.h @@ -203,6 +203,7 @@ struct PnvPhb4PecClass { const char *stk_compat; int stk_compat_size; uint64_t version; + const char *phb_type; const uint32_t *num_phbs; const char *rp_model; }; @@ -211,6 +212,10 @@ struct PnvPhb4PecClass { * POWER10 definitions */ +#define TYPE_PNV_PHB5 "pnv-phb5" +#define PNV_PHB5(obj) \ + OBJECT_CHECK(PnvPhb4, (obj), TYPE_PNV_PHB5) + #define PNV_PHB5_VERSION 0x000000a500000001ull #define PNV_PHB5_DEVICE_ID 0x0652 diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h index 1e34ddd502..86cb7d7f97 100644 --- a/include/hw/ppc/pnv.h +++ b/include/hw/ppc/pnv.h @@ -190,7 +190,6 @@ DECLARE_INSTANCE_CHECKER(PnvChip, PNV_CHIP_POWER10, PowerPCCPU *pnv_chip_find_cpu(PnvChip *chip, uint32_t pir); void pnv_phb_attach_root_port(PCIHostState *pci, const char *name); -void pnv_chip_parent_fixup(PnvChip *chip, Object *obj, int index); #define TYPE_PNV_MACHINE MACHINE_TYPE_NAME("powernv") typedef struct PnvMachineClass PnvMachineClass; diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h index 3ce79a646d..a29dbb3f53 100644 --- a/include/hw/virtio/vhost-vdpa.h +++ b/include/hw/virtio/vhost-vdpa.h @@ -12,6 +12,9 @@ #ifndef HW_VIRTIO_VHOST_VDPA_H #define HW_VIRTIO_VHOST_VDPA_H +#include <gmodule.h> + +#include "hw/virtio/vhost-iova-tree.h" #include "hw/virtio/virtio.h" #include "standard-headers/linux/vhost_types.h" @@ -27,6 +30,11 @@ typedef struct vhost_vdpa { bool iotlb_batch_begin_sent; MemoryListener listener; struct vhost_vdpa_iova_range iova_range; + uint64_t acked_features; + bool shadow_vqs_enabled; + /* IOVA mapping used by the Shadow Virtqueue */ + VhostIOVATree *iova_tree; + GPtrArray *shadow_vqs; struct vhost_dev *dev; VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX]; } VhostVDPA; diff --git a/include/qemu/iova-tree.h b/include/qemu/iova-tree.h index 8249edd764..c938fb0793 100644 --- a/include/qemu/iova-tree.h +++ b/include/qemu/iova-tree.h @@ -29,6 +29,7 @@ #define IOVA_OK (0) #define IOVA_ERR_INVALID (-1) /* Invalid parameters */ #define IOVA_ERR_OVERLAP (-2) /* IOVA range overlapped */ +#define IOVA_ERR_NOMEM (-3) /* Cannot allocate */ typedef struct IOVATree IOVATree; typedef struct DMAMap { @@ -82,7 +83,7 @@ int iova_tree_remove(IOVATree *tree, const DMAMap *map); * @tree: the iova tree to search from * @map: the mapping to search * - * Search for a mapping in the iova tree that overlaps with the + * Search for a mapping in the iova tree that iova overlaps with the * mapping range specified. Only the first found mapping will be * returned. 
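(Aside, not part of the patch: a minimal sketch of the IOVATree API documented above, assuming compilation inside the QEMU tree. The addresses, sizes and permissions are made-up illustration values; note that DMAMap.size is used as an inclusive last offset, matching the "device_size - 1" usage in the vhost-vdpa hunk above.)

#include "qemu/osdep.h"
#include "qemu/iova-tree.h"

static void iova_tree_example(void)
{
    IOVATree *tree = iova_tree_new();
    DMAMap map = {
        .iova = 0x100000,
        .translated_addr = 0xa0000000,
        .size = 0xfff,               /* inclusive: covers 0x1000 bytes */
        .perm = IOMMU_RW,
    };
    DMAMap needle = { .iova = 0x100800, .size = 0 };

    g_assert(iova_tree_insert(tree, &map) == IOVA_OK);
    /* iova_tree_find() matches on the iova range, as documented above */
    g_assert(iova_tree_find(tree, &needle) != NULL);
    iova_tree_destroy(tree);
}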
* @@ -95,6 +96,24 @@ int iova_tree_remove(IOVATree *tree, const DMAMap *map); const DMAMap *iova_tree_find(const IOVATree *tree, const DMAMap *map); /** + * iova_tree_find_iova: + * + * @tree: the iova tree to search from + * @map: the mapping to search + * + * Search for a mapping in the iova tree that translated_addr overlaps with the + * mapping range specified. Only the first found mapping will be + * returned. + * + * Return: DMAMap pointer if found, or NULL if not found. Note that + * the returned DMAMap pointer is maintained internally. User should + * only read the content but never modify or free the content. Also, + * user is responsible to make sure the pointer is valid (say, no + * concurrent deletion in progress). + */ +const DMAMap *iova_tree_find_iova(const IOVATree *tree, const DMAMap *map); + +/** * iova_tree_find_address: * * @tree: the iova tree to search from @@ -120,6 +139,23 @@ const DMAMap *iova_tree_find_address(const IOVATree *tree, hwaddr iova); void iova_tree_foreach(IOVATree *tree, iova_tree_iterator iterator); /** + * iova_tree_alloc_map: + * + * @tree: the iova tree to allocate from + * @map: the new map (as translated addr & size) to allocate in the iova region + * @iova_begin: the minimum address of the allocation + * @iova_end: the maximum addressable direction of the allocation + * + * Allocates a new region of a given size, between iova_min and iova_max. + * + * Return: Same as iova_tree_insert, but cannot overlap and can return error if + * iova tree is out of free contiguous range. The caller gets the assigned iova + * in map->iova. + */ +int iova_tree_alloc_map(IOVATree *tree, DMAMap *map, hwaddr iova_begin, + hwaddr iova_end); + +/** * iova_tree_destroy: * * @tree: the iova tree to destroy diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index a5bec96fb0..a783c78868 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -224,6 +224,11 @@ DECLARE_INSTANCE_CHECKER(KVMState, KVM_STATE, extern KVMState *kvm_state; typedef struct Notifier Notifier; +typedef struct KVMRouteChange { + KVMState *s; + int changes; +} KVMRouteChange; + /* external API */ bool kvm_has_free_slot(MachineState *ms); @@ -481,7 +486,7 @@ void kvm_init_cpu_signals(CPUState *cpu); /** * kvm_irqchip_add_msi_route - Add MSI route for specific vector - * @s: KVM state + * @c: KVMRouteChange instance. * @vector: which vector to add. This can be either MSI/MSIX * vector. The function will automatically detect whether * MSI/MSIX is enabled, and fetch corresponding MSI @@ -490,10 +495,24 @@ void kvm_init_cpu_signals(CPUState *cpu); * as @NULL, an empty MSI message will be inited. * @return: virq (>=0) when success, errno (<0) when failed. 
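(Aside, not part of the patch: a sketch of the calling convention the new KVMRouteChange API expects, mirroring the virtio-pci hunk above. The vector loop, virqs array and pci_dev are placeholder assumptions; only the begin/add/commit calls come from this series.)

#include "qemu/osdep.h"
#include "hw/pci/pci.h"
#include "sysemu/kvm.h"

static int add_msi_routes(PCIDevice *pci_dev, int nvectors, int *virqs)
{
    KVMRouteChange c = kvm_irqchip_begin_route_changes(kvm_state);

    for (int i = 0; i < nvectors; i++) {
        int virq = kvm_irqchip_add_msi_route(&c, i, pci_dev);
        if (virq < 0) {
            return virq;    /* no KVM_SET_GSI_ROUTING issued yet */
        }
        virqs[i] = virq;
    }
    /* One routing-table commit for the whole batch, not one per vector. */
    kvm_irqchip_commit_route_changes(&c);
    return 0;
}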
*/ -int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev); +int kvm_irqchip_add_msi_route(KVMRouteChange *c, int vector, PCIDevice *dev); int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg, PCIDevice *dev); void kvm_irqchip_commit_routes(KVMState *s); + +static inline KVMRouteChange kvm_irqchip_begin_route_changes(KVMState *s) +{ + return (KVMRouteChange) { .s = s, .changes = 0 }; +} + +static inline void kvm_irqchip_commit_route_changes(KVMRouteChange *c) +{ + if (c->changes) { + kvm_irqchip_commit_routes(c->s); + c->changes = 0; + } +} + void kvm_irqchip_release_virq(KVMState *s, int virq); int kvm_irqchip_add_adapter_route(KVMState *s, AdapterInfo *adapter); diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h index 2da3316bb5..bf6e96011d 100644 --- a/linux-headers/asm-x86/kvm.h +++ b/linux-headers/asm-x86/kvm.h @@ -452,6 +452,9 @@ struct kvm_sync_regs { #define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001 +/* attributes for system fd (group 0) */ +#define KVM_X86_XCOMP_GUEST_SUPP 0 + struct kvm_vmx_nested_state_data { __u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; __u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index 00af3bc333..d232feaae9 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -1133,6 +1133,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206 #define KVM_CAP_VM_GPA_BITS 207 #define KVM_CAP_XSAVE2 208 +#define KVM_CAP_SYS_ATTRIBUTES 209 #ifdef KVM_CAP_IRQ_ROUTING @@ -2047,4 +2048,7 @@ struct kvm_stats_desc { #define KVM_GET_STATS_FD _IO(KVMIO, 0xce) +/* Available with KVM_CAP_XSAVE2 */ +#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave) + #endif /* __LINUX_KVM_H */ diff --git a/meson.build b/meson.build index 2d6601467f..ad4327888f 100644 --- a/meson.build +++ b/meson.build @@ -1954,12 +1954,15 @@ config_host_data.set('CONFIG_AF_VSOCK', cc.compiles(gnu_source_prefix + ''' }''')) have_vss = false +have_vss_sdk = false # old xp/2003 SDK if targetos == 'windows' and link_language == 'cpp' have_vss = cxx.compiles(''' #define __MIDL_user_allocate_free_DEFINED__ - #include <inc/win2003/vss.h> + #include <vss.h> int main(void) { return VSS_CTX_BACKUP; }''') + have_vss_sdk = cxx.has_header('vscoordint.h') endif +config_host_data.set('HAVE_VSS_SDK', have_vss_sdk) have_ntddscsi = false if targetos == 'windows' diff --git a/pc-bios/edk2-aarch64-code.fd.bz2 b/pc-bios/edk2-aarch64-code.fd.bz2 index 5bf311464a..0262f5bd8f 100644 --- a/pc-bios/edk2-aarch64-code.fd.bz2 +++ b/pc-bios/edk2-aarch64-code.fd.bz2 Binary files differdiff --git a/pc-bios/edk2-arm-code.fd.bz2 b/pc-bios/edk2-arm-code.fd.bz2 index 7a98069814..4ca97b43ea 100644 --- a/pc-bios/edk2-arm-code.fd.bz2 +++ b/pc-bios/edk2-arm-code.fd.bz2 Binary files differdiff --git a/pc-bios/edk2-i386-code.fd.bz2 b/pc-bios/edk2-i386-code.fd.bz2 index e7b1befe2c..6e02c9b995 100644 --- a/pc-bios/edk2-i386-code.fd.bz2 +++ b/pc-bios/edk2-i386-code.fd.bz2 Binary files differdiff --git a/pc-bios/edk2-i386-secure-code.fd.bz2 b/pc-bios/edk2-i386-secure-code.fd.bz2 index b5df5bed30..a4b1cc92bd 100644 --- a/pc-bios/edk2-i386-secure-code.fd.bz2 +++ b/pc-bios/edk2-i386-secure-code.fd.bz2 Binary files differdiff --git a/pc-bios/edk2-x86_64-code.fd.bz2 b/pc-bios/edk2-x86_64-code.fd.bz2 index e1654d4003..37bfb0dbed 100644 --- a/pc-bios/edk2-x86_64-code.fd.bz2 +++ b/pc-bios/edk2-x86_64-code.fd.bz2 Binary files differdiff --git a/pc-bios/edk2-x86_64-microvm.fd.bz2 
b/pc-bios/edk2-x86_64-microvm.fd.bz2 new file mode 100644 index 0000000000..1d65c61ded --- /dev/null +++ b/pc-bios/edk2-x86_64-microvm.fd.bz2 Binary files differdiff --git a/pc-bios/edk2-x86_64-secure-code.fd.bz2 b/pc-bios/edk2-x86_64-secure-code.fd.bz2 index 767274c38c..76dc6d5aad 100644 --- a/pc-bios/edk2-x86_64-secure-code.fd.bz2 +++ b/pc-bios/edk2-x86_64-secure-code.fd.bz2 Binary files differdiff --git a/qga/meson.build b/qga/meson.build index 54f2da5b07..62472747f1 100644 --- a/qga/meson.build +++ b/qga/meson.build @@ -15,7 +15,7 @@ have_qga_vss = get_option('qga_vss') \ If your Visual Studio installation doesn't have the VSS headers, Please download and install Microsoft VSS SDK: http://www.microsoft.com/en-us/download/details.aspx?id=23490 - On POSIX-systems, MinGW doesn't yet provide working headers. + On POSIX-systems, MinGW should provide headers in >=10.0 releases. you can extract the SDK headers by: $ scripts/extract-vsssdk-headers setup.exe The headers are extracted in the directory 'inc/win2003'. diff --git a/qga/vss-win32/install.cpp b/qga/vss-win32/install.cpp index efc5bb9909..8076efe3cb 100644 --- a/qga/vss-win32/install.cpp +++ b/qga/vss-win32/install.cpp @@ -13,7 +13,11 @@ #include "qemu/osdep.h" #include "vss-common.h" +#ifdef HAVE_VSS_SDK #include <vscoordint.h> +#else +#include <vsadmin.h> +#endif #include "install.h" #include <wbemidl.h> #include <comdef.h> diff --git a/qga/vss-win32/provider.cpp b/qga/vss-win32/provider.cpp index fd187fb66f..1b885e24ee 100644 --- a/qga/vss-win32/provider.cpp +++ b/qga/vss-win32/provider.cpp @@ -12,7 +12,11 @@ #include "qemu/osdep.h" #include "vss-common.h" +#ifdef HAVE_VSS_SDK #include <vscoordint.h> +#else +#include <vsadmin.h> +#endif #include <vsprov.h> #define VSS_TIMEOUT_MSEC (60*1000) diff --git a/qga/vss-win32/vss-common.h b/qga/vss-win32/vss-common.h index 54f8de8c88..0e67e7822c 100644 --- a/qga/vss-win32/vss-common.h +++ b/qga/vss-win32/vss-common.h @@ -64,12 +64,13 @@ const CLSID CLSID_QGAVSSProvider = { 0x6e6a3492, 0x8d4d, 0x440c, const TCHAR g_szClsid[] = TEXT("{6E6A3492-8D4D-440C-9619-5E5D0CC31CA8}"); const TCHAR g_szProgid[] = TEXT("QGAVSSProvider"); +#ifdef HAVE_VSS_SDK /* Enums undefined in VSS SDK 7.2 but defined in newer Windows SDK */ enum __VSS_VOLUME_SNAPSHOT_ATTRIBUTES { VSS_VOLSNAP_ATTR_NO_AUTORECOVERY = 0x00000002, VSS_VOLSNAP_ATTR_TXF_RECOVERY = 0x02000000 }; - +#endif /* COM pointer utility; call ->Release() when it goes out of scope */ template <class T> diff --git a/roms/Makefile.edk2 b/roms/Makefile.edk2 index fdae0b511f..485f2244b1 100644 --- a/roms/Makefile.edk2 +++ b/roms/Makefile.edk2 @@ -13,6 +13,7 @@ SHELL = /bin/bash +target = RELEASE toolchain = $(shell source ./edk2-funcs.sh && qemu_edk2_get_toolchain $(1)) licenses := \ @@ -32,6 +33,7 @@ flashdevs := \ i386-secure-code \ x86_64-code \ x86_64-secure-code \ + x86_64-microvm \ \ arm-vars \ i386-vars @@ -50,7 +52,7 @@ all: $(foreach flashdev,$(flashdevs),../pc-bios/edk2-$(flashdev).fd.bz2) \ # we're building from a tarball and that they've already been fetched by # make-release/tarball scripts. 
submodules: - if test -d edk2/.git; then \ + if test -e edk2/.git; then \ cd edk2 && git submodule update --init --force -- \ ArmPkg/Library/ArmSoftFloatLib/berkeley-softfloat-3 \ BaseTools/Source/C/BrotliCompress/brotli \ @@ -73,7 +75,7 @@ submodules: -D NETWORK_TLS_ENABLE \ -D TPM2_ENABLE \ -D TPM2_CONFIG_ENABLE - cp edk2/Build/ArmVirtQemu-AARCH64/DEBUG_$(call toolchain,aarch64)/FV/QEMU_EFI.fd \ + cp edk2/Build/ArmVirtQemu-AARCH64/$(target)_$(call toolchain,aarch64)/FV/QEMU_EFI.fd \ $@ truncate --size=64M $@ @@ -87,7 +89,7 @@ submodules: -D NETWORK_TLS_ENABLE \ -D TPM2_ENABLE \ -D TPM2_CONFIG_ENABLE - cp edk2/Build/ArmVirtQemu-ARM/DEBUG_$(call toolchain,arm)/FV/QEMU_EFI.fd \ + cp edk2/Build/ArmVirtQemu-ARM/$(target)_$(call toolchain,arm)/FV/QEMU_EFI.fd \ $@ truncate --size=64M $@ @@ -101,7 +103,7 @@ submodules: -D NETWORK_TLS_ENABLE \ -D TPM_ENABLE \ -D TPM_CONFIG_ENABLE - cp edk2/Build/OvmfIa32/DEBUG_$(call toolchain,i386)/FV/OVMF_CODE.fd $@ + cp edk2/Build/OvmfIa32/$(target)_$(call toolchain,i386)/FV/OVMF_CODE.fd $@ ../pc-bios/edk2-i386-secure-code.fd: submodules +./edk2-build.sh \ @@ -115,7 +117,7 @@ submodules: -D TPM_CONFIG_ENABLE \ -D SECURE_BOOT_ENABLE \ -D SMM_REQUIRE - cp edk2/Build/OvmfIa32/DEBUG_$(call toolchain,i386)/FV/OVMF_CODE.fd $@ + cp edk2/Build/OvmfIa32/$(target)_$(call toolchain,i386)/FV/OVMF_CODE.fd $@ ../pc-bios/edk2-x86_64-code.fd: submodules +./edk2-build.sh \ @@ -127,7 +129,7 @@ submodules: -D NETWORK_TLS_ENABLE \ -D TPM_ENABLE \ -D TPM_CONFIG_ENABLE - cp edk2/Build/OvmfX64/DEBUG_$(call toolchain,x86_64)/FV/OVMF_CODE.fd $@ + cp edk2/Build/OvmfX64/$(target)_$(call toolchain,x86_64)/FV/OVMF_CODE.fd $@ ../pc-bios/edk2-x86_64-secure-code.fd: submodules +./edk2-build.sh \ @@ -142,15 +144,25 @@ submodules: -D TPM_CONFIG_ENABLE \ -D SECURE_BOOT_ENABLE \ -D SMM_REQUIRE - cp edk2/Build/Ovmf3264/DEBUG_$(call toolchain,x86_64)/FV/OVMF_CODE.fd $@ + cp edk2/Build/Ovmf3264/$(target)_$(call toolchain,x86_64)/FV/OVMF_CODE.fd $@ + +../pc-bios/edk2-x86_64-microvm.fd: submodules + +./edk2-build.sh \ + x86_64 \ + --arch=X64 \ + --platform=OvmfPkg/Microvm/MicrovmX64.dsc \ + -D NETWORK_IP6_ENABLE \ + -D NETWORK_HTTP_BOOT_ENABLE \ + -D NETWORK_TLS_ENABLE + cp edk2/Build/MicrovmX64/$(target)_$(call toolchain,x86_64)/FV/MICROVM.fd $@ ../pc-bios/edk2-arm-vars.fd: ../pc-bios/edk2-arm-code.fd - cp edk2/Build/ArmVirtQemu-ARM/DEBUG_$(call toolchain,arm)/FV/QEMU_VARS.fd \ + cp edk2/Build/ArmVirtQemu-ARM/$(target)_$(call toolchain,arm)/FV/QEMU_VARS.fd \ $@ truncate --size=64M $@ ../pc-bios/edk2-i386-vars.fd: ../pc-bios/edk2-i386-code.fd - cp edk2/Build/OvmfIa32/DEBUG_$(call toolchain,i386)/FV/OVMF_VARS.fd $@ + cp edk2/Build/OvmfIa32/$(target)_$(call toolchain,i386)/FV/OVMF_VARS.fd $@ # The license file accumulates several individual licenses from under edk2, # prefixing each individual license with a header (generated by "tail") that diff --git a/roms/edk2 b/roms/edk2 -Subproject 06dc822d045c2bb42e497487935485302486e15 +Subproject b24306f15daa2ff8510b06702114724b33895d3 diff --git a/roms/edk2-build.sh b/roms/edk2-build.sh index d5391c7637..ea79dc27a2 100755 --- a/roms/edk2-build.sh +++ b/roms/edk2-build.sh @@ -50,6 +50,6 @@ qemu_edk2_set_cross_env "$emulation_target" build \ --cmd-len=65536 \ -n "$edk2_thread_count" \ - --buildtarget=DEBUG \ + --buildtarget=RELEASE \ --tagname="$edk2_toolchain" \ "${args[@]}" diff --git a/scripts/kvm/vmxcap b/scripts/kvm/vmxcap index 6fe66d5f57..f140040104 100755 --- a/scripts/kvm/vmxcap +++ b/scripts/kvm/vmxcap @@ -249,6 +249,7 @@ controls = [ bits = { 0: 
'Execute-only EPT translations', 6: 'Page-walk length 4', + 7: 'Page-walk length 5', 8: 'Paging-structure memory type UC', 14: 'Paging-structure memory type WB', 16: '2MB EPT pages', diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh index 9ee684ef03..1e26f4571e 100644 --- a/scripts/meson-buildoptions.sh +++ b/scripts/meson-buildoptions.sh @@ -20,7 +20,6 @@ meson_options_help() { printf "%s\n" ' --enable-malloc=CHOICE choose memory allocator to use [system] (choices:' printf "%s\n" ' jemalloc/system/tcmalloc)' printf "%s\n" ' --enable-profiler profiler support' - printf "%s\n" ' --enable-qga-vss build QGA VSS support' printf "%s\n" ' --enable-qom-cast-debug cast debugging support' printf "%s\n" ' --enable-rng-none dummy RNG, avoid using /dev/(u)random and' printf "%s\n" ' getrandom()' @@ -97,6 +96,7 @@ meson_options_help() { printf "%s\n" ' parallels parallels image format support' printf "%s\n" ' qcow1 qcow1 image format support' printf "%s\n" ' qed qed image format support' + printf "%s\n" ' qga-vss build QGA VSS support (broken with MinGW)' printf "%s\n" ' rbd Ceph block device driver' printf "%s\n" ' replication replication support' printf "%s\n" ' sdl SDL user interface' diff --git a/target/hexagon/arch.c b/target/hexagon/arch.c index 68a55b3bd4..da79b41c4d 100644 --- a/target/hexagon/arch.c +++ b/target/hexagon/arch.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -298,8 +298,8 @@ int arch_sf_recip_common(float32 *Rs, float32 *Rt, float32 *Rd, int *adjust, } else { PeV = 0x00; /* Basic checks passed */ - n_exp = float32_getexp(RsV); - d_exp = float32_getexp(RtV); + n_exp = float32_getexp_raw(RsV); + d_exp = float32_getexp_raw(RtV); if ((n_exp - d_exp + SF_BIAS) <= SF_MANTBITS) { /* Near quotient underflow / inexact Q */ PeV = 0x80; diff --git a/target/hexagon/fma_emu.h b/target/hexagon/fma_emu.h index e3b99a8cf4..91591d6050 100644 --- a/target/hexagon/fma_emu.h +++ b/target/hexagon/fma_emu.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -24,6 +24,10 @@ static inline bool is_finite(float64 x) } int32_t float64_getexp(float64 f64); +static inline uint32_t float32_getexp_raw(float32 f32) +{ + return extract32(f32, 23, 8); +} int32_t float32_getexp(float32 f32); float32 infinite_float32(uint8_t sign); float32 internal_fmafx(float32 a, float32 b, float32 c, diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c index 4419d30e23..cd6af4bceb 100644 --- a/target/hexagon/genptr.c +++ b/target/hexagon/genptr.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -210,11 +210,15 @@ static inline void gen_read_ctrl_reg_pair(DisasContext *ctx, const int reg_num, } } -static inline void gen_write_p3_0(TCGv control_reg) +static void gen_write_p3_0(DisasContext *ctx, TCGv control_reg) { + TCGv hex_p8 = tcg_temp_new(); for (int i = 0; i < NUM_PREGS; i++) { - tcg_gen_extract_tl(hex_pred[i], control_reg, i * 8, 8); + tcg_gen_extract_tl(hex_p8, control_reg, i * 8, 8); + gen_log_pred_write(ctx, i, hex_p8); + ctx_log_pred_write(ctx, i); } + tcg_temp_free(hex_p8); } /* @@ -228,7 +232,7 @@ static inline void gen_write_ctrl_reg(DisasContext *ctx, int reg_num, TCGv val) { if (reg_num == HEX_REG_P3_0) { - gen_write_p3_0(val); + gen_write_p3_0(ctx, val); } else { gen_log_reg_write(reg_num, val); ctx_log_reg_write(ctx, reg_num); @@ -250,7 +254,7 @@ static inline void gen_write_ctrl_reg_pair(DisasContext *ctx, int reg_num, if (reg_num == HEX_REG_P3_0) { TCGv val32 = tcg_temp_new(); tcg_gen_extrl_i64_i32(val32, val); - gen_write_p3_0(val32); + gen_write_p3_0(ctx, val32); tcg_gen_extrh_i64_i32(val32, val); gen_log_reg_write(reg_num + 1, val32); tcg_temp_free(val32); diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h index 19d103cad5..a78e84faa4 100644 --- a/target/hexagon/macros.h +++ b/target/hexagon/macros.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -268,7 +268,7 @@ static inline void gen_pred_cancel(TCGv pred, int slot_num) #define fVSATUVALN(N, VAL) \ ({ \ - (((int)(VAL)) < 0) ? 0 : ((1LL << (N)) - 1); \ + (((int64_t)(VAL)) < 0) ? 0 : ((1LL << (N)) - 1); \ }) #define fSATUVALN(N, VAL) \ ({ \ diff --git a/target/hexagon/mmvec/macros.h b/target/hexagon/mmvec/macros.h index 10f4630364..8345753580 100644 --- a/target/hexagon/mmvec/macros.h +++ b/target/hexagon/mmvec/macros.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -164,11 +164,9 @@ target_ulong va = EA; \ target_ulong va_high = EA + LEN; \ uintptr_t ra = GETPC(); \ - int log_bank = 0; \ int log_byte = 0; \ for (i0 = 0; i0 < ELEMENT_SIZE; i0++) { \ log_byte = ((va + i0) <= va_high) && QVAL; \ - log_bank |= (log_byte << i0); \ uint8_t B; \ B = cpu_ldub_data_ra(env, EA + i0, ra); \ env->tmp_VRegs[0].ub[ELEMENT_SIZE * IDX + i0] = B; \ @@ -243,11 +241,9 @@ int i0; \ target_ulong va = EA; \ target_ulong va_high = EA + LEN; \ - int log_bank = 0; \ int log_byte = 0; \ for (i0 = 0; i0 < ELEM_SIZE; i0++) { \ log_byte = ((va + i0) <= va_high) && QVAL; \ - log_bank |= (log_byte << i0); \ LOG_VTCM_BYTE(va + i0, log_byte, IN.ub[ELEM_SIZE * IDX + i0], \ ELEM_SIZE * IDX + i0); \ } \ diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index 057baf9a48..63e5ad5d68 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. 
All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -304,8 +304,8 @@ void HELPER(debug_commit_end)(CPUHexagonState *env, int has_st0, int has_st1) int32_t HELPER(fcircadd)(int32_t RxV, int32_t offset, int32_t M, int32_t CS) { - int32_t K_const = sextract32(M, 24, 4); - int32_t length = sextract32(M, 0, 17); + uint32_t K_const = extract32(M, 24, 4); + uint32_t length = extract32(M, 0, 17); uint32_t new_ptr = RxV + offset; uint32_t start_addr; uint32_t end_addr; @@ -829,7 +829,7 @@ uint32_t HELPER(conv_df2uw_chop)(CPUHexagonState *env, float64 RssV) uint32_t RdV; arch_fpop_start(env); /* Hexagon checks the sign before rounding */ - if (float64_is_neg(RssV) && !float32_is_any_nan(RssV)) { + if (float64_is_neg(RssV) && !float64_is_any_nan(RssV)) { float_raise(float_flag_invalid, &env->fp_status); RdV = 0; } else { @@ -938,8 +938,7 @@ int32_t HELPER(sfcmpuo)(CPUHexagonState *env, float32 RsV, float32 RtV) { int32_t PdV; arch_fpop_start(env); - PdV = f8BITSOF(float32_is_any_nan(RsV) || - float32_is_any_nan(RtV)); + PdV = f8BITSOF(float32_unordered_quiet(RsV, RtV, &env->fp_status)); arch_fpop_end(env); return PdV; } @@ -948,7 +947,7 @@ float32 HELPER(sfmax)(CPUHexagonState *env, float32 RsV, float32 RtV) { float32 RdV; arch_fpop_start(env); - RdV = float32_maxnum(RsV, RtV, &env->fp_status); + RdV = float32_maximum_number(RsV, RtV, &env->fp_status); arch_fpop_end(env); return RdV; } @@ -957,7 +956,7 @@ float32 HELPER(sfmin)(CPUHexagonState *env, float32 RsV, float32 RtV) { float32 RdV; arch_fpop_start(env); - RdV = float32_minnum(RsV, RtV, &env->fp_status); + RdV = float32_minimum_number(RsV, RtV, &env->fp_status); arch_fpop_end(env); return RdV; } @@ -1041,10 +1040,7 @@ float64 HELPER(dfmax)(CPUHexagonState *env, float64 RssV, float64 RttV) { float64 RddV; arch_fpop_start(env); - RddV = float64_maxnum(RssV, RttV, &env->fp_status); - if (float64_is_any_nan(RssV) || float64_is_any_nan(RttV)) { - float_raise(float_flag_invalid, &env->fp_status); - } + RddV = float64_maximum_number(RssV, RttV, &env->fp_status); arch_fpop_end(env); return RddV; } @@ -1053,10 +1049,7 @@ float64 HELPER(dfmin)(CPUHexagonState *env, float64 RssV, float64 RttV) { float64 RddV; arch_fpop_start(env); - RddV = float64_minnum(RssV, RttV, &env->fp_status); - if (float64_is_any_nan(RssV) || float64_is_any_nan(RttV)) { - float_raise(float_flag_invalid, &env->fp_status); - } + RddV = float64_minimum_number(RssV, RttV, &env->fp_status); arch_fpop_end(env); return RddV; } @@ -1097,8 +1090,7 @@ int32_t HELPER(dfcmpuo)(CPUHexagonState *env, float64 RssV, float64 RttV) { int32_t PdV; arch_fpop_start(env); - PdV = f8BITSOF(float64_is_any_nan(RssV) || - float64_is_any_nan(RttV)); + PdV = f8BITSOF(float64_unordered_quiet(RssV, RttV, &env->fp_status)); arch_fpop_end(env); return PdV; } diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 6c7ef1099b..a88d6554c8 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -575,6 +575,18 @@ static CPUCacheInfo legacy_l3_cache = { #define INTEL_PT_CYCLE_BITMAP 0x1fff /* Support 0,2^(0~11) */ #define INTEL_PT_PSB_BITMAP (0x003f << 16) /* Support 2K,4K,8K,16K,32K,64K */ +/* CPUID Leaf 0x1D constants: */ +#define INTEL_AMX_TILE_MAX_SUBLEAF 0x1 +#define INTEL_AMX_TOTAL_TILE_BYTES 0x2000 +#define INTEL_AMX_BYTES_PER_TILE 0x400 +#define INTEL_AMX_BYTES_PER_ROW 0x40 +#define INTEL_AMX_TILE_MAX_NAMES 0x8 +#define INTEL_AMX_TILE_MAX_ROWS 0x10 + +/* CPUID Leaf 0x1E constants: */ +#define 
INTEL_AMX_TMUL_MAX_K 0x10 +#define INTEL_AMX_TMUL_MAX_N 0x40 + void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, uint32_t vendor2, uint32_t vendor3) { @@ -844,8 +856,8 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { "avx512-vp2intersect", NULL, "md-clear", NULL, NULL, NULL, "serialize", NULL, "tsx-ldtrk", NULL, NULL /* pconfig */, NULL, - NULL, NULL, NULL, "avx512-fp16", - NULL, NULL, "spec-ctrl", "stibp", + NULL, NULL, "amx-bf16", "avx512-fp16", + "amx-tile", "amx-int8", "spec-ctrl", "stibp", NULL, "arch-capabilities", "core-capability", "ssbd", }, .cpuid = { @@ -910,7 +922,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { .type = CPUID_FEATURE_WORD, .feat_names = { "xsaveopt", "xsavec", "xgetbv1", "xsaves", - NULL, NULL, NULL, NULL, + "xfd", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -1402,6 +1414,14 @@ ExtSaveArea x86_ext_save_areas[XSAVE_STATE_AREA_COUNT] = { [XSTATE_PKRU_BIT] = { .feature = FEAT_7_0_ECX, .bits = CPUID_7_0_ECX_PKU, .size = sizeof(XSavePKRU) }, + [XSTATE_XTILE_CFG_BIT] = { + .feature = FEAT_7_0_EDX, .bits = CPUID_7_0_EDX_AMX_TILE, + .size = sizeof(XSaveXTILECFG), + }, + [XSTATE_XTILE_DATA_BIT] = { + .feature = FEAT_7_0_EDX, .bits = CPUID_7_0_EDX_AMX_TILE, + .size = sizeof(XSaveXTILEDATA) + }, }; static uint32_t xsave_area_size(uint64_t mask) @@ -3506,6 +3526,14 @@ static const X86CPUDefinition builtin_x86_defs[] = { { /* end of list */ } }, }, + { + .version = 6, + .note = "5-level EPT", + .props = (PropValue[]) { + { "vmx-page-walk-5", "on" }, + { /* end of list */ } + }, + }, { /* end of list */ } } }, @@ -5488,6 +5516,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, const ExtSaveArea *esa = &x86_ext_save_areas[count]; *eax = esa->size; *ebx = esa->offset; + *ecx = esa->ecx & + (ESA_FEATURE_ALIGN64_MASK | ESA_FEATURE_XFD_MASK); } } break; @@ -5576,6 +5606,43 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, } break; } + case 0x1D: { + /* AMX TILE */ + *eax = 0; + *ebx = 0; + *ecx = 0; + *edx = 0; + if (!(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_AMX_TILE)) { + break; + } + + if (count == 0) { + /* Highest numbered palette subleaf */ + *eax = INTEL_AMX_TILE_MAX_SUBLEAF; + } else if (count == 1) { + *eax = INTEL_AMX_TOTAL_TILE_BYTES | + (INTEL_AMX_BYTES_PER_TILE << 16); + *ebx = INTEL_AMX_BYTES_PER_ROW | (INTEL_AMX_TILE_MAX_NAMES << 16); + *ecx = INTEL_AMX_TILE_MAX_ROWS; + } + break; + } + case 0x1E: { + /* AMX TMUL */ + *eax = 0; + *ebx = 0; + *ecx = 0; + *edx = 0; + if (!(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_AMX_TILE)) { + break; + } + + if (count == 0) { + /* Highest numbered palette subleaf */ + *ebx = INTEL_AMX_TMUL_MAX_K | (INTEL_AMX_TMUL_MAX_N << 8); + } + break; + } case 0x40000000: /* * CPUID code in kvm_arch_init_vcpu() ignores stuff @@ -5930,9 +5997,7 @@ static void x86_cpu_reset(DeviceState *dev) x86_cpu_set_sgxlepubkeyhash(env); - if (env->features[FEAT_SVM] & CPUID_SVM_TSCSCALE) { - env->amd_tsc_scale_msr = MSR_AMD64_TSC_RATIO_DEFAULT; - } + env->amd_tsc_scale_msr = MSR_AMD64_TSC_RATIO_DEFAULT; #endif } @@ -5998,6 +6063,7 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu) CPUX86State *env = &cpu->env; int i; uint64_t mask; + static bool request_perm; if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE)) { env->features[FEAT_XSAVE_COMP_LO] = 0; @@ -6013,6 +6079,12 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu) } } + /* Only request permission for first vcpu */ + if (kvm_enabled() && !request_perm) { + 
kvm_request_xsave_components(cpu, mask); + request_perm = true; + } + env->features[FEAT_XSAVE_COMP_LO] = mask; env->features[FEAT_XSAVE_COMP_HI] = mask >> 32; } diff --git a/target/i386/cpu.h b/target/i386/cpu.h index e11734ba86..5e406088a9 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -507,6 +507,9 @@ typedef enum X86Seg { #define MSR_VM_HSAVE_PA 0xc0010117 +#define MSR_IA32_XFD 0x000001c4 +#define MSR_IA32_XFD_ERR 0x000001c5 + #define MSR_IA32_BNDCFGS 0x00000d90 #define MSR_IA32_XSS 0x00000da0 #define MSR_IA32_UMWAIT_CONTROL 0xe1 @@ -539,6 +542,8 @@ typedef enum X86Seg { #define XSTATE_ZMM_Hi256_BIT 6 #define XSTATE_Hi16_ZMM_BIT 7 #define XSTATE_PKRU_BIT 9 +#define XSTATE_XTILE_CFG_BIT 17 +#define XSTATE_XTILE_DATA_BIT 18 #define XSTATE_FP_MASK (1ULL << XSTATE_FP_BIT) #define XSTATE_SSE_MASK (1ULL << XSTATE_SSE_BIT) @@ -549,6 +554,17 @@ typedef enum X86Seg { #define XSTATE_ZMM_Hi256_MASK (1ULL << XSTATE_ZMM_Hi256_BIT) #define XSTATE_Hi16_ZMM_MASK (1ULL << XSTATE_Hi16_ZMM_BIT) #define XSTATE_PKRU_MASK (1ULL << XSTATE_PKRU_BIT) +#define XSTATE_XTILE_CFG_MASK (1ULL << XSTATE_XTILE_CFG_BIT) +#define XSTATE_XTILE_DATA_MASK (1ULL << XSTATE_XTILE_DATA_BIT) + +#define XSTATE_DYNAMIC_MASK (XSTATE_XTILE_DATA_MASK) + +#define ESA_FEATURE_ALIGN64_BIT 1 +#define ESA_FEATURE_XFD_BIT 2 + +#define ESA_FEATURE_ALIGN64_MASK (1U << ESA_FEATURE_ALIGN64_BIT) +#define ESA_FEATURE_XFD_MASK (1U << ESA_FEATURE_XFD_BIT) + /* CPUID feature words */ typedef enum FeatureWord { @@ -842,6 +858,8 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS]; #define CPUID_7_0_EDX_TSX_LDTRK (1U << 16) /* AVX512_FP16 instruction */ #define CPUID_7_0_EDX_AVX512_FP16 (1U << 23) +/* AMX tile (two-dimensional register) */ +#define CPUID_7_0_EDX_AMX_TILE (1U << 24) /* Speculation Control */ #define CPUID_7_0_EDX_SPEC_CTRL (1U << 26) /* Single Thread Indirect Branch Predictors */ @@ -857,6 +875,8 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS]; #define CPUID_7_1_EAX_AVX_VNNI (1U << 4) /* AVX512 BFloat16 Instruction */ #define CPUID_7_1_EAX_AVX512_BF16 (1U << 5) +/* XFD Extend Feature Disabled */ +#define CPUID_D_1_EAX_XFD (1U << 4) /* Packets which contain IP payload have LIP values */ #define CPUID_14_0_ECX_LIP (1U << 31) @@ -1345,6 +1365,16 @@ typedef struct XSavePKRU { uint32_t padding; } XSavePKRU; +/* Ext. save area 17: AMX XTILECFG state */ +typedef struct XSaveXTILECFG { + uint8_t xtilecfg[64]; +} XSaveXTILECFG; + +/* Ext. 
save area 18: AMX XTILEDATA state */ +typedef struct XSaveXTILEDATA { + uint8_t xtiledata[8][1024]; +} XSaveXTILEDATA; + QEMU_BUILD_BUG_ON(sizeof(XSaveAVX) != 0x100); QEMU_BUILD_BUG_ON(sizeof(XSaveBNDREG) != 0x40); QEMU_BUILD_BUG_ON(sizeof(XSaveBNDCSR) != 0x40); @@ -1352,13 +1382,16 @@ QEMU_BUILD_BUG_ON(sizeof(XSaveOpmask) != 0x40); QEMU_BUILD_BUG_ON(sizeof(XSaveZMM_Hi256) != 0x200); QEMU_BUILD_BUG_ON(sizeof(XSaveHi16_ZMM) != 0x400); QEMU_BUILD_BUG_ON(sizeof(XSavePKRU) != 0x8); +QEMU_BUILD_BUG_ON(sizeof(XSaveXTILECFG) != 0x40); +QEMU_BUILD_BUG_ON(sizeof(XSaveXTILEDATA) != 0x2000); typedef struct ExtSaveArea { uint32_t feature, bits; uint32_t offset, size; + uint32_t ecx; } ExtSaveArea; -#define XSAVE_STATE_AREA_COUNT (XSTATE_PKRU_BIT + 1) +#define XSAVE_STATE_AREA_COUNT (XSTATE_XTILE_DATA_BIT + 1) extern ExtSaveArea x86_ext_save_areas[XSAVE_STATE_AREA_COUNT]; @@ -1499,6 +1532,10 @@ typedef struct CPUArchState { uint64_t opmask_regs[NB_OPMASK_REGS]; YMMReg zmmh_regs[CPU_NB_REGS]; ZMMReg hi16_zmm_regs[CPU_NB_REGS]; +#ifdef TARGET_X86_64 + uint8_t xtilecfg[64]; + uint8_t xtiledata[8192]; +#endif /* sysenter registers */ uint32_t sysenter_cs; @@ -1584,6 +1621,10 @@ typedef struct CPUArchState { uint64_t msr_rtit_cr3_match; uint64_t msr_rtit_addrs[MAX_RTIT_ADDRS]; + /* Per-VCPU XFD MSRs */ + uint64_t msr_xfd; + uint64_t msr_xfd_err; + /* exception/interrupt handling */ int error_code; int exception_is_int; diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c index d95028018e..a35a1bf9fe 100644 --- a/target/i386/kvm/kvm-cpu.c +++ b/target/i386/kvm/kvm-cpu.c @@ -84,7 +84,7 @@ static void kvm_cpu_max_instance_init(X86CPU *cpu) static void kvm_cpu_xsave_init(void) { static bool first = true; - KVMState *s = kvm_state; + uint32_t eax, ebx, ecx, edx; int i; if (!first) { @@ -100,10 +100,11 @@ static void kvm_cpu_xsave_init(void) ExtSaveArea *esa = &x86_ext_save_areas[i]; if (esa->size) { - int sz = kvm_arch_get_supported_cpuid(s, 0xd, i, R_EAX); - if (sz != 0) { - assert(esa->size == sz); - esa->offset = kvm_arch_get_supported_cpuid(s, 0xd, i, R_EBX); + host_cpuid(0xd, i, &eax, &ebx, &ecx, &edx); + if (eax != 0) { + assert(esa->size == eax); + esa->offset = ebx; + esa->ecx = ecx; } } } diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index 83d0988302..ef2c68a6f4 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -17,6 +17,7 @@ #include "qapi/error.h" #include <sys/ioctl.h> #include <sys/utsname.h> +#include <sys/syscall.h> #include <linux/kvm.h> #include "standard-headers/asm-x86/kvm_para.h" @@ -123,6 +124,7 @@ static uint32_t num_architectural_pmu_gp_counters; static uint32_t num_architectural_pmu_fixed_counters; static int has_xsave; +static int has_xsave2; static int has_xcrs; static int has_pit_state2; static int has_sregs2; @@ -349,6 +351,7 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function, struct kvm_cpuid2 *cpuid; uint32_t ret = 0; uint32_t cpuid_1_edx; + uint64_t bitmask; cpuid = get_supported_cpuid(s); @@ -406,6 +409,25 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function, if (!has_msr_arch_capabs) { ret &= ~CPUID_7_0_EDX_ARCH_CAPABILITIES; } + } else if (function == 0xd && index == 0 && + (reg == R_EAX || reg == R_EDX)) { + struct kvm_device_attr attr = { + .group = 0, + .attr = KVM_X86_XCOMP_GUEST_SUPP, + .addr = (unsigned long) &bitmask + }; + + bool sys_attr = kvm_check_extension(s, KVM_CAP_SYS_ATTRIBUTES); + if (!sys_attr) { + warn_report("cannot get sys attribute capabilities %d", sys_attr); + } + + int rc 
= kvm_ioctl(s, KVM_GET_DEVICE_ATTR, &attr); + if (rc == -1 && (errno == ENXIO || errno == EINVAL)) { + warn_report("KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) " + "error: %d", rc); + } + ret = (reg == R_EAX) ? bitmask : bitmask >> 32; } else if (function == 0x80000001 && reg == R_ECX) { /* * It's safe to enable TOPOEXT even if it's not returned by @@ -1566,6 +1588,26 @@ static Error *invtsc_mig_blocker; #define KVM_MAX_CPUID_ENTRIES 100 +static void kvm_init_xsave(CPUX86State *env) +{ + if (has_xsave2) { + env->xsave_buf_len = QEMU_ALIGN_UP(has_xsave2, 4096); + } else if (has_xsave) { + env->xsave_buf_len = sizeof(struct kvm_xsave); + } else { + return; + } + + env->xsave_buf = qemu_memalign(4096, env->xsave_buf_len); + memset(env->xsave_buf, 0, env->xsave_buf_len); + /* + * The allocated storage must be large enough for all of the + * possible XSAVE state components. + */ + assert(kvm_arch_get_supported_cpuid(kvm_state, 0xd, 0, R_ECX) <= + env->xsave_buf_len); +} + int kvm_arch_init_vcpu(CPUState *cs) { struct { @@ -1595,6 +1637,8 @@ int kvm_arch_init_vcpu(CPUState *cs) cpuid_i = 0; + has_xsave2 = kvm_check_extension(cs->kvm_state, KVM_CAP_XSAVE2); + r = kvm_arch_set_tsc_khz(cs); if (r < 0) { return r; @@ -1760,7 +1804,9 @@ int kvm_arch_init_vcpu(CPUState *cs) c = &cpuid_data.entries[cpuid_i++]; } break; - case 0x14: { + case 0x14: + case 0x1d: + case 0x1e: { uint32_t times; c->function = i; @@ -1982,19 +2028,7 @@ int kvm_arch_init_vcpu(CPUState *cs) if (r) { goto fail; } - - if (has_xsave) { - env->xsave_buf_len = sizeof(struct kvm_xsave); - env->xsave_buf = qemu_memalign(4096, env->xsave_buf_len); - memset(env->xsave_buf, 0, env->xsave_buf_len); - - /* - * The allocated storage must be large enough for all of the - * possible XSAVE state components. - */ - assert(kvm_arch_get_supported_cpuid(kvm_state, 0xd, 0, R_ECX) - <= env->xsave_buf_len); - } + kvm_init_xsave(env); max_nested_state_len = kvm_max_nested_state_length(); if (max_nested_state_len > 0) { @@ -3243,6 +3277,13 @@ static int kvm_put_msrs(X86CPU *cpu, int level) env->msr_ia32_sgxlepubkeyhash[3]); } + if (env->features[FEAT_XSAVE] & CPUID_D_1_EAX_XFD) { + kvm_msr_entry_add(cpu, MSR_IA32_XFD, + env->msr_xfd); + kvm_msr_entry_add(cpu, MSR_IA32_XFD_ERR, + env->msr_xfd_err); + } + /* Note: MSR_IA32_FEATURE_CONTROL is written separately, see * kvm_put_msr_feature_control. */ } @@ -3298,13 +3339,14 @@ static int kvm_get_xsave(X86CPU *cpu) { CPUX86State *env = &cpu->env; void *xsave = env->xsave_buf; - int ret; + int type, ret; if (!has_xsave) { return kvm_get_fpu(cpu); } - ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_XSAVE, xsave); + type = has_xsave2 ? 
KVM_GET_XSAVE2 : KVM_GET_XSAVE; + ret = kvm_vcpu_ioctl(CPU(cpu), type, xsave); if (ret < 0) { return ret; } @@ -3634,6 +3676,11 @@ static int kvm_get_msrs(X86CPU *cpu) kvm_msr_entry_add(cpu, MSR_IA32_SGXLEPUBKEYHASH3, 0); } + if (env->features[FEAT_XSAVE] & CPUID_D_1_EAX_XFD) { + kvm_msr_entry_add(cpu, MSR_IA32_XFD, 0); + kvm_msr_entry_add(cpu, MSR_IA32_XFD_ERR, 0); + } + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, cpu->kvm_msr_buf); if (ret < 0) { return ret; @@ -3930,6 +3977,12 @@ static int kvm_get_msrs(X86CPU *cpu) env->msr_ia32_sgxlepubkeyhash[index - MSR_IA32_SGXLEPUBKEYHASH0] = msrs[i].data; break; + case MSR_IA32_XFD: + env->msr_xfd = msrs[i].data; + break; + case MSR_IA32_XFD_ERR: + env->msr_xfd_err = msrs[i].data; + break; } } @@ -4940,16 +4993,18 @@ void kvm_arch_init_irq_routing(KVMState *s) kvm_gsi_routing_allowed = true; if (kvm_irqchip_is_split()) { + KVMRouteChange c = kvm_irqchip_begin_route_changes(s); int i; /* If the ioapic is in QEMU and the lapics are in KVM, reserve MSI routes for signaling interrupts to the local apics. */ for (i = 0; i < IOAPIC_NUM_PINS; i++) { - if (kvm_irqchip_add_msi_route(s, 0, NULL) < 0) { + if (kvm_irqchip_add_msi_route(&c, 0, NULL) < 0) { error_report("Could not enable split IRQ mode."); exit(1); } } + kvm_irqchip_commit_route_changes(&c); } } @@ -5149,3 +5204,39 @@ bool kvm_arch_cpu_check_are_resettable(void) { return !sev_es_enabled(); } + +#define ARCH_REQ_XCOMP_GUEST_PERM 0x1025 + +void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask) +{ + KVMState *s = kvm_state; + uint64_t supported; + + mask &= XSTATE_DYNAMIC_MASK; + if (!mask) { + return; + } + /* + * Just ignore bits that are not in CPUID[EAX=0xD,ECX=0]. + * ARCH_REQ_XCOMP_GUEST_PERM would fail, and QEMU has warned + * about them already because they are not supported features. + */ + supported = kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EAX); + supported |= (uint64_t)kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EDX) << 32; + mask &= supported; + + while (mask) { + int bit = ctz64(mask); + int rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, bit); + if (rc) { + /* + * Older kernel version (<5.17) do not support + * ARCH_REQ_XCOMP_GUEST_PERM, but also do not return + * any dynamic feature from kvm_arch_get_supported_cpuid. 
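(Aside, not part of the patch: a standalone x86-64 Linux program making the same permission request QEMU issues above. It assumes a kernel >= 5.17 and hard-codes the XTILEDATA bit, the only dynamic XSTATE feature at this point.)

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#define ARCH_REQ_XCOMP_GUEST_PERM 0x1025    /* same value the patch defines */
#define XSTATE_XTILE_DATA_BIT     18        /* AMX tile data */

int main(void)
{
    if (syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM,
                XSTATE_XTILE_DATA_BIT)) {
        perror("arch_prctl(ARCH_REQ_XCOMP_GUEST_PERM)");
        return 1;
    }
    printf("XTILEDATA permission granted for guest use\n");
    return 0;
}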
+ */ + warn_report("prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure " + "for feature bit %d", bit); + } + mask &= ~BIT_ULL(bit); + } +} diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h index a978509d50..4124912c20 100644 --- a/target/i386/kvm/kvm_i386.h +++ b/target/i386/kvm/kvm_i386.h @@ -52,5 +52,6 @@ bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp); uint64_t kvm_swizzle_msi_ext_dest_id(uint64_t address); bool kvm_enable_sgx_provisioning(KVMState *s); +void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask); #endif diff --git a/target/i386/machine.c b/target/i386/machine.c index 6202f47793..7c54bada81 100644 --- a/target/i386/machine.c +++ b/target/i386/machine.c @@ -1483,6 +1483,48 @@ static const VMStateDescription vmstate_pdptrs = { } }; +static bool xfd_msrs_needed(void *opaque) +{ + X86CPU *cpu = opaque; + CPUX86State *env = &cpu->env; + + return !!(env->features[FEAT_XSAVE] & CPUID_D_1_EAX_XFD); +} + +static const VMStateDescription vmstate_msr_xfd = { + .name = "cpu/msr_xfd", + .version_id = 1, + .minimum_version_id = 1, + .needed = xfd_msrs_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT64(env.msr_xfd, X86CPU), + VMSTATE_UINT64(env.msr_xfd_err, X86CPU), + VMSTATE_END_OF_LIST() + } +}; + +#ifdef TARGET_X86_64 +static bool amx_xtile_needed(void *opaque) +{ + X86CPU *cpu = opaque; + CPUX86State *env = &cpu->env; + + return !!(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_AMX_TILE); +} + +static const VMStateDescription vmstate_amx_xtile = { + .name = "cpu/intel_amx_xtile", + .version_id = 1, + .minimum_version_id = 1, + .needed = amx_xtile_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT8_ARRAY(env.xtilecfg, X86CPU, 64), + VMSTATE_UINT8_ARRAY(env.xtiledata, X86CPU, 8192), + VMSTATE_END_OF_LIST() + } +}; +#endif + const VMStateDescription vmstate_x86_cpu = { .name = "cpu", .version_id = 12, @@ -1622,6 +1664,10 @@ const VMStateDescription vmstate_x86_cpu = { &vmstate_msr_tsx_ctrl, &vmstate_msr_intel_sgx, &vmstate_pdptrs, + &vmstate_msr_xfd, +#ifdef TARGET_X86_64 + &vmstate_amx_xtile, +#endif NULL } }; diff --git a/target/i386/tcg/seg_helper.c b/target/i386/tcg/seg_helper.c index baa905a0cd..bffd82923f 100644 --- a/target/i386/tcg/seg_helper.c +++ b/target/i386/tcg/seg_helper.c @@ -28,6 +28,42 @@ #include "helper-tcg.h" #include "seg_helper.h" +int get_pg_mode(CPUX86State *env) +{ + int pg_mode = 0; + if (!(env->cr[0] & CR0_PG_MASK)) { + return 0; + } + if (env->cr[0] & CR0_WP_MASK) { + pg_mode |= PG_MODE_WP; + } + if (env->cr[4] & CR4_PAE_MASK) { + pg_mode |= PG_MODE_PAE; + if (env->efer & MSR_EFER_NXE) { + pg_mode |= PG_MODE_NXE; + } + } + if (env->cr[4] & CR4_PSE_MASK) { + pg_mode |= PG_MODE_PSE; + } + if (env->cr[4] & CR4_SMEP_MASK) { + pg_mode |= PG_MODE_SMEP; + } + if (env->hflags & HF_LMA_MASK) { + pg_mode |= PG_MODE_LMA; + if (env->cr[4] & CR4_PKE_MASK) { + pg_mode |= PG_MODE_PKE; + } + if (env->cr[4] & CR4_PKS_MASK) { + pg_mode |= PG_MODE_PKS; + } + if (env->cr[4] & CR4_LA57_MASK) { + pg_mode |= PG_MODE_LA57; + } + } + return pg_mode; +} + /* return non zero if error */ static inline int load_segment_ra(CPUX86State *env, uint32_t *e1_ptr, uint32_t *e2_ptr, int selector, @@ -794,7 +830,9 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int, static inline target_ulong get_rsp_from_tss(CPUX86State *env, int level) { X86CPU *cpu = env_archcpu(env); - int index; + int index, pg_mode; + target_ulong rsp; + int32_t sext; #if 0 printf("TR: base=" TARGET_FMT_lx " limit=%x\n", @@ -808,7 +846,17 @@ static inline target_ulong 
get_rsp_from_tss(CPUX86State *env, int level) if ((index + 7) > env->tr.limit) { raise_exception_err(env, EXCP0A_TSS, env->tr.selector & 0xfffc); } - return cpu_ldq_kernel(env, env->tr.base + index); + + rsp = cpu_ldq_kernel(env, env->tr.base + index); + + /* test virtual address sign extension */ + pg_mode = get_pg_mode(env); + sext = (int64_t)rsp >> (pg_mode & PG_MODE_LA57 ? 56 : 47); + if (sext != 0 && sext != -1) { + raise_exception_err(env, EXCP0C_STACK, 0); + } + + return rsp; } /* 64 bit interrupt */ diff --git a/target/i386/tcg/sysemu/excp_helper.c b/target/i386/tcg/sysemu/excp_helper.c index 5627772e7c..e1b6d88683 100644 --- a/target/i386/tcg/sysemu/excp_helper.c +++ b/target/i386/tcg/sysemu/excp_helper.c @@ -22,39 +22,6 @@ #include "exec/exec-all.h" #include "tcg/helper-tcg.h" -int get_pg_mode(CPUX86State *env) -{ - int pg_mode = 0; - if (env->cr[0] & CR0_WP_MASK) { - pg_mode |= PG_MODE_WP; - } - if (env->cr[4] & CR4_PAE_MASK) { - pg_mode |= PG_MODE_PAE; - } - if (env->cr[4] & CR4_PSE_MASK) { - pg_mode |= PG_MODE_PSE; - } - if (env->cr[4] & CR4_PKE_MASK) { - pg_mode |= PG_MODE_PKE; - } - if (env->cr[4] & CR4_PKS_MASK) { - pg_mode |= PG_MODE_PKS; - } - if (env->cr[4] & CR4_SMEP_MASK) { - pg_mode |= PG_MODE_SMEP; - } - if (env->cr[4] & CR4_LA57_MASK) { - pg_mode |= PG_MODE_LA57; - } - if (env->hflags & HF_LMA_MASK) { - pg_mode |= PG_MODE_LMA; - } - if (env->efer & MSR_EFER_NXE) { - pg_mode |= PG_MODE_NXE; - } - return pg_mode; -} - #define PG_ERROR_OK (-1) typedef hwaddr (*MMUTranslateFunc)(CPUState *cs, hwaddr gphys, MMUAccessType access_type, @@ -279,9 +246,7 @@ do_check_protect_pse36: *prot |= PAGE_EXEC; } - if (!(pg_mode & PG_MODE_LMA)) { - pkr = 0; - } else if (ptep & PG_USER_MASK) { + if (ptep & PG_USER_MASK) { pkr = pg_mode & PG_MODE_PKE ? env->pkru : 0; } else { pkr = pg_mode & PG_MODE_PKS ? env->pkrs : 0; @@ -344,8 +309,7 @@ do_check_protect_pse36: if (is_user) error_code |= PG_ERROR_U_MASK; if (is_write1 == 2 && - (((pg_mode & PG_MODE_NXE) && (pg_mode & PG_MODE_PAE)) || - (pg_mode & PG_MODE_SMEP))) + ((pg_mode & PG_MODE_NXE) || (pg_mode & PG_MODE_SMEP))) error_code |= PG_ERROR_I_D_MASK; return error_code; } diff --git a/target/i386/whpx/whpx-all.c b/target/i386/whpx/whpx-all.c index c7e25abf42..ecddf0cb91 100644 --- a/target/i386/whpx/whpx-all.c +++ b/target/i386/whpx/whpx-all.c @@ -256,6 +256,21 @@ static int whpx_set_tsc(CPUState *cpu) return 0; } +/* + * The CR8 register in the CPU is mapped to the TPR register of the APIC, + * however, they use a slightly different encoding. Specifically: + * + * APIC.TPR[bits 7:4] = CR8[bits 3:0] + * + * This mechanism is described in section 10.8.6.1 of Volume 3 of Intel 64 + * and IA-32 Architectures Software Developer's Manual. + */ + +static uint64_t whpx_apic_tpr_to_cr8(uint64_t tpr) +{ + return tpr >> 4; +} + static void whpx_set_registers(CPUState *cpu, int level) { struct whpx_state *whpx = &whpx_global; @@ -284,7 +299,7 @@ static void whpx_set_registers(CPUState *cpu, int level) v86 = (env->eflags & VM_MASK); r86 = !(env->cr[0] & CR0_PE_MASK); - vcpu->tpr = cpu_get_apic_tpr(x86_cpu->apic_state); + vcpu->tpr = whpx_apic_tpr_to_cr8(cpu_get_apic_tpr(x86_cpu->apic_state)); vcpu->apic_base = cpu_get_apic_base(x86_cpu->apic_state); idx = 0; @@ -475,6 +490,17 @@ static void whpx_get_registers(CPUState *cpu) hr); } + if (whpx_apic_in_platform()) { + /* + * Fetch the TPR value from the emulated APIC. It may get overwritten + * below with the value from CR8 returned by + * WHvGetVirtualProcessorRegisters(). 
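(Aside, not part of the patch: the sign-extension test added to get_rsp_from_tss() above rejects non-canonical stack pointers loaded from the TSS. A standalone sketch of the same canonicality rule for 48-bit and, with LA57, 57-bit virtual addresses; the test values are illustrative.)

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Canonical iff all bits above bit (va_bits - 1) copy that bit, i.e. the
 * arithmetic shift yields 0 or -1, exactly as the patch tests with 47/56. */
static bool is_canonical(uint64_t va, int va_bits)
{
    int64_t sext = (int64_t)va >> (va_bits - 1);
    return sext == 0 || sext == -1;
}

int main(void)
{
    assert(is_canonical(0x00007fffffffffffULL, 48));
    assert(is_canonical(0xffff800000000000ULL, 48));
    assert(!is_canonical(0x0000800000000000ULL, 48)); /* would raise #SS */
    assert(is_canonical(0x0000800000000000ULL, 57));  /* canonical with LA57 */
    return 0;
}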
+ */ + whpx_apic_get(x86_cpu->apic_state); + vcpu->tpr = whpx_apic_tpr_to_cr8( + cpu_get_apic_tpr(x86_cpu->apic_state)); + } + idx = 0; /* Indexes for first 16 registers match between HV and QEMU definitions */ @@ -604,6 +630,8 @@ static void whpx_get_registers(CPUState *cpu) whpx_apic_get(x86_cpu->apic_state); } + x86_update_hflags(env); + return; } diff --git a/target/i386/xsave_helper.c b/target/i386/xsave_helper.c index ac61a96344..996e9f3bfe 100644 --- a/target/i386/xsave_helper.c +++ b/target/i386/xsave_helper.c @@ -126,6 +126,20 @@ void x86_cpu_xsave_all_areas(X86CPU *cpu, void *buf, uint32_t buflen) memcpy(pkru, &env->pkru, sizeof(env->pkru)); } + + e = &x86_ext_save_areas[XSTATE_XTILE_CFG_BIT]; + if (e->size && e->offset) { + XSaveXTILECFG *tilecfg = buf + e->offset; + + memcpy(tilecfg, &env->xtilecfg, sizeof(env->xtilecfg)); + } + + e = &x86_ext_save_areas[XSTATE_XTILE_DATA_BIT]; + if (e->size && e->offset && buflen >= e->size + e->offset) { + XSaveXTILEDATA *tiledata = buf + e->offset; + + memcpy(tiledata, &env->xtiledata, sizeof(env->xtiledata)); + } #endif } @@ -247,5 +261,19 @@ void x86_cpu_xrstor_all_areas(X86CPU *cpu, const void *buf, uint32_t buflen) pkru = buf + e->offset; memcpy(&env->pkru, pkru, sizeof(env->pkru)); } + + e = &x86_ext_save_areas[XSTATE_XTILE_CFG_BIT]; + if (e->size && e->offset) { + const XSaveXTILECFG *tilecfg = buf + e->offset; + + memcpy(&env->xtilecfg, tilecfg, sizeof(env->xtilecfg)); + } + + e = &x86_ext_save_areas[XSTATE_XTILE_DATA_BIT]; + if (e->size && e->offset && buflen >= e->size + e->offset) { + const XSaveXTILEDATA *tiledata = buf + e->offset; + + memcpy(&env->xtiledata, tiledata, sizeof(env->xtiledata)); + } #endif } diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c index 67c38f065b..5414fd63c1 100644 --- a/target/ppc/mmu-radix64.c +++ b/target/ppc/mmu-radix64.c @@ -204,7 +204,8 @@ static bool ppc_radix64_check_prot(PowerPCCPU *cpu, MMUAccessType access_type, /* Check if requested access type is allowed */ need_prot = prot_for_access_type(access_type); if (need_prot & ~*prot) { /* Page Protected for that Access */ - *fault_cause |= DSISR_PROTFAULT; + *fault_cause |= access_type == MMU_INST_FETCH ? SRR1_NOEXEC_GUARD : + DSISR_PROTFAULT; return true; } diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index 48a97b2d7e..e67fbf2bb8 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -1552,7 +1552,7 @@ static bool trans_XXSPLTW(DisasContext *ctx, arg_XX2_uim2 *a) tofs = vsr_full_offset(a->xt); bofs = vsr_full_offset(a->xb); bofs += a->uim << MO_32; -#ifndef HOST_WORDS_BIG_ENDIAN +#ifndef HOST_WORDS_BIGENDIAN bofs ^= 8 | 4; #endif diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index e1ea69669c..4bc0420f4d 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1689,8 +1689,21 @@ static void tcg_out_qemu_ld_index(TCGContext *s, MemOp opc, /* LDRD requires alignment; double-check that. */ if (get_alignment_bits(opc) >= MO_64 && (datalo & 1) == 0 && datahi == datalo + 1) { - tcg_out_ldrd_r(s, COND_AL, datalo, addrlo, addend); - } else if (scratch_addend) { + /* + * Rm (the second address op) must not overlap Rt or Rt + 1. + * Since datalo is aligned, we can simplify the test via alignment. + * Flip the two address arguments if that works. 
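(Aside, not part of the patch: a standalone check of the alignment trick in the tcg/arm comment above; the register numbers are arbitrary. With datalo even, (reg & ~1) == datalo holds exactly for the two registers LDRD writes, Rt and Rt + 1.)

#include <assert.h>
#include <stdbool.h>

static bool clobbers_dest_pair(int reg, int datalo)
{
    return (reg & ~1) == datalo;
}

int main(void)
{
    int datalo = 4;                          /* even, as the guard requires */
    assert(clobbers_dest_pair(4, datalo));   /* Rm == Rt     -> overlap */
    assert(clobbers_dest_pair(5, datalo));   /* Rm == Rt + 1 -> overlap */
    assert(!clobbers_dest_pair(6, datalo));  /* disjoint     -> safe */
    return 0;
}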
+ */ + if ((addend & ~1) != datalo) { + tcg_out_ldrd_r(s, COND_AL, datalo, addrlo, addend); + break; + } + if ((addrlo & ~1) != datalo) { + tcg_out_ldrd_r(s, COND_AL, datalo, addend, addrlo); + break; + } + } + if (scratch_addend) { tcg_out_ld32_rwb(s, COND_AL, datalo, addend, addrlo); tcg_out_ld32_12(s, COND_AL, datahi, addend, 4); } else { diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 6e65828c09..33becd7694 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -2675,6 +2675,7 @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, if (vece == MO_64) { return true; } + src = dst; } /* @@ -2715,7 +2716,7 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece, msb = clz32(val); lsb = 31 - ctz32(val); } - tcg_out_insn(s, VRIb, VGM, dst, lsb, msb, MO_32); + tcg_out_insn(s, VRIb, VGM, dst, msb, lsb, MO_32); return; } } else { @@ -2729,7 +2730,7 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece, msb = clz64(val); lsb = 63 - ctz64(val); } - tcg_out_insn(s, VRIb, VGM, dst, lsb, msb, MO_64); + tcg_out_insn(s, VRIb, VGM, dst, msb, lsb, MO_64); return; } } @@ -2868,7 +2869,7 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, break; case INDEX_op_bitsel_vec: - tcg_out_insn(s, VRRe, VSEL, a0, a1, a2, args[3]); + tcg_out_insn(s, VRRe, VSEL, a0, a2, args[3], a1); break; case INDEX_op_cmp_vec: diff --git a/tests/avocado/boot_linux_console.py b/tests/avocado/boot_linux_console.py index 9c618d4809..b40a3abc81 100644 --- a/tests/avocado/boot_linux_console.py +++ b/tests/avocado/boot_linux_console.py @@ -1165,11 +1165,14 @@ class BootLinuxConsole(LinuxKernelTest): :avocado: tags=arch:ppc64 :avocado: tags=machine:ppce500 :avocado: tags=cpu:e5500 + :avocado: tags=accel:tcg """ + self.require_accelerator("tcg") tar_hash = '6951d86d644b302898da2fd701739c9406527fe1' self.do_test_advcal_2018('19', tar_hash, 'uImage') def do_test_ppc64_powernv(self, proc): + self.require_accelerator("tcg") images_url = ('https://github.com/open-power/op-build/releases/download/v2.7/') kernel_url = images_url + 'zImage.epapr' @@ -1194,6 +1197,7 @@ class BootLinuxConsole(LinuxKernelTest): """ :avocado: tags=arch:ppc64 :avocado: tags=machine:powernv8 + :avocado: tags=accel:tcg """ self.do_test_ppc64_powernv('P8') @@ -1201,6 +1205,7 @@ class BootLinuxConsole(LinuxKernelTest): """ :avocado: tags=arch:ppc64 :avocado: tags=machine:powernv9 + :avocado: tags=accel:tcg """ self.do_test_ppc64_powernv('P9') @@ -1208,7 +1213,13 @@ class BootLinuxConsole(LinuxKernelTest): """ :avocado: tags=arch:ppc :avocado: tags=machine:g3beige + :avocado: tags=accel:tcg """ + # TODO: g3beige works with kvm_pr but we don't have a + # reliable way ATM (e.g. looking at /proc/modules) to detect + # whether we're running kvm_hv or kvm_pr. For now let's + # disable this test if we don't have TCG support. + self.require_accelerator("tcg") tar_hash = 'e0b872a5eb8fdc5bed19bd43ffe863900ebcedfc' self.vm.add_args('-M', 'graphics=off') self.do_test_advcal_2018('15', tar_hash, 'invaders.elf') @@ -1217,7 +1228,13 @@ class BootLinuxConsole(LinuxKernelTest): """ :avocado: tags=arch:ppc :avocado: tags=machine:mac99 + :avocado: tags=accel:tcg """ + # TODO: mac99 works with kvm_pr but we don't have a + # reliable way ATM (e.g. looking at /proc/modules) to detect + # whether we're running kvm_hv or kvm_pr. For now let's + # disable this test if we don't have TCG support. 
+ self.require_accelerator("tcg") tar_hash = 'e0b872a5eb8fdc5bed19bd43ffe863900ebcedfc' self.vm.add_args('-M', 'graphics=off') self.do_test_advcal_2018('15', tar_hash, 'invaders.elf') diff --git a/tests/avocado/ppc_405.py b/tests/avocado/ppc_405.py index a47f89b934..4e7e01aa76 100644 --- a/tests/avocado/ppc_405.py +++ b/tests/avocado/ppc_405.py @@ -25,18 +25,12 @@ class Ppc405Machine(QemuSystemTest): wait_for_console_pattern(self, 'AMCC PPC405EP Evaluation Board') exec_command_and_wait_for_pattern(self, 'reset', 'AMCC PowerPC 405EP') - def test_ppc_taihu(self): - """ - :avocado: tags=arch:ppc - :avocado: tags=machine:taihu - :avocado: tags=cpu:405ep - """ - self.do_test_ppc405() - def test_ppc_ref405ep(self): """ :avocado: tags=arch:ppc :avocado: tags=machine:ref405ep :avocado: tags=cpu:405ep + :avocado: tags=accel:tcg """ + self.require_accelerator("tcg") self.do_test_ppc405() diff --git a/tests/avocado/ppc_74xx.py b/tests/avocado/ppc_74xx.py index 556a9a7da9..f54757c243 100644 --- a/tests/avocado/ppc_74xx.py +++ b/tests/avocado/ppc_74xx.py @@ -11,6 +11,7 @@ from avocado_qemu import wait_for_console_pattern class ppc74xxCpu(QemuSystemTest): """ :avocado: tags=arch:ppc + :avocado: tags=accel:tcg """ timeout = 5 @@ -18,6 +19,7 @@ class ppc74xxCpu(QemuSystemTest): """ :avocado: tags=cpu:7400 """ + self.require_accelerator("tcg") self.vm.set_console() self.vm.launch() wait_for_console_pattern(self, '>> OpenBIOS') @@ -27,6 +29,7 @@ class ppc74xxCpu(QemuSystemTest): """ :avocado: tags=cpu:7410 """ + self.require_accelerator("tcg") self.vm.set_console() self.vm.launch() wait_for_console_pattern(self, '>> OpenBIOS') @@ -36,6 +39,7 @@ class ppc74xxCpu(QemuSystemTest): """ :avocado: tags=cpu:7441 """ + self.require_accelerator("tcg") self.vm.set_console() self.vm.launch() wait_for_console_pattern(self, '>> OpenBIOS') @@ -45,6 +49,7 @@ class ppc74xxCpu(QemuSystemTest): """ :avocado: tags=cpu:7445 """ + self.require_accelerator("tcg") self.vm.set_console() self.vm.launch() wait_for_console_pattern(self, '>> OpenBIOS') @@ -54,6 +59,7 @@ class ppc74xxCpu(QemuSystemTest): """ :avocado: tags=cpu:7447 """ + self.require_accelerator("tcg") self.vm.set_console() self.vm.launch() wait_for_console_pattern(self, '>> OpenBIOS') @@ -63,6 +69,7 @@ class ppc74xxCpu(QemuSystemTest): """ :avocado: tags=cpu:7447a """ + self.require_accelerator("tcg") self.vm.set_console() self.vm.launch() wait_for_console_pattern(self, '>> OpenBIOS') @@ -72,6 +79,7 @@ class ppc74xxCpu(QemuSystemTest): """ :avocado: tags=cpu:7448 """ + self.require_accelerator("tcg") self.vm.set_console() self.vm.launch() wait_for_console_pattern(self, '>> OpenBIOS') @@ -81,6 +89,7 @@ class ppc74xxCpu(QemuSystemTest): """ :avocado: tags=cpu:7450 """ + self.require_accelerator("tcg") self.vm.set_console() self.vm.launch() wait_for_console_pattern(self, '>> OpenBIOS') @@ -90,6 +99,7 @@ class ppc74xxCpu(QemuSystemTest): """ :avocado: tags=cpu:7451 """ + self.require_accelerator("tcg") self.vm.set_console() self.vm.launch() wait_for_console_pattern(self, '>> OpenBIOS') @@ -99,6 +109,7 @@ class ppc74xxCpu(QemuSystemTest): """ :avocado: tags=cpu:7455 """ + self.require_accelerator("tcg") self.vm.set_console() self.vm.launch() wait_for_console_pattern(self, '>> OpenBIOS') @@ -108,6 +119,7 @@ class ppc74xxCpu(QemuSystemTest): """ :avocado: tags=cpu:7457 """ + self.require_accelerator("tcg") self.vm.set_console() self.vm.launch() wait_for_console_pattern(self, '>> OpenBIOS') @@ -117,6 +129,7 @@ class ppc74xxCpu(QemuSystemTest): """ :avocado: tags=cpu:7457a """ 
+ self.require_accelerator("tcg") self.vm.set_console() self.vm.launch() wait_for_console_pattern(self, '>> OpenBIOS') diff --git a/tests/avocado/ppc_bamboo.py b/tests/avocado/ppc_bamboo.py index 40629e3478..102ff252df 100644 --- a/tests/avocado/ppc_bamboo.py +++ b/tests/avocado/ppc_bamboo.py @@ -20,7 +20,9 @@ class BambooMachine(QemuSystemTest): :avocado: tags=machine:bamboo :avocado: tags=cpu:440epb :avocado: tags=device:rtl8139 + :avocado: tags=accel:tcg """ + self.require_accelerator("tcg") tar_url = ('http://landley.net/aboriginal/downloads/binaries/' 'system-image-powerpc-440fp.tar.gz') tar_hash = '53e5f16414b195b82d2c70272f81c2eedb39bad9' diff --git a/tests/avocado/ppc_mpc8544ds.py b/tests/avocado/ppc_mpc8544ds.py index 886f967b15..8d6a749201 100644 --- a/tests/avocado/ppc_mpc8544ds.py +++ b/tests/avocado/ppc_mpc8544ds.py @@ -19,7 +19,9 @@ class Mpc8544dsMachine(QemuSystemTest): """ :avocado: tags=arch:ppc :avocado: tags=machine:mpc8544ds + :avocado: tags=accel:tcg """ + self.require_accelerator("tcg") tar_url = ('https://www.qemu-advent-calendar.org' '/2020/download/day17.tar.gz') tar_hash = '7a5239542a7c4257aa4d3b7f6ddf08fb6775c494' diff --git a/tests/avocado/ppc_prep_40p.py b/tests/avocado/ppc_prep_40p.py index 4bd956584d..d4f1eb7e1d 100644 --- a/tests/avocado/ppc_prep_40p.py +++ b/tests/avocado/ppc_prep_40p.py @@ -28,7 +28,9 @@ class IbmPrep40pMachine(QemuSystemTest): :avocado: tags=machine:40p :avocado: tags=os:netbsd :avocado: tags=slowness:high + :avocado: tags=accel:tcg """ + self.require_accelerator("tcg") bios_url = ('http://ftpmirror.your.org/pub/misc/' 'ftp.software.ibm.com/rs6000/firmware/' '7020-40p/P12H0456.IMG') @@ -51,7 +53,9 @@ class IbmPrep40pMachine(QemuSystemTest): """ :avocado: tags=arch:ppc :avocado: tags=machine:40p + :avocado: tags=accel:tcg """ + self.require_accelerator("tcg") self.vm.set_console() self.vm.add_args('-m', '192') # test fw_cfg @@ -65,7 +69,9 @@ class IbmPrep40pMachine(QemuSystemTest): :avocado: tags=arch:ppc :avocado: tags=machine:40p :avocado: tags=os:netbsd + :avocado: tags=accel:tcg """ + self.require_accelerator("tcg") drive_url = ('https://archive.netbsd.org/pub/NetBSD-archive/' 'NetBSD-7.1.2/iso/NetBSD-7.1.2-prep.iso') drive_hash = 'ac6fa2707d888b36d6fa64de6e7fe48e' diff --git a/tests/avocado/ppc_virtex_ml507.py b/tests/avocado/ppc_virtex_ml507.py index a6912ee579..6b07686b56 100644 --- a/tests/avocado/ppc_virtex_ml507.py +++ b/tests/avocado/ppc_virtex_ml507.py @@ -19,7 +19,9 @@ class VirtexMl507Machine(QemuSystemTest): """ :avocado: tags=arch:ppc :avocado: tags=machine:virtex-ml507 + :avocado: tags=accel:tcg """ + self.require_accelerator("tcg") tar_url = ('https://www.qemu-advent-calendar.org' '/2020/download/hippo.tar.gz') tar_hash = '306b95bfe7d147f125aa176a877e266db8ef914a' diff --git a/tests/avocado/replay_kernel.py b/tests/avocado/replay_kernel.py index c68a953730..0b2b0dc692 100644 --- a/tests/avocado/replay_kernel.py +++ b/tests/avocado/replay_kernel.py @@ -36,6 +36,9 @@ class ReplayKernelBase(LinuxKernelTest): def run_vm(self, kernel_path, kernel_command_line, console_pattern, record, shift, args, replay_path): + # icount requires TCG to be available + self.require_accelerator('tcg') + logger = logging.getLogger('replay') start_time = time.time() vm = self.get_vm() @@ -243,6 +246,7 @@ class ReplayKernelNormal(ReplayKernelBase): """ :avocado: tags=arch:ppc64 :avocado: tags=machine:pseries + :avocado: tags=accel:tcg """ kernel_url = ('https://archives.fedoraproject.org/pub/archive' 
'/fedora-secondary/releases/29/Everything/ppc64le/os' diff --git a/tests/data/acpi/virt/SSDT.memhp b/tests/data/acpi/virt/SSDT.memhp index 375d7b6fc8..4c363a6d95 100644 --- a/tests/data/acpi/virt/SSDT.memhp +++ b/tests/data/acpi/virt/SSDT.memhp Binary files differ diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build index 7d8c74fdba..d25f82bb5a 100644 --- a/tests/qtest/meson.build +++ b/tests/qtest/meson.build @@ -160,7 +160,9 @@ qtests_ppc = \ (slirp.found() ? ['test-netfilter'] : []) + \ (config_all_devices.has_key('CONFIG_ISA_TESTDEV') ? ['endianness-test'] : []) + \ (config_all_devices.has_key('CONFIG_M48T59') ? ['m48t59-test'] : []) + \ - ['boot-order-test', 'prom-env-test', 'boot-serial-test'] \ + (config_all_devices.has_key('CONFIG_TCG') ? ['prom-env-test'] : []) + \ + (config_all_devices.has_key('CONFIG_TCG') ? ['boot-serial-test'] : []) + \ + ['boot-order-test'] qtests_ppc64 = \ qtests_ppc + \ diff --git a/tests/tcg/hexagon/Makefile.target b/tests/tcg/hexagon/Makefile.target index 8b07a28166..23b9870534 100644 --- a/tests/tcg/hexagon/Makefile.target +++ b/tests/tcg/hexagon/Makefile.target @@ -1,5 +1,5 @@ ## -## Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. +## Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. ## ## This program is free software; you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by @@ -30,6 +30,7 @@ first: $(HEX_SRC)/first.S HEX_TESTS = first HEX_TESTS += hex_sigsegv HEX_TESTS += misc +HEX_TESTS += usr HEX_TESTS += preg_alias HEX_TESTS += dual_stores HEX_TESTS += multi_result @@ -43,3 +44,8 @@ HEX_TESTS += fpstuff HEX_TESTS += overflow TESTS += $(HEX_TESTS) + +# This test has to be compiled for the -mv67t target +usr: usr.c + $(CC) $(CFLAGS) -mv67t -O2 -Wno-inline-asm -Wno-expansion-to-defined $< -o $@ $(LDFLAGS) + diff --git a/tests/tcg/hexagon/circ.c b/tests/tcg/hexagon/circ.c index 67a1aa3054..354416eb6d 100644 --- a/tests/tcg/hexagon/circ.c +++ b/tests/tcg/hexagon/circ.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -415,7 +415,8 @@ static void circ_test_v3(void) { int *p = wbuf; int size = 15; - int K = 4; /* 64 bytes */ + /* set high bit in K to test unsigned extract in fcirc */ + int K = 8; /* 1024 bytes */ int element; int i; diff --git a/tests/tcg/hexagon/fpstuff.c b/tests/tcg/hexagon/fpstuff.c index 0dff429f4c..56bf562a40 100644 --- a/tests/tcg/hexagon/fpstuff.c +++ b/tests/tcg/hexagon/fpstuff.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2020-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2020-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -38,8 +38,11 @@ const int SF_NaN_special = 0x7f800001; const int SF_ANY = 0x3f800000; const int SF_HEX_NAN = 0xffffffff; const int SF_small_neg = 0xab98fba8; +const int SF_denorm = 0x00000001; +const int SF_random = 0x346001d6; -const long long DF_NaN = 0x7ff8000000000000ULL; +const long long DF_QNaN = 0x7ff8000000000000ULL; +const long long DF_SNaN = 0x7ff7000000000000ULL; const long long DF_ANY = 0x3f80000000000000ULL; const long long DF_HEX_NAN = 0xffffffffffffffffULL; const long long DF_small_neg = 0xbd731f7500000000ULL; @@ -126,7 +129,7 @@ static void check_compare_exception(void) "p0 = dfcmp.eq(%2, %3)\n\t" "%0 = p0\n\t" "%1 = usr\n\t" - : "=r"(cmp), "=r"(usr) : "r"(DF_NaN), "r"(DF_ANY) + : "=r"(cmp), "=r"(usr) : "r"(DF_QNaN), "r"(DF_ANY) : "r2", "p0", "usr"); check32(cmp, 0); check_fpstatus(usr, 0); @@ -135,7 +138,7 @@ static void check_compare_exception(void) "p0 = dfcmp.gt(%2, %3)\n\t" "%0 = p0\n\t" "%1 = usr\n\t" - : "=r"(cmp), "=r"(usr) : "r"(DF_NaN), "r"(DF_ANY) + : "=r"(cmp), "=r"(usr) : "r"(DF_QNaN), "r"(DF_ANY) : "r2", "p0", "usr"); check32(cmp, 0); check_fpstatus(usr, 0); @@ -144,7 +147,7 @@ static void check_compare_exception(void) "p0 = dfcmp.ge(%2, %3)\n\t" "%0 = p0\n\t" "%1 = usr\n\t" - : "=r"(cmp), "=r"(usr) : "r"(DF_NaN), "r"(DF_ANY) + : "=r"(cmp), "=r"(usr) : "r"(DF_QNaN), "r"(DF_ANY) : "r2", "p0", "usr"); check32(cmp, 0); check_fpstatus(usr, 0); @@ -206,7 +209,7 @@ static void check_dfminmax(void) int usr; /* - * Execute dfmin/dfmax instructions with one operand as NaN + * Execute dfmin/dfmax instructions with one operand as SNaN * Check that * Result is the other operand * Invalid bit in USR is set @@ -214,7 +217,7 @@ static void check_dfminmax(void) asm (CLEAR_FPSTATUS "%0 = dfmin(%2, %3)\n\t" "%1 = usr\n\t" - : "=r"(minmax), "=r"(usr) : "r"(DF_NaN), "r"(DF_ANY) + : "=r"(minmax), "=r"(usr) : "r"(DF_SNaN), "r"(DF_ANY) : "r2", "usr"); check64(minmax, DF_ANY); check_fpstatus(usr, FPINVF); @@ -222,13 +225,35 @@ static void check_dfminmax(void) asm (CLEAR_FPSTATUS "%0 = dfmax(%2, %3)\n\t" "%1 = usr\n\t" - : "=r"(minmax), "=r"(usr) : "r"(DF_NaN), "r"(DF_ANY) + : "=r"(minmax), "=r"(usr) : "r"(DF_SNaN), "r"(DF_ANY) : "r2", "usr"); check64(minmax, DF_ANY); check_fpstatus(usr, FPINVF); /* - * Execute dfmin/dfmax instructions with both operands NaN + * Execute dfmin/dfmax instructions with one operand as QNaN + * Check that + * Result is the other operand + * No bit in USR is set + */ + asm (CLEAR_FPSTATUS + "%0 = dfmin(%2, %3)\n\t" + "%1 = usr\n\t" + : "=r"(minmax), "=r"(usr) : "r"(DF_QNaN), "r"(DF_ANY) + : "r2", "usr"); + check64(minmax, DF_ANY); + check_fpstatus(usr, 0); + + asm (CLEAR_FPSTATUS + "%0 = dfmax(%2, %3)\n\t" + "%1 = usr\n\t" + : "=r"(minmax), "=r"(usr) : "r"(DF_QNaN), "r"(DF_ANY) + : "r2", "usr"); + check64(minmax, DF_ANY); + check_fpstatus(usr, 0); + + /* + * Execute dfmin/dfmax instructions with both operands SNaN * Check that * Result is DF_HEX_NAN * Invalid bit in USR is set @@ -236,7 +261,7 @@ static void check_dfminmax(void) asm (CLEAR_FPSTATUS "%0 = dfmin(%2, %3)\n\t" "%1 = usr\n\t" - : "=r"(minmax), "=r"(usr) : "r"(DF_NaN), "r"(DF_NaN) + : "=r"(minmax), "=r"(usr) : "r"(DF_SNaN), "r"(DF_SNaN) : "r2", "usr"); check64(minmax, DF_HEX_NAN); check_fpstatus(usr, FPINVF); @@ -244,16 +269,39 @@ static void check_dfminmax(void) asm (CLEAR_FPSTATUS "%0 = dfmax(%2, %3)\n\t" "%1 = usr\n\t" - : "=r"(minmax), "=r"(usr) : 
"r"(DF_NaN), "r"(DF_NaN) + : "=r"(minmax), "=r"(usr) : "r"(DF_SNaN), "r"(DF_SNaN) : "r2", "usr"); check64(minmax, DF_HEX_NAN); check_fpstatus(usr, FPINVF); + + /* + * Execute dfmin/dfmax instructions with both operands QNaN + * Check that + * Result is DF_HEX_NAN + * No bit in USR is set + */ + asm (CLEAR_FPSTATUS + "%0 = dfmin(%2, %3)\n\t" + "%1 = usr\n\t" + : "=r"(minmax), "=r"(usr) : "r"(DF_QNaN), "r"(DF_QNaN) + : "r2", "usr"); + check64(minmax, DF_HEX_NAN); + check_fpstatus(usr, 0); + + asm (CLEAR_FPSTATUS + "%0 = dfmax(%2, %3)\n\t" + "%1 = usr\n\t" + : "=r"(minmax), "=r"(usr) : "r"(DF_QNaN), "r"(DF_QNaN) + : "r2", "usr"); + check64(minmax, DF_HEX_NAN); + check_fpstatus(usr, 0); } -static void check_recip_exception(void) +static void check_sfrecipa(void) { int result; int usr; + int pred; /* * Check that sfrecipa doesn't set status bits when @@ -329,6 +377,17 @@ static void check_recip_exception(void) : "r2", "p0", "usr"); check32(result, 0x3f800000); check_fpstatus(usr, 0); + + /* + * Check that sfrecipa properly handles denorm + */ + asm (CLEAR_FPSTATUS + "%0,p0 = sfrecipa(%2, %3)\n\t" + "%1 = p0\n\t" + : "=r"(result), "=r"(pred) : "r"(SF_denorm), "r"(SF_random) + : "p0", "usr"); + check32(result, 0x6a920001); + check32(pred, 0x80); } static void check_canonical_NaN(void) @@ -411,7 +470,7 @@ static void check_canonical_NaN(void) asm(CLEAR_FPSTATUS "%0 = convert_df2sf(%2)\n\t" "%1 = usr\n\t" - : "=r"(sf_result), "=r"(usr) : "r"(DF_NaN) + : "=r"(sf_result), "=r"(usr) : "r"(DF_QNaN) : "r2", "usr"); check32(sf_result, SF_HEX_NAN); check_fpstatus(usr, 0); @@ -419,7 +478,7 @@ static void check_canonical_NaN(void) asm(CLEAR_FPSTATUS "%0 = dfadd(%2, %3)\n\t" "%1 = usr\n\t" - : "=r"(df_result), "=r"(usr) : "r"(DF_NaN), "r"(DF_ANY) + : "=r"(df_result), "=r"(usr) : "r"(DF_QNaN), "r"(DF_ANY) : "r2", "usr"); check64(df_result, DF_HEX_NAN); check_fpstatus(usr, 0); @@ -427,7 +486,7 @@ static void check_canonical_NaN(void) asm(CLEAR_FPSTATUS "%0 = dfsub(%2, %3)\n\t" "%1 = usr\n\t" - : "=r"(df_result), "=r"(usr) : "r"(DF_NaN), "r"(DF_ANY) + : "=r"(df_result), "=r"(usr) : "r"(DF_QNaN), "r"(DF_ANY) : "r2", "usr"); check64(df_result, DF_HEX_NAN); check_fpstatus(usr, 0); @@ -455,6 +514,28 @@ static void check_invsqrta(void) check32(predval, 0x0); } +static void check_sffixupn(void) +{ + int result; + + /* Check that sffixupn properly deals with denorm */ + asm volatile("%0 = sffixupn(%1, %2)\n\t" + : "=r"(result) + : "r"(SF_random), "r"(SF_denorm)); + check32(result, 0x246001d6); +} + +static void check_sffixupd(void) +{ + int result; + + /* Check that sffixupd properly deals with denorm */ + asm volatile("%0 = sffixupd(%1, %2)\n\t" + : "=r"(result) + : "r"(SF_denorm), "r"(SF_random)); + check32(result, 0x146001d6); +} + static void check_float2int_convs() { int res32; @@ -567,7 +648,7 @@ static void check_float2int_convs() asm(CLEAR_FPSTATUS "%0 = convert_df2w(%2)\n\t" "%1 = usr\n\t" - : "=r"(res32), "=r"(usr) : "r"(DF_NaN) + : "=r"(res32), "=r"(usr) : "r"(DF_QNaN) : "r2", "usr"); check32(res32, -1); check_fpstatus(usr, FPINVF); @@ -575,7 +656,7 @@ static void check_float2int_convs() asm(CLEAR_FPSTATUS "%0 = convert_df2w(%2):chop\n\t" "%1 = usr\n\t" - : "=r"(res32), "=r"(usr) : "r"(DF_NaN) + : "=r"(res32), "=r"(usr) : "r"(DF_QNaN) : "r2", "usr"); check32(res32, -1); check_fpstatus(usr, FPINVF); @@ -583,7 +664,7 @@ static void check_float2int_convs() asm(CLEAR_FPSTATUS "%0 = convert_df2d(%2)\n\t" "%1 = usr\n\t" - : "=r"(res64), "=r"(usr) : "r"(DF_NaN) + : "=r"(res64), "=r"(usr) : "r"(DF_QNaN) : "r2", 
"usr"); check64(res64, -1); check_fpstatus(usr, FPINVF); @@ -591,7 +672,7 @@ static void check_float2int_convs() asm(CLEAR_FPSTATUS "%0 = convert_df2d(%2):chop\n\t" "%1 = usr\n\t" - : "=r"(res64), "=r"(usr) : "r"(DF_NaN) + : "=r"(res64), "=r"(usr) : "r"(DF_QNaN) : "r2", "usr"); check64(res64, -1); check_fpstatus(usr, FPINVF); @@ -602,9 +683,11 @@ int main() check_compare_exception(); check_sfminmax(); check_dfminmax(); - check_recip_exception(); + check_sfrecipa(); check_canonical_NaN(); check_invsqrta(); + check_sffixupn(); + check_sffixupd(); check_float2int_convs(); puts(err ? "FAIL" : "PASS"); diff --git a/tests/tcg/hexagon/hvx_misc.c b/tests/tcg/hexagon/hvx_misc.c index 312bb98b41..b896f5897e 100644 --- a/tests/tcg/hexagon/hvx_misc.c +++ b/tests/tcg/hexagon/hvx_misc.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2021-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -19,6 +19,7 @@ #include <stdint.h> #include <stdbool.h> #include <string.h> +#include <limits.h> int err; @@ -432,6 +433,71 @@ TEST_PRED_OP2(pred_and, and, &, "") TEST_PRED_OP2(pred_and_n, and, &, "!") TEST_PRED_OP2(pred_xor, xor, ^, "") +static void test_vadduwsat(void) +{ + /* + * Test for saturation by adding two numbers that add to more than UINT_MAX + * and make sure the result saturates to UINT_MAX + */ + const uint32_t x = 0xffff0000; + const uint32_t y = 0x000fffff; + + memset(expect, 0x12, sizeof(MMVector)); + memset(output, 0x34, sizeof(MMVector)); + + asm volatile ("v10 = vsplat(%0)\n\t" + "v11 = vsplat(%1)\n\t" + "v21.uw = vadd(v11.uw, v10.uw):sat\n\t" + "vmem(%2+#0) = v21\n\t" + : /* no outputs */ + : "r"(x), "r"(y), "r"(output) + : "v10", "v11", "v21", "memory"); + + for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) { + expect[0].uw[j] = UINT_MAX; + } + + check_output_w(__LINE__, 1); +} + +static void test_vsubuwsat_dv(void) +{ + /* + * Test for saturation by subtracting two numbers where the result is + * negative and make sure the result saturates to zero + * + * vsubuwsat_dv operates on an HVX register pair, so we'll have a + * pair of subtractions + * w - x < 0 + * y - z < 0 + */ + const uint32_t w = 0x000000b7; + const uint32_t x = 0xffffff4e; + const uint32_t y = 0x31fe88e7; + const uint32_t z = 0x7fffff79; + + memset(expect, 0x12, sizeof(MMVector) * 2); + memset(output, 0x34, sizeof(MMVector) * 2); + + asm volatile ("v16 = vsplat(%0)\n\t" + "v17 = vsplat(%1)\n\t" + "v26 = vsplat(%2)\n\t" + "v27 = vsplat(%3)\n\t" + "v25:24.uw = vsub(v17:16.uw, v27:26.uw):sat\n\t" + "vmem(%4+#0) = v24\n\t" + "vmem(%4+#1) = v25\n\t" + : /* no outputs */ + : "r"(w), "r"(y), "r"(x), "r"(z), "r"(output) + : "v16", "v17", "v24", "v25", "v26", "v27", "memory"); + + for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) { + expect[0].uw[j] = 0x00000000; + expect[1].uw[j] = 0x00000000; + } + + check_output_w(__LINE__, 2); +} + int main() { init_buffers(); @@ -464,6 +530,9 @@ int main() test_pred_and_n(true); test_pred_xor(false); + test_vadduwsat(); + test_vsubuwsat_dv(); + puts(err ? "FAIL" : "PASS"); return err ? 1 : 0; } diff --git a/tests/tcg/hexagon/overflow.c b/tests/tcg/hexagon/overflow.c index 196fcf7f3a..94087851b0 100644 --- a/tests/tcg/hexagon/overflow.c +++ b/tests/tcg/hexagon/overflow.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2021 Qualcomm Innovation Center, Inc. All Rights Reserved. 
+ * Copyright(c) 2021-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -72,6 +72,20 @@ int read_usr_overflow(void) return result & 1; } +int get_usr_overflow(int usr) +{ + return usr & 1; +} + +int get_usr_fp_invalid(int usr) +{ + return (usr >> 1) & 1; +} + +int get_usr_lpcfg(int usr) +{ + return (usr >> 8) & 0x3; +} jmp_buf jmp_env; int usr_overflow; @@ -82,6 +96,49 @@ static void sig_segv(int sig, siginfo_t *info, void *puc) longjmp(jmp_env, 1); } +static void test_packet(void) +{ + int convres; + int satres; + int usr; + + asm("r2 = usr\n\t" + "r2 = clrbit(r2, #0)\n\t" /* clear overflow bit */ + "r2 = clrbit(r2, #1)\n\t" /* clear FP invalid bit */ + "usr = r2\n\t" + "{\n\t" + " %0 = convert_sf2uw(%3):chop\n\t" + " %1 = satb(%4)\n\t" + "}\n\t" + "%2 = usr\n\t" + : "=r"(convres), "=r"(satres), "=r"(usr) + : "r"(0x6a051b86), "r"(0x0410eec0) + : "r2", "usr"); + + check(convres, 0xffffffff); + check(satres, 0x7f); + check(get_usr_overflow(usr), 1); + check(get_usr_fp_invalid(usr), 1); + + asm("r2 = usr\n\t" + "r2 = clrbit(r2, #0)\n\t" /* clear overflow bit */ + "usr = r2\n\t" + "%2 = r2\n\t" + "p3 = sp3loop0(1f, #1)\n\t" + "1:\n\t" + "{\n\t" + " %0 = satb(%2)\n\t" + "}:endloop0\n\t" + "%1 = usr\n\t" + : "=r"(satres), "=r"(usr) + : "r"(0x0410eec0) + : "r2", "usr", "p3", "sa0", "lc0"); + + check(satres, 0x7f); + check(get_usr_overflow(usr), 1); + check(get_usr_lpcfg(usr), 2); +} + int main() { struct sigaction act; @@ -102,6 +159,8 @@ int main() check(usr_overflow, 0); + test_packet(); + puts(err ? "FAIL" : "PASS"); return err ? EXIT_FAILURE : EXIT_SUCCESS; } diff --git a/tests/tcg/hexagon/preg_alias.c b/tests/tcg/hexagon/preg_alias.c index 0cac469b78..b44a8112b4 100644 --- a/tests/tcg/hexagon/preg_alias.c +++ b/tests/tcg/hexagon/preg_alias.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -57,17 +57,15 @@ typedef union { static inline void creg_alias(int cval, PRegs *pregs) { - unsigned char val; - asm volatile("c4 = %0" : : "r"(cval)); - - asm volatile("%0 = p0" : "=r"(val)); - pregs->pregs.p0 = val; - asm volatile("%0 = p1" : "=r"(val)); - pregs->pregs.p1 = val; - asm volatile("%0 = p2" : "=r"(val)); - pregs->pregs.p2 = val; - asm volatile("%0 = p3" : "=r"(val)); - pregs->pregs.p3 = val; + asm("c4 = %4\n\t" + "%0 = p0\n\t" + "%1 = p1\n\t" + "%2 = p2\n\t" + "%3 = p3\n\t" + : "=r"(pregs->pregs.p0), "=r"(pregs->pregs.p1), + "=r"(pregs->pregs.p2), "=r"(pregs->pregs.p3) + : "r"(cval) + : "p0", "p1", "p2", "p3"); } int err; @@ -83,22 +81,58 @@ static void check(int val, int expect) static inline void creg_alias_pair(unsigned int cval, PRegs *pregs) { unsigned long long cval_pair = (0xdeadbeefULL << 32) | cval; - unsigned char val; int c5; - asm volatile("c5:4 = %0" : : "r"(cval_pair)); - - asm volatile("%0 = p0" : "=r"(val)); - pregs->pregs.p0 = val; - asm volatile("%0 = p1" : "=r"(val)); - pregs->pregs.p1 = val; - asm volatile("%0 = p2" : "=r"(val)); - pregs->pregs.p2 = val; - asm volatile("%0 = p3" : "=r"(val)); - pregs->pregs.p3 = val; - asm volatile("%0 = c5" : "=r"(c5)); + + asm ("c5:4 = %5\n\t" + "%0 = p0\n\t" + "%1 = p1\n\t" + "%2 = p2\n\t" + "%3 = p3\n\t" + "%4 = c5\n\t" + : "=r"(pregs->pregs.p0), "=r"(pregs->pregs.p1), + "=r"(pregs->pregs.p2), "=r"(pregs->pregs.p3), "=r"(c5) + : "r"(cval_pair) + : "p0", "p1", "p2", "p3"); + check(c5, 0xdeadbeef); } +static void test_packet(void) +{ + /* + * Test that setting c4 inside a packet doesn't impact the predicates + * that are read during the packet. + */ + + int result; + int old_val = 0x0000001c; + + /* Test a predicated register transfer */ + result = old_val; + asm ( + "c4 = %1\n\t" + "{\n\t" + " c4 = %2\n\t" + " if (!p2) %0 = %3\n\t" + "}\n\t" + : "+r"(result) + : "r"(0xffffffff), "r"(0xff00ffff), "r"(0x837ed653) + : "p0", "p1", "p2", "p3"); + check(result, old_val); + + /* Test a predicated store */ + result = 0xffffffff; + asm ("c4 = %0\n\t" + "{\n\t" + " c4 = %1\n\t" + " if (!p2) memw(%2) = #0\n\t" + "}\n\t" + : + : "r"(0), "r"(0xffffffff), "r"(&result) + : "p0", "p1", "p2", "p3", "memory"); + check(result, 0x0); +} + int main() { int c4; @@ -164,6 +198,8 @@ int main() creg_alias_pair(0xffffffff, &pregs); check(pregs.creg, 0xffffffff); + test_packet(); + puts(err ? "FAIL" : "PASS"); return err; } diff --git a/tests/tcg/hexagon/usr.c b/tests/tcg/hexagon/usr.c new file mode 100644 index 0000000000..a531511cec --- /dev/null +++ b/tests/tcg/hexagon/usr.c @@ -0,0 +1,1141 @@ +/* + * Copyright(c) 2022 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +/* + * Test instructions that might set bits in user status register (USR) + */ + +#include <stdio.h> +#include <stdint.h> + +int err; + +static void __check(int line, uint32_t val, uint32_t expect) +{ + if (val != expect) { + printf("ERROR at line %d: %d != %d\n", line, val, expect); + err++; + } +} + +#define check(RES, EXP) __check(__LINE__, RES, EXP) + +static void __check32(int line, uint32_t val, uint32_t expect) +{ + if (val != expect) { + printf("ERROR at line %d: 0x%08x != 0x%08x\n", line, val, expect); + err++; + } +} + +#define check32(RES, EXP) __check32(__LINE__, RES, EXP) + +static void __check64(int line, uint64_t val, uint64_t expect) +{ + if (val != expect) { + printf("ERROR at line %d: 0x%016llx != 0x%016llx\n", line, val, expect); + err++; + } +} + +#define check64(RES, EXP) __check64(__LINE__, RES, EXP) + +/* + * Some of the instructions tested are only available on certain versions + * of the Hexagon core + */ +#define CORE_HAS_AUDIO (__HEXAGON_ARCH__ >= 67 && defined(__HEXAGON_AUDIO__)) +#define CORE_IS_V67 (__HEXAGON_ARCH__ >= 67) + +/* Define the bits in Hexagon USR register */ +#define USR_OVF_BIT 0 /* Sticky saturation overflow */ +#define USR_FPINVF_BIT 1 /* IEEE FP invalid sticky flag */ +#define USR_FPDBZF_BIT 2 /* IEEE FP divide-by-zero sticky flag */ +#define USR_FPOVFF_BIT 3 /* IEEE FP overflow sticky flag */ +#define USR_FPUNFF_BIT 4 /* IEEE FP underflow sticky flag */ +#define USR_FPINPF_BIT 5 /* IEEE FP inexact sticky flag */ + +/* Corresponding values in USR */ +#define USR_CLEAR 0 +#define USR_OVF (1 << USR_OVF_BIT) +#define USR_FPINVF (1 << USR_FPINVF_BIT) +#define USR_FPDBZF (1 << USR_FPDBZF_BIT) +#define USR_FPOVFF (1 << USR_FPOVFF_BIT) +#define USR_FPUNFF (1 << USR_FPUNFF_BIT) +#define USR_FPINPF (1 << USR_FPINPF_BIT) + +/* Some useful floating point values */ +const uint32_t SF_INF = 0x7f800000; +const uint32_t SF_QNaN = 0x7fc00000; +const uint32_t SF_SNaN = 0x7fb00000; +const uint32_t SF_QNaN_neg = 0xffc00000; +const uint32_t SF_SNaN_neg = 0xffb00000; +const uint32_t SF_HEX_NaN = 0xffffffff; +const uint32_t SF_zero = 0x00000000; +const uint32_t SF_one = 0x3f800000; +const uint32_t SF_one_recip = 0x3f7f0001; /* 0.9960... */ +const uint32_t SF_one_invsqrta = 0x3f7f0000; /* 0.99609375 */ +const uint32_t SF_two = 0x40000000; +const uint32_t SF_four = 0x40800000; +const uint32_t SF_small_neg = 0xab98fba8; +const uint32_t SF_large_pos = 0x5afa572e; + +const uint64_t DF_QNaN = 0x7ff8000000000000ULL; +const uint64_t DF_SNaN = 0x7ff7000000000000ULL; +const uint64_t DF_QNaN_neg = 0xfff8000000000000ULL; +const uint64_t DF_SNaN_neg = 0xfff7000000000000ULL; +const uint64_t DF_HEX_NaN = 0xffffffffffffffffULL; +const uint64_t DF_zero = 0x0000000000000000ULL; +const uint64_t DF_any = 0x3f80000000000000ULL; +const uint64_t DF_one = 0x3ff0000000000000ULL; +const uint64_t DF_one_hh = 0x3ff001ff80000000ULL; /* 1.00048... */ +const uint64_t DF_small_neg = 0xbd731f7500000000ULL; +const uint64_t DF_large_pos = 0x7f80000000000001ULL; + +/* + * Templates for functions to execute an instruction + * + * The templates vary by the number of arguments and the types of the args + * and result. 
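 *
 * For instance, FUNC_R_OP_R(satub, "%0 = satub(%2)") below expands
 * (mechanically, per the template definition that follows) to roughly:
 *
 *   static uint32_t satub(uint32_t src, uint32_t *usr_result)
 *   {
 *       uint32_t result;
 *       uint32_t usr;
 *       asm(CLEAR_USRBITS
 *           "%0 = satub(%2)\n\t"
 *           "%1 = usr\n\t"
 *           : "=r"(result), "=r"(usr)
 *           : "r"(src)
 *           : "r2", "usr");
 *       *usr_result = usr & 0x3f;
 *       return result;
 *   }
 *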
We use one letter in the macro name for the result and each + * argument: + * x unknown (specified in a subsequent template) or don't care + * R register (32 bits) + * P pair (64 bits) + * p predicate + * I immediate + * Xx read/write + */ + +/* Clear bits 0-5 in USR */ +#define CLEAR_USRBITS \ + "r2 = usr\n\t" \ + "r2 = and(r2, #0xffffffc0)\n\t" \ + "usr = r2\n\t" + +/* Template for instructions with one register operand */ +#define FUNC_x_OP_x(RESTYPE, SRCTYPE, NAME, INSN) \ +static RESTYPE NAME(SRCTYPE src, uint32_t *usr_result) \ +{ \ + RESTYPE result; \ + uint32_t usr; \ + asm(CLEAR_USRBITS \ + INSN "\n\t" \ + "%1 = usr\n\t" \ + : "=r"(result), "=r"(usr) \ + : "r"(src) \ + : "r2", "usr"); \ + *usr_result = usr & 0x3f; \ + return result; \ +} + +#define FUNC_R_OP_R(NAME, INSN) \ +FUNC_x_OP_x(uint32_t, uint32_t, NAME, INSN) + +#define FUNC_R_OP_P(NAME, INSN) \ +FUNC_x_OP_x(uint32_t, uint64_t, NAME, INSN) + +#define FUNC_P_OP_P(NAME, INSN) \ +FUNC_x_OP_x(uint64_t, uint64_t, NAME, INSN) + +#define FUNC_P_OP_R(NAME, INSN) \ +FUNC_x_OP_x(uint64_t, uint32_t, NAME, INSN) + +/* + * Template for instructions with a register and predicate result + * and one register operand + */ +#define FUNC_xp_OP_x(RESTYPE, SRCTYPE, NAME, INSN) \ +static RESTYPE NAME(SRCTYPE src, uint8_t *pred_result, uint32_t *usr_result) \ +{ \ + RESTYPE result; \ + uint8_t pred; \ + uint32_t usr; \ + asm(CLEAR_USRBITS \ + INSN "\n\t" \ + "%1 = p2\n\t" \ + "%2 = usr\n\t" \ + : "=r"(result), "=r"(pred), "=r"(usr) \ + : "r"(src) \ + : "r2", "p2", "usr"); \ + *pred_result = pred; \ + *usr_result = usr & 0x3f; \ + return result; \ +} + +#define FUNC_Rp_OP_R(NAME, INSN) \ +FUNC_xp_OP_x(uint32_t, uint32_t, NAME, INSN) + +/* Template for instructions with two register operands */ +#define FUNC_x_OP_xx(RESTYPE, SRC1TYPE, SRC2TYPE, NAME, INSN) \ +static RESTYPE NAME(SRC1TYPE src1, SRC2TYPE src2, uint32_t *usr_result) \ +{ \ + RESTYPE result; \ + uint32_t usr; \ + asm(CLEAR_USRBITS \ + INSN "\n\t" \ + "%1 = usr\n\t" \ + : "=r"(result), "=r"(usr) \ + : "r"(src1), "r"(src2) \ + : "r2", "usr"); \ + *usr_result = usr & 0x3f; \ + return result; \ +} + +#define FUNC_P_OP_PP(NAME, INSN) \ +FUNC_x_OP_xx(uint64_t, uint64_t, uint64_t, NAME, INSN) + +#define FUNC_R_OP_PP(NAME, INSN) \ +FUNC_x_OP_xx(uint32_t, uint64_t, uint64_t, NAME, INSN) + +#define FUNC_P_OP_RR(NAME, INSN) \ +FUNC_x_OP_xx(uint64_t, uint32_t, uint32_t, NAME, INSN) + +#define FUNC_R_OP_RR(NAME, INSN) \ +FUNC_x_OP_xx(uint32_t, uint32_t, uint32_t, NAME, INSN) + +#define FUNC_R_OP_PR(NAME, INSN) \ +FUNC_x_OP_xx(uint32_t, uint64_t, uint32_t, NAME, INSN) + +#define FUNC_P_OP_PR(NAME, INSN) \ +FUNC_x_OP_xx(uint64_t, uint64_t, uint32_t, NAME, INSN) + +/* + * Template for instructions with a register and predicate result + * and two register operands + */ +#define FUNC_xp_OP_xx(RESTYPE, SRC1TYPE, SRC2TYPE, NAME, INSN) \ +static RESTYPE NAME(SRC1TYPE src1, SRC2TYPE src2, \ + uint8_t *pred_result, uint32_t *usr_result) \ +{ \ + RESTYPE result; \ + uint8_t pred; \ + uint32_t usr; \ + asm(CLEAR_USRBITS \ + INSN "\n\t" \ + "%1 = p2\n\t" \ + "%2 = usr\n\t" \ + : "=r"(result), "=r"(pred), "=r"(usr) \ + : "r"(src1), "r"(src2) \ + : "r2", "p2", "usr"); \ + *pred_result = pred; \ + *usr_result = usr & 0x3f; \ + return result; \ +} + +#define FUNC_Rp_OP_RR(NAME, INSN) \ +FUNC_xp_OP_xx(uint32_t, uint32_t, uint32_t, NAME, INSN) + +/* Template for instructions with one register and one immediate */ +#define FUNC_x_OP_xI(RESTYPE, SRC1TYPE, NAME, INSN) \ +static RESTYPE NAME(SRC1TYPE src1, int32_t 
src2, uint32_t *usr_result) \ +{ \ + RESTYPE result; \ + uint32_t usr; \ + asm(CLEAR_USRBITS \ + INSN "\n\t" \ + "%1 = usr\n\t" \ + : "=r"(result), "=r"(usr) \ + : "r"(src1), "i"(src2) \ + : "r2", "usr"); \ + *usr_result = usr & 0x3f; \ + return result; \ +} + +#define FUNC_R_OP_RI(NAME, INSN) \ +FUNC_x_OP_xI(uint32_t, uint32_t, NAME, INSN) + +#define FUNC_R_OP_PI(NAME, INSN) \ +FUNC_x_OP_xI(uint32_t, uint64_t, NAME, INSN) + +/* + * Template for instructions with a read/write result + * and two register operands + */ +#define FUNC_Xx_OP_xx(RESTYPE, SRC1TYPE, SRC2TYPE, NAME, INSN) \ +static RESTYPE NAME(RESTYPE result, SRC1TYPE src1, SRC2TYPE src2, \ + uint32_t *usr_result) \ +{ \ + uint32_t usr; \ + asm(CLEAR_USRBITS \ + INSN "\n\t" \ + "%1 = usr\n\t" \ + : "+r"(result), "=r"(usr) \ + : "r"(src1), "r"(src2) \ + : "r2", "usr"); \ + *usr_result = usr & 0x3f; \ + return result; \ +} + +#define FUNC_XR_OP_RR(NAME, INSN) \ +FUNC_Xx_OP_xx(uint32_t, uint32_t, uint32_t, NAME, INSN) + +#define FUNC_XP_OP_PP(NAME, INSN) \ +FUNC_Xx_OP_xx(uint64_t, uint64_t, uint64_t, NAME, INSN) + +#define FUNC_XP_OP_RR(NAME, INSN) \ +FUNC_Xx_OP_xx(uint64_t, uint32_t, uint32_t, NAME, INSN) + +/* + * Template for instructions with a read/write result + * and two register operands + */ +#define FUNC_Xxp_OP_xx(RESTYPE, SRC1TYPE, SRC2TYPE, NAME, INSN) \ +static RESTYPE NAME(RESTYPE result, SRC1TYPE src1, SRC2TYPE src2, \ + uint8_t *pred_result, uint32_t *usr_result) \ +{ \ + uint32_t usr; \ + uint8_t pred; \ + asm(CLEAR_USRBITS \ + INSN "\n\t" \ + "%1 = p2\n\t" \ + "%2 = usr\n\t" \ + : "+r"(result), "=r"(pred), "=r"(usr) \ + : "r"(src1), "r"(src2) \ + : "r2", "usr"); \ + *pred_result = pred; \ + *usr_result = usr & 0x3f; \ + return result; \ +} + +#define FUNC_XPp_OP_PP(NAME, INSN) \ +FUNC_Xxp_OP_xx(uint64_t, uint64_t, uint64_t, NAME, INSN) + +/* + * Template for instructions with a read/write result and + * two register and one predicate operands + */ +#define FUNC_Xx_OP_xxp(RESTYPE, SRC1TYPE, SRC2TYPE, NAME, INSN) \ +static RESTYPE NAME(RESTYPE result, SRC1TYPE src1, SRC2TYPE src2, uint8_t pred,\ + uint32_t *usr_result) \ +{ \ + uint32_t usr; \ + asm(CLEAR_USRBITS \ + "p2 = %4\n\t" \ + INSN "\n\t" \ + "%1 = usr\n\t" \ + : "+r"(result), "=r"(usr) \ + : "r"(src1), "r"(src2), "r"(pred) \ + : "r2", "p2", "usr"); \ + *usr_result = usr & 0x3f; \ + return result; \ +} + +#define FUNC_XR_OP_RRp(NAME, INSN) \ +FUNC_Xx_OP_xxp(uint32_t, uint32_t, uint32_t, NAME, INSN) + +/* Template for compare instructions with two register operands */ +#define FUNC_CMP_xx(SRC1TYPE, SRC2TYPE, NAME, INSN) \ +static uint32_t NAME(SRC1TYPE src1, SRC2TYPE src2, uint32_t *usr_result) \ +{ \ + uint32_t result; \ + uint32_t usr; \ + asm(CLEAR_USRBITS \ + INSN "\n\t" \ + "%0 = p1\n\t" \ + "%1 = usr\n\t" \ + : "=r"(result), "=r"(usr) \ + : "r"(src1), "r"(src2) \ + : "p1", "r2", "usr"); \ + *usr_result = usr & 0x3f; \ + return result; \ +} + +#define FUNC_CMP_RR(NAME, INSN) \ +FUNC_CMP_xx(uint32_t, uint32_t, NAME, INSN) + +#define FUNC_CMP_PP(NAME, INSN) \ +FUNC_CMP_xx(uint64_t, uint64_t, NAME, INSN) + +/* + * Function declarations using the templates + */ +FUNC_R_OP_R(satub, "%0 = satub(%2)") +FUNC_P_OP_PP(vaddubs, "%0 = vaddub(%2, %3):sat") +FUNC_P_OP_PP(vadduhs, "%0 = vadduh(%2, %3):sat") +FUNC_P_OP_PP(vsububs, "%0 = vsubub(%2, %3):sat") +FUNC_P_OP_PP(vsubuhs, "%0 = vsubuh(%2, %3):sat") + +/* Add vector of half integers with saturation and pack to unsigned bytes */ +FUNC_R_OP_PP(vaddhubs, "%0 = vaddhub(%2, %3):sat") + +/* Vector saturate half to 
unsigned byte */ +FUNC_R_OP_P(vsathub, "%0 = vsathub(%2)") + +/* Similar to above but takes a 32-bit argument */ +FUNC_R_OP_R(svsathub, "%0 = vsathub(%2)") + +/* Vector saturate word to unsigned half */ +FUNC_P_OP_P(vsatwuh_nopack, "%0 = vsatwuh(%2)") + +/* Similar to above but returns a 32-bit result */ +FUNC_R_OP_P(vsatwuh, "%0 = vsatwuh(%2)") + +/* Vector arithmetic shift halfwords with saturate and pack */ +FUNC_R_OP_PI(asrhub_sat, "%0 = vasrhub(%2, #%3):sat") + +/* Vector arithmetic shift halfwords with round, saturate and pack */ +FUNC_R_OP_PI(asrhub_rnd_sat, "%0 = vasrhub(%2, #%3):raw") + +FUNC_R_OP_RR(addsat, "%0 = add(%2, %3):sat") +/* Similar to above but with register pairs */ +FUNC_P_OP_PP(addpsat, "%0 = add(%2, %3):sat") + +FUNC_XR_OP_RR(mpy_acc_sat_hh_s0, "%0 += mpy(%2.H, %3.H):sat") +FUNC_R_OP_RR(mpy_sat_hh_s1, "%0 = mpy(%2.H, %3.H):<<1:sat") +FUNC_R_OP_RR(mpy_sat_rnd_hh_s1, "%0 = mpy(%2.H, %3.H):<<1:rnd:sat") +FUNC_R_OP_RR(mpy_up_s1_sat, "%0 = mpy(%2, %3):<<1:sat") +FUNC_P_OP_RR(vmpy2s_s1, "%0 = vmpyh(%2, %3):<<1:sat") +FUNC_P_OP_RR(vmpy2su_s1, "%0 = vmpyhsu(%2, %3):<<1:sat") +FUNC_R_OP_RR(vmpy2s_s1pack, "%0 = vmpyh(%2, %3):<<1:rnd:sat") +FUNC_P_OP_PP(vmpy2es_s1, "%0 = vmpyeh(%2, %3):<<1:sat") +FUNC_R_OP_PP(vdmpyrs_s1, "%0 = vdmpy(%2, %3):<<1:rnd:sat") +FUNC_XP_OP_PP(vdmacs_s0, "%0 += vdmpy(%2, %3):sat") +FUNC_R_OP_RR(cmpyrs_s0, "%0 = cmpy(%2, %3):rnd:sat") +FUNC_XP_OP_RR(cmacs_s0, "%0 += cmpy(%2, %3):sat") +FUNC_XP_OP_RR(cnacs_s0, "%0 -= cmpy(%2, %3):sat") +FUNC_P_OP_PP(vrcmpys_s1_h, "%0 = vrcmpys(%2, %3):<<1:sat:raw:hi") +FUNC_XP_OP_PP(mmacls_s0, "%0 += vmpyweh(%2, %3):sat") +FUNC_R_OP_RR(hmmpyl_rs1, "%0 = mpy(%2, %3.L):<<1:rnd:sat") +FUNC_XP_OP_PP(mmaculs_s0, "%0 += vmpyweuh(%2, %3):sat") +FUNC_R_OP_PR(cmpyi_wh, "%0 = cmpyiwh(%2, %3):<<1:rnd:sat") +FUNC_P_OP_PP(vcmpy_s0_sat_i, "%0 = vcmpyi(%2, %3):sat") +FUNC_P_OP_PR(vcrotate, "%0 = vcrotate(%2, %3)") +FUNC_P_OP_PR(vcnegh, "%0 = vcnegh(%2, %3)") + +#if CORE_HAS_AUDIO +FUNC_R_OP_PP(wcmpyrw, "%0 = cmpyrw(%2, %3):<<1:sat") +#endif + +FUNC_R_OP_RR(addh_l16_sat_ll, "%0 = add(%2.L, %3.L):sat") +FUNC_P_OP_P(vconj, "%0 = vconj(%2):sat") +FUNC_P_OP_PP(vxaddsubw, "%0 = vxaddsubw(%2, %3):sat") +FUNC_P_OP_P(vabshsat, "%0 = vabsh(%2):sat") +FUNC_P_OP_PP(vnavgwr, "%0 = vnavgw(%2, %3):rnd:sat") +FUNC_R_OP_RI(round_ri_sat, "%0 = round(%2, #%3):sat") +FUNC_R_OP_RR(asr_r_r_sat, "%0 = asr(%2, %3):sat") + +FUNC_XPp_OP_PP(ACS, "%0, p2 = vacsh(%3, %4)") + +/* Floating point */ +FUNC_R_OP_RR(sfmin, "%0 = sfmin(%2, %3)") +FUNC_R_OP_RR(sfmax, "%0 = sfmax(%2, %3)") +FUNC_R_OP_RR(sfadd, "%0 = sfadd(%2, %3)") +FUNC_R_OP_RR(sfsub, "%0 = sfsub(%2, %3)") +FUNC_R_OP_RR(sfmpy, "%0 = sfmpy(%2, %3)") +FUNC_XR_OP_RR(sffma, "%0 += sfmpy(%2, %3)") +FUNC_XR_OP_RR(sffms, "%0 -= sfmpy(%2, %3)") +FUNC_CMP_RR(sfcmpuo, "p1 = sfcmp.uo(%2, %3)") +FUNC_CMP_RR(sfcmpeq, "p1 = sfcmp.eq(%2, %3)") +FUNC_CMP_RR(sfcmpgt, "p1 = sfcmp.gt(%2, %3)") +FUNC_CMP_RR(sfcmpge, "p1 = sfcmp.ge(%2, %3)") + +FUNC_P_OP_PP(dfadd, "%0 = dfadd(%2, %3)") +FUNC_P_OP_PP(dfsub, "%0 = dfsub(%2, %3)") + +#if CORE_IS_V67 +FUNC_P_OP_PP(dfmin, "%0 = dfmin(%2, %3)") +FUNC_P_OP_PP(dfmax, "%0 = dfmax(%2, %3)") +FUNC_XP_OP_PP(dfmpyhh, "%0 += dfmpyhh(%2, %3)") +#endif + +FUNC_CMP_PP(dfcmpuo, "p1 = dfcmp.uo(%2, %3)") +FUNC_CMP_PP(dfcmpeq, "p1 = dfcmp.eq(%2, %3)") +FUNC_CMP_PP(dfcmpgt, "p1 = dfcmp.gt(%2, %3)") +FUNC_CMP_PP(dfcmpge, "p1 = dfcmp.ge(%2, %3)") + +/* Conversions from sf */ +FUNC_P_OP_R(conv_sf2df, "%0 = convert_sf2df(%2)") +FUNC_R_OP_R(conv_sf2uw, "%0 = convert_sf2uw(%2)") 
+FUNC_R_OP_R(conv_sf2w, "%0 = convert_sf2w(%2)") +FUNC_P_OP_R(conv_sf2ud, "%0 = convert_sf2ud(%2)") +FUNC_P_OP_R(conv_sf2d, "%0 = convert_sf2d(%2)") +FUNC_R_OP_R(conv_sf2uw_chop, "%0 = convert_sf2uw(%2):chop") +FUNC_R_OP_R(conv_sf2w_chop, "%0 = convert_sf2w(%2):chop") +FUNC_P_OP_R(conv_sf2ud_chop, "%0 = convert_sf2ud(%2):chop") +FUNC_P_OP_R(conv_sf2d_chop, "%0 = convert_sf2d(%2):chop") + +/* Conversions from df */ +FUNC_R_OP_P(conv_df2sf, "%0 = convert_df2sf(%2)") +FUNC_R_OP_P(conv_df2uw, "%0 = convert_df2uw(%2)") +FUNC_R_OP_P(conv_df2w, "%0 = convert_df2w(%2)") +FUNC_P_OP_P(conv_df2ud, "%0 = convert_df2ud(%2)") +FUNC_P_OP_P(conv_df2d, "%0 = convert_df2d(%2)") +FUNC_R_OP_P(conv_df2uw_chop, "%0 = convert_df2uw(%2):chop") +FUNC_R_OP_P(conv_df2w_chop, "%0 = convert_df2w(%2):chop") +FUNC_P_OP_P(conv_df2ud_chop, "%0 = convert_df2ud(%2):chop") +FUNC_P_OP_P(conv_df2d_chop, "%0 = convert_df2d(%2):chop") + +/* Integer to float conversions */ +FUNC_R_OP_R(conv_uw2sf, "%0 = convert_uw2sf(%2)") +FUNC_R_OP_R(conv_w2sf, "%0 = convert_w2sf(%2)") +FUNC_R_OP_P(conv_ud2sf, "%0 = convert_ud2sf(%2)") +FUNC_R_OP_P(conv_d2sf, "%0 = convert_d2sf(%2)") + +/* Special purpose floating point instructions */ +FUNC_XR_OP_RRp(sffma_sc, "%0 += sfmpy(%2, %3, p2):scale") +FUNC_Rp_OP_RR(sfrecipa, "%0, p2 = sfrecipa(%3, %4)") +FUNC_R_OP_RR(sffixupn, "%0 = sffixupn(%2, %3)") +FUNC_R_OP_RR(sffixupd, "%0 = sffixupd(%2, %3)") +FUNC_R_OP_R(sffixupr, "%0 = sffixupr(%2)") +FUNC_Rp_OP_R(sfinvsqrta, "%0, p2 = sfinvsqrta(%3)") + +/* + * Templates for test cases + * + * Same naming convention as the function templates + */ +#define TEST_x_OP_x(RESTYPE, CHECKFN, SRCTYPE, FUNC, SRC, RES, USR_RES) \ + do { \ + RESTYPE result; \ + SRCTYPE src = SRC; \ + uint32_t usr_result; \ + result = FUNC(src, &usr_result); \ + CHECKFN(result, RES); \ + check(usr_result, USR_RES); \ + } while (0) + +#define TEST_R_OP_R(FUNC, SRC, RES, USR_RES) \ +TEST_x_OP_x(uint32_t, check32, uint32_t, FUNC, SRC, RES, USR_RES) + +#define TEST_R_OP_P(FUNC, SRC, RES, USR_RES) \ +TEST_x_OP_x(uint32_t, check32, uint64_t, FUNC, SRC, RES, USR_RES) + +#define TEST_P_OP_P(FUNC, SRC, RES, USR_RES) \ +TEST_x_OP_x(uint64_t, check64, uint64_t, FUNC, SRC, RES, USR_RES) + +#define TEST_P_OP_R(FUNC, SRC, RES, USR_RES) \ +TEST_x_OP_x(uint64_t, check64, uint32_t, FUNC, SRC, RES, USR_RES) + +#define TEST_xp_OP_x(RESTYPE, CHECKFN, SRCTYPE, FUNC, SRC, \ + RES, PRED_RES, USR_RES) \ + do { \ + RESTYPE result; \ + SRCTYPE src = SRC; \ + uint8_t pred_result; \ + uint32_t usr_result; \ + result = FUNC(src, &pred_result, &usr_result); \ + CHECKFN(result, RES); \ + check(pred_result, PRED_RES); \ + check(usr_result, USR_RES); \ + } while (0) + +#define TEST_Rp_OP_R(FUNC, SRC, RES, PRED_RES, USR_RES) \ +TEST_xp_OP_x(uint32_t, check32, uint32_t, FUNC, SRC, RES, PRED_RES, USR_RES) + +#define TEST_x_OP_xx(RESTYPE, CHECKFN, SRC1TYPE, SRC2TYPE, \ + FUNC, SRC1, SRC2, RES, USR_RES) \ + do { \ + RESTYPE result; \ + SRC1TYPE src1 = SRC1; \ + SRC2TYPE src2 = SRC2; \ + uint32_t usr_result; \ + result = FUNC(src1, src2, &usr_result); \ + CHECKFN(result, RES); \ + check(usr_result, USR_RES); \ + } while (0) + +#define TEST_P_OP_PP(FUNC, SRC1, SRC2, RES, USR_RES) \ +TEST_x_OP_xx(uint64_t, check64, uint64_t, uint64_t, \ + FUNC, SRC1, SRC2, RES, USR_RES) + +#define TEST_R_OP_PP(FUNC, SRC1, SRC2, RES, USR_RES) \ +TEST_x_OP_xx(uint32_t, check32, uint64_t, uint64_t, \ + FUNC, SRC1, SRC2, RES, USR_RES) + +#define TEST_P_OP_RR(FUNC, SRC1, SRC2, RES, USR_RES) \ +TEST_x_OP_xx(uint64_t, check64, uint32_t, uint32_t, \ + 
FUNC, SRC1, SRC2, RES, USR_RES) + +#define TEST_R_OP_RR(FUNC, SRC1, SRC2, RES, USR_RES) \ +TEST_x_OP_xx(uint32_t, check32, uint32_t, uint32_t, \ + FUNC, SRC1, SRC2, RES, USR_RES) + +#define TEST_R_OP_PR(FUNC, SRC1, SRC2, RES, USR_RES) \ +TEST_x_OP_xx(uint32_t, check32, uint64_t, uint32_t, \ + FUNC, SRC1, SRC2, RES, USR_RES) + +#define TEST_P_OP_PR(FUNC, SRC1, SRC2, RES, USR_RES) \ +TEST_x_OP_xx(uint64_t, check64, uint64_t, uint32_t, \ + FUNC, SRC1, SRC2, RES, USR_RES) + +#define TEST_xp_OP_xx(RESTYPE, CHECKFN, SRC1TYPE, SRC2TYPE, FUNC, SRC1, SRC2, \ + RES, PRED_RES, USR_RES) \ + do { \ + RESTYPE result; \ + SRC1TYPE src1 = SRC1; \ + SRC2TYPE src2 = SRC2; \ + uint8_t pred_result; \ + uint32_t usr_result; \ + result = FUNC(src1, src2, &pred_result, &usr_result); \ + CHECKFN(result, RES); \ + check(pred_result, PRED_RES); \ + check(usr_result, USR_RES); \ + } while (0) + +#define TEST_Rp_OP_RR(FUNC, SRC1, SRC2, RES, PRED_RES, USR_RES) \ +TEST_xp_OP_xx(uint32_t, check32, uint32_t, uint32_t, FUNC, SRC1, SRC2, \ + RES, PRED_RES, USR_RES) + +#define TEST_x_OP_xI(RESTYPE, CHECKFN, SRC1TYPE, \ + FUNC, SRC1, SRC2, RES, USR_RES) \ + do { \ + RESTYPE result; \ + SRC1TYPE src1 = SRC1; \ + uint32_t src2 = SRC2; \ + uint32_t usr_result; \ + result = FUNC(src1, src2, &usr_result); \ + CHECKFN(result, RES); \ + check(usr_result, USR_RES); \ + } while (0) + +#define TEST_R_OP_RI(FUNC, SRC1, SRC2, RES, USR_RES) \ +TEST_x_OP_xI(uint32_t, check32, uint32_t, \ + FUNC, SRC1, SRC2, RES, USR_RES) + +#define TEST_R_OP_PI(FUNC, SRC1, SRC2, RES, USR_RES) \ +TEST_x_OP_xI(uint32_t, check64, uint64_t, \ + FUNC, SRC1, SRC2, RES, USR_RES) + +#define TEST_Xx_OP_xx(RESTYPE, CHECKFN, SRC1TYPE, SRC2TYPE, \ + FUNC, RESIN, SRC1, SRC2, RES, USR_RES) \ + do { \ + RESTYPE result = RESIN; \ + SRC1TYPE src1 = SRC1; \ + SRC2TYPE src2 = SRC2; \ + uint32_t usr_result; \ + result = FUNC(result, src1, src2, &usr_result); \ + CHECKFN(result, RES); \ + check(usr_result, USR_RES); \ + } while (0) + +#define TEST_XR_OP_RR(FUNC, RESIN, SRC1, SRC2, RES, USR_RES) \ +TEST_Xx_OP_xx(uint32_t, check32, uint32_t, uint32_t, \ + FUNC, RESIN, SRC1, SRC2, RES, USR_RES) + +#define TEST_XP_OP_PP(FUNC, RESIN, SRC1, SRC2, RES, USR_RES) \ +TEST_Xx_OP_xx(uint64_t, check64, uint64_t, uint64_t, \ + FUNC, RESIN, SRC1, SRC2, RES, USR_RES) + +#define TEST_XP_OP_RR(FUNC, RESIN, SRC1, SRC2, RES, USR_RES) \ +TEST_Xx_OP_xx(uint64_t, check64, uint32_t, uint32_t, \ + FUNC, RESIN, SRC1, SRC2, RES, USR_RES) + +#define TEST_Xxp_OP_xx(RESTYPE, CHECKFN, SRC1TYPE, SRC2TYPE, \ + FUNC, RESIN, SRC1, SRC2, RES, PRED_RES, USR_RES) \ + do { \ + RESTYPE result = RESIN; \ + SRC1TYPE src1 = SRC1; \ + SRC2TYPE src2 = SRC2; \ + uint8_t pred_res; \ + uint32_t usr_result; \ + result = FUNC(result, src1, src2, &pred_res, &usr_result); \ + CHECKFN(result, RES); \ + check(usr_result, USR_RES); \ + } while (0) + +#define TEST_XPp_OP_PP(FUNC, RESIN, SRC1, SRC2, RES, PRED_RES, USR_RES) \ +TEST_Xxp_OP_xx(uint64_t, check64, uint64_t, uint64_t, FUNC, RESIN, SRC1, SRC2, \ + RES, PRED_RES, USR_RES) + +#define TEST_Xx_OP_xxp(RESTYPE, CHECKFN, SRC1TYPE, SRC2TYPE, \ + FUNC, RESIN, SRC1, SRC2, PRED, RES, USR_RES) \ + do { \ + RESTYPE result = RESIN; \ + SRC1TYPE src1 = SRC1; \ + SRC2TYPE src2 = SRC2; \ + uint8_t pred = PRED; \ + uint32_t usr_result; \ + result = FUNC(result, src1, src2, pred, &usr_result); \ + CHECKFN(result, RES); \ + check(usr_result, USR_RES); \ + } while (0) + +#define TEST_XR_OP_RRp(FUNC, RESIN, SRC1, SRC2, PRED, RES, USR_RES) \ +TEST_Xx_OP_xxp(uint32_t, check32, uint32_t, 
uint32_t, \ + FUNC, RESIN, SRC1, SRC2, PRED, RES, USR_RES) + +#define TEST_CMP_xx(SRC1TYPE, SRC2TYPE, \ + FUNC, SRC1, SRC2, RES, USR_RES) \ + do { \ + uint32_t result; \ + SRC1TYPE src1 = SRC1; \ + SRC2TYPE src2 = SRC2; \ + uint32_t usr_result; \ + result = FUNC(src1, src2, &usr_result); \ + check(result, RES); \ + check(usr_result, USR_RES); \ + } while (0) + +#define TEST_CMP_RR(FUNC, SRC1, SRC2, RES, USR_RES) \ +TEST_CMP_xx(uint32_t, uint32_t, FUNC, SRC1, SRC2, RES, USR_RES) + +#define TEST_CMP_PP(FUNC, SRC1, SRC2, RES, USR_RES) \ +TEST_CMP_xx(uint64_t, uint64_t, FUNC, SRC1, SRC2, RES, USR_RES) + +int main() +{ + TEST_R_OP_R(satub, 0, 0, USR_CLEAR); + TEST_R_OP_R(satub, 0xff, 0xff, USR_CLEAR); + TEST_R_OP_R(satub, 0xfff, 0xff, USR_OVF); + TEST_R_OP_R(satub, -1, 0, USR_OVF); + + TEST_P_OP_PP(vaddubs, 0xfeLL, 0x01LL, 0xffLL, USR_CLEAR); + TEST_P_OP_PP(vaddubs, 0xffLL, 0xffLL, 0xffLL, USR_OVF); + + TEST_P_OP_PP(vadduhs, 0xfffeLL, 0x1LL, 0xffffLL, USR_CLEAR); + TEST_P_OP_PP(vadduhs, 0xffffLL, 0x1LL, 0xffffLL, USR_OVF); + + TEST_P_OP_PP(vsububs, 0x0807060504030201LL, 0x0101010101010101LL, + 0x0706050403020100LL, USR_CLEAR); + TEST_P_OP_PP(vsububs, 0x0807060504030201LL, 0x0202020202020202LL, + 0x0605040302010000LL, USR_OVF); + + TEST_P_OP_PP(vsubuhs, 0x0004000300020001LL, 0x0001000100010001LL, + 0x0003000200010000LL, USR_CLEAR); + TEST_P_OP_PP(vsubuhs, 0x0004000300020001LL, 0x0002000200020002LL, + 0x0002000100000000LL, USR_OVF); + + TEST_R_OP_PP(vaddhubs, 0x0004000300020001LL, 0x0001000100010001LL, + 0x05040302, USR_CLEAR); + TEST_R_OP_PP(vaddhubs, 0x7fff000300020001LL, 0x0002000200020002LL, + 0xff050403, USR_OVF); + + TEST_R_OP_P(vsathub, 0x0001000300020001LL, 0x01030201, USR_CLEAR); + TEST_R_OP_P(vsathub, 0x010000700080ffffLL, 0xff708000, USR_OVF); + + TEST_R_OP_P(vsatwuh, 0x0000ffff00000001LL, 0xffff0001, USR_CLEAR); + TEST_R_OP_P(vsatwuh, 0x800000000000ffffLL, 0x0000ffff, USR_OVF); + + TEST_P_OP_P(vsatwuh_nopack, 0x0000ffff00000001LL, 0x0000ffff00000001LL, + USR_CLEAR); + TEST_P_OP_P(vsatwuh_nopack, 0x800000000000ffffLL, 0x000000000000ffffLL, + USR_OVF); + + TEST_R_OP_R(svsathub, 0x00020001, 0x0201, USR_CLEAR); + TEST_R_OP_R(svsathub, 0x0080ffff, 0x8000, USR_OVF); + + TEST_R_OP_PI(asrhub_sat, 0x004f003f002f001fLL, 3, 0x09070503, + USR_CLEAR); + TEST_R_OP_PI(asrhub_sat, 0x004fffff8fff001fLL, 3, 0x09000003, + USR_OVF); + + TEST_R_OP_PI(asrhub_rnd_sat, 0x004f003f002f001fLL, 2, 0x0a080604, + USR_CLEAR); + TEST_R_OP_PI(asrhub_rnd_sat, 0x004fffff8fff001fLL, 2, 0x0a000004, + USR_OVF); + + TEST_R_OP_RR(addsat, 1, 2, 3, + USR_CLEAR); + TEST_R_OP_RR(addsat, 0x7fffffff, 0x00000010, 0x7fffffff, + USR_OVF); + TEST_R_OP_RR(addsat, 0x80000000, 0x80000006, 0x80000000, + USR_OVF); + + TEST_P_OP_PP(addpsat, 1LL, 2LL, 3LL, USR_CLEAR); + /* overflow to max positive */ + TEST_P_OP_PP(addpsat, 0x7ffffffffffffff0LL, 0x0000000000000010LL, + 0x7fffffffffffffffLL, USR_OVF); + /* overflow to min negative */ + TEST_P_OP_PP(addpsat, 0x8000000000000003LL, 0x8000000000000006LL, + 0x8000000000000000LL, USR_OVF); + + TEST_XR_OP_RR(mpy_acc_sat_hh_s0, 0x7fffffff, 0xffff0000, 0x11110000, + 0x7fffeeee, USR_CLEAR); + TEST_XR_OP_RR(mpy_acc_sat_hh_s0, 0x7fffffff, 0x7fff0000, 0x7fff0000, + 0x7fffffff, USR_OVF); + + TEST_R_OP_RR(mpy_sat_hh_s1, 0xffff0000, 0x11110000, 0xffffddde, + USR_CLEAR); + TEST_R_OP_RR(mpy_sat_hh_s1, 0x7fff0000, 0x7fff0000, 0x7ffe0002, + USR_CLEAR); + TEST_R_OP_RR(mpy_sat_hh_s1, 0x80000000, 0x80000000, 0x7fffffff, + USR_OVF); + + TEST_R_OP_RR(mpy_sat_rnd_hh_s1, 0xffff0000, 0x11110000, 0x00005dde, + 
USR_CLEAR); + TEST_R_OP_RR(mpy_sat_rnd_hh_s1, 0x7fff0000, 0x7fff0000, 0x7ffe8002, + USR_CLEAR); + TEST_R_OP_RR(mpy_sat_rnd_hh_s1, 0x80000000, 0x80000000, 0x7fffffff, + USR_OVF); + + TEST_R_OP_RR(mpy_up_s1_sat, 0xffff0000, 0x11110000, 0xffffddde, + USR_CLEAR); + TEST_R_OP_RR(mpy_up_s1_sat, 0x7fff0000, 0x7fff0000, 0x7ffe0002, + USR_CLEAR); + TEST_R_OP_RR(mpy_up_s1_sat, 0x80000000, 0x80000000, 0x7fffffff, + USR_OVF); + + TEST_P_OP_RR(vmpy2s_s1, 0x7fff0000, 0x7fff0000, 0x7ffe000200000000LL, + USR_CLEAR); + TEST_P_OP_RR(vmpy2s_s1, 0x80000000, 0x80000000, 0x7fffffff00000000LL, + USR_OVF); + + TEST_P_OP_RR(vmpy2su_s1, 0x7fff0000, 0x7fff0000, 0x7ffe000200000000LL, + USR_CLEAR); + TEST_P_OP_RR(vmpy2su_s1, 0xffffbd97, 0xffffffff, 0xfffe000280000000LL, + USR_OVF); + + TEST_R_OP_RR(vmpy2s_s1pack, 0x7fff0000, 0x7fff0000, 0x7ffe0000, + USR_CLEAR); + TEST_R_OP_RR(vmpy2s_s1pack, 0x80008000, 0x80008000, 0x7fff7fff, + USR_OVF); + + TEST_P_OP_PP(vmpy2es_s1, 0x7fff7fff7fff7fffLL, 0x1fff1fff1fff1fffLL, + 0x1ffec0021ffec002LL, USR_CLEAR); + TEST_P_OP_PP(vmpy2es_s1, 0x8000800080008000LL, 0x8000800080008000LL, + 0x7fffffff7fffffffLL, USR_OVF); + + TEST_R_OP_PP(vdmpyrs_s1, 0x7fff7fff7fff7fffLL, 0x1fff1fff1fff1fffLL, + 0x3ffe3ffe, USR_CLEAR); + TEST_R_OP_PP(vdmpyrs_s1, 0x8000800080008000LL, 0x8000800080008000LL, + 0x7fff7fffLL, USR_OVF); + + TEST_XP_OP_PP(vdmacs_s0, 0x0fffffffULL, 0x00ff00ff00ff00ffLL, + 0x00ff00ff00ff00ffLL, 0x0001fc021001fc01LL, USR_CLEAR); + TEST_XP_OP_PP(vdmacs_s0, 0x01111111ULL, 0x8000800080001000LL, + 0x8000800080008000LL, 0x7fffffff39111111LL, USR_OVF); + + TEST_R_OP_RR(cmpyrs_s0, 0x7fff0000, 0x7fff0000, 0x0000c001, + USR_CLEAR); + TEST_R_OP_RR(cmpyrs_s0, 0x80008000, 0x80008000, 0x7fff0000, + USR_OVF); + + TEST_XP_OP_RR(cmacs_s0, 0x0fffffff, 0x7fff0000, 0x7fff0000, + 0x00000000d000fffeLL, USR_CLEAR); + TEST_XP_OP_RR(cmacs_s0, 0x0fff1111, 0x80008000, 0x80008000, + 0x7fffffff0fff1111LL, USR_OVF); + + TEST_XP_OP_RR(cnacs_s0, 0x000000108fffffffULL, 0x7fff0000, 0x7fff0000, + 0x00000010cfff0000ULL, USR_CLEAR); + TEST_XP_OP_RR(cnacs_s0, 0x000000108ff1111fULL, 0x00002001, 0x00007ffd, + 0x0000001080000000ULL, USR_OVF); + + TEST_P_OP_PP(vrcmpys_s1_h, 0x00ff00ff00ff00ffLL, 0x00ff00ff00ff00ffLL, + 0x0003f8040003f804LL, USR_CLEAR); + TEST_P_OP_PP(vrcmpys_s1_h, 0x8000800080008000LL, 0x8000800080008000LL, + 0x7fffffff7fffffffLL, USR_OVF); + + TEST_XP_OP_PP(mmacls_s0, 0x6fffffff, 0x00ff00ff00ff00ffLL, + 0x00ff00ff00ff00ffLL, 0x0000fe017000fe00LL, USR_CLEAR); + TEST_XP_OP_PP(mmacls_s0, 0x6f1111ff, 0x8000800080008000LL, + 0x1000100080008000LL, 0xf80008007fffffffLL, USR_OVF); + + TEST_R_OP_RR(hmmpyl_rs1, 0x7fff0000, 0x7fff0001, 0x0000fffe, + USR_CLEAR); + TEST_R_OP_RR(hmmpyl_rs1, 0x80000000, 0x80008000, 0x7fffffff, + USR_OVF); + + TEST_XP_OP_PP(mmaculs_s0, 0x000000007fffffffULL, 0xffff800080008000LL, + 0xffff800080008000LL, 0xffffc00040003fffLL, USR_CLEAR); + TEST_XP_OP_PP(mmaculs_s0, 0x000011107fffffffULL, 0x00ff00ff00ff00ffLL, + 0x00ff00ff001100ffLL, 0x00010f117fffffffLL, USR_OVF); + + TEST_R_OP_PR(cmpyi_wh, 0x7fff000000000000LL, 0x7fff0001, 0x0000fffe, + USR_CLEAR); + TEST_R_OP_PR(cmpyi_wh, 0x8000000000000000LL, 0x80008000, 0x7fffffff, + USR_OVF); + + TEST_P_OP_PP(vcmpy_s0_sat_i, 0x00ff00ff00ff00ffLL, 0x00ff00ff00ff00ffLL, + 0x0001fc020001fc02LL, USR_CLEAR); + TEST_P_OP_PP(vcmpy_s0_sat_i, 0x8000800080008000LL, 0x8000800080008000LL, + 0x7fffffff7fffffffLL, USR_OVF); + + TEST_P_OP_PR(vcrotate, 0x8000000000000000LL, 0x00000002, + 0x8000000000000000LL, USR_CLEAR); + TEST_P_OP_PR(vcrotate, 0x7fff80007fff8000LL, 
0x00000001, + 0x7fff80007fff7fffLL, USR_OVF); + + TEST_P_OP_PR(vcnegh, 0x8000000000000000LL, 0x00000002, + 0x8000000000000000LL, USR_CLEAR); + TEST_P_OP_PR(vcnegh, 0x7fff80007fff8000LL, 0x00000001, + 0x7fff80007fff7fffLL, USR_OVF); + +#if CORE_HAS_AUDIO + TEST_R_OP_PP(wcmpyrw, 0x8765432101234567LL, 0x00000002ffffffffLL, + 0x00000001, USR_CLEAR); + TEST_R_OP_PP(wcmpyrw, 0x800000007fffffffLL, 0x000000ff7fffffffLL, + 0x7fffffff, USR_OVF); + TEST_R_OP_PP(wcmpyrw, 0x7fffffff80000000LL, 0x7fffffff000000ffLL, + 0x80000000, USR_OVF); +#else + printf("Audio instructions skipped\n"); +#endif + + TEST_R_OP_RR(addh_l16_sat_ll, 0x0000ffff, 0x00000002, 0x00000001, + USR_CLEAR); + TEST_R_OP_RR(addh_l16_sat_ll, 0x00007fff, 0x00000005, 0x00007fff, + USR_OVF); + TEST_R_OP_RR(addh_l16_sat_ll, 0x00008000, 0x00008000, 0xffff8000, + USR_OVF); + + TEST_P_OP_P(vconj, 0x0000ffff00000001LL, 0x0000ffff00000001LL, USR_CLEAR); + TEST_P_OP_P(vconj, 0x800000000000ffffLL, 0x7fff00000000ffffLL, USR_OVF); + + TEST_P_OP_PP(vxaddsubw, 0x8765432101234567LL, 0x00000002ffffffffLL, + 0x8765432201234569LL, USR_CLEAR); + TEST_P_OP_PP(vxaddsubw, 0x7fffffff7fffffffLL, 0xffffffffffffffffLL, + 0x7fffffff7ffffffeLL, USR_OVF); + TEST_P_OP_PP(vxaddsubw, 0x800000000fffffffLL, 0x0000000a00000008LL, + 0x8000000010000009LL, USR_OVF); + + TEST_P_OP_P(vabshsat, 0x0001000afffff800LL, 0x0001000a00010800LL, + USR_CLEAR); + TEST_P_OP_P(vabshsat, 0x8000000b000c000aLL, 0x7fff000b000c000aLL, + USR_OVF); + + TEST_P_OP_PP(vnavgwr, 0x8765432101234567LL, 0x00000002ffffffffLL, + 0xc3b2a1900091a2b4LL, USR_CLEAR); + TEST_P_OP_PP(vnavgwr, 0x7fffffff8000000aLL, 0x80000000ffffffffLL, + 0x7fffffffc0000006LL, USR_OVF); + + TEST_R_OP_RI(round_ri_sat, 0x0000ffff, 2, 0x00004000, USR_CLEAR); + TEST_R_OP_RI(round_ri_sat, 0x7fffffff, 2, 0x1fffffff, USR_OVF); + + TEST_R_OP_RR(asr_r_r_sat, 0x0000ffff, 0x00000002, 0x00003fff, + USR_CLEAR); + TEST_R_OP_RR(asr_r_r_sat, 0x00ffffff, 0xfffffff5, 0x7fffffff, + USR_OVF); + TEST_R_OP_RR(asr_r_r_sat, 0x80000000, 0xfffffff5, 0x80000000, + USR_OVF); + + TEST_XPp_OP_PP(ACS, 0x0004000300020001ULL, 0x0001000200030004ULL, + 0x0000000000000000ULL, 0x0004000300030004ULL, 0xf0, + USR_CLEAR); + TEST_XPp_OP_PP(ACS, 0x0004000300020001ULL, 0x0001000200030004ULL, + 0x000affff000d0000ULL, 0x000e0003000f0004ULL, 0xcc, + USR_CLEAR); + TEST_XPp_OP_PP(ACS, 0x00047fff00020001ULL, 0x00017fff00030004ULL, + 0x000a0fff000d0000ULL, 0x000e7fff000f0004ULL, 0xfc, + USR_OVF); + TEST_XPp_OP_PP(ACS, 0x00047fff00020001ULL, 0x00017fff00030004ULL, + 0x000a0fff000d0000ULL, 0x000e7fff000f0004ULL, 0xf0, + USR_OVF); + + /* Floating point */ + TEST_R_OP_RR(sfmin, SF_one, SF_small_neg, SF_small_neg, USR_CLEAR); + TEST_R_OP_RR(sfmin, SF_one, SF_SNaN, SF_one, USR_FPINVF); + TEST_R_OP_RR(sfmin, SF_SNaN, SF_one, SF_one, USR_FPINVF); + TEST_R_OP_RR(sfmin, SF_one, SF_QNaN, SF_one, USR_CLEAR); + TEST_R_OP_RR(sfmin, SF_QNaN, SF_one, SF_one, USR_CLEAR); + TEST_R_OP_RR(sfmin, SF_SNaN, SF_QNaN, SF_HEX_NaN, USR_FPINVF); + TEST_R_OP_RR(sfmin, SF_QNaN, SF_SNaN, SF_HEX_NaN, USR_FPINVF); + + TEST_R_OP_RR(sfmax, SF_one, SF_small_neg, SF_one, USR_CLEAR); + TEST_R_OP_RR(sfmax, SF_one, SF_SNaN, SF_one, USR_FPINVF); + TEST_R_OP_RR(sfmax, SF_SNaN, SF_one, SF_one, USR_FPINVF); + TEST_R_OP_RR(sfmax, SF_one, SF_QNaN, SF_one, USR_CLEAR); + TEST_R_OP_RR(sfmax, SF_QNaN, SF_one, SF_one, USR_CLEAR); + TEST_R_OP_RR(sfmax, SF_SNaN, SF_QNaN, SF_HEX_NaN, USR_FPINVF); + TEST_R_OP_RR(sfmax, SF_QNaN, SF_SNaN, SF_HEX_NaN, USR_FPINVF); + + TEST_R_OP_RR(sfadd, SF_one, SF_QNaN, SF_HEX_NaN, USR_CLEAR); + 
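/*
 * As with sfmin/sfmax above, quiet NaN operands leave USR clear,
 * while the signaling NaN cases below must also set the invalid
 * sticky flag:
 */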
TEST_R_OP_RR(sfadd, SF_one, SF_SNaN, SF_HEX_NaN, USR_FPINVF); + TEST_R_OP_RR(sfadd, SF_QNaN, SF_SNaN, SF_HEX_NaN, USR_FPINVF); + TEST_R_OP_RR(sfadd, SF_SNaN, SF_QNaN, SF_HEX_NaN, USR_FPINVF); + + TEST_R_OP_RR(sfsub, SF_one, SF_QNaN, SF_HEX_NaN, USR_CLEAR); + TEST_R_OP_RR(sfsub, SF_one, SF_SNaN, SF_HEX_NaN, USR_FPINVF); + TEST_R_OP_RR(sfsub, SF_QNaN, SF_SNaN, SF_HEX_NaN, USR_FPINVF); + TEST_R_OP_RR(sfsub, SF_SNaN, SF_QNaN, SF_HEX_NaN, USR_FPINVF); + + TEST_R_OP_RR(sfmpy, SF_one, SF_QNaN, SF_HEX_NaN, USR_CLEAR); + TEST_R_OP_RR(sfmpy, SF_one, SF_SNaN, SF_HEX_NaN, USR_FPINVF); + TEST_R_OP_RR(sfmpy, SF_QNaN, SF_SNaN, SF_HEX_NaN, USR_FPINVF); + TEST_R_OP_RR(sfmpy, SF_SNaN, SF_QNaN, SF_HEX_NaN, USR_FPINVF); + + TEST_XR_OP_RR(sffma, SF_one, SF_one, SF_one, SF_two, USR_CLEAR); + TEST_XR_OP_RR(sffma, SF_zero, SF_one, SF_QNaN, SF_HEX_NaN, USR_CLEAR); + TEST_XR_OP_RR(sffma, SF_zero, SF_one, SF_SNaN, SF_HEX_NaN, USR_FPINVF); + TEST_XR_OP_RR(sffma, SF_zero, SF_QNaN, SF_SNaN, SF_HEX_NaN, USR_FPINVF); + TEST_XR_OP_RR(sffma, SF_zero, SF_SNaN, SF_QNaN, SF_HEX_NaN, USR_FPINVF); + + TEST_XR_OP_RR(sffms, SF_one, SF_one, SF_one, SF_zero, USR_CLEAR); + TEST_XR_OP_RR(sffms, SF_zero, SF_one, SF_QNaN, SF_HEX_NaN, USR_CLEAR); + TEST_XR_OP_RR(sffms, SF_zero, SF_one, SF_SNaN, SF_HEX_NaN, USR_FPINVF); + TEST_XR_OP_RR(sffms, SF_zero, SF_QNaN, SF_SNaN, SF_HEX_NaN, USR_FPINVF); + TEST_XR_OP_RR(sffms, SF_zero, SF_SNaN, SF_QNaN, SF_HEX_NaN, USR_FPINVF); + + TEST_CMP_RR(sfcmpuo, SF_one, SF_large_pos, 0x00, USR_CLEAR); + TEST_CMP_RR(sfcmpuo, SF_INF, SF_large_pos, 0x00, USR_CLEAR); + TEST_CMP_RR(sfcmpuo, SF_QNaN, SF_large_pos, 0xff, USR_CLEAR); + TEST_CMP_RR(sfcmpuo, SF_QNaN_neg, SF_large_pos, 0xff, USR_CLEAR); + TEST_CMP_RR(sfcmpuo, SF_SNaN, SF_large_pos, 0xff, USR_FPINVF); + TEST_CMP_RR(sfcmpuo, SF_SNaN_neg, SF_large_pos, 0xff, USR_FPINVF); + TEST_CMP_RR(sfcmpuo, SF_QNaN, SF_QNaN, 0xff, USR_CLEAR); + TEST_CMP_RR(sfcmpuo, SF_QNaN, SF_SNaN, 0xff, USR_FPINVF); + + TEST_CMP_RR(sfcmpeq, SF_one, SF_QNaN, 0x00, USR_CLEAR); + TEST_CMP_RR(sfcmpeq, SF_one, SF_SNaN, 0x00, USR_FPINVF); + TEST_CMP_RR(sfcmpgt, SF_one, SF_QNaN, 0x00, USR_CLEAR); + TEST_CMP_RR(sfcmpgt, SF_one, SF_SNaN, 0x00, USR_FPINVF); + TEST_CMP_RR(sfcmpge, SF_one, SF_QNaN, 0x00, USR_CLEAR); + TEST_CMP_RR(sfcmpge, SF_one, SF_SNaN, 0x00, USR_FPINVF); + + TEST_P_OP_PP(dfadd, DF_any, DF_QNaN, DF_HEX_NaN, USR_CLEAR); + TEST_P_OP_PP(dfadd, DF_any, DF_SNaN, DF_HEX_NaN, USR_FPINVF); + TEST_P_OP_PP(dfadd, DF_QNaN, DF_SNaN, DF_HEX_NaN, USR_FPINVF); + TEST_P_OP_PP(dfadd, DF_SNaN, DF_QNaN, DF_HEX_NaN, USR_FPINVF); + + TEST_P_OP_PP(dfsub, DF_any, DF_QNaN, DF_HEX_NaN, USR_CLEAR); + TEST_P_OP_PP(dfsub, DF_any, DF_SNaN, DF_HEX_NaN, USR_FPINVF); + TEST_P_OP_PP(dfsub, DF_QNaN, DF_SNaN, DF_HEX_NaN, USR_FPINVF); + TEST_P_OP_PP(dfsub, DF_SNaN, DF_QNaN, DF_HEX_NaN, USR_FPINVF); + +#if CORE_IS_V67 + TEST_P_OP_PP(dfmin, DF_any, DF_small_neg, DF_small_neg, USR_CLEAR); + TEST_P_OP_PP(dfmin, DF_any, DF_SNaN, DF_any, USR_FPINVF); + TEST_P_OP_PP(dfmin, DF_SNaN, DF_any, DF_any, USR_FPINVF); + TEST_P_OP_PP(dfmin, DF_any, DF_QNaN, DF_any, USR_CLEAR); + TEST_P_OP_PP(dfmin, DF_QNaN, DF_any, DF_any, USR_CLEAR); + TEST_P_OP_PP(dfmin, DF_SNaN, DF_QNaN, DF_HEX_NaN, USR_FPINVF); + TEST_P_OP_PP(dfmin, DF_QNaN, DF_SNaN, DF_HEX_NaN, USR_FPINVF); + + TEST_P_OP_PP(dfmax, DF_any, DF_small_neg, DF_any, USR_CLEAR); + TEST_P_OP_PP(dfmax, DF_any, DF_SNaN, DF_any, USR_FPINVF); + TEST_P_OP_PP(dfmax, DF_SNaN, DF_any, DF_any, USR_FPINVF); + TEST_P_OP_PP(dfmax, DF_any, DF_QNaN, DF_any, USR_CLEAR); + TEST_P_OP_PP(dfmax, 
DF_QNaN, DF_any, DF_any, USR_CLEAR); + TEST_P_OP_PP(dfmax, DF_SNaN, DF_QNaN, DF_HEX_NaN, USR_FPINVF); + TEST_P_OP_PP(dfmax, DF_QNaN, DF_SNaN, DF_HEX_NaN, USR_FPINVF); + + TEST_XP_OP_PP(dfmpyhh, DF_one, DF_one, DF_one, DF_one_hh, USR_CLEAR); + TEST_XP_OP_PP(dfmpyhh, DF_zero, DF_any, DF_QNaN, DF_HEX_NaN, USR_CLEAR); + TEST_XP_OP_PP(dfmpyhh, DF_zero, DF_any, DF_SNaN, DF_HEX_NaN, USR_FPINVF); + TEST_XP_OP_PP(dfmpyhh, DF_zero, DF_QNaN, DF_SNaN, DF_HEX_NaN, USR_FPINVF); + TEST_XP_OP_PP(dfmpyhh, DF_zero, DF_SNaN, DF_QNaN, DF_HEX_NaN, USR_FPINVF); +#else + printf("v67 instructions skipped\n"); +#endif + + TEST_CMP_PP(dfcmpuo, DF_small_neg, DF_any, 0x00, USR_CLEAR); + TEST_CMP_PP(dfcmpuo, DF_large_pos, DF_any, 0x00, USR_CLEAR); + TEST_CMP_PP(dfcmpuo, DF_QNaN, DF_any, 0xff, USR_CLEAR); + TEST_CMP_PP(dfcmpuo, DF_QNaN_neg, DF_any, 0xff, USR_CLEAR); + TEST_CMP_PP(dfcmpuo, DF_SNaN, DF_any, 0xff, USR_FPINVF); + TEST_CMP_PP(dfcmpuo, DF_SNaN_neg, DF_any, 0xff, USR_FPINVF); + TEST_CMP_PP(dfcmpuo, DF_QNaN, DF_QNaN, 0xff, USR_CLEAR); + TEST_CMP_PP(dfcmpuo, DF_QNaN, DF_SNaN, 0xff, USR_FPINVF); + + TEST_CMP_PP(dfcmpeq, DF_any, DF_QNaN, 0x00, USR_CLEAR); + TEST_CMP_PP(dfcmpeq, DF_any, DF_SNaN, 0x00, USR_FPINVF); + TEST_CMP_PP(dfcmpgt, DF_any, DF_QNaN, 0x00, USR_CLEAR); + TEST_CMP_PP(dfcmpgt, DF_any, DF_SNaN, 0x00, USR_FPINVF); + TEST_CMP_PP(dfcmpge, DF_any, DF_QNaN, 0x00, USR_CLEAR); + TEST_CMP_PP(dfcmpge, DF_any, DF_SNaN, 0x00, USR_FPINVF); + + TEST_P_OP_R(conv_sf2df, SF_QNaN, DF_HEX_NaN, USR_CLEAR); + TEST_P_OP_R(conv_sf2df, SF_SNaN, DF_HEX_NaN, USR_FPINVF); + TEST_R_OP_R(conv_sf2uw, SF_QNaN, 0xffffffff, USR_FPINVF); + TEST_R_OP_R(conv_sf2uw, SF_SNaN, 0xffffffff, USR_FPINVF); + TEST_R_OP_R(conv_sf2w, SF_QNaN, 0xffffffff, USR_FPINVF); + TEST_R_OP_R(conv_sf2w, SF_SNaN, 0xffffffff, USR_FPINVF); + TEST_P_OP_R(conv_sf2ud, SF_QNaN, 0xffffffffffffffffULL, USR_FPINVF); + TEST_P_OP_R(conv_sf2ud, SF_SNaN, 0xffffffffffffffffULL, USR_FPINVF); + TEST_P_OP_R(conv_sf2d, SF_QNaN, 0xffffffffffffffffULL, USR_FPINVF); + TEST_P_OP_R(conv_sf2d, SF_SNaN, 0xffffffffffffffffULL, USR_FPINVF); + TEST_R_OP_R(conv_sf2uw_chop, SF_QNaN, 0xffffffff, USR_FPINVF); + TEST_R_OP_R(conv_sf2uw_chop, SF_SNaN, 0xffffffff, USR_FPINVF); + TEST_R_OP_R(conv_sf2w_chop, SF_QNaN, 0xffffffff, USR_FPINVF); + TEST_R_OP_R(conv_sf2w_chop, SF_SNaN, 0xffffffff, USR_FPINVF); + TEST_P_OP_R(conv_sf2ud_chop, SF_QNaN, 0xffffffffffffffffULL, USR_FPINVF); + TEST_P_OP_R(conv_sf2ud_chop, SF_SNaN, 0xffffffffffffffffULL, USR_FPINVF); + TEST_P_OP_R(conv_sf2d_chop, SF_QNaN, 0xffffffffffffffffULL, USR_FPINVF); + TEST_P_OP_R(conv_sf2d_chop, SF_SNaN, 0xffffffffffffffffULL, USR_FPINVF); + + TEST_R_OP_P(conv_df2sf, DF_QNaN, SF_HEX_NaN, USR_CLEAR); + TEST_R_OP_P(conv_df2sf, DF_SNaN, SF_HEX_NaN, USR_FPINVF); + TEST_R_OP_P(conv_df2uw, DF_QNaN, 0xffffffff, USR_FPINVF); + TEST_R_OP_P(conv_df2uw, DF_SNaN, 0xffffffff, USR_FPINVF); + TEST_R_OP_P(conv_df2w, DF_QNaN, 0xffffffff, USR_FPINVF); + TEST_R_OP_P(conv_df2w, DF_SNaN, 0xffffffff, USR_FPINVF); + TEST_P_OP_P(conv_df2ud, DF_QNaN, 0xffffffffffffffffULL, USR_FPINVF); + TEST_P_OP_P(conv_df2ud, DF_SNaN, 0xffffffffffffffffULL, USR_FPINVF); + TEST_P_OP_P(conv_df2d, DF_QNaN, 0xffffffffffffffffULL, USR_FPINVF); + TEST_P_OP_P(conv_df2d, DF_SNaN, 0xffffffffffffffffULL, USR_FPINVF); + TEST_R_OP_P(conv_df2uw_chop, DF_QNaN, 0xffffffff, USR_FPINVF); + TEST_R_OP_P(conv_df2uw_chop, DF_SNaN, 0xffffffff, USR_FPINVF); + + /* Test for typo in HELPER(conv_df2uw_chop) */ + TEST_R_OP_P(conv_df2uw_chop, 0xffffff7f00000001ULL, 0xffffffff, USR_FPINVF); + + 
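Two fixed rules run through the dfmin/dfmax and conversion rows above: min/max follow IEEE 754-2008 minNum/maxNum (an ordinary number beats a quiet NaN, exactly as C99 fmin/fmax behave), and a NaN input to any float-to-integer conversion produces all-ones and raises the invalid flag. A sketch of both; model_conv_df2uw is a made-up illustration, not the QEMU helper:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include <math.h>

/* Model of the NaN handling in conv_df2uw and friends: a NaN input
 * yields all-ones and sets the invalid flag (standing in for USR_FPINVF). */
static uint32_t model_conv_df2uw(double x, bool *invalid)
{
    if (isnan(x)) {
        *invalid = true;
        return 0xffffffff;
    }
    return (uint32_t)x; /* in-range values convert normally */
}

int main(void)
{
    bool invalid = false;
    uint32_t r;

    /* minNum/maxNum rule: the number wins over a quiet NaN */
    printf("%g\n", fmin(NAN, 1234.5)); /* 1234.5 */
    printf("%g\n", fmax(1234.5, NAN)); /* 1234.5 */

    /* NaN-to-integer conversion saturates to all-ones */
    r = model_conv_df2uw(NAN, &invalid);
    printf("0x%08x invalid=%d\n", r, invalid); /* 0xffffffff invalid=1 */
    return 0;
}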
TEST_R_OP_P(conv_df2w_chop, DF_QNaN, 0xffffffff, USR_FPINVF); + TEST_R_OP_P(conv_df2w_chop, DF_SNaN, 0xffffffff, USR_FPINVF); + TEST_P_OP_P(conv_df2ud_chop, DF_QNaN, 0xffffffffffffffffULL, USR_FPINVF); + TEST_P_OP_P(conv_df2ud_chop, DF_SNaN, 0xffffffffffffffffULL, USR_FPINVF); + TEST_P_OP_P(conv_df2d_chop, DF_QNaN, 0xffffffffffffffffULL, USR_FPINVF); + TEST_P_OP_P(conv_df2d_chop, DF_SNaN, 0xffffffffffffffffULL, USR_FPINVF); + + TEST_R_OP_R(conv_uw2sf, 0x00000001, SF_one, USR_CLEAR); + TEST_R_OP_R(conv_uw2sf, 0x010020a5, 0x4b801052, USR_FPINPF); + TEST_R_OP_R(conv_w2sf, 0x00000001, SF_one, USR_CLEAR); + TEST_R_OP_R(conv_w2sf, 0x010020a5, 0x4b801052, USR_FPINPF); + TEST_R_OP_P(conv_ud2sf, 0x0000000000000001ULL, SF_one, USR_CLEAR); + TEST_R_OP_P(conv_ud2sf, 0x00000000010020a5ULL, 0x4b801052, USR_FPINPF); + TEST_R_OP_P(conv_d2sf, 0x0000000000000001ULL, SF_one, USR_CLEAR); + TEST_R_OP_P(conv_d2sf, 0x00000000010020a5ULL, 0x4b801052, USR_FPINPF); + + TEST_XR_OP_RRp(sffma_sc, SF_one, SF_one, SF_one, 1, SF_four, + USR_CLEAR); + TEST_XR_OP_RRp(sffma_sc, SF_QNaN, SF_one, SF_one, 1, SF_HEX_NaN, + USR_CLEAR); + TEST_XR_OP_RRp(sffma_sc, SF_one, SF_QNaN, SF_one, 1, SF_HEX_NaN, + USR_CLEAR); + TEST_XR_OP_RRp(sffma_sc, SF_one, SF_one, SF_QNaN, 1, SF_HEX_NaN, + USR_CLEAR); + TEST_XR_OP_RRp(sffma_sc, SF_SNaN, SF_one, SF_one, 1, SF_HEX_NaN, + USR_FPINVF); + TEST_XR_OP_RRp(sffma_sc, SF_one, SF_SNaN, SF_one, 1, SF_HEX_NaN, + USR_FPINVF); + TEST_XR_OP_RRp(sffma_sc, SF_one, SF_one, SF_SNaN, 1, SF_HEX_NaN, + USR_FPINVF); + + TEST_Rp_OP_RR(sfrecipa, SF_one, SF_one, SF_one_recip, 0x00, + USR_CLEAR); + TEST_Rp_OP_RR(sfrecipa, SF_QNaN, SF_one, SF_HEX_NaN, 0x00, + USR_CLEAR); + TEST_Rp_OP_RR(sfrecipa, SF_one, SF_QNaN, SF_HEX_NaN, 0x00, + USR_CLEAR); + TEST_Rp_OP_RR(sfrecipa, SF_one, SF_SNaN, SF_HEX_NaN, 0x00, + USR_FPINVF); + TEST_Rp_OP_RR(sfrecipa, SF_SNaN, SF_one, SF_HEX_NaN, 0x00, + USR_FPINVF); + + TEST_R_OP_RR(sffixupn, SF_one, SF_one, SF_one, USR_CLEAR); + TEST_R_OP_RR(sffixupn, SF_QNaN, SF_one, SF_HEX_NaN, USR_CLEAR); + TEST_R_OP_RR(sffixupn, SF_one, SF_QNaN, SF_HEX_NaN, USR_CLEAR); + TEST_R_OP_RR(sffixupn, SF_SNaN, SF_one, SF_HEX_NaN, USR_FPINVF); + TEST_R_OP_RR(sffixupn, SF_one, SF_SNaN, SF_HEX_NaN, USR_FPINVF); + + TEST_R_OP_RR(sffixupd, SF_one, SF_one, SF_one, USR_CLEAR); + TEST_R_OP_RR(sffixupd, SF_QNaN, SF_one, SF_HEX_NaN, USR_CLEAR); + TEST_R_OP_RR(sffixupd, SF_one, SF_QNaN, SF_HEX_NaN, USR_CLEAR); + TEST_R_OP_RR(sffixupd, SF_SNaN, SF_one, SF_HEX_NaN, USR_FPINVF); + TEST_R_OP_RR(sffixupd, SF_one, SF_SNaN, SF_HEX_NaN, USR_FPINVF); + + TEST_R_OP_R(sffixupr, SF_one, SF_one, USR_CLEAR); + TEST_R_OP_R(sffixupr, SF_QNaN, SF_HEX_NaN, USR_CLEAR); + TEST_R_OP_R(sffixupr, SF_SNaN, SF_HEX_NaN, USR_FPINVF); + + TEST_Rp_OP_R(sfinvsqrta, SF_one, SF_one_invsqrta, 0x00, USR_CLEAR); + TEST_Rp_OP_R(sfinvsqrta, SF_zero, SF_one, 0x00, USR_CLEAR); + TEST_Rp_OP_R(sfinvsqrta, SF_QNaN, SF_HEX_NaN, 0x00, USR_CLEAR); + TEST_Rp_OP_R(sfinvsqrta, SF_small_neg, SF_HEX_NaN, 0x00, USR_FPINVF); + TEST_Rp_OP_R(sfinvsqrta, SF_SNaN, SF_HEX_NaN, 0x00, USR_FPINVF); + + puts(err ? 
"FAIL" : "PASS"); + return err; +} diff --git a/tests/tcg/ppc64/Makefile.target b/tests/tcg/ppc64/Makefile.target index c9498053df..8197c288a7 100644 --- a/tests/tcg/ppc64/Makefile.target +++ b/tests/tcg/ppc64/Makefile.target @@ -27,5 +27,6 @@ run-sha512-vector: QEMU_OPTS+=-cpu POWER10 run-plugin-sha512-vector-with-%: QEMU_OPTS+=-cpu POWER10 PPC64_TESTS += signal_save_restore_xer +PPC64_TESTS += xxspltw TESTS += $(PPC64_TESTS) diff --git a/tests/tcg/ppc64le/Makefile.target b/tests/tcg/ppc64le/Makefile.target index 12d85e946b..9624bb1e9c 100644 --- a/tests/tcg/ppc64le/Makefile.target +++ b/tests/tcg/ppc64le/Makefile.target @@ -25,5 +25,6 @@ run-plugin-sha512-vector-with-%: QEMU_OPTS+=-cpu POWER10 PPC64LE_TESTS += mtfsf PPC64LE_TESTS += signal_save_restore_xer +PPC64LE_TESTS += xxspltw TESTS += $(PPC64LE_TESTS) diff --git a/tests/tcg/ppc64le/xxspltw.c b/tests/tcg/ppc64le/xxspltw.c new file mode 100644 index 0000000000..4cff78bfdc --- /dev/null +++ b/tests/tcg/ppc64le/xxspltw.c @@ -0,0 +1,46 @@ +#include <stdio.h> +#include <stdint.h> +#include <inttypes.h> +#include <assert.h> + +#define WORD_A 0xAAAAAAAAUL +#define WORD_B 0xBBBBBBBBUL +#define WORD_C 0xCCCCCCCCUL +#define WORD_D 0xDDDDDDDDUL + +#define DWORD_HI (WORD_A << 32 | WORD_B) +#define DWORD_LO (WORD_C << 32 | WORD_D) + +#define TEST(HI, LO, UIM, RES) \ + do { \ + union { \ + uint64_t u; \ + double f; \ + } h = { .u = HI }, l = { .u = LO }; \ + /* \ + * Use a pair of FPRs to load the VSR avoiding insns \ + * newer than xxswapd. \ + */ \ + asm("xxmrghd 32, %0, %1\n\t" \ + "xxspltw 32, 32, %2\n\t" \ + "xxmrghd %0, 32, %0\n\t" \ + "xxswapd 32, 32\n\t" \ + "xxmrghd %1, 32, %1\n\t" \ + : "+f" (h.f), "+f" (l.f) \ + : "i" (UIM) \ + : "v0"); \ + printf("xxspltw(0x%016" PRIx64 "%016" PRIx64 ", %d) =" \ + " %016" PRIx64 "%016" PRIx64 "\n", HI, LO, UIM, \ + h.u, l.u); \ + assert(h.u == (RES)); \ + assert(l.u == (RES)); \ + } while (0) + +int main(void) +{ + TEST(DWORD_HI, DWORD_LO, 0, WORD_A << 32 | WORD_A); + TEST(DWORD_HI, DWORD_LO, 1, WORD_B << 32 | WORD_B); + TEST(DWORD_HI, DWORD_LO, 2, WORD_C << 32 | WORD_C); + TEST(DWORD_HI, DWORD_LO, 3, WORD_D << 32 | WORD_D); + return 0; +} diff --git a/util/iova-tree.c b/util/iova-tree.c index 23ea35b7a4..6dff29c1f6 100644 --- a/util/iova-tree.c +++ b/util/iova-tree.c @@ -16,6 +16,45 @@ struct IOVATree { GTree *tree; }; +/* Args to pass to iova_tree_alloc foreach function. */ +struct IOVATreeAllocArgs { + /* Size of the desired allocation */ + size_t new_size; + + /* The minimum address allowed in the allocation */ + hwaddr iova_begin; + + /* Map at the left of the hole, can be NULL if "this" is first one */ + const DMAMap *prev; + + /* Map at the right of the hole, can be NULL if "prev" is the last one */ + const DMAMap *this; + + /* If found, we fill in the IOVA here */ + hwaddr iova_result; + + /* Whether have we found a valid IOVA */ + bool iova_found; +}; + +typedef struct IOVATreeFindIOVAArgs { + const DMAMap *needle; + const DMAMap *result; +} IOVATreeFindIOVAArgs; + +/** + * Iterate args to the next hole + * + * @args: The alloc arguments + * @next: The next mapping in the tree. 
Can be NULL to signal the last one + */ +static void iova_tree_alloc_args_iterate(struct IOVATreeAllocArgs *args, + const DMAMap *next) +{ + args->prev = args->this; + args->this = next; +} + static int iova_tree_compare(gconstpointer a, gconstpointer b, gpointer data) { const DMAMap *m1 = a, *m2 = b; @@ -47,6 +86,35 @@ const DMAMap *iova_tree_find(const IOVATree *tree, const DMAMap *map) return g_tree_lookup(tree->tree, map); } +static gboolean iova_tree_find_address_iterator(gpointer key, gpointer value, + gpointer data) +{ + const DMAMap *map = key; + IOVATreeFindIOVAArgs *args = data; + const DMAMap *needle; + + g_assert(key == value); + + needle = args->needle; + if (map->translated_addr + map->size < needle->translated_addr || + needle->translated_addr + needle->size < map->translated_addr) { + return false; + } + + args->result = map; + return true; +} + +const DMAMap *iova_tree_find_iova(const IOVATree *tree, const DMAMap *map) +{ + IOVATreeFindIOVAArgs args = { + .needle = map, + }; + + g_tree_foreach(tree->tree, iova_tree_find_address_iterator, &args); + return args.result; +} + const DMAMap *iova_tree_find_address(const IOVATree *tree, hwaddr iova) { const DMAMap map = { .iova = iova, .size = 0 }; @@ -107,6 +175,108 @@ int iova_tree_remove(IOVATree *tree, const DMAMap *map) return IOVA_OK; } +/** + * Try to find an unallocated IOVA range between prev and this elements. + * + * @args: Arguments to allocation + * + * Cases: + * + * (1) !prev, !this: No entries allocated, always succeed + * + * (2) !prev, this: We're iterating at the 1st element. + * + * (3) prev, !this: We're iterating at the last element. + * + * (4) prev, this: this is the most common case, we'll try to find a hole + * between "prev" and "this" mapping. + * + * Note that this function assumes the last valid iova is HWADDR_MAX, but it + * searches linearly so it's easy to discard the result if it's not the case. + */ +static void iova_tree_alloc_map_in_hole(struct IOVATreeAllocArgs *args) +{ + const DMAMap *prev = args->prev, *this = args->this; + uint64_t hole_start, hole_last; + + if (this && this->iova + this->size < args->iova_begin) { + return; + } + + hole_start = MAX(prev ? prev->iova + prev->size + 1 : 0, args->iova_begin); + hole_last = this ? this->iova : HWADDR_MAX; + + if (hole_last - hole_start > args->new_size) { + args->iova_result = hole_start; + args->iova_found = true; + } +} + +/** + * Foreach dma node in the tree, compare if there is a hole with its previous + * node (or minimum iova address allowed) and the node. + * + * @key: Node iterating + * @value: Node iterating + * @pargs: Struct to communicate with the outside world + * + * Return: false to keep iterating, true if needs break. + */ +static gboolean iova_tree_alloc_traverse(gpointer key, gpointer value, + gpointer pargs) +{ + struct IOVATreeAllocArgs *args = pargs; + DMAMap *node = value; + + assert(key == value); + + iova_tree_alloc_args_iterate(args, node); + iova_tree_alloc_map_in_hole(args); + return args->iova_found; +} + +int iova_tree_alloc_map(IOVATree *tree, DMAMap *map, hwaddr iova_begin, + hwaddr iova_last) +{ + struct IOVATreeAllocArgs args = { + .new_size = map->size, + .iova_begin = iova_begin, + }; + + if (unlikely(iova_last < iova_begin)) { + return IOVA_ERR_INVALID; + } + + /* + * Find a valid hole for the mapping + * + * Assuming low iova_begin, so no need to do a binary search to + * locate the first node. + * + * TODO: Replace all this with g_tree_node_first/next/last when available + * (from glib since 2.68). 
Using g_tree_foreach for this + * complicates the code considerably. + */ + g_tree_foreach(tree->tree, iova_tree_alloc_traverse, &args); + if (!args.iova_found) { + /* + * Either the tree is empty or the last hole has not been checked yet: + * g_tree_foreach does not cover the (last, iova_last] range, so we + * check it here. + */ + iova_tree_alloc_args_iterate(&args, NULL); + iova_tree_alloc_map_in_hole(&args); + } + + if (!args.iova_found || args.iova_result + map->size > iova_last) { + return IOVA_ERR_NOMEM; + } + + map->iova = args.iova_result; + return iova_tree_insert(tree, map); +} + void iova_tree_destroy(IOVATree *tree) { g_tree_destroy(tree->tree);
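The allocation path above amounts to a linear walk over the sorted mappings: check the gap between each node and its predecessor, then handle the tail gap separately, because g_tree_foreach never presents it. A self-contained sketch of the same walk over a sorted array; names are illustrative, and, as with DMAMap, size is an inclusive span, which is where the + 1 and the strict > come from:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

/* A mapping covers [iova, iova + size], size being an inclusive span. */
struct map { uint64_t iova, size; };

static bool find_hole(const struct map *m, int n, uint64_t new_size,
                      uint64_t iova_begin, uint64_t *result)
{
    uint64_t hole_start = iova_begin;

    /* The extra iteration checks the tail hole up to UINT64_MAX,
     * which the tree foreach in the code above never visits. */
    for (int i = 0; i <= n; i++) {
        uint64_t hole_last = (i < n) ? m[i].iova : UINT64_MAX;

        /* An inclusive range of new_size fits iff the gap exceeds it */
        if (hole_last > hole_start && hole_last - hole_start > new_size) {
            *result = hole_start;
            return true;
        }
        if (i < n) {
            hole_start = m[i].iova + m[i].size + 1;
            if (hole_start < iova_begin) {
                hole_start = iova_begin;
            }
        }
    }
    return false;
}

int main(void)
{
    struct map maps[] = { { 0x1000, 0xfff }, { 0x4000, 0xfff } };
    uint64_t iova;

    if (find_hole(maps, 2, 0xfff, 0x1000, &iova)) {
        printf("hole at 0x%llx\n", (unsigned long long)iova); /* 0x2000 */
    }
    return 0;
}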
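And a hypothetical caller of the new iova_tree_alloc_map (QEMU build environment assumed; the address, size, and IOMMU_RW permission are made up for illustration):

#include "qemu/osdep.h"
#include "qemu/iova-tree.h"

/* Ask the tree for a free IOVA for a 4 KiB mapping inside
 * [iova_begin, iova_last]; on success map.iova holds the address. */
static int alloc_example(IOVATree *tree, hwaddr iova_begin, hwaddr iova_last)
{
    DMAMap map = {
        .translated_addr = 0x100000,
        .size = 4096 - 1,           /* inclusive span, as above */
        .perm = IOMMU_RW,
    };

    return iova_tree_alloc_map(tree, &map, iova_begin, iova_last);
}

On success the tree has picked and inserted the range; IOVA_ERR_NOMEM means no hole of the requested size exists in [iova_begin, iova_last].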