56 files changed, 1604 insertions, 717 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 0886eb3d2b..5a22c8be42 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -894,7 +894,7 @@ F: hw/misc/zynq* F: include/hw/misc/zynq* X: hw/ssi/xilinx_* -Xilinx ZynqMP +Xilinx ZynqMP and Versal M: Alistair Francis <alistair@alistair23.me> M: Edgar E. Iglesias <edgar.iglesias@gmail.com> M: Peter Maydell <peter.maydell@linaro.org> @@ -905,6 +905,7 @@ F: include/hw/*/xlnx*.h F: include/hw/ssi/xilinx_spips.h F: hw/display/dpcd.c F: include/hw/display/dpcd.h +F: docs/system/arm/xlnx-versal-virt.rst ARM ACPI Subsystem M: Shannon Zhao <shannon.zhaosl@gmail.com> diff --git a/docs/specs/index.rst b/docs/specs/index.rst index 426632a475..1b0eb979d5 100644 --- a/docs/specs/index.rst +++ b/docs/specs/index.rst @@ -12,6 +12,7 @@ Contents: ppc-xive ppc-spapr-xive + ppc-spapr-numa acpi_hw_reduced_hotplug tpm acpi_hest_ghes diff --git a/docs/specs/ppc-spapr-numa.rst b/docs/specs/ppc-spapr-numa.rst new file mode 100644 index 0000000000..e762038022 --- /dev/null +++ b/docs/specs/ppc-spapr-numa.rst @@ -0,0 +1,191 @@ + +NUMA mechanics for sPAPR (pseries machines) +============================================ + +NUMA in sPAPR works differently from the System Locality Distance +Information Table (SLIT) in ACPI. The logic is explained in the LOPAPR +1.1 chapter 15, "Non Uniform Memory Access (NUMA) Option". This +document aims to complement that specification, providing details +of the elements that impact how QEMU views NUMA in pseries. + +Associativity and ibm,associativity property +-------------------------------------------- + +Associativity is defined as a group of platform resources that have +similar mean performance (or, in our context here, distance) relative to +everyone else outside of the group. + +The format of the ibm,associativity property varies with the value of +bit 0 of byte 5 of the ibm,architecture-vec-5 property. The format with +bit 0 equal to zero is deprecated. The current format, with bit 0 +set to one, makes the ibm,associativity property represent the +physical hierarchy of the platform, as one or more lists that start +with the highest-level grouping and go down to the smallest. Considering the +following topology: + +:: + + Mem M1 ---- Proc P1 | + ----------------- | Socket S1 ---| + chip C1 | | + | HW module 1 (MOD1) + Mem M2 ---- Proc P2 | | + ----------------- | Socket S2 ---| + chip C2 | + +The ibm,associativity property for the processors would be: + +* P1: {MOD1, S1, C1, P1} +* P2: {MOD1, S2, C2, P2} + +Each allocable resource has an ibm,associativity property. The LOPAPR +specification allows multiple lists to be present in this property, +considering that the same resource can have multiple connections to the +platform. + +Relative Performance Distance and ibm,associativity-reference-points +-------------------------------------------------------------------- + +The ibm,associativity-reference-points property is an array that is used +to define the relevant performance/distance related boundaries, defining +the NUMA levels for the platform. + +The definition of its elements also varies with the value of bit 0 of byte 5 +of the ibm,architecture-vec-5 property. The format with bit 0 equal to zero +is also deprecated. With the current format, each integer of the +ibm,associativity-reference-points represents a 1-based ordinal index (i.e. +the first element is 1) of the ibm,associativity array. The first +boundary is the most significant to application performance, followed by +less significant boundaries.
Allocated resources that belong to the +same performance boundaries are expected to have relative NUMA distances +that match the relevancy of the boundary itself. Resources that belong +to the same first boundary will have the shortest distance from each +other. Subsequent boundaries represent greater distances and degraded +performance. + +Using the previous example, the following reference-points setting defines +three NUMA levels: + +* ibm,associativity-reference-points = {0x3, 0x2, 0x1} + +The first NUMA level (0x3) is interpreted as the third element of each +ibm,associativity array, the second level is the second element and +the third level is the first element. Let's also consider that elements +belonging to the first NUMA level have distance equal to 10 from each +other, and each NUMA level doubles the distance from the previous. This +means that the second level would be 20 and the third level 40. For the P1 and +P2 processors, we would have the following NUMA levels: + +:: + + * ibm,associativity-reference-points = {0x3, 0x2, 0x1} + + * P1: associativity{MOD1, S1, C1, P1} + + First NUMA level (0x3) => associativity[2] = C1 + Second NUMA level (0x2) => associativity[1] = S1 + Third NUMA level (0x1) => associativity[0] = MOD1 + + * P2: associativity{MOD1, S2, C2, P2} + + First NUMA level (0x3) => associativity[2] = C2 + Second NUMA level (0x2) => associativity[1] = S2 + Third NUMA level (0x1) => associativity[0] = MOD1 + + P1 and P2 have the same third NUMA level, MOD1: Distance between them = 40 + +Changing the ibm,associativity-reference-points array changes the performance +distance attributes for the same associativity arrays, as the following +example illustrates: + +:: + + * ibm,associativity-reference-points = {0x2} + + * P1: associativity{MOD1, S1, C1, P1} + + First NUMA level (0x2) => associativity[1] = S1 + + * P2: associativity{MOD1, S2, C2, P2} + + First NUMA level (0x2) => associativity[1] = S2 + + P1 and P2 do not have a common performance boundary. Since this is a one-level + NUMA configuration, the distance between them is one boundary above the first + level, 20. + + +In a hypothetical platform where all resources inside the same hardware module +are considered to be on the same performance boundary: + +:: + + * ibm,associativity-reference-points = {0x1} + + * P1: associativity{MOD1, S1, C1, P1} + + First NUMA level (0x1) => associativity[0] = MOD1 + + * P2: associativity{MOD1, S2, C2, P2} + + First NUMA level (0x1) => associativity[0] = MOD1 + + P1 and P2 belong to the same first-order boundary. The distance between them + is 10. + + +How the pseries Linux guest calculates NUMA distances +===================================================== + +Another key difference between ACPI SLIT and LOPAPR regarding NUMA is +how the distances are expressed. The SLIT table provides the NUMA distance +value between the relevant resources. LOPAPR does not provide a standard +way to calculate it. We have the ibm,associativity for each resource, which +provides a common-performance hierarchy, and the ibm,associativity-reference-points +array that tells which levels of associativity are considered to be relevant +or not. + +The result is that each OS is free to implement and to interpret the distance +as it sees fit. For the pseries Linux guest, each NUMA level doubles +the distance of the previous level, and the maximum number of levels is +limited to MAX_DISTANCE_REF_POINTS = 4 (from arch/powerpc/mm/numa.c in the +kernel tree).
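To make the doubling rule concrete, here is a minimal sketch of how such a distance could be computed. This is an illustration only, not the kernel's actual code; the helper name and shape are hypothetical, assuming just the base distance of 10 doubling per level as described above: + +:: + + /* + * Hypothetical helper (illustration only): NUMA distance between + * two resources separated by 'levels_apart' NUMA levels, where + * 0 <= levels_apart <= MAX_DISTANCE_REF_POINTS (4). + */ + static int pseries_numa_distance(int levels_apart) + { + return 10 << levels_apart; /* 0 -> 10, 1 -> 20, ..., 4 -> 160 */ + }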
This results in the following distances: + +* both resources in the first NUMA level: 10 +* resources one NUMA level apart: 20 +* resources two NUMA levels apart: 40 +* resources three NUMA levels apart: 80 +* resources four NUMA levels apart: 160 + + +Consequences for QEMU NUMA tuning +--------------------------------- + +The way the pseries Linux guest calculates NUMA distances has a direct effect +on what QEMU users can expect when doing NUMA tuning. As of QEMU 5.1, this is +the default ibm,associativity-reference-points being used in the pseries +machine: + +ibm,associativity-reference-points = {0x4, 0x4, 0x2} + +The first and second levels are equal, 0x4, and a third one was added in +commit a6030d7e0b35 exclusively for NVLink GPU support. This means that +regardless of how the ibm,associativity properties are being created in +the device tree, the pseries Linux guest will only recognize three scenarios +as far as NUMA distance goes: + +* if the resources belong to the same first NUMA level, distance = 10 +* second level is skipped since it's equal to the first +* all resources that aren't NVLink GPUs are guaranteed to belong + to the same third NUMA level, with distance = 40 +* for NVLink GPUs, distance = 80 from everything else + +In short, we can summarize the NUMA distances seen in pseries Linux guests, using +QEMU up to 5.1, as follows: + +* local distance, i.e. the distance of the resource to its own NUMA node: 10 +* if it's an NVLink GPU device, distance: 80 +* every other resource, distance: 40 + +This also means that user input on the QEMU command line does not change the +NUMA distancing inside the guest for the pseries machine. diff --git a/docs/specs/ppc-spapr-xive.rst b/docs/specs/ppc-spapr-xive.rst index 6159bc6eed..7144347560 100644 --- a/docs/specs/ppc-spapr-xive.rst +++ b/docs/specs/ppc-spapr-xive.rst @@ -61,6 +61,11 @@ depend on the XIVE KVM capability of the host. On older kernels without XIVE KVM support, QEMU will use the emulated XIVE device as a fallback and on newer kernels (>=5.2), the KVM XIVE device. +XIVE native exploitation mode is not supported for KVM nested guests, +VMs running under an L1 hypervisor (KVM on pSeries). In that case, the +hypervisor will not advertise the KVM capability and QEMU will use the +emulated XIVE device, same as for older versions of KVM. + As a final refinement, the user can also switch the use of the KVM device with the machine option ``kernel_irqchip``. @@ -121,6 +126,9 @@ xics XICS KVM XICS emul. XICS KVM (1) QEMU warns with ``warning: kernel_irqchip requested but unavailable: IRQ_XIVE capability must be present for KVM`` + In some cases (old host kernels or KVM nested guests), one may hit a + QEMU/KVM incompatibility due to device destruction in reset. QEMU fails + with ``KVM is incompatible with ic-mode=dual,kernel-irqchip=on`` (2) QEMU fails with ``kernel_irqchip requested but unavailable: IRQ_XIVE capability must be present for KVM`` @@ -143,7 +151,7 @@ xics XICS KVM XICS emul. XICS KVM mode (XICS), either don't set the ic-mode machine property or try ic-mode=xics or ic-mode=dual`` (4) QEMU/KVM incompatibility due to device destruction in reset.
QEMU fails - with ``KVM is too old to support ic-mode=dual,kernel-irqchip=on`` + with ``KVM is incompatible with ic-mode=dual,kernel-irqchip=on`` XIVE Device tree properties diff --git a/docs/system/arm/xlnx-versal-virt.rst b/docs/system/arm/xlnx-versal-virt.rst new file mode 100644 index 0000000000..2602d0f995 --- /dev/null +++ b/docs/system/arm/xlnx-versal-virt.rst @@ -0,0 +1,176 @@ +Xilinx Versal Virt (``xlnx-versal-virt``) +========================================= + +Xilinx Versal is a family of heterogeneous multi-core SoCs +(System on Chip) that combine traditional hardened CPUs and I/O +peripherals in a Processing System (PS) with runtime programmable +FPGA logic (PL) and an Artificial Intelligence Engine (AIE). + +More details here: +https://www.xilinx.com/products/silicon-devices/acap/versal.html + +The family of Versal SoCs share a single architecture but come in +different parts with different speed grades, amounts of PL and +other differences. + +The Xilinx Versal Virt board in QEMU is a model of a virtual board +(does not exist in reality) with a virtual Versal SoC without I/O +limitations. Currently, we support the following cores and devices: + +Implemented CPU cores: + +- 2 ACPUs (ARM Cortex-A72) + +Implemented devices: + +- Interrupt controller (ARM GICv3) +- 2 UARTs (ARM PL011) +- An RTC (Versal built-in) +- 2 GEMs (Cadence MACB Ethernet MACs) +- 8 ADMA (Xilinx zDMA) channels +- 2 SD Controllers +- OCM (256KB of On Chip Memory) +- DDR memory + +QEMU does not yet model any other devices, including the PL and the AI Engine. + +Other differences between the hardware and the QEMU model: + +- QEMU allows the amount of DDR memory provided to be specified with the + ``-m`` argument. If a DTB is provided on the command line then QEMU will + edit it to include suitable entries describing the Versal DDR memory ranges. + +- QEMU provides 8 virtio-mmio virtio transports; these start at + address ``0xa0000000`` and have IRQs from 111 and upwards. + +Running +""""""" +If the user provides an Operating System to be loaded, we expect users +to use the ``-kernel`` command line option. + +Users can load firmware or boot-loaders with the ``-device loader`` options. + +When loading an OS, QEMU generates a DTB and selects an appropriate address +where it gets loaded. This DTB will be passed to the kernel in register x0. + +If there's no ``-kernel`` option, we generate a DTB and place it at 0x1000 +for boot-loaders or firmware to pick it up. + +If users want to provide their own DTB, they can use the ``-dtb`` option. +These DTBs will have their memory nodes modified to match QEMU's +selected ram_size option before they get passed to the kernel or FW. + +When loading an OS, we turn on QEMU's PSCI implementation with SMC +as the PSCI conduit. When there's no ``-kernel`` option, we assume the user +provides EL3 firmware to handle PSCI. + +A few examples: + +Direct Linux boot of a generic ARM64 upstream Linux kernel: + +.. code-block:: bash + + $ qemu-system-aarch64 -M xlnx-versal-virt -m 2G \ + -serial mon:stdio -display none \ + -kernel arch/arm64/boot/Image \ + -nic user -nic user \ + -device virtio-rng-device,bus=virtio-mmio-bus.0 \ + -drive if=none,index=0,file=hd0.qcow2,id=hd0,snapshot \ + -drive file=qemu_sd.qcow2,if=sd,index=0,snapshot \ + -device virtio-blk-device,drive=hd0 -append root=/dev/vda + +Direct Linux boot of PetaLinux 2019.2: + +.. 
code-block:: bash + + $ qemu-system-aarch64 -M xlnx-versal-virt -m 2G \ + -serial mon:stdio -display none \ + -kernel petalinux-v2019.2/Image \ + -append "rdinit=/sbin/init console=ttyAMA0,115200n8 earlycon=pl011,mmio,0xFF000000,115200n8" \ + -net nic,model=cadence_gem,netdev=net0 -netdev user,id=net0 \ + -device virtio-rng-device,bus=virtio-mmio-bus.0,rng=rng0 \ + -object rng-random,filename=/dev/urandom,id=rng0 + +Boot PetaLinux 2019.2 via ARM Trusted Firmware (2018.3 because the 2019.2 +version of ATF tries to configure the CCI which we don't model) and U-boot: + +.. code-block:: bash + + $ qemu-system-aarch64 -M xlnx-versal-virt -m 2G \ + -serial stdio -display none \ + -device loader,file=petalinux-v2018.3/bl31.elf,cpu-num=0 \ + -device loader,file=petalinux-v2019.2/u-boot.elf \ + -device loader,addr=0x20000000,file=petalinux-v2019.2/Image \ + -nic user -nic user \ + -device virtio-rng-device,bus=virtio-mmio-bus.0,rng=rng0 \ + -object rng-random,filename=/dev/urandom,id=rng0 + +Run the following at the U-Boot prompt: + +.. code-block:: bash + + Versal> + fdt addr $fdtcontroladdr + fdt move $fdtcontroladdr 0x40000000 + fdt set /timer clock-frequency <0x3dfd240> + setenv bootargs "rdinit=/sbin/init maxcpus=1 console=ttyAMA0,115200n8 earlycon=pl011,mmio,0xFF000000,115200n8" + booti 20000000 - 40000000 + fdt addr $fdtcontroladdr + +Boot Linux as DOM0 on Xen via U-Boot: + +.. code-block:: bash + + $ qemu-system-aarch64 -M xlnx-versal-virt -m 4G \ + -serial stdio -display none \ + -device loader,file=petalinux-v2019.2/u-boot.elf,cpu-num=0 \ + -device loader,addr=0x30000000,file=linux/2018-04-24/xen \ + -device loader,addr=0x40000000,file=petalinux-v2019.2/Image \ + -nic user -nic user \ + -device virtio-rng-device,bus=virtio-mmio-bus.0,rng=rng0 \ + -object rng-random,filename=/dev/urandom,id=rng0 + +Run the following at the U-Boot prompt: + +.. code-block:: bash + + Versal> + fdt addr $fdtcontroladdr + fdt move $fdtcontroladdr 0x20000000 + fdt set /timer clock-frequency <0x3dfd240> + fdt set /chosen xen,xen-bootargs "console=dtuart dtuart=/uart@ff000000 dom0_mem=640M bootscrub=0 maxcpus=1 timer_slop=0" + fdt set /chosen xen,dom0-bootargs "rdinit=/sbin/init clk_ignore_unused console=hvc0 maxcpus=1" + fdt mknode /chosen dom0 + fdt set /chosen/dom0 compatible "xen,multiboot-module" + fdt set /chosen/dom0 reg <0x00000000 0x40000000 0x0 0x03100000> + booti 30000000 - 20000000 + +Boot Linux as Dom0 on Xen via ARM Trusted Firmware and U-Boot: + +.. code-block:: bash + + $ qemu-system-aarch64 -M xlnx-versal-virt -m 4G \ + -serial stdio -display none \ + -device loader,file=petalinux-v2018.3/bl31.elf,cpu-num=0 \ + -device loader,file=petalinux-v2019.2/u-boot.elf \ + -device loader,addr=0x30000000,file=linux/2018-04-24/xen \ + -device loader,addr=0x40000000,file=petalinux-v2019.2/Image \ + -nic user -nic user \ + -device virtio-rng-device,bus=virtio-mmio-bus.0,rng=rng0 \ + -object rng-random,filename=/dev/urandom,id=rng0 + +Run the following at the U-Boot prompt: + +.. 
code-block:: bash + + Versal> + fdt addr $fdtcontroladdr + fdt move $fdtcontroladdr 0x20000000 + fdt set /timer clock-frequency <0x3dfd240> + fdt set /chosen xen,xen-bootargs "console=dtuart dtuart=/uart@ff000000 dom0_mem=640M bootscrub=0 maxcpus=1 timer_slop=0" + fdt set /chosen xen,dom0-bootargs "rdinit=/sbin/init clk_ignore_unused console=hvc0 maxcpus=1" + fdt mknode /chosen dom0 + fdt set /chosen/dom0 compatible "xen,multiboot-module" + fdt set /chosen/dom0 reg <0x00000000 0x40000000 0x0 0x03100000> + booti 30000000 - 20000000 + diff --git a/docs/system/target-arm.rst b/docs/system/target-arm.rst index 4c5b0e4aab..afdb37e738 100644 --- a/docs/system/target-arm.rst +++ b/docs/system/target-arm.rst @@ -93,6 +93,7 @@ undocumented; you can get a complete list by running arm/sx1 arm/stellaris arm/virt + arm/xlnx-versal-virt Arm CPU features ================ diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c index e13a5f4a7c..3838db1395 100644 --- a/hw/arm/smmu-common.c +++ b/hw/arm/smmu-common.c @@ -32,6 +32,91 @@ /* IOTLB Management */ +static guint smmu_iotlb_key_hash(gconstpointer v) +{ + SMMUIOTLBKey *key = (SMMUIOTLBKey *)v; + uint32_t a, b, c; + + /* Jenkins hash */ + a = b = c = JHASH_INITVAL + sizeof(*key); + a += key->asid + key->level + key->tg; + b += extract64(key->iova, 0, 32); + c += extract64(key->iova, 32, 32); + + __jhash_mix(a, b, c); + __jhash_final(a, b, c); + + return c; +} + +static gboolean smmu_iotlb_key_equal(gconstpointer v1, gconstpointer v2) +{ + SMMUIOTLBKey *k1 = (SMMUIOTLBKey *)v1, *k2 = (SMMUIOTLBKey *)v2; + + return (k1->asid == k2->asid) && (k1->iova == k2->iova) && + (k1->level == k2->level) && (k1->tg == k2->tg); +} + +SMMUIOTLBKey smmu_get_iotlb_key(uint16_t asid, uint64_t iova, + uint8_t tg, uint8_t level) +{ + SMMUIOTLBKey key = {.asid = asid, .iova = iova, .tg = tg, .level = level}; + + return key; +} + +SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg, + SMMUTransTableInfo *tt, hwaddr iova) +{ + uint8_t tg = (tt->granule_sz - 10) / 2; + uint8_t inputsize = 64 - tt->tsz; + uint8_t stride = tt->granule_sz - 3; + uint8_t level = 4 - (inputsize - 4) / stride; + SMMUTLBEntry *entry = NULL; + + while (level <= 3) { + uint64_t subpage_size = 1ULL << level_shift(level, tt->granule_sz); + uint64_t mask = subpage_size - 1; + SMMUIOTLBKey key; + + key = smmu_get_iotlb_key(cfg->asid, iova & ~mask, tg, level); + entry = g_hash_table_lookup(bs->iotlb, &key); + if (entry) { + break; + } + level++; + } + + if (entry) { + cfg->iotlb_hits++; + trace_smmu_iotlb_lookup_hit(cfg->asid, iova, + cfg->iotlb_hits, cfg->iotlb_misses, + 100 * cfg->iotlb_hits / + (cfg->iotlb_hits + cfg->iotlb_misses)); + } else { + cfg->iotlb_misses++; + trace_smmu_iotlb_lookup_miss(cfg->asid, iova, + cfg->iotlb_hits, cfg->iotlb_misses, + 100 * cfg->iotlb_hits / + (cfg->iotlb_hits + cfg->iotlb_misses)); + } + return entry; +} + +void smmu_iotlb_insert(SMMUState *bs, SMMUTransCfg *cfg, SMMUTLBEntry *new) +{ + SMMUIOTLBKey *key = g_new0(SMMUIOTLBKey, 1); + uint8_t tg = (new->granule - 10) / 2; + + if (g_hash_table_size(bs->iotlb) >= SMMU_IOTLB_MAX_SIZE) { + smmu_iotlb_inv_all(bs); + } + + *key = smmu_get_iotlb_key(cfg->asid, new->entry.iova, tg, new->level); + trace_smmu_iotlb_insert(cfg->asid, new->entry.iova, tg, new->level); + g_hash_table_insert(bs->iotlb, key, new); +} + inline void smmu_iotlb_inv_all(SMMUState *s) { trace_smmu_iotlb_inv_all(); @@ -44,15 +129,44 @@ static gboolean smmu_hash_remove_by_asid(gpointer key, gpointer value, uint16_t asid = *(uint16_t 
*)user_data; SMMUIOTLBKey *iotlb_key = (SMMUIOTLBKey *)key; - return iotlb_key->asid == asid; + return SMMU_IOTLB_ASID(*iotlb_key) == asid; } -inline void smmu_iotlb_inv_iova(SMMUState *s, uint16_t asid, dma_addr_t iova) +static gboolean smmu_hash_remove_by_asid_iova(gpointer key, gpointer value, + gpointer user_data) { - SMMUIOTLBKey key = {.asid = asid, .iova = iova}; + SMMUTLBEntry *iter = (SMMUTLBEntry *)value; + IOMMUTLBEntry *entry = &iter->entry; + SMMUIOTLBPageInvInfo *info = (SMMUIOTLBPageInvInfo *)user_data; + SMMUIOTLBKey iotlb_key = *(SMMUIOTLBKey *)key; - trace_smmu_iotlb_inv_iova(asid, iova); - g_hash_table_remove(s->iotlb, &key); + if (info->asid >= 0 && info->asid != SMMU_IOTLB_ASID(iotlb_key)) { + return false; + } + return ((info->iova & ~entry->addr_mask) == entry->iova) || + ((entry->iova & ~info->mask) == info->iova); +} + +inline void +smmu_iotlb_inv_iova(SMMUState *s, int asid, dma_addr_t iova, + uint8_t tg, uint64_t num_pages, uint8_t ttl) +{ + if (ttl && (num_pages == 1)) { + SMMUIOTLBKey key = smmu_get_iotlb_key(asid, iova, tg, ttl); + + g_hash_table_remove(s->iotlb, &key); + } else { + /* if tg is not set we use 4KB range invalidation */ + uint8_t granule = tg ? tg * 2 + 10 : 12; + + SMMUIOTLBPageInvInfo info = { + .asid = asid, .iova = iova, + .mask = (num_pages * 1 << granule) - 1}; + + g_hash_table_foreach_remove(s->iotlb, + smmu_hash_remove_by_asid_iova, + &info); + } } inline void smmu_iotlb_inv_asid(SMMUState *s, uint16_t asid) @@ -149,7 +263,7 @@ SMMUTransTableInfo *select_tt(SMMUTransCfg *cfg, dma_addr_t iova) * @cfg: translation config * @iova: iova to translate * @perm: access type - * @tlbe: IOMMUTLBEntry (out) + * @tlbe: SMMUTLBEntry (out) * @info: handle to an error info * * Return 0 on success, < 0 on error. 
In case of error, @info is filled @@ -159,7 +273,7 @@ SMMUTransTableInfo *select_tt(SMMUTransCfg *cfg, dma_addr_t iova) */ static int smmu_ptw_64(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm, - IOMMUTLBEntry *tlbe, SMMUPTWEventInfo *info) + SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info) { dma_addr_t baseaddr, indexmask; int stage = cfg->stage; @@ -179,14 +293,11 @@ static int smmu_ptw_64(SMMUTransCfg *cfg, baseaddr = extract64(tt->ttb, 0, 48); baseaddr &= ~indexmask; - tlbe->iova = iova; - tlbe->addr_mask = (1 << granule_sz) - 1; - while (level <= 3) { uint64_t subpage_size = 1ULL << level_shift(level, granule_sz); uint64_t mask = subpage_size - 1; uint32_t offset = iova_level_offset(iova, inputsize, level, granule_sz); - uint64_t pte; + uint64_t pte, gpa; dma_addr_t pte_addr = baseaddr + offset * sizeof(pte); uint8_t ap; @@ -199,60 +310,50 @@ static int smmu_ptw_64(SMMUTransCfg *cfg, if (is_invalid_pte(pte) || is_reserved_pte(pte, level)) { trace_smmu_ptw_invalid_pte(stage, level, baseaddr, pte_addr, offset, pte); - info->type = SMMU_PTW_ERR_TRANSLATION; - goto error; + break; } - if (is_page_pte(pte, level)) { - uint64_t gpa = get_page_pte_address(pte, granule_sz); + if (is_table_pte(pte, level)) { + ap = PTE_APTABLE(pte); - ap = PTE_AP(pte); - if (is_permission_fault(ap, perm)) { + if (is_permission_fault(ap, perm) && !tt->had) { info->type = SMMU_PTW_ERR_PERMISSION; goto error; } - - tlbe->translated_addr = gpa + (iova & mask); - tlbe->perm = PTE_AP_TO_PERM(ap); + baseaddr = get_table_pte_address(pte, granule_sz); + level++; + continue; + } else if (is_page_pte(pte, level)) { + gpa = get_page_pte_address(pte, granule_sz); trace_smmu_ptw_page_pte(stage, level, iova, baseaddr, pte_addr, pte, gpa); - return 0; - } - if (is_block_pte(pte, level)) { + } else { uint64_t block_size; - hwaddr gpa = get_block_pte_address(pte, level, granule_sz, - &block_size); - - ap = PTE_AP(pte); - if (is_permission_fault(ap, perm)) { - info->type = SMMU_PTW_ERR_PERMISSION; - goto error; - } + gpa = get_block_pte_address(pte, level, granule_sz, + &block_size); trace_smmu_ptw_block_pte(stage, level, baseaddr, pte_addr, pte, iova, gpa, block_size >> 20); - - tlbe->translated_addr = gpa + (iova & mask); - tlbe->perm = PTE_AP_TO_PERM(ap); - return 0; } - - /* table pte */ - ap = PTE_APTABLE(pte); - + ap = PTE_AP(pte); if (is_permission_fault(ap, perm)) { info->type = SMMU_PTW_ERR_PERMISSION; goto error; } - baseaddr = get_table_pte_address(pte, granule_sz); - level++; - } + tlbe->entry.translated_addr = gpa; + tlbe->entry.iova = iova & ~mask; + tlbe->entry.addr_mask = mask; + tlbe->entry.perm = PTE_AP_TO_PERM(ap); + tlbe->level = level; + tlbe->granule = granule_sz; + return 0; + } info->type = SMMU_PTW_ERR_TRANSLATION; error: - tlbe->perm = IOMMU_NONE; + tlbe->entry.perm = IOMMU_NONE; return -EINVAL; } @@ -268,7 +369,7 @@ error: * return 0 on success */ inline int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm, - IOMMUTLBEntry *tlbe, SMMUPTWEventInfo *info) + SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info) { if (!cfg->aa64) { /* @@ -361,31 +462,6 @@ IOMMUMemoryRegion *smmu_iommu_mr(SMMUState *s, uint32_t sid) return NULL; } -static guint smmu_iotlb_key_hash(gconstpointer v) -{ - SMMUIOTLBKey *key = (SMMUIOTLBKey *)v; - uint32_t a, b, c; - - /* Jenkins hash */ - a = b = c = JHASH_INITVAL + sizeof(*key); - a += key->asid; - b += extract64(key->iova, 0, 32); - c += extract64(key->iova, 32, 32); - - __jhash_mix(a, b, c); - __jhash_final(a, b, c); - - return c; -} - -static gboolean 
smmu_iotlb_key_equal(gconstpointer v1, gconstpointer v2) -{ - const SMMUIOTLBKey *k1 = v1; - const SMMUIOTLBKey *k2 = v2; - - return (k1->asid == k2->asid) && (k1->iova == k2->iova); -} - /* Unmap the whole notifier's range */ static void smmu_unmap_notifier_range(IOMMUNotifier *n) { diff --git a/hw/arm/smmu-internal.h b/hw/arm/smmu-internal.h index 7794d6d394..55147f29be 100644 --- a/hw/arm/smmu-internal.h +++ b/hw/arm/smmu-internal.h @@ -96,4 +96,12 @@ uint64_t iova_level_offset(uint64_t iova, int inputsize, MAKE_64BIT_MASK(0, gsz - 3); } +#define SMMU_IOTLB_ASID(key) ((key).asid) + +typedef struct SMMUIOTLBPageInvInfo { + int asid; + uint64_t iova; + uint64_t mask; +} SMMUIOTLBPageInvInfo; + #endif diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h index 4112394129..fa3c088972 100644 --- a/hw/arm/smmuv3-internal.h +++ b/hw/arm/smmuv3-internal.h @@ -54,6 +54,8 @@ REG32(IDR1, 0x4) REG32(IDR2, 0x8) REG32(IDR3, 0xc) + FIELD(IDR3, HAD, 2, 1); + FIELD(IDR3, RIL, 10, 1); REG32(IDR4, 0x10) REG32(IDR5, 0x14) FIELD(IDR5, OAS, 0, 3); @@ -63,7 +65,8 @@ REG32(IDR5, 0x14) #define SMMU_IDR5_OAS 4 -REG32(IIDR, 0x1c) +REG32(IIDR, 0x18) +REG32(AIDR, 0x1c) REG32(CR0, 0x20) FIELD(CR0, SMMU_ENABLE, 0, 1) FIELD(CR0, EVENTQEN, 2, 1) @@ -298,6 +301,8 @@ enum { /* Command completion notification */ }; #define CMD_TYPE(x) extract32((x)->word[0], 0 , 8) +#define CMD_NUM(x) extract32((x)->word[0], 12 , 5) +#define CMD_SCALE(x) extract32((x)->word[0], 20 , 5) #define CMD_SSEC(x) extract32((x)->word[0], 10, 1) #define CMD_SSV(x) extract32((x)->word[0], 11, 1) #define CMD_RESUME_AC(x) extract32((x)->word[0], 12, 1) @@ -310,6 +315,8 @@ enum { /* Command completion notification */ #define CMD_RESUME_STAG(x) extract32((x)->word[2], 0 , 16) #define CMD_RESP(x) extract32((x)->word[2], 11, 2) #define CMD_LEAF(x) extract32((x)->word[2], 0 , 1) +#define CMD_TTL(x) extract32((x)->word[2], 8 , 2) +#define CMD_TG(x) extract32((x)->word[2], 10, 2) #define CMD_STE_RANGE(x) extract32((x)->word[2], 0 , 5) #define CMD_ADDR(x) ({ \ uint64_t high = (uint64_t)(x)->word[3]; \ @@ -573,6 +580,7 @@ static inline int pa_range(STE *ste) lo = (x)->word[(sel) * 2 + 2] & ~0xfULL; \ hi | lo; \ }) +#define CD_HAD(x, sel) extract32((x)->word[(sel) * 2 + 2], 1, 1) #define CD_TSZ(x, sel) extract32((x)->word[0], (16 * (sel)) + 0, 6) #define CD_TG(x, sel) extract32((x)->word[0], (16 * (sel)) + 6, 2) diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c index 57a79df55b..0122700e72 100644 --- a/hw/arm/smmuv3.c +++ b/hw/arm/smmuv3.c @@ -254,6 +254,9 @@ static void smmuv3_init_regs(SMMUv3State *s) s->idr[1] = FIELD_DP32(s->idr[1], IDR1, EVENTQS, SMMU_EVENTQS); s->idr[1] = FIELD_DP32(s->idr[1], IDR1, CMDQS, SMMU_CMDQS); + s->idr[3] = FIELD_DP32(s->idr[3], IDR3, RIL, 1); + s->idr[3] = FIELD_DP32(s->idr[3], IDR3, HAD, 1); + /* 4K and 64K granule support */ s->idr[5] = FIELD_DP32(s->idr[5], IDR5, GRAN4K, 1); s->idr[5] = FIELD_DP32(s->idr[5], IDR5, GRAN64K, 1); @@ -270,6 +273,7 @@ static void smmuv3_init_regs(SMMUv3State *s) s->features = 0; s->sid_split = 0; + s->aidr = 0x1; } static int smmu_get_ste(SMMUv3State *s, dma_addr_t addr, STE *buf, @@ -506,7 +510,8 @@ static int decode_cd(SMMUTransCfg *cfg, CD *cd, SMMUEventInfo *event) if (tt->ttb & ~(MAKE_64BIT_MASK(0, cfg->oas))) { goto bad_cd; } - trace_smmuv3_decode_cd_tt(i, tt->tsz, tt->ttb, tt->granule_sz); + tt->had = CD_HAD(cd, i); + trace_smmuv3_decode_cd_tt(i, tt->tsz, tt->ttb, tt->granule_sz, tt->had); } event->record_trans_faults = CD_R(cd); @@ -626,7 +631,7 @@ static IOMMUTLBEntry 
smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr, SMMUTranslationStatus status; SMMUState *bs = ARM_SMMU(s); uint64_t page_mask, aligned_addr; - IOMMUTLBEntry *cached_entry = NULL; + SMMUTLBEntry *cached_entry = NULL; SMMUTransTableInfo *tt; SMMUTransCfg *cfg = NULL; IOMMUTLBEntry entry = { @@ -636,7 +641,6 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr, .addr_mask = ~(hwaddr)0, .perm = IOMMU_NONE, }; - SMMUIOTLBKey key, *new_key; qemu_mutex_lock(&s->mutex); @@ -675,17 +679,9 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr, page_mask = (1ULL << (tt->granule_sz)) - 1; aligned_addr = addr & ~page_mask; - key.asid = cfg->asid; - key.iova = aligned_addr; - - cached_entry = g_hash_table_lookup(bs->iotlb, &key); + cached_entry = smmu_iotlb_lookup(bs, cfg, tt, aligned_addr); if (cached_entry) { - cfg->iotlb_hits++; - trace_smmu_iotlb_cache_hit(cfg->asid, aligned_addr, - cfg->iotlb_hits, cfg->iotlb_misses, - 100 * cfg->iotlb_hits / - (cfg->iotlb_hits + cfg->iotlb_misses)); - if ((flag & IOMMU_WO) && !(cached_entry->perm & IOMMU_WO)) { + if ((flag & IOMMU_WO) && !(cached_entry->entry.perm & IOMMU_WO)) { status = SMMU_TRANS_ERROR; if (event.record_trans_faults) { event.type = SMMU_EVT_F_PERMISSION; @@ -698,17 +694,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr, goto epilogue; } - cfg->iotlb_misses++; - trace_smmu_iotlb_cache_miss(cfg->asid, addr & ~page_mask, - cfg->iotlb_hits, cfg->iotlb_misses, - 100 * cfg->iotlb_hits / - (cfg->iotlb_hits + cfg->iotlb_misses)); - - if (g_hash_table_size(bs->iotlb) >= SMMU_IOTLB_MAX_SIZE) { - smmu_iotlb_inv_all(bs); - } - - cached_entry = g_new0(IOMMUTLBEntry, 1); + cached_entry = g_new0(SMMUTLBEntry, 1); if (smmu_ptw(cfg, aligned_addr, flag, cached_entry, &ptw_info)) { g_free(cached_entry); @@ -753,10 +739,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr, } status = SMMU_TRANS_ERROR; } else { - new_key = g_new0(SMMUIOTLBKey, 1); - new_key->asid = cfg->asid; - new_key->iova = aligned_addr; - g_hash_table_insert(bs->iotlb, new_key, cached_entry); + smmu_iotlb_insert(bs, cfg, cached_entry); status = SMMU_TRANS_SUCCESS; } @@ -765,9 +748,9 @@ epilogue: switch (status) { case SMMU_TRANS_SUCCESS: entry.perm = flag; - entry.translated_addr = cached_entry->translated_addr + - (addr & page_mask); - entry.addr_mask = cached_entry->addr_mask; + entry.translated_addr = cached_entry->entry.translated_addr + + (addr & cached_entry->entry.addr_mask); + entry.addr_mask = cached_entry->entry.addr_mask; trace_smmuv3_translate_success(mr->parent_obj.name, sid, addr, entry.translated_addr, entry.perm); break; @@ -807,42 +790,49 @@ epilogue: * @n: notifier to be called * @asid: address space ID or negative value if we don't care * @iova: iova + * @tg: translation granule (if communicated through range invalidation) + * @num_pages: number of @granule sized pages (if tg != 0), otherwise 1 */ static void smmuv3_notify_iova(IOMMUMemoryRegion *mr, IOMMUNotifier *n, - int asid, - dma_addr_t iova) + int asid, dma_addr_t iova, + uint8_t tg, uint64_t num_pages) { SMMUDevice *sdev = container_of(mr, SMMUDevice, iommu); - SMMUEventInfo event = {.inval_ste_allowed = true}; - SMMUTransTableInfo *tt; - SMMUTransCfg *cfg; IOMMUTLBEntry entry; + uint8_t granule = tg; - cfg = smmuv3_get_config(sdev, &event); - if (!cfg) { - return; - } + if (!tg) { + SMMUEventInfo event = {.inval_ste_allowed = true}; + SMMUTransCfg *cfg = smmuv3_get_config(sdev, &event); + SMMUTransTableInfo *tt; - if 
(asid >= 0 && cfg->asid != asid) { - return; - } + if (!cfg) { + return; + } - tt = select_tt(cfg, iova); - if (!tt) { - return; + if (asid >= 0 && cfg->asid != asid) { + return; + } + + tt = select_tt(cfg, iova); + if (!tt) { + return; + } + granule = tt->granule_sz; } entry.target_as = &address_space_memory; entry.iova = iova; - entry.addr_mask = (1 << tt->granule_sz) - 1; + entry.addr_mask = num_pages * (1 << granule) - 1; entry.perm = IOMMU_NONE; memory_region_notify_one(n, &entry); } -/* invalidate an asid/iova tuple in all mr's */ -static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, dma_addr_t iova) +/* invalidate an asid/iova range tuple in all mr's */ +static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, dma_addr_t iova, + uint8_t tg, uint64_t num_pages) { SMMUDevice *sdev; @@ -850,14 +840,41 @@ static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, dma_addr_t iova) IOMMUMemoryRegion *mr = &sdev->iommu; IOMMUNotifier *n; - trace_smmuv3_inv_notifiers_iova(mr->parent_obj.name, asid, iova); + trace_smmuv3_inv_notifiers_iova(mr->parent_obj.name, asid, iova, + tg, num_pages); IOMMU_NOTIFIER_FOREACH(n, mr) { - smmuv3_notify_iova(mr, n, asid, iova); + smmuv3_notify_iova(mr, n, asid, iova, tg, num_pages); } } } +static void smmuv3_s1_range_inval(SMMUState *s, Cmd *cmd) +{ + uint8_t scale = 0, num = 0, ttl = 0; + dma_addr_t addr = CMD_ADDR(cmd); + uint8_t type = CMD_TYPE(cmd); + uint16_t vmid = CMD_VMID(cmd); + bool leaf = CMD_LEAF(cmd); + uint8_t tg = CMD_TG(cmd); + hwaddr num_pages = 1; + int asid = -1; + + if (tg) { + scale = CMD_SCALE(cmd); + num = CMD_NUM(cmd); + ttl = CMD_TTL(cmd); + num_pages = (num + 1) * (1 << (scale)); + } + + if (type == SMMU_CMD_TLBI_NH_VA) { + asid = CMD_ASID(cmd); + } + trace_smmuv3_s1_range_inval(vmid, asid, addr, tg, num_pages, ttl, leaf); + smmuv3_inv_notifiers_iova(s, asid, addr, tg, num_pages); + smmu_iotlb_inv_iova(s, asid, addr, tg, num_pages, ttl); +} + static int smmuv3_cmdq_consume(SMMUv3State *s) { SMMUState *bs = ARM_SMMU(s); @@ -988,27 +1005,9 @@ static int smmuv3_cmdq_consume(SMMUv3State *s) smmu_iotlb_inv_all(bs); break; case SMMU_CMD_TLBI_NH_VAA: - { - dma_addr_t addr = CMD_ADDR(&cmd); - uint16_t vmid = CMD_VMID(&cmd); - - trace_smmuv3_cmdq_tlbi_nh_vaa(vmid, addr); - smmuv3_inv_notifiers_iova(bs, -1, addr); - smmu_iotlb_inv_all(bs); - break; - } case SMMU_CMD_TLBI_NH_VA: - { - uint16_t asid = CMD_ASID(&cmd); - uint16_t vmid = CMD_VMID(&cmd); - dma_addr_t addr = CMD_ADDR(&cmd); - bool leaf = CMD_LEAF(&cmd); - - trace_smmuv3_cmdq_tlbi_nh_va(vmid, asid, addr, leaf); - smmuv3_inv_notifiers_iova(bs, asid, addr); - smmu_iotlb_inv_iova(bs, asid, addr); + smmuv3_s1_range_inval(bs, &cmd); break; - } case SMMU_CMD_TLBI_EL3_ALL: case SMMU_CMD_TLBI_EL3_VA: case SMMU_CMD_TLBI_EL2_ALL: @@ -1257,6 +1256,9 @@ static MemTxResult smmu_readl(SMMUv3State *s, hwaddr offset, case A_IIDR: *data = s->iidr; return MEMTX_OK; + case A_AIDR: + *data = s->aidr; + return MEMTX_OK; case A_CR0: *data = s->cr[0]; return MEMTX_OK; diff --git a/hw/arm/trace-events b/hw/arm/trace-events index 0acedcedc6..c8a4d80f6b 100644 --- a/hw/arm/trace-events +++ b/hw/arm/trace-events @@ -14,6 +14,9 @@ smmu_iotlb_inv_all(void) "IOTLB invalidate all" smmu_iotlb_inv_asid(uint16_t asid) "IOTLB invalidate asid=%d" smmu_iotlb_inv_iova(uint16_t asid, uint64_t addr) "IOTLB invalidate asid=%d addr=0x%"PRIx64 smmu_inv_notifiers_mr(const char *name) "iommu mr=%s" +smmu_iotlb_lookup_hit(uint16_t asid, uint64_t addr, uint32_t hit, uint32_t miss, uint32_t p) "IOTLB cache HIT 
asid=%d addr=0x%"PRIx64" hit=%d miss=%d hit rate=%d" +smmu_iotlb_lookup_miss(uint16_t asid, uint64_t addr, uint32_t hit, uint32_t miss, uint32_t p) "IOTLB cache MISS asid=%d addr=0x%"PRIx64" hit=%d miss=%d hit rate=%d" +smmu_iotlb_insert(uint16_t asid, uint64_t addr, uint8_t tg, uint8_t level) "IOTLB ++ asid=%d addr=0x%"PRIx64" tg=%d level=%d" # smmuv3.c smmuv3_read_mmio(uint64_t addr, uint64_t val, unsigned size, uint32_t r) "addr: 0x%"PRIx64" val:0x%"PRIx64" size: 0x%x(%d)" @@ -36,20 +39,17 @@ smmuv3_translate_abort(const char *n, uint16_t sid, uint64_t addr, bool is_write smmuv3_translate_success(const char *n, uint16_t sid, uint64_t iova, uint64_t translated, int perm) "%s sid=%d iova=0x%"PRIx64" translated=0x%"PRIx64" perm=0x%x" smmuv3_get_cd(uint64_t addr) "CD addr: 0x%"PRIx64 smmuv3_decode_cd(uint32_t oas) "oas=%d" -smmuv3_decode_cd_tt(int i, uint32_t tsz, uint64_t ttb, uint32_t granule_sz) "TT[%d]:tsz:%d ttb:0x%"PRIx64" granule_sz:%d" +smmuv3_decode_cd_tt(int i, uint32_t tsz, uint64_t ttb, uint32_t granule_sz, bool had) "TT[%d]:tsz:%d ttb:0x%"PRIx64" granule_sz:%d had:%d" smmuv3_cmdq_cfgi_ste(int streamid) "streamid =%d" smmuv3_cmdq_cfgi_ste_range(int start, int end) "start=0x%d - end=0x%d" smmuv3_cmdq_cfgi_cd(uint32_t sid) "streamid = %d" smmuv3_config_cache_hit(uint32_t sid, uint32_t hits, uint32_t misses, uint32_t perc) "Config cache HIT for sid %d (hits=%d, misses=%d, hit rate=%d)" smmuv3_config_cache_miss(uint32_t sid, uint32_t hits, uint32_t misses, uint32_t perc) "Config cache MISS for sid %d (hits=%d, misses=%d, hit rate=%d)" -smmuv3_cmdq_tlbi_nh_va(int vmid, int asid, uint64_t addr, bool leaf) "vmid =%d asid =%d addr=0x%"PRIx64" leaf=%d" -smmuv3_cmdq_tlbi_nh_vaa(int vmid, uint64_t addr) "vmid =%d addr=0x%"PRIx64 +smmuv3_s1_range_inval(int vmid, int asid, uint64_t addr, uint8_t tg, uint64_t num_pages, uint8_t ttl, bool leaf) "vmid =%d asid =%d addr=0x%"PRIx64" tg=%d num_pages=0x%"PRIx64" ttl=%d leaf=%d" smmuv3_cmdq_tlbi_nh(void) "" smmuv3_cmdq_tlbi_nh_asid(uint16_t asid) "asid=%d" -smmu_iotlb_cache_hit(uint16_t asid, uint64_t addr, uint32_t hit, uint32_t miss, uint32_t p) "IOTLB cache HIT asid=%d addr=0x%"PRIx64" hit=%d miss=%d hit rate=%d" -smmu_iotlb_cache_miss(uint16_t asid, uint64_t addr, uint32_t hit, uint32_t miss, uint32_t p) "IOTLB cache MISS asid=%d addr=0x%"PRIx64" hit=%d miss=%d hit rate=%d" smmuv3_config_cache_inv(uint32_t sid) "Config cache INV for sid %d" smmuv3_notify_flag_add(const char *iommu) "ADD SMMUNotifier node for iommu mr=%s" smmuv3_notify_flag_del(const char *iommu) "DEL SMMUNotifier node for iommu mr=%s" -smmuv3_inv_notifiers_iova(const char *name, uint16_t asid, uint64_t iova) "iommu mr=%s asid=%d iova=0x%"PRIx64 +smmuv3_inv_notifiers_iova(const char *name, uint16_t asid, uint64_t iova, uint8_t tg, uint64_t num_pages) "iommu mr=%s asid=%d iova=0x%"PRIx64" tg=%d num_pages=0x%"PRIx64 diff --git a/hw/cpu/a9mpcore.c b/hw/cpu/a9mpcore.c index 351295e518..ec186d49ab 100644 --- a/hw/cpu/a9mpcore.c +++ b/hw/cpu/a9mpcore.c @@ -15,6 +15,7 @@ #include "hw/irq.h" #include "hw/qdev-properties.h" #include "hw/core/cpu.h" +#include "cpu.h" #define A9_GIC_NUM_PRIORITY_BITS 5 @@ -52,8 +53,18 @@ static void a9mp_priv_realize(DeviceState *dev, Error **errp) *wdtbusdev; int i; bool has_el3; + CPUState *cpu0; Object *cpuobj; + cpu0 = qemu_get_cpu(0); + cpuobj = OBJECT(cpu0); + if (strcmp(object_get_typename(cpuobj), ARM_CPU_TYPE_NAME("cortex-a9"))) { + /* We might allow Cortex-A5 once we model it */ + error_setg(errp, + "Cortex-A9MPCore peripheral can only use 
Cortex-A9 CPU"); + return; + } + scudev = DEVICE(&s->scu); qdev_prop_set_uint32(scudev, "num-cpu", s->num_cpu); if (!sysbus_realize(SYS_BUS_DEVICE(&s->scu), errp)) { @@ -70,7 +81,6 @@ static void a9mp_priv_realize(DeviceState *dev, Error **errp) /* Make the GIC's TZ support match the CPUs. We assume that * either all the CPUs have TZ, or none do. */ - cpuobj = OBJECT(qemu_get_cpu(0)); has_el3 = object_property_find(cpuobj, "has_el3", NULL) && object_property_get_bool(cpuobj, "has_el3", &error_abort); qdev_prop_set_bit(gicdev, "has-security-extensions", has_el3); diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c index 89c8cd9667..4bd0d606ba 100644 --- a/hw/intc/spapr_xive.c +++ b/hw/intc/spapr_xive.c @@ -148,12 +148,19 @@ static void spapr_xive_end_pic_print_info(SpaprXive *xive, XiveEND *end, xive_end_queue_pic_print_info(end, 6, mon); } +/* + * kvm_irqchip_in_kernel() will cause the compiler to turn this + * info a nop if CONFIG_KVM isn't defined. + */ +#define spapr_xive_in_kernel(xive) \ + (kvm_irqchip_in_kernel() && (xive)->fd != -1) + void spapr_xive_pic_print_info(SpaprXive *xive, Monitor *mon) { XiveSource *xsrc = &xive->source; int i; - if (kvm_irqchip_in_kernel()) { + if (spapr_xive_in_kernel(xive)) { Error *local_err = NULL; kvmppc_xive_synchronize_state(xive, &local_err); @@ -329,7 +336,7 @@ static void spapr_xive_realize(DeviceState *dev, Error **errp) sysbus_init_mmio(SYS_BUS_DEVICE(xive), &end_xsrc->esb_mmio); /* Set the mapping address of the END ESB pages after the source ESBs */ - xive->end_base = xive->vc_base + (1ull << xsrc->esb_shift) * xsrc->nr_irqs; + xive->end_base = xive->vc_base + xive_source_esb_len(xsrc); /* * Allocate the routing tables @@ -507,8 +514,10 @@ static const VMStateDescription vmstate_spapr_xive_eas = { static int vmstate_spapr_xive_pre_save(void *opaque) { - if (kvm_irqchip_in_kernel()) { - return kvmppc_xive_pre_save(SPAPR_XIVE(opaque)); + SpaprXive *xive = SPAPR_XIVE(opaque); + + if (spapr_xive_in_kernel(xive)) { + return kvmppc_xive_pre_save(xive); } return 0; @@ -520,8 +529,10 @@ static int vmstate_spapr_xive_pre_save(void *opaque) */ static int spapr_xive_post_load(SpaprInterruptController *intc, int version_id) { - if (kvm_irqchip_in_kernel()) { - return kvmppc_xive_post_load(SPAPR_XIVE(intc), version_id); + SpaprXive *xive = SPAPR_XIVE(intc); + + if (spapr_xive_in_kernel(xive)) { + return kvmppc_xive_post_load(xive, version_id); } return 0; @@ -564,7 +575,7 @@ static int spapr_xive_claim_irq(SpaprInterruptController *intc, int lisn, xive_source_irq_set_lsi(xsrc, lisn); } - if (kvm_irqchip_in_kernel()) { + if (spapr_xive_in_kernel(xive)) { return kvmppc_xive_source_reset_one(xsrc, lisn, errp); } @@ -641,7 +652,7 @@ static void spapr_xive_set_irq(SpaprInterruptController *intc, int irq, int val) { SpaprXive *xive = SPAPR_XIVE(intc); - if (kvm_irqchip_in_kernel()) { + if (spapr_xive_in_kernel(xive)) { kvmppc_xive_source_set_irq(&xive->source, irq, val); } else { xive_source_set_irq(&xive->source, irq, val); @@ -749,11 +760,16 @@ static void spapr_xive_deactivate(SpaprInterruptController *intc) spapr_xive_mmio_set_enabled(xive, false); - if (kvm_irqchip_in_kernel()) { + if (spapr_xive_in_kernel(xive)) { kvmppc_xive_disconnect(intc); } } +static bool spapr_xive_in_kernel_xptr(const XivePresenter *xptr) +{ + return spapr_xive_in_kernel(SPAPR_XIVE(xptr)); +} + static void spapr_xive_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -788,6 +804,7 @@ static void spapr_xive_class_init(ObjectClass *klass, 
void *data) sicc->post_load = spapr_xive_post_load; xpc->match_nvt = spapr_xive_match_nvt; + xpc->in_kernel = spapr_xive_in_kernel_xptr; } static const TypeInfo spapr_xive_info = { @@ -1058,7 +1075,7 @@ static target_ulong h_int_set_source_config(PowerPCCPU *cpu, new_eas.w = xive_set_field64(EAS_END_DATA, new_eas.w, eisn); } - if (kvm_irqchip_in_kernel()) { + if (spapr_xive_in_kernel(xive)) { Error *local_err = NULL; kvmppc_xive_set_source_config(xive, lisn, &new_eas, &local_err); @@ -1379,7 +1396,7 @@ static target_ulong h_int_set_queue_config(PowerPCCPU *cpu, */ out: - if (kvm_irqchip_in_kernel()) { + if (spapr_xive_in_kernel(xive)) { Error *local_err = NULL; kvmppc_xive_set_queue_config(xive, end_blk, end_idx, &end, &local_err); @@ -1480,7 +1497,7 @@ static target_ulong h_int_get_queue_config(PowerPCCPU *cpu, args[2] = 0; } - if (kvm_irqchip_in_kernel()) { + if (spapr_xive_in_kernel(xive)) { Error *local_err = NULL; kvmppc_xive_get_queue_config(xive, end_blk, end_idx, end, &local_err); @@ -1642,7 +1659,7 @@ static target_ulong h_int_esb(PowerPCCPU *cpu, return H_P3; } - if (kvm_irqchip_in_kernel()) { + if (spapr_xive_in_kernel(xive)) { args[0] = kvmppc_xive_esb_rw(xsrc, lisn, offset, data, flags & SPAPR_XIVE_ESB_STORE); } else { @@ -1717,7 +1734,7 @@ static target_ulong h_int_sync(PowerPCCPU *cpu, * under KVM */ - if (kvm_irqchip_in_kernel()) { + if (spapr_xive_in_kernel(xive)) { Error *local_err = NULL; kvmppc_xive_sync_source(xive, lisn, &local_err); @@ -1761,7 +1778,7 @@ static target_ulong h_int_reset(PowerPCCPU *cpu, device_legacy_reset(DEVICE(xive)); - if (kvm_irqchip_in_kernel()) { + if (spapr_xive_in_kernel(xive)) { Error *local_err = NULL; kvmppc_xive_reset(xive, &local_err); diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c index edb7ee0e74..e8667ce5f6 100644 --- a/hw/intc/spapr_xive_kvm.c +++ b/hw/intc/spapr_xive_kvm.c @@ -73,54 +73,54 @@ static void kvm_cpu_disable_all(void) * XIVE Thread Interrupt Management context (KVM) */ -void kvmppc_xive_cpu_set_state(XiveTCTX *tctx, Error **errp) +int kvmppc_xive_cpu_set_state(XiveTCTX *tctx, Error **errp) { SpaprXive *xive = SPAPR_XIVE(tctx->xptr); uint64_t state[2]; int ret; - /* The KVM XIVE device is not in use yet */ - if (xive->fd == -1) { - return; - } + assert(xive->fd != -1); /* word0 and word1 of the OS ring. */ state[0] = *((uint64_t *) &tctx->regs[TM_QW1_OS]); ret = kvm_set_one_reg(tctx->cs, KVM_REG_PPC_VP_STATE, state); if (ret != 0) { - error_setg_errno(errp, errno, + error_setg_errno(errp, -ret, "XIVE: could not restore KVM state of CPU %ld", kvm_arch_vcpu_id(tctx->cs)); + return ret; } + + return 0; } -void kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp) +int kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp) { SpaprXive *xive = SPAPR_XIVE(tctx->xptr); uint64_t state[2] = { 0 }; int ret; - /* The KVM XIVE device is not in use */ - if (xive->fd == -1) { - return; - } + assert(xive->fd != -1); ret = kvm_get_one_reg(tctx->cs, KVM_REG_PPC_VP_STATE, state); if (ret != 0) { - error_setg_errno(errp, errno, + error_setg_errno(errp, -ret, "XIVE: could not capture KVM state of CPU %ld", kvm_arch_vcpu_id(tctx->cs)); - return; + return ret; } /* word0 and word1 of the OS ring. 
*/ *((uint64_t *) &tctx->regs[TM_QW1_OS]) = state[0]; + + return 0; } typedef struct { XiveTCTX *tctx; - Error *err; + Error **errp; + int ret; } XiveCpuGetState; static void kvmppc_xive_cpu_do_synchronize_state(CPUState *cpu, @@ -128,14 +128,14 @@ static void kvmppc_xive_cpu_do_synchronize_state(CPUState *cpu, { XiveCpuGetState *s = arg.host_ptr; - kvmppc_xive_cpu_get_state(s->tctx, &s->err); + s->ret = kvmppc_xive_cpu_get_state(s->tctx, s->errp); } -void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp) +int kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp) { XiveCpuGetState s = { .tctx = tctx, - .err = NULL, + .errp = errp, }; /* @@ -144,26 +144,21 @@ void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp) run_on_cpu(tctx->cs, kvmppc_xive_cpu_do_synchronize_state, RUN_ON_CPU_HOST_PTR(&s)); - if (s.err) { - error_propagate(errp, s.err); - return; - } + return s.ret; } -void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp) +int kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp) { + ERRP_GUARD(); SpaprXive *xive = SPAPR_XIVE(tctx->xptr); unsigned long vcpu_id; int ret; - /* The KVM XIVE device is not in use */ - if (xive->fd == -1) { - return; - } + assert(xive->fd != -1); /* Check if CPU was hot unplugged and replugged. */ if (kvm_cpu_is_enabled(tctx->cs)) { - return; + return 0; } vcpu_id = kvm_arch_vcpu_id(tctx->cs); @@ -171,28 +166,26 @@ void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp) ret = kvm_vcpu_enable_cap(tctx->cs, KVM_CAP_PPC_IRQ_XIVE, 0, xive->fd, vcpu_id, 0); if (ret < 0) { - Error *local_err = NULL; - - error_setg(&local_err, - "XIVE: unable to connect CPU%ld to KVM device: %s", - vcpu_id, strerror(errno)); - if (errno == ENOSPC) { - error_append_hint(&local_err, "Try -smp maxcpus=N with N < %u\n", + error_setg_errno(errp, -ret, + "XIVE: unable to connect CPU%ld to KVM device", + vcpu_id); + if (ret == -ENOSPC) { + error_append_hint(errp, "Try -smp maxcpus=N with N < %u\n", MACHINE(qdev_get_machine())->smp.max_cpus); } - error_propagate(errp, local_err); - return; + return ret; } kvm_cpu_enable(tctx->cs); + return 0; } /* * XIVE Interrupt Source (KVM) */ -void kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS *eas, - Error **errp) +int kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS *eas, + Error **errp) { uint32_t end_idx; uint32_t end_blk; @@ -201,7 +194,6 @@ void kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS *eas, bool masked; uint32_t eisn; uint64_t kvm_src; - Error *local_err = NULL; assert(xive_eas_is_valid(eas)); @@ -221,12 +213,8 @@ void kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS *eas, kvm_src |= ((uint64_t)eisn << KVM_XIVE_SOURCE_EISN_SHIFT) & KVM_XIVE_SOURCE_EISN_MASK; - kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE_CONFIG, lisn, - &kvm_src, true, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } + return kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE_CONFIG, lisn, + &kvm_src, true, errp); } void kvmppc_xive_sync_source(SpaprXive *xive, uint32_t lisn, Error **errp) @@ -245,10 +233,7 @@ int kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp) SpaprXive *xive = SPAPR_XIVE(xsrc->xive); uint64_t state = 0; - /* The KVM XIVE device is not in use */ - if (xive->fd == -1) { - return -ENODEV; - } + assert(xive->fd != -1); if (xive_source_irq_is_lsi(xsrc, srcno)) { state |= KVM_XIVE_LEVEL_SENSITIVE; @@ -261,24 +246,25 @@ int kvmppc_xive_source_reset_one(XiveSource 
*xsrc, int srcno, Error **errp) true, errp); } -static void kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp) +static int kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp) { SpaprXive *xive = SPAPR_XIVE(xsrc->xive); int i; for (i = 0; i < xsrc->nr_irqs; i++) { - Error *local_err = NULL; + int ret; if (!xive_eas_is_valid(&xive->eat[i])) { continue; } - kvmppc_xive_source_reset_one(xsrc, i, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; + ret = kvmppc_xive_source_reset_one(xsrc, i, errp); + if (ret < 0) { + return ret; } } + + return 0; } /* @@ -381,15 +367,15 @@ void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val) /* * sPAPR XIVE interrupt controller (KVM) */ -void kvmppc_xive_get_queue_config(SpaprXive *xive, uint8_t end_blk, - uint32_t end_idx, XiveEND *end, - Error **errp) +int kvmppc_xive_get_queue_config(SpaprXive *xive, uint8_t end_blk, + uint32_t end_idx, XiveEND *end, + Error **errp) { struct kvm_ppc_xive_eq kvm_eq = { 0 }; uint64_t kvm_eq_idx; uint8_t priority; uint32_t server; - Error *local_err = NULL; + int ret; assert(xive_end_is_valid(end)); @@ -401,11 +387,10 @@ void kvmppc_xive_get_queue_config(SpaprXive *xive, uint8_t end_blk, kvm_eq_idx |= server << KVM_XIVE_EQ_SERVER_SHIFT & KVM_XIVE_EQ_SERVER_MASK; - kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ_CONFIG, kvm_eq_idx, - &kvm_eq, false, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; + ret = kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ_CONFIG, kvm_eq_idx, + &kvm_eq, false, errp); + if (ret < 0) { + return ret; } /* @@ -415,17 +400,18 @@ void kvmppc_xive_get_queue_config(SpaprXive *xive, uint8_t end_blk, */ end->w1 = xive_set_field32(END_W1_GENERATION, 0ul, kvm_eq.qtoggle) | xive_set_field32(END_W1_PAGE_OFF, 0ul, kvm_eq.qindex); + + return 0; } -void kvmppc_xive_set_queue_config(SpaprXive *xive, uint8_t end_blk, - uint32_t end_idx, XiveEND *end, - Error **errp) +int kvmppc_xive_set_queue_config(SpaprXive *xive, uint8_t end_blk, + uint32_t end_idx, XiveEND *end, + Error **errp) { struct kvm_ppc_xive_eq kvm_eq = { 0 }; uint64_t kvm_eq_idx; uint8_t priority; uint32_t server; - Error *local_err = NULL; /* * Build the KVM state from the local END structure. 
@@ -463,12 +449,9 @@ void kvmppc_xive_set_queue_config(SpaprXive *xive, uint8_t end_blk, kvm_eq_idx |= server << KVM_XIVE_EQ_SERVER_SHIFT & KVM_XIVE_EQ_SERVER_MASK; - kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ_CONFIG, kvm_eq_idx, - &kvm_eq, true, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } + return + kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ_CONFIG, kvm_eq_idx, + &kvm_eq, true, errp); } void kvmppc_xive_reset(SpaprXive *xive, Error **errp) @@ -477,23 +460,24 @@ void kvmppc_xive_reset(SpaprXive *xive, Error **errp) NULL, true, errp); } -static void kvmppc_xive_get_queues(SpaprXive *xive, Error **errp) +static int kvmppc_xive_get_queues(SpaprXive *xive, Error **errp) { - Error *local_err = NULL; int i; + int ret; for (i = 0; i < xive->nr_ends; i++) { if (!xive_end_is_valid(&xive->endt[i])) { continue; } - kvmppc_xive_get_queue_config(xive, SPAPR_XIVE_BLOCK_ID, i, - &xive->endt[i], &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; + ret = kvmppc_xive_get_queue_config(xive, SPAPR_XIVE_BLOCK_ID, i, + &xive->endt[i], errp); + if (ret < 0) { + return ret; } } + + return 0; } /* @@ -592,10 +576,7 @@ static void kvmppc_xive_change_state_handler(void *opaque, int running, void kvmppc_xive_synchronize_state(SpaprXive *xive, Error **errp) { - /* The KVM XIVE device is not in use */ - if (xive->fd == -1) { - return; - } + assert(xive->fd != -1); /* * When the VM is stopped, the sources are masked and the previous @@ -621,19 +602,17 @@ void kvmppc_xive_synchronize_state(SpaprXive *xive, Error **errp) int kvmppc_xive_pre_save(SpaprXive *xive) { Error *local_err = NULL; + int ret; - /* The KVM XIVE device is not in use */ - if (xive->fd == -1) { - return 0; - } + assert(xive->fd != -1); /* EAT: there is no extra state to query from KVM */ /* ENDT */ - kvmppc_xive_get_queues(xive, &local_err); - if (local_err) { + ret = kvmppc_xive_get_queues(xive, &local_err); + if (ret < 0) { error_report_err(local_err); - return -1; + return ret; } return 0; @@ -650,6 +629,7 @@ int kvmppc_xive_post_load(SpaprXive *xive, int version_id) Error *local_err = NULL; CPUState *cs; int i; + int ret; /* The KVM XIVE device should be in use */ assert(xive->fd != -1); @@ -660,11 +640,10 @@ int kvmppc_xive_post_load(SpaprXive *xive, int version_id) continue; } - kvmppc_xive_set_queue_config(xive, SPAPR_XIVE_BLOCK_ID, i, - &xive->endt[i], &local_err); - if (local_err) { - error_report_err(local_err); - return -1; + ret = kvmppc_xive_set_queue_config(xive, SPAPR_XIVE_BLOCK_ID, i, + &xive->endt[i], &local_err); + if (ret < 0) { + goto fail; } } @@ -679,16 +658,14 @@ int kvmppc_xive_post_load(SpaprXive *xive, int version_id) * previously set in KVM. Since we don't do that for all interrupts * at reset time anymore, let's do it now. 
*/ - kvmppc_xive_source_reset_one(&xive->source, i, &local_err); - if (local_err) { - error_report_err(local_err); - return -1; + ret = kvmppc_xive_source_reset_one(&xive->source, i, &local_err); + if (ret < 0) { + goto fail; } - kvmppc_xive_set_source_config(xive, i, &xive->eat[i], &local_err); - if (local_err) { - error_report_err(local_err); - return -1; + ret = kvmppc_xive_set_source_config(xive, i, &xive->eat[i], &local_err); + if (ret < 0) { + goto fail; } } @@ -705,17 +682,21 @@ int kvmppc_xive_post_load(SpaprXive *xive, int version_id) CPU_FOREACH(cs) { PowerPCCPU *cpu = POWERPC_CPU(cs); - kvmppc_xive_cpu_set_state(spapr_cpu_state(cpu)->tctx, &local_err); - if (local_err) { - error_report_err(local_err); - return -1; + ret = kvmppc_xive_cpu_set_state(spapr_cpu_state(cpu)->tctx, &local_err); + if (ret < 0) { + goto fail; } } /* The source states will be restored when the machine starts running */ return 0; + +fail: + error_report_err(local_err); + return ret; } +/* Returns MAP_FAILED on error and sets errno */ static void *kvmppc_xive_mmap(SpaprXive *xive, int pgoff, size_t len, Error **errp) { @@ -726,7 +707,6 @@ static void *kvmppc_xive_mmap(SpaprXive *xive, int pgoff, size_t len, pgoff << page_shift); if (addr == MAP_FAILED) { error_setg_errno(errp, errno, "XIVE: unable to set memory mapping"); - return NULL; } return addr; @@ -741,10 +721,12 @@ int kvmppc_xive_connect(SpaprInterruptController *intc, uint32_t nr_servers, { SpaprXive *xive = SPAPR_XIVE(intc); XiveSource *xsrc = &xive->source; - Error *local_err = NULL; - size_t esb_len = (1ull << xsrc->esb_shift) * xsrc->nr_irqs; + size_t esb_len = xive_source_esb_len(xsrc); size_t tima_len = 4ull << TM_SHIFT; CPUState *cs; + int fd; + void *addr; + int ret; /* * The KVM XIVE device already in use. This is the case when @@ -760,18 +742,20 @@ int kvmppc_xive_connect(SpaprInterruptController *intc, uint32_t nr_servers, } /* First, create the KVM XIVE device */ - xive->fd = kvm_create_device(kvm_state, KVM_DEV_TYPE_XIVE, false); - if (xive->fd < 0) { - error_setg_errno(errp, -xive->fd, "XIVE: error creating KVM device"); + fd = kvm_create_device(kvm_state, KVM_DEV_TYPE_XIVE, false); + if (fd < 0) { + error_setg_errno(errp, -fd, "XIVE: error creating KVM device"); return -1; } + xive->fd = fd; /* Tell KVM about the # of VCPUs we may have */ if (kvm_device_check_attr(xive->fd, KVM_DEV_XIVE_GRP_CTRL, KVM_DEV_XIVE_NR_SERVERS)) { - if (kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_CTRL, - KVM_DEV_XIVE_NR_SERVERS, &nr_servers, true, - &local_err)) { + ret = kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_CTRL, + KVM_DEV_XIVE_NR_SERVERS, &nr_servers, true, + errp); + if (ret < 0) { goto fail; } } @@ -779,14 +763,14 @@ int kvmppc_xive_connect(SpaprInterruptController *intc, uint32_t nr_servers, /* * 1. Source ESB pages - KVM mapping */ - xsrc->esb_mmap = kvmppc_xive_mmap(xive, KVM_XIVE_ESB_PAGE_OFFSET, esb_len, - &local_err); - if (local_err) { + addr = kvmppc_xive_mmap(xive, KVM_XIVE_ESB_PAGE_OFFSET, esb_len, errp); + if (addr == MAP_FAILED) { goto fail; } + xsrc->esb_mmap = addr; memory_region_init_ram_device_ptr(&xsrc->esb_mmio_kvm, OBJECT(xsrc), - "xive.esb", esb_len, xsrc->esb_mmap); + "xive.esb-kvm", esb_len, xsrc->esb_mmap); memory_region_add_subregion_overlap(&xsrc->esb_mmio, 0, &xsrc->esb_mmio_kvm, 1); @@ -797,11 +781,12 @@ int kvmppc_xive_connect(SpaprInterruptController *intc, uint32_t nr_servers, /* * 3. 
TIMA pages - KVM mapping */ - xive->tm_mmap = kvmppc_xive_mmap(xive, KVM_XIVE_TIMA_PAGE_OFFSET, tima_len, - &local_err); - if (local_err) { + addr = kvmppc_xive_mmap(xive, KVM_XIVE_TIMA_PAGE_OFFSET, tima_len, errp); + if (addr == MAP_FAILED) { goto fail; } + xive->tm_mmap = addr; + memory_region_init_ram_device_ptr(&xive->tm_mmio_kvm, OBJECT(xive), "xive.tima", tima_len, xive->tm_mmap); memory_region_add_subregion_overlap(&xive->tm_mmio, 0, @@ -814,15 +799,15 @@ CPU_FOREACH(cs) { PowerPCCPU *cpu = POWERPC_CPU(cs); - kvmppc_xive_cpu_connect(spapr_cpu_state(cpu)->tctx, &local_err); - if (local_err) { + ret = kvmppc_xive_cpu_connect(spapr_cpu_state(cpu)->tctx, errp); + if (ret < 0) { goto fail; } } /* Update the KVM sources */ - kvmppc_xive_source_reset(xsrc, &local_err); - if (local_err) { + ret = kvmppc_xive_source_reset(xsrc, errp); + if (ret < 0) { goto fail; } @@ -832,7 +817,6 @@ return 0; fail: - error_propagate(errp, local_err); kvmppc_xive_disconnect(intc); return -1; } @@ -843,14 +827,11 @@ void kvmppc_xive_disconnect(SpaprInterruptController *intc) XiveSource *xsrc; size_t esb_len; - /* The KVM XIVE device is not in use */ - if (!xive || xive->fd == -1) { - return; - } + assert(xive->fd != -1); /* Clear the KVM mapping */ xsrc = &xive->source; - esb_len = (1ull << xsrc->esb_shift) * xsrc->nr_irqs; + esb_len = xive_source_esb_len(xsrc); if (xsrc->esb_mmap) { memory_region_del_subregion(&xsrc->esb_mmio, &xsrc->esb_mmio_kvm); @@ -871,10 +852,8 @@ * and removed from the list of devices of the VM. The VCPU * presenters are also detached from the device. */ - if (xive->fd != -1) { - close(xive->fd); - xive->fd = -1; - } + close(xive->fd); + xive->fd = -1; kvm_kernel_irqchip = false; kvm_msi_via_irqfd_allowed = false; diff --git a/hw/intc/xive.c b/hw/intc/xive.c index 9a162431e0..489e6256ef 100644 --- a/hw/intc/xive.c +++ b/hw/intc/xive.c @@ -592,6 +592,17 @@ static const char * const xive_tctx_ring_names[] = { "USER", "OS", "POOL", "PHYS", }; +/* + * kvm_irqchip_in_kernel() will cause the compiler to turn this + * into a nop if CONFIG_KVM isn't defined. + */ +#define xive_in_kernel(xptr) \ + (kvm_irqchip_in_kernel() && \ + ({ \ + XivePresenterClass *xpc = XIVE_PRESENTER_GET_CLASS(xptr); \ + xpc->in_kernel ? xpc->in_kernel(xptr) : false; \ + })) + void xive_tctx_pic_print_info(XiveTCTX *tctx, Monitor *mon) { int cpu_index; @@ -606,7 +617,7 @@ void xive_tctx_pic_print_info(XiveTCTX *tctx, Monitor *mon) cpu_index = tctx->cs ?
tctx->cs->cpu_index : -1; - if (kvm_irqchip_in_kernel()) { + if (xive_in_kernel(tctx->xptr)) { Error *local_err = NULL; kvmppc_xive_cpu_synchronize_state(tctx, &local_err); @@ -651,7 +662,6 @@ static void xive_tctx_realize(DeviceState *dev, Error **errp) XiveTCTX *tctx = XIVE_TCTX(dev); PowerPCCPU *cpu; CPUPPCState *env; - Error *local_err = NULL; assert(tctx->cs); assert(tctx->xptr); @@ -671,10 +681,8 @@ static void xive_tctx_realize(DeviceState *dev, Error **errp) } /* Connect the presenter to the VCPU (required for CPU hotplug) */ - if (kvm_irqchip_in_kernel()) { - kvmppc_xive_cpu_connect(tctx, &local_err); - if (local_err) { - error_propagate(errp, local_err); + if (xive_in_kernel(tctx->xptr)) { + if (kvmppc_xive_cpu_connect(tctx, errp) < 0) { return; } } @@ -682,13 +690,15 @@ static void xive_tctx_realize(DeviceState *dev, Error **errp) static int vmstate_xive_tctx_pre_save(void *opaque) { + XiveTCTX *tctx = XIVE_TCTX(opaque); Error *local_err = NULL; + int ret; - if (kvm_irqchip_in_kernel()) { - kvmppc_xive_cpu_get_state(XIVE_TCTX(opaque), &local_err); - if (local_err) { + if (xive_in_kernel(tctx->xptr)) { + ret = kvmppc_xive_cpu_get_state(tctx, &local_err); + if (ret < 0) { error_report_err(local_err); - return -1; + return ret; } } @@ -697,17 +707,19 @@ static int vmstate_xive_tctx_pre_save(void *opaque) static int vmstate_xive_tctx_post_load(void *opaque, int version_id) { + XiveTCTX *tctx = XIVE_TCTX(opaque); Error *local_err = NULL; + int ret; - if (kvm_irqchip_in_kernel()) { + if (xive_in_kernel(tctx->xptr)) { /* * Required for hotplugged CPU, for which the state comes * after all states of the machine. */ - kvmppc_xive_cpu_set_state(XIVE_TCTX(opaque), &local_err); - if (local_err) { + ret = kvmppc_xive_cpu_set_state(tctx, &local_err); + if (ret < 0) { error_report_err(local_err); - return -1; + return ret; } } @@ -1128,6 +1140,7 @@ static void xive_source_reset(void *dev) static void xive_source_realize(DeviceState *dev, Error **errp) { XiveSource *xsrc = XIVE_SOURCE(dev); + size_t esb_len = xive_source_esb_len(xsrc); assert(xsrc->xive); @@ -1147,11 +1160,11 @@ static void xive_source_realize(DeviceState *dev, Error **errp) xsrc->status = g_malloc0(xsrc->nr_irqs); xsrc->lsi_map = bitmap_new(xsrc->nr_irqs); - if (!kvm_irqchip_in_kernel()) { - memory_region_init_io(&xsrc->esb_mmio, OBJECT(xsrc), - &xive_source_esb_ops, xsrc, "xive.esb", - (1ull << xsrc->esb_shift) * xsrc->nr_irqs); - } + memory_region_init(&xsrc->esb_mmio, OBJECT(xsrc), "xive.esb", esb_len); + memory_region_init_io(&xsrc->esb_mmio_emulated, OBJECT(xsrc), + &xive_source_esb_ops, xsrc, "xive.esb-emulated", + esb_len); + memory_region_add_subregion(&xsrc->esb_mmio, 0, &xsrc->esb_mmio_emulated); qemu_register_reset(xive_source_reset, dev); } @@ -1502,7 +1515,7 @@ static bool xive_presenter_notify(XiveFabric *xfb, uint8_t format, /* * Notification using the END ESe/ESn bit (Event State Buffer for - * escalation and notification). Profide futher coalescing in the + * escalation and notification). Provide further coalescing in the * Router. 
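
The xive_in_kernel() macro introduced in the xive.c hunk above leans on kvm_irqchip_in_kernel() collapsing to a constant false when CONFIG_KVM is undefined, so the && short-circuit lets the compiler discard the statement expression, including the presenter-class lookup. A reduced model of the idiom, with illustrative names rather than QEMU's (and assuming the usual optimizing build, where the dead call is eliminated):

    #include <stdbool.h>

    #ifdef CONFIG_KVM
    bool kvm_active(void);
    bool kvm_only_query(void *obj);   /* implemented only in KVM builds */
    #else
    #define kvm_active() false
    bool kvm_only_query(void *obj);   /* declared but never linked: the call
                                       * below must be eliminated as dead code */
    #endif

    /* With kvm_active() folded to the constant false, the compiler removes
     * the kvm_only_query() call, so non-KVM builds need no stub for it. */
    static inline bool obj_in_kernel(void *obj)
    {
        return kvm_active() && kvm_only_query(obj);
    }
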
*/ static bool xive_router_end_es_notify(XiveRouter *xrtr, uint8_t end_blk, @@ -1581,7 +1594,7 @@ static void xive_router_end_notify(XiveRouter *xrtr, uint8_t end_blk, /* * Check the END ESn (Event State Buffer for notification) for - * even futher coalescing in the Router + * even further coalescing in the Router */ if (!xive_end_is_notify(&end)) { /* ESn[Q]=1 : end of notification */ @@ -1660,7 +1673,7 @@ do_escalation: /* * Check the END ESe (Event State Buffer for escalation) for even - * futher coalescing in the Router + * further coalescing in the Router */ if (!xive_end_is_uncond_escalation(&end)) { /* ESe[Q]=1 : end of notification */ diff --git a/hw/nvram/chrp_nvram.c b/hw/nvram/chrp_nvram.c index d969f26704..d4d10a7c03 100644 --- a/hw/nvram/chrp_nvram.c +++ b/hw/nvram/chrp_nvram.c @@ -21,14 +21,21 @@ #include "qemu/osdep.h" #include "qemu/cutils.h" +#include "qemu/error-report.h" #include "hw/nvram/chrp_nvram.h" #include "sysemu/sysemu.h" -static int chrp_nvram_set_var(uint8_t *nvram, int addr, const char *str) +static int chrp_nvram_set_var(uint8_t *nvram, int addr, const char *str, + int max_len) { int len; len = strlen(str) + 1; + + if (max_len < len) { + return -1; + } + memcpy(&nvram[addr], str, len); return addr + len; @@ -38,19 +45,26 @@ static int chrp_nvram_set_var(uint8_t *nvram, int addr, const char *str) * Create a "system partition", used for the Open Firmware * environment variables. */ -int chrp_nvram_create_system_partition(uint8_t *data, int min_len) +int chrp_nvram_create_system_partition(uint8_t *data, int min_len, int max_len) { ChrpNvramPartHdr *part_header; unsigned int i; int end; + if (max_len < sizeof(*part_header)) { + goto fail; + } + part_header = (ChrpNvramPartHdr *)data; part_header->signature = CHRP_NVPART_SYSTEM; pstrcpy(part_header->name, sizeof(part_header->name), "system"); end = sizeof(ChrpNvramPartHdr); for (i = 0; i < nb_prom_envs; i++) { - end = chrp_nvram_set_var(data, end, prom_envs[i]); + end = chrp_nvram_set_var(data, end, prom_envs[i], max_len - end); + if (end == -1) { + goto fail; + } } /* End marker */ @@ -65,6 +79,10 @@ int chrp_nvram_create_system_partition(uint8_t *data, int min_len) chrp_nvram_finish_partition(part_header, end); return end; + +fail: + error_report("NVRAM is too small. 
Try to pass less data to -prom-env"); + exit(EXIT_FAILURE); } /** diff --git a/hw/nvram/mac_nvram.c b/hw/nvram/mac_nvram.c index beec1c4e4d..11f2d31cdb 100644 --- a/hw/nvram/mac_nvram.c +++ b/hw/nvram/mac_nvram.c @@ -141,7 +141,7 @@ static void pmac_format_nvram_partition_of(MacIONVRAMState *nvr, int off, /* OpenBIOS nvram variables partition */ sysp_end = chrp_nvram_create_system_partition(&nvr->data[off], - DEF_SYSTEM_SIZE) + off; + DEF_SYSTEM_SIZE, len) + off; /* Free space partition */ chrp_nvram_create_free_partition(&nvr->data[sysp_end], len - sysp_end); diff --git a/hw/nvram/spapr_nvram.c b/hw/nvram/spapr_nvram.c index 15d08281d4..386513499f 100644 --- a/hw/nvram/spapr_nvram.c +++ b/hw/nvram/spapr_nvram.c @@ -188,7 +188,8 @@ static void spapr_nvram_realize(SpaprVioDevice *dev, Error **errp) } } else if (nb_prom_envs > 0) { /* Create a system partition to pass the -prom-env variables */ - chrp_nvram_create_system_partition(nvram->buf, MIN_NVRAM_SIZE / 4); + chrp_nvram_create_system_partition(nvram->buf, MIN_NVRAM_SIZE / 4, + nvram->size); chrp_nvram_create_free_partition(&nvram->buf[MIN_NVRAM_SIZE / 4], nvram->size - MIN_NVRAM_SIZE / 4); } diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 1c8d0981b3..dd2fa4826b 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -558,7 +558,8 @@ static int spapr_dt_dynamic_reconfiguration_memory(SpaprMachineState *spapr, int nb_numa_nodes = machine->numa_state->num_nodes; int ret, i, offset; uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE; - uint32_t prop_lmb_size[] = {0, cpu_to_be32(lmb_size)}; + uint32_t prop_lmb_size[] = {cpu_to_be32(lmb_size >> 32), + cpu_to_be32(lmb_size & 0xffffffff)}; uint32_t *int_buf, *cur_index, buf_len; int nr_nodes = nb_numa_nodes ? nb_numa_nodes : 1; MemoryDeviceInfoList *dimms = NULL; @@ -905,7 +906,8 @@ static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt) uint32_t lrdr_capacity[] = { cpu_to_be32(max_device_addr >> 32), cpu_to_be32(max_device_addr & 0xffffffff), - 0, cpu_to_be32(SPAPR_MEMORY_BLOCK_SIZE), + cpu_to_be32(SPAPR_MEMORY_BLOCK_SIZE >> 32), + cpu_to_be32(SPAPR_MEMORY_BLOCK_SIZE & 0xffffffff), cpu_to_be32(ms->smp.max_cpus / ms->smp.threads), }; uint32_t maxdomain = cpu_to_be32(spapr->gpu_numa_id > 1 ? 
1 : 0); diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c index 3225fc5a2e..10a80a8159 100644 --- a/hw/ppc/spapr_caps.c +++ b/hw/ppc/spapr_caps.c @@ -180,24 +180,24 @@ static void spapr_cap_set_pagesize(Object *obj, Visitor *v, const char *name, static void cap_htm_apply(SpaprMachineState *spapr, uint8_t val, Error **errp) { + ERRP_GUARD(); if (!val) { /* TODO: We don't support disabling htm yet */ return; } if (tcg_enabled()) { - error_setg(errp, - "No Transactional Memory support in TCG," - " try appending -machine cap-htm=off"); + error_setg(errp, "No Transactional Memory support in TCG"); + error_append_hint(errp, "Try appending -machine cap-htm=off\n"); } else if (kvm_enabled() && !kvmppc_has_cap_htm()) { error_setg(errp, -"KVM implementation does not support Transactional Memory," - " try appending -machine cap-htm=off" - ); + "KVM implementation does not support Transactional Memory"); + error_append_hint(errp, "Try appending -machine cap-htm=off\n"); } } static void cap_vsx_apply(SpaprMachineState *spapr, uint8_t val, Error **errp) { + ERRP_GUARD(); PowerPCCPU *cpu = POWERPC_CPU(first_cpu); CPUPPCState *env = &cpu->env; @@ -209,13 +209,14 @@ static void cap_vsx_apply(SpaprMachineState *spapr, uint8_t val, Error **errp) * rid of anything that doesn't do VMX */ g_assert(env->insns_flags & PPC_ALTIVEC); if (!(env->insns_flags2 & PPC2_VSX)) { - error_setg(errp, "VSX support not available," - " try appending -machine cap-vsx=off"); + error_setg(errp, "VSX support not available"); + error_append_hint(errp, "Try appending -machine cap-vsx=off\n"); } } static void cap_dfp_apply(SpaprMachineState *spapr, uint8_t val, Error **errp) { + ERRP_GUARD(); PowerPCCPU *cpu = POWERPC_CPU(first_cpu); CPUPPCState *env = &cpu->env; @@ -224,8 +225,8 @@ static void cap_dfp_apply(SpaprMachineState *spapr, uint8_t val, Error **errp) return; } if (!(env->insns_flags2 & PPC2_DFP)) { - error_setg(errp, "DFP support not available," - " try appending -machine cap-dfp=off"); + error_setg(errp, "DFP support not available"); + error_append_hint(errp, "Try appending -machine cap-dfp=off\n"); } } @@ -239,6 +240,7 @@ SpaprCapPossible cap_cfpc_possible = { static void cap_safe_cache_apply(SpaprMachineState *spapr, uint8_t val, Error **errp) { + ERRP_GUARD(); uint8_t kvm_val = kvmppc_get_cap_safe_cache(); if (tcg_enabled() && val) { @@ -247,9 +249,9 @@ static void cap_safe_cache_apply(SpaprMachineState *spapr, uint8_t val, cap_cfpc_possible.vals[val]); } else if (kvm_enabled() && (val > kvm_val)) { error_setg(errp, - "Requested safe cache capability level not supported by kvm," - " try appending -machine cap-cfpc=%s", - cap_cfpc_possible.vals[kvm_val]); + "Requested safe cache capability level not supported by KVM"); + error_append_hint(errp, "Try appending -machine cap-cfpc=%s\n", + cap_cfpc_possible.vals[kvm_val]); } } @@ -263,6 +265,7 @@ SpaprCapPossible cap_sbbc_possible = { static void cap_safe_bounds_check_apply(SpaprMachineState *spapr, uint8_t val, Error **errp) { + ERRP_GUARD(); uint8_t kvm_val = kvmppc_get_cap_safe_bounds_check(); if (tcg_enabled() && val) { @@ -271,9 +274,9 @@ static void cap_safe_bounds_check_apply(SpaprMachineState *spapr, uint8_t val, cap_sbbc_possible.vals[val]); } else if (kvm_enabled() && (val > kvm_val)) { error_setg(errp, -"Requested safe bounds check capability level not supported by kvm," - " try appending -machine cap-sbbc=%s", - cap_sbbc_possible.vals[kvm_val]); +"Requested safe bounds check capability level not supported by KVM"); + error_append_hint(errp, "Try appending 
-machine cap-sbbc=%s\n", + cap_sbbc_possible.vals[kvm_val]); } } @@ -290,6 +293,7 @@ SpaprCapPossible cap_ibs_possible = { static void cap_safe_indirect_branch_apply(SpaprMachineState *spapr, uint8_t val, Error **errp) { + ERRP_GUARD(); uint8_t kvm_val = kvmppc_get_cap_safe_indirect_branch(); if (tcg_enabled() && val) { @@ -298,9 +302,9 @@ static void cap_safe_indirect_branch_apply(SpaprMachineState *spapr, cap_ibs_possible.vals[val]); } else if (kvm_enabled() && (val > kvm_val)) { error_setg(errp, -"Requested safe indirect branch capability level not supported by kvm," - " try appending -machine cap-ibs=%s", - cap_ibs_possible.vals[kvm_val]); +"Requested safe indirect branch capability level not supported by KVM"); + error_append_hint(errp, "Try appending -machine cap-ibs=%s\n", + cap_ibs_possible.vals[kvm_val]); } } @@ -377,23 +381,35 @@ static void cap_hpt_maxpagesize_cpu_apply(SpaprMachineState *spapr, static void cap_nested_kvm_hv_apply(SpaprMachineState *spapr, uint8_t val, Error **errp) { + ERRP_GUARD(); + PowerPCCPU *cpu = POWERPC_CPU(first_cpu); + if (!val) { /* capability disabled by default */ return; } if (tcg_enabled()) { - error_setg(errp, - "No Nested KVM-HV support in tcg," - " try appending -machine cap-nested-hv=off"); + error_setg(errp, "No Nested KVM-HV support in TCG"); + error_append_hint(errp, "Try appending -machine cap-nested-hv=off\n"); } else if (kvm_enabled()) { + if (!ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0, + spapr->max_compat_pvr)) { + error_setg(errp, "Nested KVM-HV only supported on POWER9"); + error_append_hint(errp, + "Try appending -machine max-cpu-compat=power9\n"); + return; + } + if (!kvmppc_has_cap_nested_kvm_hv()) { error_setg(errp, -"KVM implementation does not support Nested KVM-HV," - " try appending -machine cap-nested-hv=off"); + "KVM implementation does not support Nested KVM-HV"); + error_append_hint(errp, + "Try appending -machine cap-nested-hv=off\n"); } else if (kvmppc_set_cap_nested_kvm_hv(val) < 0) { - error_setg(errp, -"Error enabling cap-nested-hv with KVM, try cap-nested-hv=off"); + error_setg(errp, "Error enabling cap-nested-hv with KVM"); + error_append_hint(errp, + "Try appending -machine cap-nested-hv=off\n"); } } } @@ -401,6 +417,7 @@ static void cap_nested_kvm_hv_apply(SpaprMachineState *spapr, static void cap_large_decr_apply(SpaprMachineState *spapr, uint8_t val, Error **errp) { + ERRP_GUARD(); PowerPCCPU *cpu = POWERPC_CPU(first_cpu); PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); @@ -411,22 +428,23 @@ static void cap_large_decr_apply(SpaprMachineState *spapr, if (tcg_enabled()) { if (!ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0, spapr->max_compat_pvr)) { - error_setg(errp, - "Large decrementer only supported on POWER9, try -cpu POWER9"); + error_setg(errp, "Large decrementer only supported on POWER9"); + error_append_hint(errp, "Try -cpu POWER9\n"); return; } } else if (kvm_enabled()) { int kvm_nr_bits = kvmppc_get_cap_large_decr(); if (!kvm_nr_bits) { - error_setg(errp, - "No large decrementer support," - " try appending -machine cap-large-decr=off"); + error_setg(errp, "No large decrementer support"); + error_append_hint(errp, + "Try appending -machine cap-large-decr=off\n"); } else if (pcc->lrg_decr_bits != kvm_nr_bits) { error_setg(errp, -"KVM large decrementer size (%d) differs to model (%d)," - " try appending -machine cap-large-decr=off", - kvm_nr_bits, pcc->lrg_decr_bits); + "KVM large decrementer size (%d) differs to model (%d)", + kvm_nr_bits, pcc->lrg_decr_bits); + error_append_hint(errp, + 
"Try appending -machine cap-large-decr=off\n"); } } } @@ -435,14 +453,15 @@ static void cap_large_decr_cpu_apply(SpaprMachineState *spapr, PowerPCCPU *cpu, uint8_t val, Error **errp) { + ERRP_GUARD(); CPUPPCState *env = &cpu->env; target_ulong lpcr = env->spr[SPR_LPCR]; if (kvm_enabled()) { if (kvmppc_enable_cap_large_decr(cpu, val)) { - error_setg(errp, - "No large decrementer support," - " try appending -machine cap-large-decr=off"); + error_setg(errp, "No large decrementer support"); + error_append_hint(errp, + "Try appending -machine cap-large-decr=off\n"); } } @@ -457,6 +476,7 @@ static void cap_large_decr_cpu_apply(SpaprMachineState *spapr, static void cap_ccf_assist_apply(SpaprMachineState *spapr, uint8_t val, Error **errp) { + ERRP_GUARD(); uint8_t kvm_val = kvmppc_get_cap_count_cache_flush_assist(); if (tcg_enabled() && val) { @@ -479,14 +499,15 @@ static void cap_ccf_assist_apply(SpaprMachineState *spapr, uint8_t val, return; } error_setg(errp, -"Requested count cache flush assist capability level not supported by kvm," - " try appending -machine cap-ccf-assist=off"); + "Requested count cache flush assist capability level not supported by KVM"); + error_append_hint(errp, "Try appending -machine cap-ccf-assist=off\n"); } } static void cap_fwnmi_apply(SpaprMachineState *spapr, uint8_t val, Error **errp) { + ERRP_GUARD(); if (!val) { return; /* Disabled by default */ } diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c index 2f8f7d62f8..72bb938375 100644 --- a/hw/ppc/spapr_irq.c +++ b/hw/ppc/spapr_irq.c @@ -139,6 +139,7 @@ SpaprIrq spapr_irq_dual = { static int spapr_irq_check(SpaprMachineState *spapr, Error **errp) { + ERRP_GUARD(); MachineState *machine = MACHINE(spapr); /* @@ -179,14 +180,19 @@ static int spapr_irq_check(SpaprMachineState *spapr, Error **errp) /* * On a POWER9 host, some older KVM XICS devices cannot be destroyed and - * re-created. Detect that early to avoid QEMU to exit later when the - * guest reboots. + * re-created. Same happens with KVM nested guests. Detect that early to + * avoid QEMU to exit later when the guest reboots. */ if (kvm_enabled() && spapr->irq == &spapr_irq_dual && kvm_kernel_irqchip_required() && xics_kvm_has_broken_disconnect(spapr)) { - error_setg(errp, "KVM is too old to support ic-mode=dual,kernel-irqchip=on"); + error_setg(errp, + "KVM is incompatible with ic-mode=dual,kernel-irqchip=on"); + error_append_hint(errp, + "This can happen with an old KVM or in a KVM nested guest.\n"); + error_append_hint(errp, + "Try without kernel-irqchip or with kernel-irqchip=off.\n"); return -1; } diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index 363cdb3f7b..0a418f1e67 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -1796,6 +1796,7 @@ static void spapr_phb_destroy_msi(gpointer opaque) static void spapr_phb_realize(DeviceState *dev, Error **errp) { + ERRP_GUARD(); /* We don't use SPAPR_MACHINE() in order to exit gracefully if the user * tries to add a sPAPR PHB to a non-pseries machine. 
*/ @@ -1813,7 +1814,6 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) uint64_t msi_window_size = 4096; SpaprTceTable *tcet; const unsigned windows_supported = spapr_phb_windows_supported(sphb); - Error *local_err = NULL; if (!spapr) { error_setg(errp, TYPE_SPAPR_PCI_HOST_BRIDGE " needs a pseries machine"); @@ -1964,13 +1964,12 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) /* Initialize the LSI table */ for (i = 0; i < PCI_NUM_PINS; i++) { - uint32_t irq = SPAPR_IRQ_PCI_LSI + sphb->index * PCI_NUM_PINS + i; + int irq = SPAPR_IRQ_PCI_LSI + sphb->index * PCI_NUM_PINS + i; if (smc->legacy_irq_allocation) { - irq = spapr_irq_findone(spapr, &local_err); - if (local_err) { - error_propagate_prepend(errp, local_err, - "can't allocate LSIs: "); + irq = spapr_irq_findone(spapr, errp); + if (irq < 0) { + error_prepend(errp, "can't allocate LSIs: "); /* * Older machines will never support PHB hotplug, ie, this is an * init only path and QEMU will terminate. No need to rollback. @@ -1979,9 +1978,8 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) } } - spapr_irq_claim(spapr, irq, true, &local_err); - if (local_err) { - error_propagate_prepend(errp, local_err, "can't allocate LSIs: "); + if (spapr_irq_claim(spapr, irq, true, errp) < 0) { + error_prepend(errp, "can't allocate LSIs: "); goto unrealize; } diff --git a/hw/sparc/sun4m.c b/hw/sparc/sun4m.c index 9be930415f..cf7dfa4af5 100644 --- a/hw/sparc/sun4m.c +++ b/hw/sparc/sun4m.c @@ -143,7 +143,7 @@ static void nvram_init(Nvram *nvram, uint8_t *macaddr, memset(image, '\0', sizeof(image)); /* OpenBIOS nvram variables partition */ - sysp_end = chrp_nvram_create_system_partition(image, 0); + sysp_end = chrp_nvram_create_system_partition(image, 0, 0x1fd0); /* Free space partition */ chrp_nvram_create_free_partition(&image[sysp_end], 0x1fd0 - sysp_end); diff --git a/hw/sparc64/sun4u.c b/hw/sparc64/sun4u.c index 9e30203dcc..37310b73e6 100644 --- a/hw/sparc64/sun4u.c +++ b/hw/sparc64/sun4u.c @@ -136,7 +136,7 @@ static int sun4u_NVRAM_set_params(Nvram *nvram, uint16_t NVRAM_size, memset(image, '\0', sizeof(image)); /* OpenBIOS nvram variables partition */ - sysp_end = chrp_nvram_create_system_partition(image, 0); + sysp_end = chrp_nvram_create_system_partition(image, 0, 0x1fd0); /* Free space partition */ chrp_nvram_create_free_partition(&image[sysp_end], 0x1fd0 - sysp_end); diff --git a/include/elf.h b/include/elf.h index 5b06b55f28..c117a4d1ab 100644 --- a/include/elf.h +++ b/include/elf.h @@ -558,6 +558,7 @@ typedef struct { #define PPC_FEATURE2_HTM_NOSC 0x01000000 #define PPC_FEATURE2_ARCH_3_00 0x00800000 #define PPC_FEATURE2_HAS_IEEE128 0x00400000 +#define PPC_FEATURE2_ARCH_3_10 0x00040000 /* Bits present in AT_HWCAP for Sparc. */ diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h index ca4a4b1ad1..880dccd7c0 100644 --- a/include/hw/arm/smmu-common.h +++ b/include/hw/arm/smmu-common.h @@ -50,8 +50,15 @@ typedef struct SMMUTransTableInfo { uint64_t ttb; /* TT base address */ uint8_t tsz; /* input range, ie. 
2^(64 -tsz)*/ uint8_t granule_sz; /* granule page shift */ + bool had; /* hierarchical attribute disable */ } SMMUTransTableInfo; +typedef struct SMMUTLBEntry { + IOMMUTLBEntry entry; + uint8_t level; + uint8_t granule; +} SMMUTLBEntry; + /* * Generic structure populated by derived SMMU devices * after decoding the configuration information and used as @@ -91,6 +98,8 @@ typedef struct SMMUPciBus { typedef struct SMMUIOTLBKey { uint64_t iova; uint16_t asid; + uint8_t tg; + uint8_t level; } SMMUIOTLBKey; typedef struct SMMUState { @@ -140,7 +149,7 @@ static inline uint16_t smmu_get_sid(SMMUDevice *sdev) * pair, according to @cfg translation config */ int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm, - IOMMUTLBEntry *tlbe, SMMUPTWEventInfo *info); + SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info); /** * select_tt - compute which translation table shall be used according to @@ -153,9 +162,15 @@ IOMMUMemoryRegion *smmu_iommu_mr(SMMUState *s, uint32_t sid); #define SMMU_IOTLB_MAX_SIZE 256 +SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg, + SMMUTransTableInfo *tt, hwaddr iova); +void smmu_iotlb_insert(SMMUState *bs, SMMUTransCfg *cfg, SMMUTLBEntry *entry); +SMMUIOTLBKey smmu_get_iotlb_key(uint16_t asid, uint64_t iova, + uint8_t tg, uint8_t level); void smmu_iotlb_inv_all(SMMUState *s); void smmu_iotlb_inv_asid(SMMUState *s, uint16_t asid); -void smmu_iotlb_inv_iova(SMMUState *s, uint16_t asid, dma_addr_t iova); +void smmu_iotlb_inv_iova(SMMUState *s, int asid, dma_addr_t iova, + uint8_t tg, uint64_t num_pages, uint8_t ttl); /* Unmap the range of all the notifiers registered to any IOMMU mr */ void smmu_inv_notifiers_all(SMMUState *s); diff --git a/include/hw/arm/smmuv3.h b/include/hw/arm/smmuv3.h index 36b2f45253..68d7a963e0 100644 --- a/include/hw/arm/smmuv3.h +++ b/include/hw/arm/smmuv3.h @@ -41,6 +41,7 @@ typedef struct SMMUv3State { uint32_t idr[6]; uint32_t iidr; + uint32_t aidr; uint32_t cr[3]; uint32_t cr0ack; uint32_t statusr; diff --git a/include/hw/nvram/chrp_nvram.h b/include/hw/nvram/chrp_nvram.h index 09941a9be4..4a0f5c21b8 100644 --- a/include/hw/nvram/chrp_nvram.h +++ b/include/hw/nvram/chrp_nvram.h @@ -50,7 +50,8 @@ chrp_nvram_finish_partition(ChrpNvramPartHdr *header, uint32_t size) header->checksum = sum & 0xff; } -int chrp_nvram_create_system_partition(uint8_t *data, int min_len); +/* chrp_nvram_create_system_partition() failure is fatal */ +int chrp_nvram_create_system_partition(uint8_t *data, int min_len, int max_len); int chrp_nvram_create_free_partition(uint8_t *data, int len); #endif diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h index 93d09d68de..0ffbe0be02 100644 --- a/include/hw/ppc/spapr_xive.h +++ b/include/hw/ppc/spapr_xive.h @@ -80,15 +80,15 @@ int kvmppc_xive_connect(SpaprInterruptController *intc, uint32_t nr_servers, Error **errp); void kvmppc_xive_disconnect(SpaprInterruptController *intc); void kvmppc_xive_reset(SpaprXive *xive, Error **errp); -void kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS *eas, - Error **errp); +int kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS *eas, + Error **errp); void kvmppc_xive_sync_source(SpaprXive *xive, uint32_t lisn, Error **errp); uint64_t kvmppc_xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset, uint64_t data, bool write); -void kvmppc_xive_set_queue_config(SpaprXive *xive, uint8_t end_blk, +int kvmppc_xive_set_queue_config(SpaprXive *xive, uint8_t end_blk, uint32_t end_idx, XiveEND *end, Error **errp); -void 
kvmppc_xive_get_queue_config(SpaprXive *xive, uint8_t end_blk, +int kvmppc_xive_get_queue_config(SpaprXive *xive, uint8_t end_blk, uint32_t end_idx, XiveEND *end, Error **errp); void kvmppc_xive_synchronize_state(SpaprXive *xive, Error **errp); diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h index 705cf48176..2c42ae92d2 100644 --- a/include/hw/ppc/xive.h +++ b/include/hw/ppc/xive.h @@ -191,6 +191,7 @@ typedef struct XiveSource { uint64_t esb_flags; uint32_t esb_shift; MemoryRegion esb_mmio; + MemoryRegion esb_mmio_emulated; /* KVM support */ void *esb_mmap; @@ -215,6 +216,11 @@ static inline bool xive_source_esb_has_2page(XiveSource *xsrc) xsrc->esb_shift == XIVE_ESB_4K_2PAGE; } +static inline size_t xive_source_esb_len(XiveSource *xsrc) +{ + return (1ull << xsrc->esb_shift) * xsrc->nr_irqs; +} + /* The trigger page is always the first/even page */ static inline hwaddr xive_source_esb_page(XiveSource *xsrc, uint32_t srcno) { @@ -396,6 +402,7 @@ typedef struct XivePresenterClass { uint8_t nvt_blk, uint32_t nvt_idx, bool cam_ignore, uint8_t priority, uint32_t logic_serv, XiveTCTXMatch *match); + bool (*in_kernel)(const XivePresenter *xptr); } XivePresenterClass; int xive_presenter_tctx_match(XivePresenter *xptr, XiveTCTX *tctx, @@ -480,9 +487,9 @@ void xive_tctx_ipb_update(XiveTCTX *tctx, uint8_t ring, uint8_t ipb); int kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp); void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val); -void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp); -void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp); -void kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp); -void kvmppc_xive_cpu_set_state(XiveTCTX *tctx, Error **errp); +int kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp); +int kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp); +int kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp); +int kvmppc_xive_cpu_set_state(XiveTCTX *tctx, Error **errp); #endif /* PPC_XIVE_H */ diff --git a/target/arm/a32.decode b/target/arm/a32.decode index 0bd952c069..4dfd9139bf 100644 --- a/target/arm/a32.decode +++ b/target/arm/a32.decode @@ -47,6 +47,8 @@ &bfi rd rn lsb msb &sat rd rn satimm imm sh &pkh rd rn rm imm tb +&mcr cp opc1 crn crm opc2 rt +&mcrr cp opc1 crm rt rt2 # Data-processing (register) @@ -529,6 +531,23 @@ LDM_a32 ---- 100 b:1 i:1 u:1 w:1 1 rn:4 list:16 &ldst_block B .... 1010 ........................ @branch BL .... 1011 ........................ @branch +# Coprocessor instructions + +# We decode MCR, MRC, MRRC and MCRR only, because for QEMU the +# other coprocessor instructions always UNDEF. +# The trans_ functions for these will ignore cp values 8..13 for v7 or +# earlier, and 0..13 for v8 and later, because those areas of the +# encoding space may be used for other things, such as VFP or Neon. + +@mcr ---- .... opc1:3 . crn:4 rt:4 cp:4 opc2:3 . crm:4 &mcr +@mcrr ---- .... .... rt2:4 rt:4 cp:4 opc1:4 crm:4 &mcrr + +MCRR .... 1100 0100 .... .... .... .... .... @mcrr +MRRC .... 1100 0101 .... .... .... .... .... @mcrr + +MCR .... 1110 ... 0 .... .... .... ... 1 .... @mcr +MRC .... 1110 ... 1 .... .... .... ... 1 ....
@mcr + # Supervisor call SVC ---- 1111 imm:24 &i diff --git a/target/arm/cpu.c b/target/arm/cpu.c index 111579554f..6b382fcd60 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -391,12 +391,15 @@ static void arm_cpu_reset(DeviceState *dev) set_flush_to_zero(1, &env->vfp.standard_fp_status); set_flush_inputs_to_zero(1, &env->vfp.standard_fp_status); set_default_nan_mode(1, &env->vfp.standard_fp_status); + set_default_nan_mode(1, &env->vfp.standard_fp_status_f16); set_float_detect_tininess(float_tininess_before_rounding, &env->vfp.fp_status); set_float_detect_tininess(float_tininess_before_rounding, &env->vfp.standard_fp_status); set_float_detect_tininess(float_tininess_before_rounding, &env->vfp.fp_status_f16); + set_float_detect_tininess(float_tininess_before_rounding, + &env->vfp.standard_fp_status_f16); #ifndef CONFIG_USER_ONLY if (kvm_enabled()) { kvm_arm_reset_vcpu(cpu); diff --git a/target/arm/cpu.h b/target/arm/cpu.h index 9e8ed423ea..ac857bdc2c 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -609,6 +609,8 @@ typedef struct CPUARMState { * fp_status: is the "normal" fp status. * fp_status_fp16: used for half-precision calculations * standard_fp_status : the ARM "Standard FPSCR Value" + * standard_fp_status_fp16 : used for half-precision + * calculations with the ARM "Standard FPSCR Value" * * Half-precision operations are governed by a separate * flush-to-zero control bit in FPSCR:FZ16. We pass a separate @@ -619,15 +621,20 @@ typedef struct CPUARMState { * Neon) which the architecture defines as controlled by the * standard FPSCR value rather than the FPSCR. * + * The "standard FPSCR but for fp16 ops" is needed because + * the "standard FPSCR" tracks the FPSCR.FZ16 bit rather than + * using a fixed value for it. + * * To avoid having to transfer exception bits around, we simply * say that the FPSCR cumulative exception flags are the logical - * OR of the flags in the three fp statuses. This relies on the + * OR of the flags in the four fp statuses. This relies on the * only thing which needs to read the exception flags being * an explicit FPSCR read. */ float_status fp_status; float_status fp_status_f16; float_status standard_fp_status; + float_status standard_fp_status_f16; /* ZCR_EL[1-3] */ uint64_t zcr_el[4]; @@ -1950,7 +1957,6 @@ enum arm_features { ARM_FEATURE_V8, ARM_FEATURE_AARCH64, /* supports 64 bit mode */ ARM_FEATURE_CBAR, /* has cp15 CBAR */ - ARM_FEATURE_CRC, /* ARMv8 CRC instructions */ ARM_FEATURE_CBAR_RO, /* has cp15 CBAR and it is read-only */ ARM_FEATURE_EL2, /* has EL2 Virtualization support */ ARM_FEATURE_EL3, /* has EL3 Secure monitor support */ diff --git a/target/arm/helper.c b/target/arm/helper.c index 455c92b891..6b4f0eb533 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -8462,6 +8462,35 @@ void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu, assert((r->state != ARM_CP_STATE_AA32) || (r->opc0 == 0)); /* AArch64 regs are all 64 bit so ARM_CP_64BIT is meaningless */ assert((r->state != ARM_CP_STATE_AA64) || !(r->type & ARM_CP_64BIT)); + /* + * This API is only for Arm's system coprocessors (14 and 15) or + * (M-profile or v7A-and-earlier only) for implementation defined + * coprocessors in the range 0..7. Our decode assumes this, since + * 8..13 can be used for other insns including VFP and Neon. See + * valid_cp() in translate.c. Assert here that we haven't tried + * to use an invalid coprocessor number. + */ + switch (r->state) { + case ARM_CP_STATE_BOTH: + /* 0 has a special meaning, but otherwise the same rules as AA32. 
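
Condensed, the rule the new switch in define_one_arm_cp_reg_with_opaque() asserts is: cp14 and cp15 are always valid Arm system coprocessors, while the implementation-defined range cp0..cp7 is only acceptable on M-profile or pre-v8 cores, because v8 reclaims that encoding space for VFP/Neon. As a standalone predicate (illustrative only; the decoder-side counterpart is the valid_cp() the comment mentions):

    #include <stdbool.h>

    static bool cp_num_valid_aa32(int cp, bool is_v8, bool is_m_profile)
    {
        if (cp == 14 || cp == 15) {
            return true;            /* Arm system coprocessors */
        }
        if (is_v8 && !is_m_profile) {
            return false;           /* v8 A-profile: cp14/cp15 only */
        }
        return cp < 8;              /* impdef space on M-profile / pre-v8 */
    }
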
*/ + if (r->cp == 0) { + break; + } + /* fall through */ + case ARM_CP_STATE_AA32: + if (arm_feature(&cpu->env, ARM_FEATURE_V8) && + !arm_feature(&cpu->env, ARM_FEATURE_M)) { + assert(r->cp >= 14 && r->cp <= 15); + } else { + assert(r->cp < 8 || (r->cp >= 14 && r->cp <= 15)); + } + break; + case ARM_CP_STATE_AA64: + assert(r->cp == 0 || r->cp == CP_REG_ARM64_SYSREG_CP); + break; + default: + g_assert_not_reached(); + } /* The AArch64 pseudocode CheckSystemAccess() specifies that op1 * encodes a minimum access level for the register. We roll this * runtime check into our general permission check code, so check diff --git a/target/arm/m-nocp.decode b/target/arm/m-nocp.decode new file mode 100644 index 0000000000..7182d7d121 --- /dev/null +++ b/target/arm/m-nocp.decode @@ -0,0 +1,42 @@ +# M-profile UserFault.NOCP exception handling +# +# Copyright (c) 2020 Linaro, Ltd +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, see <http://www.gnu.org/licenses/>. + +# +# This file is processed by scripts/decodetree.py +# +# For M-profile, the architecture specifies that NOCP UsageFaults +# should take precedence over UNDEF faults over the whole wide +# range of coprocessor-space encodings, with the exception of +# VLLDM and VLSTM. (Compare v8.1M IsCPInstruction() pseudocode and +# v8M Arm ARM rule R_QLGM.) This isn't mandatory for v8.0M but we choose +# to behave the same as v8.1M. +# This decode is handled before any others (and in particular before +# decoding FP instructions which are in the coprocessor space). +# If the coprocessor is not present or disabled then we will generate +# the NOCP exception; otherwise we let the insn through to the main decode. 
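
The precedence requirement spelled out in that header comment turns into a simple ordering of decoder calls in the translator. Schematically, in the translator's context and with the m_profile() check and some names purely illustrative:

    /* Schematic of the M-profile decode ordering described above: the
     * NOCP group is tried before any other coprocessor-space decoder, so
     * a disabled or absent coprocessor raises the UsageFault before the
     * FP decoder can claim the encoding. */
    static void disas_insn(DisasContext *s, uint32_t insn)
    {
        if (m_profile(s) && disas_m_nocp(s, insn)) {
            return;   /* NOCP UsageFault raised, or VLLDM/VLSTM handled */
        }
        if (disas_vfp(s, insn)) {
            return;   /* coprocessor present and enabled: normal decode */
        }
        unallocated_encoding(s);   /* everything else UNDEFs */
    }
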
+ +{ + # Special cases which do not take an early NOCP: VLLDM and VLSTM + VLLDM_VLSTM 1110 1100 001 l:1 rn:4 0000 1010 0000 0000 + # TODO: VSCCLRM (new in v8.1M) is similar: + #VSCCLRM 1110 1100 1-01 1111 ---- 1011 ---- ---0 + + NOCP 111- 1110 ---- ---- ---- cp:4 ---- ---- + NOCP 111- 110- ---- ---- ---- cp:4 ---- ---- + # TODO: From v8.1M onwards we will also want this range to NOCP + #NOCP_8_1 111- 1111 ---- ---- ---- ---- ---- ---- cp=10 +} diff --git a/target/arm/meson.build b/target/arm/meson.build index bd46cdb523..8990090712 100644 --- a/target/arm/meson.build +++ b/target/arm/meson.build @@ -5,6 +5,7 @@ gen = [ decodetree.process('neon-ls.decode', extra_args: '--static-decode=disas_neon_ls'), decodetree.process('vfp.decode', extra_args: '--static-decode=disas_vfp'), decodetree.process('vfp-uncond.decode', extra_args: '--static-decode=disas_vfp_uncond'), + decodetree.process('m-nocp.decode', extra_args: '--static-decode=disas_m_nocp'), decodetree.process('a32.decode', extra_args: '--static-decode=disas_a32'), decodetree.process('a32-uncond.decode', extra_args: '--static-decode=disas_a32_uncond'), decodetree.process('t32.decode', extra_args: '--static-decode=disas_t32'), diff --git a/target/arm/t32.decode b/target/arm/t32.decode index c21a988f97..7069d821fd 100644 --- a/target/arm/t32.decode +++ b/target/arm/t32.decode @@ -45,6 +45,8 @@ &sat !extern rd rn satimm imm sh &pkh !extern rd rn rm imm tb &cps !extern mode imod M A I F +&mcr !extern cp opc1 crn crm opc2 rt +&mcrr !extern cp opc1 crm rt rt2 # Data-processing (register) @@ -621,6 +623,23 @@ RFE 1110 1001 10.1 .... 1100000000000000 @rfe pu=1 SRS 1110 1000 00.0 1101 1100 0000 000. .... @srs pu=2 SRS 1110 1001 10.0 1101 1100 0000 000. .... @srs pu=1 +# Coprocessor instructions + +# We decode MCR, MRC, MRRC and MCRR only, because for QEMU the +# other coprocessor instructions always UNDEF. +# The trans_ functions for these will ignore cp values 8..13 for v7 or +# earlier, and 0..13 for v8 and later, because those areas of the +# encoding space may be used for other things, such as VFP or Neon. + +@mcr .... .... opc1:3 . crn:4 rt:4 cp:4 opc2:3 . crm:4 +@mcrr .... .... .... rt2:4 rt:4 cp:4 opc1:4 crm:4 + +MCRR 1110 1100 0100 .... .... .... .... .... @mcrr +MRRC 1110 1100 0101 .... .... .... .... .... @mcrr + +MCR 1110 1110 ... 0 .... .... .... ... 1 .... @mcr +MRC 1110 1110 ... 1 .... .... .... ... 1 .... @mcr + # Branches %imm24 26:s1 13:1 11:1 16:10 0:11 !function=t32_branch24 diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 534c3ff5f3..0fc5e12fab 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -609,25 +609,6 @@ static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v) tcg_temp_free_i64(tmp); } -TCGv_ptr get_fpstatus_ptr(bool is_f16) -{ - TCGv_ptr statusptr = tcg_temp_new_ptr(); - int offset; - - /* In A64 all instructions (both FP and Neon) use the FPCR; there - * is no equivalent of the A32 Neon "standard FPSCR value". - * However half-precision operations operate under a different - * FZ16 flag and use vfp.fp_status_f16 instead of vfp.fp_status. - */ - if (is_f16) { - offset = offsetof(CPUARMState, vfp.fp_status_f16); - } else { - offset = offsetof(CPUARMState, vfp.fp_status); - } - tcg_gen_addi_ptr(statusptr, cpu_env, offset); - return statusptr; -} - /* Expand a 2-operand AdvSIMD vector operation using an expander function.
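
From here on, every get_fpstatus_ptr(bool) call is replaced by fpstatus_ptr() taking an explicit flavour, which is what lets Neon pick the new standard_fp_status_f16 field. A sketch of the replacement helper, reconstructed in the translator's context from the removed body above and the FPST_* names used at the call sites (treat the exact enum shape as an assumption):

    typedef enum ARMFPStatusFlavour {
        FPST_FPCR,        /* vfp.fp_status: FPCR-governed */
        FPST_FPCR_F16,    /* vfp.fp_status_f16: FZ16-governed half precision */
        FPST_STD,         /* vfp.standard_fp_status: Neon "Standard FPSCR" */
        FPST_STD_F16,     /* vfp.standard_fp_status_f16: standard value + FZ16 */
    } ARMFPStatusFlavour;

    static TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour)
    {
        TCGv_ptr statusptr = tcg_temp_new_ptr();
        int offset;

        switch (flavour) {
        case FPST_FPCR:
            offset = offsetof(CPUARMState, vfp.fp_status);
            break;
        case FPST_FPCR_F16:
            offset = offsetof(CPUARMState, vfp.fp_status_f16);
            break;
        case FPST_STD:
            offset = offsetof(CPUARMState, vfp.standard_fp_status);
            break;
        case FPST_STD_F16:
            offset = offsetof(CPUARMState, vfp.standard_fp_status_f16);
            break;
        default:
            g_assert_not_reached();
        }
        tcg_gen_addi_ptr(statusptr, cpu_env, offset);
        return statusptr;
    }
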
*/ static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn, GVecGen2Fn *gvec_fn, int vece) @@ -689,7 +670,7 @@ static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn, int rm, bool is_fp16, int data, gen_helper_gvec_3_ptr *fn) { - TCGv_ptr fpst = get_fpstatus_ptr(is_fp16); + TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR); tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), vec_full_reg_offset(s, rm), fpst, @@ -5898,7 +5879,7 @@ static void handle_fp_compare(DisasContext *s, int size, bool cmp_with_zero, bool signal_all_nans) { TCGv_i64 tcg_flags = tcg_temp_new_i64(); - TCGv_ptr fpst = get_fpstatus_ptr(size == MO_16); + TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); if (size == MO_64) { TCGv_i64 tcg_vn, tcg_vm; @@ -6157,7 +6138,7 @@ static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn) tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000); break; case 0x3: /* FSQRT */ - fpst = get_fpstatus_ptr(true); + fpst = fpstatus_ptr(FPST_FPCR_F16); gen_helper_sqrt_f16(tcg_res, tcg_op, fpst); break; case 0x8: /* FRINTN */ @@ -6167,7 +6148,7 @@ static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn) case 0xc: /* FRINTA */ { TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7)); - fpst = get_fpstatus_ptr(true); + fpst = fpstatus_ptr(FPST_FPCR_F16); gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst); @@ -6177,11 +6158,11 @@ static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn) break; } case 0xe: /* FRINTX */ - fpst = get_fpstatus_ptr(true); + fpst = fpstatus_ptr(FPST_FPCR_F16); gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst); break; case 0xf: /* FRINTI */ - fpst = get_fpstatus_ptr(true); + fpst = fpstatus_ptr(FPST_FPCR_F16); gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst); break; default: @@ -6253,7 +6234,7 @@ static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn) g_assert_not_reached(); } - fpst = get_fpstatus_ptr(false); + fpst = fpstatus_ptr(FPST_FPCR); if (rmode >= 0) { TCGv_i32 tcg_rmode = tcg_const_i32(rmode); gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); @@ -6330,7 +6311,7 @@ static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn) g_assert_not_reached(); } - fpst = get_fpstatus_ptr(false); + fpst = fpstatus_ptr(FPST_FPCR); if (rmode >= 0) { TCGv_i32 tcg_rmode = tcg_const_i32(rmode); gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); @@ -6365,7 +6346,7 @@ static void handle_fp_fcvt(DisasContext *s, int opcode, /* Single to half */ TCGv_i32 tcg_rd = tcg_temp_new_i32(); TCGv_i32 ahp = get_ahp_flag(); - TCGv_ptr fpst = get_fpstatus_ptr(false); + TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, fpst, ahp); /* write_fp_sreg is OK here because top half of tcg_rd is zero */ @@ -6385,7 +6366,7 @@ static void handle_fp_fcvt(DisasContext *s, int opcode, /* Double to single */ gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env); } else { - TCGv_ptr fpst = get_fpstatus_ptr(false); + TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); TCGv_i32 ahp = get_ahp_flag(); /* Double to half */ gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp); @@ -6401,7 +6382,7 @@ static void handle_fp_fcvt(DisasContext *s, int opcode, case 0x3: { TCGv_i32 tcg_rn = read_fp_sreg(s, rn); - TCGv_ptr tcg_fpst = get_fpstatus_ptr(false); + TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_FPCR); TCGv_i32 tcg_ahp = get_ahp_flag(); tcg_gen_ext16u_i32(tcg_rn, 
tcg_rn); if (dtype == 0) { @@ -6518,7 +6499,7 @@ static void handle_fp_2src_single(DisasContext *s, int opcode, TCGv_ptr fpst; tcg_res = tcg_temp_new_i32(); - fpst = get_fpstatus_ptr(false); + fpst = fpstatus_ptr(FPST_FPCR); tcg_op1 = read_fp_sreg(s, rn); tcg_op2 = read_fp_sreg(s, rm); @@ -6571,7 +6552,7 @@ static void handle_fp_2src_double(DisasContext *s, int opcode, TCGv_ptr fpst; tcg_res = tcg_temp_new_i64(); - fpst = get_fpstatus_ptr(false); + fpst = fpstatus_ptr(FPST_FPCR); tcg_op1 = read_fp_dreg(s, rn); tcg_op2 = read_fp_dreg(s, rm); @@ -6624,7 +6605,7 @@ static void handle_fp_2src_half(DisasContext *s, int opcode, TCGv_ptr fpst; tcg_res = tcg_temp_new_i32(); - fpst = get_fpstatus_ptr(true); + fpst = fpstatus_ptr(FPST_FPCR_F16); tcg_op1 = read_fp_hreg(s, rn); tcg_op2 = read_fp_hreg(s, rm); @@ -6723,7 +6704,7 @@ static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1, { TCGv_i32 tcg_op1, tcg_op2, tcg_op3; TCGv_i32 tcg_res = tcg_temp_new_i32(); - TCGv_ptr fpst = get_fpstatus_ptr(false); + TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); tcg_op1 = read_fp_sreg(s, rn); tcg_op2 = read_fp_sreg(s, rm); @@ -6761,7 +6742,7 @@ static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1, { TCGv_i64 tcg_op1, tcg_op2, tcg_op3; TCGv_i64 tcg_res = tcg_temp_new_i64(); - TCGv_ptr fpst = get_fpstatus_ptr(false); + TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); tcg_op1 = read_fp_dreg(s, rn); tcg_op2 = read_fp_dreg(s, rm); @@ -6799,7 +6780,7 @@ static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1, { TCGv_i32 tcg_op1, tcg_op2, tcg_op3; TCGv_i32 tcg_res = tcg_temp_new_i32(); - TCGv_ptr fpst = get_fpstatus_ptr(true); + TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_F16); tcg_op1 = read_fp_hreg(s, rn); tcg_op2 = read_fp_hreg(s, rm); @@ -6945,7 +6926,7 @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode, TCGv_i32 tcg_shift, tcg_single; TCGv_i64 tcg_double; - tcg_fpstatus = get_fpstatus_ptr(type == 3); + tcg_fpstatus = fpstatus_ptr(type == 3 ? FPST_FPCR_F16 : FPST_FPCR); tcg_shift = tcg_const_i32(64 - scale); @@ -7233,7 +7214,7 @@ static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof) static void handle_fjcvtzs(DisasContext *s, int rd, int rn) { TCGv_i64 t = read_fp_dreg(s, rn); - TCGv_ptr fpstatus = get_fpstatus_ptr(false); + TCGv_ptr fpstatus = fpstatus_ptr(FPST_FPCR); gen_helper_fjcvtzs(t, t, fpstatus); @@ -7847,7 +7828,7 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn) * Note that correct NaN propagation requires that we do these * operations in exactly the order specified by the pseudocode. */ - TCGv_ptr fpst = get_fpstatus_ptr(size == MO_16); + TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); int fpopcode = opcode | is_min << 4 | is_u << 5; int vmap = (1 << elements) - 1; TCGv_i32 tcg_res32 = do_reduction_op(s, fpopcode, rn, esize, @@ -8359,7 +8340,7 @@ static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn) return; } - fpst = get_fpstatus_ptr(size == MO_16); + fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); break; default: unallocated_encoding(s); @@ -8872,7 +8853,7 @@ static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn, int elements, int is_signed, int fracbits, int size) { - TCGv_ptr tcg_fpst = get_fpstatus_ptr(size == MO_16); + TCGv_ptr tcg_fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); TCGv_i32 tcg_shift = NULL; MemOp mop = size | (is_signed ? 
MO_SIGN : 0); @@ -9053,7 +9034,7 @@ static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar, assert(!(is_scalar && is_q)); tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO)); - tcg_fpstatus = get_fpstatus_ptr(size == MO_16); + tcg_fpstatus = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus); fracbits = (16 << size) - immhb; tcg_shift = tcg_const_i32(fracbits); @@ -9392,7 +9373,7 @@ static void handle_3same_float(DisasContext *s, int size, int elements, int fpopcode, int rd, int rn, int rm) { int pass; - TCGv_ptr fpst = get_fpstatus_ptr(false); + TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); for (pass = 0; pass < elements; pass++) { if (size) { @@ -9785,7 +9766,7 @@ static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s, return; } - fpst = get_fpstatus_ptr(true); + fpst = fpstatus_ptr(FPST_FPCR_F16); tcg_op1 = read_fp_hreg(s, rn); tcg_op2 = read_fp_hreg(s, rm); @@ -10038,7 +10019,7 @@ static void handle_2misc_fcmp_zero(DisasContext *s, int opcode, return; } - fpst = get_fpstatus_ptr(size == MO_16); + fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); if (is_double) { TCGv_i64 tcg_op = tcg_temp_new_i64(); @@ -10168,7 +10149,7 @@ static void handle_2misc_reciprocal(DisasContext *s, int opcode, int size, int rn, int rd) { bool is_double = (size == 3); - TCGv_ptr fpst = get_fpstatus_ptr(false); + TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); if (is_double) { TCGv_i64 tcg_op = tcg_temp_new_i64(); @@ -10309,7 +10290,7 @@ static void handle_2misc_narrow(DisasContext *s, bool scalar, } else { TCGv_i32 tcg_lo = tcg_temp_new_i32(); TCGv_i32 tcg_hi = tcg_temp_new_i32(); - TCGv_ptr fpst = get_fpstatus_ptr(false); + TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); TCGv_i32 ahp = get_ahp_flag(); tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op); @@ -10571,7 +10552,7 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn) if (is_fcvt) { tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode)); - tcg_fpstatus = get_fpstatus_ptr(false); + tcg_fpstatus = fpstatus_ptr(FPST_FPCR); gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus); } else { tcg_rmode = NULL; @@ -11396,7 +11377,7 @@ static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode, /* Floating point operations need fpst */ if (opcode >= 0x58) { - fpst = get_fpstatus_ptr(false); + fpst = fpstatus_ptr(FPST_FPCR); } else { fpst = NULL; } @@ -11994,7 +11975,7 @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn) break; } - fpst = get_fpstatus_ptr(true); + fpst = fpstatus_ptr(FPST_FPCR_F16); if (pairwise) { int maxpass = is_q ? 8 : 4; @@ -12287,7 +12268,7 @@ static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q, /* 16 -> 32 bit fp conversion */ int srcelt = is_q ? 
4 : 0; TCGv_i32 tcg_res[4]; - TCGv_ptr fpst = get_fpstatus_ptr(false); + TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); TCGv_i32 ahp = get_ahp_flag(); for (pass = 0; pass < 4; pass++) { @@ -12759,7 +12740,7 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) } if (need_fpstatus || need_rmode) { - tcg_fpstatus = get_fpstatus_ptr(false); + tcg_fpstatus = fpstatus_ptr(FPST_FPCR); } else { tcg_fpstatus = NULL; } @@ -13149,7 +13130,7 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn) } if (need_rmode || need_fpst) { - tcg_fpstatus = get_fpstatus_ptr(true); + tcg_fpstatus = fpstatus_ptr(FPST_FPCR_F16); } if (need_rmode) { @@ -13458,7 +13439,7 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) } if (is_fp) { - fpst = get_fpstatus_ptr(is_fp16); + fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR); } else { fpst = NULL; } diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h index 647f0c74f6..2e0d16da25 100644 --- a/target/arm/translate-a64.h +++ b/target/arm/translate-a64.h @@ -37,7 +37,6 @@ TCGv_i64 cpu_reg_sp(DisasContext *s, int reg); TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf); TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf); void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v); -TCGv_ptr get_fpstatus_ptr(bool); bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn, unsigned int imms, unsigned int immr); bool sve_access_check(DisasContext *s); diff --git a/target/arm/translate-neon.c.inc b/target/arm/translate-neon.c.inc index 8fbe8cef9f..9879731a52 100644 --- a/target/arm/translate-neon.c.inc +++ b/target/arm/translate-neon.c.inc @@ -181,7 +181,7 @@ static bool trans_VCMLA(DisasContext *s, arg_VCMLA *a) } opr_sz = (1 + a->q) * 8; - fpst = get_fpstatus_ptr(1); + fpst = fpstatus_ptr(a->size == 0 ? FPST_STD_F16 : FPST_STD); fn_gvec_ptr = a->size ? gen_helper_gvec_fcmlas : gen_helper_gvec_fcmlah; tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd), vfp_reg_offset(1, a->vn), @@ -218,7 +218,7 @@ static bool trans_VCADD(DisasContext *s, arg_VCADD *a) } opr_sz = (1 + a->q) * 8; - fpst = get_fpstatus_ptr(1); + fpst = fpstatus_ptr(a->size == 0 ? FPST_STD_F16 : FPST_STD); fn_gvec_ptr = a->size ? gen_helper_gvec_fcadds : gen_helper_gvec_fcaddh; tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd), vfp_reg_offset(1, a->vn), @@ -322,7 +322,7 @@ static bool trans_VCMLA_scalar(DisasContext *s, arg_VCMLA_scalar *a) fn_gvec_ptr = (a->size ? gen_helper_gvec_fcmlas_idx : gen_helper_gvec_fcmlah_idx); opr_sz = (1 + a->q) * 8; - fpst = get_fpstatus_ptr(1); + fpst = fpstatus_ptr(a->size == 0 ? FPST_STD_F16 : FPST_STD); tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd), vfp_reg_offset(1, a->vn), vfp_reg_offset(1, a->vm), @@ -358,7 +358,7 @@ static bool trans_VDOT_scalar(DisasContext *s, arg_VDOT_scalar *a) fn_gvec = a->u ? gen_helper_gvec_udot_idx_b : gen_helper_gvec_sdot_idx_b; opr_sz = (1 + a->q) * 8; - fpst = get_fpstatus_ptr(1); + fpst = fpstatus_ptr(FPST_STD); tcg_gen_gvec_3_ool(vfp_reg_offset(1, a->vd), vfp_reg_offset(1, a->vn), vfp_reg_offset(1, a->rm), @@ -1063,7 +1063,7 @@ static bool do_3same_fp(DisasContext *s, arg_3same *a, VFPGen3OpSPFn *fn, return true; } - TCGv_ptr fpstatus = get_fpstatus_ptr(1); + TCGv_ptr fpstatus = fpstatus_ptr(FPST_STD); for (pass = 0; pass < (a->q ? 
4 : 2); pass++) { tmp = neon_load_reg(a->vn, pass); tmp2 = neon_load_reg(a->vm, pass); @@ -1091,7 +1091,7 @@ static bool do_3same_fp(DisasContext *s, arg_3same *a, VFPGen3OpSPFn *fn, uint32_t rn_ofs, uint32_t rm_ofs, \ uint32_t oprsz, uint32_t maxsz) \ { \ - TCGv_ptr fpst = get_fpstatus_ptr(1); \ + TCGv_ptr fpst = fpstatus_ptr(FPST_STD); \ tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpst, \ oprsz, maxsz, 0, FUNC); \ tcg_temp_free_ptr(fpst); \ @@ -1287,7 +1287,7 @@ static bool do_3same_fp_pair(DisasContext *s, arg_3same *a, VFPGen3OpSPFn *fn) * early. Since Q is 0 there are always just two passes, so instead * of a complicated loop over each pass we just unroll. */ - fpstatus = get_fpstatus_ptr(1); + fpstatus = fpstatus_ptr(FPST_STD); tmp = neon_load_reg(a->vn, 0); tmp2 = neon_load_reg(a->vn, 1); fn(tmp, tmp, tmp2, fpstatus); @@ -1790,7 +1790,7 @@ static bool do_fp_2sh(DisasContext *s, arg_2reg_shift *a, return true; } - fpstatus = get_fpstatus_ptr(1); + fpstatus = fpstatus_ptr(FPST_STD); shiftv = tcg_const_i32(a->shift); for (pass = 0; pass < (a->q ? 4 : 2); pass++) { tmp = neon_load_reg(a->vm, pass); @@ -2591,7 +2591,7 @@ static bool trans_VMLS_2sc(DisasContext *s, arg_2scalar *a) #define WRAP_FP_FN(WRAPNAME, FUNC) \ static void WRAPNAME(TCGv_i32 rd, TCGv_i32 rn, TCGv_i32 rm) \ { \ - TCGv_ptr fpstatus = get_fpstatus_ptr(1); \ + TCGv_ptr fpstatus = fpstatus_ptr(FPST_STD); \ FUNC(rd, rn, rm, fpstatus); \ tcg_temp_free_ptr(fpstatus); \ } @@ -3480,7 +3480,7 @@ static bool trans_VCVT_F16_F32(DisasContext *s, arg_2misc *a) return true; } - fpst = get_fpstatus_ptr(true); + fpst = fpstatus_ptr(FPST_STD); ahp = get_ahp_flag(); tmp = neon_load_reg(a->vm, 0); gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp); @@ -3528,7 +3528,7 @@ static bool trans_VCVT_F32_F16(DisasContext *s, arg_2misc *a) return true; } - fpst = get_fpstatus_ptr(true); + fpst = fpstatus_ptr(FPST_STD); ahp = get_ahp_flag(); tmp3 = tcg_temp_new_i32(); tmp = neon_load_reg(a->vm, 0); @@ -3838,7 +3838,7 @@ static bool do_2misc_fp(DisasContext *s, arg_2misc *a, return true; } - fpst = get_fpstatus_ptr(1); + fpst = fpstatus_ptr(FPST_STD); for (pass = 0; pass < (a->q ? 4 : 2); pass++) { TCGv_i32 tmp = neon_load_reg(a->vm, pass); fn(tmp, tmp, fpst); @@ -3932,7 +3932,7 @@ static bool do_vrint(DisasContext *s, arg_2misc *a, int rmode) return true; } - fpst = get_fpstatus_ptr(1); + fpst = fpstatus_ptr(FPST_STD); tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode)); gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode, cpu_env); for (pass = 0; pass < (a->q ? 4 : 2); pass++) { @@ -3993,7 +3993,7 @@ static bool do_vcvt(DisasContext *s, arg_2misc *a, int rmode, bool is_signed) return true; } - fpst = get_fpstatus_ptr(1); + fpst = fpstatus_ptr(FPST_STD); tcg_shift = tcg_const_i32(0); tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode)); gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode, cpu_env); diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index 8c7fbbd503..d97cb37d83 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -3470,7 +3470,7 @@ static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a) if (sve_access_check(s)) { unsigned vsz = vec_full_reg_size(s); - TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16); + TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR); tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd), vec_full_reg_offset(s, a->rn), vec_full_reg_offset(s, a->rm), @@ -3496,7 +3496,7 @@ static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a) if (sve_access_check(s)) { unsigned vsz = vec_full_reg_size(s); - TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16); + TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), vec_full_reg_offset(s, a->rn), vec_full_reg_offset(s, a->rm), @@ -3528,7 +3528,7 @@ static void do_reduce(DisasContext *s, arg_rpr_esz *a, tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn)); tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg)); - status = get_fpstatus_ptr(a->esz == MO_16); + status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); fn(temp, t_zn, t_pg, status, t_desc); tcg_temp_free_ptr(t_zn); @@ -3570,7 +3570,7 @@ DO_VPZ(FMAXV, fmaxv) static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn) { unsigned vsz = vec_full_reg_size(s); - TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16); + TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd), vec_full_reg_offset(s, a->rn), @@ -3618,7 +3618,7 @@ static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3_ptr *fn) { unsigned vsz = vec_full_reg_size(s); - TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16); + TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd), vec_full_reg_offset(s, a->rn), @@ -3670,7 +3670,7 @@ static bool trans_FTMAD(DisasContext *s, arg_FTMAD *a) } if (sve_access_check(s)) { unsigned vsz = vec_full_reg_size(s); - TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16); + TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), vec_full_reg_offset(s, a->rn), vec_full_reg_offset(s, a->rm), @@ -3710,7 +3710,7 @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a) t_pg = tcg_temp_new_ptr(); tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm)); tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg)); - t_fpst = get_fpstatus_ptr(a->esz == MO_16); + t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0)); fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc); @@ -3737,7 +3737,7 @@ static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a, } if (sve_access_check(s)) { unsigned vsz = vec_full_reg_size(s); - TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16); + TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), vec_full_reg_offset(s, a->rn), vec_full_reg_offset(s, a->rm), @@ -3779,7 +3779,7 @@ static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a, } if (sve_access_check(s)) { unsigned vsz = vec_full_reg_size(s); - TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16); + TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR); tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd), vec_full_reg_offset(s, a->rn), vec_full_reg_offset(s, a->rm), @@ -3831,7 +3831,7 @@ static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16, tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn)); tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg)); - status = get_fpstatus_ptr(is_fp16); + status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR); desc = tcg_const_i32(simd_desc(vsz, vsz, 0)); fn(t_zd, t_zn, t_pg, scalar, status, desc); @@ -3895,7 +3895,7 @@ static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a, } if (sve_access_check(s)) { unsigned vsz = vec_full_reg_size(s); - TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16); + TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd), vec_full_reg_offset(s, a->rn), vec_full_reg_offset(s, a->rm), @@ -3939,7 +3939,7 @@ static bool trans_FCADD(DisasContext *s, arg_FCADD *a) } if (sve_access_check(s)) { unsigned vsz = vec_full_reg_size(s); - TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16); + TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd), vec_full_reg_offset(s, a->rn), vec_full_reg_offset(s, a->rm), @@ -3958,7 +3958,7 @@ static bool do_fmla(DisasContext *s, arg_rprrr_esz *a, } if (sve_access_check(s)) { unsigned vsz = vec_full_reg_size(s); - TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16); + TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd), vec_full_reg_offset(s, a->rn), vec_full_reg_offset(s, a->rm), @@ -4001,7 +4001,7 @@ static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a) } if (sve_access_check(s)) { unsigned vsz = vec_full_reg_size(s); - TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16); + TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd), vec_full_reg_offset(s, a->rn), vec_full_reg_offset(s, a->rm), @@ -4024,7 +4024,7 @@ static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a) tcg_debug_assert(a->rd == a->ra); if (sve_access_check(s)) { unsigned vsz = vec_full_reg_size(s); - TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16); + TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), vec_full_reg_offset(s, a->rn), vec_full_reg_offset(s, a->rm), @@ -4045,7 +4045,7 @@ static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg, { if (sve_access_check(s)) { unsigned vsz = vec_full_reg_size(s); - TCGv_ptr status = get_fpstatus_ptr(is_fp16); + TCGv_ptr status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR); tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), pred_full_reg_offset(s, pg), @@ -4191,7 +4191,7 @@ static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, int mode) if (sve_access_check(s)) { unsigned vsz = vec_full_reg_size(s); TCGv_i32 tmode = tcg_const_i32(mode); - TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16); + TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR); gen_helper_set_rmode(tmode, tmode, status); diff --git a/target/arm/translate-vfp.c.inc b/target/arm/translate-vfp.c.inc index 2d63fa0d39..4eeafb494a 100644 --- a/target/arm/translate-vfp.c.inc +++ b/target/arm/translate-vfp.c.inc @@ -95,14 +95,11 @@ static inline long vfp_f16_offset(unsigned reg, bool top) static bool full_vfp_access_check(DisasContext *s, bool ignore_vfp_enabled) { if (s->fp_excp_el) { - if (arm_dc_feature(s, ARM_FEATURE_M)) { - gen_exception_insn(s, s->pc_curr, EXCP_NOCP, syn_uncategorized(), - s->fp_excp_el); - } else { - gen_exception_insn(s, s->pc_curr, EXCP_UDEF, - syn_fp_access_trap(1, 0xe, false), - s->fp_excp_el); - } + /* M-profile handled this earlier, in disas_m_nocp() */ + assert (!arm_dc_feature(s, ARM_FEATURE_M)); + gen_exception_insn(s, s->pc_curr, EXCP_UDEF, + syn_fp_access_trap(1, 0xe, false), + s->fp_excp_el); return false; } @@ -362,7 +359,7 @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a) return true; } - fpst = get_fpstatus_ptr(0); + fpst = fpstatus_ptr(FPST_FPCR); tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding)); gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); @@ -425,7 +422,7 @@ static bool trans_VCVT(DisasContext *s, arg_VCVT *a) return true; } - fpst = get_fpstatus_ptr(0); + fpst = fpstatus_ptr(FPST_FPCR); tcg_shift = tcg_const_i32(0); @@ -1234,7 +1231,7 @@ static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn, f0 = tcg_temp_new_i32(); f1 = tcg_temp_new_i32(); fd = tcg_temp_new_i32(); - fpst = get_fpstatus_ptr(0); + fpst = fpstatus_ptr(FPST_FPCR); neon_load_reg32(f0, vn); neon_load_reg32(f1, vm); @@ -1317,7 +1314,7 @@ static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn, f0 = tcg_temp_new_i64(); f1 = tcg_temp_new_i64(); fd = tcg_temp_new_i64(); - fpst = get_fpstatus_ptr(0); + fpst = fpstatus_ptr(FPST_FPCR); neon_load_reg64(f0, vn); neon_load_reg64(f1, vm); @@ -1799,7 +1796,7 @@ static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d) /* VFNMA, VFNMS */ gen_helper_vfp_negs(vd, vd); } - fpst = get_fpstatus_ptr(0); + fpst = fpstatus_ptr(FPST_FPCR); gen_helper_vfp_muladds(vd, vn, vm, vd, fpst); neon_store_reg32(vd, a->vd); @@ -1890,7 +1887,7 @@ static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d) /* VFNMA, VFNMS */ gen_helper_vfp_negd(vd, vd); } - fpst = get_fpstatus_ptr(0); + fpst = fpstatus_ptr(FPST_FPCR); gen_helper_vfp_muladdd(vd, vn, vm, vd, fpst); neon_store_reg64(vd, a->vd); @@ -2174,7 +2171,7 @@ static bool trans_VCVT_f32_f16(DisasContext *s, arg_VCVT_f32_f16 *a) return true; } - fpst = get_fpstatus_ptr(false); + fpst = fpstatus_ptr(FPST_FPCR); ahp_mode = get_ahp_flag(); tmp = tcg_temp_new_i32(); /* The T bit tells us if we want the low or high 16 bits of Vm */ @@ -2211,7 +2208,7 @@ static bool trans_VCVT_f64_f16(DisasContext *s, arg_VCVT_f64_f16 *a) return true; } - fpst = get_fpstatus_ptr(false); + fpst = fpstatus_ptr(FPST_FPCR); ahp_mode = get_ahp_flag(); tmp = tcg_temp_new_i32(); /* The T bit tells us if we want the low or high 16 bits of Vm */ @@ -2240,7 +2237,7 @@ static bool trans_VCVT_f16_f32(DisasContext *s, arg_VCVT_f16_f32 *a) return true; } - fpst = get_fpstatus_ptr(false); + fpst = fpstatus_ptr(FPST_FPCR); ahp_mode = get_ahp_flag(); tmp = tcg_temp_new_i32(); @@ -2277,7 +2274,7 @@ static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a) return true; } - fpst = get_fpstatus_ptr(false); + fpst = fpstatus_ptr(FPST_FPCR); ahp_mode = get_ahp_flag(); tmp = tcg_temp_new_i32(); vm = tcg_temp_new_i64(); @@ -2307,7 +2304,7 
@@ static bool trans_VRINTR_sp(DisasContext *s, arg_VRINTR_sp *a) tmp = tcg_temp_new_i32(); neon_load_reg32(tmp, a->vm); - fpst = get_fpstatus_ptr(false); + fpst = fpstatus_ptr(FPST_FPCR); gen_helper_rints(tmp, tmp, fpst); neon_store_reg32(tmp, a->vd); tcg_temp_free_ptr(fpst); @@ -2339,7 +2336,7 @@ static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_dp *a) tmp = tcg_temp_new_i64(); neon_load_reg64(tmp, a->vm); - fpst = get_fpstatus_ptr(false); + fpst = fpstatus_ptr(FPST_FPCR); gen_helper_rintd(tmp, tmp, fpst); neon_store_reg64(tmp, a->vd); tcg_temp_free_ptr(fpst); @@ -2363,7 +2360,7 @@ static bool trans_VRINTZ_sp(DisasContext *s, arg_VRINTZ_sp *a) tmp = tcg_temp_new_i32(); neon_load_reg32(tmp, a->vm); - fpst = get_fpstatus_ptr(false); + fpst = fpstatus_ptr(FPST_FPCR); tcg_rmode = tcg_const_i32(float_round_to_zero); gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); gen_helper_rints(tmp, tmp, fpst); @@ -2400,7 +2397,7 @@ static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_dp *a) tmp = tcg_temp_new_i64(); neon_load_reg64(tmp, a->vm); - fpst = get_fpstatus_ptr(false); + fpst = fpstatus_ptr(FPST_FPCR); tcg_rmode = tcg_const_i32(float_round_to_zero); gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); gen_helper_rintd(tmp, tmp, fpst); @@ -2427,7 +2424,7 @@ static bool trans_VRINTX_sp(DisasContext *s, arg_VRINTX_sp *a) tmp = tcg_temp_new_i32(); neon_load_reg32(tmp, a->vm); - fpst = get_fpstatus_ptr(false); + fpst = fpstatus_ptr(FPST_FPCR); gen_helper_rints_exact(tmp, tmp, fpst); neon_store_reg32(tmp, a->vd); tcg_temp_free_ptr(fpst); @@ -2459,7 +2456,7 @@ static bool trans_VRINTX_dp(DisasContext *s, arg_VRINTX_dp *a) tmp = tcg_temp_new_i64(); neon_load_reg64(tmp, a->vm); - fpst = get_fpstatus_ptr(false); + fpst = fpstatus_ptr(FPST_FPCR); gen_helper_rintd_exact(tmp, tmp, fpst); neon_store_reg64(tmp, a->vd); tcg_temp_free_ptr(fpst); @@ -2538,7 +2535,7 @@ static bool trans_VCVT_int_sp(DisasContext *s, arg_VCVT_int_sp *a) vm = tcg_temp_new_i32(); neon_load_reg32(vm, a->vm); - fpst = get_fpstatus_ptr(false); + fpst = fpstatus_ptr(FPST_FPCR); if (a->s) { /* i32 -> f32 */ gen_helper_vfp_sitos(vm, vm, fpst); @@ -2574,7 +2571,7 @@ static bool trans_VCVT_int_dp(DisasContext *s, arg_VCVT_int_dp *a) vm = tcg_temp_new_i32(); vd = tcg_temp_new_i64(); neon_load_reg32(vm, a->vm); - fpst = get_fpstatus_ptr(false); + fpst = fpstatus_ptr(FPST_FPCR); if (a->s) { /* i32 -> f64 */ gen_helper_vfp_sitod(vd, vm, fpst); @@ -2640,7 +2637,7 @@ static bool trans_VCVT_fix_sp(DisasContext *s, arg_VCVT_fix_sp *a) vd = tcg_temp_new_i32(); neon_load_reg32(vd, a->vd); - fpst = get_fpstatus_ptr(false); + fpst = fpstatus_ptr(FPST_FPCR); shift = tcg_const_i32(frac_bits); /* Switch on op:U:sx bits */ @@ -2705,7 +2702,7 @@ static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a) vd = tcg_temp_new_i64(); neon_load_reg64(vd, a->vd); - fpst = get_fpstatus_ptr(false); + fpst = fpstatus_ptr(FPST_FPCR); shift = tcg_const_i32(frac_bits); /* Switch on op:U:sx bits */ @@ -2758,7 +2755,7 @@ static bool trans_VCVT_sp_int(DisasContext *s, arg_VCVT_sp_int *a) return true; } - fpst = get_fpstatus_ptr(false); + fpst = fpstatus_ptr(FPST_FPCR); vm = tcg_temp_new_i32(); neon_load_reg32(vm, a->vm); @@ -2800,7 +2797,7 @@ static bool trans_VCVT_dp_int(DisasContext *s, arg_VCVT_dp_int *a) return true; } - fpst = get_fpstatus_ptr(false); + fpst = fpstatus_ptr(FPST_FPCR); vm = tcg_temp_new_i64(); vd = tcg_temp_new_i32(); neon_load_reg64(vm, a->vm); @@ -2842,9 +2839,14 @@ static bool trans_VLLDM_VLSTM(DisasContext *s, arg_VLLDM_VLSTM *a) 
!arm_dc_feature(s, ARM_FEATURE_V8)) { return false; } - /* If not secure, UNDEF. */ + /* + * If not secure, UNDEF. We must emit code for this + * rather than returning false so that this takes + * precedence over the m-nocp.decode NOCP fallback. + */ if (!s->v8m_secure) { - return false; + unallocated_encoding(s); + return true; } /* If no fpu, NOP. */ if (!dc_isar_feature(aa32_vfp, s)) { @@ -2863,3 +2865,33 @@ static bool trans_VLLDM_VLSTM(DisasContext *s, arg_VLLDM_VLSTM *a) s->base.is_jmp = DISAS_UPDATE_EXIT; return true; } + +static bool trans_NOCP(DisasContext *s, arg_NOCP *a) +{ + /* + * Handle M-profile early check for disabled coprocessor: + * all we need to do here is emit the NOCP exception if + * the coprocessor is disabled. Otherwise we return false + * and the real VFP/etc decode will handle the insn. + */ + assert(arm_dc_feature(s, ARM_FEATURE_M)); + + if (a->cp == 11) { + a->cp = 10; + } + /* TODO: in v8.1M cp 8, 9, 14, 15 also are governed by the cp10 enable */ + + if (a->cp != 10) { + gen_exception_insn(s, s->pc_curr, EXCP_NOCP, + syn_uncategorized(), default_exception_el(s)); + return true; + } + + if (s->fp_excp_el != 0) { + gen_exception_insn(s, s->pc_curr, EXCP_NOCP, + syn_uncategorized(), s->fp_excp_el); + return true; + } + + return false; +} diff --git a/target/arm/translate.c b/target/arm/translate.c index 556588d92f..d34c1d351a 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -49,8 +49,6 @@ #define ENABLE_ARCH_7 arm_dc_feature(s, ARM_FEATURE_V7) #define ENABLE_ARCH_8 arm_dc_feature(s, ARM_FEATURE_V8) -#define ARCH(x) do { if (!ENABLE_ARCH_##x) goto illegal_op; } while(0) - #include "translate.h" #if defined(CONFIG_USER_ONLY) @@ -59,8 +57,9 @@ #define IS_USER(s) (s->user) #endif -/* We reuse the same 64-bit temporaries for efficiency. */ +/* These are TCG temporaries used only by the legacy iwMMXt decoder */ static TCGv_i64 cpu_V0, cpu_V1, cpu_M0; +/* These are TCG globals which alias CPUARMState fields */ static TCGv_i32 cpu_R[16]; TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF; TCGv_i64 cpu_exclusive_addr; @@ -1095,19 +1094,6 @@ static inline void gen_hlt(DisasContext *s, int imm) unallocated_encoding(s); } -static TCGv_ptr get_fpstatus_ptr(int neon) -{ - TCGv_ptr statusptr = tcg_temp_new_ptr(); - int offset; - if (neon) { - offset = offsetof(CPUARMState, vfp.standard_fp_status); - } else { - offset = offsetof(CPUARMState, vfp.fp_status); - } - tcg_gen_addi_ptr(statusptr, cpu_env, offset); - return statusptr; -} - static inline long vfp_reg_offset(bool dp, unsigned reg) { if (dp) { @@ -1176,6 +1162,7 @@ static TCGv_ptr vfp_reg_ptr(bool dp, int reg) #define ARM_CP_RW_BIT (1 << 20) /* Include the VFP and Neon decoders */ +#include "decode-m-nocp.c.inc" #include "translate-vfp.c.inc" #include "translate-neon.c.inc" @@ -2471,21 +2458,6 @@ static int disas_dsp_insn(DisasContext *s, uint32_t insn) return 1; } -#define VFP_REG_SHR(x, n) (((n) > 0) ? 
(x) >> (n) : (x) << -(n)) -#define VFP_DREG(reg, insn, bigbit, smallbit) do { \ - if (dc_isar_feature(aa32_simd_r32, s)) { \ - reg = (((insn) >> (bigbit)) & 0x0f) \ - | (((insn) >> ((smallbit) - 4)) & 0x10); \ - } else { \ - if (insn & (1 << (smallbit))) \ - return 1; \ - reg = ((insn) >> (bigbit)) & 0x0f; \ - }} while (0) - -#define VFP_DREG_D(reg, insn) VFP_DREG(reg, insn, 12, 22) -#define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16, 7) -#define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn, 0, 5) - static inline bool use_goto_tb(DisasContext *s, target_ulong dest) { #ifndef CONFIG_USER_ONLY @@ -4544,48 +4516,12 @@ void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); } -static int disas_coproc_insn(DisasContext *s, uint32_t insn) +static void do_coproc_insn(DisasContext *s, int cpnum, int is64, + int opc1, int crn, int crm, int opc2, + bool isread, int rt, int rt2) { - int cpnum, is64, crn, crm, opc1, opc2, isread, rt, rt2; const ARMCPRegInfo *ri; - cpnum = (insn >> 8) & 0xf; - - /* First check for coprocessor space used for XScale/iwMMXt insns */ - if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cpnum < 2)) { - if (extract32(s->c15_cpar, cpnum, 1) == 0) { - return 1; - } - if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) { - return disas_iwmmxt_insn(s, insn); - } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) { - return disas_dsp_insn(s, insn); - } - return 1; - } - - /* Otherwise treat as a generic register access */ - is64 = (insn & (1 << 25)) == 0; - if (!is64 && ((insn & (1 << 4)) == 0)) { - /* cdp */ - return 1; - } - - crm = insn & 0xf; - if (is64) { - crn = 0; - opc1 = (insn >> 4) & 0xf; - opc2 = 0; - rt2 = (insn >> 16) & 0xf; - } else { - crn = (insn >> 16) & 0xf; - opc1 = (insn >> 21) & 7; - opc2 = (insn >> 5) & 7; - rt2 = 0; - } - isread = (insn >> 20) & 1; - rt = (insn >> 12) & 0xf; - ri = get_arm_cp_reginfo(s->cp_regs, ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2)); if (ri) { @@ -4593,7 +4529,8 @@ static int disas_coproc_insn(DisasContext *s, uint32_t insn) /* Check access permissions */ if (!cp_access_ok(s->current_el, ri, isread)) { - return 1; + unallocated_encoding(s); + return; } if (s->hstr_active || ri->accessfn || @@ -4667,14 +4604,15 @@ static int disas_coproc_insn(DisasContext *s, uint32_t insn) /* Handle special cases first */ switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) { case ARM_CP_NOP: - return 0; + return; case ARM_CP_WFI: if (isread) { - return 1; + unallocated_encoding(s); + return; } gen_set_pc_im(s, s->base.pc_next); s->base.is_jmp = DISAS_WFI; - return 0; + return; default: break; } @@ -4734,7 +4672,7 @@ static int disas_coproc_insn(DisasContext *s, uint32_t insn) /* Write */ if (ri->type & ARM_CP_CONST) { /* If not forbidden by access permissions, treat as WI */ - return 0; + return; } if (is64) { @@ -4800,7 +4738,7 @@ static int disas_coproc_insn(DisasContext *s, uint32_t insn) gen_lookup_tb(s); } - return 0; + return; } /* Unknown register; this might be a guest error or a QEMU @@ -4820,9 +4758,27 @@ static int disas_coproc_insn(DisasContext *s, uint32_t insn) s->ns ? 
"non-secure" : "secure"); } - return 1; + unallocated_encoding(s); + return; } +/* Decode XScale DSP or iWMMXt insn (in the copro space, cp=0 or 1) */ +static void disas_xscale_insn(DisasContext *s, uint32_t insn) +{ + int cpnum = (insn >> 8) & 0xf; + + if (extract32(s->c15_cpar, cpnum, 1) == 0) { + unallocated_encoding(s); + } else if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) { + if (disas_iwmmxt_insn(s, insn)) { + unallocated_encoding(s); + } + } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) { + if (disas_dsp_insn(s, insn)) { + unallocated_encoding(s); + } + } +} /* Store a 64-bit value to a register pair. Clobbers val. */ static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val) @@ -5222,6 +5178,68 @@ static int t16_pop_list(DisasContext *s, int x) #include "decode-t32.c.inc" #include "decode-t16.c.inc" +static bool valid_cp(DisasContext *s, int cp) +{ + /* + * Return true if this coprocessor field indicates something + * that's really a possible coprocessor. + * For v7 and earlier, coprocessors 8..15 were reserved for Arm use, + * and of those only cp14 and cp15 were used for registers. + * cp10 and cp11 were used for VFP and Neon, whose decode is + * dealt with elsewhere. With the advent of fp16, cp9 is also + * now part of VFP. + * For v8A and later, the encoding has been tightened so that + * only cp14 and cp15 are valid, and other values aren't considered + * to be in the coprocessor-instruction space at all. v8M still + * permits coprocessors 0..7. + */ + if (arm_dc_feature(s, ARM_FEATURE_V8) && + !arm_dc_feature(s, ARM_FEATURE_M)) { + return cp >= 14; + } + return cp < 8 || cp >= 14; +} + +static bool trans_MCR(DisasContext *s, arg_MCR *a) +{ + if (!valid_cp(s, a->cp)) { + return false; + } + do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2, + false, a->rt, 0); + return true; +} + +static bool trans_MRC(DisasContext *s, arg_MRC *a) +{ + if (!valid_cp(s, a->cp)) { + return false; + } + do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2, + true, a->rt, 0); + return true; +} + +static bool trans_MCRR(DisasContext *s, arg_MCRR *a) +{ + if (!valid_cp(s, a->cp)) { + return false; + } + do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0, + false, a->rt, a->rt2); + return true; +} + +static bool trans_MRRC(DisasContext *s, arg_MRRC *a) +{ + if (!valid_cp(s, a->cp)) { + return false; + } + do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0, + true, a->rt, a->rt2); + return true; +} + /* Helpers to swap operands for reverse-subtract. */ static void gen_rsb(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b) { @@ -7862,7 +7880,7 @@ static bool trans_BLX_i(DisasContext *s, arg_BLX_i *a) { TCGv_i32 tmp; - /* For A32, ARCH(5) is checked near the start of the uncond block. */ + /* For A32, ARM_FEATURE_V5 is checked near the start of the uncond block. */ if (s->thumb && (a->imm & 2)) { return false; } @@ -8228,7 +8246,10 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn) * choose to UNDEF. In ARMv5 and above the space is used * for miscellaneous unconditional instructions. */ - ARCH(5); + if (!arm_dc_feature(s, ARM_FEATURE_V5)) { + unallocated_encoding(s); + return; + } /* Unconditional instructions. */ /* TODO: Perhaps merge these into one decodetree output file. */ @@ -8265,25 +8286,18 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn) return; } /* fall back to legacy decoder */ - - switch ((insn >> 24) & 0xf) { - case 0xc: - case 0xd: - case 0xe: - if (((insn >> 8) & 0xe) == 10) { - /* VFP, but failed disas_vfp. 
*/ - goto illegal_op; - } - if (disas_coproc_insn(s, insn)) { - /* Coprocessor. */ - goto illegal_op; + /* TODO: convert xscale/iwmmxt decoder to decodetree ?? */ + if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) { + if (((insn & 0x0c000e00) == 0x0c000000) + && ((insn & 0x03000000) != 0x03000000)) { + /* Coprocessor insn, coprocessor 0 or 1 */ + disas_xscale_insn(s, insn); + return; } - break; - default: - illegal_op: - unallocated_encoding(s); - break; } + +illegal_op: + unallocated_encoding(s); } static bool thumb_insn_is_16bit(DisasContext *s, uint32_t pc, uint32_t insn) @@ -8360,7 +8374,23 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) goto illegal_op; } } else if ((insn & 0xf800e800) != 0xf000e800) { - ARCH(6T2); + if (!arm_dc_feature(s, ARM_FEATURE_THUMB2)) { + unallocated_encoding(s); + return; + } + } + + if (arm_dc_feature(s, ARM_FEATURE_M)) { + /* + * NOCP takes precedence over any UNDEF for (almost) the + * entire wide range of coprocessor-space encodings, so check + * for it first before proceeding to actually decode eg VFP + * insns. This decode also handles the few insns which are + * in copro space but do not have NOCP checks (eg VLLDM, VLSTM). + */ + if (disas_m_nocp(s, insn)) { + return; + } } if ((insn & 0xef000000) == 0xef000000) { @@ -8401,52 +8431,9 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) ((insn >> 28) == 0xe && disas_vfp(s, insn))) { return; } - /* fall back to legacy decoder */ - switch ((insn >> 25) & 0xf) { - case 0: case 1: case 2: case 3: - /* 16-bit instructions. Should never happen. */ - abort(); - case 6: case 7: case 14: case 15: - /* Coprocessor. */ - if (arm_dc_feature(s, ARM_FEATURE_M)) { - /* 0b111x_11xx_xxxx_xxxx_xxxx_xxxx_xxxx_xxxx */ - if (extract32(insn, 24, 2) == 3) { - goto illegal_op; /* op0 = 0b11 : unallocated */ - } - - if (((insn >> 8) & 0xe) == 10 && - dc_isar_feature(aa32_fpsp_v2, s)) { - /* FP, and the CPU supports it */ - goto illegal_op; - } else { - /* All other insns: NOCP */ - gen_exception_insn(s, s->pc_curr, EXCP_NOCP, - syn_uncategorized(), - default_exception_el(s)); - } - break; - } - if (((insn >> 24) & 3) == 3) { - /* Neon DP, but failed disas_neon_dp() */ - goto illegal_op; - } else if (((insn >> 8) & 0xe) == 10) { - /* VFP, but failed disas_vfp. */ - goto illegal_op; - } else { - if (insn & (1 << 28)) - goto illegal_op; - if (disas_coproc_insn(s, insn)) { - goto illegal_op; - } - } - break; - case 12: - goto illegal_op; - default: - illegal_op: - unallocated_encoding(s); - } +illegal_op: + unallocated_encoding(s); } static void disas_thumb_insn(DisasContext *s, uint32_t insn) @@ -8567,7 +8554,6 @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) cpu_V0 = tcg_temp_new_i64(); cpu_V1 = tcg_temp_new_i64(); - /* FIXME: cpu_M0 can probably be the same as cpu_V0. */ cpu_M0 = tcg_temp_new_i64(); } diff --git a/target/arm/translate.h b/target/arm/translate.h index 16f2699ad7..6d6d4c0f42 100644 --- a/target/arm/translate.h +++ b/target/arm/translate.h @@ -393,4 +393,56 @@ typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32); typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr); typedef void AtomicThreeOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, MemOp); +/* + * Enum for argument to fpstatus_ptr(). 
+ */ +typedef enum ARMFPStatusFlavour { + FPST_FPCR, + FPST_FPCR_F16, + FPST_STD, + FPST_STD_F16, +} ARMFPStatusFlavour; + +/** + * fpstatus_ptr: return TCGv_ptr to the specified fp_status field + * + * We have multiple softfloat float_status fields in the Arm CPU state struct + * (see the comment in cpu.h for details). Return a TCGv_ptr which has + * been set up to point to the requested field in the CPU state struct. + * The options are: + * + * FPST_FPCR + * for non-FP16 operations controlled by the FPCR + * FPST_FPCR_F16 + * for operations controlled by the FPCR where FPCR.FZ16 is to be used + * FPST_STD + * for A32/T32 Neon operations using the "standard FPSCR value" + * FPST_STD_F16 + * as FPST_STD, but where FPCR.FZ16 is to be used + */ +static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour) +{ + TCGv_ptr statusptr = tcg_temp_new_ptr(); + int offset; + + switch (flavour) { + case FPST_FPCR: + offset = offsetof(CPUARMState, vfp.fp_status); + break; + case FPST_FPCR_F16: + offset = offsetof(CPUARMState, vfp.fp_status_f16); + break; + case FPST_STD: + offset = offsetof(CPUARMState, vfp.standard_fp_status); + break; + case FPST_STD_F16: + offset = offsetof(CPUARMState, vfp.standard_fp_status_f16); + break; + default: + g_assert_not_reached(); + } + tcg_gen_addi_ptr(statusptr, cpu_env, offset); + return statusptr; +} + #endif /* TARGET_ARM_TRANSLATE_H */ diff --git a/target/arm/vfp.decode b/target/arm/vfp.decode index 5fd70f975a..2c793e3e87 100644 --- a/target/arm/vfp.decode +++ b/target/arm/vfp.decode @@ -213,5 +213,3 @@ VCVT_sp_int ---- 1110 1.11 110 s:1 .... 1010 rz:1 1.0 .... \ vd=%vd_sp vm=%vm_sp VCVT_dp_int ---- 1110 1.11 110 s:1 .... 1011 rz:1 1.0 .... \ vd=%vd_sp vm=%vm_dp - -VLLDM_VLSTM 1110 1100 001 l:1 rn:4 0000 1010 0000 0000 diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c index 60dcd4bf14..64266ece62 100644 --- a/target/arm/vfp_helper.c +++ b/target/arm/vfp_helper.c @@ -93,6 +93,8 @@ static uint32_t vfp_get_fpscr_from_host(CPUARMState *env) /* FZ16 does not generate an input denormal exception. 
*/ i |= (get_float_exception_flags(&env->vfp.fp_status_f16) & ~float_flag_input_denormal); + i |= (get_float_exception_flags(&env->vfp.standard_fp_status_f16) + & ~float_flag_input_denormal); return vfp_exceptbits_from_host(i); } @@ -124,7 +126,9 @@ static void vfp_set_fpscr_to_host(CPUARMState *env, uint32_t val) if (changed & FPCR_FZ16) { bool ftz_enabled = val & FPCR_FZ16; set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16); + set_flush_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16); set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16); + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16); } if (changed & FPCR_FZ) { bool ftz_enabled = val & FPCR_FZ; @@ -146,6 +150,7 @@ static void vfp_set_fpscr_to_host(CPUARMState *env, uint32_t val) set_float_exception_flags(i, &env->vfp.fp_status); set_float_exception_flags(0, &env->vfp.fp_status_f16); set_float_exception_flags(0, &env->vfp.standard_fp_status); + set_float_exception_flags(0, &env->vfp.standard_fp_status_f16); } #else diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index e7d382ac10..3c4e1b3475 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -2191,6 +2191,8 @@ enum { PPC2_PM_ISA206 = 0x0000000000040000ULL, /* POWER ISA 3.0 */ PPC2_ISA300 = 0x0000000000080000ULL, + /* POWER ISA 3.1 */ + PPC2_ISA310 = 0x0000000000100000ULL, #define PPC_TCG_INSNS2 (PPC2_BOOKE206 | PPC2_VSX | PPC2_PRCNTL | PPC2_DBRX | \ PPC2_ISA205 | PPC2_VSX207 | PPC2_PERM_ISA206 | \ @@ -2199,7 +2201,7 @@ enum { PPC2_BCTAR_ISA207 | PPC2_LSQ_ISA207 | \ PPC2_ALTIVEC_207 | PPC2_ISA207S | PPC2_DFP | \ PPC2_FP_CVT_S64 | PPC2_TM | PPC2_PM_ISA206 | \ - PPC2_ISA300) + PPC2_ISA300 | PPC2_ISA310) }; /*****************************************************************************/ diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 90166cbabd..6a4dccf70c 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -184,7 +184,10 @@ DEF_HELPER_3(vmulosw, void, avr, avr, avr) DEF_HELPER_3(vmuloub, void, avr, avr, avr) DEF_HELPER_3(vmulouh, void, avr, avr, avr) DEF_HELPER_3(vmulouw, void, avr, avr, avr) -DEF_HELPER_3(vmuluwm, void, avr, avr, avr) +DEF_HELPER_3(vmulhsw, void, avr, avr, avr) +DEF_HELPER_3(vmulhuw, void, avr, avr, avr) +DEF_HELPER_3(vmulhsd, void, avr, avr, avr) +DEF_HELPER_3(vmulhud, void, avr, avr, avr) DEF_HELPER_3(vslo, void, avr, avr, avr) DEF_HELPER_3(vsro, void, avr, avr, avr) DEF_HELPER_3(vsrv, void, avr, avr, avr) diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index 43ebf1daad..b45626f44c 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -523,19 +523,6 @@ void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b) r->VsrD(0) = 0; } -#define VARITH_DO(name, op, element) \ - void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ - { \ - int i; \ - \ - for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ - r->element[i] = a->element[i] op b->element[i]; \ - } \ - } -VARITH_DO(muluwm, *, u32) -#undef VARITH_DO -#undef VARITH - #define VARITHFP(suffix, func) \ void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ ppc_avr_t *b) \ @@ -1099,6 +1086,41 @@ VMUL(uw, u32, VsrW, VsrD, uint64_t) #undef VMUL_DO_ODD #undef VMUL +void helper_vmulhsw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ + int i; + + for (i = 0; i < 4; i++) { + r->s32[i] = (int32_t)(((int64_t)a->s32[i] * (int64_t)b->s32[i]) >> 32); + } +} + +void helper_vmulhuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ + int i; + + for (i = 0; i < 4; i++) { + r->u32[i] = (uint32_t)(((uint64_t)a->u32[i] * + 
(uint64_t)b->u32[i]) >> 32); + } +} + +void helper_vmulhsd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ + uint64_t discard; + + muls64(&discard, &r->u64[0], a->s64[0], b->s64[0]); + muls64(&discard, &r->u64[1], a->s64[1], b->s64[1]); +} + +void helper_vmulhud(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ + uint64_t discard; + + mulu64(&discard, &r->u64[0], a->u64[0], b->u64[0]); + mulu64(&discard, &r->u64[1], a->u64[1], b->u64[1]); +} + void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) { diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 04db0d865c..fedb9b2271 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -6971,7 +6971,47 @@ static void gen_dform3D(DisasContext *ctx) return gen_invalid(ctx); } +#if defined(TARGET_PPC64) +/* brd */ +static void gen_brd(DisasContext *ctx) +{ + tcg_gen_bswap64_i64(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)]); +} + +/* brw */ +static void gen_brw(DisasContext *ctx) +{ + tcg_gen_bswap64_i64(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)]); + tcg_gen_rotli_i64(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], 32); + +} + +/* brh */ +static void gen_brh(DisasContext *ctx) +{ + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + TCGv_i64 t2 = tcg_temp_new_i64(); + + tcg_gen_movi_i64(t0, 0x00ff00ff00ff00ffull); + tcg_gen_shri_i64(t1, cpu_gpr[rS(ctx->opcode)], 8); + tcg_gen_and_i64(t2, t1, t0); + tcg_gen_and_i64(t1, cpu_gpr[rS(ctx->opcode)], t0); + tcg_gen_shli_i64(t1, t1, 8); + tcg_gen_or_i64(cpu_gpr[rA(ctx->opcode)], t1, t2); + + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); + tcg_temp_free_i64(t2); +} +#endif + static opcode_t opcodes[] = { +#if defined(TARGET_PPC64) +GEN_HANDLER_E(brd, 0x1F, 0x1B, 0x05, 0x0000F801, PPC_NONE, PPC2_ISA310), +GEN_HANDLER_E(brw, 0x1F, 0x1B, 0x04, 0x0000F801, PPC_NONE, PPC2_ISA310), +GEN_HANDLER_E(brh, 0x1F, 0x1B, 0x06, 0x0000F801, PPC_NONE, PPC2_ISA310), +#endif GEN_HANDLER(invalid, 0x00, 0x00, 0x00, 0xFFFFFFFF, PPC_NONE), GEN_HANDLER(cmp, 0x1F, 0x00, 0x00, 0x00400000, PPC_INTEGER), GEN_HANDLER(cmpi, 0x0B, 0xFF, 0xFF, 0x00400000, PPC_INTEGER), diff --git a/target/ppc/translate/spe-impl.c.inc b/target/ppc/translate/spe-impl.c.inc index 36b4d5654d..2e6e799a25 100644 --- a/target/ppc/translate/spe-impl.c.inc +++ b/target/ppc/translate/spe-impl.c.inc @@ -349,14 +349,24 @@ static inline void gen_evmergelohi(DisasContext *ctx) } static inline void gen_evsplati(DisasContext *ctx) { - uint64_t imm = ((int32_t)(rA(ctx->opcode) << 27)) >> 27; + uint64_t imm; + if (unlikely(!ctx->spe_enabled)) { + gen_exception(ctx, POWERPC_EXCP_SPEU); + return; + } + imm = ((int32_t)(rA(ctx->opcode) << 27)) >> 27; tcg_gen_movi_tl(cpu_gpr[rD(ctx->opcode)], imm); tcg_gen_movi_tl(cpu_gprh[rD(ctx->opcode)], imm); } static inline void gen_evsplatfi(DisasContext *ctx) { - uint64_t imm = rA(ctx->opcode) << 27; + uint64_t imm; + if (unlikely(!ctx->spe_enabled)) { + gen_exception(ctx, POWERPC_EXCP_SPEU); + return; + } + imm = rA(ctx->opcode) << 27; tcg_gen_movi_tl(cpu_gpr[rD(ctx->opcode)], imm); tcg_gen_movi_tl(cpu_gprh[rD(ctx->opcode)], imm); @@ -389,21 +399,37 @@ static inline void gen_evsel(DisasContext *ctx) static void gen_evsel0(DisasContext *ctx) { + if (unlikely(!ctx->spe_enabled)) { + gen_exception(ctx, POWERPC_EXCP_SPEU); + return; + } gen_evsel(ctx); } static void gen_evsel1(DisasContext *ctx) { + if (unlikely(!ctx->spe_enabled)) { + gen_exception(ctx, POWERPC_EXCP_SPEU); + return; + } gen_evsel(ctx); } static void gen_evsel2(DisasContext *ctx) { 
+ if (unlikely(!ctx->spe_enabled)) { + gen_exception(ctx, POWERPC_EXCP_SPEU); + return; + } gen_evsel(ctx); } static void gen_evsel3(DisasContext *ctx) { + if (unlikely(!ctx->spe_enabled)) { + gen_exception(ctx, POWERPC_EXCP_SPEU); + return; + } gen_evsel(ctx); } @@ -518,6 +544,11 @@ static inline void gen_evmwsmia(DisasContext *ctx) { TCGv_i64 tmp; + if (unlikely(!ctx->spe_enabled)) { + gen_exception(ctx, POWERPC_EXCP_SPEU); + return; + } + gen_evmwsmi(ctx); /* rD := rA * rB */ tmp = tcg_temp_new_i64(); @@ -531,8 +562,13 @@ static inline void gen_evmwsmia(DisasContext *ctx) static inline void gen_evmwsmiaa(DisasContext *ctx) { - TCGv_i64 acc = tcg_temp_new_i64(); - TCGv_i64 tmp = tcg_temp_new_i64(); + TCGv_i64 acc; + TCGv_i64 tmp; + + if (unlikely(!ctx->spe_enabled)) { + gen_exception(ctx, POWERPC_EXCP_SPEU); + return; + } gen_evmwsmi(ctx); /* rD := rA * rB */ @@ -892,8 +928,14 @@ static inline void gen_##name(DisasContext *ctx) \ #define GEN_SPEFPUOP_CONV_32_64(name) \ static inline void gen_##name(DisasContext *ctx) \ { \ - TCGv_i64 t0 = tcg_temp_new_i64(); \ - TCGv_i32 t1 = tcg_temp_new_i32(); \ + TCGv_i64 t0; \ + TCGv_i32 t1; \ + if (unlikely(!ctx->spe_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_SPEU); \ + return; \ + } \ + t0 = tcg_temp_new_i64(); \ + t1 = tcg_temp_new_i32(); \ gen_load_gpr64(t0, rB(ctx->opcode)); \ gen_helper_##name(t1, cpu_env, t0); \ tcg_gen_extu_i32_tl(cpu_gpr[rD(ctx->opcode)], t1); \ @@ -903,8 +945,14 @@ static inline void gen_##name(DisasContext *ctx) \ #define GEN_SPEFPUOP_CONV_64_32(name) \ static inline void gen_##name(DisasContext *ctx) \ { \ - TCGv_i64 t0 = tcg_temp_new_i64(); \ - TCGv_i32 t1 = tcg_temp_new_i32(); \ + TCGv_i64 t0; \ + TCGv_i32 t1; \ + if (unlikely(!ctx->spe_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_SPEU); \ + return; \ + } \ + t0 = tcg_temp_new_i64(); \ + t1 = tcg_temp_new_i32(); \ tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]); \ gen_helper_##name(t0, cpu_env, t1); \ gen_store_gpr64(rD(ctx->opcode), t0); \ @@ -914,7 +962,12 @@ static inline void gen_##name(DisasContext *ctx) \ #define GEN_SPEFPUOP_CONV_64_64(name) \ static inline void gen_##name(DisasContext *ctx) \ { \ - TCGv_i64 t0 = tcg_temp_new_i64(); \ + TCGv_i64 t0; \ + if (unlikely(!ctx->spe_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_SPEU); \ + return; \ + } \ + t0 = tcg_temp_new_i64(); \ gen_load_gpr64(t0, rB(ctx->opcode)); \ gen_helper_##name(t0, cpu_env, t0); \ gen_store_gpr64(rD(ctx->opcode), t0); \ @@ -923,13 +976,8 @@ static inline void gen_##name(DisasContext *ctx) \ #define GEN_SPEFPUOP_ARITH2_32_32(name) \ static inline void gen_##name(DisasContext *ctx) \ { \ - TCGv_i32 t0, t1; \ - if (unlikely(!ctx->spe_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_SPEU); \ - return; \ - } \ - t0 = tcg_temp_new_i32(); \ - t1 = tcg_temp_new_i32(); \ + TCGv_i32 t0 = tcg_temp_new_i32(); \ + TCGv_i32 t1 = tcg_temp_new_i32(); \ tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]); \ tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]); \ gen_helper_##name(t0, cpu_env, t0, t1); \ @@ -958,13 +1006,8 @@ static inline void gen_##name(DisasContext *ctx) \ #define GEN_SPEFPUOP_COMP_32(name) \ static inline void gen_##name(DisasContext *ctx) \ { \ - TCGv_i32 t0, t1; \ - if (unlikely(!ctx->spe_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_SPEU); \ - return; \ - } \ - t0 = tcg_temp_new_i32(); \ - t1 = tcg_temp_new_i32(); \ + TCGv_i32 t0 = tcg_temp_new_i32(); \ + TCGv_i32 t1 = tcg_temp_new_i32(); \ \ tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]); \ tcg_gen_trunc_tl_i32(t1, 
cpu_gpr[rB(ctx->opcode)]); \ @@ -1074,28 +1117,16 @@ GEN_SPEFPUOP_ARITH2_32_32(efsmul); GEN_SPEFPUOP_ARITH2_32_32(efsdiv); static inline void gen_efsabs(DisasContext *ctx) { - if (unlikely(!ctx->spe_enabled)) { - gen_exception(ctx, POWERPC_EXCP_SPEU); - return; - } tcg_gen_andi_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], (target_long)~0x80000000LL); } static inline void gen_efsnabs(DisasContext *ctx) { - if (unlikely(!ctx->spe_enabled)) { - gen_exception(ctx, POWERPC_EXCP_SPEU); - return; - } tcg_gen_ori_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], 0x80000000); } static inline void gen_efsneg(DisasContext *ctx) { - if (unlikely(!ctx->spe_enabled)) { - gen_exception(ctx, POWERPC_EXCP_SPEU); - return; - } tcg_gen_xori_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], 0x80000000); } diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index de2fd136ff..92b9527aff 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ b/target/ppc/translate/vmx-impl.c.inc @@ -801,18 +801,27 @@ static void trans_vclzd(DisasContext *ctx) GEN_VXFORM(vmuloub, 4, 0); GEN_VXFORM(vmulouh, 4, 1); GEN_VXFORM(vmulouw, 4, 2); -GEN_VXFORM(vmuluwm, 4, 2); +GEN_VXFORM_V(vmuluwm, MO_32, tcg_gen_gvec_mul, 4, 2); GEN_VXFORM_DUAL(vmulouw, PPC_ALTIVEC, PPC_NONE, vmuluwm, PPC_NONE, PPC2_ALTIVEC_207) GEN_VXFORM(vmulosb, 4, 4); GEN_VXFORM(vmulosh, 4, 5); GEN_VXFORM(vmulosw, 4, 6); +GEN_VXFORM_V(vmulld, MO_64, tcg_gen_gvec_mul, 4, 7); GEN_VXFORM(vmuleub, 4, 8); GEN_VXFORM(vmuleuh, 4, 9); GEN_VXFORM(vmuleuw, 4, 10); +GEN_VXFORM(vmulhuw, 4, 10); +GEN_VXFORM(vmulhud, 4, 11); +GEN_VXFORM_DUAL(vmuleuw, PPC_ALTIVEC, PPC_NONE, + vmulhuw, PPC_NONE, PPC2_ISA310); GEN_VXFORM(vmulesb, 4, 12); GEN_VXFORM(vmulesh, 4, 13); GEN_VXFORM(vmulesw, 4, 14); +GEN_VXFORM(vmulhsw, 4, 14); +GEN_VXFORM_DUAL(vmulesw, PPC_ALTIVEC, PPC_NONE, + vmulhsw, PPC_NONE, PPC2_ISA310); +GEN_VXFORM(vmulhsd, 4, 15); GEN_VXFORM_V(vslb, MO_8, tcg_gen_gvec_shlv, 2, 4); GEN_VXFORM_V(vslh, MO_16, tcg_gen_gvec_shlv, 2, 5); GEN_VXFORM_V(vslw, MO_32, tcg_gen_gvec_shlv, 2, 6); diff --git a/target/ppc/translate/vmx-ops.c.inc b/target/ppc/translate/vmx-ops.c.inc index 84e05fb827..f3f4855111 100644 --- a/target/ppc/translate/vmx-ops.c.inc +++ b/target/ppc/translate/vmx-ops.c.inc @@ -48,6 +48,9 @@ GEN_HANDLER_E(name, 0x04, opc2, opc3, inval, PPC_NONE, PPC2_ISA300) GEN_HANDLER_E_2(name, 0x04, opc2, opc3, opc4, 0x00000000, PPC_NONE, \ PPC2_ISA300) +#define GEN_VXFORM_310(name, opc2, opc3) \ +GEN_HANDLER_E(name, 0x04, opc2, opc3, 0x00000000, PPC_NONE, PPC2_ISA310) + #define GEN_VXFORM_DUAL(name0, name1, opc2, opc3, type0, type1) \ GEN_HANDLER_E(name0##_##name1, 0x4, opc2, opc3, 0x00000000, type0, type1) @@ -104,12 +107,15 @@ GEN_VXFORM_DUAL(vmulouw, vmuluwm, 4, 2, PPC_ALTIVEC, PPC_NONE), GEN_VXFORM(vmulosb, 4, 4), GEN_VXFORM(vmulosh, 4, 5), GEN_VXFORM_207(vmulosw, 4, 6), +GEN_VXFORM_310(vmulld, 4, 7), GEN_VXFORM(vmuleub, 4, 8), GEN_VXFORM(vmuleuh, 4, 9), -GEN_VXFORM_207(vmuleuw, 4, 10), +GEN_VXFORM_DUAL(vmuleuw, vmulhuw, 4, 10, PPC_ALTIVEC, PPC_NONE), +GEN_VXFORM_310(vmulhud, 4, 11), GEN_VXFORM(vmulesb, 4, 12), GEN_VXFORM(vmulesh, 4, 13), -GEN_VXFORM_207(vmulesw, 4, 14), +GEN_VXFORM_DUAL(vmulesw, vmulhsw, 4, 14, PPC_ALTIVEC, PPC_NONE), +GEN_VXFORM_310(vmulhsd, 4, 15), GEN_VXFORM(vslb, 2, 4), GEN_VXFORM(vslh, 2, 5), GEN_VXFORM_DUAL(vslw, vrlwnm, 2, 6, PPC_ALTIVEC, PPC_NONE), diff --git a/target/ppc/translate_init.c.inc b/target/ppc/translate_init.c.inc index 7e66822b5d..230a062d29 100644 --- a/target/ppc/translate_init.c.inc +++ 
b/target/ppc/translate_init.c.inc @@ -284,12 +284,24 @@ static void spr_write_atbu(DisasContext *ctx, int sprn, int gprn) ATTRIBUTE_UNUSED static void spr_read_purr(DisasContext *ctx, int gprn, int sprn) { + if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) { + gen_io_start(); + } gen_helper_load_purr(cpu_gpr[gprn], cpu_env); + if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) { + gen_stop_exception(ctx); + } } static void spr_write_purr(DisasContext *ctx, int sprn, int gprn) { + if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) { + gen_io_start(); + } gen_helper_store_purr(cpu_env, cpu_gpr[gprn]); + if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) { + gen_stop_exception(ctx); + } } /* HDECR */ @@ -319,17 +331,35 @@ static void spr_write_hdecr(DisasContext *ctx, int sprn, int gprn) static void spr_read_vtb(DisasContext *ctx, int gprn, int sprn) { + if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) { + gen_io_start(); + } gen_helper_load_vtb(cpu_gpr[gprn], cpu_env); + if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) { + gen_stop_exception(ctx); + } } static void spr_write_vtb(DisasContext *ctx, int sprn, int gprn) { + if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) { + gen_io_start(); + } gen_helper_store_vtb(cpu_env, cpu_gpr[gprn]); + if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) { + gen_stop_exception(ctx); + } } static void spr_write_tbu40(DisasContext *ctx, int sprn, int gprn) { + if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) { + gen_io_start(); + } gen_helper_store_tbu40(cpu_env, cpu_gpr[gprn]); + if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) { + gen_stop_exception(ctx); + } } #endif @@ -9201,7 +9231,7 @@ POWERPC_FAMILY(POWER10)(ObjectClass *oc, void *data) PPC2_FP_TST_ISA206 | PPC2_BCTAR_ISA207 | PPC2_LSQ_ISA207 | PPC2_ALTIVEC_207 | PPC2_ISA205 | PPC2_ISA207S | PPC2_FP_CVT_S64 | - PPC2_TM | PPC2_ISA300 | PPC2_PRCNTL; + PPC2_TM | PPC2_ISA300 | PPC2_PRCNTL | PPC2_ISA310; pcc->msr_mask = (1ull << MSR_SF) | (1ull << MSR_HV) | (1ull << MSR_TM) | diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 3bef3789b3..393c4b30e0 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -564,6 +564,7 @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, #define VMULOUH VX4(72) #define VMULOUW VX4(136) /* v2.07 */ #define VMULUWM VX4(137) /* v2.07 */ +#define VMULLD VX4(457) /* v3.10 */ #define VMSUMUHM VX4(38) #define VMRGHB VX4(12) @@ -3022,6 +3023,8 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) return -1; case MO_32: return have_isa_2_07 ? 1 : -1; + case MO_64: + return have_isa_3_10; } return 0; case INDEX_op_bitsel_vec: @@ -3158,6 +3161,7 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, static const uint32_t add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM }, sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM }, + mul_op[4] = { 0, 0, VMULUWM, VMULLD }, neg_op[4] = { 0, 0, VNEGW, VNEGD }, eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD }, ne_op[4] = { VCMPNEB, VCMPNEH, VCMPNEW, 0 }, @@ -3208,8 +3212,7 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, a1 = 0; break; case INDEX_op_mul_vec: - tcg_debug_assert(vece == MO_32 && have_isa_2_07); - insn = VMULUWM; + insn = mul_op[vece]; break; case INDEX_op_ssadd_vec: insn = ssadd_op[vece]; @@ -3729,6 +3732,11 @@ static void tcg_target_init(TCGContext *s) have_isa = tcg_isa_3_00; } #endif +#ifdef PPC_FEATURE2_ARCH_3_10 + if (hwcap2 & PPC_FEATURE2_ARCH_3_10) { + have_isa = tcg_isa_3_10; + } +#endif #ifdef PPC_FEATURE2_HAS_ISEL /* Prefer explicit instruction from the kernel. 
*/ diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index be5b2901c3..aee38157a2 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -63,6 +63,7 @@ typedef enum { tcg_isa_2_06, tcg_isa_2_07, tcg_isa_3_00, + tcg_isa_3_10, } TCGPowerISA; extern TCGPowerISA have_isa; @@ -72,6 +73,7 @@ extern bool have_vsx; #define have_isa_2_06 (have_isa >= tcg_isa_2_06) #define have_isa_2_07 (have_isa >= tcg_isa_2_07) #define have_isa_3_00 (have_isa >= tcg_isa_3_00) #define have_isa_3_10 (have_isa >= tcg_isa_3_10) /* optional instructions automatically implemented */ #define TCG_TARGET_HAS_ext8u_i32 0 /* andi */
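
The multiply-high helpers added in target/ppc/int_helper.c above (helper_vmulhsw, helper_vmulhuw, helper_vmulhsd, helper_vmulhud) all follow the same pattern: widen the operands, multiply, and keep only the upper half of the product. A minimal standalone sketch of that arithmetic, outside the ppc_avr_t plumbing — the harness below is illustrative only, not part of the patch, and assumes a GCC/Clang-style unsigned __int128 as a stand-in for QEMU's mulu64() utility on the 64-bit lane:

#include <stdint.h>
#include <stdio.h>

/* High 32 bits of a signed 32x32->64 product, one lane of vmulhsw. */
static int32_t mulh_s32(int32_t a, int32_t b)
{
    return (int32_t)(((int64_t)a * (int64_t)b) >> 32);
}

/* High 64 bits of an unsigned 64x64->128 product, one lane of vmulhud.
 * The patch uses mulu64(&discard, &hi, a, b); __int128 is a stand-in. */
static uint64_t mulh_u64(uint64_t a, uint64_t b)
{
    return (uint64_t)(((unsigned __int128)a * b) >> 64);
}

int main(void)
{
    /* (-2^31) * (-2^31) = 2^62, so the high word is 0x40000000 */
    printf("%08x\n", mulh_s32(INT32_MIN, INT32_MIN));
    /* (2^64 - 1)^2 has high half 0xfffffffffffffffe */
    printf("%016llx\n", (unsigned long long)mulh_u64(~0ull, ~0ull));
    return 0;
}

In the patch itself the 32-bit forms loop over all four word lanes of the ppc_avr_t arguments, while the 64-bit forms let muls64()/mulu64() compute the full 128-bit product and drop the low half through the discard out-parameter.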