63 files changed, 3276 insertions, 1655 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 448dc951c5..e9d861e8ef 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -343,6 +343,7 @@ L: qemu-riscv@nongnu.org S: Supported F: target/riscv/insn_trans/trans_xthead.c.inc F: target/riscv/xthead*.decode +F: target/riscv/th_* F: disas/riscv-xthead* RISC-V XVentanaCondOps extension diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index 2972f75b96..6711b58e0b 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -682,11 +682,14 @@ static inline bool cpu_handle_halt(CPUState *cpu) #ifndef CONFIG_USER_ONLY if (cpu->halted) { const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops; + bool leave_halt; if (tcg_ops->cpu_exec_halt) { - tcg_ops->cpu_exec_halt(cpu); + leave_halt = tcg_ops->cpu_exec_halt(cpu); + } else { + leave_halt = cpu_has_work(cpu); } - if (!cpu_has_work(cpu)) { + if (!leave_halt) { return true; } diff --git a/disas/riscv.c b/disas/riscv.c index e236c8b5b7..297cfa2f63 100644 --- a/disas/riscv.c +++ b/disas/riscv.c @@ -2190,7 +2190,22 @@ static const char *csr_name(int csrno) case 0x0383: return "mibound"; case 0x0384: return "mdbase"; case 0x0385: return "mdbound"; - case 0x03a0: return "pmpcfg3"; + case 0x03a0: return "pmpcfg0"; + case 0x03a1: return "pmpcfg1"; + case 0x03a2: return "pmpcfg2"; + case 0x03a3: return "pmpcfg3"; + case 0x03a4: return "pmpcfg4"; + case 0x03a5: return "pmpcfg5"; + case 0x03a6: return "pmpcfg6"; + case 0x03a7: return "pmpcfg7"; + case 0x03a8: return "pmpcfg8"; + case 0x03a9: return "pmpcfg9"; + case 0x03aa: return "pmpcfg10"; + case 0x03ab: return "pmpcfg11"; + case 0x03ac: return "pmpcfg12"; + case 0x03ad: return "pmpcfg13"; + case 0x03ae: return "pmpcfg14"; + case 0x03af: return "pmpcfg15"; case 0x03b0: return "pmpaddr0"; case 0x03b1: return "pmpaddr1"; case 0x03b2: return "pmpaddr2"; @@ -2207,6 +2222,54 @@ static const char *csr_name(int csrno) case 0x03bd: return "pmpaddr13"; case 0x03be: return "pmpaddr14"; case 0x03bf: return "pmpaddr15"; + case 0x03c0: return "pmpaddr16"; + case 0x03c1: return "pmpaddr17"; + case 0x03c2: return "pmpaddr18"; + case 0x03c3: return "pmpaddr19"; + case 0x03c4: return "pmpaddr20"; + case 0x03c5: return "pmpaddr21"; + case 0x03c6: return "pmpaddr22"; + case 0x03c7: return "pmpaddr23"; + case 0x03c8: return "pmpaddr24"; + case 0x03c9: return "pmpaddr25"; + case 0x03ca: return "pmpaddr26"; + case 0x03cb: return "pmpaddr27"; + case 0x03cc: return "pmpaddr28"; + case 0x03cd: return "pmpaddr29"; + case 0x03ce: return "pmpaddr30"; + case 0x03cf: return "pmpaddr31"; + case 0x03d0: return "pmpaddr32"; + case 0x03d1: return "pmpaddr33"; + case 0x03d2: return "pmpaddr34"; + case 0x03d3: return "pmpaddr35"; + case 0x03d4: return "pmpaddr36"; + case 0x03d5: return "pmpaddr37"; + case 0x03d6: return "pmpaddr38"; + case 0x03d7: return "pmpaddr39"; + case 0x03d8: return "pmpaddr40"; + case 0x03d9: return "pmpaddr41"; + case 0x03da: return "pmpaddr42"; + case 0x03db: return "pmpaddr43"; + case 0x03dc: return "pmpaddr44"; + case 0x03dd: return "pmpaddr45"; + case 0x03de: return "pmpaddr46"; + case 0x03df: return "pmpaddr47"; + case 0x03e0: return "pmpaddr48"; + case 0x03e1: return "pmpaddr49"; + case 0x03e2: return "pmpaddr50"; + case 0x03e3: return "pmpaddr51"; + case 0x03e4: return "pmpaddr52"; + case 0x03e5: return "pmpaddr53"; + case 0x03e6: return "pmpaddr54"; + case 0x03e7: return "pmpaddr55"; + case 0x03e8: return "pmpaddr56"; + case 0x03e9: return "pmpaddr57"; + case 0x03ea: return "pmpaddr58"; + case 0x03eb: return "pmpaddr59"; + case 0x03ec: return "pmpaddr60"; + case 0x03ed: 
return "pmpaddr61"; + case 0x03ee: return "pmpaddr62"; + case 0x03ef: return "pmpaddr63"; case 0x0780: return "mtohost"; case 0x0781: return "mfromhost"; case 0x0782: return "mreset"; diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst index 7fcea54d8d..1a06a5feb6 100644 --- a/docs/system/arm/emulation.rst +++ b/docs/system/arm/emulation.rst @@ -146,6 +146,7 @@ the following architecture extensions: - FEAT_UAO (Unprivileged Access Override control) - FEAT_VHE (Virtualization Host Extensions) - FEAT_VMID16 (16-bit VMID) +- FEAT_WFxT (WFE and WFI instructions with timeout) - FEAT_XNX (Translation table stage 2 Unprivileged Execute-never) For information on the specifics of these extensions, please refer diff --git a/docs/system/target-arm.rst b/docs/system/target-arm.rst index c9d7c0dda7..870d30e350 100644 --- a/docs/system/target-arm.rst +++ b/docs/system/target-arm.rst @@ -86,16 +86,16 @@ undocumented; you can get a complete list by running arm/bananapi_m2u.rst arm/b-l475e-iot01a.rst arm/sabrelite + arm/highbank arm/digic arm/cubieboard arm/emcraft-sf2 - arm/highbank arm/musicpal arm/gumstix arm/mainstone arm/kzm - arm/nrf arm/nseries + arm/nrf arm/nuvoton arm/imx25-pdk arm/orangepi @@ -107,8 +107,8 @@ undocumented; you can get a complete list by running arm/stellaris arm/stm32 arm/virt - arm/xlnx-versal-virt arm/xenpvh + arm/xlnx-versal-virt Emulated CPU architecture support ================================= diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig index 8b97683a45..1ad60da7aa 100644 --- a/hw/arm/Kconfig +++ b/hw/arm/Kconfig @@ -370,6 +370,7 @@ config ZYNQ select A9MPCORE select CADENCE # UART select PFLASH_CFI02 + select PL310 # cache controller select PL330 select SDHCI select SSI_M25P80 diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c index 57c337fd92..e884692f07 100644 --- a/hw/arm/sbsa-ref.c +++ b/hw/arm/sbsa-ref.c @@ -891,7 +891,7 @@ static void sbsa_ref_class_init(ObjectClass *oc, void *data) mc->init = sbsa_ref_init; mc->desc = "QEMU 'SBSA Reference' ARM Virtual Machine"; - mc->default_cpu_type = ARM_CPU_TYPE_NAME("neoverse-n1"); + mc->default_cpu_type = ARM_CPU_TYPE_NAME("neoverse-n2"); mc->valid_cpu_types = valid_cpu_types; mc->max_cpus = 512; mc->pci_allow_0_address = true; diff --git a/hw/arm/xilinx_zynq.c b/hw/arm/xilinx_zynq.c index fc3abcbe88..7f7a3d23fb 100644 --- a/hw/arm/xilinx_zynq.c +++ b/hw/arm/xilinx_zynq.c @@ -84,9 +84,12 @@ static const int dma_irqs[8] = { 0xe3401000 + ARMV7_IMM16(extract32((val), 16, 16)), /* movt r1 ... */ \ 0xe5801000 + (addr) +#define ZYNQ_MAX_CPUS 2 + struct ZynqMachineState { MachineState parent; Clock *ps_clk; + ARMCPU *cpu[ZYNQ_MAX_CPUS]; }; static void zynq_write_board_setup(ARMCPU *cpu, @@ -176,13 +179,13 @@ static inline int zynq_init_spi_flashes(uint32_t base_addr, qemu_irq irq, static void zynq_init(MachineState *machine) { ZynqMachineState *zynq_machine = ZYNQ_MACHINE(machine); - ARMCPU *cpu; MemoryRegion *address_space_mem = get_system_memory(); MemoryRegion *ocm_ram = g_new(MemoryRegion, 1); DeviceState *dev, *slcr; SysBusDevice *busdev; qemu_irq pic[64]; int n; + unsigned int smp_cpus = machine->smp.cpus; /* max 2GB ram */ if (machine->ram_size > 2 * GiB) { @@ -190,21 +193,26 @@ static void zynq_init(MachineState *machine) exit(EXIT_FAILURE); } - cpu = ARM_CPU(object_new(machine->cpu_type)); + for (n = 0; n < smp_cpus; n++) { + Object *cpuobj = object_new(machine->cpu_type); - /* By default A9 CPUs have EL3 enabled. 
This board does not - * currently support EL3 so the CPU EL3 property is disabled before - * realization. - */ - if (object_property_find(OBJECT(cpu), "has_el3")) { - object_property_set_bool(OBJECT(cpu), "has_el3", false, &error_fatal); - } + /* + * By default A9 CPUs have EL3 enabled. This board does not currently + * support EL3 so the CPU EL3 property is disabled before realization. + */ + if (object_property_find(cpuobj, "has_el3")) { + object_property_set_bool(cpuobj, "has_el3", false, &error_fatal); + } - object_property_set_int(OBJECT(cpu), "midr", ZYNQ_BOARD_MIDR, - &error_fatal); - object_property_set_int(OBJECT(cpu), "reset-cbar", MPCORE_PERIPHBASE, - &error_fatal); - qdev_realize(DEVICE(cpu), NULL, &error_fatal); + object_property_set_int(cpuobj, "midr", ZYNQ_BOARD_MIDR, + &error_fatal); + object_property_set_int(cpuobj, "reset-cbar", MPCORE_PERIPHBASE, + &error_fatal); + + qdev_realize(DEVICE(cpuobj), NULL, &error_fatal); + + zynq_machine->cpu[n] = ARM_CPU(cpuobj); + } /* DDR remapped to address zero. */ memory_region_add_subregion(address_space_mem, 0, machine->ram); @@ -237,14 +245,19 @@ static void zynq_init(MachineState *machine) sysbus_mmio_map(SYS_BUS_DEVICE(slcr), 0, 0xF8000000); dev = qdev_new(TYPE_A9MPCORE_PRIV); - qdev_prop_set_uint32(dev, "num-cpu", 1); + qdev_prop_set_uint32(dev, "num-cpu", smp_cpus); busdev = SYS_BUS_DEVICE(dev); sysbus_realize_and_unref(busdev, &error_fatal); sysbus_mmio_map(busdev, 0, MPCORE_PERIPHBASE); - sysbus_connect_irq(busdev, 0, - qdev_get_gpio_in(DEVICE(cpu), ARM_CPU_IRQ)); - sysbus_connect_irq(busdev, 1, - qdev_get_gpio_in(DEVICE(cpu), ARM_CPU_FIQ)); + zynq_binfo.gic_cpu_if_addr = MPCORE_PERIPHBASE + 0x100; + sysbus_create_varargs("l2x0", MPCORE_PERIPHBASE + 0x2000, NULL); + for (n = 0; n < smp_cpus; n++) { + DeviceState *cpudev = DEVICE(zynq_machine->cpu[n]); + sysbus_connect_irq(busdev, (2 * n) + 0, + qdev_get_gpio_in(cpudev, ARM_CPU_IRQ)); + sysbus_connect_irq(busdev, (2 * n) + 1, + qdev_get_gpio_in(cpudev, ARM_CPU_FIQ)); + } for (n = 0; n < 64; n++) { pic[n] = qdev_get_gpio_in(dev, n); @@ -349,7 +362,7 @@ static void zynq_init(MachineState *machine) zynq_binfo.board_setup_addr = BOARD_SETUP_ADDR; zynq_binfo.write_board_setup = zynq_write_board_setup; - arm_load_kernel(cpu, machine, &zynq_binfo); + arm_load_kernel(zynq_machine->cpu[0], machine, &zynq_binfo); } static void zynq_machine_class_init(ObjectClass *oc, void *data) @@ -361,7 +374,7 @@ static void zynq_machine_class_init(ObjectClass *oc, void *data) MachineClass *mc = MACHINE_CLASS(oc); mc->desc = "Xilinx Zynq Platform Baseboard for Cortex-A9"; mc->init = zynq_init; - mc->max_cpus = 1; + mc->max_cpus = ZYNQ_MAX_CPUS; mc->no_sdcard = 1; mc->ignore_memory_transaction_failures = true; mc->valid_cpu_types = valid_cpu_types; diff --git a/hw/intc/arm_gic.c b/hw/intc/arm_gic.c index e4b8437f8b..806832439b 100644 --- a/hw/intc/arm_gic.c +++ b/hw/intc/arm_gic.c @@ -1308,12 +1308,15 @@ static void gic_dist_writeb(void *opaque, hwaddr offset, for (i = 0; i < 8; i++) { if (value & (1 << i)) { + int mask = (irq < GIC_INTERNAL) ? 
(1 << cpu) + : GIC_DIST_TARGET(irq + i); + if (s->security_extn && !attrs.secure && !GIC_DIST_TEST_GROUP(irq + i, 1 << cpu)) { continue; /* Ignore Non-secure access of Group0 IRQ */ } - GIC_DIST_SET_PENDING(irq + i, GIC_DIST_TARGET(irq + i)); + GIC_DIST_SET_PENDING(irq + i, mask); } } } else if (offset < 0x300) { @@ -1407,6 +1410,13 @@ static void gic_dist_writeb(void *opaque, hwaddr offset, value = ALL_CPU_MASK; } s->irq_target[irq] = value & ALL_CPU_MASK; + if (irq >= GIC_INTERNAL && s->irq_state[irq].pending) { + /* + * Changing the target of an interrupt that is currently + * pending updates the set of CPUs it is pending on. + */ + s->irq_state[irq].pending = value & ALL_CPU_MASK; + } } } else if (offset < 0xf00) { /* Interrupt Configuration. */ diff --git a/hw/intc/riscv_aplic.c b/hw/intc/riscv_aplic.c index fc5df0d598..32edd6d07b 100644 --- a/hw/intc/riscv_aplic.c +++ b/hw/intc/riscv_aplic.c @@ -1000,16 +1000,16 @@ DeviceState *riscv_aplic_create(hwaddr addr, hwaddr size, qdev_prop_set_bit(dev, "msimode", msimode); qdev_prop_set_bit(dev, "mmode", mmode); + if (parent) { + riscv_aplic_add_child(parent, dev); + } + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); if (!is_kvm_aia(msimode)) { sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, addr); } - if (parent) { - riscv_aplic_add_child(parent, dev); - } - if (!msimode) { for (i = 0; i < num_harts; i++) { CPUState *cpu = cpu_by_arch_id(hartid_base + i); diff --git a/hw/riscv/boot.c b/hw/riscv/boot.c index 09878e722c..47281ca853 100644 --- a/hw/riscv/boot.c +++ b/hw/riscv/boot.c @@ -209,8 +209,8 @@ static void riscv_load_initrd(MachineState *machine, uint64_t kernel_entry) /* Some RISC-V machines (e.g. opentitan) don't have a fdt. */ if (fdt) { end = start + size; - qemu_fdt_setprop_cell(fdt, "/chosen", "linux,initrd-start", start); - qemu_fdt_setprop_cell(fdt, "/chosen", "linux,initrd-end", end); + qemu_fdt_setprop_u64(fdt, "/chosen", "linux,initrd-start", start); + qemu_fdt_setprop_u64(fdt, "/chosen", "linux,initrd-end", end); } } diff --git a/hw/ufs/trace-events b/hw/ufs/trace-events index 665e1a942b..531dcfc686 100644 --- a/hw/ufs/trace-events +++ b/hw/ufs/trace-events @@ -11,13 +11,18 @@ ufs_exec_nop_cmd(uint32_t slot) "UTRLDBR slot %"PRIu32"" ufs_exec_scsi_cmd(uint32_t slot, uint8_t lun, uint8_t opcode) "slot %"PRIu32", lun 0x%"PRIx8", opcode 0x%"PRIx8"" ufs_exec_query_cmd(uint32_t slot, uint8_t opcode) "slot %"PRIu32", opcode 0x%"PRIx8"" ufs_process_uiccmd(uint32_t uiccmd, uint32_t ucmdarg1, uint32_t ucmdarg2, uint32_t ucmdarg3) "uiccmd 0x%"PRIx32", ucmdarg1 0x%"PRIx32", ucmdarg2 0x%"PRIx32", ucmdarg3 0x%"PRIx32"" +ufs_mcq_complete_req(uint8_t qid) "sqid %"PRIu8"" +ufs_mcq_create_sq(uint8_t sqid, uint8_t cqid, uint64_t addr, uint16_t size) "mcq create sq sqid %"PRIu8", cqid %"PRIu8", addr 0x%"PRIx64", size %"PRIu16"" +ufs_mcq_create_cq(uint8_t cqid, uint64_t addr, uint16_t size) "mcq create cq cqid %"PRIu8", addr 0x%"PRIx64", size %"PRIu16"" # error condition ufs_err_dma_read_utrd(uint32_t slot, uint64_t addr) "failed to read utrd. UTRLDBR slot %"PRIu32", UTRD dma addr %"PRIu64"" ufs_err_dma_read_req_upiu(uint32_t slot, uint64_t addr) "failed to read req upiu. UTRLDBR slot %"PRIu32", request upiu addr %"PRIu64"" ufs_err_dma_read_prdt(uint32_t slot, uint64_t addr) "failed to read prdt. UTRLDBR slot %"PRIu32", prdt addr %"PRIu64"" +ufs_err_dma_read_sq(uint8_t sqid, uint64_t addr) "failed to read sq entry. sqid %"PRIu8", hwaddr %"PRIu64"" ufs_err_dma_write_utrd(uint32_t slot, uint64_t addr) "failed to write utrd. 
UTRLDBR slot %"PRIu32", UTRD dma addr %"PRIu64"" ufs_err_dma_write_rsp_upiu(uint32_t slot, uint64_t addr) "failed to write rsp upiu. UTRLDBR slot %"PRIu32", response upiu addr %"PRIu64"" +ufs_err_dma_write_cq(uint8_t cqid, uint64_t addr) "failed to write cq entry. cqid %"PRIu8", hwaddr %"PRIu64"" ufs_err_utrl_slot_error(uint32_t slot) "UTRLDBR slot %"PRIu32" is in error" ufs_err_utrl_slot_busy(uint32_t slot) "UTRLDBR slot %"PRIu32" is busy" ufs_err_unsupport_register_offset(uint32_t offset) "Register offset 0x%"PRIx32" is not yet supported" @@ -31,3 +36,15 @@ ufs_err_query_invalid_opcode(uint8_t opcode) "query request has invalid opcode. ufs_err_query_invalid_idn(uint8_t opcode, uint8_t idn) "query request has invalid idn. opcode: 0x%"PRIx8", idn 0x%"PRIx8"" ufs_err_query_invalid_index(uint8_t opcode, uint8_t index) "query request has invalid index. opcode: 0x%"PRIx8", index 0x%"PRIx8"" ufs_err_invalid_trans_code(uint32_t slot, uint8_t trans_code) "request upiu has invalid transaction code. slot: %"PRIu32", trans_code: 0x%"PRIx8"" +ufs_err_mcq_db_wr_invalid_sqid(uint8_t qid) "invalid mcq sqid %"PRIu8"" +ufs_err_mcq_db_wr_invalid_db(uint8_t qid, uint32_t db) "invalid mcq doorbell sqid %"PRIu8", db %"PRIu32"" +ufs_err_mcq_create_sq_invalid_sqid(uint8_t qid) "invalid mcq sqid %"PRIu8"" +ufs_err_mcq_create_sq_invalid_cqid(uint8_t qid) "invalid mcq cqid %"PRIu8"" +ufs_err_mcq_create_sq_already_exists(uint8_t qid) "mcq sqid %"PRIu8 "already exists" +ufs_err_mcq_delete_sq_invalid_sqid(uint8_t qid) "invalid mcq sqid %"PRIu8"" +ufs_err_mcq_delete_sq_not_exists(uint8_t qid) "mcq sqid %"PRIu8 "not exists" +ufs_err_mcq_create_cq_invalid_cqid(uint8_t qid) "invalid mcq cqid %"PRIu8"" +ufs_err_mcq_create_cq_already_exists(uint8_t qid) "mcq cqid %"PRIu8 "already exists" +ufs_err_mcq_delete_cq_invalid_cqid(uint8_t qid) "invalid mcq cqid %"PRIu8"" +ufs_err_mcq_delete_cq_not_exists(uint8_t qid) "mcq cqid %"PRIu8 "not exists" +ufs_err_mcq_delete_cq_sq_not_deleted(uint8_t sqid, uint8_t cqid) "mcq sq %"PRIu8" still has cq %"PRIu8"" diff --git a/hw/ufs/ufs.c b/hw/ufs/ufs.c index bac78a32bb..71a88d221c 100644 --- a/hw/ufs/ufs.c +++ b/hw/ufs/ufs.c @@ -9,7 +9,7 @@ */ /** - * Reference Specs: https://www.jedec.org/, 3.1 + * Reference Specs: https://www.jedec.org/, 4.0 * * Usage * ----- @@ -28,10 +28,45 @@ #include "trace.h" #include "ufs.h" -/* The QEMU-UFS device follows spec version 3.1 */ -#define UFS_SPEC_VER 0x0310 +/* The QEMU-UFS device follows spec version 4.0 */ +#define UFS_SPEC_VER 0x0400 #define UFS_MAX_NUTRS 32 #define UFS_MAX_NUTMRS 8 +#define UFS_MCQ_QCFGPTR 2 + +static void ufs_exec_req(UfsRequest *req); +static void ufs_clear_req(UfsRequest *req); + +static inline uint64_t ufs_mcq_reg_addr(UfsHc *u, int qid) +{ + /* Submission Queue MCQ Registers offset (400h) */ + return (UFS_MCQ_QCFGPTR * 0x200) + qid * 0x40; +} + +static inline uint64_t ufs_mcq_op_reg_addr(UfsHc *u, int qid) +{ + /* MCQ Operation & Runtime Registers offset (1000h) */ + return UFS_MCQ_OPR_START + qid * 48; +} + +static inline uint64_t ufs_reg_size(UfsHc *u) +{ + /* Total UFS HCI Register size in bytes */ + return ufs_mcq_op_reg_addr(u, 0) + sizeof(u->mcq_op_reg); +} + +static inline bool ufs_is_mcq_reg(UfsHc *u, uint64_t addr) +{ + uint64_t mcq_reg_addr = ufs_mcq_reg_addr(u, 0); + return addr >= mcq_reg_addr && addr < mcq_reg_addr + sizeof(u->mcq_reg); +} + +static inline bool ufs_is_mcq_op_reg(UfsHc *u, uint64_t addr) +{ + uint64_t mcq_op_reg_addr = ufs_mcq_op_reg_addr(u, 0); + return (addr >= mcq_op_reg_addr && + addr < 
mcq_op_reg_addr + sizeof(u->mcq_op_reg)); +} static MemTxResult ufs_addr_read(UfsHc *u, hwaddr addr, void *buf, int size) { @@ -181,9 +216,14 @@ static MemTxResult ufs_dma_read_upiu(UfsRequest *req) { MemTxResult ret; - ret = ufs_dma_read_utrd(req); - if (ret) { - return ret; + /* + * In case of MCQ, UTRD has already been read from a SQ, so skip it. + */ + if (!ufs_mcq_req(req)) { + ret = ufs_dma_read_utrd(req); + if (ret) { + return ret; + } } ret = ufs_dma_read_req_upiu(req); @@ -335,6 +375,219 @@ static void ufs_process_uiccmd(UfsHc *u, uint32_t val) ufs_irq_check(u); } +static void ufs_mcq_init_req(UfsHc *u, UfsRequest *req, UfsSq *sq) +{ + memset(req, 0, sizeof(*req)); + + req->hc = u; + req->state = UFS_REQUEST_IDLE; + req->slot = UFS_INVALID_SLOT; + req->sq = sq; +} + +static void ufs_mcq_process_sq(void *opaque) +{ + UfsSq *sq = opaque; + UfsHc *u = sq->u; + UfsSqEntry sqe; + UfsRequest *req; + hwaddr addr; + uint16_t head = ufs_mcq_sq_head(u, sq->sqid); + int err; + + while (!(ufs_mcq_sq_empty(u, sq->sqid) || QTAILQ_EMPTY(&sq->req_list))) { + addr = sq->addr + head; + err = ufs_addr_read(sq->u, addr, (void *)&sqe, sizeof(sqe)); + if (err) { + trace_ufs_err_dma_read_sq(sq->sqid, addr); + return; + } + + head = (head + sizeof(sqe)) % (sq->size * sizeof(sqe)); + ufs_mcq_update_sq_head(u, sq->sqid, head); + + req = QTAILQ_FIRST(&sq->req_list); + QTAILQ_REMOVE(&sq->req_list, req, entry); + + ufs_mcq_init_req(sq->u, req, sq); + memcpy(&req->utrd, &sqe, sizeof(req->utrd)); + + req->state = UFS_REQUEST_RUNNING; + ufs_exec_req(req); + } +} + +static void ufs_mcq_process_cq(void *opaque) +{ + UfsCq *cq = opaque; + UfsHc *u = cq->u; + UfsRequest *req, *next; + MemTxResult ret; + uint32_t tail = ufs_mcq_cq_tail(u, cq->cqid); + + QTAILQ_FOREACH_SAFE(req, &cq->req_list, entry, next) + { + ufs_dma_write_rsp_upiu(req); + + req->cqe.utp_addr = + ((uint64_t)req->utrd.command_desc_base_addr_hi << 32ULL) | + req->utrd.command_desc_base_addr_lo; + req->cqe.utp_addr |= req->sq->sqid; + req->cqe.resp_len = req->utrd.response_upiu_length; + req->cqe.resp_off = req->utrd.response_upiu_offset; + req->cqe.prdt_len = req->utrd.prd_table_length; + req->cqe.prdt_off = req->utrd.prd_table_offset; + req->cqe.status = req->utrd.header.dword_2 & 0xf; + req->cqe.error = 0; + + ret = ufs_addr_write(u, cq->addr + tail, &req->cqe, sizeof(req->cqe)); + if (ret) { + trace_ufs_err_dma_write_cq(cq->cqid, cq->addr + tail); + } + QTAILQ_REMOVE(&cq->req_list, req, entry); + + tail = (tail + sizeof(req->cqe)) % (cq->size * sizeof(req->cqe)); + ufs_mcq_update_cq_tail(u, cq->cqid, tail); + + ufs_clear_req(req); + QTAILQ_INSERT_TAIL(&req->sq->req_list, req, entry); + } + + if (!ufs_mcq_cq_empty(u, cq->cqid)) { + u->mcq_op_reg[cq->cqid].cq_int.is = + FIELD_DP32(u->mcq_op_reg[cq->cqid].cq_int.is, CQIS, TEPS, 1); + + u->reg.is = FIELD_DP32(u->reg.is, IS, CQES, 1); + ufs_irq_check(u); + } +} + +static bool ufs_mcq_create_sq(UfsHc *u, uint8_t qid, uint32_t attr) +{ + UfsMcqReg *reg = &u->mcq_reg[qid]; + UfsSq *sq; + uint8_t cqid = FIELD_EX32(attr, SQATTR, CQID); + + if (qid >= u->params.mcq_maxq) { + trace_ufs_err_mcq_create_sq_invalid_sqid(qid); + return false; + } + + if (u->sq[qid]) { + trace_ufs_err_mcq_create_sq_already_exists(qid); + return false; + } + + if (!u->cq[cqid]) { + trace_ufs_err_mcq_create_sq_invalid_cqid(qid); + return false; + } + + sq = g_malloc0(sizeof(*sq)); + sq->u = u; + sq->sqid = qid; + sq->cq = u->cq[cqid]; + sq->addr = ((uint64_t)reg->squba << 32) | reg->sqlba; + sq->size = ((FIELD_EX32(attr, SQATTR, 
SIZE) + 1) << 2) / sizeof(UfsSqEntry); + + sq->bh = qemu_bh_new_guarded(ufs_mcq_process_sq, sq, + &DEVICE(u)->mem_reentrancy_guard); + sq->req = g_new0(UfsRequest, sq->size); + QTAILQ_INIT(&sq->req_list); + for (int i = 0; i < sq->size; i++) { + ufs_mcq_init_req(u, &sq->req[i], sq); + QTAILQ_INSERT_TAIL(&sq->req_list, &sq->req[i], entry); + } + + u->sq[qid] = sq; + + trace_ufs_mcq_create_sq(sq->sqid, sq->cq->cqid, sq->addr, sq->size); + return true; +} + +static bool ufs_mcq_delete_sq(UfsHc *u, uint8_t qid) +{ + UfsSq *sq; + + if (qid >= u->params.mcq_maxq) { + trace_ufs_err_mcq_delete_sq_invalid_sqid(qid); + return false; + } + + if (!u->sq[qid]) { + trace_ufs_err_mcq_delete_sq_not_exists(qid); + return false; + } + + sq = u->sq[qid]; + + qemu_bh_delete(sq->bh); + g_free(sq->req); + g_free(sq); + u->sq[qid] = NULL; + return true; +} + +static bool ufs_mcq_create_cq(UfsHc *u, uint8_t qid, uint32_t attr) +{ + UfsMcqReg *reg = &u->mcq_reg[qid]; + UfsCq *cq; + + if (qid >= u->params.mcq_maxq) { + trace_ufs_err_mcq_create_cq_invalid_cqid(qid); + return false; + } + + if (u->cq[qid]) { + trace_ufs_err_mcq_create_cq_already_exists(qid); + return false; + } + + cq = g_malloc0(sizeof(*cq)); + cq->u = u; + cq->cqid = qid; + cq->addr = ((uint64_t)reg->cquba << 32) | reg->cqlba; + cq->size = ((FIELD_EX32(attr, CQATTR, SIZE) + 1) << 2) / sizeof(UfsCqEntry); + + cq->bh = qemu_bh_new_guarded(ufs_mcq_process_cq, cq, + &DEVICE(u)->mem_reentrancy_guard); + QTAILQ_INIT(&cq->req_list); + + u->cq[qid] = cq; + + trace_ufs_mcq_create_cq(cq->cqid, cq->addr, cq->size); + return true; +} + +static bool ufs_mcq_delete_cq(UfsHc *u, uint8_t qid) +{ + UfsCq *cq; + + if (qid >= u->params.mcq_maxq) { + trace_ufs_err_mcq_delete_cq_invalid_cqid(qid); + return false; + } + + if (!u->cq[qid]) { + trace_ufs_err_mcq_delete_cq_not_exists(qid); + return false; + } + + for (int i = 0; i < ARRAY_SIZE(u->sq); i++) { + if (u->sq[i] && u->sq[i]->cq->cqid == qid) { + trace_ufs_err_mcq_delete_cq_sq_not_deleted(i, qid); + return false; + } + } + + cq = u->cq[qid]; + + qemu_bh_delete(cq->bh); + g_free(cq); + u->cq[qid] = NULL; + return true; +} + static void ufs_write_reg(UfsHc *u, hwaddr offset, uint32_t data, unsigned size) { switch (offset) { @@ -390,6 +643,12 @@ static void ufs_write_reg(UfsHc *u, hwaddr offset, uint32_t data, unsigned size) case A_UCMDARG3: u->reg.ucmdarg3 = data; break; + case A_CONFIG: + u->reg.config = data; + break; + case A_MCQCONFIG: + u->reg.mcqconfig = data; + break; case A_UTRLCLR: case A_UTMRLDBR: case A_UTMRLCLR: @@ -402,18 +661,138 @@ static void ufs_write_reg(UfsHc *u, hwaddr offset, uint32_t data, unsigned size) } } +static void ufs_write_mcq_reg(UfsHc *u, hwaddr offset, uint32_t data, + unsigned size) +{ + int qid = offset / sizeof(UfsMcqReg); + UfsMcqReg *reg = &u->mcq_reg[qid]; + + switch (offset % sizeof(UfsMcqReg)) { + case A_SQATTR: + if (!FIELD_EX32(reg->sqattr, SQATTR, SQEN) && + FIELD_EX32(data, SQATTR, SQEN)) { + if (!ufs_mcq_create_sq(u, qid, data)) { + break; + } + } else if (FIELD_EX32(reg->sqattr, SQATTR, SQEN) && + !FIELD_EX32(data, SQATTR, SQEN)) { + if (!ufs_mcq_delete_sq(u, qid)) { + break; + } + } + reg->sqattr = data; + break; + case A_SQLBA: + reg->sqlba = data; + break; + case A_SQUBA: + reg->squba = data; + break; + case A_SQCFG: + reg->sqcfg = data; + break; + case A_CQATTR: + if (!FIELD_EX32(reg->cqattr, CQATTR, CQEN) && + FIELD_EX32(data, CQATTR, CQEN)) { + if (!ufs_mcq_create_cq(u, qid, data)) { + break; + } + } else if (FIELD_EX32(reg->cqattr, CQATTR, CQEN) && + 
!FIELD_EX32(data, CQATTR, CQEN)) { + if (!ufs_mcq_delete_cq(u, qid)) { + break; + } + } + reg->cqattr = data; + break; + case A_CQLBA: + reg->cqlba = data; + break; + case A_CQUBA: + reg->cquba = data; + break; + case A_CQCFG: + reg->cqcfg = data; + break; + case A_SQDAO: + case A_SQISAO: + case A_CQDAO: + case A_CQISAO: + trace_ufs_err_unsupport_register_offset(offset); + break; + default: + trace_ufs_err_invalid_register_offset(offset); + break; + } +} + +static void ufs_mcq_process_db(UfsHc *u, uint8_t qid, uint32_t db) +{ + UfsSq *sq; + + if (qid >= u->params.mcq_maxq) { + trace_ufs_err_mcq_db_wr_invalid_sqid(qid); + return; + } + + sq = u->sq[qid]; + if (sq->size * sizeof(UfsSqEntry) <= db) { + trace_ufs_err_mcq_db_wr_invalid_db(qid, db); + return; + } + + ufs_mcq_update_sq_tail(u, sq->sqid, db); + qemu_bh_schedule(sq->bh); +} + +static void ufs_write_mcq_op_reg(UfsHc *u, hwaddr offset, uint32_t data, + unsigned size) +{ + int qid = offset / sizeof(UfsMcqOpReg); + UfsMcqOpReg *opr = &u->mcq_op_reg[qid]; + + switch (offset % sizeof(UfsMcqOpReg)) { + case offsetof(UfsMcqOpReg, sq.tp): + if (opr->sq.tp != data) { + ufs_mcq_process_db(u, qid, data); + } + opr->sq.tp = data; + break; + case offsetof(UfsMcqOpReg, cq.hp): + opr->cq.hp = data; + ufs_mcq_update_cq_head(u, qid, data); + break; + case offsetof(UfsMcqOpReg, cq_int.is): + opr->cq_int.is &= ~data; + break; + default: + trace_ufs_err_invalid_register_offset(offset); + break; + } +} + static uint64_t ufs_mmio_read(void *opaque, hwaddr addr, unsigned size) { UfsHc *u = (UfsHc *)opaque; - uint8_t *ptr = (uint8_t *)&u->reg; + uint8_t *ptr; uint64_t value; - - if (addr > sizeof(u->reg) - size) { + uint64_t offset; + + if (addr < sizeof(u->reg)) { + offset = addr; + ptr = (uint8_t *)&u->reg; + } else if (ufs_is_mcq_reg(u, addr)) { + offset = addr - ufs_mcq_reg_addr(u, 0); + ptr = (uint8_t *)&u->mcq_reg; + } else if (ufs_is_mcq_op_reg(u, addr)) { + offset = addr - ufs_mcq_op_reg_addr(u, 0); + ptr = (uint8_t *)&u->mcq_op_reg; + } else { trace_ufs_err_invalid_register_offset(addr); return 0; } - value = *(uint32_t *)(ptr + addr); + value = *(uint32_t *)(ptr + offset); trace_ufs_mmio_read(addr, value, size); return value; } @@ -423,13 +802,17 @@ static void ufs_mmio_write(void *opaque, hwaddr addr, uint64_t data, { UfsHc *u = (UfsHc *)opaque; - if (addr > sizeof(u->reg) - size) { + trace_ufs_mmio_write(addr, data, size); + + if (addr < sizeof(u->reg)) { + ufs_write_reg(u, addr, data, size); + } else if (ufs_is_mcq_reg(u, addr)) { + ufs_write_mcq_reg(u, addr - ufs_mcq_reg_addr(u, 0), data, size); + } else if (ufs_is_mcq_op_reg(u, addr)) { + ufs_write_mcq_op_reg(u, addr - ufs_mcq_op_reg_addr(u, 0), data, size); + } else { trace_ufs_err_invalid_register_offset(addr); - return; } - - trace_ufs_mmio_write(addr, data, size); - ufs_write_reg(u, addr, data, size); } static const MemoryRegionOps ufs_mmio_ops = { @@ -1086,9 +1469,16 @@ void ufs_complete_req(UfsRequest *req, UfsReqResult req_result) req->utrd.header.dword_2 = cpu_to_le32(UFS_OCS_INVALID_CMD_TABLE_ATTR); } - trace_ufs_complete_req(req->slot); req->state = UFS_REQUEST_COMPLETE; - qemu_bh_schedule(u->complete_bh); + + if (ufs_mcq_req(req)) { + trace_ufs_mcq_complete_req(req->sq->sqid); + QTAILQ_INSERT_TAIL(&req->sq->cq->req_list, req, entry); + qemu_bh_schedule(req->sq->cq->bh); + } else { + trace_ufs_complete_req(req->slot); + qemu_bh_schedule(u->complete_bh); + } } static void ufs_clear_req(UfsRequest *req) @@ -1158,6 +1548,11 @@ static bool ufs_check_constraints(UfsHc *u, Error **errp) 
return false; } + if (u->params.mcq_maxq >= UFS_MAX_MCQ_QNUM) { + error_setg(errp, "mcq-maxq must be less than %d", UFS_MAX_MCQ_QNUM); + return false; + } + return true; } @@ -1189,15 +1584,24 @@ static void ufs_init_state(UfsHc *u) &DEVICE(u)->mem_reentrancy_guard); u->complete_bh = qemu_bh_new_guarded(ufs_sendback_req, u, &DEVICE(u)->mem_reentrancy_guard); + + if (u->params.mcq) { + memset(u->sq, 0, sizeof(u->sq)); + memset(u->cq, 0, sizeof(u->cq)); + } } static void ufs_init_hc(UfsHc *u) { uint32_t cap = 0; + uint32_t mcqconfig = 0; + uint32_t mcqcap = 0; - u->reg_size = pow2ceil(sizeof(UfsReg)); + u->reg_size = pow2ceil(ufs_reg_size(u)); memset(&u->reg, 0, sizeof(u->reg)); + memset(&u->mcq_reg, 0, sizeof(u->mcq_reg)); + memset(&u->mcq_op_reg, 0, sizeof(u->mcq_op_reg)); cap = FIELD_DP32(cap, CAP, NUTRS, (u->params.nutrs - 1)); cap = FIELD_DP32(cap, CAP, RTT, 2); cap = FIELD_DP32(cap, CAP, NUTMRS, (u->params.nutmrs - 1)); @@ -1206,7 +1610,29 @@ static void ufs_init_hc(UfsHc *u) cap = FIELD_DP32(cap, CAP, OODDS, 0); cap = FIELD_DP32(cap, CAP, UICDMETMS, 0); cap = FIELD_DP32(cap, CAP, CS, 0); + cap = FIELD_DP32(cap, CAP, LSDBS, 1); + cap = FIELD_DP32(cap, CAP, MCQS, u->params.mcq); u->reg.cap = cap; + + if (u->params.mcq) { + mcqconfig = FIELD_DP32(mcqconfig, MCQCONFIG, MAC, 0x1f); + u->reg.mcqconfig = mcqconfig; + + mcqcap = FIELD_DP32(mcqcap, MCQCAP, MAXQ, u->params.mcq_maxq - 1); + mcqcap = FIELD_DP32(mcqcap, MCQCAP, RRP, 1); + mcqcap = FIELD_DP32(mcqcap, MCQCAP, QCFGPTR, UFS_MCQ_QCFGPTR); + u->reg.mcqcap = mcqcap; + + for (int i = 0; i < ARRAY_SIZE(u->mcq_reg); i++) { + uint64_t addr = ufs_mcq_op_reg_addr(u, i); + u->mcq_reg[i].sqdao = addr; + u->mcq_reg[i].sqisao = addr + sizeof(UfsMcqSqReg); + addr += sizeof(UfsMcqSqReg); + u->mcq_reg[i].cqdao = addr + sizeof(UfsMcqSqIntReg); + addr += sizeof(UfsMcqSqIntReg); + u->mcq_reg[i].cqisao = addr + sizeof(UfsMcqCqReg); + } + } u->reg.ver = UFS_SPEC_VER; memset(&u->device_desc, 0, sizeof(DeviceDescriptor)); @@ -1288,12 +1714,25 @@ static void ufs_exit(PCIDevice *pci_dev) ufs_clear_req(&u->req_list[i]); } g_free(u->req_list); + + for (int i = 0; i < ARRAY_SIZE(u->sq); i++) { + if (u->sq[i]) { + ufs_mcq_delete_sq(u, i); + } + } + for (int i = 0; i < ARRAY_SIZE(u->cq); i++) { + if (u->cq[i]) { + ufs_mcq_delete_cq(u, i); + } + } } static Property ufs_props[] = { DEFINE_PROP_STRING("serial", UfsHc, params.serial), DEFINE_PROP_UINT8("nutrs", UfsHc, params.nutrs, 32), DEFINE_PROP_UINT8("nutmrs", UfsHc, params.nutmrs, 8), + DEFINE_PROP_BOOL("mcq", UfsHc, params.mcq, false), + DEFINE_PROP_UINT8("mcq-maxq", UfsHc, params.mcq_maxq, 2), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/ufs/ufs.h b/hw/ufs/ufs.h index 8fda94f4ef..6c9382cbc4 100644 --- a/hw/ufs/ufs.h +++ b/hw/ufs/ufs.h @@ -16,6 +16,7 @@ #include "block/ufs.h" #define UFS_MAX_LUS 32 +#define UFS_MAX_MCQ_QNUM 32 #define UFS_BLOCK_SIZE_SHIFT 12 #define UFS_BLOCK_SIZE (1 << UFS_BLOCK_SIZE_SHIFT) @@ -45,10 +46,11 @@ typedef enum UfsReqResult { UFS_REQUEST_NO_COMPLETE = 2, } UfsReqResult; +#define UFS_INVALID_SLOT (-1) typedef struct UfsRequest { struct UfsHc *hc; UfsRequestState state; - int slot; + int slot; /* -1 when it's a MCQ request */ UtpTransferReqDesc utrd; UtpUpiuReq req_upiu; @@ -57,8 +59,18 @@ typedef struct UfsRequest { /* for scsi command */ QEMUSGList *sg; uint32_t data_len; + + /* for MCQ */ + struct UfsSq *sq; + struct UfsCqEntry cqe; + QTAILQ_ENTRY(UfsRequest) entry; } UfsRequest; +static inline bool ufs_mcq_req(UfsRequest *req) +{ + return req->sq != NULL; +} + struct UfsLu; 
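A note on the queue arithmetic used by ufs_mcq_process_sq()/ufs_mcq_process_cq() earlier in the patch: the MCQ doorbell registers hold byte offsets into the rings, not entry indices, so a pointer advances by the fixed 32-byte entry size (asserted in _ufs_check_size()) and wraps at nr_entries * entry size. A minimal standalone sketch of that arithmetic follows; the names here are illustrative, not from the patch.

/*
 * Illustrative sketch only (not part of the patch): MCQ head/tail
 * pointers are byte offsets, so stepping a ring pointer advances by
 * the 32-byte entry size and wraps at nr_entries * entry size, as
 * ufs_mcq_process_sq()/ufs_mcq_process_cq() do above.
 */
#include <stdint.h>

enum { MCQ_ENTRY_SIZE = 32 };  /* sizeof(UfsSqEntry) == sizeof(UfsCqEntry) */

static inline uint32_t mcq_ring_next(uint32_t byte_off, uint16_t nr_entries)
{
    return (byte_off + MCQ_ENTRY_SIZE) % (nr_entries * MCQ_ENTRY_SIZE);
}

/* The ring is empty when head == tail, mirroring ufs_mcq_sq_empty()
 * declared later in this header. */
static inline int mcq_ring_empty(uint32_t head, uint32_t tail)
{
    return head == tail;
}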
typedef UfsReqResult (*UfsScsiOp)(struct UfsLu *, UfsRequest *); @@ -76,13 +88,43 @@ typedef struct UfsParams { char *serial; uint8_t nutrs; /* Number of UTP Transfer Request Slots */ uint8_t nutmrs; /* Number of UTP Task Management Request Slots */ + bool mcq; /* Multiple Command Queue support */ + uint8_t mcq_qcfgptr; /* MCQ Queue Configuration Pointer in MCQCAP */ + uint8_t mcq_maxq; /* MCQ Maximum number of Queues */ } UfsParams; +/* + * MCQ Properties + */ +typedef struct UfsSq { + struct UfsHc *u; + uint8_t sqid; + struct UfsCq *cq; + uint64_t addr; + uint16_t size; /* A number of entries (qdepth) */ + + QEMUBH *bh; /* Bottom half to process requests in async */ + UfsRequest *req; + QTAILQ_HEAD(, UfsRequest) req_list; /* Free request list */ +} UfsSq; + +typedef struct UfsCq { + struct UfsHc *u; + uint8_t cqid; + uint64_t addr; + uint16_t size; /* A number of entries (qdepth) */ + + QEMUBH *bh; + QTAILQ_HEAD(, UfsRequest) req_list; +} UfsCq; + typedef struct UfsHc { PCIDevice parent_obj; UfsBus bus; MemoryRegion iomem; UfsReg reg; + UfsMcqReg mcq_reg[UFS_MAX_MCQ_QNUM]; + UfsMcqOpReg mcq_op_reg[UFS_MAX_MCQ_QNUM]; UfsParams params; uint32_t reg_size; UfsRequest *req_list; @@ -100,8 +142,62 @@ typedef struct UfsHc { qemu_irq irq; QEMUBH *doorbell_bh; QEMUBH *complete_bh; + + /* MCQ properties */ + UfsSq *sq[UFS_MAX_MCQ_QNUM]; + UfsCq *cq[UFS_MAX_MCQ_QNUM]; } UfsHc; +static inline uint32_t ufs_mcq_sq_tail(UfsHc *u, uint32_t qid) +{ + return u->mcq_op_reg[qid].sq.tp; +} + +static inline void ufs_mcq_update_sq_tail(UfsHc *u, uint32_t qid, uint32_t db) +{ + u->mcq_op_reg[qid].sq.tp = db; +} + +static inline uint32_t ufs_mcq_sq_head(UfsHc *u, uint32_t qid) +{ + return u->mcq_op_reg[qid].sq.hp; +} + +static inline void ufs_mcq_update_sq_head(UfsHc *u, uint32_t qid, uint32_t db) +{ + u->mcq_op_reg[qid].sq.hp = db; +} + +static inline bool ufs_mcq_sq_empty(UfsHc *u, uint32_t qid) +{ + return ufs_mcq_sq_tail(u, qid) == ufs_mcq_sq_head(u, qid); +} + +static inline uint32_t ufs_mcq_cq_tail(UfsHc *u, uint32_t qid) +{ + return u->mcq_op_reg[qid].cq.tp; +} + +static inline void ufs_mcq_update_cq_tail(UfsHc *u, uint32_t qid, uint32_t db) +{ + u->mcq_op_reg[qid].cq.tp = db; +} + +static inline uint32_t ufs_mcq_cq_head(UfsHc *u, uint32_t qid) +{ + return u->mcq_op_reg[qid].cq.hp; +} + +static inline void ufs_mcq_update_cq_head(UfsHc *u, uint32_t qid, uint32_t db) +{ + u->mcq_op_reg[qid].cq.hp = db; +} + +static inline bool ufs_mcq_cq_empty(UfsHc *u, uint32_t qid) +{ + return ufs_mcq_cq_tail(u, qid) == ufs_mcq_cq_head(u, qid); +} + #define TYPE_UFS "ufs" #define UFS(obj) OBJECT_CHECK(UfsHc, (obj), TYPE_UFS) diff --git a/hw/usb/hcd-ohci.c b/hw/usb/hcd-ohci.c index fc8fc91a1d..acd6016980 100644 --- a/hw/usb/hcd-ohci.c +++ b/hw/usb/hcd-ohci.c @@ -927,6 +927,11 @@ static int ohci_service_td(OHCIState *ohci, struct ohci_ed *ed) case OHCI_TD_DIR_SETUP: str = "setup"; pid = USB_TOKEN_SETUP; + if (OHCI_BM(ed->flags, ED_EN) > 0) { /* setup only allowed to ep 0 */ + trace_usb_ohci_td_bad_pid(str, ed->flags, td.flags); + ohci_die(ohci); + return 1; + } break; default: trace_usb_ohci_td_bad_direction(dir); diff --git a/hw/usb/trace-events b/hw/usb/trace-events index ed7dc210d3..fd7b90d70c 100644 --- a/hw/usb/trace-events +++ b/hw/usb/trace-events @@ -28,6 +28,7 @@ usb_ohci_iso_td_data_overrun(int ret, ssize_t len) "DataOverrun %d > %zu" usb_ohci_iso_td_data_underrun(int ret) "DataUnderrun %d" usb_ohci_iso_td_nak(int ret) "got NAK/STALL %d" usb_ohci_iso_td_bad_response(int ret) "Bad device response %d" 
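Context for the hcd-ohci.c hunk above: a SETUP token is only defined for the default control endpoint 0, so the controller now treats a SETUP TD addressed to any other endpoint as fatal (ohci_die()) and fires the new usb_ohci_td_bad_pid trace point added below. A hedged sketch of the check, assuming the standard OHCI ED dword-0 layout (function address in bits 0-6, endpoint number EN in bits 7-10, which is the field OHCI_BM(ed->flags, ED_EN) extracts):

/*
 * Sketch only; assumes the standard OHCI ED dword-0 layout.
 * SETUP packets are legal only for the default control endpoint 0.
 */
#include <stdbool.h>
#include <stdint.h>

static inline unsigned ohci_ed_endpoint(uint32_t ed_flags)
{
    return (ed_flags >> 7) & 0xf;   /* ED_EN: bits 7..10 */
}

static inline bool ohci_setup_allowed(uint32_t ed_flags)
{
    return ohci_ed_endpoint(ed_flags) == 0;
}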
+usb_ohci_td_bad_pid(const char *s, uint32_t edf, uint32_t tdf) "Bad pid %s: ed.flags 0x%x td.flags 0x%x" usb_ohci_port_attach(int index) "port #%d" usb_ohci_port_detach(int index) "port #%d" usb_ohci_port_wakeup(int index) "port #%d" diff --git a/include/block/ufs.h b/include/block/ufs.h index d61598b8f3..92da7a89b9 100644 --- a/include/block/ufs.h +++ b/include/block/ufs.h @@ -7,7 +7,7 @@ typedef struct QEMU_PACKED UfsReg { uint32_t cap; - uint32_t rsvd0; + uint32_t mcqcap; uint32_t ver; uint32_t rsvd1; uint32_t hcpid; @@ -46,6 +46,13 @@ typedef struct QEMU_PACKED UfsReg { uint32_t rsvd7[4]; uint32_t rsvd8[16]; uint32_t ccap; + uint32_t rsvd9[127]; + uint32_t config; + uint32_t rsvd10[3]; + uint32_t rsvd11[28]; + uint32_t mcqconfig; + uint32_t esilba; + uint32_t esiuba; } UfsReg; REG32(CAP, offsetof(UfsReg, cap)) @@ -57,6 +64,15 @@ REG32(CAP, offsetof(UfsReg, cap)) FIELD(CAP, OODDS, 25, 1) FIELD(CAP, UICDMETMS, 26, 1) FIELD(CAP, CS, 28, 1) + FIELD(CAP, LSDBS, 29, 1) + FIELD(CAP, MCQS, 30, 1) +REG32(MCQCAP, offsetof(UfsReg, mcqcap)) + FIELD(MCQCAP, MAXQ, 0, 8) + FIELD(MCQCAP, SP, 8, 1) + FIELD(MCQCAP, RRP, 9, 1) + FIELD(MCQCAP, EIS, 10, 1) + FIELD(MCQCAP, QCFGPTR, 16, 8) + FIELD(MCQCAP, MIAG, 24, 8) REG32(VER, offsetof(UfsReg, ver)) REG32(HCPID, offsetof(UfsReg, hcpid)) REG32(HCMID, offsetof(UfsReg, hcmid)) @@ -78,6 +94,7 @@ REG32(IS, offsetof(UfsReg, is)) FIELD(IS, HCFES, 16, 1) FIELD(IS, SBFES, 17, 1) FIELD(IS, CEFES, 18, 1) + FIELD(IS, CQES, 20, 1) REG32(IE, offsetof(UfsReg, ie)) FIELD(IE, UTRCE, 0, 1) FIELD(IE, UDEPRIE, 1, 1) @@ -95,6 +112,7 @@ REG32(IE, offsetof(UfsReg, ie)) FIELD(IE, HCFEE, 16, 1) FIELD(IE, SBFEE, 17, 1) FIELD(IE, CEFEE, 18, 1) + FIELD(IE, CQEE, 20, 1) REG32(HCS, offsetof(UfsReg, hcs)) FIELD(HCS, DP, 0, 1) FIELD(HCS, UTRLRDY, 1, 1) @@ -128,9 +146,14 @@ REG32(UCMDARG1, offsetof(UfsReg, ucmdarg1)) REG32(UCMDARG2, offsetof(UfsReg, ucmdarg2)) REG32(UCMDARG3, offsetof(UfsReg, ucmdarg3)) REG32(CCAP, offsetof(UfsReg, ccap)) +REG32(CONFIG, offsetof(UfsReg, config)) + FIELD(CONFIG, QT, 0, 1) +REG32(MCQCONFIG, offsetof(UfsReg, mcqconfig)) + FIELD(MCQCONFIG, MAC, 8, 8) #define UFS_INTR_MASK \ - ((1 << R_IS_CEFES_SHIFT) | (1 << R_IS_SBFES_SHIFT) | \ + ((1 << R_IS_CQES_SHIFT) | \ + (1 << R_IS_CEFES_SHIFT) | (1 << R_IS_SBFES_SHIFT) | \ (1 << R_IS_HCFES_SHIFT) | (1 << R_IS_UTPES_SHIFT) | \ (1 << R_IS_DFES_SHIFT) | (1 << R_IS_UCCS_SHIFT) | \ (1 << R_IS_UTMRCS_SHIFT) | (1 << R_IS_ULSS_SHIFT) | \ @@ -157,6 +180,84 @@ REG32(CCAP, offsetof(UfsReg, ccap)) ((be32_to_cpu(dword2) >> UFS_UPIU_HEADER_DATA_SEGMENT_LENGTH_SHIFT) & \ UFS_UPIU_HEADER_DATA_SEGMENT_LENGTH_MASK) +typedef struct QEMU_PACKED UfsMcqReg { + uint32_t sqattr; + uint32_t sqlba; + uint32_t squba; + uint32_t sqdao; + uint32_t sqisao; + uint32_t sqcfg; + uint32_t rsvd0[2]; + uint32_t cqattr; + uint32_t cqlba; + uint32_t cquba; + uint32_t cqdao; + uint32_t cqisao; + uint32_t cqcfg; + uint32_t rsvd1[2]; +} UfsMcqReg; + +REG32(SQATTR, offsetof(UfsMcqReg, sqattr)) + FIELD(SQATTR, SIZE, 0, 16) + FIELD(SQATTR, CQID, 16, 8) + FIELD(SQATTR, SQPL, 28, 3) + FIELD(SQATTR, SQEN, 31, 1) +REG32(SQLBA, offsetof(UfsMcqReg, sqlba)) +REG32(SQUBA, offsetof(UfsMcqReg, squba)) +REG32(SQDAO, offsetof(UfsMcqReg, sqdao)) +REG32(SQISAO, offsetof(UfsMcqReg, sqisao)) +REG32(SQCFG, offsetof(UfsMcqReg, sqcfg)) +REG32(CQATTR, offsetof(UfsMcqReg, cqattr)) + FIELD(CQATTR, SIZE, 0, 16) + FIELD(CQATTR, CQEN, 31, 1) +REG32(CQLBA, offsetof(UfsMcqReg, cqlba)) +REG32(CQUBA, offsetof(UfsMcqReg, cquba)) +REG32(CQDAO, offsetof(UfsMcqReg, cqdao)) +REG32(CQISAO, 
offsetof(UfsMcqReg, cqisao)) +REG32(CQCFG, offsetof(UfsMcqReg, cqcfg)) + +typedef struct QEMU_PACKED UfsMcqSqReg { + uint32_t hp; + uint32_t tp; + uint32_t rtc; + uint32_t cti; + uint32_t rts; +} UfsMcqSqReg; + +typedef struct QEMU_PACKED UfsMcqCqReg { + uint32_t hp; + uint32_t tp; +} UfsMcqCqReg; + +typedef struct QEMU_PACKED UfsMcqSqIntReg { + uint32_t is; + uint32_t ie; +} UfsMcqSqIntReg; + +typedef struct QEMU_PACKED UfsMcqCqIntReg { + uint32_t is; + uint32_t ie; + uint32_t iacr; +} UfsMcqCqIntReg; + +REG32(CQIS, offsetof(UfsMcqCqIntReg, is)) + FIELD(CQIS, TEPS, 0, 1) + +/* + * Provide MCQ Operation & Runtime Registers as a contiguous addressed + * registers for the simplicity. + * DAO(Doorbell Address Offset) and ISAO(Interrupt Status Register Address + * Offset) registers should be properly configured with the following + * structure. + */ +#define UFS_MCQ_OPR_START 0x1000 +typedef struct QEMU_PACKED UfsMcqOpReg { + UfsMcqSqReg sq; + UfsMcqSqIntReg sq_int; + UfsMcqCqReg cq; + UfsMcqCqIntReg cq_int; +} UfsMcqOpReg; + typedef struct QEMU_PACKED DeviceDescriptor { uint8_t length; uint8_t descriptor_idn; @@ -1064,9 +1165,31 @@ typedef struct QEMU_PACKED UtpUpiuRsp { }; } UtpUpiuRsp; +/* + * MCQ Completion Queue Entry + */ +typedef UtpTransferReqDesc UfsSqEntry; +typedef struct QEMU_PACKED UfsCqEntry { + uint64_t utp_addr; + uint16_t resp_len; + uint16_t resp_off; + uint16_t prdt_len; + uint16_t prdt_off; + uint8_t status; + uint8_t error; + uint16_t rsvd1; + uint32_t rsvd2[3]; +} UfsCqEntry; + static inline void _ufs_check_size(void) { - QEMU_BUILD_BUG_ON(sizeof(UfsReg) != 0x104); + QEMU_BUILD_BUG_ON(sizeof(UfsReg) != 0x38C); + QEMU_BUILD_BUG_ON(sizeof(UfsMcqReg) != 64); + QEMU_BUILD_BUG_ON(sizeof(UfsMcqSqReg) != 20); + QEMU_BUILD_BUG_ON(sizeof(UfsMcqCqReg) != 8); + QEMU_BUILD_BUG_ON(sizeof(UfsMcqSqIntReg) != 8); + QEMU_BUILD_BUG_ON(sizeof(UfsMcqCqIntReg) != 12); + QEMU_BUILD_BUG_ON(sizeof(UfsMcqOpReg) != 48); QEMU_BUILD_BUG_ON(sizeof(DeviceDescriptor) != 89); QEMU_BUILD_BUG_ON(sizeof(GeometryDescriptor) != 87); QEMU_BUILD_BUG_ON(sizeof(UnitDescriptor) != 45); @@ -1086,5 +1209,7 @@ static inline void _ufs_check_size(void) QEMU_BUILD_BUG_ON(sizeof(UtpTaskReqDesc) != 80); QEMU_BUILD_BUG_ON(sizeof(UtpCmdRsp) != 40); QEMU_BUILD_BUG_ON(sizeof(UtpUpiuRsp) != 288); + QEMU_BUILD_BUG_ON(sizeof(UfsSqEntry) != 32); + QEMU_BUILD_BUG_ON(sizeof(UfsCqEntry) != 32); } #endif diff --git a/include/hw/core/tcg-cpu-ops.h b/include/hw/core/tcg-cpu-ops.h index 9387d38748..099de3375e 100644 --- a/include/hw/core/tcg-cpu-ops.h +++ b/include/hw/core/tcg-cpu-ops.h @@ -115,8 +115,19 @@ struct TCGCPUOps { void (*do_interrupt)(CPUState *cpu); /** @cpu_exec_interrupt: Callback for processing interrupts in cpu_exec */ bool (*cpu_exec_interrupt)(CPUState *cpu, int interrupt_request); - /** @cpu_exec_halt: Callback for handling halt in cpu_exec */ - void (*cpu_exec_halt)(CPUState *cpu); + /** + * @cpu_exec_halt: Callback for handling halt in cpu_exec. + * + * The target CPU should do any special processing here that it needs + * to do when the CPU is in the halted state. + * + * Return true to indicate that the CPU should now leave halt, false + * if it should remain in the halted state. + * + * If this method is not provided, the default is to do nothing, and + * to leave halt if cpu_has_work() returns true. 
+ */ + bool (*cpu_exec_halt)(CPUState *cpu); /** * @tlb_fill: Handle a softmmu tlb miss * diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h index b300d0446d..c59ca104fe 100644 --- a/target/arm/cpu-features.h +++ b/target/arm/cpu-features.h @@ -571,6 +571,11 @@ static inline bool isar_feature_aa64_i8mm(const ARMISARegisters *id) return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, I8MM) != 0; } +static inline bool isar_feature_aa64_wfxt(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, WFXT) >= 2; +} + static inline bool isar_feature_aa64_hbc(const ARMISARegisters *id) { return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, BC) != 0; diff --git a/target/arm/cpu.c b/target/arm/cpu.c index 77f8c9c748..35fa281f1b 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -1132,6 +1132,35 @@ static bool arm_cpu_virtio_is_big_endian(CPUState *cs) return arm_cpu_data_is_big_endian(env); } +#ifdef CONFIG_TCG +static bool arm_cpu_exec_halt(CPUState *cs) +{ + bool leave_halt = cpu_has_work(cs); + + if (leave_halt) { + /* We're about to come out of WFI/WFE: disable the WFxT timer */ + ARMCPU *cpu = ARM_CPU(cs); + if (cpu->wfxt_timer) { + timer_del(cpu->wfxt_timer); + } + } + return leave_halt; +} +#endif + +static void arm_wfxt_timer_cb(void *opaque) +{ + ARMCPU *cpu = opaque; + CPUState *cs = CPU(cpu); + + /* + * We expect the CPU to be halted; this will cause arm_cpu_is_work() + * to return true (so we will come out of halt even with no other + * pending interrupt), and the TCG accelerator's cpu_exec_interrupt() + * function auto-clears the CPU_INTERRUPT_EXITTB flag for us. + */ + cpu_interrupt(cs, CPU_INTERRUPT_EXITTB); +} #endif static void arm_disas_set_info(CPUState *cpu, disassemble_info *info) @@ -1877,6 +1906,9 @@ static void arm_cpu_finalizefn(Object *obj) if (cpu->pmu_timer) { timer_free(cpu->pmu_timer); } + if (cpu->wfxt_timer) { + timer_free(cpu->wfxt_timer); + } #endif } @@ -2369,6 +2401,13 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) #endif } +#ifndef CONFIG_USER_ONLY + if (tcg_enabled() && cpu_isar_feature(aa64_wfxt, cpu)) { + cpu->wfxt_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, + arm_wfxt_timer_cb, cpu); + } +#endif + if (tcg_enabled()) { /* * Don't report some architectural features in the ID registers @@ -2625,6 +2664,7 @@ static const TCGCPUOps arm_tcg_ops = { #else .tlb_fill = arm_cpu_tlb_fill, .cpu_exec_interrupt = arm_cpu_exec_interrupt, + .cpu_exec_halt = arm_cpu_exec_halt, .do_interrupt = arm_cpu_do_interrupt, .do_transaction_failed = arm_cpu_do_transaction_failed, .do_unaligned_access = arm_cpu_do_unaligned_access, diff --git a/target/arm/cpu.h b/target/arm/cpu.h index c17264c239..3841359d0f 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -866,6 +866,9 @@ struct ArchCPU { * pmu_op_finish() - it does not need other handling during migration */ QEMUTimer *pmu_timer; + /* Timer used for WFxT timeouts */ + QEMUTimer *wfxt_timer; + /* GPIO outputs for generic timer */ qemu_irq gt_timer_outputs[NUM_GTIMERS]; /* GPIO output for GICv3 maintenance interrupt signal */ diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c index c15d086049..862d2b92fa 100644 --- a/target/arm/cpu64.c +++ b/target/arm/cpu64.c @@ -109,7 +109,11 @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) * No explicit bits enabled, and no implicit bits from sve-max-vq. */ if (!cpu_isar_feature(aa64_sve, cpu)) { - /* SVE is disabled and so are all vector lengths. Good. */ + /* + * SVE is disabled and so are all vector lengths. Good. 
+ * Disable all SVE extensions as well. + */ + cpu->isar.id_aa64zfr0 = 0; return; } diff --git a/target/arm/helper.c b/target/arm/helper.c index 7587635960..ce31957235 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -2665,7 +2665,7 @@ static CPAccessResult gt_stimer_access(CPUARMState *env, } } -static uint64_t gt_get_countervalue(CPUARMState *env) +uint64_t gt_get_countervalue(CPUARMState *env) { ARMCPU *cpu = env_archcpu(env); @@ -2800,7 +2800,7 @@ static uint64_t gt_cnt_read(CPUARMState *env, const ARMCPRegInfo *ri) return gt_get_countervalue(env) - gt_phys_cnt_offset(env); } -static uint64_t gt_virt_cnt_offset(CPUARMState *env) +uint64_t gt_virt_cnt_offset(CPUARMState *env) { uint64_t hcr; diff --git a/target/arm/helper.h b/target/arm/helper.h index f830531dd3..eca2043fc2 100644 --- a/target/arm/helper.h +++ b/target/arm/helper.h @@ -53,6 +53,7 @@ DEF_HELPER_2(exception_pc_alignment, noreturn, env, tl) DEF_HELPER_1(setend, void, env) DEF_HELPER_2(wfi, void, env, i32) DEF_HELPER_1(wfe, void, env) +DEF_HELPER_2(wfit, void, env, i64) DEF_HELPER_1(yield, void, env) DEF_HELPER_1(pre_hvc, void, env) DEF_HELPER_2(pre_smc, void, env, i32) @@ -268,50 +269,6 @@ DEF_HELPER_FLAGS_2(fjcvtzs, TCG_CALL_NO_RWG, i64, f64, ptr) DEF_HELPER_FLAGS_3(check_hcr_el2_trap, TCG_CALL_NO_WG, void, env, i32, i32) /* neon_helper.c */ -DEF_HELPER_FLAGS_3(neon_qadd_u8, TCG_CALL_NO_RWG, i32, env, i32, i32) -DEF_HELPER_FLAGS_3(neon_qadd_s8, TCG_CALL_NO_RWG, i32, env, i32, i32) -DEF_HELPER_FLAGS_3(neon_qadd_u16, TCG_CALL_NO_RWG, i32, env, i32, i32) -DEF_HELPER_FLAGS_3(neon_qadd_s16, TCG_CALL_NO_RWG, i32, env, i32, i32) -DEF_HELPER_FLAGS_3(neon_qadd_u32, TCG_CALL_NO_RWG, i32, env, i32, i32) -DEF_HELPER_FLAGS_3(neon_qadd_s32, TCG_CALL_NO_RWG, i32, env, i32, i32) -DEF_HELPER_FLAGS_3(neon_uqadd_s8, TCG_CALL_NO_RWG, i32, env, i32, i32) -DEF_HELPER_FLAGS_3(neon_uqadd_s16, TCG_CALL_NO_RWG, i32, env, i32, i32) -DEF_HELPER_FLAGS_3(neon_uqadd_s32, TCG_CALL_NO_RWG, i32, env, i32, i32) -DEF_HELPER_FLAGS_3(neon_uqadd_s64, TCG_CALL_NO_RWG, i64, env, i64, i64) -DEF_HELPER_FLAGS_3(neon_sqadd_u8, TCG_CALL_NO_RWG, i32, env, i32, i32) -DEF_HELPER_FLAGS_3(neon_sqadd_u16, TCG_CALL_NO_RWG, i32, env, i32, i32) -DEF_HELPER_FLAGS_3(neon_sqadd_u32, TCG_CALL_NO_RWG, i32, env, i32, i32) -DEF_HELPER_FLAGS_3(neon_sqadd_u64, TCG_CALL_NO_RWG, i64, env, i64, i64) -DEF_HELPER_3(neon_qsub_u8, i32, env, i32, i32) -DEF_HELPER_3(neon_qsub_s8, i32, env, i32, i32) -DEF_HELPER_3(neon_qsub_u16, i32, env, i32, i32) -DEF_HELPER_3(neon_qsub_s16, i32, env, i32, i32) -DEF_HELPER_3(neon_qsub_u32, i32, env, i32, i32) -DEF_HELPER_3(neon_qsub_s32, i32, env, i32, i32) -DEF_HELPER_3(neon_qadd_u64, i64, env, i64, i64) -DEF_HELPER_3(neon_qadd_s64, i64, env, i64, i64) -DEF_HELPER_3(neon_qsub_u64, i64, env, i64, i64) -DEF_HELPER_3(neon_qsub_s64, i64, env, i64, i64) - -DEF_HELPER_2(neon_hadd_s8, i32, i32, i32) -DEF_HELPER_2(neon_hadd_u8, i32, i32, i32) -DEF_HELPER_2(neon_hadd_s16, i32, i32, i32) -DEF_HELPER_2(neon_hadd_u16, i32, i32, i32) -DEF_HELPER_2(neon_hadd_s32, s32, s32, s32) -DEF_HELPER_2(neon_hadd_u32, i32, i32, i32) -DEF_HELPER_2(neon_rhadd_s8, i32, i32, i32) -DEF_HELPER_2(neon_rhadd_u8, i32, i32, i32) -DEF_HELPER_2(neon_rhadd_s16, i32, i32, i32) -DEF_HELPER_2(neon_rhadd_u16, i32, i32, i32) -DEF_HELPER_2(neon_rhadd_s32, s32, s32, s32) -DEF_HELPER_2(neon_rhadd_u32, i32, i32, i32) -DEF_HELPER_2(neon_hsub_s8, i32, i32, i32) -DEF_HELPER_2(neon_hsub_u8, i32, i32, i32) -DEF_HELPER_2(neon_hsub_s16, i32, i32, i32) -DEF_HELPER_2(neon_hsub_u16, i32, i32, i32) 
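The per-lane neon_* helpers deleted in this hunk are superseded by the generic-vector (gvec) helpers declared just below. An out-of-line gvec helper takes opaque vector pointers plus a descriptor that encodes the operation size; the skeleton below shows that convention under stated assumptions (QEMU's tcg-gvec-desc.h), with a trivial placeholder element operation — the real helpers implement saturating and rounding shifts.

/*
 * Skeleton of the gvec out-of-line helper convention used by the
 * DEF_HELPER_FLAGS_4(gvec_*) declarations below. The element
 * operation is a placeholder, not the real shift semantics.
 */
#include "qemu/osdep.h"
#include "tcg/tcg-gvec-desc.h"

void sketch_gvec_op_b(void *vd, void *vn, void *vm, uint32_t desc)
{
    intptr_t i, opr_sz = simd_oprsz(desc);   /* bytes of live data */
    int8_t *d = vd, *n = vn, *m = vm;

    for (i = 0; i < opr_sz; i++) {
        d[i] = n[i] + m[i];                  /* placeholder element op */
    }
    /* The real helpers also zero the tail up to simd_maxsz(desc). */
}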
-DEF_HELPER_2(neon_hsub_s32, s32, s32, s32) -DEF_HELPER_2(neon_hsub_u32, i32, i32, i32) - DEF_HELPER_2(neon_pmin_u8, i32, i32, i32) DEF_HELPER_2(neon_pmin_s8, i32, i32, i32) DEF_HELPER_2(neon_pmin_u16, i32, i32, i32) @@ -351,6 +308,32 @@ DEF_HELPER_3(neon_qrshl_u32, i32, env, i32, i32) DEF_HELPER_3(neon_qrshl_s32, i32, env, i32, i32) DEF_HELPER_3(neon_qrshl_u64, i64, env, i64, i64) DEF_HELPER_3(neon_qrshl_s64, i64, env, i64, i64) +DEF_HELPER_FLAGS_5(neon_sqshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_sqshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_sqshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_sqshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_uqshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_uqshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_uqshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_uqshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_sqrshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_sqrshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_sqrshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_sqrshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_uqrshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_uqrshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_uqrshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_uqrshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_srshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_srshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_srshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_srshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_urshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_urshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_urshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_urshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_2(neon_add_u8, i32, i32, i32) DEF_HELPER_2(neon_add_u16, i32, i32, i32) @@ -836,6 +819,22 @@ DEF_HELPER_FLAGS_5(gvec_sqsub_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_sqsub_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_usqadd_b, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_usqadd_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_usqadd_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_usqadd_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_suqadd_b, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_suqadd_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_suqadd_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_suqadd_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_fmlal_a32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) @@ -970,6 +969,16 @@ DEF_HELPER_FLAGS_5(neon_sqrdmulh_h, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_5(neon_sqrdmulh_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, 
ptr, i32) +DEF_HELPER_FLAGS_5(neon_sqdmulh_idx_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_sqdmulh_idx_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(neon_sqrdmulh_idx_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_sqrdmulh_idx_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + DEF_HELPER_FLAGS_4(sve2_sqdmulh_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve2_sqdmulh_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve2_sqdmulh_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) diff --git a/target/arm/internals.h b/target/arm/internals.h index ee3ebd383e..11b5da2562 100644 --- a/target/arm/internals.h +++ b/target/arm/internals.h @@ -1770,4 +1770,12 @@ bool check_watchpoint_in_range(int i, target_ulong addr); CPUWatchpoint *find_hw_watchpoint(CPUState *cpu, target_ulong addr); int insert_hw_watchpoint(target_ulong addr, target_ulong len, int type); int delete_hw_watchpoint(target_ulong addr, target_ulong len, int type); + +/* Return the current value of the system counter in ticks */ +uint64_t gt_get_countervalue(CPUARMState *env); +/* + * Return the currently applicable offset between the system counter + * and CNTVCT_EL0 (this will be either 0 or the value of CNTVOFF_EL2). + */ +uint64_t gt_virt_cnt_offset(CPUARMState *env); #endif diff --git a/target/arm/machine.c b/target/arm/machine.c index b2b39b2475..0a722ca7e7 100644 --- a/target/arm/machine.c +++ b/target/arm/machine.c @@ -242,6 +242,25 @@ static const VMStateDescription vmstate_irq_line_state = { } }; +static bool wfxt_timer_needed(void *opaque) +{ + ARMCPU *cpu = opaque; + + /* We'll only have the timer object if FEAT_WFxT is implemented */ + return cpu->wfxt_timer; +} + +static const VMStateDescription vmstate_wfxt_timer = { + .name = "cpu/wfxt-timer", + .version_id = 1, + .minimum_version_id = 1, + .needed = wfxt_timer_needed, + .fields = (const VMStateField[]) { + VMSTATE_TIMER_PTR(wfxt_timer, ARMCPU), + VMSTATE_END_OF_LIST() + } +}; + static bool m_needed(void *opaque) { ARMCPU *cpu = opaque; @@ -957,6 +976,7 @@ const VMStateDescription vmstate_arm_cpu = { #endif &vmstate_serror, &vmstate_irq_line_state, + &vmstate_wfxt_timer, NULL } }; diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode index f48adef5bb..2b7a3254a0 100644 --- a/target/arm/tcg/a64.decode +++ b/target/arm/tcg/a64.decode @@ -32,6 +32,7 @@ &rr_e rd rn esz &rrr_e rd rn rm esz &rrx_e rd rn rm idx esz +&rrrr_e rd rn rm ra esz &qrr_e q rd rn esz &qrrr_e q rd rn rm esz &qrrx_e q rd rn rm idx esz @@ -42,8 +43,11 @@ @rr_sd ........ ... ..... ...... rn:5 rd:5 &rr_e esz=%esz_sd @rrr_h ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=1 +@rrr_d ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=3 @rrr_sd ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=%esz_sd @rrr_hsd ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=%esz_hsd +@rrr_e ........ esz:2 . rm:5 ...... rn:5 rd:5 &rrr_e +@r2r_e ........ esz:2 . ..... ...... rm:5 rd:5 &rrr_e rn=%rd @rrx_h ........ .. .. rm:4 .... . . rn:5 rd:5 &rrx_e esz=1 idx=%hlm @rrx_s ........ .. . rm:5 .... . . rn:5 rd:5 &rrx_e esz=2 idx=%hl @@ -59,6 +63,7 @@ @qrrr_h . q:1 ...... ... rm:5 ...... rn:5 rd:5 &qrrr_e esz=1 @qrrr_sd . q:1 ...... ... rm:5 ...... rn:5 rd:5 &qrrr_e esz=%esz_sd @qrrr_e . q:1 ...... esz:2 . rm:5 ...... rn:5 rd:5 &qrrr_e +@qr2r_e . q:1 ...... esz:2 . ..... ...... rm:5 rd:5 &qrrr_e rn=%rd @qrrx_h . q:1 .. .... .. .. rm:4 .... . . 
rn:5 rd:5 \ &qrrx_e esz=1 idx=%hlm @@ -225,6 +230,10 @@ ERETA 1101011 0100 11111 00001 m:1 11111 11111 &reta # ERETAA, ERETAB NOP 1101 0101 0000 0011 0010 ---- --- 11111 } +# System instructions with register argument +WFET 1101 0101 0000 0011 0001 0000 000 rd:5 +WFIT 1101 0101 0000 0011 0001 0000 001 rd:5 + # Barriers CLREX 1101 0101 0000 0011 0011 ---- 010 11111 @@ -744,6 +753,35 @@ FRECPS_s 0101 1110 0.1 ..... 11111 1 ..... ..... @rrr_sd FRSQRTS_s 0101 1110 110 ..... 00111 1 ..... ..... @rrr_h FRSQRTS_s 0101 1110 1.1 ..... 11111 1 ..... ..... @rrr_sd +SQADD_s 0101 1110 ..1 ..... 00001 1 ..... ..... @rrr_e +UQADD_s 0111 1110 ..1 ..... 00001 1 ..... ..... @rrr_e +SQSUB_s 0101 1110 ..1 ..... 00101 1 ..... ..... @rrr_e +UQSUB_s 0111 1110 ..1 ..... 00101 1 ..... ..... @rrr_e + +SUQADD_s 0101 1110 ..1 00000 00111 0 ..... ..... @r2r_e +USQADD_s 0111 1110 ..1 00000 00111 0 ..... ..... @r2r_e + +SSHL_s 0101 1110 111 ..... 01000 1 ..... ..... @rrr_d +USHL_s 0111 1110 111 ..... 01000 1 ..... ..... @rrr_d +SRSHL_s 0101 1110 111 ..... 01010 1 ..... ..... @rrr_d +URSHL_s 0111 1110 111 ..... 01010 1 ..... ..... @rrr_d +SQSHL_s 0101 1110 ..1 ..... 01001 1 ..... ..... @rrr_e +UQSHL_s 0111 1110 ..1 ..... 01001 1 ..... ..... @rrr_e +SQRSHL_s 0101 1110 ..1 ..... 01011 1 ..... ..... @rrr_e +UQRSHL_s 0111 1110 ..1 ..... 01011 1 ..... ..... @rrr_e + +ADD_s 0101 1110 111 ..... 10000 1 ..... ..... @rrr_d +SUB_s 0111 1110 111 ..... 10000 1 ..... ..... @rrr_d +CMGT_s 0101 1110 111 ..... 00110 1 ..... ..... @rrr_d +CMHI_s 0111 1110 111 ..... 00110 1 ..... ..... @rrr_d +CMGE_s 0101 1110 111 ..... 00111 1 ..... ..... @rrr_d +CMHS_s 0111 1110 111 ..... 00111 1 ..... ..... @rrr_d +CMTST_s 0101 1110 111 ..... 10001 1 ..... ..... @rrr_d +CMEQ_s 0111 1110 111 ..... 10001 1 ..... ..... @rrr_d + +SQDMULH_s 0101 1110 ..1 ..... 10110 1 ..... ..... @rrr_e +SQRDMULH_s 0111 1110 ..1 ..... 10110 1 ..... ..... @rrr_e + ### Advanced SIMD scalar pairwise FADDP_s 0101 1110 0011 0000 1101 10 ..... ..... @rr_h @@ -857,6 +895,53 @@ BSL_v 0.10 1110 011 ..... 00011 1 ..... ..... @qrrr_b BIT_v 0.10 1110 101 ..... 00011 1 ..... ..... @qrrr_b BIF_v 0.10 1110 111 ..... 00011 1 ..... ..... @qrrr_b +SQADD_v 0.00 1110 ..1 ..... 00001 1 ..... ..... @qrrr_e +UQADD_v 0.10 1110 ..1 ..... 00001 1 ..... ..... @qrrr_e +SQSUB_v 0.00 1110 ..1 ..... 00101 1 ..... ..... @qrrr_e +UQSUB_v 0.10 1110 ..1 ..... 00101 1 ..... ..... @qrrr_e + +SUQADD_v 0.00 1110 ..1 00000 00111 0 ..... ..... @qr2r_e +USQADD_v 0.10 1110 ..1 00000 00111 0 ..... ..... @qr2r_e + +SSHL_v 0.00 1110 ..1 ..... 01000 1 ..... ..... @qrrr_e +USHL_v 0.10 1110 ..1 ..... 01000 1 ..... ..... @qrrr_e +SRSHL_v 0.00 1110 ..1 ..... 01010 1 ..... ..... @qrrr_e +URSHL_v 0.10 1110 ..1 ..... 01010 1 ..... ..... @qrrr_e +SQSHL_v 0.00 1110 ..1 ..... 01001 1 ..... ..... @qrrr_e +UQSHL_v 0.10 1110 ..1 ..... 01001 1 ..... ..... @qrrr_e +SQRSHL_v 0.00 1110 ..1 ..... 01011 1 ..... ..... @qrrr_e +UQRSHL_v 0.10 1110 ..1 ..... 01011 1 ..... ..... @qrrr_e + +ADD_v 0.00 1110 ..1 ..... 10000 1 ..... ..... @qrrr_e +SUB_v 0.10 1110 ..1 ..... 10000 1 ..... ..... @qrrr_e +CMGT_v 0.00 1110 ..1 ..... 00110 1 ..... ..... @qrrr_e +CMHI_v 0.10 1110 ..1 ..... 00110 1 ..... ..... @qrrr_e +CMGE_v 0.00 1110 ..1 ..... 00111 1 ..... ..... @qrrr_e +CMHS_v 0.10 1110 ..1 ..... 00111 1 ..... ..... @qrrr_e +CMTST_v 0.00 1110 ..1 ..... 10001 1 ..... ..... @qrrr_e +CMEQ_v 0.10 1110 ..1 ..... 10001 1 ..... ..... @qrrr_e +SHADD_v 0.00 1110 ..1 ..... 00000 1 ..... ..... @qrrr_e +UHADD_v 0.10 1110 ..1 ..... 00000 1 ..... ..... 
@qrrr_e +SHSUB_v 0.00 1110 ..1 ..... 00100 1 ..... ..... @qrrr_e +UHSUB_v 0.10 1110 ..1 ..... 00100 1 ..... ..... @qrrr_e +SRHADD_v 0.00 1110 ..1 ..... 00010 1 ..... ..... @qrrr_e +URHADD_v 0.10 1110 ..1 ..... 00010 1 ..... ..... @qrrr_e +SMAX_v 0.00 1110 ..1 ..... 01100 1 ..... ..... @qrrr_e +UMAX_v 0.10 1110 ..1 ..... 01100 1 ..... ..... @qrrr_e +SMIN_v 0.00 1110 ..1 ..... 01101 1 ..... ..... @qrrr_e +UMIN_v 0.10 1110 ..1 ..... 01101 1 ..... ..... @qrrr_e +SABD_v 0.00 1110 ..1 ..... 01110 1 ..... ..... @qrrr_e +UABD_v 0.10 1110 ..1 ..... 01110 1 ..... ..... @qrrr_e +SABA_v 0.00 1110 ..1 ..... 01111 1 ..... ..... @qrrr_e +UABA_v 0.10 1110 ..1 ..... 01111 1 ..... ..... @qrrr_e +MUL_v 0.00 1110 ..1 ..... 10011 1 ..... ..... @qrrr_e +PMUL_v 0.10 1110 001 ..... 10011 1 ..... ..... @qrrr_b +MLA_v 0.00 1110 ..1 ..... 10010 1 ..... ..... @qrrr_e +MLS_v 0.10 1110 ..1 ..... 10010 1 ..... ..... @qrrr_e + +SQDMULH_v 0.00 1110 ..1 ..... 10110 1 ..... ..... @qrrr_e +SQRDMULH_v 0.10 1110 ..1 ..... 10110 1 ..... ..... @qrrr_e + ### Advanced SIMD scalar x indexed element FMUL_si 0101 1111 00 .. .... 1001 . 0 ..... ..... @rrx_h @@ -875,6 +960,12 @@ FMULX_si 0111 1111 00 .. .... 1001 . 0 ..... ..... @rrx_h FMULX_si 0111 1111 10 . ..... 1001 . 0 ..... ..... @rrx_s FMULX_si 0111 1111 11 0 ..... 1001 . 0 ..... ..... @rrx_d +SQDMULH_si 0101 1111 01 .. .... 1100 . 0 ..... ..... @rrx_h +SQDMULH_si 0101 1111 10 .. .... 1100 . 0 ..... ..... @rrx_s + +SQRDMULH_si 0101 1111 01 .. .... 1101 . 0 ..... ..... @rrx_h +SQRDMULH_si 0101 1111 10 . ..... 1101 . 0 ..... ..... @rrx_s + ### Advanced SIMD vector x indexed element FMUL_vi 0.00 1111 00 .. .... 1001 . 0 ..... ..... @qrrx_h @@ -897,3 +988,31 @@ FMLAL_vi 0.00 1111 10 .. .... 0000 . 0 ..... ..... @qrrx_h FMLSL_vi 0.00 1111 10 .. .... 0100 . 0 ..... ..... @qrrx_h FMLAL2_vi 0.10 1111 10 .. .... 1000 . 0 ..... ..... @qrrx_h FMLSL2_vi 0.10 1111 10 .. .... 1100 . 0 ..... ..... @qrrx_h + +MUL_vi 0.00 1111 01 .. .... 1000 . 0 ..... ..... @qrrx_h +MUL_vi 0.00 1111 10 . ..... 1000 . 0 ..... ..... @qrrx_s + +MLA_vi 0.10 1111 01 .. .... 0000 . 0 ..... ..... @qrrx_h +MLA_vi 0.10 1111 10 . ..... 0000 . 0 ..... ..... @qrrx_s + +MLS_vi 0.10 1111 01 .. .... 0100 . 0 ..... ..... @qrrx_h +MLS_vi 0.10 1111 10 . ..... 0100 . 0 ..... ..... @qrrx_s + +SQDMULH_vi 0.00 1111 01 .. .... 1100 . 0 ..... ..... @qrrx_h +SQDMULH_vi 0.00 1111 10 . ..... 1100 . 0 ..... ..... @qrrx_s + +SQRDMULH_vi 0.00 1111 01 .. .... 1101 . 0 ..... ..... @qrrx_h +SQRDMULH_vi 0.00 1111 10 . ..... 1101 . 0 ..... ..... @qrrx_s + +# Floating-point conditional select + +FCSEL 0001 1110 .. 1 rm:5 cond:4 11 rn:5 rd:5 esz=%esz_hsd + +# Floating-point data-processing (3 source) + +@rrrr_hsd .... .... .. . rm:5 . ra:5 rn:5 rd:5 &rrrr_e esz=%esz_hsd + +FMADD 0001 1111 .. 0 ..... 0 ..... ..... ..... @rrrr_hsd +FMSUB 0001 1111 .. 0 ..... 1 ..... ..... ..... @rrrr_hsd +FNMADD 0001 1111 .. 1 ..... 0 ..... ..... ..... @rrrr_hsd +FNMSUB 0001 1111 .. 1 ..... 1 ..... ..... ..... 
@rrrr_hsd diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c index da41a44f75..0899251eef 100644 --- a/target/arm/tcg/cpu64.c +++ b/target/arm/tcg/cpu64.c @@ -1168,6 +1168,7 @@ void aarch64_max_tcg_initfn(Object *obj) t = cpu->isar.id_aa64isar2; t = FIELD_DP64(t, ID_AA64ISAR2, MOPS, 1); /* FEAT_MOPS */ t = FIELD_DP64(t, ID_AA64ISAR2, BC, 1); /* FEAT_HBC */ + t = FIELD_DP64(t, ID_AA64ISAR2, WFXT, 2); /* FEAT_WFxT */ cpu->isar.id_aa64isar2 = t; t = cpu->isar.id_aa64pfr0; diff --git a/target/arm/tcg/gengvec.c b/target/arm/tcg/gengvec.c index 22c9d17dce..56a1dc1f75 100644 --- a/target/arm/tcg/gengvec.c +++ b/target/arm/tcg/gengvec.c @@ -29,11 +29,32 @@ static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, { TCGv_ptr qc_ptr = tcg_temp_new_ptr(); + tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc)); tcg_gen_addi_ptr(qc_ptr, tcg_env, offsetof(CPUARMState, vfp.qc)); tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr, opr_sz, max_sz, 0, fn); } +void gen_gvec_sqdmulh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static gen_helper_gvec_3_ptr * const fns[2] = { + gen_helper_neon_sqdmulh_h, gen_helper_neon_sqdmulh_s + }; + tcg_debug_assert(vece >= 1 && vece <= 2); + gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]); +} + +void gen_gvec_sqrdmulh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static gen_helper_gvec_3_ptr * const fns[2] = { + gen_helper_neon_sqrdmulh_h, gen_helper_neon_sqrdmulh_s + }; + tcg_debug_assert(vece >= 1 && vece <= 2); + gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]); +} + void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) { @@ -933,21 +954,17 @@ void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, /* CMTST : test is "if (X & Y != 0)". 
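Folding note (editorial): TCG_COND_TSTNE tests "(a & b) != 0" in a single comparison, which is why the rewrite below can drop the explicit AND and the compare against zero. 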
*/ static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) { - tcg_gen_and_i32(d, a, b); - tcg_gen_negsetcond_i32(TCG_COND_NE, d, d, tcg_constant_i32(0)); + tcg_gen_negsetcond_i32(TCG_COND_TSTNE, d, a, b); } void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) { - tcg_gen_and_i64(d, a, b); - tcg_gen_negsetcond_i64(TCG_COND_NE, d, d, tcg_constant_i64(0)); + tcg_gen_negsetcond_i64(TCG_COND_TSTNE, d, a, b); } static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) { - tcg_gen_and_vec(vece, d, a, b); - tcg_gen_dupi_vec(vece, a, 0); - tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a); + tcg_gen_cmp_vec(TCG_COND_TSTNE, vece, d, a, b); } void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, @@ -1217,21 +1234,113 @@ void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); } -static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat, +void gen_gvec_srshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static gen_helper_gvec_3 * const fns[] = { + gen_helper_gvec_srshl_b, gen_helper_gvec_srshl_h, + gen_helper_gvec_srshl_s, gen_helper_gvec_srshl_d, + }; + tcg_debug_assert(vece <= MO_64); + tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); +} + +void gen_gvec_urshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static gen_helper_gvec_3 * const fns[] = { + gen_helper_gvec_urshl_b, gen_helper_gvec_urshl_h, + gen_helper_gvec_urshl_s, gen_helper_gvec_urshl_d, + }; + tcg_debug_assert(vece <= MO_64); + tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); +} + +void gen_neon_sqshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static gen_helper_gvec_3_ptr * const fns[] = { + gen_helper_neon_sqshl_b, gen_helper_neon_sqshl_h, + gen_helper_neon_sqshl_s, gen_helper_neon_sqshl_d, + }; + tcg_debug_assert(vece <= MO_64); + tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env, + opr_sz, max_sz, 0, fns[vece]); +} + +void gen_neon_uqshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static gen_helper_gvec_3_ptr * const fns[] = { + gen_helper_neon_uqshl_b, gen_helper_neon_uqshl_h, + gen_helper_neon_uqshl_s, gen_helper_neon_uqshl_d, + }; + tcg_debug_assert(vece <= MO_64); + tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env, + opr_sz, max_sz, 0, fns[vece]); +} + +void gen_neon_sqrshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static gen_helper_gvec_3_ptr * const fns[] = { + gen_helper_neon_sqrshl_b, gen_helper_neon_sqrshl_h, + gen_helper_neon_sqrshl_s, gen_helper_neon_sqrshl_d, + }; + tcg_debug_assert(vece <= MO_64); + tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env, + opr_sz, max_sz, 0, fns[vece]); +} + +void gen_neon_uqrshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static gen_helper_gvec_3_ptr * const fns[] = { + gen_helper_neon_uqrshl_b, gen_helper_neon_uqrshl_h, + gen_helper_neon_uqrshl_s, gen_helper_neon_uqrshl_d, + }; + tcg_debug_assert(vece <= MO_64); + tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env, + opr_sz, max_sz, 0, fns[vece]); +} + +void gen_uqadd_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz) +{ + uint64_t max = MAKE_64BIT_MASK(0, 8 << esz); + TCGv_i64 tmp 
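/*
 * Editorial note, not part of the original patch: the new expanders here
 * all record saturation the same way. The clamped result is XORed with the
 * raw result and the difference is ORed into @qc, so @qc becomes non-zero
 * exactly when saturation changed the value; no comparison is needed.
 */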
= tcg_temp_new_i64(); + + tcg_gen_add_i64(tmp, a, b); + tcg_gen_umin_i64(res, tmp, tcg_constant_i64(max)); + tcg_gen_xor_i64(tmp, tmp, res); + tcg_gen_or_i64(qc, qc, tmp); +} + +void gen_uqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_add_i64(t, a, b); + tcg_gen_movcond_i64(TCG_COND_LTU, res, t, a, + tcg_constant_i64(UINT64_MAX), t); + tcg_gen_xor_i64(t, t, res); + tcg_gen_or_i64(qc, qc, t); +} + +static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc, TCGv_vec a, TCGv_vec b) { TCGv_vec x = tcg_temp_new_vec_matching(t); tcg_gen_add_vec(vece, x, a, b); tcg_gen_usadd_vec(vece, t, a, b); - tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t); - tcg_gen_or_vec(vece, sat, sat, x); + tcg_gen_xor_vec(vece, x, x, t); + tcg_gen_or_vec(vece, qc, qc, x); } void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) { static const TCGOpcode vecop_list[] = { - INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0 + INDEX_op_usadd_vec, INDEX_op_add_vec, 0 }; static const GVecGen4 ops[4] = { { .fniv = gen_uqadd_vec, @@ -1250,30 +1359,68 @@ void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, .opt_opc = vecop_list, .vece = MO_32 }, { .fniv = gen_uqadd_vec, + .fni8 = gen_uqadd_d, .fno = gen_helper_gvec_uqadd_d, .write_aofs = true, .opt_opc = vecop_list, .vece = MO_64 }, }; + + tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc)); tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); } -static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat, +void gen_sqadd_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz) +{ + int64_t max = MAKE_64BIT_MASK(0, (8 << esz) - 1); + int64_t min = -1ll - max; + TCGv_i64 tmp = tcg_temp_new_i64(); + + tcg_gen_add_i64(tmp, a, b); + tcg_gen_smin_i64(res, tmp, tcg_constant_i64(max)); + tcg_gen_smax_i64(res, res, tcg_constant_i64(min)); + tcg_gen_xor_i64(tmp, tmp, res); + tcg_gen_or_i64(qc, qc, tmp); +} + +void gen_sqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + TCGv_i64 t2 = tcg_temp_new_i64(); + + tcg_gen_add_i64(t0, a, b); + + /* Compute signed overflow indication into T1 */ + tcg_gen_xor_i64(t1, a, b); + tcg_gen_xor_i64(t2, t0, a); + tcg_gen_andc_i64(t1, t2, t1); + + /* Compute saturated value into T2 */ + tcg_gen_sari_i64(t2, a, 63); + tcg_gen_xori_i64(t2, t2, INT64_MAX); + + tcg_gen_movcond_i64(TCG_COND_LT, res, t1, tcg_constant_i64(0), t2, t0); + tcg_gen_xor_i64(t0, t0, res); + tcg_gen_or_i64(qc, qc, t0); +} + +static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc, TCGv_vec a, TCGv_vec b) { TCGv_vec x = tcg_temp_new_vec_matching(t); tcg_gen_add_vec(vece, x, a, b); tcg_gen_ssadd_vec(vece, t, a, b); - tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t); - tcg_gen_or_vec(vece, sat, sat, x); + tcg_gen_xor_vec(vece, x, x, t); + tcg_gen_or_vec(vece, qc, qc, x); } void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) { static const TCGOpcode vecop_list[] = { - INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0 + INDEX_op_ssadd_vec, INDEX_op_add_vec, 0 }; static const GVecGen4 ops[4] = { { .fniv = gen_sqadd_vec, @@ -1292,30 +1439,53 @@ void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, .write_aofs = true, .vece = MO_32 }, { .fniv = gen_sqadd_vec, + .fni8 = gen_sqadd_d, .fno = 
gen_helper_gvec_sqadd_d, .opt_opc = vecop_list, .write_aofs = true, .vece = MO_64 }, }; + + tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc)); tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); } -static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat, +void gen_uqsub_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz) +{ + TCGv_i64 tmp = tcg_temp_new_i64(); + + tcg_gen_sub_i64(tmp, a, b); + tcg_gen_smax_i64(res, tmp, tcg_constant_i64(0)); + tcg_gen_xor_i64(tmp, tmp, res); + tcg_gen_or_i64(qc, qc, tmp); +} + +void gen_uqsub_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_sub_i64(t, a, b); + tcg_gen_movcond_i64(TCG_COND_LTU, res, a, b, tcg_constant_i64(0), t); + tcg_gen_xor_i64(t, t, res); + tcg_gen_or_i64(qc, qc, t); +} + +static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec qc, TCGv_vec a, TCGv_vec b) { TCGv_vec x = tcg_temp_new_vec_matching(t); tcg_gen_sub_vec(vece, x, a, b); tcg_gen_ussub_vec(vece, t, a, b); - tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t); - tcg_gen_or_vec(vece, sat, sat, x); + tcg_gen_xor_vec(vece, x, x, t); + tcg_gen_or_vec(vece, qc, qc, x); } void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) { static const TCGOpcode vecop_list[] = { - INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0 + INDEX_op_ussub_vec, INDEX_op_sub_vec, 0 }; static const GVecGen4 ops[4] = { { .fniv = gen_uqsub_vec, @@ -1334,30 +1504,68 @@ void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, .write_aofs = true, .vece = MO_32 }, { .fniv = gen_uqsub_vec, + .fni8 = gen_uqsub_d, .fno = gen_helper_gvec_uqsub_d, .opt_opc = vecop_list, .write_aofs = true, .vece = MO_64 }, }; + + tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc)); tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); } -static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat, +void gen_sqsub_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz) +{ + int64_t max = MAKE_64BIT_MASK(0, (8 << esz) - 1); + int64_t min = -1ll - max; + TCGv_i64 tmp = tcg_temp_new_i64(); + + tcg_gen_sub_i64(tmp, a, b); + tcg_gen_smin_i64(res, tmp, tcg_constant_i64(max)); + tcg_gen_smax_i64(res, res, tcg_constant_i64(min)); + tcg_gen_xor_i64(tmp, tmp, res); + tcg_gen_or_i64(qc, qc, tmp); +} + +void gen_sqsub_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + TCGv_i64 t2 = tcg_temp_new_i64(); + + tcg_gen_sub_i64(t0, a, b); + + /* Compute signed overflow indication into T1 */ + tcg_gen_xor_i64(t1, a, b); + tcg_gen_xor_i64(t2, t0, a); + tcg_gen_and_i64(t1, t1, t2); + + /* Compute saturated value into T2 */ + tcg_gen_sari_i64(t2, a, 63); + tcg_gen_xori_i64(t2, t2, INT64_MAX); + + tcg_gen_movcond_i64(TCG_COND_LT, res, t1, tcg_constant_i64(0), t2, t0); + tcg_gen_xor_i64(t0, t0, res); + tcg_gen_or_i64(qc, qc, t0); +} + +static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec qc, TCGv_vec a, TCGv_vec b) { TCGv_vec x = tcg_temp_new_vec_matching(t); tcg_gen_sub_vec(vece, x, a, b); tcg_gen_sssub_vec(vece, t, a, b); - tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t); - tcg_gen_or_vec(vece, sat, sat, x); + tcg_gen_xor_vec(vece, x, x, t); + tcg_gen_or_vec(vece, qc, qc, x); } void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, uint32_t opr_sz, 
uint32_t max_sz) { static const TCGOpcode vecop_list[] = { - INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0 + INDEX_op_sssub_vec, INDEX_op_sub_vec, 0 }; static const GVecGen4 ops[4] = { { .fniv = gen_sqsub_vec, @@ -1376,11 +1584,14 @@ void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, .write_aofs = true, .vece = MO_32 }, { .fniv = gen_sqsub_vec, + .fni8 = gen_sqsub_d, .fno = gen_helper_gvec_sqsub_d, .opt_opc = vecop_list, .write_aofs = true, .vece = MO_64 }, }; + + tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc)); tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); } @@ -1670,3 +1881,435 @@ void gen_gvec_uminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, tcg_debug_assert(vece <= MO_32); tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); } + +static void gen_shadd8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_and_i64(t, a, b); + tcg_gen_vec_sar8i_i64(a, a, 1); + tcg_gen_vec_sar8i_i64(b, b, 1); + tcg_gen_andi_i64(t, t, dup_const(MO_8, 1)); + tcg_gen_vec_add8_i64(d, a, b); + tcg_gen_vec_add8_i64(d, d, t); +} + +static void gen_shadd16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_and_i64(t, a, b); + tcg_gen_vec_sar16i_i64(a, a, 1); + tcg_gen_vec_sar16i_i64(b, b, 1); + tcg_gen_andi_i64(t, t, dup_const(MO_16, 1)); + tcg_gen_vec_add16_i64(d, a, b); + tcg_gen_vec_add16_i64(d, d, t); +} + +static void gen_shadd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t = tcg_temp_new_i32(); + + tcg_gen_and_i32(t, a, b); + tcg_gen_sari_i32(a, a, 1); + tcg_gen_sari_i32(b, b, 1); + tcg_gen_andi_i32(t, t, 1); + tcg_gen_add_i32(d, a, b); + tcg_gen_add_i32(d, d, t); +} + +static void gen_shadd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t = tcg_temp_new_vec_matching(d); + + tcg_gen_and_vec(vece, t, a, b); + tcg_gen_sari_vec(vece, a, a, 1); + tcg_gen_sari_vec(vece, b, b, 1); + tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1)); + tcg_gen_add_vec(vece, d, a, b); + tcg_gen_add_vec(vece, d, d, t); +} + +void gen_gvec_shadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_sari_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen3 g[] = { + { .fni8 = gen_shadd8_i64, + .fniv = gen_shadd_vec, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni8 = gen_shadd16_i64, + .fniv = gen_shadd_vec, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_shadd_i32, + .fniv = gen_shadd_vec, + .opt_opc = vecop_list, + .vece = MO_32 }, + }; + tcg_debug_assert(vece <= MO_32); + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]); +} + +static void gen_uhadd8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_and_i64(t, a, b); + tcg_gen_vec_shr8i_i64(a, a, 1); + tcg_gen_vec_shr8i_i64(b, b, 1); + tcg_gen_andi_i64(t, t, dup_const(MO_8, 1)); + tcg_gen_vec_add8_i64(d, a, b); + tcg_gen_vec_add8_i64(d, d, t); +} + +static void gen_uhadd16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_and_i64(t, a, b); + tcg_gen_vec_shr16i_i64(a, a, 1); + tcg_gen_vec_shr16i_i64(b, b, 1); + tcg_gen_andi_i64(t, t, dup_const(MO_16, 1)); + tcg_gen_vec_add16_i64(d, a, b); + tcg_gen_vec_add16_i64(d, d, t); +} + +static void gen_uhadd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t = tcg_temp_new_i32(); 
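/*
 * Editorial note, not part of the original patch: these halving-add
 * expanders use the identity (a + b) >> 1 == (a >> 1) + (b >> 1) + (a & b & 1).
 * Shifting each operand first avoids overflow of the intermediate sum, and
 * (a & b & 1) restores the carry the two discarded low bits would have
 * generated. Worked example: 7 + 5 = 12, so (7 + 5) >> 1 = 6, and
 * (7 >> 1) + (5 >> 1) + (7 & 5 & 1) = 3 + 2 + 1 = 6.
 */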
+ + tcg_gen_and_i32(t, a, b); + tcg_gen_shri_i32(a, a, 1); + tcg_gen_shri_i32(b, b, 1); + tcg_gen_andi_i32(t, t, 1); + tcg_gen_add_i32(d, a, b); + tcg_gen_add_i32(d, d, t); +} + +static void gen_uhadd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t = tcg_temp_new_vec_matching(d); + + tcg_gen_and_vec(vece, t, a, b); + tcg_gen_shri_vec(vece, a, a, 1); + tcg_gen_shri_vec(vece, b, b, 1); + tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1)); + tcg_gen_add_vec(vece, d, a, b); + tcg_gen_add_vec(vece, d, d, t); +} + +void gen_gvec_uhadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_shri_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen3 g[] = { + { .fni8 = gen_uhadd8_i64, + .fniv = gen_uhadd_vec, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni8 = gen_uhadd16_i64, + .fniv = gen_uhadd_vec, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_uhadd_i32, + .fniv = gen_uhadd_vec, + .opt_opc = vecop_list, + .vece = MO_32 }, + }; + tcg_debug_assert(vece <= MO_32); + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]); +} + +static void gen_shsub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_andc_i64(t, b, a); + tcg_gen_vec_sar8i_i64(a, a, 1); + tcg_gen_vec_sar8i_i64(b, b, 1); + tcg_gen_andi_i64(t, t, dup_const(MO_8, 1)); + tcg_gen_vec_sub8_i64(d, a, b); + tcg_gen_vec_sub8_i64(d, d, t); +} + +static void gen_shsub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_andc_i64(t, b, a); + tcg_gen_vec_sar16i_i64(a, a, 1); + tcg_gen_vec_sar16i_i64(b, b, 1); + tcg_gen_andi_i64(t, t, dup_const(MO_16, 1)); + tcg_gen_vec_sub16_i64(d, a, b); + tcg_gen_vec_sub16_i64(d, d, t); +} + +static void gen_shsub_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t = tcg_temp_new_i32(); + + tcg_gen_andc_i32(t, b, a); + tcg_gen_sari_i32(a, a, 1); + tcg_gen_sari_i32(b, b, 1); + tcg_gen_andi_i32(t, t, 1); + tcg_gen_sub_i32(d, a, b); + tcg_gen_sub_i32(d, d, t); +} + +static void gen_shsub_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t = tcg_temp_new_vec_matching(d); + + tcg_gen_andc_vec(vece, t, b, a); + tcg_gen_sari_vec(vece, a, a, 1); + tcg_gen_sari_vec(vece, b, b, 1); + tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1)); + tcg_gen_sub_vec(vece, d, a, b); + tcg_gen_sub_vec(vece, d, d, t); +} + +void gen_gvec_shsub(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_sari_vec, INDEX_op_sub_vec, 0 + }; + static const GVecGen3 g[4] = { + { .fni8 = gen_shsub8_i64, + .fniv = gen_shsub_vec, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni8 = gen_shsub16_i64, + .fniv = gen_shsub_vec, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_shsub_i32, + .fniv = gen_shsub_vec, + .opt_opc = vecop_list, + .vece = MO_32 }, + }; + assert(vece <= MO_32); + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]); +} + +static void gen_uhsub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_andc_i64(t, b, a); + tcg_gen_vec_shr8i_i64(a, a, 1); + tcg_gen_vec_shr8i_i64(b, b, 1); + tcg_gen_andi_i64(t, t, dup_const(MO_8, 1)); + tcg_gen_vec_sub8_i64(d, a, b); + tcg_gen_vec_sub8_i64(d, d, t); +} + +static void gen_uhsub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = 
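/*
 * Editorial note: the halving-sub expanders use the matching identity
 * (a - b) >> 1 == (a >> 1) - (b >> 1) - (~a & b & 1); the andc(t, b, a)
 * step computes exactly that borrow from the discarded low bits.
 */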
tcg_temp_new_i64(); + + tcg_gen_andc_i64(t, b, a); + tcg_gen_vec_shr16i_i64(a, a, 1); + tcg_gen_vec_shr16i_i64(b, b, 1); + tcg_gen_andi_i64(t, t, dup_const(MO_16, 1)); + tcg_gen_vec_sub16_i64(d, a, b); + tcg_gen_vec_sub16_i64(d, d, t); +} + +static void gen_uhsub_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t = tcg_temp_new_i32(); + + tcg_gen_andc_i32(t, b, a); + tcg_gen_shri_i32(a, a, 1); + tcg_gen_shri_i32(b, b, 1); + tcg_gen_andi_i32(t, t, 1); + tcg_gen_sub_i32(d, a, b); + tcg_gen_sub_i32(d, d, t); +} + +static void gen_uhsub_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t = tcg_temp_new_vec_matching(d); + + tcg_gen_andc_vec(vece, t, b, a); + tcg_gen_shri_vec(vece, a, a, 1); + tcg_gen_shri_vec(vece, b, b, 1); + tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1)); + tcg_gen_sub_vec(vece, d, a, b); + tcg_gen_sub_vec(vece, d, d, t); +} + +void gen_gvec_uhsub(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_shri_vec, INDEX_op_sub_vec, 0 + }; + static const GVecGen3 g[4] = { + { .fni8 = gen_uhsub8_i64, + .fniv = gen_uhsub_vec, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni8 = gen_uhsub16_i64, + .fniv = gen_uhsub_vec, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_uhsub_i32, + .fniv = gen_uhsub_vec, + .opt_opc = vecop_list, + .vece = MO_32 }, + }; + assert(vece <= MO_32); + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]); +} + +static void gen_srhadd8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_or_i64(t, a, b); + tcg_gen_vec_sar8i_i64(a, a, 1); + tcg_gen_vec_sar8i_i64(b, b, 1); + tcg_gen_andi_i64(t, t, dup_const(MO_8, 1)); + tcg_gen_vec_add8_i64(d, a, b); + tcg_gen_vec_add8_i64(d, d, t); +} + +static void gen_srhadd16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_or_i64(t, a, b); + tcg_gen_vec_sar16i_i64(a, a, 1); + tcg_gen_vec_sar16i_i64(b, b, 1); + tcg_gen_andi_i64(t, t, dup_const(MO_16, 1)); + tcg_gen_vec_add16_i64(d, a, b); + tcg_gen_vec_add16_i64(d, d, t); +} + +static void gen_srhadd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t = tcg_temp_new_i32(); + + tcg_gen_or_i32(t, a, b); + tcg_gen_sari_i32(a, a, 1); + tcg_gen_sari_i32(b, b, 1); + tcg_gen_andi_i32(t, t, 1); + tcg_gen_add_i32(d, a, b); + tcg_gen_add_i32(d, d, t); +} + +static void gen_srhadd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t = tcg_temp_new_vec_matching(d); + + tcg_gen_or_vec(vece, t, a, b); + tcg_gen_sari_vec(vece, a, a, 1); + tcg_gen_sari_vec(vece, b, b, 1); + tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1)); + tcg_gen_add_vec(vece, d, a, b); + tcg_gen_add_vec(vece, d, d, t); +} + +void gen_gvec_srhadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_sari_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen3 g[] = { + { .fni8 = gen_srhadd8_i64, + .fniv = gen_srhadd_vec, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni8 = gen_srhadd16_i64, + .fniv = gen_srhadd_vec, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_srhadd_i32, + .fniv = gen_srhadd_vec, + .opt_opc = vecop_list, + .vece = MO_32 }, + }; + assert(vece <= MO_32); + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]); +} + +static void gen_urhadd8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + 
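/*
 * Editorial note: the rounding variants replace the carry term with
 * ((a | b) & 1), computing (a + b + 1) >> 1. E.g. for a = 2, b = 3:
 * (2 >> 1) + (3 >> 1) + ((2 | 3) & 1) = 1 + 1 + 1 = 3 == (2 + 3 + 1) >> 1.
 */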
TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_or_i64(t, a, b); + tcg_gen_vec_shr8i_i64(a, a, 1); + tcg_gen_vec_shr8i_i64(b, b, 1); + tcg_gen_andi_i64(t, t, dup_const(MO_8, 1)); + tcg_gen_vec_add8_i64(d, a, b); + tcg_gen_vec_add8_i64(d, d, t); +} + +static void gen_urhadd16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_or_i64(t, a, b); + tcg_gen_vec_sar16i_i64(a, a, 1); + tcg_gen_vec_shr16i_i64(b, b, 1); + tcg_gen_andi_i64(t, t, dup_const(MO_16, 1)); + tcg_gen_vec_add16_i64(d, a, b); + tcg_gen_vec_add16_i64(d, d, t); +} + +static void gen_urhadd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t = tcg_temp_new_i32(); + + tcg_gen_or_i32(t, a, b); + tcg_gen_shri_i32(a, a, 1); + tcg_gen_shri_i32(b, b, 1); + tcg_gen_andi_i32(t, t, 1); + tcg_gen_add_i32(d, a, b); + tcg_gen_add_i32(d, d, t); +} + +static void gen_urhadd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t = tcg_temp_new_vec_matching(d); + + tcg_gen_or_vec(vece, t, a, b); + tcg_gen_shri_vec(vece, a, a, 1); + tcg_gen_shri_vec(vece, b, b, 1); + tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1)); + tcg_gen_add_vec(vece, d, a, b); + tcg_gen_add_vec(vece, d, d, t); +} + +void gen_gvec_urhadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_shri_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen3 g[] = { + { .fni8 = gen_urhadd8_i64, + .fniv = gen_urhadd_vec, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni8 = gen_urhadd16_i64, + .fniv = gen_urhadd_vec, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_urhadd_i32, + .fniv = gen_urhadd_vec, + .opt_opc = vecop_list, + .vece = MO_32 }, + }; + assert(vece <= MO_32); + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]); +} diff --git a/target/arm/tcg/gengvec64.c b/target/arm/tcg/gengvec64.c index 093b498b13..2617cde0a5 100644 --- a/target/arm/tcg/gengvec64.c +++ b/target/arm/tcg/gengvec64.c @@ -188,3 +188,184 @@ void gen_gvec_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m, tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op); } +/* + * Set @res to the correctly saturated result. + * Set @qc non-zero if saturation occurred. + */ +void gen_suqadd_bhs(TCGv_i64 res, TCGv_i64 qc, + TCGv_i64 a, TCGv_i64 b, MemOp esz) +{ + TCGv_i64 max = tcg_constant_i64((1ull << ((8 << esz) - 1)) - 1); + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_add_i64(t, a, b); + tcg_gen_smin_i64(res, t, max); + tcg_gen_xor_i64(t, t, res); + tcg_gen_or_i64(qc, qc, t); +} + +void gen_suqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 max = tcg_constant_i64(INT64_MAX); + TCGv_i64 t = tcg_temp_new_i64(); + + /* Maximum value that can be added to @a without overflow. */ + tcg_gen_sub_i64(t, max, a); + + /* Constrain addend so that the next addition never overflows. 
*/ + tcg_gen_umin_vec(vece, u, u, b); + tcg_gen_add_vec(vece, t, u, a); + + /* Compute QC by comparing the adjusted @b. */ + tcg_gen_xor_vec(vece, u, u, b); + tcg_gen_or_vec(vece, qc, qc, u); +} + +void gen_gvec_suqadd_qc(unsigned vece, uint32_t rd_ofs, + uint32_t rn_ofs, uint32_t rm_ofs, + uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_add_vec, INDEX_op_sub_vec, INDEX_op_umin_vec, 0 + }; + static const GVecGen4 ops[4] = { + { .fniv = gen_suqadd_vec, + .fno = gen_helper_gvec_suqadd_b, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_8 }, + { .fniv = gen_suqadd_vec, + .fno = gen_helper_gvec_suqadd_h, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_16 }, + { .fniv = gen_suqadd_vec, + .fno = gen_helper_gvec_suqadd_s, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_32 }, + { .fniv = gen_suqadd_vec, + .fni8 = gen_suqadd_d, + .fno = gen_helper_gvec_suqadd_d, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_64 }, + }; + + tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc)); + tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), + rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} + +void gen_usqadd_bhs(TCGv_i64 res, TCGv_i64 qc, + TCGv_i64 a, TCGv_i64 b, MemOp esz) +{ + TCGv_i64 max = tcg_constant_i64(MAKE_64BIT_MASK(0, 8 << esz)); + TCGv_i64 zero = tcg_constant_i64(0); + TCGv_i64 tmp = tcg_temp_new_i64(); + + tcg_gen_add_i64(tmp, a, b); + tcg_gen_smin_i64(res, tmp, max); + tcg_gen_smax_i64(res, res, zero); + tcg_gen_xor_i64(tmp, tmp, res); + tcg_gen_or_i64(qc, qc, tmp); +} + +void gen_usqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 tmp = tcg_temp_new_i64(); + TCGv_i64 tneg = tcg_temp_new_i64(); + TCGv_i64 tpos = tcg_temp_new_i64(); + TCGv_i64 max = tcg_constant_i64(UINT64_MAX); + TCGv_i64 zero = tcg_constant_i64(0); + + tcg_gen_add_i64(tmp, a, b); + + /* If @b is positive, saturate if (a + b) < a, aka unsigned overflow. */ + tcg_gen_movcond_i64(TCG_COND_LTU, tpos, tmp, a, max, tmp); + + /* If @b is negative, saturate if a < -b, ie subtraction is negative. */ + tcg_gen_neg_i64(tneg, b); + tcg_gen_movcond_i64(TCG_COND_LTU, tneg, a, tneg, zero, tmp); + + /* Select correct result from sign of @b. */ + tcg_gen_movcond_i64(TCG_COND_LT, res, b, zero, tneg, tpos); + tcg_gen_xor_i64(tmp, tmp, res); + tcg_gen_or_i64(qc, qc, tmp); +} + +static void gen_usqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc, + TCGv_vec a, TCGv_vec b) +{ + TCGv_vec u = tcg_temp_new_vec_matching(t); + TCGv_vec z = tcg_constant_vec_matching(t, vece, 0); + + /* Compute unsigned saturation of add for +b and sub for -b. */ + tcg_gen_neg_vec(vece, t, b); + tcg_gen_usadd_vec(vece, u, a, b); + tcg_gen_ussub_vec(vece, t, a, t); + + /* Select the correct result depending on the sign of b. */ + tcg_gen_cmpsel_vec(TCG_COND_LT, vece, t, b, z, t, u); + + /* Compute QC by comparing against the non-saturated result. 
*/ + tcg_gen_add_vec(vece, u, a, b); + tcg_gen_xor_vec(vece, u, u, t); + tcg_gen_or_vec(vece, qc, qc, u); +} + +void gen_gvec_usqadd_qc(unsigned vece, uint32_t rd_ofs, + uint32_t rn_ofs, uint32_t rm_ofs, + uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_neg_vec, INDEX_op_add_vec, + INDEX_op_usadd_vec, INDEX_op_ussub_vec, + INDEX_op_cmpsel_vec, 0 + }; + static const GVecGen4 ops[4] = { + { .fniv = gen_usqadd_vec, + .fno = gen_helper_gvec_usqadd_b, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_8 }, + { .fniv = gen_usqadd_vec, + .fno = gen_helper_gvec_usqadd_h, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_16 }, + { .fniv = gen_usqadd_vec, + .fno = gen_helper_gvec_usqadd_s, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_32 }, + { .fniv = gen_usqadd_vec, + .fni8 = gen_usqadd_d, + .fno = gen_helper_gvec_usqadd_d, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_64 }, + }; + + tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc)); + tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), + rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} diff --git a/target/arm/tcg/neon-dp.decode b/target/arm/tcg/neon-dp.decode index fd3a01bfa0..788578c8fa 100644 --- a/target/arm/tcg/neon-dp.decode +++ b/target/arm/tcg/neon-dp.decode @@ -102,37 +102,12 @@ VCGE_U_3s 1111 001 1 0 . .. .... .... 0011 . . . 1 .... @3same VSHL_S_3s 1111 001 0 0 . .. .... .... 0100 . . . 0 .... @3same_rev VSHL_U_3s 1111 001 1 0 . .. .... .... 0100 . . . 0 .... @3same_rev - -# Insns operating on 64-bit elements (size!=0b11 handled elsewhere) -# The _rev suffix indicates that Vn and Vm are reversed (as explained -# by the comment for the @3same_rev format). -@3same_64_rev .... ... . . . 11 .... .... .... . q:1 . . .... \ - &3same vm=%vn_dp vn=%vm_dp vd=%vd_dp size=3 - -{ - VQSHL_S64_3s 1111 001 0 0 . .. .... .... 0100 . . . 1 .... @3same_64_rev - VQSHL_S_3s 1111 001 0 0 . .. .... .... 0100 . . . 1 .... @3same_rev -} -{ - VQSHL_U64_3s 1111 001 1 0 . .. .... .... 0100 . . . 1 .... @3same_64_rev - VQSHL_U_3s 1111 001 1 0 . .. .... .... 0100 . . . 1 .... @3same_rev -} -{ - VRSHL_S64_3s 1111 001 0 0 . .. .... .... 0101 . . . 0 .... @3same_64_rev - VRSHL_S_3s 1111 001 0 0 . .. .... .... 0101 . . . 0 .... @3same_rev -} -{ - VRSHL_U64_3s 1111 001 1 0 . .. .... .... 0101 . . . 0 .... @3same_64_rev - VRSHL_U_3s 1111 001 1 0 . .. .... .... 0101 . . . 0 .... @3same_rev -} -{ - VQRSHL_S64_3s 1111 001 0 0 . .. .... .... 0101 . . . 1 .... @3same_64_rev - VQRSHL_S_3s 1111 001 0 0 . .. .... .... 0101 . . . 1 .... @3same_rev -} -{ - VQRSHL_U64_3s 1111 001 1 0 . .. .... .... 0101 . . . 1 .... @3same_64_rev - VQRSHL_U_3s 1111 001 1 0 . .. .... .... 0101 . . . 1 .... @3same_rev -} +VQSHL_S_3s 1111 001 0 0 . .. .... .... 0100 . . . 1 .... @3same_rev +VQSHL_U_3s 1111 001 1 0 . .. .... .... 0100 . . . 1 .... @3same_rev +VRSHL_S_3s 1111 001 0 0 . .. .... .... 0101 . . . 0 .... @3same_rev +VRSHL_U_3s 1111 001 1 0 . .. .... .... 0101 . . . 0 .... @3same_rev +VQRSHL_S_3s 1111 001 0 0 . .. .... .... 0101 . . . 1 .... @3same_rev +VQRSHL_U_3s 1111 001 1 0 . .. .... .... 0101 . . . 1 .... @3same_rev VMAX_S_3s 1111 001 0 0 . .. .... .... 0110 . . . 0 .... @3same VMAX_U_3s 1111 001 1 0 . .. .... .... 0110 . . . 0 .... @3same diff --git a/target/arm/tcg/neon_helper.c b/target/arm/tcg/neon_helper.c index a0b51c8809..082bfd88ad 100644 --- a/target/arm/tcg/neon_helper.c +++ b/target/arm/tcg/neon_helper.c @@ -6,10 +6,11 @@ * * This code is licensed under the GNU GPL v2. 
*/ -#include "qemu/osdep.h" +#include "qemu/osdep.h" #include "cpu.h" #include "exec/helper-proto.h" +#include "tcg/tcg-gvec-desc.h" #include "fpu/softfloat.h" #include "vec_internal.h" @@ -117,6 +118,29 @@ NEON_VOP_BODY(vtype, n) uint32_t HELPER(glue(neon_,name))(CPUARMState *env, uint32_t arg1, uint32_t arg2) \ NEON_VOP_BODY(vtype, n) +#define NEON_GVEC_VOP2(name, vtype) \ +void HELPER(name)(void *vd, void *vn, void *vm, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + vtype *d = vd, *n = vn, *m = vm; \ + for (i = 0; i < opr_sz / sizeof(vtype); i++) { \ + NEON_FN(d[i], n[i], m[i]); \ + } \ + clear_tail(d, opr_sz, simd_maxsz(desc)); \ +} + +#define NEON_GVEC_VOP2_ENV(name, vtype) \ +void HELPER(name)(void *vd, void *vn, void *vm, void *venv, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + vtype *d = vd, *n = vn, *m = vm; \ + CPUARMState *env = venv; \ + for (i = 0; i < opr_sz / sizeof(vtype); i++) { \ + NEON_FN(d[i], n[i], m[i]); \ + } \ + clear_tail(d, opr_sz, simd_maxsz(desc)); \ +} + /* Pairwise operations. */ /* For 32-bit elements each segment only contains a single element, so the elementwise and pairwise operations are the same. */ @@ -155,414 +179,6 @@ uint32_t HELPER(glue(neon_,name))(uint32_t arg) \ return arg; \ } - -#define NEON_USAT(dest, src1, src2, type) do { \ - uint32_t tmp = (uint32_t)src1 + (uint32_t)src2; \ - if (tmp != (type)tmp) { \ - SET_QC(); \ - dest = ~0; \ - } else { \ - dest = tmp; \ - }} while(0) -#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint8_t) -NEON_VOP_ENV(qadd_u8, neon_u8, 4) -#undef NEON_FN -#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint16_t) -NEON_VOP_ENV(qadd_u16, neon_u16, 2) -#undef NEON_FN -#undef NEON_USAT - -uint32_t HELPER(neon_qadd_u32)(CPUARMState *env, uint32_t a, uint32_t b) -{ - uint32_t res = a + b; - if (res < a) { - SET_QC(); - res = ~0; - } - return res; -} - -uint64_t HELPER(neon_qadd_u64)(CPUARMState *env, uint64_t src1, uint64_t src2) -{ - uint64_t res; - - res = src1 + src2; - if (res < src1) { - SET_QC(); - res = ~(uint64_t)0; - } - return res; -} - -#define NEON_SSAT(dest, src1, src2, type) do { \ - int32_t tmp = (uint32_t)src1 + (uint32_t)src2; \ - if (tmp != (type)tmp) { \ - SET_QC(); \ - if (src2 > 0) { \ - tmp = (1 << (sizeof(type) * 8 - 1)) - 1; \ - } else { \ - tmp = 1 << (sizeof(type) * 8 - 1); \ - } \ - } \ - dest = tmp; \ - } while(0) -#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int8_t) -NEON_VOP_ENV(qadd_s8, neon_s8, 4) -#undef NEON_FN -#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int16_t) -NEON_VOP_ENV(qadd_s16, neon_s16, 2) -#undef NEON_FN -#undef NEON_SSAT - -uint32_t HELPER(neon_qadd_s32)(CPUARMState *env, uint32_t a, uint32_t b) -{ - uint32_t res = a + b; - if (((res ^ a) & SIGNBIT) && !((a ^ b) & SIGNBIT)) { - SET_QC(); - res = ~(((int32_t)a >> 31) ^ SIGNBIT); - } - return res; -} - -uint64_t HELPER(neon_qadd_s64)(CPUARMState *env, uint64_t src1, uint64_t src2) -{ - uint64_t res; - - res = src1 + src2; - if (((res ^ src1) & SIGNBIT64) && !((src1 ^ src2) & SIGNBIT64)) { - SET_QC(); - res = ((int64_t)src1 >> 63) ^ ~SIGNBIT64; - } - return res; -} - -/* Unsigned saturating accumulate of signed value - * - * Op1/Rn is treated as signed - * Op2/Rd is treated as unsigned - * - * Explicit casting is used to ensure the correct sign extension of - * inputs. The result is treated as a unsigned value and saturated as such. 
- * - * We use a macro for the 8/16 bit cases which expects signed integers of va, - * vb, and vr for interim calculation and an unsigned 32 bit result value r. - */ - -#define USATACC(bits, shift) \ - do { \ - va = sextract32(a, shift, bits); \ - vb = extract32(b, shift, bits); \ - vr = va + vb; \ - if (vr > UINT##bits##_MAX) { \ - SET_QC(); \ - vr = UINT##bits##_MAX; \ - } else if (vr < 0) { \ - SET_QC(); \ - vr = 0; \ - } \ - r = deposit32(r, shift, bits, vr); \ - } while (0) - -uint32_t HELPER(neon_uqadd_s8)(CPUARMState *env, uint32_t a, uint32_t b) -{ - int16_t va, vb, vr; - uint32_t r = 0; - - USATACC(8, 0); - USATACC(8, 8); - USATACC(8, 16); - USATACC(8, 24); - return r; -} - -uint32_t HELPER(neon_uqadd_s16)(CPUARMState *env, uint32_t a, uint32_t b) -{ - int32_t va, vb, vr; - uint64_t r = 0; - - USATACC(16, 0); - USATACC(16, 16); - return r; -} - -#undef USATACC - -uint32_t HELPER(neon_uqadd_s32)(CPUARMState *env, uint32_t a, uint32_t b) -{ - int64_t va = (int32_t)a; - int64_t vb = (uint32_t)b; - int64_t vr = va + vb; - if (vr > UINT32_MAX) { - SET_QC(); - vr = UINT32_MAX; - } else if (vr < 0) { - SET_QC(); - vr = 0; - } - return vr; -} - -uint64_t HELPER(neon_uqadd_s64)(CPUARMState *env, uint64_t a, uint64_t b) -{ - uint64_t res; - res = a + b; - /* We only need to look at the pattern of SIGN bits to detect - * +ve/-ve saturation - */ - if (~a & b & ~res & SIGNBIT64) { - SET_QC(); - res = UINT64_MAX; - } else if (a & ~b & res & SIGNBIT64) { - SET_QC(); - res = 0; - } - return res; -} - -/* Signed saturating accumulate of unsigned value - * - * Op1/Rn is treated as unsigned - * Op2/Rd is treated as signed - * - * The result is treated as a signed value and saturated as such - * - * We use a macro for the 8/16 bit cases which expects signed integers of va, - * vb, and vr for interim calculation and an unsigned 32 bit result value r. 
- */ - -#define SSATACC(bits, shift) \ - do { \ - va = extract32(a, shift, bits); \ - vb = sextract32(b, shift, bits); \ - vr = va + vb; \ - if (vr > INT##bits##_MAX) { \ - SET_QC(); \ - vr = INT##bits##_MAX; \ - } else if (vr < INT##bits##_MIN) { \ - SET_QC(); \ - vr = INT##bits##_MIN; \ - } \ - r = deposit32(r, shift, bits, vr); \ - } while (0) - -uint32_t HELPER(neon_sqadd_u8)(CPUARMState *env, uint32_t a, uint32_t b) -{ - int16_t va, vb, vr; - uint32_t r = 0; - - SSATACC(8, 0); - SSATACC(8, 8); - SSATACC(8, 16); - SSATACC(8, 24); - return r; -} - -uint32_t HELPER(neon_sqadd_u16)(CPUARMState *env, uint32_t a, uint32_t b) -{ - int32_t va, vb, vr; - uint32_t r = 0; - - SSATACC(16, 0); - SSATACC(16, 16); - - return r; -} - -#undef SSATACC - -uint32_t HELPER(neon_sqadd_u32)(CPUARMState *env, uint32_t a, uint32_t b) -{ - int64_t res; - int64_t op1 = (uint32_t)a; - int64_t op2 = (int32_t)b; - res = op1 + op2; - if (res > INT32_MAX) { - SET_QC(); - res = INT32_MAX; - } else if (res < INT32_MIN) { - SET_QC(); - res = INT32_MIN; - } - return res; -} - -uint64_t HELPER(neon_sqadd_u64)(CPUARMState *env, uint64_t a, uint64_t b) -{ - uint64_t res; - res = a + b; - /* We only need to look at the pattern of SIGN bits to detect an overflow */ - if (((a & res) - | (~b & res) - | (a & ~b)) & SIGNBIT64) { - SET_QC(); - res = INT64_MAX; - } - return res; -} - - -#define NEON_USAT(dest, src1, src2, type) do { \ - uint32_t tmp = (uint32_t)src1 - (uint32_t)src2; \ - if (tmp != (type)tmp) { \ - SET_QC(); \ - dest = 0; \ - } else { \ - dest = tmp; \ - }} while(0) -#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint8_t) -NEON_VOP_ENV(qsub_u8, neon_u8, 4) -#undef NEON_FN -#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint16_t) -NEON_VOP_ENV(qsub_u16, neon_u16, 2) -#undef NEON_FN -#undef NEON_USAT - -uint32_t HELPER(neon_qsub_u32)(CPUARMState *env, uint32_t a, uint32_t b) -{ - uint32_t res = a - b; - if (res > a) { - SET_QC(); - res = 0; - } - return res; -} - -uint64_t HELPER(neon_qsub_u64)(CPUARMState *env, uint64_t src1, uint64_t src2) -{ - uint64_t res; - - if (src1 < src2) { - SET_QC(); - res = 0; - } else { - res = src1 - src2; - } - return res; -} - -#define NEON_SSAT(dest, src1, src2, type) do { \ - int32_t tmp = (uint32_t)src1 - (uint32_t)src2; \ - if (tmp != (type)tmp) { \ - SET_QC(); \ - if (src2 < 0) { \ - tmp = (1 << (sizeof(type) * 8 - 1)) - 1; \ - } else { \ - tmp = 1 << (sizeof(type) * 8 - 1); \ - } \ - } \ - dest = tmp; \ - } while(0) -#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int8_t) -NEON_VOP_ENV(qsub_s8, neon_s8, 4) -#undef NEON_FN -#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int16_t) -NEON_VOP_ENV(qsub_s16, neon_s16, 2) -#undef NEON_FN -#undef NEON_SSAT - -uint32_t HELPER(neon_qsub_s32)(CPUARMState *env, uint32_t a, uint32_t b) -{ - uint32_t res = a - b; - if (((res ^ a) & SIGNBIT) && ((a ^ b) & SIGNBIT)) { - SET_QC(); - res = ~(((int32_t)a >> 31) ^ SIGNBIT); - } - return res; -} - -uint64_t HELPER(neon_qsub_s64)(CPUARMState *env, uint64_t src1, uint64_t src2) -{ - uint64_t res; - - res = src1 - src2; - if (((res ^ src1) & SIGNBIT64) && ((src1 ^ src2) & SIGNBIT64)) { - SET_QC(); - res = ((int64_t)src1 >> 63) ^ ~SIGNBIT64; - } - return res; -} - -#define NEON_FN(dest, src1, src2) dest = (src1 + src2) >> 1 -NEON_VOP(hadd_s8, neon_s8, 4) -NEON_VOP(hadd_u8, neon_u8, 4) -NEON_VOP(hadd_s16, neon_s16, 2) -NEON_VOP(hadd_u16, neon_u16, 2) -#undef NEON_FN - -int32_t HELPER(neon_hadd_s32)(int32_t src1, int32_t src2) -{ - int32_t dest; - 
- dest = (src1 >> 1) + (src2 >> 1); - if (src1 & src2 & 1) - dest++; - return dest; -} - -uint32_t HELPER(neon_hadd_u32)(uint32_t src1, uint32_t src2) -{ - uint32_t dest; - - dest = (src1 >> 1) + (src2 >> 1); - if (src1 & src2 & 1) - dest++; - return dest; -} - -#define NEON_FN(dest, src1, src2) dest = (src1 + src2 + 1) >> 1 -NEON_VOP(rhadd_s8, neon_s8, 4) -NEON_VOP(rhadd_u8, neon_u8, 4) -NEON_VOP(rhadd_s16, neon_s16, 2) -NEON_VOP(rhadd_u16, neon_u16, 2) -#undef NEON_FN - -int32_t HELPER(neon_rhadd_s32)(int32_t src1, int32_t src2) -{ - int32_t dest; - - dest = (src1 >> 1) + (src2 >> 1); - if ((src1 | src2) & 1) - dest++; - return dest; -} - -uint32_t HELPER(neon_rhadd_u32)(uint32_t src1, uint32_t src2) -{ - uint32_t dest; - - dest = (src1 >> 1) + (src2 >> 1); - if ((src1 | src2) & 1) - dest++; - return dest; -} - -#define NEON_FN(dest, src1, src2) dest = (src1 - src2) >> 1 -NEON_VOP(hsub_s8, neon_s8, 4) -NEON_VOP(hsub_u8, neon_u8, 4) -NEON_VOP(hsub_s16, neon_s16, 2) -NEON_VOP(hsub_u16, neon_u16, 2) -#undef NEON_FN - -int32_t HELPER(neon_hsub_s32)(int32_t src1, int32_t src2) -{ - int32_t dest; - - dest = (src1 >> 1) - (src2 >> 1); - if ((~src1) & src2 & 1) - dest--; - return dest; -} - -uint32_t HELPER(neon_hsub_u32)(uint32_t src1, uint32_t src2) -{ - uint32_t dest; - - dest = (src1 >> 1) - (src2 >> 1); - if ((~src1) & src2 & 1) - dest--; - return dest; -} - #define NEON_FN(dest, src1, src2) dest = (src1 < src2) ? src1 : src2 NEON_POP(pmin_s8, neon_s8, 4) NEON_POP(pmin_u8, neon_u8, 4) @@ -590,11 +206,23 @@ NEON_VOP(shl_s16, neon_s16, 2) #define NEON_FN(dest, src1, src2) \ (dest = do_sqrshl_bhs(src1, (int8_t)src2, 8, true, NULL)) NEON_VOP(rshl_s8, neon_s8, 4) +NEON_GVEC_VOP2(gvec_srshl_b, int8_t) #undef NEON_FN #define NEON_FN(dest, src1, src2) \ (dest = do_sqrshl_bhs(src1, (int8_t)src2, 16, true, NULL)) NEON_VOP(rshl_s16, neon_s16, 2) +NEON_GVEC_VOP2(gvec_srshl_h, int16_t) +#undef NEON_FN + +#define NEON_FN(dest, src1, src2) \ + (dest = do_sqrshl_bhs(src1, (int8_t)src2, 32, true, NULL)) +NEON_GVEC_VOP2(gvec_srshl_s, int32_t) +#undef NEON_FN + +#define NEON_FN(dest, src1, src2) \ + (dest = do_sqrshl_d(src1, (int8_t)src2, true, NULL)) +NEON_GVEC_VOP2(gvec_srshl_d, int64_t) #undef NEON_FN uint32_t HELPER(neon_rshl_s32)(uint32_t val, uint32_t shift) @@ -610,11 +238,23 @@ uint64_t HELPER(neon_rshl_s64)(uint64_t val, uint64_t shift) #define NEON_FN(dest, src1, src2) \ (dest = do_uqrshl_bhs(src1, (int8_t)src2, 8, true, NULL)) NEON_VOP(rshl_u8, neon_u8, 4) +NEON_GVEC_VOP2(gvec_urshl_b, uint8_t) #undef NEON_FN #define NEON_FN(dest, src1, src2) \ (dest = do_uqrshl_bhs(src1, (int8_t)src2, 16, true, NULL)) NEON_VOP(rshl_u16, neon_u16, 2) +NEON_GVEC_VOP2(gvec_urshl_h, uint16_t) +#undef NEON_FN + +#define NEON_FN(dest, src1, src2) \ + (dest = do_uqrshl_bhs(src1, (int8_t)src2, 32, true, NULL)) +NEON_GVEC_VOP2(gvec_urshl_s, int32_t) +#undef NEON_FN + +#define NEON_FN(dest, src1, src2) \ + (dest = do_uqrshl_d(src1, (int8_t)src2, true, NULL)) +NEON_GVEC_VOP2(gvec_urshl_d, int64_t) #undef NEON_FN uint32_t HELPER(neon_rshl_u32)(uint32_t val, uint32_t shift) @@ -630,11 +270,23 @@ uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shift) #define NEON_FN(dest, src1, src2) \ (dest = do_uqrshl_bhs(src1, (int8_t)src2, 8, false, env->vfp.qc)) NEON_VOP_ENV(qshl_u8, neon_u8, 4) +NEON_GVEC_VOP2_ENV(neon_uqshl_b, uint8_t) #undef NEON_FN #define NEON_FN(dest, src1, src2) \ (dest = do_uqrshl_bhs(src1, (int8_t)src2, 16, false, env->vfp.qc)) NEON_VOP_ENV(qshl_u16, neon_u16, 2) +NEON_GVEC_VOP2_ENV(neon_uqshl_h, uint16_t) 
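/*
 * Editorial sketch, not part of the original patch: with the NEON_FN
 * definition in effect just above, NEON_GVEC_VOP2_ENV(neon_uqshl_h, uint16_t)
 * expands to roughly the following out-of-line helper:
 *
 *   void HELPER(neon_uqshl_h)(void *vd, void *vn, void *vm,
 *                             void *venv, uint32_t desc)
 *   {
 *       intptr_t i, opr_sz = simd_oprsz(desc);
 *       uint16_t *d = vd, *n = vn, *m = vm;
 *       CPUARMState *env = venv;
 *       for (i = 0; i < opr_sz / sizeof(uint16_t); i++) {
 *           d[i] = do_uqrshl_bhs(n[i], (int8_t)m[i], 16, false, env->vfp.qc);
 *       }
 *       clear_tail(d, opr_sz, simd_maxsz(desc));
 *   }
 */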
+#undef NEON_FN + +#define NEON_FN(dest, src1, src2) \ + (dest = do_uqrshl_bhs(src1, (int8_t)src2, 32, false, env->vfp.qc)) +NEON_GVEC_VOP2_ENV(neon_uqshl_s, uint32_t) +#undef NEON_FN + +#define NEON_FN(dest, src1, src2) \ + (dest = do_uqrshl_d(src1, (int8_t)src2, false, env->vfp.qc)) +NEON_GVEC_VOP2_ENV(neon_uqshl_d, uint64_t) #undef NEON_FN uint32_t HELPER(neon_qshl_u32)(CPUARMState *env, uint32_t val, uint32_t shift) @@ -650,11 +302,23 @@ uint64_t HELPER(neon_qshl_u64)(CPUARMState *env, uint64_t val, uint64_t shift) #define NEON_FN(dest, src1, src2) \ (dest = do_sqrshl_bhs(src1, (int8_t)src2, 8, false, env->vfp.qc)) NEON_VOP_ENV(qshl_s8, neon_s8, 4) +NEON_GVEC_VOP2_ENV(neon_sqshl_b, int8_t) #undef NEON_FN #define NEON_FN(dest, src1, src2) \ (dest = do_sqrshl_bhs(src1, (int8_t)src2, 16, false, env->vfp.qc)) NEON_VOP_ENV(qshl_s16, neon_s16, 2) +NEON_GVEC_VOP2_ENV(neon_sqshl_h, int16_t) +#undef NEON_FN + +#define NEON_FN(dest, src1, src2) \ + (dest = do_sqrshl_bhs(src1, (int8_t)src2, 32, false, env->vfp.qc)) +NEON_GVEC_VOP2_ENV(neon_sqshl_s, int32_t) +#undef NEON_FN + +#define NEON_FN(dest, src1, src2) \ + (dest = do_sqrshl_d(src1, (int8_t)src2, false, env->vfp.qc)) +NEON_GVEC_VOP2_ENV(neon_sqshl_d, int64_t) #undef NEON_FN uint32_t HELPER(neon_qshl_s32)(CPUARMState *env, uint32_t val, uint32_t shift) @@ -690,11 +354,23 @@ uint64_t HELPER(neon_qshlu_s64)(CPUARMState *env, uint64_t val, uint64_t shift) #define NEON_FN(dest, src1, src2) \ (dest = do_uqrshl_bhs(src1, (int8_t)src2, 8, true, env->vfp.qc)) NEON_VOP_ENV(qrshl_u8, neon_u8, 4) +NEON_GVEC_VOP2_ENV(neon_uqrshl_b, uint8_t) #undef NEON_FN #define NEON_FN(dest, src1, src2) \ (dest = do_uqrshl_bhs(src1, (int8_t)src2, 16, true, env->vfp.qc)) NEON_VOP_ENV(qrshl_u16, neon_u16, 2) +NEON_GVEC_VOP2_ENV(neon_uqrshl_h, uint16_t) +#undef NEON_FN + +#define NEON_FN(dest, src1, src2) \ + (dest = do_uqrshl_bhs(src1, (int8_t)src2, 32, true, env->vfp.qc)) +NEON_GVEC_VOP2_ENV(neon_uqrshl_s, uint32_t) +#undef NEON_FN + +#define NEON_FN(dest, src1, src2) \ + (dest = do_uqrshl_d(src1, (int8_t)src2, true, env->vfp.qc)) +NEON_GVEC_VOP2_ENV(neon_uqrshl_d, uint64_t) #undef NEON_FN uint32_t HELPER(neon_qrshl_u32)(CPUARMState *env, uint32_t val, uint32_t shift) @@ -710,11 +386,23 @@ uint64_t HELPER(neon_qrshl_u64)(CPUARMState *env, uint64_t val, uint64_t shift) #define NEON_FN(dest, src1, src2) \ (dest = do_sqrshl_bhs(src1, (int8_t)src2, 8, true, env->vfp.qc)) NEON_VOP_ENV(qrshl_s8, neon_s8, 4) +NEON_GVEC_VOP2_ENV(neon_sqrshl_b, int8_t) #undef NEON_FN #define NEON_FN(dest, src1, src2) \ (dest = do_sqrshl_bhs(src1, (int8_t)src2, 16, true, env->vfp.qc)) NEON_VOP_ENV(qrshl_s16, neon_s16, 2) +NEON_GVEC_VOP2_ENV(neon_sqrshl_h, int16_t) +#undef NEON_FN + +#define NEON_FN(dest, src1, src2) \ + (dest = do_sqrshl_bhs(src1, (int8_t)src2, 32, true, env->vfp.qc)) +NEON_GVEC_VOP2_ENV(neon_sqrshl_s, int32_t) +#undef NEON_FN + +#define NEON_FN(dest, src1, src2) \ + (dest = do_sqrshl_d(src1, (int8_t)src2, true, env->vfp.qc)) +NEON_GVEC_VOP2_ENV(neon_sqrshl_d, int64_t) #undef NEON_FN uint32_t HELPER(neon_qrshl_s32)(CPUARMState *env, uint32_t val, uint32_t shift) diff --git a/target/arm/tcg/op_helper.c b/target/arm/tcg/op_helper.c index c199b69fbf..c083e5cfb8 100644 --- a/target/arm/tcg/op_helper.c +++ b/target/arm/tcg/op_helper.c @@ -409,6 +409,60 @@ void HELPER(wfi)(CPUARMState *env, uint32_t insn_len) #endif } +void HELPER(wfit)(CPUARMState *env, uint64_t timeout) +{ +#ifdef CONFIG_USER_ONLY + /* + * WFI in the user-mode emulator is technically permitted but not + * something 
any real-world code would do. AArch64 Linux kernels + * trap it via SCTLR_EL1.nTWI and make it an (expensive) NOP; + * AArch32 kernels don't trap it so it will delay a bit. + * For QEMU, make it a NOP here, because trying to raise EXCP_HLT + * would trigger an abort. + */ + return; +#else + ARMCPU *cpu = env_archcpu(env); + CPUState *cs = env_cpu(env); + int target_el = check_wfx_trap(env, false); + /* The WFIT should time out when CNTVCT_EL0 >= the specified value. */ + uint64_t cntval = gt_get_countervalue(env); + uint64_t offset = gt_virt_cnt_offset(env); + uint64_t cntvct = cntval - offset; + uint64_t nexttick; + + if (cpu_has_work(cs) || cntvct >= timeout) { + /* + * Don't bother to go into our "low power state" if + * we would just wake up immediately. + */ + return; + } + + if (target_el) { + env->pc -= 4; + raise_exception(env, EXCP_UDEF, syn_wfx(1, 0xe, 0, false), + target_el); + } + + if (uadd64_overflow(timeout, offset, &nexttick)) { + nexttick = UINT64_MAX; + } + if (nexttick > INT64_MAX / gt_cntfrq_period_ns(cpu)) { + /* + * If the timeout is too long for the signed 64-bit range + * of a QEMUTimer, let it expire early. + */ + timer_mod_ns(cpu->wfxt_timer, INT64_MAX); + } else { + timer_mod(cpu->wfxt_timer, nexttick); + } + cs->exception_index = EXCP_HLT; + cs->halted = 1; + cpu_loop_exit(cs); +#endif +} + void HELPER(wfe)(CPUARMState *env) { /* This is a hint instruction that is semantically different diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c index 9167e4d0bd..93543da39c 100644 --- a/target/arm/tcg/translate-a64.c +++ b/target/arm/tcg/translate-a64.c @@ -724,19 +724,6 @@ static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn, is_q ? 16 : 8, vec_full_reg_size(s), data, fn); } -/* Expand a 3-operand + qc + operation using an out-of-line helper. */ -static void gen_gvec_op3_qc(DisasContext *s, bool is_q, int rd, int rn, - int rm, gen_helper_gvec_3_ptr *fn) -{ - TCGv_ptr qc_ptr = tcg_temp_new_ptr(); - - tcg_gen_addi_ptr(qc_ptr, tcg_env, offsetof(CPUARMState, vfp.qc)); - tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), qc_ptr, - is_q ? 16 : 8, vec_full_reg_size(s), 0, fn); -} - /* Expand a 4-operand operation using an out-of-line helper. */ static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn, int rm, int ra, int data, gen_helper_gvec_4 *fn) @@ -1363,6 +1350,14 @@ static bool do_gvec_fn3_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn) return true; } +static bool do_gvec_fn3_no8_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn) +{ + if (a->esz == MO_8) { + return false; + } + return do_gvec_fn3_no64(s, a, fn); +} + static bool do_gvec_fn4(DisasContext *s, arg_qrrrr_e *a, GVecGen4Fn *fn) { if (!a->q && a->esz == MO_64) { @@ -1750,6 +1745,47 @@ static bool trans_WFE(DisasContext *s, arg_WFI *a) return true; } +static bool trans_WFIT(DisasContext *s, arg_WFIT *a) +{ + if (!dc_isar_feature(aa64_wfxt, s)) { + return false; + } + + /* + * Because we need to pass the register value to the helper, + * it's easier to emit the code now, unlike trans_WFI which + * defers it to aarch64_tr_tb_stop(). That means we need to + * check ss_active so that single-stepping a WFIT doesn't halt. 
+ */ + if (s->ss_active) { + /* Act like a NOP under architectural singlestep */ + return true; + } + + gen_a64_update_pc(s, 4); + gen_helper_wfit(tcg_env, cpu_reg(s, a->rd)); + /* Go back to the main loop to check for interrupts */ + s->base.is_jmp = DISAS_EXIT; + return true; +} + +static bool trans_WFET(DisasContext *s, arg_WFET *a) +{ + if (!dc_isar_feature(aa64_wfxt, s)) { + return false; + } + + /* + * We rely here on our WFE implementation being a NOP, so we + * don't need to do anything different to handle the WFET timeout + * from what trans_WFE does. + */ + if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { + s->base.is_jmp = DISAS_WFE; + } + return true; +} + static bool trans_XPACLRI(DisasContext *s, arg_XPACLRI *a) { if (s->pauth_active) { @@ -5060,6 +5096,163 @@ static const FPScalar f_scalar_frsqrts = { }; TRANS(FRSQRTS_s, do_fp3_scalar, a, &f_scalar_frsqrts) +static bool do_satacc_s(DisasContext *s, arg_rrr_e *a, + MemOp sgn_n, MemOp sgn_m, + void (*gen_bhs)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64, MemOp), + void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64)) +{ + TCGv_i64 t0, t1, t2, qc; + MemOp esz = a->esz; + + if (!fp_access_check(s)) { + return true; + } + + t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i64(); + t2 = tcg_temp_new_i64(); + qc = tcg_temp_new_i64(); + read_vec_element(s, t1, a->rn, 0, esz | sgn_n); + read_vec_element(s, t2, a->rm, 0, esz | sgn_m); + tcg_gen_ld_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc)); + + if (esz == MO_64) { + gen_d(t0, qc, t1, t2); + } else { + gen_bhs(t0, qc, t1, t2, esz); + tcg_gen_ext_i64(t0, t0, esz); + } + + write_fp_dreg(s, a->rd, t0); + tcg_gen_st_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc)); + return true; +} + +TRANS(SQADD_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqadd_bhs, gen_sqadd_d) +TRANS(SQSUB_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqsub_bhs, gen_sqsub_d) +TRANS(UQADD_s, do_satacc_s, a, 0, 0, gen_uqadd_bhs, gen_uqadd_d) +TRANS(UQSUB_s, do_satacc_s, a, 0, 0, gen_uqsub_bhs, gen_uqsub_d) +TRANS(SUQADD_s, do_satacc_s, a, MO_SIGN, 0, gen_suqadd_bhs, gen_suqadd_d) +TRANS(USQADD_s, do_satacc_s, a, 0, MO_SIGN, gen_usqadd_bhs, gen_usqadd_d) + +static bool do_int3_scalar_d(DisasContext *s, arg_rrr_e *a, + void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64)) +{ + if (fp_access_check(s)) { + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + + read_vec_element(s, t0, a->rn, 0, MO_64); + read_vec_element(s, t1, a->rm, 0, MO_64); + fn(t0, t0, t1); + write_fp_dreg(s, a->rd, t0); + } + return true; +} + +TRANS(SSHL_s, do_int3_scalar_d, a, gen_sshl_i64) +TRANS(USHL_s, do_int3_scalar_d, a, gen_ushl_i64) +TRANS(SRSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_s64) +TRANS(URSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_u64) +TRANS(ADD_s, do_int3_scalar_d, a, tcg_gen_add_i64) +TRANS(SUB_s, do_int3_scalar_d, a, tcg_gen_sub_i64) + +typedef struct ENVScalar2 { + NeonGenTwoOpEnvFn *gen_bhs[3]; + NeonGenTwo64OpEnvFn *gen_d; +} ENVScalar2; + +static bool do_env_scalar2(DisasContext *s, arg_rrr_e *a, const ENVScalar2 *f) +{ + if (!fp_access_check(s)) { + return true; + } + if (a->esz == MO_64) { + TCGv_i64 t0 = read_fp_dreg(s, a->rn); + TCGv_i64 t1 = read_fp_dreg(s, a->rm); + f->gen_d(t0, tcg_env, t0, t1); + write_fp_dreg(s, a->rd, t0); + } else { + TCGv_i32 t0 = tcg_temp_new_i32(); + TCGv_i32 t1 = tcg_temp_new_i32(); + + read_vec_element_i32(s, t0, a->rn, 0, a->esz); + read_vec_element_i32(s, t1, a->rm, 0, a->esz); + f->gen_bhs[a->esz](t0, tcg_env, t0, t1); + write_fp_sreg(s, a->rd, t0); + } + return true; +} + +static const 
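/*
 * Editorial note: each ENVScalar2 table pairs the 8/16/32-bit env helpers
 * (indexed by a->esz) with a 64-bit variant for MO_64. The sqdmulh and
 * sqrdmulh tables below leave the byte and 64-bit slots empty because those
 * element sizes are not architecturally valid for these instructions.
 */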
ENVScalar2 f_scalar_sqshl = { + { gen_helper_neon_qshl_s8, + gen_helper_neon_qshl_s16, + gen_helper_neon_qshl_s32 }, + gen_helper_neon_qshl_s64, +}; +TRANS(SQSHL_s, do_env_scalar2, a, &f_scalar_sqshl) + +static const ENVScalar2 f_scalar_uqshl = { + { gen_helper_neon_qshl_u8, + gen_helper_neon_qshl_u16, + gen_helper_neon_qshl_u32 }, + gen_helper_neon_qshl_u64, +}; +TRANS(UQSHL_s, do_env_scalar2, a, &f_scalar_uqshl) + +static const ENVScalar2 f_scalar_sqrshl = { + { gen_helper_neon_qrshl_s8, + gen_helper_neon_qrshl_s16, + gen_helper_neon_qrshl_s32 }, + gen_helper_neon_qrshl_s64, +}; +TRANS(SQRSHL_s, do_env_scalar2, a, &f_scalar_sqrshl) + +static const ENVScalar2 f_scalar_uqrshl = { + { gen_helper_neon_qrshl_u8, + gen_helper_neon_qrshl_u16, + gen_helper_neon_qrshl_u32 }, + gen_helper_neon_qrshl_u64, +}; +TRANS(UQRSHL_s, do_env_scalar2, a, &f_scalar_uqrshl) + +static bool do_env_scalar2_hs(DisasContext *s, arg_rrr_e *a, + const ENVScalar2 *f) +{ + if (a->esz == MO_16 || a->esz == MO_32) { + return do_env_scalar2(s, a, f); + } + return false; +} + +static const ENVScalar2 f_scalar_sqdmulh = { + { NULL, gen_helper_neon_qdmulh_s16, gen_helper_neon_qdmulh_s32 } +}; +TRANS(SQDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqdmulh) + +static const ENVScalar2 f_scalar_sqrdmulh = { + { NULL, gen_helper_neon_qrdmulh_s16, gen_helper_neon_qrdmulh_s32 } +}; +TRANS(SQRDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqrdmulh) + +static bool do_cmop_d(DisasContext *s, arg_rrr_e *a, TCGCond cond) +{ + if (fp_access_check(s)) { + TCGv_i64 t0 = read_fp_dreg(s, a->rn); + TCGv_i64 t1 = read_fp_dreg(s, a->rm); + tcg_gen_negsetcond_i64(cond, t0, t0, t1); + write_fp_dreg(s, a->rd, t0); + } + return true; +} + +TRANS(CMGT_s, do_cmop_d, a, TCG_COND_GT) +TRANS(CMHI_s, do_cmop_d, a, TCG_COND_GTU) +TRANS(CMGE_s, do_cmop_d, a, TCG_COND_GE) +TRANS(CMHS_s, do_cmop_d, a, TCG_COND_GEU) +TRANS(CMEQ_s, do_cmop_d, a, TCG_COND_EQ) +TRANS(CMTST_s, do_cmop_d, a, TCG_COND_TSTNE) + static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, gen_helper_gvec_3_ptr * const fns[3]) { @@ -5298,6 +5491,68 @@ TRANS(BSL_v, do_bitsel, a->q, a->rd, a->rd, a->rn, a->rm) TRANS(BIT_v, do_bitsel, a->q, a->rd, a->rm, a->rn, a->rd) TRANS(BIF_v, do_bitsel, a->q, a->rd, a->rm, a->rd, a->rn) +TRANS(SQADD_v, do_gvec_fn3, a, gen_gvec_sqadd_qc) +TRANS(UQADD_v, do_gvec_fn3, a, gen_gvec_uqadd_qc) +TRANS(SQSUB_v, do_gvec_fn3, a, gen_gvec_sqsub_qc) +TRANS(UQSUB_v, do_gvec_fn3, a, gen_gvec_uqsub_qc) +TRANS(SUQADD_v, do_gvec_fn3, a, gen_gvec_suqadd_qc) +TRANS(USQADD_v, do_gvec_fn3, a, gen_gvec_usqadd_qc) + +TRANS(SSHL_v, do_gvec_fn3, a, gen_gvec_sshl) +TRANS(USHL_v, do_gvec_fn3, a, gen_gvec_ushl) +TRANS(SRSHL_v, do_gvec_fn3, a, gen_gvec_srshl) +TRANS(URSHL_v, do_gvec_fn3, a, gen_gvec_urshl) +TRANS(SQSHL_v, do_gvec_fn3, a, gen_neon_sqshl) +TRANS(UQSHL_v, do_gvec_fn3, a, gen_neon_uqshl) +TRANS(SQRSHL_v, do_gvec_fn3, a, gen_neon_sqrshl) +TRANS(UQRSHL_v, do_gvec_fn3, a, gen_neon_uqrshl) + +TRANS(ADD_v, do_gvec_fn3, a, tcg_gen_gvec_add) +TRANS(SUB_v, do_gvec_fn3, a, tcg_gen_gvec_sub) +TRANS(SHADD_v, do_gvec_fn3_no64, a, gen_gvec_shadd) +TRANS(UHADD_v, do_gvec_fn3_no64, a, gen_gvec_uhadd) +TRANS(SHSUB_v, do_gvec_fn3_no64, a, gen_gvec_shsub) +TRANS(UHSUB_v, do_gvec_fn3_no64, a, gen_gvec_uhsub) +TRANS(SRHADD_v, do_gvec_fn3_no64, a, gen_gvec_srhadd) +TRANS(URHADD_v, do_gvec_fn3_no64, a, gen_gvec_urhadd) +TRANS(SMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smax) +TRANS(UMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umax) +TRANS(SMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smin) +TRANS(UMIN_v, 
do_gvec_fn3_no64, a, tcg_gen_gvec_umin) +TRANS(SABA_v, do_gvec_fn3_no64, a, gen_gvec_saba) +TRANS(UABA_v, do_gvec_fn3_no64, a, gen_gvec_uaba) +TRANS(SABD_v, do_gvec_fn3_no64, a, gen_gvec_sabd) +TRANS(UABD_v, do_gvec_fn3_no64, a, gen_gvec_uabd) +TRANS(MUL_v, do_gvec_fn3_no64, a, tcg_gen_gvec_mul) +TRANS(PMUL_v, do_gvec_op3_ool, a, 0, gen_helper_gvec_pmul_b) +TRANS(MLA_v, do_gvec_fn3_no64, a, gen_gvec_mla) +TRANS(MLS_v, do_gvec_fn3_no64, a, gen_gvec_mls) + +static bool do_cmop_v(DisasContext *s, arg_qrrr_e *a, TCGCond cond) +{ + if (a->esz == MO_64 && !a->q) { + return false; + } + if (fp_access_check(s)) { + tcg_gen_gvec_cmp(cond, a->esz, + vec_full_reg_offset(s, a->rd), + vec_full_reg_offset(s, a->rn), + vec_full_reg_offset(s, a->rm), + a->q ? 16 : 8, vec_full_reg_size(s)); + } + return true; +} + +TRANS(CMGT_v, do_cmop_v, a, TCG_COND_GT) +TRANS(CMHI_v, do_cmop_v, a, TCG_COND_GTU) +TRANS(CMGE_v, do_cmop_v, a, TCG_COND_GE) +TRANS(CMHS_v, do_cmop_v, a, TCG_COND_GEU) +TRANS(CMEQ_v, do_cmop_v, a, TCG_COND_EQ) +TRANS(CMTST_v, do_gvec_fn3, a, gen_gvec_cmtst) + +TRANS(SQDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqdmulh_qc) +TRANS(SQRDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmulh_qc) + /* * Advanced SIMD scalar/vector x indexed element */ @@ -5405,6 +5660,27 @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) TRANS(FMLA_si, do_fmla_scalar_idx, a, false) TRANS(FMLS_si, do_fmla_scalar_idx, a, true) +static bool do_env_scalar2_idx_hs(DisasContext *s, arg_rrx_e *a, + const ENVScalar2 *f) +{ + if (a->esz < MO_16 || a->esz > MO_32) { + return false; + } + if (fp_access_check(s)) { + TCGv_i32 t0 = tcg_temp_new_i32(); + TCGv_i32 t1 = tcg_temp_new_i32(); + + read_vec_element_i32(s, t0, a->rn, 0, a->esz); + read_vec_element_i32(s, t1, a->rm, a->idx, a->esz); + f->gen_bhs[a->esz](t0, tcg_env, t0, t1); + write_fp_sreg(s, a->rd, t0); + } + return true; +} + +TRANS(SQDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqdmulh) +TRANS(SQRDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqrdmulh) + static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a, gen_helper_gvec_3_ptr * const fns[3]) { @@ -5501,6 +5777,67 @@ TRANS_FEAT(FMLSL_vi, aa64_fhm, do_fmlal_idx, a, true, false) TRANS_FEAT(FMLAL2_vi, aa64_fhm, do_fmlal_idx, a, false, true) TRANS_FEAT(FMLSL2_vi, aa64_fhm, do_fmlal_idx, a, true, true) +static bool do_int3_vector_idx(DisasContext *s, arg_qrrx_e *a, + gen_helper_gvec_3 * const fns[2]) +{ + assert(a->esz == MO_16 || a->esz == MO_32); + if (fp_access_check(s)) { + gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, a->idx, fns[a->esz - 1]); + } + return true; +} + +static gen_helper_gvec_3 * const f_vector_idx_mul[2] = { + gen_helper_gvec_mul_idx_h, + gen_helper_gvec_mul_idx_s, +}; +TRANS(MUL_vi, do_int3_vector_idx, a, f_vector_idx_mul) + +static bool do_mla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool sub) +{ + static gen_helper_gvec_4 * const fns[2][2] = { + { gen_helper_gvec_mla_idx_h, gen_helper_gvec_mls_idx_h }, + { gen_helper_gvec_mla_idx_s, gen_helper_gvec_mls_idx_s }, + }; + + assert(a->esz == MO_16 || a->esz == MO_32); + if (fp_access_check(s)) { + gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, + a->idx, fns[a->esz - 1][sub]); + } + return true; +} + +TRANS(MLA_vi, do_mla_vector_idx, a, false) +TRANS(MLS_vi, do_mla_vector_idx, a, true) + +static bool do_int3_qc_vector_idx(DisasContext *s, arg_qrrx_e *a, + gen_helper_gvec_4 * const fns[2]) +{ + assert(a->esz == MO_16 || a->esz == MO_32); + if (fp_access_check(s)) { + tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd), + 
vec_full_reg_offset(s, a->rn), + vec_full_reg_offset(s, a->rm), + offsetof(CPUARMState, vfp.qc), + a->q ? 16 : 8, vec_full_reg_size(s), + a->idx, fns[a->esz - 1]); + } + return true; +} + +static gen_helper_gvec_4 * const f_vector_idx_sqdmulh[2] = { + gen_helper_neon_sqdmulh_idx_h, + gen_helper_neon_sqdmulh_idx_s, +}; +TRANS(SQDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqdmulh) + +static gen_helper_gvec_4 * const f_vector_idx_sqrdmulh[2] = { + gen_helper_neon_sqrdmulh_idx_h, + gen_helper_neon_sqrdmulh_idx_s, +}; +TRANS(SQRDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqrdmulh) + /* * Advanced SIMD scalar pairwise */ @@ -5570,6 +5907,132 @@ static bool trans_ADDP_s(DisasContext *s, arg_rr_e *a) return true; } +/* + * Floating-point conditional select + */ + +static bool trans_FCSEL(DisasContext *s, arg_FCSEL *a) +{ + TCGv_i64 t_true, t_false; + DisasCompare64 c; + + switch (a->esz) { + case MO_32: + case MO_64: + break; + case MO_16: + if (!dc_isar_feature(aa64_fp16, s)) { + return false; + } + break; + default: + return false; + } + + if (!fp_access_check(s)) { + return true; + } + + /* Zero extend sreg & hreg inputs to 64 bits now. */ + t_true = tcg_temp_new_i64(); + t_false = tcg_temp_new_i64(); + read_vec_element(s, t_true, a->rn, 0, a->esz); + read_vec_element(s, t_false, a->rm, 0, a->esz); + + a64_test_cc(&c, a->cond); + tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0), + t_true, t_false); + + /* + * Note that sregs & hregs write back zeros to the high bits, + * and we've already done the zero-extension. + */ + write_fp_dreg(s, a->rd, t_true); + return true; +} + +/* + * Floating-point data-processing (3 source) + */ + +static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) +{ + TCGv_ptr fpst; + + /* + * These are fused multiply-add. Note that doing the negations here + * as separate steps is correct: an input NaN should come out with + * its sign bit flipped if it is a negated-input. + */ + switch (a->esz) { + case MO_64: + if (fp_access_check(s)) { + TCGv_i64 tn = read_fp_dreg(s, a->rn); + TCGv_i64 tm = read_fp_dreg(s, a->rm); + TCGv_i64 ta = read_fp_dreg(s, a->ra); + + if (neg_a) { + gen_vfp_negd(ta, ta); + } + if (neg_n) { + gen_vfp_negd(tn, tn); + } + fpst = fpstatus_ptr(FPST_FPCR); + gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst); + write_fp_dreg(s, a->rd, ta); + } + break; + + case MO_32: + if (fp_access_check(s)) { + TCGv_i32 tn = read_fp_sreg(s, a->rn); + TCGv_i32 tm = read_fp_sreg(s, a->rm); + TCGv_i32 ta = read_fp_sreg(s, a->ra); + + if (neg_a) { + gen_vfp_negs(ta, ta); + } + if (neg_n) { + gen_vfp_negs(tn, tn); + } + fpst = fpstatus_ptr(FPST_FPCR); + gen_helper_vfp_muladds(ta, tn, tm, ta, fpst); + write_fp_sreg(s, a->rd, ta); + } + break; + + case MO_16: + if (!dc_isar_feature(aa64_fp16, s)) { + return false; + } + if (fp_access_check(s)) { + TCGv_i32 tn = read_fp_hreg(s, a->rn); + TCGv_i32 tm = read_fp_hreg(s, a->rm); + TCGv_i32 ta = read_fp_hreg(s, a->ra); + + if (neg_a) { + gen_vfp_negh(ta, ta); + } + if (neg_n) { + gen_vfp_negh(tn, tn); + } + fpst = fpstatus_ptr(FPST_FPCR_F16); + gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst); + write_fp_sreg(s, a->rd, ta); + } + break; + + default: + return false; + } + return true; +} + +TRANS(FMADD, do_fmadd, a, false, false) +TRANS(FNMADD, do_fmadd, a, true, true) +TRANS(FMSUB, do_fmadd, a, false, true) +TRANS(FNMSUB, do_fmadd, a, true, false) + /* Shift a TCGv src by TCGv shift_amount, put result in dst. 
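The four TRANS lines after do_fmadd() encode the whole FMADD family as two sign flips, one on the addend (neg_a) and one on the first multiplicand (neg_n), applied before a single fused operation; flipping an input rather than the result is what makes a negated NaN input come out with its sign bit flipped, as the comment in do_fmadd() notes. A standalone illustration using C's fma() (the function name and values are ours, not the emulator's):

    #include <math.h>
    #include <stdio.h>

    /* d = (neg_a ? -a : a) + (neg_n ? -n : n) * m, as one fused operation. */
    static double fmadd_family(double n, double m, double a,
                               int neg_a, int neg_n)
    {
        return fma(neg_n ? -n : n, m, neg_a ? -a : a);
    }

    int main(void)
    {
        double n = 2.0, m = 3.0, a = 10.0;
        printf("FMADD  %g\n", fmadd_family(n, m, a, 0, 0)); /*  a + n*m =  16 */
        printf("FMSUB  %g\n", fmadd_family(n, m, a, 0, 1)); /*  a - n*m =   4 */
        printf("FNMADD %g\n", fmadd_family(n, m, a, 1, 1)); /* -a - n*m = -16 */
        printf("FNMSUB %g\n", fmadd_family(n, m, a, 1, 0)); /* -a + n*m =  -4 */
        return 0;
    }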
* Note that it is the caller's responsibility to ensure that the * shift amount is in range (ie 0..31 or 0..63) and provide the ARM @@ -6954,68 +7417,6 @@ static void disas_fp_ccomp(DisasContext *s, uint32_t insn) } } -/* Floating point conditional select - * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0 - * +---+---+---+-----------+------+---+------+------+-----+------+------+ - * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | cond | 1 1 | Rn | Rd | - * +---+---+---+-----------+------+---+------+------+-----+------+------+ - */ -static void disas_fp_csel(DisasContext *s, uint32_t insn) -{ - unsigned int mos, type, rm, cond, rn, rd; - TCGv_i64 t_true, t_false; - DisasCompare64 c; - MemOp sz; - - mos = extract32(insn, 29, 3); - type = extract32(insn, 22, 2); - rm = extract32(insn, 16, 5); - cond = extract32(insn, 12, 4); - rn = extract32(insn, 5, 5); - rd = extract32(insn, 0, 5); - - if (mos) { - unallocated_encoding(s); - return; - } - - switch (type) { - case 0: - sz = MO_32; - break; - case 1: - sz = MO_64; - break; - case 3: - sz = MO_16; - if (dc_isar_feature(aa64_fp16, s)) { - break; - } - /* fallthru */ - default: - unallocated_encoding(s); - return; - } - - if (!fp_access_check(s)) { - return; - } - - /* Zero extend sreg & hreg inputs to 64 bits now. */ - t_true = tcg_temp_new_i64(); - t_false = tcg_temp_new_i64(); - read_vec_element(s, t_true, rn, 0, sz); - read_vec_element(s, t_false, rm, 0, sz); - - a64_test_cc(&c, cond); - tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0), - t_true, t_false); - - /* Note that sregs & hregs write back zeros to the high bits, - and we've already done the zero-extension. */ - write_fp_dreg(s, rd, t_true); -} - /* Floating-point data-processing (1 source) - half precision */ static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn) { @@ -7369,152 +7770,6 @@ static void disas_fp_1src(DisasContext *s, uint32_t insn) } } -/* Floating-point data-processing (3 source) - single precision */ -static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1, - int rd, int rn, int rm, int ra) -{ - TCGv_i32 tcg_op1, tcg_op2, tcg_op3; - TCGv_i32 tcg_res = tcg_temp_new_i32(); - TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); - - tcg_op1 = read_fp_sreg(s, rn); - tcg_op2 = read_fp_sreg(s, rm); - tcg_op3 = read_fp_sreg(s, ra); - - /* These are fused multiply-add, and must be done as one - * floating point operation with no rounding between the - * multiplication and addition steps. - * NB that doing the negations here as separate steps is - * correct : an input NaN should come out with its sign bit - * flipped if it is a negated-input. - */ - if (o1 == true) { - gen_vfp_negs(tcg_op3, tcg_op3); - } - - if (o0 != o1) { - gen_vfp_negs(tcg_op1, tcg_op1); - } - - gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst); - - write_fp_sreg(s, rd, tcg_res); -} - -/* Floating-point data-processing (3 source) - double precision */ -static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1, - int rd, int rn, int rm, int ra) -{ - TCGv_i64 tcg_op1, tcg_op2, tcg_op3; - TCGv_i64 tcg_res = tcg_temp_new_i64(); - TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); - - tcg_op1 = read_fp_dreg(s, rn); - tcg_op2 = read_fp_dreg(s, rm); - tcg_op3 = read_fp_dreg(s, ra); - - /* These are fused multiply-add, and must be done as one - * floating point operation with no rounding between the - * multiplication and addition steps. 
- * NB that doing the negations here as separate steps is - * correct : an input NaN should come out with its sign bit - * flipped if it is a negated-input. - */ - if (o1 == true) { - gen_vfp_negd(tcg_op3, tcg_op3); - } - - if (o0 != o1) { - gen_vfp_negd(tcg_op1, tcg_op1); - } - - gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst); - - write_fp_dreg(s, rd, tcg_res); -} - -/* Floating-point data-processing (3 source) - half precision */ -static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1, - int rd, int rn, int rm, int ra) -{ - TCGv_i32 tcg_op1, tcg_op2, tcg_op3; - TCGv_i32 tcg_res = tcg_temp_new_i32(); - TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_F16); - - tcg_op1 = read_fp_hreg(s, rn); - tcg_op2 = read_fp_hreg(s, rm); - tcg_op3 = read_fp_hreg(s, ra); - - /* These are fused multiply-add, and must be done as one - * floating point operation with no rounding between the - * multiplication and addition steps. - * NB that doing the negations here as separate steps is - * correct : an input NaN should come out with its sign bit - * flipped if it is a negated-input. - */ - if (o1 == true) { - tcg_gen_xori_i32(tcg_op3, tcg_op3, 0x8000); - } - - if (o0 != o1) { - tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000); - } - - gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst); - - write_fp_sreg(s, rd, tcg_res); -} - -/* Floating point data-processing (3 source) - * 31 30 29 28 24 23 22 21 20 16 15 14 10 9 5 4 0 - * +---+---+---+-----------+------+----+------+----+------+------+------+ - * | M | 0 | S | 1 1 1 1 1 | type | o1 | Rm | o0 | Ra | Rn | Rd | - * +---+---+---+-----------+------+----+------+----+------+------+------+ - */ -static void disas_fp_3src(DisasContext *s, uint32_t insn) -{ - int mos = extract32(insn, 29, 3); - int type = extract32(insn, 22, 2); - int rd = extract32(insn, 0, 5); - int rn = extract32(insn, 5, 5); - int ra = extract32(insn, 10, 5); - int rm = extract32(insn, 16, 5); - bool o0 = extract32(insn, 15, 1); - bool o1 = extract32(insn, 21, 1); - - if (mos) { - unallocated_encoding(s); - return; - } - - switch (type) { - case 0: - if (!fp_access_check(s)) { - return; - } - handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra); - break; - case 1: - if (!fp_access_check(s)) { - return; - } - handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra); - break; - case 3: - if (!dc_isar_feature(aa64_fp16, s)) { - unallocated_encoding(s); - return; - } - if (!fp_access_check(s)) { - return; - } - handle_fp_3src_half(s, o0, o1, rd, rn, rm, ra); - break; - default: - unallocated_encoding(s); - } -} - /* Floating point immediate * 31 30 29 28 24 23 22 21 20 13 12 10 9 5 4 0 * +---+---+---+-----------+------+---+------------+-------+------+------+ @@ -7959,8 +8214,7 @@ static void disas_fp_int_conv(DisasContext *s, uint32_t insn) static void disas_data_proc_fp(DisasContext *s, uint32_t insn) { if (extract32(insn, 24, 1)) { - /* Floating point data-processing (3 source) */ - disas_fp_3src(s, insn); + unallocated_encoding(s); /* in decodetree */ } else if (extract32(insn, 21, 1) == 0) { /* Floating point to fixed point conversions */ disas_fp_fixed_conv(s, insn); @@ -7976,7 +8230,7 @@ static void disas_data_proc_fp(DisasContext *s, uint32_t insn) break; case 3: /* Floating point conditional select */ - disas_fp_csel(s, insn); + unallocated_encoding(s); /* in decodetree */ break; case 0: switch (ctz32(extract32(insn, 12, 4))) { @@ -9282,219 +9536,6 @@ static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn) } } -static void handle_3same_64(DisasContext 
*s, int opcode, bool u, - TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm) -{ - /* Handle 64x64->64 opcodes which are shared between the scalar - * and vector 3-same groups. We cover every opcode where size == 3 - * is valid in either the three-reg-same (integer, not pairwise) - * or scalar-three-reg-same groups. - */ - TCGCond cond; - - switch (opcode) { - case 0x1: /* SQADD */ - if (u) { - gen_helper_neon_qadd_u64(tcg_rd, tcg_env, tcg_rn, tcg_rm); - } else { - gen_helper_neon_qadd_s64(tcg_rd, tcg_env, tcg_rn, tcg_rm); - } - break; - case 0x5: /* SQSUB */ - if (u) { - gen_helper_neon_qsub_u64(tcg_rd, tcg_env, tcg_rn, tcg_rm); - } else { - gen_helper_neon_qsub_s64(tcg_rd, tcg_env, tcg_rn, tcg_rm); - } - break; - case 0x6: /* CMGT, CMHI */ - cond = u ? TCG_COND_GTU : TCG_COND_GT; - do_cmop: - /* 64 bit integer comparison, result = test ? -1 : 0. */ - tcg_gen_negsetcond_i64(cond, tcg_rd, tcg_rn, tcg_rm); - break; - case 0x7: /* CMGE, CMHS */ - cond = u ? TCG_COND_GEU : TCG_COND_GE; - goto do_cmop; - case 0x11: /* CMTST, CMEQ */ - if (u) { - cond = TCG_COND_EQ; - goto do_cmop; - } - gen_cmtst_i64(tcg_rd, tcg_rn, tcg_rm); - break; - case 0x8: /* SSHL, USHL */ - if (u) { - gen_ushl_i64(tcg_rd, tcg_rn, tcg_rm); - } else { - gen_sshl_i64(tcg_rd, tcg_rn, tcg_rm); - } - break; - case 0x9: /* SQSHL, UQSHL */ - if (u) { - gen_helper_neon_qshl_u64(tcg_rd, tcg_env, tcg_rn, tcg_rm); - } else { - gen_helper_neon_qshl_s64(tcg_rd, tcg_env, tcg_rn, tcg_rm); - } - break; - case 0xa: /* SRSHL, URSHL */ - if (u) { - gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm); - } else { - gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm); - } - break; - case 0xb: /* SQRSHL, UQRSHL */ - if (u) { - gen_helper_neon_qrshl_u64(tcg_rd, tcg_env, tcg_rn, tcg_rm); - } else { - gen_helper_neon_qrshl_s64(tcg_rd, tcg_env, tcg_rn, tcg_rm); - } - break; - case 0x10: /* ADD, SUB */ - if (u) { - tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm); - } else { - tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm); - } - break; - default: - g_assert_not_reached(); - } -} - -/* AdvSIMD scalar three same - * 31 30 29 28 24 23 22 21 20 16 15 11 10 9 5 4 0 - * +-----+---+-----------+------+---+------+--------+---+------+------+ - * | 0 1 | U | 1 1 1 1 0 | size | 1 | Rm | opcode | 1 | Rn | Rd | - * +-----+---+-----------+------+---+------+--------+---+------+------+ - */ -static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn) -{ - int rd = extract32(insn, 0, 5); - int rn = extract32(insn, 5, 5); - int opcode = extract32(insn, 11, 5); - int rm = extract32(insn, 16, 5); - int size = extract32(insn, 22, 2); - bool u = extract32(insn, 29, 1); - TCGv_i64 tcg_rd; - - switch (opcode) { - case 0x1: /* SQADD, UQADD */ - case 0x5: /* SQSUB, UQSUB */ - case 0x9: /* SQSHL, UQSHL */ - case 0xb: /* SQRSHL, UQRSHL */ - break; - case 0x8: /* SSHL, USHL */ - case 0xa: /* SRSHL, URSHL */ - case 0x6: /* CMGT, CMHI */ - case 0x7: /* CMGE, CMHS */ - case 0x11: /* CMTST, CMEQ */ - case 0x10: /* ADD, SUB (vector) */ - if (size != 3) { - unallocated_encoding(s); - return; - } - break; - case 0x16: /* SQDMULH, SQRDMULH (vector) */ - if (size != 1 && size != 2) { - unallocated_encoding(s); - return; - } - break; - default: - unallocated_encoding(s); - return; - } - - if (!fp_access_check(s)) { - return; - } - - tcg_rd = tcg_temp_new_i64(); - - if (size == 3) { - TCGv_i64 tcg_rn = read_fp_dreg(s, rn); - TCGv_i64 tcg_rm = read_fp_dreg(s, rm); - - handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm); - } else { - /* Do a single operation on the lowest element in the 
vector. - * We use the standard Neon helpers and rely on 0 OP 0 == 0 with - * no side effects for all these operations. - * OPTME: special-purpose helpers would avoid doing some - * unnecessary work in the helper for the 8 and 16 bit cases. - */ - NeonGenTwoOpEnvFn *genenvfn; - TCGv_i32 tcg_rn = tcg_temp_new_i32(); - TCGv_i32 tcg_rm = tcg_temp_new_i32(); - TCGv_i32 tcg_rd32 = tcg_temp_new_i32(); - - read_vec_element_i32(s, tcg_rn, rn, 0, size); - read_vec_element_i32(s, tcg_rm, rm, 0, size); - - switch (opcode) { - case 0x1: /* SQADD, UQADD */ - { - static NeonGenTwoOpEnvFn * const fns[3][2] = { - { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 }, - { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 }, - { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 }, - }; - genenvfn = fns[size][u]; - break; - } - case 0x5: /* SQSUB, UQSUB */ - { - static NeonGenTwoOpEnvFn * const fns[3][2] = { - { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 }, - { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 }, - { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 }, - }; - genenvfn = fns[size][u]; - break; - } - case 0x9: /* SQSHL, UQSHL */ - { - static NeonGenTwoOpEnvFn * const fns[3][2] = { - { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 }, - { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 }, - { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 }, - }; - genenvfn = fns[size][u]; - break; - } - case 0xb: /* SQRSHL, UQRSHL */ - { - static NeonGenTwoOpEnvFn * const fns[3][2] = { - { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 }, - { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 }, - { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 }, - }; - genenvfn = fns[size][u]; - break; - } - case 0x16: /* SQDMULH, SQRDMULH */ - { - static NeonGenTwoOpEnvFn * const fns[2][2] = { - { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 }, - { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 }, - }; - assert(size == 1 || size == 2); - genenvfn = fns[size - 1][u]; - break; - } - default: - g_assert_not_reached(); - } - - genenvfn(tcg_rd32, tcg_env, tcg_rn, tcg_rm); - tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32); - } - - write_fp_dreg(s, rd, tcg_rd); -} - /* AdvSIMD scalar three same extra * 31 30 29 28 24 23 22 21 20 16 15 14 11 10 9 5 4 0 * +-----+---+-----------+------+---+------+---+--------+---+----+----+ @@ -9981,88 +10022,6 @@ static void handle_2misc_narrow(DisasContext *s, bool scalar, clear_vec_high(s, is_q, rd); } -/* Remaining saturating accumulating ops */ -static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u, - bool is_q, int size, int rn, int rd) -{ - bool is_double = (size == 3); - - if (is_double) { - TCGv_i64 tcg_rn = tcg_temp_new_i64(); - TCGv_i64 tcg_rd = tcg_temp_new_i64(); - int pass; - - for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { - read_vec_element(s, tcg_rn, rn, pass, MO_64); - read_vec_element(s, tcg_rd, rd, pass, MO_64); - - if (is_u) { /* USQADD */ - gen_helper_neon_uqadd_s64(tcg_rd, tcg_env, tcg_rn, tcg_rd); - } else { /* SUQADD */ - gen_helper_neon_sqadd_u64(tcg_rd, tcg_env, tcg_rn, tcg_rd); - } - write_vec_element(s, tcg_rd, rd, pass, MO_64); - } - clear_vec_high(s, !is_scalar, rd); - } else { - TCGv_i32 tcg_rn = tcg_temp_new_i32(); - TCGv_i32 tcg_rd = tcg_temp_new_i32(); - int pass, maxpasses; - - if (is_scalar) { - maxpasses = 1; - } else { - maxpasses = is_q ? 
4 : 2; - } - - for (pass = 0; pass < maxpasses; pass++) { - if (is_scalar) { - read_vec_element_i32(s, tcg_rn, rn, pass, size); - read_vec_element_i32(s, tcg_rd, rd, pass, size); - } else { - read_vec_element_i32(s, tcg_rn, rn, pass, MO_32); - read_vec_element_i32(s, tcg_rd, rd, pass, MO_32); - } - - if (is_u) { /* USQADD */ - switch (size) { - case 0: - gen_helper_neon_uqadd_s8(tcg_rd, tcg_env, tcg_rn, tcg_rd); - break; - case 1: - gen_helper_neon_uqadd_s16(tcg_rd, tcg_env, tcg_rn, tcg_rd); - break; - case 2: - gen_helper_neon_uqadd_s32(tcg_rd, tcg_env, tcg_rn, tcg_rd); - break; - default: - g_assert_not_reached(); - } - } else { /* SUQADD */ - switch (size) { - case 0: - gen_helper_neon_sqadd_u8(tcg_rd, tcg_env, tcg_rn, tcg_rd); - break; - case 1: - gen_helper_neon_sqadd_u16(tcg_rd, tcg_env, tcg_rn, tcg_rd); - break; - case 2: - gen_helper_neon_sqadd_u32(tcg_rd, tcg_env, tcg_rn, tcg_rd); - break; - default: - g_assert_not_reached(); - } - } - - if (is_scalar) { - write_vec_element(s, tcg_constant_i64(0), rd, 0, MO_64); - } - write_vec_element_i32(s, tcg_rd, rd, pass, MO_32); - } - clear_vec_high(s, is_q, rd); - } -} - /* AdvSIMD scalar two reg misc * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 * +-----+---+-----------+------+-----------+--------+-----+------+------+ @@ -10082,12 +10041,6 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn) TCGv_ptr tcg_fpstatus; switch (opcode) { - case 0x3: /* USQADD / SUQADD*/ - if (!fp_access_check(s)) { - return; - } - handle_2misc_satacc(s, true, u, false, size, rn, rd); - return; case 0x7: /* SQABS / SQNEG */ break; case 0xa: /* CMLT */ @@ -10187,6 +10140,7 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn) } break; default: + case 0x3: /* USQADD / SUQADD */ unallocated_encoding(s); return; } @@ -10919,284 +10873,6 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn) } } -/* Integer op subgroup of C3.6.16. 
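SUQADD and USQADD, removed from the legacy decoder here and re-implemented via the DO_SAT expansions later in this patch, are the mixed-signedness saturating accumulates: the addend is interpreted with the opposite signedness of the destination, the result saturates to the destination's range, and any saturation sets the sticky QC flag. A byte-sized model of the semantics in plain C (the names and the int-typed QC flag are ours):

    #include <stdint.h>

    /* USQADD (byte): unsigned accumulator plus signed addend. */
    static uint8_t usqadd8(uint8_t d, int8_t n, int *qc)
    {
        int r = d + n;                         /* exact in int */
        if (r < 0)         { *qc = 1; return 0; }
        if (r > UINT8_MAX) { *qc = 1; return UINT8_MAX; }
        return (uint8_t)r;
    }

    /* SUQADD (byte): signed accumulator plus unsigned addend. */
    static int8_t suqadd8(int8_t d, uint8_t n, int *qc)
    {
        int r = d + n;
        if (r > INT8_MAX) { *qc = 1; return INT8_MAX; }
        return (int8_t)r;                      /* n >= 0, so no underflow */
    }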
*/ -static void disas_simd_3same_int(DisasContext *s, uint32_t insn) -{ - int is_q = extract32(insn, 30, 1); - int u = extract32(insn, 29, 1); - int size = extract32(insn, 22, 2); - int opcode = extract32(insn, 11, 5); - int rm = extract32(insn, 16, 5); - int rn = extract32(insn, 5, 5); - int rd = extract32(insn, 0, 5); - int pass; - TCGCond cond; - - switch (opcode) { - case 0x13: /* MUL, PMUL */ - if (u && size != 0) { - unallocated_encoding(s); - return; - } - /* fall through */ - case 0x0: /* SHADD, UHADD */ - case 0x2: /* SRHADD, URHADD */ - case 0x4: /* SHSUB, UHSUB */ - case 0xc: /* SMAX, UMAX */ - case 0xd: /* SMIN, UMIN */ - case 0xe: /* SABD, UABD */ - case 0xf: /* SABA, UABA */ - case 0x12: /* MLA, MLS */ - if (size == 3) { - unallocated_encoding(s); - return; - } - break; - case 0x16: /* SQDMULH, SQRDMULH */ - if (size == 0 || size == 3) { - unallocated_encoding(s); - return; - } - break; - default: - if (size == 3 && !is_q) { - unallocated_encoding(s); - return; - } - break; - } - - if (!fp_access_check(s)) { - return; - } - - switch (opcode) { - case 0x01: /* SQADD, UQADD */ - if (u) { - gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqadd_qc, size); - } else { - gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqadd_qc, size); - } - return; - case 0x05: /* SQSUB, UQSUB */ - if (u) { - gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqsub_qc, size); - } else { - gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqsub_qc, size); - } - return; - case 0x08: /* SSHL, USHL */ - if (u) { - gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_ushl, size); - } else { - gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sshl, size); - } - return; - case 0x0c: /* SMAX, UMAX */ - if (u) { - gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umax, size); - } else { - gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smax, size); - } - return; - case 0x0d: /* SMIN, UMIN */ - if (u) { - gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umin, size); - } else { - gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smin, size); - } - return; - case 0xe: /* SABD, UABD */ - if (u) { - gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uabd, size); - } else { - gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sabd, size); - } - return; - case 0xf: /* SABA, UABA */ - if (u) { - gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uaba, size); - } else { - gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_saba, size); - } - return; - case 0x10: /* ADD, SUB */ - if (u) { - gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_sub, size); - } else { - gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_add, size); - } - return; - case 0x13: /* MUL, PMUL */ - if (!u) { /* MUL */ - gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_mul, size); - } else { /* PMUL */ - gen_gvec_op3_ool(s, is_q, rd, rn, rm, 0, gen_helper_gvec_pmul_b); - } - return; - case 0x12: /* MLA, MLS */ - if (u) { - gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mls, size); - } else { - gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mla, size); - } - return; - case 0x16: /* SQDMULH, SQRDMULH */ - { - static gen_helper_gvec_3_ptr * const fns[2][2] = { - { gen_helper_neon_sqdmulh_h, gen_helper_neon_sqrdmulh_h }, - { gen_helper_neon_sqdmulh_s, gen_helper_neon_sqrdmulh_s }, - }; - gen_gvec_op3_qc(s, is_q, rd, rn, rm, fns[size - 1][u]); - } - return; - case 0x11: - if (!u) { /* CMTST */ - gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_cmtst, size); - return; - } - /* else CMEQ */ - cond = TCG_COND_EQ; - goto do_gvec_cmp; - case 0x06: /* CMGT, CMHI */ - cond = u ? 
TCG_COND_GTU : TCG_COND_GT; - goto do_gvec_cmp; - case 0x07: /* CMGE, CMHS */ - cond = u ? TCG_COND_GEU : TCG_COND_GE; - do_gvec_cmp: - tcg_gen_gvec_cmp(cond, size, vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), - is_q ? 16 : 8, vec_full_reg_size(s)); - return; - } - - if (size == 3) { - assert(is_q); - for (pass = 0; pass < 2; pass++) { - TCGv_i64 tcg_op1 = tcg_temp_new_i64(); - TCGv_i64 tcg_op2 = tcg_temp_new_i64(); - TCGv_i64 tcg_res = tcg_temp_new_i64(); - - read_vec_element(s, tcg_op1, rn, pass, MO_64); - read_vec_element(s, tcg_op2, rm, pass, MO_64); - - handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2); - - write_vec_element(s, tcg_res, rd, pass, MO_64); - } - } else { - for (pass = 0; pass < (is_q ? 4 : 2); pass++) { - TCGv_i32 tcg_op1 = tcg_temp_new_i32(); - TCGv_i32 tcg_op2 = tcg_temp_new_i32(); - TCGv_i32 tcg_res = tcg_temp_new_i32(); - NeonGenTwoOpFn *genfn = NULL; - NeonGenTwoOpEnvFn *genenvfn = NULL; - - read_vec_element_i32(s, tcg_op1, rn, pass, MO_32); - read_vec_element_i32(s, tcg_op2, rm, pass, MO_32); - - switch (opcode) { - case 0x0: /* SHADD, UHADD */ - { - static NeonGenTwoOpFn * const fns[3][2] = { - { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 }, - { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 }, - { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 }, - }; - genfn = fns[size][u]; - break; - } - case 0x2: /* SRHADD, URHADD */ - { - static NeonGenTwoOpFn * const fns[3][2] = { - { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 }, - { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 }, - { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 }, - }; - genfn = fns[size][u]; - break; - } - case 0x4: /* SHSUB, UHSUB */ - { - static NeonGenTwoOpFn * const fns[3][2] = { - { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 }, - { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 }, - { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 }, - }; - genfn = fns[size][u]; - break; - } - case 0x9: /* SQSHL, UQSHL */ - { - static NeonGenTwoOpEnvFn * const fns[3][2] = { - { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 }, - { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 }, - { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 }, - }; - genenvfn = fns[size][u]; - break; - } - case 0xa: /* SRSHL, URSHL */ - { - static NeonGenTwoOpFn * const fns[3][2] = { - { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 }, - { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 }, - { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 }, - }; - genfn = fns[size][u]; - break; - } - case 0xb: /* SQRSHL, UQRSHL */ - { - static NeonGenTwoOpEnvFn * const fns[3][2] = { - { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 }, - { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 }, - { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 }, - }; - genenvfn = fns[size][u]; - break; - } - default: - g_assert_not_reached(); - } - - if (genenvfn) { - genenvfn(tcg_res, tcg_env, tcg_op1, tcg_op2); - } else { - genfn(tcg_res, tcg_op1, tcg_op2); - } - - write_vec_element_i32(s, tcg_res, rd, pass, MO_32); - } - } - clear_vec_high(s, is_q, rd); -} - -/* AdvSIMD three same - * 31 30 29 28 24 23 22 21 20 16 15 11 10 9 5 4 0 - * +---+---+---+-----------+------+---+------+--------+---+------+------+ - * | 0 | Q | U | 0 1 1 1 0 | size | 1 | Rm | opcode | 1 | Rn | Rd | - * +---+---+---+-----------+------+---+------+--------+---+------+------+ - */ -static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn) -{ - int opcode = 
extract32(insn, 11, 5); - - switch (opcode) { - default: - disas_simd_3same_int(s, insn); - break; - case 0x3: /* logic ops */ - case 0x14: /* SMAXP, UMAXP */ - case 0x15: /* SMINP, UMINP */ - case 0x17: /* ADDP */ - case 0x18 ... 0x31: /* floating point ops */ - unallocated_encoding(s); - break; - } -} - /* AdvSIMD three same extra * 31 30 29 28 24 23 22 21 20 16 15 14 11 10 9 5 4 0 * +---+---+---+-----------+------+---+------+---+--------+---+----+----+ @@ -11683,16 +11359,6 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) return; } break; - case 0x3: /* SUQADD, USQADD */ - if (size == 3 && !is_q) { - unallocated_encoding(s); - return; - } - if (!fp_access_check(s)) { - return; - } - handle_2misc_satacc(s, false, u, is_q, size, rn, rd); - return; case 0x7: /* SQABS, SQNEG */ if (size == 3 && !is_q) { unallocated_encoding(s); @@ -11867,6 +11533,7 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) break; } default: + case 0x3: /* SUQADD, USQADD */ unallocated_encoding(s); return; } @@ -12375,14 +12042,6 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) TCGv_ptr fpst; switch (16 * u + opcode) { - case 0x08: /* MUL */ - case 0x10: /* MLA */ - case 0x14: /* MLS */ - if (is_scalar) { - unallocated_encoding(s); - return; - } - break; case 0x02: /* SMLAL, SMLAL2 */ case 0x12: /* UMLAL, UMLAL2 */ case 0x06: /* SMLSL, SMLSL2 */ @@ -12400,9 +12059,6 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) case 0x0b: /* SQDMULL, SQDMULL2 */ is_long = true; break; - case 0x0c: /* SQDMULH */ - case 0x0d: /* SQRDMULH */ - break; case 0x1d: /* SQRDMLAH */ case 0x1f: /* SQRDMLSH */ if (!dc_isar_feature(aa64_rdm, s)) { @@ -12462,7 +12118,12 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) case 0x01: /* FMLA */ case 0x04: /* FMLSL */ case 0x05: /* FMLS */ + case 0x08: /* MUL */ case 0x09: /* FMUL */ + case 0x0c: /* SQDMULH */ + case 0x0d: /* SQRDMULH */ + case 0x10: /* MLA */ + case 0x14: /* MLS */ case 0x18: /* FMLAL2 */ case 0x19: /* FMULX */ case 0x1c: /* FMLSL2 */ @@ -12583,56 +12244,6 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) : gen_helper_gvec_fcmlah_idx); } return; - - case 0x08: /* MUL */ - if (!is_long && !is_scalar) { - static gen_helper_gvec_3 * const fns[3] = { - gen_helper_gvec_mul_idx_h, - gen_helper_gvec_mul_idx_s, - gen_helper_gvec_mul_idx_d, - }; - tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), - is_q ? 16 : 8, vec_full_reg_size(s), - index, fns[size - 1]); - return; - } - break; - - case 0x10: /* MLA */ - if (!is_long && !is_scalar) { - static gen_helper_gvec_4 * const fns[3] = { - gen_helper_gvec_mla_idx_h, - gen_helper_gvec_mla_idx_s, - gen_helper_gvec_mla_idx_d, - }; - tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), - vec_full_reg_offset(s, rd), - is_q ? 16 : 8, vec_full_reg_size(s), - index, fns[size - 1]); - return; - } - break; - - case 0x14: /* MLS */ - if (!is_long && !is_scalar) { - static gen_helper_gvec_4 * const fns[3] = { - gen_helper_gvec_mls_idx_h, - gen_helper_gvec_mls_idx_s, - gen_helper_gvec_mls_idx_d, - }; - tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), - vec_full_reg_offset(s, rd), - is_q ? 
16 : 8, vec_full_reg_size(s), - index, fns[size - 1]); - return; - } - break; } if (size == 3) { @@ -12668,7 +12279,6 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32); switch (16 * u + opcode) { - case 0x08: /* MUL */ case 0x10: /* MLA */ case 0x14: /* MLS */ { @@ -12917,7 +12527,6 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) */ static const AArch64DecodeTable data_proc_simd[] = { /* pattern , mask , fn */ - { 0x0e200400, 0x9f200400, disas_simd_three_reg_same }, { 0x0e008400, 0x9f208400, disas_simd_three_reg_same_extra }, { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff }, { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc }, @@ -12929,7 +12538,6 @@ static const AArch64DecodeTable data_proc_simd[] = { { 0x0e000000, 0xbf208c00, disas_simd_tb }, { 0x0e000800, 0xbf208c00, disas_simd_zip_trn }, { 0x2e000000, 0xbf208400, disas_simd_ext }, - { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same }, { 0x5e008400, 0xdf208400, disas_simd_scalar_three_reg_same_extra }, { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff }, { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc }, diff --git a/target/arm/tcg/translate-a64.h b/target/arm/tcg/translate-a64.h index 91750f0ca9..0fcf7cb63a 100644 --- a/target/arm/tcg/translate-a64.h +++ b/target/arm/tcg/translate-a64.h @@ -198,6 +198,20 @@ void gen_gvec_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m, void gen_gvec_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m, uint32_t a, uint32_t oprsz, uint32_t maxsz); +void gen_suqadd_bhs(TCGv_i64 res, TCGv_i64 qc, + TCGv_i64 a, TCGv_i64 b, MemOp esz); +void gen_suqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b); +void gen_gvec_suqadd_qc(unsigned vece, uint32_t rd_ofs, + uint32_t rn_ofs, uint32_t rm_ofs, + uint32_t opr_sz, uint32_t max_sz); + +void gen_usqadd_bhs(TCGv_i64 res, TCGv_i64 qc, + TCGv_i64 a, TCGv_i64 b, MemOp esz); +void gen_usqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b); +void gen_gvec_usqadd_qc(unsigned vece, uint32_t rd_ofs, + uint32_t rn_ofs, uint32_t rm_ofs, + uint32_t opr_sz, uint32_t max_sz); + void gen_sve_ldr(DisasContext *s, TCGv_ptr, int vofs, int len, int rn, int imm); void gen_sve_str(DisasContext *s, TCGv_ptr, int vofs, int len, int rn, int imm); diff --git a/target/arm/tcg/translate-neon.c b/target/arm/tcg/translate-neon.c index 18b048611b..915c9e56db 100644 --- a/target/arm/tcg/translate-neon.c +++ b/target/arm/tcg/translate-neon.c @@ -794,6 +794,12 @@ DO_3SAME(VQADD_S, gen_gvec_sqadd_qc) DO_3SAME(VQADD_U, gen_gvec_uqadd_qc) DO_3SAME(VQSUB_S, gen_gvec_sqsub_qc) DO_3SAME(VQSUB_U, gen_gvec_uqsub_qc) +DO_3SAME(VRSHL_S, gen_gvec_srshl) +DO_3SAME(VRSHL_U, gen_gvec_urshl) +DO_3SAME(VQSHL_S, gen_neon_sqshl) +DO_3SAME(VQSHL_U, gen_neon_uqshl) +DO_3SAME(VQRSHL_S, gen_neon_sqrshl) +DO_3SAME(VQRSHL_U, gen_neon_uqrshl) /* These insns are all gvec_bitsel but with the inputs in various orders. 
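A note on the data_proc_simd table above, since this patch keeps shrinking it: lookup is first-match over {pattern, mask} pairs, and an instruction word selects an entry when (insn & mask) == pattern, so a deleted row means those encodings no longer reach a legacy handler and are decoded by decodetree instead (or fall through to unallocated_encoding()). A minimal sketch of the matching (the two entry values are copied from the table; the struct and names are illustrative):

    #include <stdint.h>
    #include <stddef.h>

    typedef struct {
        uint32_t pattern;
        uint32_t mask;
        const char *name;     /* stands in for the disas_* callback */
    } DecodeEntry;

    static const DecodeEntry table[] = {
        { 0x0e008400, 0x9f208400, "three_reg_same_extra" },
        { 0x0e200000, 0x9f200c00, "three_reg_diff" },
    };

    static const char *lookup(uint32_t insn)
    {
        for (size_t i = 0; i < sizeof(table) / sizeof(table[0]); i++) {
            if ((insn & table[i].mask) == table[i].pattern) {
                return table[i].name;
            }
        }
        return NULL;          /* no match: treated as unallocated */
    }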
*/ #define DO_3SAME_BITSEL(INSN, O1, O2, O3) \ @@ -835,6 +841,12 @@ DO_3SAME_NO_SZ_3(VPMAX_S, gen_gvec_smaxp) DO_3SAME_NO_SZ_3(VPMIN_S, gen_gvec_sminp) DO_3SAME_NO_SZ_3(VPMAX_U, gen_gvec_umaxp) DO_3SAME_NO_SZ_3(VPMIN_U, gen_gvec_uminp) +DO_3SAME_NO_SZ_3(VHADD_S, gen_gvec_shadd) +DO_3SAME_NO_SZ_3(VHADD_U, gen_gvec_uhadd) +DO_3SAME_NO_SZ_3(VHSUB_S, gen_gvec_shsub) +DO_3SAME_NO_SZ_3(VHSUB_U, gen_gvec_uhsub) +DO_3SAME_NO_SZ_3(VRHADD_S, gen_gvec_srhadd) +DO_3SAME_NO_SZ_3(VRHADD_U, gen_gvec_urhadd) #define DO_3SAME_CMP(INSN, COND) \ static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \ @@ -912,51 +924,6 @@ DO_SHA2(SHA256H, gen_helper_crypto_sha256h) DO_SHA2(SHA256H2, gen_helper_crypto_sha256h2) DO_SHA2(SHA256SU1, gen_helper_crypto_sha256su1) -#define DO_3SAME_64(INSN, FUNC) \ - static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \ - uint32_t rn_ofs, uint32_t rm_ofs, \ - uint32_t oprsz, uint32_t maxsz) \ - { \ - static const GVecGen3 op = { .fni8 = FUNC }; \ - tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &op); \ - } \ - DO_3SAME(INSN, gen_##INSN##_3s) - -#define DO_3SAME_64_ENV(INSN, FUNC) \ - static void gen_##INSN##_elt(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) \ - { \ - FUNC(d, tcg_env, n, m); \ - } \ - DO_3SAME_64(INSN, gen_##INSN##_elt) - -DO_3SAME_64(VRSHL_S64, gen_helper_neon_rshl_s64) -DO_3SAME_64(VRSHL_U64, gen_helper_neon_rshl_u64) -DO_3SAME_64_ENV(VQSHL_S64, gen_helper_neon_qshl_s64) -DO_3SAME_64_ENV(VQSHL_U64, gen_helper_neon_qshl_u64) -DO_3SAME_64_ENV(VQRSHL_S64, gen_helper_neon_qrshl_s64) -DO_3SAME_64_ENV(VQRSHL_U64, gen_helper_neon_qrshl_u64) - -#define DO_3SAME_32(INSN, FUNC) \ - static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \ - uint32_t rn_ofs, uint32_t rm_ofs, \ - uint32_t oprsz, uint32_t maxsz) \ - { \ - static const GVecGen3 ops[4] = { \ - { .fni4 = gen_helper_neon_##FUNC##8 }, \ - { .fni4 = gen_helper_neon_##FUNC##16 }, \ - { .fni4 = gen_helper_neon_##FUNC##32 }, \ - { 0 }, \ - }; \ - tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \ - } \ - static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ - { \ - if (a->size > 2) { \ - return false; \ - } \ - return do_3same(s, a, gen_##INSN##_3s); \ - } - /* * Some helper functions need to be passed the tcg_env. 
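To unpack the WRAP_ENV_FN pattern the comment here describes: gvec element callbacks take only (dest, op1, op2), while many Neon helpers also need tcg_env, so the macro generates a small trampoline that pins the extra argument. A simplified value-level model in plain C (the helper body, names, and file-scope env stand-in are invented for this sketch):

    #include <stdint.h>

    /* Models tcg_env: the extra argument the wrapper pins down. */
    static void *sketch_env;

    /* A helper with the four-argument "env" shape. */
    static void helper_qadd_u32(uint32_t *d, void *env, uint32_t n, uint32_t m)
    {
        (void)env;
        uint64_t r = (uint64_t)n + m;
        *d = r > UINT32_MAX ? UINT32_MAX : (uint32_t)r;   /* saturating add */
    }

    /* Like WRAP_ENV_FN: generate a three-argument adapter around FUNC. */
    #define WRAP_ENV(NAME, FUNC)                              \
        static void NAME(uint32_t *d, uint32_t n, uint32_t m) \
        {                                                     \
            FUNC(d, sketch_env, n, m);                        \
        }

    WRAP_ENV(qadd_u32_tramp, helper_qadd_u32)
    /* qadd_u32_tramp now has the shape a gvec .fni4 slot expects. */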
In order * to use those with the gvec APIs like tcg_gen_gvec_3() we need @@ -969,67 +936,12 @@ DO_3SAME_64_ENV(VQRSHL_U64, gen_helper_neon_qrshl_u64) FUNC(d, tcg_env, n, m); \ } -#define DO_3SAME_32_ENV(INSN, FUNC) \ - WRAP_ENV_FN(gen_##INSN##_tramp8, gen_helper_neon_##FUNC##8); \ - WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##16); \ - WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##32); \ - static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \ - uint32_t rn_ofs, uint32_t rm_ofs, \ - uint32_t oprsz, uint32_t maxsz) \ - { \ - static const GVecGen3 ops[4] = { \ - { .fni4 = gen_##INSN##_tramp8 }, \ - { .fni4 = gen_##INSN##_tramp16 }, \ - { .fni4 = gen_##INSN##_tramp32 }, \ - { 0 }, \ - }; \ - tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \ - } \ - static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ - { \ - if (a->size > 2) { \ - return false; \ - } \ - return do_3same(s, a, gen_##INSN##_3s); \ - } - -DO_3SAME_32(VHADD_S, hadd_s) -DO_3SAME_32(VHADD_U, hadd_u) -DO_3SAME_32(VHSUB_S, hsub_s) -DO_3SAME_32(VHSUB_U, hsub_u) -DO_3SAME_32(VRHADD_S, rhadd_s) -DO_3SAME_32(VRHADD_U, rhadd_u) -DO_3SAME_32(VRSHL_S, rshl_s) -DO_3SAME_32(VRSHL_U, rshl_u) - -DO_3SAME_32_ENV(VQSHL_S, qshl_s) -DO_3SAME_32_ENV(VQSHL_U, qshl_u) -DO_3SAME_32_ENV(VQRSHL_S, qrshl_s) -DO_3SAME_32_ENV(VQRSHL_U, qrshl_u) - #define DO_3SAME_VQDMULH(INSN, FUNC) \ - WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##_s16); \ - WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##_s32); \ - static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \ - uint32_t rn_ofs, uint32_t rm_ofs, \ - uint32_t oprsz, uint32_t maxsz) \ - { \ - static const GVecGen3 ops[2] = { \ - { .fni4 = gen_##INSN##_tramp16 }, \ - { .fni4 = gen_##INSN##_tramp32 }, \ - }; \ - tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece - 1]); \ - } \ static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ - { \ - if (a->size != 1 && a->size != 2) { \ - return false; \ - } \ - return do_3same(s, a, gen_##INSN##_3s); \ - } + { return a->size >= 1 && a->size <= 2 && do_3same(s, a, FUNC); } -DO_3SAME_VQDMULH(VQDMULH, qdmulh) -DO_3SAME_VQDMULH(VQRDMULH, qrdmulh) +DO_3SAME_VQDMULH(VQDMULH, gen_gvec_sqdmulh_qc) +DO_3SAME_VQDMULH(VQRDMULH, gen_gvec_sqrdmulh_qc) #define WRAP_FP_GVEC(WRAPNAME, FPST, FUNC) \ static void WRAPNAME(unsigned vece, uint32_t rd_ofs, \ diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h index 3abdbedfe5..aba21f730f 100644 --- a/target/arm/tcg/translate.h +++ b/target/arm/tcg/translate.h @@ -459,6 +459,31 @@ void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_srshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_urshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_neon_sqshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_neon_uqshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_neon_sqrshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_neon_uqrshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t 
opr_sz, uint32_t max_sz); + +void gen_gvec_shadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_uhadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_shsub(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_uhsub(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_srhadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_urhadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b); void gen_ushl_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b); @@ -466,12 +491,27 @@ void gen_sshl_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b); void gen_ushl_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b); void gen_sshl_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b); +void gen_uqadd_bhs(TCGv_i64 res, TCGv_i64 qc, + TCGv_i64 a, TCGv_i64 b, MemOp esz); +void gen_uqadd_d(TCGv_i64 d, TCGv_i64 q, TCGv_i64 a, TCGv_i64 b); void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); + +void gen_sqadd_bhs(TCGv_i64 res, TCGv_i64 qc, + TCGv_i64 a, TCGv_i64 b, MemOp esz); +void gen_sqadd_d(TCGv_i64 d, TCGv_i64 q, TCGv_i64 a, TCGv_i64 b); void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); + +void gen_uqsub_bhs(TCGv_i64 res, TCGv_i64 qc, + TCGv_i64 a, TCGv_i64 b, MemOp esz); +void gen_uqsub_d(TCGv_i64 d, TCGv_i64 q, TCGv_i64 a, TCGv_i64 b); void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); + +void gen_sqsub_bhs(TCGv_i64 res, TCGv_i64 qc, + TCGv_i64 a, TCGv_i64 b, MemOp esz); +void gen_sqsub_d(TCGv_i64 d, TCGv_i64 q, TCGv_i64 a, TCGv_i64 b); void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); @@ -499,6 +539,10 @@ void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, int64_t shift, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_sqdmulh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_sqrdmulh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c index 56fea14edb..b05922b425 100644 --- a/target/arm/tcg/vec_helper.c +++ b/target/arm/tcg/vec_helper.c @@ -311,6 +311,38 @@ void HELPER(neon_sqrdmulh_h)(void *vd, void *vn, void *vm, clear_tail(d, opr_sz, simd_maxsz(desc)); } +void HELPER(neon_sqdmulh_idx_h)(void *vd, void *vn, void *vm, + void *vq, uint32_t desc) +{ + intptr_t i, j, opr_sz = simd_oprsz(desc); + int idx = simd_data(desc); + int16_t *d = vd, *n = vn, *m = (int16_t *)vm + H2(idx); + + for (i = 0; i < opr_sz / 2; i += 16 / 2) { + int16_t mm = m[i]; + for (j = 0; j < 16 / 2; ++j) { + d[i + j] = do_sqrdmlah_h(n[i + j], mm, 0, false, false, vq); + } + } + 
clear_tail(d, opr_sz, simd_maxsz(desc)); +} + +void HELPER(neon_sqrdmulh_idx_h)(void *vd, void *vn, void *vm, + void *vq, uint32_t desc) +{ + intptr_t i, j, opr_sz = simd_oprsz(desc); + int idx = simd_data(desc); + int16_t *d = vd, *n = vn, *m = (int16_t *)vm + H2(idx); + + for (i = 0; i < opr_sz / 2; i += 16 / 2) { + int16_t mm = m[i]; + for (j = 0; j < 16 / 2; ++j) { + d[i + j] = do_sqrdmlah_h(n[i + j], mm, 0, false, true, vq); + } + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + void HELPER(sve2_sqrdmlah_h)(void *vd, void *vn, void *vm, void *va, uint32_t desc) { @@ -474,6 +506,38 @@ void HELPER(neon_sqrdmulh_s)(void *vd, void *vn, void *vm, clear_tail(d, opr_sz, simd_maxsz(desc)); } +void HELPER(neon_sqdmulh_idx_s)(void *vd, void *vn, void *vm, + void *vq, uint32_t desc) +{ + intptr_t i, j, opr_sz = simd_oprsz(desc); + int idx = simd_data(desc); + int32_t *d = vd, *n = vn, *m = (int32_t *)vm + H4(idx); + + for (i = 0; i < opr_sz / 4; i += 16 / 4) { + int32_t mm = m[i]; + for (j = 0; j < 16 / 4; ++j) { + d[i + j] = do_sqrdmlah_s(n[i + j], mm, 0, false, false, vq); + } + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + +void HELPER(neon_sqrdmulh_idx_s)(void *vd, void *vn, void *vm, + void *vq, uint32_t desc) +{ + intptr_t i, j, opr_sz = simd_oprsz(desc); + int idx = simd_data(desc); + int32_t *d = vd, *n = vn, *m = (int32_t *)vm + H4(idx); + + for (i = 0; i < opr_sz / 4; i += 16 / 4) { + int32_t mm = m[i]; + for (j = 0; j < 16 / 4; ++j) { + d[i + j] = do_sqrdmlah_s(n[i + j], mm, 0, false, true, vq); + } + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + void HELPER(sve2_sqrdmlah_s)(void *vd, void *vn, void *vm, void *va, uint32_t desc) { @@ -1555,6 +1619,14 @@ DO_SAT(gvec_sqsub_b, int, int8_t, int8_t, -, INT8_MIN, INT8_MAX) DO_SAT(gvec_sqsub_h, int, int16_t, int16_t, -, INT16_MIN, INT16_MAX) DO_SAT(gvec_sqsub_s, int64_t, int32_t, int32_t, -, INT32_MIN, INT32_MAX) +DO_SAT(gvec_usqadd_b, int, uint8_t, int8_t, +, 0, UINT8_MAX) +DO_SAT(gvec_usqadd_h, int, uint16_t, int16_t, +, 0, UINT16_MAX) +DO_SAT(gvec_usqadd_s, int64_t, uint32_t, int32_t, +, 0, UINT32_MAX) + +DO_SAT(gvec_suqadd_b, int, int8_t, uint8_t, +, INT8_MIN, INT8_MAX) +DO_SAT(gvec_suqadd_h, int, int16_t, uint16_t, +, INT16_MIN, INT16_MAX) +DO_SAT(gvec_suqadd_s, int64_t, int32_t, uint32_t, +, INT32_MIN, INT32_MAX) + #undef DO_SAT void HELPER(gvec_uqadd_d)(void *vd, void *vq, void *vn, @@ -1645,6 +1717,62 @@ void HELPER(gvec_sqsub_d)(void *vd, void *vq, void *vn, clear_tail(d, oprsz, simd_maxsz(desc)); } +void HELPER(gvec_usqadd_d)(void *vd, void *vq, void *vn, + void *vm, uint32_t desc) +{ + intptr_t i, oprsz = simd_oprsz(desc); + uint64_t *d = vd, *n = vn, *m = vm; + bool q = false; + + for (i = 0; i < oprsz / 8; i++) { + uint64_t nn = n[i]; + int64_t mm = m[i]; + uint64_t dd = nn + mm; + + if (mm < 0) { + if (nn < (uint64_t)-mm) { + dd = 0; + q = true; + } + } else { + if (dd < nn) { + dd = UINT64_MAX; + q = true; + } + } + d[i] = dd; + } + if (q) { + uint32_t *qc = vq; + qc[0] = 1; + } + clear_tail(d, oprsz, simd_maxsz(desc)); +} + +void HELPER(gvec_suqadd_d)(void *vd, void *vq, void *vn, + void *vm, uint32_t desc) +{ + intptr_t i, oprsz = simd_oprsz(desc); + uint64_t *d = vd, *n = vn, *m = vm; + bool q = false; + + for (i = 0; i < oprsz / 8; i++) { + int64_t nn = n[i]; + uint64_t mm = m[i]; + int64_t dd = nn + mm; + + if (mm > (uint64_t)(INT64_MAX - nn)) { + dd = INT64_MAX; + q = true; + } + d[i] = dd; + } + if (q) { + uint32_t *qc = vq; + qc[0] = 1; + } + clear_tail(d, oprsz, simd_maxsz(desc)); +} #define DO_SRA(NAME, 
TYPE) \ void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \ diff --git a/target/i386/tcg/helper-tcg.h b/target/i386/tcg/helper-tcg.h index effc2c1c98..85957943bf 100644 --- a/target/i386/tcg/helper-tcg.h +++ b/target/i386/tcg/helper-tcg.h @@ -39,7 +39,7 @@ QEMU_BUILD_BUG_ON(TCG_PHYS_ADDR_BITS > TARGET_PHYS_ADDR_SPACE_BITS); */ void x86_cpu_do_interrupt(CPUState *cpu); #ifndef CONFIG_USER_ONLY -void x86_cpu_exec_halt(CPUState *cpu); +bool x86_cpu_exec_halt(CPUState *cpu); bool x86_need_replay_interrupt(int interrupt_request); bool x86_cpu_exec_interrupt(CPUState *cpu, int int_req); #endif diff --git a/target/i386/tcg/sysemu/seg_helper.c b/target/i386/tcg/sysemu/seg_helper.c index 2db8083748..9ba94deb3a 100644 --- a/target/i386/tcg/sysemu/seg_helper.c +++ b/target/i386/tcg/sysemu/seg_helper.c @@ -128,7 +128,7 @@ void x86_cpu_do_interrupt(CPUState *cs) } } -void x86_cpu_exec_halt(CPUState *cpu) +bool x86_cpu_exec_halt(CPUState *cpu) { if (cpu->interrupt_request & CPU_INTERRUPT_POLL) { X86CPU *x86_cpu = X86_CPU(cpu); @@ -138,6 +138,7 @@ void x86_cpu_exec_halt(CPUState *cpu) cpu_reset_interrupt(cpu, CPU_INTERRUPT_POLL); bql_unlock(); } + return cpu_has_work(cpu); } bool x86_need_replay_interrupt(int interrupt_request) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index eb1a2e7d6d..cee6fc4a9a 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -153,8 +153,10 @@ const RISCVIsaExtData isa_edata_arr[] = { ISA_EXT_DATA_ENTRY(zvbb, PRIV_VERSION_1_12_0, ext_zvbb), ISA_EXT_DATA_ENTRY(zvbc, PRIV_VERSION_1_12_0, ext_zvbc), ISA_EXT_DATA_ENTRY(zve32f, PRIV_VERSION_1_10_0, ext_zve32f), + ISA_EXT_DATA_ENTRY(zve32x, PRIV_VERSION_1_10_0, ext_zve32x), ISA_EXT_DATA_ENTRY(zve64f, PRIV_VERSION_1_10_0, ext_zve64f), ISA_EXT_DATA_ENTRY(zve64d, PRIV_VERSION_1_10_0, ext_zve64d), + ISA_EXT_DATA_ENTRY(zve64x, PRIV_VERSION_1_10_0, ext_zve64x), ISA_EXT_DATA_ENTRY(zvfbfmin, PRIV_VERSION_1_12_0, ext_zvfbfmin), ISA_EXT_DATA_ENTRY(zvfbfwma, PRIV_VERSION_1_12_0, ext_zvfbfwma), ISA_EXT_DATA_ENTRY(zvfh, PRIV_VERSION_1_12_0, ext_zvfh), @@ -545,6 +547,7 @@ static void rv64_thead_c906_cpu_init(Object *obj) cpu->cfg.mvendorid = THEAD_VENDOR_ID; #ifndef CONFIG_USER_ONLY set_satp_mode_max_supported(cpu, VM_1_10_SV39); + th_register_custom_csrs(cpu); #endif /* inherited from parent obj via riscv_cpu_init() */ @@ -1132,6 +1135,7 @@ void riscv_cpu_finalize_features(RISCVCPU *cpu, Error **errp) error_propagate(errp, local_err); return; } + riscv_tcg_cpu_finalize_dynamic_decoder(cpu); } else if (kvm_enabled()) { riscv_kvm_cpu_finalize_features(cpu, &local_err); if (local_err != NULL) { @@ -1396,7 +1400,7 @@ static const MISAExtInfo misa_ext_info_arr[] = { MISA_EXT_INFO(RVJ, "x-j", "Dynamic translated languages"), MISA_EXT_INFO(RVV, "v", "Vector operations"), MISA_EXT_INFO(RVG, "g", "General purpose (IMAFD_Zicsr_Zifencei)"), - MISA_EXT_INFO(RVB, "x-b", "Bit manipulation (Zba_Zbb_Zbs)") + MISA_EXT_INFO(RVB, "b", "Bit manipulation (Zba_Zbb_Zbs)") }; static void riscv_cpu_validate_misa_mxl(RISCVCPUClass *mcc) @@ -1472,8 +1476,10 @@ const RISCVCPUMultiExtConfig riscv_cpu_extensions[] = { MULTI_EXT_CFG_BOOL("zfh", ext_zfh, false), MULTI_EXT_CFG_BOOL("zfhmin", ext_zfhmin, false), MULTI_EXT_CFG_BOOL("zve32f", ext_zve32f, false), + MULTI_EXT_CFG_BOOL("zve32x", ext_zve32x, false), MULTI_EXT_CFG_BOOL("zve64f", ext_zve64f, false), MULTI_EXT_CFG_BOOL("zve64d", ext_zve64d, false), + MULTI_EXT_CFG_BOOL("zve64x", ext_zve64x, false), MULTI_EXT_CFG_BOOL("zvfbfmin", ext_zvfbfmin, false), MULTI_EXT_CFG_BOOL("zvfbfwma", ext_zvfbfwma, false), 
MULTI_EXT_CFG_BOOL("zvfh", ext_zvfh, false), @@ -1535,7 +1541,7 @@ const RISCVCPUMultiExtConfig riscv_cpu_extensions[] = { /* Vector cryptography extensions */ MULTI_EXT_CFG_BOOL("zvbb", ext_zvbb, false), MULTI_EXT_CFG_BOOL("zvbc", ext_zvbc, false), - MULTI_EXT_CFG_BOOL("zvkb", ext_zvkg, false), + MULTI_EXT_CFG_BOOL("zvkb", ext_zvkb, false), MULTI_EXT_CFG_BOOL("zvkg", ext_zvkg, false), MULTI_EXT_CFG_BOOL("zvkned", ext_zvkned, false), MULTI_EXT_CFG_BOOL("zvknha", ext_zvknha, false), diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h index 2d0c02c35b..12d8b5344a 100644 --- a/target/riscv/cpu.h +++ b/target/riscv/cpu.h @@ -455,6 +455,7 @@ struct ArchCPU { uint32_t pmu_avail_ctrs; /* Mapping of events to counters */ GHashTable *pmu_event_ctr_map; + const GPtrArray *decoders; }; /** @@ -819,7 +820,13 @@ void riscv_set_csr_ops(int csrno, riscv_csr_operations *ops); void riscv_cpu_register_gdb_regs_for_features(CPUState *cs); +target_ulong riscv_new_csr_seed(target_ulong new_value, + target_ulong write_mask); + uint8_t satp_mode_max_from_map(uint32_t map); const char *satp_mode_str(uint8_t satp_mode, bool is_32_bit); +/* Implemented in th_csr.c */ +void th_register_custom_csrs(RISCVCPU *cpu); + #endif /* RISCV_CPU_H */ diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h index fc2068ee4d..74318a925c 100644 --- a/target/riscv/cpu_bits.h +++ b/target/riscv/cpu_bits.h @@ -670,11 +670,11 @@ typedef enum RISCVException { RISCV_EXCP_INST_PAGE_FAULT = 0xc, /* since: priv-1.10.0 */ RISCV_EXCP_LOAD_PAGE_FAULT = 0xd, /* since: priv-1.10.0 */ RISCV_EXCP_STORE_PAGE_FAULT = 0xf, /* since: priv-1.10.0 */ - RISCV_EXCP_SEMIHOST = 0x10, RISCV_EXCP_INST_GUEST_PAGE_FAULT = 0x14, RISCV_EXCP_LOAD_GUEST_ACCESS_FAULT = 0x15, RISCV_EXCP_VIRT_INSTRUCTION_FAULT = 0x16, RISCV_EXCP_STORE_GUEST_AMO_ACCESS_FAULT = 0x17, + RISCV_EXCP_SEMIHOST = 0x3f, } RISCVException; #define RISCV_EXCP_INT_FLAG 0x80000000 diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h index cb750154bd..e1e4f32698 100644 --- a/target/riscv/cpu_cfg.h +++ b/target/riscv/cpu_cfg.h @@ -91,8 +91,10 @@ struct RISCVCPUConfig { bool ext_zhinx; bool ext_zhinxmin; bool ext_zve32f; + bool ext_zve32x; bool ext_zve64f; bool ext_zve64d; + bool ext_zve64x; bool ext_zvbb; bool ext_zvbc; bool ext_zvkb; diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c index 8ad546a45a..a02497d778 100644 --- a/target/riscv/cpu_helper.c +++ b/target/riscv/cpu_helper.c @@ -73,7 +73,7 @@ void cpu_get_tb_cpu_state(CPURISCVState *env, vaddr *pc, *pc = env->xl == MXL_RV32 ? env->pc & UINT32_MAX : env->pc; *cs_base = 0; - if (cpu->cfg.ext_zve32f) { + if (cpu->cfg.ext_zve32x) { /* * If env->vl equals to VLMAX, we can use generic vector operation * expanders (GVEC) to accerlate the vector operations. @@ -1177,28 +1177,30 @@ static void raise_mmu_exception(CPURISCVState *env, target_ulong address, switch (access_type) { case MMU_INST_FETCH: - if (env->virt_enabled && !first_stage) { + if (pmp_violation) { + cs->exception_index = RISCV_EXCP_INST_ACCESS_FAULT; + } else if (env->virt_enabled && !first_stage) { cs->exception_index = RISCV_EXCP_INST_GUEST_PAGE_FAULT; } else { - cs->exception_index = pmp_violation ? 
- RISCV_EXCP_INST_ACCESS_FAULT : RISCV_EXCP_INST_PAGE_FAULT; + cs->exception_index = RISCV_EXCP_INST_PAGE_FAULT; } break; case MMU_DATA_LOAD: - if (two_stage && !first_stage) { + if (pmp_violation) { + cs->exception_index = RISCV_EXCP_LOAD_ACCESS_FAULT; + } else if (two_stage && !first_stage) { cs->exception_index = RISCV_EXCP_LOAD_GUEST_ACCESS_FAULT; } else { - cs->exception_index = pmp_violation ? - RISCV_EXCP_LOAD_ACCESS_FAULT : RISCV_EXCP_LOAD_PAGE_FAULT; + cs->exception_index = RISCV_EXCP_LOAD_PAGE_FAULT; } break; case MMU_DATA_STORE: - if (two_stage && !first_stage) { + if (pmp_violation) { + cs->exception_index = RISCV_EXCP_STORE_AMO_ACCESS_FAULT; + } else if (two_stage && !first_stage) { cs->exception_index = RISCV_EXCP_STORE_GUEST_AMO_ACCESS_FAULT; } else { - cs->exception_index = pmp_violation ? - RISCV_EXCP_STORE_AMO_ACCESS_FAULT : - RISCV_EXCP_STORE_PAGE_FAULT; + cs->exception_index = RISCV_EXCP_STORE_PAGE_FAULT; } break; default: @@ -1374,17 +1376,17 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, int size, __func__, pa, ret, prot_pmp, tlb_size); prot &= prot_pmp; - } - - if (ret != TRANSLATE_SUCCESS) { + } else { /* * Guest physical address translation failed, this is a HS * level exception */ first_stage_error = false; - env->guest_phys_fault_addr = (im_address | - (address & - (TARGET_PAGE_SIZE - 1))) >> 2; + if (ret != TRANSLATE_PMP_FAIL) { + env->guest_phys_fault_addr = (im_address | + (address & + (TARGET_PAGE_SIZE - 1))) >> 2; + } } } } else { @@ -1718,6 +1720,7 @@ void riscv_cpu_do_interrupt(CPUState *cs) tval = env->bins; break; case RISCV_EXCP_BREAKPOINT: + tval = env->badaddr; if (cs->watchpoint_hit) { tval = cs->watchpoint_hit->hitaddr; cs->watchpoint_hit = NULL; diff --git a/target/riscv/csr.c b/target/riscv/csr.c index 726096444f..58ef7079dc 100644 --- a/target/riscv/csr.c +++ b/target/riscv/csr.c @@ -93,7 +93,7 @@ static RISCVException fs(CPURISCVState *env, int csrno) static RISCVException vs(CPURISCVState *env, int csrno) { - if (riscv_cpu_cfg(env)->ext_zve32f) { + if (riscv_cpu_cfg(env)->ext_zve32x) { #if !defined(CONFIG_USER_ONLY) if (!env->debugger && !riscv_cpu_vector_enabled(env)) { return RISCV_EXCP_ILLEGAL_INST; @@ -4267,10 +4267,8 @@ static RISCVException write_upmbase(CPURISCVState *env, int csrno, #endif /* Crypto Extension */ -static RISCVException rmw_seed(CPURISCVState *env, int csrno, - target_ulong *ret_value, - target_ulong new_value, - target_ulong write_mask) +target_ulong riscv_new_csr_seed(target_ulong new_value, + target_ulong write_mask) { uint16_t random_v; Error *random_e = NULL; @@ -4294,6 +4292,18 @@ static RISCVException rmw_seed(CPURISCVState *env, int csrno, rval = random_v | SEED_OPST_ES16; } + return rval; +} + +static RISCVException rmw_seed(CPURISCVState *env, int csrno, + target_ulong *ret_value, + target_ulong new_value, + target_ulong write_mask) +{ + target_ulong rval; + + rval = riscv_new_csr_seed(new_value, write_mask); + if (ret_value) { *ret_value = rval; } diff --git a/target/riscv/debug.c b/target/riscv/debug.c index e30d99cc2f..b110370ea6 100644 --- a/target/riscv/debug.c +++ b/target/riscv/debug.c @@ -798,6 +798,7 @@ bool riscv_cpu_debug_check_breakpoint(CPUState *cs) if ((ctrl & TYPE2_EXEC) && (bp->pc == pc)) { /* check U/S/M bit against current privilege level */ if ((ctrl >> 3) & BIT(env->priv)) { + env->badaddr = pc; return true; } } @@ -810,11 +811,13 @@ bool riscv_cpu_debug_check_breakpoint(CPUState *cs) if (env->virt_enabled) { /* check VU/VS bit against current privilege level */ if ((ctrl >> 23) & 
BIT(env->priv)) { + env->badaddr = pc; return true; } } else { /* check U/S/M bit against current privilege level */ if ((ctrl >> 3) & BIT(env->priv)) { + env->badaddr = pc; return true; } } diff --git a/target/riscv/gdbstub.c b/target/riscv/gdbstub.c index be7a02cd90..c07df972f1 100644 --- a/target/riscv/gdbstub.c +++ b/target/riscv/gdbstub.c @@ -288,7 +288,7 @@ static GDBFeature *riscv_gen_dynamic_csr_feature(CPUState *cs, int base_reg) static GDBFeature *ricsv_gen_dynamic_vector_feature(CPUState *cs, int base_reg) { RISCVCPU *cpu = RISCV_CPU(cs); - int reg_width = cpu->cfg.vlenb; + int bitsize = cpu->cfg.vlenb << 3; GDBFeatureBuilder builder; int i; @@ -298,7 +298,7 @@ static GDBFeature *ricsv_gen_dynamic_vector_feature(CPUState *cs, int base_reg) /* First define types and totals in a whole VL */ for (i = 0; i < ARRAY_SIZE(vec_lanes); i++) { - int count = reg_width / vec_lanes[i].size; + int count = bitsize / vec_lanes[i].size; gdb_feature_builder_append_tag( &builder, "<vector id=\"%s\" type=\"%s\" count=\"%d\"/>", vec_lanes[i].id, vec_lanes[i].gdb_type, count); @@ -316,7 +316,7 @@ static GDBFeature *ricsv_gen_dynamic_vector_feature(CPUState *cs, int base_reg) /* Define vector registers */ for (i = 0; i < 32; i++) { gdb_feature_builder_append_reg(&builder, g_strdup_printf("v%d", i), - reg_width, i, "riscv_vector", "vector"); + bitsize, i, "riscv_vector", "vector"); } gdb_feature_builder_end(&builder); @@ -338,7 +338,7 @@ void riscv_cpu_register_gdb_regs_for_features(CPUState *cs) gdb_find_static_feature("riscv-32bit-fpu.xml"), 0); } - if (env->misa_ext & RVV) { + if (cpu->cfg.ext_zve32x) { gdb_register_coprocessor(cs, riscv_gdb_get_vector, riscv_gdb_set_vector, ricsv_gen_dynamic_vector_feature(cs, cs->gdb_num_regs), diff --git a/target/riscv/helper.h b/target/riscv/helper.h index 8a63523851..451261ce5a 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -132,6 +132,7 @@ DEF_HELPER_6(csrrw_i128, tl, env, int, tl, tl, tl, tl) DEF_HELPER_1(sret, tl, env) DEF_HELPER_1(mret, tl, env) DEF_HELPER_1(wfi, void, env) +DEF_HELPER_1(wrs_nto, void, env) DEF_HELPER_1(tlb_flush, void, env) DEF_HELPER_1(tlb_flush_all, void, env) /* Native Debug */ diff --git a/target/riscv/insn_trans/trans_privileged.c.inc b/target/riscv/insn_trans/trans_privileged.c.inc index 620ab54eb0..bc5263a4e0 100644 --- a/target/riscv/insn_trans/trans_privileged.c.inc +++ b/target/riscv/insn_trans/trans_privileged.c.inc @@ -62,6 +62,8 @@ static bool trans_ebreak(DisasContext *ctx, arg_ebreak *a) if (pre == 0x01f01013 && ebreak == 0x00100073 && post == 0x40705013) { generate_exception(ctx, RISCV_EXCP_SEMIHOST); } else { + tcg_gen_st_tl(tcg_constant_tl(ebreak_addr), tcg_env, + offsetof(CPURISCVState, badaddr)); generate_exception(ctx, RISCV_EXCP_BREAKPOINT); } return true; diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index 7d84e7d812..3a3896ba06 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -50,6 +50,22 @@ static bool require_rvf(DisasContext *s) } } +static bool require_rvfmin(DisasContext *s) +{ + if (s->mstatus_fs == EXT_STATUS_DISABLED) { + return false; + } + + switch (s->sew) { + case MO_16: + return s->cfg_ptr->ext_zvfhmin; + case MO_32: + return s->cfg_ptr->ext_zve32f; + default: + return false; + } +} + static bool require_scale_rvf(DisasContext *s) { if (s->mstatus_fs == EXT_STATUS_DISABLED) { @@ -75,8 +91,6 @@ static bool require_scale_rvfmin(DisasContext *s) } switch (s->sew) { - case MO_8: - return 
s->cfg_ptr->ext_zvfhmin; case MO_16: return s->cfg_ptr->ext_zve32f; case MO_32: @@ -149,7 +163,7 @@ static bool do_vsetvl(DisasContext *s, int rd, int rs1, TCGv s2) { TCGv s1, dst; - if (!require_rvv(s) || !s->cfg_ptr->ext_zve32f) { + if (!require_rvv(s) || !s->cfg_ptr->ext_zve32x) { return false; } @@ -179,7 +193,7 @@ static bool do_vsetivli(DisasContext *s, int rd, TCGv s1, TCGv s2) { TCGv dst; - if (!require_rvv(s) || !s->cfg_ptr->ext_zve32f) { + if (!require_rvv(s) || !s->cfg_ptr->ext_zve32x) { return false; } @@ -2317,8 +2331,8 @@ GEN_OPFVF_TRANS(vfrsub_vf, opfvf_check) static bool opfvv_widen_check(DisasContext *s, arg_rmrr *a) { return require_rvv(s) && + require_rvf(s) && require_scale_rvf(s) && - (s->sew != MO_8) && vext_check_isa_ill(s) && vext_check_dss(s, a->rd, a->rs1, a->rs2, a->vm); } @@ -2356,8 +2370,8 @@ GEN_OPFVV_WIDEN_TRANS(vfwsub_vv, opfvv_widen_check) static bool opfvf_widen_check(DisasContext *s, arg_rmrr *a) { return require_rvv(s) && + require_rvf(s) && require_scale_rvf(s) && - (s->sew != MO_8) && vext_check_isa_ill(s) && vext_check_ds(s, a->rd, a->rs2, a->vm); } @@ -2388,8 +2402,8 @@ GEN_OPFVF_WIDEN_TRANS(vfwsub_vf) static bool opfwv_widen_check(DisasContext *s, arg_rmrr *a) { return require_rvv(s) && + require_rvf(s) && require_scale_rvf(s) && - (s->sew != MO_8) && vext_check_isa_ill(s) && vext_check_dds(s, a->rd, a->rs1, a->rs2, a->vm); } @@ -2427,8 +2441,8 @@ GEN_OPFWV_WIDEN_TRANS(vfwsub_wv) static bool opfwf_widen_check(DisasContext *s, arg_rmrr *a) { return require_rvv(s) && + require_rvf(s) && require_scale_rvf(s) && - (s->sew != MO_8) && vext_check_isa_ill(s) && vext_check_dd(s, a->rd, a->rs2, a->vm); } @@ -2685,8 +2699,8 @@ static bool opxfv_widen_check(DisasContext *s, arg_rmr *a) static bool opffv_widen_check(DisasContext *s, arg_rmr *a) { return opfv_widen_check(s, a) && - require_scale_rvfmin(s) && - (s->sew != MO_8); + require_rvfmin(s) && + require_scale_rvfmin(s); } #define GEN_OPFV_WIDEN_TRANS(NAME, CHECK, HELPER, FRM) \ @@ -2790,15 +2804,15 @@ static bool opfxv_narrow_check(DisasContext *s, arg_rmr *a) static bool opffv_narrow_check(DisasContext *s, arg_rmr *a) { return opfv_narrow_check(s, a) && - require_scale_rvfmin(s) && - (s->sew != MO_8); + require_rvfmin(s) && + require_scale_rvfmin(s); } static bool opffv_rod_narrow_check(DisasContext *s, arg_rmr *a) { return opfv_narrow_check(s, a) && - require_scale_rvf(s) && - (s->sew != MO_8); + require_rvf(s) && + require_scale_rvf(s); } #define GEN_OPFV_NARROW_TRANS(NAME, CHECK, HELPER, FRM) \ @@ -2925,8 +2939,8 @@ GEN_OPFVV_TRANS(vfredmin_vs, freduction_check) static bool freduction_widen_check(DisasContext *s, arg_rmrr *a) { return reduction_widen_check(s, a) && - require_scale_rvf(s) && - (s->sew != MO_8); + require_rvf(s) && + require_scale_rvf(s); } GEN_OPFVV_WIDEN_TRANS(vfwredusum_vs, freduction_widen_check) diff --git a/target/riscv/insn_trans/trans_rvzawrs.c.inc b/target/riscv/insn_trans/trans_rvzawrs.c.inc index 32efbff4d5..0eef033838 100644 --- a/target/riscv/insn_trans/trans_rvzawrs.c.inc +++ b/target/riscv/insn_trans/trans_rvzawrs.c.inc @@ -16,7 +16,7 @@ * this program. If not, see <http://www.gnu.org/licenses/>. 
*/ -static bool trans_wrs(DisasContext *ctx) +static bool trans_wrs_sto(DisasContext *ctx, arg_wrs_sto *a) { if (!ctx->cfg_ptr->ext_zawrs) { return false; @@ -40,12 +40,23 @@ static bool trans_wrs(DisasContext *ctx) return true; } -#define GEN_TRANS_WRS(insn) \ -static bool trans_ ## insn(DisasContext *ctx, arg_ ## insn *a) \ -{ \ - (void)a; \ - return trans_wrs(ctx); \ -} +static bool trans_wrs_nto(DisasContext *ctx, arg_wrs_nto *a) +{ + if (!ctx->cfg_ptr->ext_zawrs) { + return false; + } -GEN_TRANS_WRS(wrs_nto) -GEN_TRANS_WRS(wrs_sto) + /* + * Depending on the mode of execution, mstatus.TW and hstatus.VTW, wrs.nto + * should raise an exception when the implementation-specific bounded time + * limit has expired. Our time limit is zero, so we either return + * immediately, as does our implementation of wrs.sto, or raise an + * exception, as handled by the wrs.nto helper. + */ +#ifndef CONFIG_USER_ONLY + gen_helper_wrs_nto(tcg_env); +#endif + + /* We only get here when helper_wrs_nto() doesn't raise an exception. */ + return trans_wrs_sto(ctx, NULL); +} diff --git a/target/riscv/insn_trans/trans_rvzicbo.c.inc b/target/riscv/insn_trans/trans_rvzicbo.c.inc index d5d7095903..15711c3140 100644 --- a/target/riscv/insn_trans/trans_rvzicbo.c.inc +++ b/target/riscv/insn_trans/trans_rvzicbo.c.inc @@ -31,27 +31,35 @@ static bool trans_cbo_clean(DisasContext *ctx, arg_cbo_clean *a) { REQUIRE_ZICBOM(ctx); - gen_helper_cbo_clean_flush(tcg_env, cpu_gpr[a->rs1]); + TCGv src = get_address(ctx, a->rs1, 0); + + gen_helper_cbo_clean_flush(tcg_env, src); return true; } static bool trans_cbo_flush(DisasContext *ctx, arg_cbo_flush *a) { REQUIRE_ZICBOM(ctx); - gen_helper_cbo_clean_flush(tcg_env, cpu_gpr[a->rs1]); + TCGv src = get_address(ctx, a->rs1, 0); + + gen_helper_cbo_clean_flush(tcg_env, src); return true; } static bool trans_cbo_inval(DisasContext *ctx, arg_cbo_inval *a) { REQUIRE_ZICBOM(ctx); - gen_helper_cbo_inval(tcg_env, cpu_gpr[a->rs1]); + TCGv src = get_address(ctx, a->rs1, 0); + + gen_helper_cbo_inval(tcg_env, src); return true; } static bool trans_cbo_zero(DisasContext *ctx, arg_cbo_zero *a) { REQUIRE_ZICBOZ(ctx); - gen_helper_cbo_zero(tcg_env, cpu_gpr[a->rs1]); + TCGv src = get_address(ctx, a->rs1, 0); + + gen_helper_cbo_zero(tcg_env, src); return true; } diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c index eaa36121c7..235e2cdaca 100644 --- a/target/riscv/kvm/kvm-cpu.c +++ b/target/riscv/kvm/kvm-cpu.c @@ -409,6 +409,12 @@ static KVMCPUConfig kvm_v_vlenb = { KVM_REG_RISCV_VECTOR_CSR_REG(vlenb) }; +static KVMCPUConfig kvm_sbi_dbcn = { + .name = "sbi_dbcn", + .kvm_reg_id = KVM_REG_RISCV | KVM_REG_SIZE_U64 | + KVM_REG_RISCV_SBI_EXT | KVM_RISCV_SBI_EXT_DBCN +}; + static void kvm_riscv_update_cpu_cfg_isa_ext(RISCVCPU *cpu, CPUState *cs) { CPURISCVState *env = &cpu->env; @@ -427,10 +433,14 @@ static void kvm_riscv_update_cpu_cfg_isa_ext(RISCVCPU *cpu, CPUState *cs) reg = kvm_cpu_cfg_get(cpu, multi_ext_cfg); ret = kvm_set_one_reg(cs, id, ®); if (ret != 0) { - error_report("Unable to %s extension %s in KVM, error %d", - reg ? 
"enable" : "disable", - multi_ext_cfg->name, ret); - exit(EXIT_FAILURE); + if (!reg && ret == -EINVAL) { + warn_report("KVM cannot disable extension %s", + multi_ext_cfg->name); + } else { + error_report("Unable to enable extension %s in KVM, error %d", + multi_ext_cfg->name, ret); + exit(EXIT_FAILURE); + } } } } @@ -1037,6 +1047,20 @@ static int uint64_cmp(const void *a, const void *b) return 0; } +static void kvm_riscv_check_sbi_dbcn_support(RISCVCPU *cpu, + KVMScratchCPU *kvmcpu, + struct kvm_reg_list *reglist) +{ + struct kvm_reg_list *reg_search; + + reg_search = bsearch(&kvm_sbi_dbcn.kvm_reg_id, reglist->reg, reglist->n, + sizeof(uint64_t), uint64_cmp); + + if (reg_search) { + kvm_sbi_dbcn.supported = true; + } +} + static void kvm_riscv_read_vlenb(RISCVCPU *cpu, KVMScratchCPU *kvmcpu, struct kvm_reg_list *reglist) { @@ -1142,6 +1166,8 @@ static void kvm_riscv_init_multiext_cfg(RISCVCPU *cpu, KVMScratchCPU *kvmcpu) if (riscv_has_ext(&cpu->env, RVV)) { kvm_riscv_read_vlenb(cpu, kvmcpu, reglist); } + + kvm_riscv_check_sbi_dbcn_support(cpu, kvmcpu, reglist); } static void riscv_init_kvm_registers(Object *cpu_obj) @@ -1316,6 +1342,17 @@ static int kvm_vcpu_set_machine_ids(RISCVCPU *cpu, CPUState *cs) return ret; } +static int kvm_vcpu_enable_sbi_dbcn(RISCVCPU *cpu, CPUState *cs) +{ + target_ulong reg = 1; + + if (!kvm_sbi_dbcn.supported) { + return 0; + } + + return kvm_set_one_reg(cs, kvm_sbi_dbcn.kvm_reg_id, ®); +} + int kvm_arch_init_vcpu(CPUState *cs) { int ret = 0; @@ -1333,6 +1370,8 @@ int kvm_arch_init_vcpu(CPUState *cs) kvm_riscv_update_cpu_misa_ext(cpu, cs); kvm_riscv_update_cpu_cfg_isa_ext(cpu, cs); + ret = kvm_vcpu_enable_sbi_dbcn(cpu, cs); + return ret; } @@ -1390,6 +1429,79 @@ bool kvm_arch_stop_on_emulation_error(CPUState *cs) return true; } +static void kvm_riscv_handle_sbi_dbcn(CPUState *cs, struct kvm_run *run) +{ + g_autofree uint8_t *buf = NULL; + RISCVCPU *cpu = RISCV_CPU(cs); + target_ulong num_bytes; + uint64_t addr; + unsigned char ch; + int ret; + + switch (run->riscv_sbi.function_id) { + case SBI_EXT_DBCN_CONSOLE_READ: + case SBI_EXT_DBCN_CONSOLE_WRITE: + num_bytes = run->riscv_sbi.args[0]; + + if (num_bytes == 0) { + run->riscv_sbi.ret[0] = SBI_SUCCESS; + run->riscv_sbi.ret[1] = 0; + break; + } + + addr = run->riscv_sbi.args[1]; + + /* + * Handle the case where a 32 bit CPU is running in a + * 64 bit addressing env. 
+ */ + if (riscv_cpu_mxl(&cpu->env) == MXL_RV32) { + addr |= (uint64_t)run->riscv_sbi.args[2] << 32; + } + + buf = g_malloc0(num_bytes); + + if (run->riscv_sbi.function_id == SBI_EXT_DBCN_CONSOLE_READ) { + ret = qemu_chr_fe_read_all(serial_hd(0)->be, buf, num_bytes); + if (ret < 0) { + error_report("SBI_EXT_DBCN_CONSOLE_READ: error when " + "reading chardev"); + exit(1); + } + + cpu_physical_memory_write(addr, buf, ret); + } else { + cpu_physical_memory_read(addr, buf, num_bytes); + + ret = qemu_chr_fe_write_all(serial_hd(0)->be, buf, num_bytes); + if (ret < 0) { + error_report("SBI_EXT_DBCN_CONSOLE_WRITE: error when " + "writing chardev"); + exit(1); + } + } + + run->riscv_sbi.ret[0] = SBI_SUCCESS; + run->riscv_sbi.ret[1] = ret; + break; + case SBI_EXT_DBCN_CONSOLE_WRITE_BYTE: + ch = run->riscv_sbi.args[0]; + ret = qemu_chr_fe_write(serial_hd(0)->be, &ch, sizeof(ch)); + + if (ret < 0) { + error_report("SBI_EXT_DBCN_CONSOLE_WRITE_BYTE: error when " + "writing chardev"); + exit(1); + } + + run->riscv_sbi.ret[0] = SBI_SUCCESS; + run->riscv_sbi.ret[1] = 0; + break; + default: + run->riscv_sbi.ret[0] = SBI_ERR_NOT_SUPPORTED; + } +} + static int kvm_riscv_handle_sbi(CPUState *cs, struct kvm_run *run) { int ret = 0; @@ -1408,6 +1520,9 @@ static int kvm_riscv_handle_sbi(CPUState *cs, struct kvm_run *run) } ret = 0; break; + case SBI_EXT_DBCN: + kvm_riscv_handle_sbi_dbcn(cs, run); + break; default: qemu_log_mask(LOG_UNIMP, "%s: un-handled SBI EXIT, specific reasons is %lu\n", @@ -1418,6 +1533,28 @@ static int kvm_riscv_handle_sbi(CPUState *cs, struct kvm_run *run) return ret; } +static int kvm_riscv_handle_csr(CPUState *cs, struct kvm_run *run) +{ + target_ulong csr_num = run->riscv_csr.csr_num; + target_ulong new_value = run->riscv_csr.new_value; + target_ulong write_mask = run->riscv_csr.write_mask; + int ret = 0; + + switch (csr_num) { + case CSR_SEED: + run->riscv_csr.ret_value = riscv_new_csr_seed(new_value, write_mask); + break; + default: + qemu_log_mask(LOG_UNIMP, + "%s: un-handled CSR EXIT for CSR %lx\n", + __func__, csr_num); + ret = -1; + break; + } + + return ret; +} + int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) { int ret = 0; @@ -1425,6 +1562,9 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) case KVM_EXIT_RISCV_SBI: ret = kvm_riscv_handle_sbi(cs, run); break; + case KVM_EXIT_RISCV_CSR: + ret = kvm_riscv_handle_csr(cs, run); + break; default: qemu_log_mask(LOG_UNIMP, "%s: un-handled exit reason %d\n", __func__, run->exit_reason); @@ -1637,7 +1777,14 @@ void kvm_riscv_aia_create(MachineState *machine, uint64_t group_shift, } } - hart_bits = find_last_bit(&max_hart_per_socket, BITS_PER_LONG) + 1; + + if (max_hart_per_socket > 1) { + max_hart_per_socket--; + hart_bits = find_last_bit(&max_hart_per_socket, BITS_PER_LONG) + 1; + } else { + hart_bits = 0; + } + ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG, KVM_DEV_RISCV_AIA_CONFIG_HART_BITS, &hart_bits, true, NULL); diff --git a/target/riscv/meson.build b/target/riscv/meson.build index a5e0734e7f..a4bd61e52a 100644 --- a/target/riscv/meson.build +++ b/target/riscv/meson.build @@ -33,6 +33,7 @@ riscv_system_ss.add(files( 'monitor.c', 'machine.c', 'pmu.c', + 'th_csr.c', 'time_helper.c', 'riscv-qmp-cmds.c', )) diff --git a/target/riscv/op_helper.c b/target/riscv/op_helper.c index f414aaebdb..2baf5bc3ca 100644 --- a/target/riscv/op_helper.c +++ b/target/riscv/op_helper.c @@ -380,6 +380,17 @@ void helper_wfi(CPURISCVState *env) } } +void helper_wrs_nto(CPURISCVState *env) +{ + if (env->virt_enabled 
&& (env->priv == PRV_S || env->priv == PRV_U) && + get_field(env->hstatus, HSTATUS_VTW) && + !get_field(env->mstatus, MSTATUS_TW)) { + riscv_raise_exception(env, RISCV_EXCP_VIRT_INSTRUCTION_FAULT, GETPC()); + } else if (env->priv != PRV_M && get_field(env->mstatus, MSTATUS_TW)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + } +} + void helper_tlb_flush(CPURISCVState *env) { CPUState *cs = env_cpu(env); diff --git a/target/riscv/sbi_ecall_interface.h b/target/riscv/sbi_ecall_interface.h index 43899d08f6..7dfe5f72c6 100644 --- a/target/riscv/sbi_ecall_interface.h +++ b/target/riscv/sbi_ecall_interface.h @@ -12,6 +12,17 @@ /* clang-format off */ +#define SBI_SUCCESS 0 +#define SBI_ERR_FAILED -1 +#define SBI_ERR_NOT_SUPPORTED -2 +#define SBI_ERR_INVALID_PARAM -3 +#define SBI_ERR_DENIED -4 +#define SBI_ERR_INVALID_ADDRESS -5 +#define SBI_ERR_ALREADY_AVAILABLE -6 +#define SBI_ERR_ALREADY_STARTED -7 +#define SBI_ERR_ALREADY_STOPPED -8 +#define SBI_ERR_NO_SHMEM -9 + /* SBI Extension IDs */ #define SBI_EXT_0_1_SET_TIMER 0x0 #define SBI_EXT_0_1_CONSOLE_PUTCHAR 0x1 @@ -27,6 +38,7 @@ #define SBI_EXT_IPI 0x735049 #define SBI_EXT_RFENCE 0x52464E43 #define SBI_EXT_HSM 0x48534D +#define SBI_EXT_DBCN 0x4442434E /* SBI function IDs for BASE extension */ #define SBI_EXT_BASE_GET_SPEC_VERSION 0x0 @@ -57,6 +69,11 @@ #define SBI_EXT_HSM_HART_STOP 0x1 #define SBI_EXT_HSM_HART_GET_STATUS 0x2 +/* SBI function IDs for DBCN extension */ +#define SBI_EXT_DBCN_CONSOLE_WRITE 0x0 +#define SBI_EXT_DBCN_CONSOLE_READ 0x1 +#define SBI_EXT_DBCN_CONSOLE_WRITE_BYTE 0x2 + #define SBI_HSM_HART_STATUS_STARTED 0x0 #define SBI_HSM_HART_STATUS_STOPPED 0x1 #define SBI_HSM_HART_STATUS_START_PENDING 0x2 diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c index 40054a391a..683f604d9f 100644 --- a/target/riscv/tcg/tcg-cpu.c +++ b/target/riscv/tcg/tcg-cpu.c @@ -498,22 +498,31 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) /* The Zve64d extension depends on the Zve64f extension */ if (cpu->cfg.ext_zve64d) { + if (!riscv_has_ext(env, RVD)) { + error_setg(errp, "Zve64d/V extensions require D extension"); + return; + } cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zve64f), true); } - /* The Zve64f extension depends on the Zve32f extension */ + /* The Zve64f extension depends on the Zve64x and Zve32f extensions */ if (cpu->cfg.ext_zve64f) { + cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zve64x), true); cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zve32f), true); } - if (cpu->cfg.ext_zve64d && !riscv_has_ext(env, RVD)) { - error_setg(errp, "Zve64d/V extensions require D extension"); - return; + /* The Zve64x extension depends on the Zve32x extension */ + if (cpu->cfg.ext_zve64x) { + cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zve32x), true); } - if (cpu->cfg.ext_zve32f && !riscv_has_ext(env, RVF)) { - error_setg(errp, "Zve32f/Zve64f extensions require F extension"); - return; + /* The Zve32f extension depends on the Zve32x extension */ + if (cpu->cfg.ext_zve32f) { + if (!riscv_has_ext(env, RVF)) { + error_setg(errp, "Zve32f/Zve64f extensions require F extension"); + return; + } + cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zve32x), true); } if (cpu->cfg.ext_zvfh) { @@ -658,22 +667,18 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvbc), true); } - /* - * In principle Zve*x would also suffice here, were they supported - * in qemu - */ if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkb || cpu->cfg.ext_zvkg 
|| cpu->cfg.ext_zvkned || cpu->cfg.ext_zvknha || cpu->cfg.ext_zvksed || - cpu->cfg.ext_zvksh) && !cpu->cfg.ext_zve32f) { + cpu->cfg.ext_zvksh) && !cpu->cfg.ext_zve32x) { error_setg(errp, "Vector crypto extensions require V or Zve* extensions"); return; } - if ((cpu->cfg.ext_zvbc || cpu->cfg.ext_zvknhb) && !cpu->cfg.ext_zve64f) { + if ((cpu->cfg.ext_zvbc || cpu->cfg.ext_zvknhb) && !cpu->cfg.ext_zve64x) { error_setg( errp, - "Zvbc and Zvknhb extensions require V or Zve64{f,d} extensions"); + "Zvbc and Zvknhb extensions require V or Zve64x extensions"); return; } @@ -858,6 +863,21 @@ void riscv_tcg_cpu_finalize_features(RISCVCPU *cpu, Error **errp) } } +void riscv_tcg_cpu_finalize_dynamic_decoder(RISCVCPU *cpu) +{ + GPtrArray *dynamic_decoders; + dynamic_decoders = g_ptr_array_sized_new(decoder_table_size); + for (size_t i = 0; i < decoder_table_size; ++i) { + if (decoder_table[i].guard_func && + decoder_table[i].guard_func(&cpu->cfg)) { + g_ptr_array_add(dynamic_decoders, + (gpointer)decoder_table[i].riscv_cpu_decode_fn); + } + } + + cpu->decoders = dynamic_decoders; +} + bool riscv_cpu_tcg_compatible(RISCVCPU *cpu) { return object_dynamic_cast(OBJECT(cpu), TYPE_RISCV_CPU_HOST) == NULL; @@ -1281,7 +1301,7 @@ static void riscv_init_max_cpu_extensions(Object *obj) const RISCVCPUMultiExtConfig *prop; /* Enable RVG, RVJ and RVV that are disabled by default */ - riscv_cpu_set_misa_ext(env, env->misa_ext | RVG | RVJ | RVV); + riscv_cpu_set_misa_ext(env, env->misa_ext | RVB | RVG | RVJ | RVV); for (prop = riscv_cpu_extensions; prop && prop->name; prop++) { isa_ext_update_enabled(cpu, prop->offset, true); diff --git a/target/riscv/tcg/tcg-cpu.h b/target/riscv/tcg/tcg-cpu.h index f7b32417f8..ce94253fe4 100644 --- a/target/riscv/tcg/tcg-cpu.h +++ b/target/riscv/tcg/tcg-cpu.h @@ -26,4 +26,19 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp); void riscv_tcg_cpu_finalize_features(RISCVCPU *cpu, Error **errp); bool riscv_cpu_tcg_compatible(RISCVCPU *cpu); +struct DisasContext; +struct RISCVCPUConfig; +typedef struct RISCVDecoder { + bool (*guard_func)(const struct RISCVCPUConfig *); + bool (*riscv_cpu_decode_fn)(struct DisasContext *, uint32_t); +} RISCVDecoder; + +typedef bool (*riscv_cpu_decode_fn)(struct DisasContext *, uint32_t); + +extern const size_t decoder_table_size; + +extern const RISCVDecoder decoder_table[]; + +void riscv_tcg_cpu_finalize_dynamic_decoder(RISCVCPU *cpu); + #endif diff --git a/target/riscv/th_csr.c b/target/riscv/th_csr.c new file mode 100644 index 0000000000..6c970d4e81 --- /dev/null +++ b/target/riscv/th_csr.c @@ -0,0 +1,79 @@ +/* + * T-Head-specific CSRs. + * + * Copyright (c) 2024 VRULL GmbH + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "cpu_vendorid.h" + +#define CSR_TH_SXSTATUS 0x5c0 + +/* TH_SXSTATUS bits */ +#define TH_SXSTATUS_UCME BIT(16) +#define TH_SXSTATUS_MAEE BIT(21) +#define TH_SXSTATUS_THEADISAEE BIT(22) + +typedef struct { + int csrno; + int (*insertion_test)(RISCVCPU *cpu); + riscv_csr_operations csr_ops; +} riscv_csr; + +static RISCVException smode(CPURISCVState *env, int csrno) +{ + if (riscv_has_ext(env, RVS)) { + return RISCV_EXCP_NONE; + } + + return RISCV_EXCP_ILLEGAL_INST; +} + +static int test_thead_mvendorid(RISCVCPU *cpu) +{ + if (cpu->cfg.mvendorid != THEAD_VENDOR_ID) { + return -1; + } + + return 0; +} + +static RISCVException read_th_sxstatus(CPURISCVState *env, int csrno, + target_ulong *val) +{ + /* We don't set MAEE here, because QEMU does not implement MAEE. */ + *val = TH_SXSTATUS_UCME | TH_SXSTATUS_THEADISAEE; + return RISCV_EXCP_NONE; +} + +static riscv_csr th_csr_list[] = { + { + .csrno = CSR_TH_SXSTATUS, + .insertion_test = test_thead_mvendorid, + .csr_ops = { "th.sxstatus", smode, read_th_sxstatus } + } +}; + +void th_register_custom_csrs(RISCVCPU *cpu) +{ + for (size_t i = 0; i < ARRAY_SIZE(th_csr_list); i++) { + int csrno = th_csr_list[i].csrno; + riscv_csr_operations *csr_ops = &th_csr_list[i].csr_ops; + if (!th_csr_list[i].insertion_test(cpu)) { + riscv_set_csr_ops(csrno, csr_ops); + } + } +} diff --git a/target/riscv/translate.c b/target/riscv/translate.c index 2c27fd4ce1..4cd6480558 100644 --- a/target/riscv/translate.c +++ b/target/riscv/translate.c @@ -35,6 +35,8 @@ #include "exec/helper-info.c.inc" #undef HELPER_H +#include "tcg/tcg-cpu.h" + /* global register indices */ static TCGv cpu_gpr[32], cpu_gprh[32], cpu_pc, cpu_vl, cpu_vstart; static TCGv_i64 cpu_fpr[32]; /* assume F and D extensions */ @@ -114,6 +116,7 @@ typedef struct DisasContext { /* FRM is known to contain a valid value. */ bool frm_valid; bool insn_start_updated; + const GPtrArray *decoders; } DisasContext; static inline bool has_ext(DisasContext *ctx, uint32_t ext) @@ -1123,21 +1126,16 @@ static inline int insn_len(uint16_t first_word) return (first_word & 3) == 3 ? 4 : 2; } +const RISCVDecoder decoder_table[] = { + { always_true_p, decode_insn32 }, + { has_xthead_p, decode_xthead}, + { has_XVentanaCondOps_p, decode_XVentanaCodeOps}, +}; + +const size_t decoder_table_size = ARRAY_SIZE(decoder_table); + static void decode_opc(CPURISCVState *env, DisasContext *ctx, uint16_t opcode) { - /* - * A table with predicate (i.e., guard) functions and decoder functions - * that are tested in-order until a decoder matches onto the opcode. 
- */ - static const struct { - bool (*guard_func)(const RISCVCPUConfig *); - bool (*decode_func)(DisasContext *, uint32_t); - } decoders[] = { - { always_true_p, decode_insn32 }, - { has_xthead_p, decode_xthead }, - { has_XVentanaCondOps_p, decode_XVentanaCodeOps }, - }; - ctx->virt_inst_excp = false; ctx->cur_insn_len = insn_len(opcode); /* Check for compressed insn */ @@ -1158,9 +1156,9 @@ static void decode_opc(CPURISCVState *env, DisasContext *ctx, uint16_t opcode) ctx->base.pc_next + 2)); ctx->opcode = opcode32; - for (size_t i = 0; i < ARRAY_SIZE(decoders); ++i) { - if (decoders[i].guard_func(ctx->cfg_ptr) && - decoders[i].decode_func(ctx, opcode32)) { + for (guint i = 0; i < ctx->decoders->len; ++i) { + riscv_cpu_decode_fn func = g_ptr_array_index(ctx->decoders, i); + if (func(ctx, opcode32)) { return; } } @@ -1205,6 +1203,7 @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) ctx->itrigger = FIELD_EX32(tb_flags, TB_FLAGS, ITRIGGER); ctx->zero = tcg_constant_tl(0); ctx->virt_inst_excp = false; + ctx->decoders = cpu->decoders; } static void riscv_tr_tb_start(DisasContextBase *db, CPUState *cpu) diff --git a/target/riscv/vector_internals.c b/target/riscv/vector_internals.c index 996c21eb31..05b2d01e58 100644 --- a/target/riscv/vector_internals.c +++ b/target/riscv/vector_internals.c @@ -30,6 +30,28 @@ void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt, if (tot - cnt == 0) { return ; } + + if (HOST_BIG_ENDIAN) { + /* + * Deal the situation when the elements are insdie + * only one uint64 block including setting the + * masked-off element. + */ + if (((tot - 1) ^ cnt) < 8) { + memset(base + H1(tot - 1), -1, tot - cnt); + return; + } + /* + * Otherwise, at least cross two uint64_t blocks. + * Set first unaligned block. 
+ */ + if (cnt % 8 != 0) { + uint32_t j = ROUND_UP(cnt, 8); + memset(base + H1(j - 1), -1, j - cnt); + cnt = j; + } + /* Set other 64bit aligend blocks */ + } memset(base + cnt, -1, tot - cnt); } diff --git a/tests/avocado/machine_aarch64_sbsaref.py b/tests/avocado/machine_aarch64_sbsaref.py index 98c76c1ff7..6bb82f2a03 100644 --- a/tests/avocado/machine_aarch64_sbsaref.py +++ b/tests/avocado/machine_aarch64_sbsaref.py @@ -37,18 +37,18 @@ class Aarch64SbsarefMachine(QemuSystemTest): Used components: - - Trusted Firmware 2.10.2 - - Tianocore EDK2 stable202402 - - Tianocore EDK2-platforms commit 085c2fb + - Trusted Firmware 2.11.0 + - Tianocore EDK2 stable202405 + - Tianocore EDK2-platforms commit 4bbd0ed """ # Secure BootRom (TF-A code) fs0_xz_url = ( "https://artifacts.codelinaro.org/artifactory/linaro-419-sbsa-ref/" - "20240313-116475/edk2/SBSA_FLASH0.fd.xz" + "20240528-140808/edk2/SBSA_FLASH0.fd.xz" ) - fs0_xz_hash = "637593749cc307dea7dc13265c32e5d020267552f22b18a31850b8429fc5e159" + fs0_xz_hash = "fa6004900b67172914c908b78557fec4d36a5f784f4c3dd08f49adb75e1892a9" tar_xz_path = self.fetch_asset(fs0_xz_url, asset_hash=fs0_xz_hash, algorithm='sha256') archive.extract(tar_xz_path, self.workdir) @@ -57,9 +57,9 @@ class Aarch64SbsarefMachine(QemuSystemTest): # Non-secure rom (UEFI and EFI variables) fs1_xz_url = ( "https://artifacts.codelinaro.org/artifactory/linaro-419-sbsa-ref/" - "20240313-116475/edk2/SBSA_FLASH1.fd.xz" + "20240528-140808/edk2/SBSA_FLASH1.fd.xz" ) - fs1_xz_hash = "cb0a5e8cf5e303c5d3dc106cfd5943ffe9714b86afddee7164c69ee1dd41991c" + fs1_xz_hash = "5f3747d4000bc416d9641e33ff4ac60c3cc8cb74ca51b6e932e58531c62eb6f7" tar_xz_path = self.fetch_asset(fs1_xz_url, asset_hash=fs1_xz_hash, algorithm='sha256') archive.extract(tar_xz_path, self.workdir) @@ -98,15 +98,15 @@ class Aarch64SbsarefMachine(QemuSystemTest): # AP Trusted ROM wait_for_console_pattern(self, "Booting Trusted Firmware") - wait_for_console_pattern(self, "BL1: v2.10.2(release):") + wait_for_console_pattern(self, "BL1: v2.11.0(release):") wait_for_console_pattern(self, "BL1: Booting BL2") # Trusted Boot Firmware - wait_for_console_pattern(self, "BL2: v2.10.2(release)") + wait_for_console_pattern(self, "BL2: v2.11.0(release)") wait_for_console_pattern(self, "Booting BL31") # EL3 Runtime Software - wait_for_console_pattern(self, "BL31: v2.10.2(release)") + wait_for_console_pattern(self, "BL31: v2.11.0(release)") # Non-trusted Firmware wait_for_console_pattern(self, "UEFI firmware (version 1.0") |