diff options
75 files changed, 781 insertions, 308 deletions
diff --git a/accel/tcg/tcg-runtime-gvec.c b/accel/tcg/tcg-runtime-gvec.c index 0f09e0ef38..3b6052fe97 100644 --- a/accel/tcg/tcg-runtime-gvec.c +++ b/accel/tcg/tcg-runtime-gvec.c @@ -1444,3 +1444,17 @@ void HELPER(gvec_umax64)(void *d, void *a, void *b, uint32_t desc) } clear_high(d, oprsz, desc); } + +void HELPER(gvec_bitsel)(void *d, void *a, void *b, void *c, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(vec64)) { + vec64 aa = *(vec64 *)(a + i); + vec64 bb = *(vec64 *)(b + i); + vec64 cc = *(vec64 *)(c + i); + *(vec64 *)(d + i) = (bb & aa) | (cc & ~aa); + } + clear_high(d, oprsz, desc); +} diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h index 6d73dc2d65..4fa61b49b4 100644 --- a/accel/tcg/tcg-runtime.h +++ b/accel/tcg/tcg-runtime.h @@ -303,3 +303,5 @@ DEF_HELPER_FLAGS_4(gvec_leu8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_leu16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_leu32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_leu64, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_bitsel, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) diff --git a/hw/arm/armsse.c b/hw/arm/armsse.c index 76cc690579..83b920334d 100644 --- a/hw/arm/armsse.c +++ b/hw/arm/armsse.c @@ -17,7 +17,7 @@ #include "hw/sysbus.h" #include "hw/registerfields.h" #include "hw/arm/armsse.h" -#include "hw/arm/arm.h" +#include "hw/arm/boot.h" /* Format of the System Information block SYS_CONFIG register */ typedef enum SysConfigFormat { diff --git a/hw/arm/armv7m.c b/hw/arm/armv7m.c index c4b2a9a1f5..029572258f 100644 --- a/hw/arm/armv7m.c +++ b/hw/arm/armv7m.c @@ -13,7 +13,7 @@ #include "qemu-common.h" #include "cpu.h" #include "hw/sysbus.h" -#include "hw/arm/arm.h" +#include "hw/arm/boot.h" #include "hw/loader.h" #include "elf.h" #include "sysemu/qtest.h" diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c index 29d225ed14..415cff7a01 100644 --- a/hw/arm/aspeed.c +++ b/hw/arm/aspeed.c @@ -14,7 +14,7 @@ #include "qemu-common.h" #include "cpu.h" #include "exec/address-spaces.h" -#include "hw/arm/arm.h" +#include "hw/arm/boot.h" #include "hw/arm/aspeed.h" #include "hw/arm/aspeed_soc.h" #include "hw/boards.h" diff --git a/hw/arm/boot.c b/hw/arm/boot.c index a830655e1a..7279185bd9 100644 --- a/hw/arm/boot.c +++ b/hw/arm/boot.c @@ -12,7 +12,7 @@ #include "qapi/error.h" #include <libfdt.h> #include "hw/hw.h" -#include "hw/arm/arm.h" +#include "hw/arm/boot.h" #include "hw/arm/linux-boot-if.h" #include "sysemu/kvm.h" #include "sysemu/sysemu.h" diff --git a/hw/arm/collie.c b/hw/arm/collie.c index d12604c573..3db3c56004 100644 --- a/hw/arm/collie.c +++ b/hw/arm/collie.c @@ -14,7 +14,7 @@ #include "hw/sysbus.h" #include "hw/boards.h" #include "strongarm.h" -#include "hw/arm/arm.h" +#include "hw/arm/boot.h" #include "hw/block/flash.h" #include "exec/address-spaces.h" #include "cpu.h" diff --git a/hw/arm/exynos4210.c b/hw/arm/exynos4210.c index af82e95542..e99e9cd11b 100644 --- a/hw/arm/exynos4210.c +++ b/hw/arm/exynos4210.c @@ -30,7 +30,7 @@ #include "hw/boards.h" #include "sysemu/sysemu.h" #include "hw/sysbus.h" -#include "hw/arm/arm.h" +#include "hw/arm/boot.h" #include "hw/loader.h" #include "hw/arm/exynos4210.h" #include "hw/sd/sdhci.h" @@ -96,6 +96,11 @@ /* EHCI */ #define EXYNOS4210_EHCI_BASE_ADDR 0x12580000 +/* DMA */ +#define EXYNOS4210_PL330_BASE0_ADDR 0x12680000 +#define EXYNOS4210_PL330_BASE1_ADDR 0x12690000 +#define EXYNOS4210_PL330_BASE2_ADDR 0x12850000 + static uint8_t chipid_and_omr[] = { 0x11, 0x02, 0x21, 0x43, 0x09, 0x00, 0x00, 0x00 }; @@ -160,9 +165,23 @@ static uint64_t exynos4210_calc_affinity(int cpu) return (0x9 << ARM_AFF1_SHIFT) | cpu; } -Exynos4210State *exynos4210_init(MemoryRegion *system_mem) +static void pl330_create(uint32_t base, qemu_irq irq, int nreq) +{ + SysBusDevice *busdev; + DeviceState *dev; + + dev = qdev_create(NULL, "pl330"); + qdev_prop_set_uint8(dev, "num_periph_req", nreq); + qdev_init_nofail(dev); + busdev = SYS_BUS_DEVICE(dev); + sysbus_mmio_map(busdev, 0, base); + sysbus_connect_irq(busdev, 0, irq); +} + +static void exynos4210_realize(DeviceState *socdev, Error **errp) { - Exynos4210State *s = g_new0(Exynos4210State, 1); + Exynos4210State *s = EXYNOS4210_SOC(socdev); + MemoryRegion *system_mem = get_system_memory(); qemu_irq gate_irq[EXYNOS4210_NCPUS][EXYNOS4210_IRQ_GATE_NINPUTS]; SysBusDevice *busdev; DeviceState *dev; @@ -410,5 +429,32 @@ Exynos4210State *exynos4210_init(MemoryRegion *system_mem) sysbus_create_simple(TYPE_EXYNOS4210_EHCI, EXYNOS4210_EHCI_BASE_ADDR, s->irq_table[exynos4210_get_irq(28, 3)]); - return s; + /*** DMA controllers ***/ + pl330_create(EXYNOS4210_PL330_BASE0_ADDR, + qemu_irq_invert(s->irq_table[exynos4210_get_irq(35, 1)]), 32); + pl330_create(EXYNOS4210_PL330_BASE1_ADDR, + qemu_irq_invert(s->irq_table[exynos4210_get_irq(36, 1)]), 32); + pl330_create(EXYNOS4210_PL330_BASE2_ADDR, + qemu_irq_invert(s->irq_table[exynos4210_get_irq(34, 1)]), 1); } + +static void exynos4210_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = exynos4210_realize; +} + +static const TypeInfo exynos4210_info = { + .name = TYPE_EXYNOS4210_SOC, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(Exynos4210State), + .class_init = exynos4210_class_init, +}; + +static void exynos4210_register_types(void) +{ + type_register_static(&exynos4210_info); +} + +type_init(exynos4210_register_types) diff --git a/hw/arm/exynos4_boards.c b/hw/arm/exynos4_boards.c index ea8100f65a..71f58586c1 100644 --- a/hw/arm/exynos4_boards.c +++ b/hw/arm/exynos4_boards.c @@ -22,6 +22,7 @@ */ #include "qemu/osdep.h" +#include "qemu/units.h" #include "qapi/error.h" #include "qemu/error-report.h" #include "qemu-common.h" @@ -29,26 +30,12 @@ #include "sysemu/sysemu.h" #include "hw/sysbus.h" #include "net/net.h" -#include "hw/arm/arm.h" +#include "hw/arm/boot.h" #include "exec/address-spaces.h" #include "hw/arm/exynos4210.h" #include "hw/net/lan9118.h" #include "hw/boards.h" -#undef DEBUG - -//#define DEBUG - -#ifdef DEBUG - #undef PRINT_DEBUG - #define PRINT_DEBUG(fmt, args...) \ - do { \ - fprintf(stderr, " [%s:%d] "fmt, __func__, __LINE__, ##args); \ - } while (0) -#else - #define PRINT_DEBUG(fmt, args...) do {} while (0) -#endif - #define SMDK_LAN9118_BASE_ADDR 0x05000000 typedef enum Exynos4BoardType { @@ -58,7 +45,7 @@ typedef enum Exynos4BoardType { } Exynos4BoardType; typedef struct Exynos4BoardState { - Exynos4210State *soc; + Exynos4210State soc; MemoryRegion dram0_mem; MemoryRegion dram1_mem; } Exynos4BoardState; @@ -74,8 +61,8 @@ static int exynos4_board_smp_bootreg_addr[EXYNOS4_NUM_OF_BOARDS] = { }; static unsigned long exynos4_board_ram_size[EXYNOS4_NUM_OF_BOARDS] = { - [EXYNOS4_BOARD_NURI] = 0x40000000, - [EXYNOS4_BOARD_SMDKC210] = 0x40000000, + [EXYNOS4_BOARD_NURI] = 1 * GiB, + [EXYNOS4_BOARD_SMDKC210] = 1 * GiB, }; static struct arm_boot_info exynos4_board_binfo = { @@ -140,20 +127,13 @@ exynos4_boards_init_common(MachineState *machine, exynos4_board_binfo.gic_cpu_if_addr = EXYNOS4210_SMP_PRIVATE_BASE_ADDR + 0x100; - PRINT_DEBUG("\n ram_size: %luMiB [0x%08lx]\n" - " kernel_filename: %s\n" - " kernel_cmdline: %s\n" - " initrd_filename: %s\n", - exynos4_board_ram_size[board_type] / 1048576, - exynos4_board_ram_size[board_type], - machine->kernel_filename, - machine->kernel_cmdline, - machine->initrd_filename); - exynos4_boards_init_ram(s, get_system_memory(), exynos4_board_ram_size[board_type]); - s->soc = exynos4210_init(get_system_memory()); + object_initialize(&s->soc, sizeof(s->soc), TYPE_EXYNOS4210_SOC); + qdev_set_parent_bus(DEVICE(&s->soc), sysbus_get_default()); + object_property_set_bool(OBJECT(&s->soc), true, "realized", + &error_fatal); return s; } @@ -171,7 +151,7 @@ static void smdkc210_init(MachineState *machine) EXYNOS4_BOARD_SMDKC210); lan9215_init(SMDK_LAN9118_BASE_ADDR, - qemu_irq_invert(s->soc->irq_table[exynos4210_get_irq(37, 1)])); + qemu_irq_invert(s->soc.irq_table[exynos4210_get_irq(37, 1)])); arm_load_kernel(ARM_CPU(first_cpu), &exynos4_board_binfo); } diff --git a/hw/arm/highbank.c b/hw/arm/highbank.c index 96ccf18d86..a89a1d3a7c 100644 --- a/hw/arm/highbank.c +++ b/hw/arm/highbank.c @@ -20,7 +20,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "hw/sysbus.h" -#include "hw/arm/arm.h" +#include "hw/arm/boot.h" #include "hw/loader.h" #include "net/net.h" #include "sysemu/kvm.h" diff --git a/hw/arm/integratorcp.c b/hw/arm/integratorcp.c index 0b6f24465e..d18caab8bd 100644 --- a/hw/arm/integratorcp.c +++ b/hw/arm/integratorcp.c @@ -13,7 +13,7 @@ #include "cpu.h" #include "hw/sysbus.h" #include "hw/boards.h" -#include "hw/arm/arm.h" +#include "hw/arm/boot.h" #include "hw/misc/arm_integrator_debug.h" #include "hw/net/smc91c111.h" #include "net/net.h" diff --git a/hw/arm/mainstone.c b/hw/arm/mainstone.c index c1cec59037..cd1f904c6c 100644 --- a/hw/arm/mainstone.c +++ b/hw/arm/mainstone.c @@ -16,7 +16,7 @@ #include "qapi/error.h" #include "hw/hw.h" #include "hw/arm/pxa.h" -#include "hw/arm/arm.h" +#include "hw/arm/boot.h" #include "net/net.h" #include "hw/net/smc91c111.h" #include "hw/boards.h" diff --git a/hw/arm/microbit.c b/hw/arm/microbit.c index da67bf6d9d..e9a891f7d3 100644 --- a/hw/arm/microbit.c +++ b/hw/arm/microbit.c @@ -11,7 +11,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "hw/boards.h" -#include "hw/arm/arm.h" +#include "hw/arm/boot.h" #include "sysemu/sysemu.h" #include "exec/address-spaces.h" diff --git a/hw/arm/mps2-tz.c b/hw/arm/mps2-tz.c index 7832408bb7..c167a5fa59 100644 --- a/hw/arm/mps2-tz.c +++ b/hw/arm/mps2-tz.c @@ -40,7 +40,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qemu/error-report.h" -#include "hw/arm/arm.h" +#include "hw/arm/boot.h" #include "hw/arm/armv7m.h" #include "hw/or-irq.h" #include "hw/boards.h" diff --git a/hw/arm/mps2.c b/hw/arm/mps2.c index 54b7395849..b74f1378c9 100644 --- a/hw/arm/mps2.c +++ b/hw/arm/mps2.c @@ -25,7 +25,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qemu/error-report.h" -#include "hw/arm/arm.h" +#include "hw/arm/boot.h" #include "hw/arm/armv7m.h" #include "hw/or-irq.h" #include "hw/boards.h" diff --git a/hw/arm/msf2-soc.c b/hw/arm/msf2-soc.c index 2702e90b45..d700b212f8 100644 --- a/hw/arm/msf2-soc.c +++ b/hw/arm/msf2-soc.c @@ -26,7 +26,6 @@ #include "qemu/units.h" #include "qapi/error.h" #include "qemu-common.h" -#include "hw/arm/arm.h" #include "exec/address-spaces.h" #include "hw/char/serial.h" #include "hw/boards.h" diff --git a/hw/arm/msf2-som.c b/hw/arm/msf2-som.c index 2432b5e935..8c550a8bdd 100644 --- a/hw/arm/msf2-som.c +++ b/hw/arm/msf2-som.c @@ -27,7 +27,7 @@ #include "qapi/error.h" #include "qemu/error-report.h" #include "hw/boards.h" -#include "hw/arm/arm.h" +#include "hw/arm/boot.h" #include "exec/address-spaces.h" #include "hw/arm/msf2-soc.h" #include "cpu.h" diff --git a/hw/arm/musca.c b/hw/arm/musca.c index 23aff43f4b..825d80e75a 100644 --- a/hw/arm/musca.c +++ b/hw/arm/musca.c @@ -24,7 +24,7 @@ #include "qapi/error.h" #include "exec/address-spaces.h" #include "sysemu/sysemu.h" -#include "hw/arm/arm.h" +#include "hw/arm/boot.h" #include "hw/arm/armsse.h" #include "hw/boards.h" #include "hw/char/pl011.h" diff --git a/hw/arm/musicpal.c b/hw/arm/musicpal.c index 93ec3c5698..5645997b56 100644 --- a/hw/arm/musicpal.c +++ b/hw/arm/musicpal.c @@ -14,7 +14,7 @@ #include "qemu-common.h" #include "cpu.h" #include "hw/sysbus.h" -#include "hw/arm/arm.h" +#include "hw/arm/boot.h" #include "net/net.h" #include "sysemu/sysemu.h" #include "hw/boards.h" diff --git a/hw/arm/netduino2.c b/hw/arm/netduino2.c index f936017d4a..f57fc38f92 100644 --- a/hw/arm/netduino2.c +++ b/hw/arm/netduino2.c @@ -27,7 +27,7 @@ #include "hw/boards.h" #include "qemu/error-report.h" #include "hw/arm/stm32f205_soc.h" -#include "hw/arm/arm.h" +#include "hw/arm/boot.h" static void netduino2_init(MachineState *machine) { diff --git a/hw/arm/nrf51_soc.c b/hw/arm/nrf51_soc.c index 3e633d160e..ce618edc7b 100644 --- a/hw/arm/nrf51_soc.c +++ b/hw/arm/nrf51_soc.c @@ -11,7 +11,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qemu-common.h" -#include "hw/arm/arm.h" +#include "hw/arm/boot.h" #include "hw/sysbus.h" #include "hw/boards.h" #include "hw/misc/unimp.h" diff --git a/hw/arm/nseries.c b/hw/arm/nseries.c index 303f7a31e1..4a79f5c88b 100644 --- a/hw/arm/nseries.c +++ b/hw/arm/nseries.c @@ -25,7 +25,7 @@ #include "qemu/bswap.h" #include "sysemu/sysemu.h" #include "hw/arm/omap.h" -#include "hw/arm/arm.h" +#include "hw/arm/boot.h" #include "hw/irq.h" #include "ui/console.h" #include "hw/boards.h" diff --git a/hw/arm/omap1.c b/hw/arm/omap1.c index 539d29ef9c..28fbe275a8 100644 --- a/hw/arm/omap1.c +++ b/hw/arm/omap1.c @@ -24,7 +24,7 @@ #include "cpu.h" #include "hw/boards.h" #include "hw/hw.h" -#include "hw/arm/arm.h" +#include "hw/arm/boot.h" #include "hw/arm/omap.h" #include "sysemu/sysemu.h" #include "hw/arm/soc_dma.h" diff --git a/hw/arm/omap2.c b/hw/arm/omap2.c index 446223906e..23e72db79e 100644 --- a/hw/arm/omap2.c +++ b/hw/arm/omap2.c @@ -26,7 +26,7 @@ #include "sysemu/qtest.h" #include "hw/boards.h" #include "hw/hw.h" -#include "hw/arm/arm.h" +#include "hw/arm/boot.h" #include "hw/arm/omap.h" #include "sysemu/sysemu.h" #include "qemu/timer.h" diff --git a/hw/arm/omap_sx1.c b/hw/arm/omap_sx1.c index 95a4fe7e7f..cae78d0a36 100644 --- a/hw/arm/omap_sx1.c +++ b/hw/arm/omap_sx1.c @@ -31,7 +31,7 @@ #include "ui/console.h" #include "hw/arm/omap.h" #include "hw/boards.h" -#include "hw/arm/arm.h" +#include "hw/arm/boot.h" #include "hw/block/flash.h" #include "sysemu/qtest.h" #include "exec/address-spaces.h" diff --git a/hw/arm/palm.c b/hw/arm/palm.c index 139d27d1cc..9eb9612bce 100644 --- a/hw/arm/palm.c +++ b/hw/arm/palm.c @@ -25,7 +25,7 @@ #include "ui/console.h" #include "hw/arm/omap.h" #include "hw/boards.h" -#include "hw/arm/arm.h" +#include "hw/arm/boot.h" #include "hw/input/tsc2xxx.h" #include "hw/loader.h" #include "exec/address-spaces.h" diff --git a/hw/arm/raspi.c b/hw/arm/raspi.c index fe2bb511b9..2b5fe10e2f 100644 --- a/hw/arm/raspi.c +++ b/hw/arm/raspi.c @@ -20,7 +20,7 @@ #include "qemu/error-report.h" #include "hw/boards.h" #include "hw/loader.h" -#include "hw/arm/arm.h" +#include "hw/arm/boot.h" #include "sysemu/sysemu.h" #define SMPBOOT_ADDR 0x300 /* this should leave enough space for ATAGS */ diff --git a/hw/arm/realview.c b/hw/arm/realview.c index 05a244df25..d42a76e7a1 100644 --- a/hw/arm/realview.c +++ b/hw/arm/realview.c @@ -12,7 +12,7 @@ #include "qemu-common.h" #include "cpu.h" #include "hw/sysbus.h" -#include "hw/arm/arm.h" +#include "hw/arm/boot.h" #include "hw/arm/primecell.h" #include "hw/net/lan9118.h" #include "hw/net/smc91c111.h" diff --git a/hw/arm/spitz.c b/hw/arm/spitz.c index 22f5958b9d..723cf5d592 100644 --- a/hw/arm/spitz.c +++ b/hw/arm/spitz.c @@ -14,7 +14,7 @@ #include "qapi/error.h" #include "hw/hw.h" #include "hw/arm/pxa.h" -#include "hw/arm/arm.h" +#include "hw/arm/boot.h" #include "sysemu/sysemu.h" #include "hw/pcmcia.h" #include "hw/i2c/i2c.h" diff --git a/hw/arm/stellaris.c b/hw/arm/stellaris.c index 5059aedbaa..499035f5c8 100644 --- a/hw/arm/stellaris.c +++ b/hw/arm/stellaris.c @@ -11,7 +11,7 @@ #include "qapi/error.h" #include "hw/sysbus.h" #include "hw/ssi/ssi.h" -#include "hw/arm/arm.h" +#include "hw/arm/boot.h" #include "qemu/timer.h" #include "hw/i2c/i2c.h" #include "net/net.h" diff --git a/hw/arm/stm32f205_soc.c b/hw/arm/stm32f205_soc.c index 980e5af13c..a5b6f7bda2 100644 --- a/hw/arm/stm32f205_soc.c +++ b/hw/arm/stm32f205_soc.c @@ -25,7 +25,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qemu-common.h" -#include "hw/arm/arm.h" +#include "hw/arm/boot.h" #include "exec/address-spaces.h" #include "hw/arm/stm32f205_soc.h" diff --git a/hw/arm/strongarm.c b/hw/arm/strongarm.c index 644a9c45b4..a1ecbddaab 100644 --- a/hw/arm/strongarm.c +++ b/hw/arm/strongarm.c @@ -33,7 +33,7 @@ #include "hw/sysbus.h" #include "strongarm.h" #include "qemu/error-report.h" -#include "hw/arm/arm.h" +#include "hw/arm/boot.h" #include "chardev/char-fe.h" #include "chardev/char-serial.h" #include "sysemu/sysemu.h" diff --git a/hw/arm/tosa.c b/hw/arm/tosa.c index 9a1247797f..7843d68d46 100644 --- a/hw/arm/tosa.c +++ b/hw/arm/tosa.c @@ -15,7 +15,7 @@ #include "qapi/error.h" #include "hw/hw.h" #include "hw/arm/pxa.h" -#include "hw/arm/arm.h" +#include "hw/arm/boot.h" #include "hw/arm/sharpsl.h" #include "hw/pcmcia.h" #include "hw/boards.h" diff --git a/hw/arm/versatilepb.c b/hw/arm/versatilepb.c index 25166e1517..f471fb7025 100644 --- a/hw/arm/versatilepb.c +++ b/hw/arm/versatilepb.c @@ -12,7 +12,7 @@ #include "qemu-common.h" #include "cpu.h" #include "hw/sysbus.h" -#include "hw/arm/arm.h" +#include "hw/arm/boot.h" #include "hw/net/smc91c111.h" #include "net/net.h" #include "sysemu/sysemu.h" diff --git a/hw/arm/vexpress.c b/hw/arm/vexpress.c index d8634f3dd2..2b3b0c2334 100644 --- a/hw/arm/vexpress.c +++ b/hw/arm/vexpress.c @@ -26,7 +26,7 @@ #include "qemu-common.h" #include "cpu.h" #include "hw/sysbus.h" -#include "hw/arm/arm.h" +#include "hw/arm/boot.h" #include "hw/arm/primecell.h" #include "hw/net/lan9118.h" #include "hw/i2c/i2c.h" diff --git a/hw/arm/virt.c b/hw/arm/virt.c index 5331ab71e2..bf54f10b51 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -33,7 +33,7 @@ #include "qemu/option.h" #include "qapi/error.h" #include "hw/sysbus.h" -#include "hw/arm/arm.h" +#include "hw/arm/boot.h" #include "hw/arm/primecell.h" #include "hw/arm/virt.h" #include "hw/block/flash.h" diff --git a/hw/arm/xilinx_zynq.c b/hw/arm/xilinx_zynq.c index b3b8215759..198e3f9763 100644 --- a/hw/arm/xilinx_zynq.c +++ b/hw/arm/xilinx_zynq.c @@ -20,7 +20,7 @@ #include "qemu-common.h" #include "cpu.h" #include "hw/sysbus.h" -#include "hw/arm/arm.h" +#include "hw/arm/boot.h" #include "net/net.h" #include "exec/address-spaces.h" #include "sysemu/sysemu.h" diff --git a/hw/arm/xlnx-versal.c b/hw/arm/xlnx-versal.c index 5ee58c09be..e8e4278eb3 100644 --- a/hw/arm/xlnx-versal.c +++ b/hw/arm/xlnx-versal.c @@ -17,7 +17,7 @@ #include "net/net.h" #include "sysemu/sysemu.h" #include "sysemu/kvm.h" -#include "hw/arm/arm.h" +#include "hw/arm/boot.h" #include "kvm_arm.h" #include "hw/misc/unimp.h" #include "hw/intc/arm_gicv3_common.h" diff --git a/hw/arm/z2.c b/hw/arm/z2.c index 1f906ef20b..44aa748d39 100644 --- a/hw/arm/z2.c +++ b/hw/arm/z2.c @@ -14,7 +14,7 @@ #include "qemu/osdep.h" #include "hw/hw.h" #include "hw/arm/pxa.h" -#include "hw/arm/arm.h" +#include "hw/arm/boot.h" #include "hw/i2c/i2c.h" #include "hw/ssi/ssi.h" #include "hw/boards.h" diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c index cbad6037f1..3b212d91c8 100644 --- a/hw/intc/arm_gicv3_cpuif.c +++ b/hw/intc/arm_gicv3_cpuif.c @@ -1856,7 +1856,7 @@ static void icc_ctlr_el3_write(CPUARMState *env, const ARMCPRegInfo *ri, trace_gicv3_icc_ctlr_el3_write(gicv3_redist_affid(cs), value); /* *_EL1NS and *_EL1S bits are aliases into the ICC_CTLR_EL1 bits. */ - cs->icc_ctlr_el1[GICV3_NS] &= (ICC_CTLR_EL1_CBPR | ICC_CTLR_EL1_EOIMODE); + cs->icc_ctlr_el1[GICV3_NS] &= ~(ICC_CTLR_EL1_CBPR | ICC_CTLR_EL1_EOIMODE); if (value & ICC_CTLR_EL3_EOIMODE_EL1NS) { cs->icc_ctlr_el1[GICV3_NS] |= ICC_CTLR_EL1_EOIMODE; } @@ -1864,7 +1864,7 @@ static void icc_ctlr_el3_write(CPUARMState *env, const ARMCPRegInfo *ri, cs->icc_ctlr_el1[GICV3_NS] |= ICC_CTLR_EL1_CBPR; } - cs->icc_ctlr_el1[GICV3_S] &= (ICC_CTLR_EL1_CBPR | ICC_CTLR_EL1_EOIMODE); + cs->icc_ctlr_el1[GICV3_S] &= ~(ICC_CTLR_EL1_CBPR | ICC_CTLR_EL1_EOIMODE); if (value & ICC_CTLR_EL3_EOIMODE_EL1S) { cs->icc_ctlr_el1[GICV3_S] |= ICC_CTLR_EL1_EOIMODE; } @@ -2366,7 +2366,7 @@ static void ich_vmcr_write(CPUARMState *env, const ARMCPRegInfo *ri, /* Enforce "writing BPRs to less than minimum sets them to the minimum" * by reading and writing back the fields. */ - write_vbpr(cs, GICV3_G1, read_vbpr(cs, GICV3_G0)); + write_vbpr(cs, GICV3_G0, read_vbpr(cs, GICV3_G0)); write_vbpr(cs, GICV3_G1, read_vbpr(cs, GICV3_G1)); gicv3_cpuif_virt_update(cs); diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c index 3a346a682a..815e720cfa 100644 --- a/hw/intc/armv7m_nvic.c +++ b/hw/intc/armv7m_nvic.c @@ -16,7 +16,6 @@ #include "cpu.h" #include "hw/sysbus.h" #include "qemu/timer.h" -#include "hw/arm/arm.h" #include "hw/intc/armv7m_nvic.h" #include "target/arm/cpu.h" #include "exec/exec-all.h" diff --git a/include/hw/arm/allwinner-a10.h b/include/hw/arm/allwinner-a10.h index 389e128d0f..6305b9c586 100644 --- a/include/hw/arm/allwinner-a10.h +++ b/include/hw/arm/allwinner-a10.h @@ -3,7 +3,7 @@ #include "qemu-common.h" #include "qemu/error-report.h" #include "hw/char/serial.h" -#include "hw/arm/arm.h" +#include "hw/arm/boot.h" #include "hw/timer/allwinner-a10-pit.h" #include "hw/intc/allwinner-a10-pic.h" #include "hw/net/allwinner_emac.h" diff --git a/include/hw/arm/aspeed_soc.h b/include/hw/arm/aspeed_soc.h index 11ec0179db..836b2ba8bf 100644 --- a/include/hw/arm/aspeed_soc.h +++ b/include/hw/arm/aspeed_soc.h @@ -12,7 +12,6 @@ #ifndef ASPEED_SOC_H #define ASPEED_SOC_H -#include "hw/arm/arm.h" #include "hw/intc/aspeed_vic.h" #include "hw/misc/aspeed_scu.h" #include "hw/misc/aspeed_sdmc.h" diff --git a/include/hw/arm/bcm2836.h b/include/hw/arm/bcm2836.h index 93248399ba..a2cb8454de 100644 --- a/include/hw/arm/bcm2836.h +++ b/include/hw/arm/bcm2836.h @@ -11,7 +11,6 @@ #ifndef BCM2836_H #define BCM2836_H -#include "hw/arm/arm.h" #include "hw/arm/bcm2835_peripherals.h" #include "hw/intc/bcm2836_control.h" diff --git a/include/hw/arm/arm.h b/include/hw/arm/boot.h index ffed39252d..c48cc4c2bc 100644 --- a/include/hw/arm/arm.h +++ b/include/hw/arm/boot.h @@ -1,5 +1,5 @@ /* - * Misc ARM declarations + * ARM kernel loader. * * Copyright (c) 2006 CodeSourcery. * Written by Paul Brook @@ -8,8 +8,8 @@ * */ -#ifndef HW_ARM_H -#define HW_ARM_H +#ifndef HW_ARM_BOOT_H +#define HW_ARM_BOOT_H #include "exec/memory.h" #include "target/arm/cpu-qom.h" @@ -167,8 +167,4 @@ void arm_write_secure_board_setup_dummy_smc(ARMCPU *cpu, const struct arm_boot_info *info, hwaddr mvbar_addr); -/* Multiplication factor to convert from system clock ticks to qemu timer - ticks. */ -extern int system_clock_scale; - -#endif /* HW_ARM_H */ +#endif /* HW_ARM_BOOT_H */ diff --git a/include/hw/arm/exynos4210.h b/include/hw/arm/exynos4210.h index 098a69ec73..27c684e851 100644 --- a/include/hw/arm/exynos4210.h +++ b/include/hw/arm/exynos4210.h @@ -85,6 +85,9 @@ typedef struct Exynos4210Irq { } Exynos4210Irq; typedef struct Exynos4210State { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ ARMCPU *cpu[EXYNOS4210_NCPUS]; Exynos4210Irq irqs; qemu_irq *irq_table; @@ -98,11 +101,13 @@ typedef struct Exynos4210State { I2CBus *i2c_if[EXYNOS4210_I2C_NUMBER]; } Exynos4210State; +#define TYPE_EXYNOS4210_SOC "exynos4210" +#define EXYNOS4210_SOC(obj) \ + OBJECT_CHECK(Exynos4210State, obj, TYPE_EXYNOS4210_SOC) + void exynos4210_write_secondary(ARMCPU *cpu, const struct arm_boot_info *info); -Exynos4210State *exynos4210_init(MemoryRegion *system_mem); - /* Initialize exynos4210 IRQ subsystem stub */ qemu_irq *exynos4210_init_irq(Exynos4210Irq *env); diff --git a/include/hw/arm/fsl-imx25.h b/include/hw/arm/fsl-imx25.h index 65a73714ef..3280ab1fb0 100644 --- a/include/hw/arm/fsl-imx25.h +++ b/include/hw/arm/fsl-imx25.h @@ -17,7 +17,7 @@ #ifndef FSL_IMX25_H #define FSL_IMX25_H -#include "hw/arm/arm.h" +#include "hw/arm/boot.h" #include "hw/intc/imx_avic.h" #include "hw/misc/imx25_ccm.h" #include "hw/char/imx_serial.h" diff --git a/include/hw/arm/fsl-imx31.h b/include/hw/arm/fsl-imx31.h index d408abbba0..e68a81efd7 100644 --- a/include/hw/arm/fsl-imx31.h +++ b/include/hw/arm/fsl-imx31.h @@ -17,7 +17,7 @@ #ifndef FSL_IMX31_H #define FSL_IMX31_H -#include "hw/arm/arm.h" +#include "hw/arm/boot.h" #include "hw/intc/imx_avic.h" #include "hw/misc/imx31_ccm.h" #include "hw/char/imx_serial.h" diff --git a/include/hw/arm/fsl-imx6.h b/include/hw/arm/fsl-imx6.h index 06f8aaeda4..1265a55c3b 100644 --- a/include/hw/arm/fsl-imx6.h +++ b/include/hw/arm/fsl-imx6.h @@ -17,7 +17,7 @@ #ifndef FSL_IMX6_H #define FSL_IMX6_H -#include "hw/arm/arm.h" +#include "hw/arm/boot.h" #include "hw/cpu/a9mpcore.h" #include "hw/misc/imx6_ccm.h" #include "hw/misc/imx6_src.h" diff --git a/include/hw/arm/fsl-imx6ul.h b/include/hw/arm/fsl-imx6ul.h index 5897217194..9e94e98f8e 100644 --- a/include/hw/arm/fsl-imx6ul.h +++ b/include/hw/arm/fsl-imx6ul.h @@ -17,7 +17,7 @@ #ifndef FSL_IMX6UL_H #define FSL_IMX6UL_H -#include "hw/arm/arm.h" +#include "hw/arm/boot.h" #include "hw/cpu/a15mpcore.h" #include "hw/misc/imx6ul_ccm.h" #include "hw/misc/imx6_src.h" diff --git a/include/hw/arm/fsl-imx7.h b/include/hw/arm/fsl-imx7.h index d848262bfd..4101f80251 100644 --- a/include/hw/arm/fsl-imx7.h +++ b/include/hw/arm/fsl-imx7.h @@ -19,7 +19,7 @@ #ifndef FSL_IMX7_H #define FSL_IMX7_H -#include "hw/arm/arm.h" +#include "hw/arm/boot.h" #include "hw/cpu/a15mpcore.h" #include "hw/intc/imx_gpcv2.h" #include "hw/misc/imx7_ccm.h" diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h index 424070924e..73005f05ae 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h @@ -34,7 +34,7 @@ #include "exec/hwaddr.h" #include "qemu/notify.h" #include "hw/boards.h" -#include "hw/arm/arm.h" +#include "hw/arm/boot.h" #include "hw/block/flash.h" #include "sysemu/kvm.h" #include "hw/intc/arm_gicv3_common.h" diff --git a/include/hw/arm/xlnx-versal.h b/include/hw/arm/xlnx-versal.h index ec7c859d08..14405c1465 100644 --- a/include/hw/arm/xlnx-versal.h +++ b/include/hw/arm/xlnx-versal.h @@ -13,7 +13,7 @@ #define XLNX_VERSAL_H #include "hw/sysbus.h" -#include "hw/arm/arm.h" +#include "hw/arm/boot.h" #include "hw/intc/arm_gicv3.h" #define TYPE_XLNX_VERSAL "xlnx-versal" diff --git a/include/hw/arm/xlnx-zynqmp.h b/include/hw/arm/xlnx-zynqmp.h index 591515c760..cd90b04310 100644 --- a/include/hw/arm/xlnx-zynqmp.h +++ b/include/hw/arm/xlnx-zynqmp.h @@ -18,7 +18,7 @@ #ifndef XLNX_ZYNQMP_H #include "qemu-common.h" -#include "hw/arm/arm.h" +#include "hw/arm/boot.h" #include "hw/intc/arm_gic.h" #include "hw/net/cadence_gem.h" #include "hw/char/cadence_uart.h" diff --git a/include/hw/timer/armv7m_systick.h b/include/hw/timer/armv7m_systick.h index cca04defd8..25e5ceacc8 100644 --- a/include/hw/timer/armv7m_systick.h +++ b/include/hw/timer/armv7m_systick.h @@ -31,4 +31,26 @@ typedef struct SysTickState { qemu_irq irq; } SysTickState; +/* + * Multiplication factor to convert from system clock ticks to qemu timer + * ticks. This should be set (by board code, usually) to a value + * equal to NANOSECONDS_PER_SECOND / frq, where frq is the clock frequency + * in Hz of the CPU. + * + * This value is used by the systick device when it is running in + * its "use the CPU clock" mode (ie when SYST_CSR.CLKSOURCE == 1) to + * set how fast the timer should tick. + * + * TODO: we should refactor this so that rather than using a global + * we use a device property or something similar. This is complicated + * because (a) the property would need to be plumbed through from the + * board code down through various layers to the systick device + * and (b) the property needs to be modifiable after realize, because + * the stellaris board uses this to implement the behaviour where the + * guest can reprogram the PLL registers to downclock the CPU, and the + * systick device needs to react accordingly. Possibly this should + * be deferred until we have a good API for modelling clock trees. + */ +extern int system_clock_scale; + #endif diff --git a/target/arm/arm-semi.c b/target/arm/arm-semi.c index 8b5fd7bc6e..ddb94e0aba 100644 --- a/target/arm/arm-semi.c +++ b/target/arm/arm-semi.c @@ -29,7 +29,6 @@ #else #include "qemu-common.h" #include "exec/gdbstub.h" -#include "hw/arm/arm.h" #include "qemu/cutils.h" #endif diff --git a/target/arm/cpu.c b/target/arm/cpu.c index 8eee1d8c59..9b23ac2c93 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -30,7 +30,6 @@ #if !defined(CONFIG_USER_ONLY) #include "hw/loader.h" #endif -#include "hw/arm/arm.h" #include "sysemu/sysemu.h" #include "sysemu/hw_accel.h" #include "kvm_arm.h" diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c index 835f73cceb..0ec8cd41f1 100644 --- a/target/arm/cpu64.c +++ b/target/arm/cpu64.c @@ -25,7 +25,6 @@ #if !defined(CONFIG_USER_ONLY) #include "hw/loader.h" #endif -#include "hw/arm/arm.h" #include "sysemu/sysemu.h" #include "sysemu/kvm.h" #include "kvm_arm.h" diff --git a/target/arm/kvm.c b/target/arm/kvm.c index 5995634612..fe4f461d4e 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c @@ -23,7 +23,6 @@ #include "cpu.h" #include "trace.h" #include "internals.h" -#include "hw/arm/arm.h" #include "hw/pci/pci.h" #include "exec/memattrs.h" #include "exec/address-spaces.h" diff --git a/target/arm/kvm32.c b/target/arm/kvm32.c index 327375f625..4e54e372a6 100644 --- a/target/arm/kvm32.c +++ b/target/arm/kvm32.c @@ -20,7 +20,6 @@ #include "sysemu/kvm.h" #include "kvm_arm.h" #include "internals.h" -#include "hw/arm/arm.h" #include "qemu/log.h" static inline void set_feature(uint64_t *features, int feature) diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c index e3ba149248..998d21f399 100644 --- a/target/arm/kvm64.c +++ b/target/arm/kvm64.c @@ -26,7 +26,6 @@ #include "sysemu/kvm.h" #include "kvm_arm.h" #include "internals.h" -#include "hw/arm/arm.h" static bool have_guest_debug; diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index b7c5a928b4..42999c5801 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -4043,8 +4043,8 @@ static void disas_bitfield(DisasContext *s, uint32_t insn) tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len); return; } - /* opc == 1, BXFIL fall through to deposit */ - tcg_gen_extract_i64(tcg_tmp, tcg_tmp, ri, len); + /* opc == 1, BFXIL fall through to deposit */ + tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri); pos = 0; } else { /* Handle the ri > si case with a deposit @@ -4062,7 +4062,7 @@ static void disas_bitfield(DisasContext *s, uint32_t insn) len = ri; } - if (opc == 1) { /* BFM, BXFIL */ + if (opc == 1) { /* BFM, BFXIL */ tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len); } else { /* SBFM or UBFM: We start with zero, and we haven't modified @@ -4114,25 +4114,27 @@ static void disas_extract(DisasContext *s, uint32_t insn) } else { tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm)); } - } else if (rm == rn) { /* ROR */ + } else { tcg_rm = cpu_reg(s, rm); + tcg_rn = cpu_reg(s, rn); + if (sf) { - tcg_gen_rotri_i64(tcg_rd, tcg_rm, imm); + /* Specialization to ROR happens in EXTRACT2. */ + tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, imm); } else { - TCGv_i32 tmp = tcg_temp_new_i32(); - tcg_gen_extrl_i64_i32(tmp, tcg_rm); - tcg_gen_rotri_i32(tmp, tmp, imm); - tcg_gen_extu_i32_i64(tcg_rd, tmp); - tcg_temp_free_i32(tmp); - } - } else { - tcg_rm = read_cpu_reg(s, rm, sf); - tcg_rn = read_cpu_reg(s, rn, sf); - tcg_gen_shri_i64(tcg_rm, tcg_rm, imm); - tcg_gen_shli_i64(tcg_rn, tcg_rn, bitsize - imm); - tcg_gen_or_i64(tcg_rd, tcg_rm, tcg_rn); - if (!sf) { - tcg_gen_ext32u_i64(tcg_rd, tcg_rd); + TCGv_i32 t0 = tcg_temp_new_i32(); + + tcg_gen_extrl_i64_i32(t0, tcg_rm); + if (rm == rn) { + tcg_gen_rotri_i32(t0, t0, imm); + } else { + TCGv_i32 t1 = tcg_temp_new_i32(); + tcg_gen_extrl_i64_i32(t1, tcg_rn); + tcg_gen_extract2_i32(t0, t0, t1, imm); + tcg_temp_free_i32(t1); + } + tcg_gen_extu_i32_i64(tcg_rd, t0); + tcg_temp_free_i32(t0); } } } diff --git a/target/arm/translate.c b/target/arm/translate.c index dd053c80d6..298c262825 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -6598,13 +6598,13 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), rn_ofs, rm_ofs, vec_size, vec_size, (u ? uqadd_op : sqadd_op) + size); - break; + return 0; case NEON_3R_VQSUB: tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), rn_ofs, rm_ofs, vec_size, vec_size, (u ? uqsub_op : sqsub_op) + size); - break; + return 0; case NEON_3R_VMUL: /* VMUL */ if (u) { diff --git a/tcg/README b/tcg/README index cbdfd3b6bc..21fcdf737f 100644 --- a/tcg/README +++ b/tcg/README @@ -627,6 +627,17 @@ E.g. VECL=1 -> 64 << 1 -> v128, and VECE=2 -> 1 << 2 -> i32. Compare vectors by element, storing -1 for true and 0 for false. +* bitsel_vec v0, v1, v2, v3 + + Bitwise select, v0 = (v2 & v1) | (v3 & ~v1), across the entire vector. + +* cmpsel_vec v0, c1, c2, v3, v4, cond + + Select elements based on comparison results: + for (i = 0; i < n; ++i) { + v0[i] = (c1[i] cond c2[i]) ? v3[i] : v4[i]. + } + ********* Note 1: Some shortcuts are defined when the last operand is known to be diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h index e43554c3c7..ca214f6909 100644 --- a/tcg/aarch64/tcg-target.h +++ b/tcg/aarch64/tcg-target.h @@ -140,6 +140,8 @@ typedef enum { #define TCG_TARGET_HAS_mul_vec 1 #define TCG_TARGET_HAS_sat_vec 1 #define TCG_TARGET_HAS_minmax_vec 1 +#define TCG_TARGET_HAS_bitsel_vec 1 +#define TCG_TARGET_HAS_cmpsel_vec 0 #define TCG_TARGET_DEFAULT_MO (0) #define TCG_TARGET_HAS_MEMORY_BSWAP 1 diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c index 40bf35079a..9e1dad9696 100644 --- a/tcg/aarch64/tcg-target.inc.c +++ b/tcg/aarch64/tcg-target.inc.c @@ -119,6 +119,8 @@ static inline bool patch_reloc(tcg_insn_unit *code_ptr, int type, #define TCG_CT_CONST_LIMM 0x200 #define TCG_CT_CONST_ZERO 0x400 #define TCG_CT_CONST_MONE 0x800 +#define TCG_CT_CONST_ORRI 0x1000 +#define TCG_CT_CONST_ANDI 0x2000 /* parse target specific constraints */ static const char *target_parse_constraint(TCGArgConstraint *ct, @@ -154,6 +156,12 @@ static const char *target_parse_constraint(TCGArgConstraint *ct, case 'M': /* minus one */ ct->ct |= TCG_CT_CONST_MONE; break; + case 'O': /* vector orr/bic immediate */ + ct->ct |= TCG_CT_CONST_ORRI; + break; + case 'N': /* vector orr/bic immediate, inverted */ + ct->ct |= TCG_CT_CONST_ANDI; + break; case 'Z': /* zero */ ct->ct |= TCG_CT_CONST_ZERO; break; @@ -190,104 +198,117 @@ static inline bool is_limm(uint64_t val) return (val & (val - 1)) == 0; } -/* Match a constant that is valid for vectors. */ -static bool is_fimm(uint64_t v64, int *op, int *cmode, int *imm8) +/* Return true if v16 is a valid 16-bit shifted immediate. */ +static bool is_shimm16(uint16_t v16, int *cmode, int *imm8) { - int i; + if (v16 == (v16 & 0xff)) { + *cmode = 0x8; + *imm8 = v16 & 0xff; + return true; + } else if (v16 == (v16 & 0xff00)) { + *cmode = 0xa; + *imm8 = v16 >> 8; + return true; + } + return false; +} - *op = 0; - /* Match replication across 8 bits. */ - if (v64 == dup_const(MO_8, v64)) { - *cmode = 0xe; - *imm8 = v64 & 0xff; +/* Return true if v32 is a valid 32-bit shifted immediate. */ +static bool is_shimm32(uint32_t v32, int *cmode, int *imm8) +{ + if (v32 == (v32 & 0xff)) { + *cmode = 0x0; + *imm8 = v32 & 0xff; + return true; + } else if (v32 == (v32 & 0xff00)) { + *cmode = 0x2; + *imm8 = (v32 >> 8) & 0xff; + return true; + } else if (v32 == (v32 & 0xff0000)) { + *cmode = 0x4; + *imm8 = (v32 >> 16) & 0xff; + return true; + } else if (v32 == (v32 & 0xff000000)) { + *cmode = 0x6; + *imm8 = v32 >> 24; return true; } - /* Match replication across 16 bits. */ - if (v64 == dup_const(MO_16, v64)) { - uint16_t v16 = v64; + return false; +} - if (v16 == (v16 & 0xff)) { - *cmode = 0x8; - *imm8 = v16 & 0xff; - return true; - } else if (v16 == (v16 & 0xff00)) { - *cmode = 0xa; - *imm8 = v16 >> 8; - return true; - } +/* Return true if v32 is a valid 32-bit shifting ones immediate. */ +static bool is_soimm32(uint32_t v32, int *cmode, int *imm8) +{ + if ((v32 & 0xffff00ff) == 0xff) { + *cmode = 0xc; + *imm8 = (v32 >> 8) & 0xff; + return true; + } else if ((v32 & 0xff00ffff) == 0xffff) { + *cmode = 0xd; + *imm8 = (v32 >> 16) & 0xff; + return true; } - /* Match replication across 32 bits. */ - if (v64 == dup_const(MO_32, v64)) { - uint32_t v32 = v64; + return false; +} - if (v32 == (v32 & 0xff)) { - *cmode = 0x0; - *imm8 = v32 & 0xff; - return true; - } else if (v32 == (v32 & 0xff00)) { - *cmode = 0x2; - *imm8 = (v32 >> 8) & 0xff; - return true; - } else if (v32 == (v32 & 0xff0000)) { - *cmode = 0x4; - *imm8 = (v32 >> 16) & 0xff; - return true; - } else if (v32 == (v32 & 0xff000000)) { - *cmode = 0x6; - *imm8 = v32 >> 24; - return true; - } else if ((v32 & 0xffff00ff) == 0xff) { - *cmode = 0xc; - *imm8 = (v32 >> 8) & 0xff; - return true; - } else if ((v32 & 0xff00ffff) == 0xffff) { - *cmode = 0xd; - *imm8 = (v32 >> 16) & 0xff; - return true; - } - /* Match forms of a float32. */ - if (extract32(v32, 0, 19) == 0 - && (extract32(v32, 25, 6) == 0x20 - || extract32(v32, 25, 6) == 0x1f)) { - *cmode = 0xf; - *imm8 = (extract32(v32, 31, 1) << 7) - | (extract32(v32, 25, 1) << 6) - | extract32(v32, 19, 6); - return true; - } +/* Return true if v32 is a valid float32 immediate. */ +static bool is_fimm32(uint32_t v32, int *cmode, int *imm8) +{ + if (extract32(v32, 0, 19) == 0 + && (extract32(v32, 25, 6) == 0x20 + || extract32(v32, 25, 6) == 0x1f)) { + *cmode = 0xf; + *imm8 = (extract32(v32, 31, 1) << 7) + | (extract32(v32, 25, 1) << 6) + | extract32(v32, 19, 6); + return true; } - /* Match forms of a float64. */ + return false; +} + +/* Return true if v64 is a valid float64 immediate. */ +static bool is_fimm64(uint64_t v64, int *cmode, int *imm8) +{ if (extract64(v64, 0, 48) == 0 && (extract64(v64, 54, 9) == 0x100 || extract64(v64, 54, 9) == 0x0ff)) { *cmode = 0xf; - *op = 1; *imm8 = (extract64(v64, 63, 1) << 7) | (extract64(v64, 54, 1) << 6) | extract64(v64, 48, 6); return true; } - /* Match bytes of 0x00 and 0xff. */ - for (i = 0; i < 64; i += 8) { - uint64_t byte = extract64(v64, i, 8); - if (byte != 0 && byte != 0xff) { + return false; +} + +/* + * Return non-zero if v32 can be formed by MOVI+ORR. + * Place the parameters for MOVI in (cmode, imm8). + * Return the cmode for ORR; the imm8 can be had via extraction from v32. + */ +static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8) +{ + int i; + + for (i = 6; i > 0; i -= 2) { + /* Mask out one byte we can add with ORR. */ + uint32_t tmp = v32 & ~(0xffu << (i * 4)); + if (is_shimm32(tmp, cmode, imm8) || + is_soimm32(tmp, cmode, imm8)) { break; } } - if (i == 64) { - *cmode = 0xe; - *op = 1; - *imm8 = (extract64(v64, 0, 1) << 0) - | (extract64(v64, 8, 1) << 1) - | (extract64(v64, 16, 1) << 2) - | (extract64(v64, 24, 1) << 3) - | (extract64(v64, 32, 1) << 4) - | (extract64(v64, 40, 1) << 5) - | (extract64(v64, 48, 1) << 6) - | (extract64(v64, 56, 1) << 7); - return true; + return i; +} + +/* Return true if V is a valid 16-bit or 32-bit shifted immediate. */ +static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8) +{ + if (v32 == deposit32(v32, 16, 16, v32)) { + return is_shimm16(v32, cmode, imm8); + } else { + return is_shimm32(v32, cmode, imm8); } - return false; } static int tcg_target_const_match(tcg_target_long val, TCGType type, @@ -314,6 +335,23 @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, return 1; } + switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) { + case 0: + break; + case TCG_CT_CONST_ANDI: + val = ~val; + /* fallthru */ + case TCG_CT_CONST_ORRI: + if (val == deposit64(val, 32, 32, val)) { + int cmode, imm8; + return is_shimm1632(val, &cmode, &imm8); + } + break; + default: + /* Both bits should not be set for the same insn. */ + g_assert_not_reached(); + } + return 0; } @@ -511,6 +549,9 @@ typedef enum { /* AdvSIMD modified immediate */ I3606_MOVI = 0x0f000400, + I3606_MVNI = 0x2f000400, + I3606_BIC = 0x2f001400, + I3606_ORR = 0x0f001400, /* AdvSIMD shift by immediate */ I3614_SSHR = 0x0f000400, @@ -523,6 +564,9 @@ typedef enum { I3616_ADD = 0x0e208400, I3616_AND = 0x0e201c00, I3616_BIC = 0x0e601c00, + I3616_BIF = 0x2ee01c00, + I3616_BIT = 0x2ea01c00, + I3616_BSL = 0x2e601c00, I3616_EOR = 0x2e201c00, I3616_MUL = 0x0e209c00, I3616_ORR = 0x0ea01c00, @@ -814,11 +858,98 @@ static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext, static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg rd, tcg_target_long v64) { - int op, cmode, imm8; + bool q = type == TCG_TYPE_V128; + int cmode, imm8, i; + + /* Test all bytes equal first. */ + if (v64 == dup_const(MO_8, v64)) { + imm8 = (uint8_t)v64; + tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8); + return; + } - if (is_fimm(v64, &op, &cmode, &imm8)) { - tcg_out_insn(s, 3606, MOVI, type == TCG_TYPE_V128, rd, op, cmode, imm8); - } else if (type == TCG_TYPE_V128) { + /* + * Test all bytes 0x00 or 0xff second. This can match cases that + * might otherwise take 2 or 3 insns for MO_16 or MO_32 below. + */ + for (i = imm8 = 0; i < 8; i++) { + uint8_t byte = v64 >> (i * 8); + if (byte == 0xff) { + imm8 |= 1 << i; + } else if (byte != 0) { + goto fail_bytes; + } + } + tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8); + return; + fail_bytes: + + /* + * Tests for various replications. For each element width, if we + * cannot find an expansion there's no point checking a larger + * width because we already know by replication it cannot match. + */ + if (v64 == dup_const(MO_16, v64)) { + uint16_t v16 = v64; + + if (is_shimm16(v16, &cmode, &imm8)) { + tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8); + return; + } + if (is_shimm16(~v16, &cmode, &imm8)) { + tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8); + return; + } + + /* + * Otherwise, all remaining constants can be loaded in two insns: + * rd = v16 & 0xff, rd |= v16 & 0xff00. + */ + tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff); + tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8); + return; + } else if (v64 == dup_const(MO_32, v64)) { + uint32_t v32 = v64; + uint32_t n32 = ~v32; + + if (is_shimm32(v32, &cmode, &imm8) || + is_soimm32(v32, &cmode, &imm8) || + is_fimm32(v32, &cmode, &imm8)) { + tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8); + return; + } + if (is_shimm32(n32, &cmode, &imm8) || + is_soimm32(n32, &cmode, &imm8)) { + tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8); + return; + } + + /* + * Restrict the set of constants to those we can load with + * two instructions. Others we load from the pool. + */ + i = is_shimm32_pair(v32, &cmode, &imm8); + if (i) { + tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8); + tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8)); + return; + } + i = is_shimm32_pair(n32, &cmode, &imm8); + if (i) { + tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8); + tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8)); + return; + } + } else if (is_fimm64(v64, &cmode, &imm8)) { + tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8); + return; + } + + /* + * As a last resort, load from the constant pool. Sadly there + * is no LD1R (literal), so store the full 16-byte vector. + */ + if (type == TCG_TYPE_V128) { new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64); tcg_out_insn(s, 3305, LDR_v128, 0, rd); } else { @@ -2181,7 +2312,8 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, TCGType type = vecl + TCG_TYPE_V64; unsigned is_q = vecl; - TCGArg a0, a1, a2; + TCGArg a0, a1, a2, a3; + int cmode, imm8; a0 = args[0]; a1 = args[1]; @@ -2213,20 +2345,56 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1); break; case INDEX_op_and_vec: + if (const_args[2]) { + is_shimm1632(~a2, &cmode, &imm8); + if (a0 == a1) { + tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8); + return; + } + tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8); + a2 = a0; + } tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2); break; case INDEX_op_or_vec: + if (const_args[2]) { + is_shimm1632(a2, &cmode, &imm8); + if (a0 == a1) { + tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8); + return; + } + tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8); + a2 = a0; + } tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2); break; - case INDEX_op_xor_vec: - tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2); - break; case INDEX_op_andc_vec: + if (const_args[2]) { + is_shimm1632(a2, &cmode, &imm8); + if (a0 == a1) { + tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8); + return; + } + tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8); + a2 = a0; + } tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2); break; case INDEX_op_orc_vec: + if (const_args[2]) { + is_shimm1632(~a2, &cmode, &imm8); + if (a0 == a1) { + tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8); + return; + } + tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8); + a2 = a0; + } tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2); break; + case INDEX_op_xor_vec: + tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2); + break; case INDEX_op_ssadd_vec: tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2); break; @@ -2304,6 +2472,20 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, } break; + case INDEX_op_bitsel_vec: + a3 = args[3]; + if (a0 == a3) { + tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1); + } else if (a0 == a2) { + tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1); + } else { + if (a0 != a1) { + tcg_out_mov(s, type, a0, a1); + } + tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3); + } + break; + case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */ case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi. */ case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */ @@ -2334,6 +2516,7 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) case INDEX_op_usadd_vec: case INDEX_op_ussub_vec: case INDEX_op_shlv_vec: + case INDEX_op_bitsel_vec: return 1; case INDEX_op_shrv_vec: case INDEX_op_sarv_vec: @@ -2394,6 +2577,8 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) static const TCGTargetOpDef lZ_l = { .args_ct_str = { "lZ", "l" } }; static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } }; static const TCGTargetOpDef w_w_w = { .args_ct_str = { "w", "w", "w" } }; + static const TCGTargetOpDef w_w_wO = { .args_ct_str = { "w", "w", "wO" } }; + static const TCGTargetOpDef w_w_wN = { .args_ct_str = { "w", "w", "wN" } }; static const TCGTargetOpDef w_w_wZ = { .args_ct_str = { "w", "w", "wZ" } }; static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } }; static const TCGTargetOpDef r_r_rA = { .args_ct_str = { "r", "r", "rA" } }; @@ -2408,6 +2593,8 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) = { .args_ct_str = { "r", "r", "rA", "rZ", "rZ" } }; static const TCGTargetOpDef add2 = { .args_ct_str = { "r", "r", "rZ", "rZ", "rA", "rMZ" } }; + static const TCGTargetOpDef w_w_w_w + = { .args_ct_str = { "w", "w", "w", "w" } }; switch (op) { case INDEX_op_goto_ptr: @@ -2547,11 +2734,7 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) case INDEX_op_add_vec: case INDEX_op_sub_vec: case INDEX_op_mul_vec: - case INDEX_op_and_vec: - case INDEX_op_or_vec: case INDEX_op_xor_vec: - case INDEX_op_andc_vec: - case INDEX_op_orc_vec: case INDEX_op_ssadd_vec: case INDEX_op_sssub_vec: case INDEX_op_usadd_vec: @@ -2578,8 +2761,16 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) return &w_r; case INDEX_op_dup_vec: return &w_wr; + case INDEX_op_or_vec: + case INDEX_op_andc_vec: + return &w_w_wO; + case INDEX_op_and_vec: + case INDEX_op_orc_vec: + return &w_w_wN; case INDEX_op_cmp_vec: return &w_w_wZ; + case INDEX_op_bitsel_vec: + return &w_w_w_w; default: return NULL; diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index 66f16fbe3c..928e8b87bb 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -190,6 +190,8 @@ extern bool have_avx2; #define TCG_TARGET_HAS_mul_vec 1 #define TCG_TARGET_HAS_sat_vec 1 #define TCG_TARGET_HAS_minmax_vec 1 +#define TCG_TARGET_HAS_bitsel_vec 0 +#define TCG_TARGET_HAS_cmpsel_vec -1 #define TCG_TARGET_deposit_i32_valid(ofs, len) \ (((ofs) == 0 && (len) == 8) || ((ofs) == 8 && (len) == 8) || \ diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c index aafd01cb49..c0443da4af 100644 --- a/tcg/i386/tcg-target.inc.c +++ b/tcg/i386/tcg-target.inc.c @@ -358,6 +358,7 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type, #define OPC_MOVBE_MyGy (0xf1 | P_EXT38) #define OPC_MOVD_VyEy (0x6e | P_EXT | P_DATA16) #define OPC_MOVD_EyVy (0x7e | P_EXT | P_DATA16) +#define OPC_MOVDDUP (0x12 | P_EXT | P_SIMDF2) #define OPC_MOVDQA_VxWx (0x6f | P_EXT | P_DATA16) #define OPC_MOVDQA_WxVx (0x7f | P_EXT | P_DATA16) #define OPC_MOVDQU_VxWx (0x6f | P_EXT | P_SIMDF3) @@ -921,7 +922,7 @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, } else { switch (vece) { case MO_64: - tcg_out_vex_modrm_offset(s, OPC_VBROADCASTSD, r, 0, base, offset); + tcg_out_vex_modrm_offset(s, OPC_MOVDDUP, r, 0, base, offset); break; case MO_32: tcg_out_vex_modrm_offset(s, OPC_VBROADCASTSS, r, 0, base, offset); @@ -963,12 +964,12 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, } else if (have_avx2) { tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTQ + vex_l, ret); } else { - tcg_out_vex_modrm_pool(s, OPC_VBROADCASTSD, ret); + tcg_out_vex_modrm_pool(s, OPC_MOVDDUP, ret); } new_pool_label(s, arg, R_386_PC32, s->code_ptr - 4, -4); } else { if (have_avx2) { - tcg_out_vex_modrm_pool(s, OPC_VBROADCASTSD + vex_l, ret); + tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTW + vex_l, ret); } else { tcg_out_vex_modrm_pool(s, OPC_VBROADCASTSS, ret); } @@ -1081,14 +1082,24 @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, } /* FALLTHRU */ case TCG_TYPE_V64: + /* There is no instruction that can validate 8-byte alignment. */ tcg_debug_assert(ret >= 16); tcg_out_vex_modrm_offset(s, OPC_MOVQ_VqWq, ret, 0, arg1, arg2); break; case TCG_TYPE_V128: + /* + * The gvec infrastructure is asserts that v128 vector loads + * and stores use a 16-byte aligned offset. Validate that the + * final pointer is aligned by using an insn that will SIGSEGV. + */ tcg_debug_assert(ret >= 16); - tcg_out_vex_modrm_offset(s, OPC_MOVDQU_VxWx, ret, 0, arg1, arg2); + tcg_out_vex_modrm_offset(s, OPC_MOVDQA_VxWx, ret, 0, arg1, arg2); break; case TCG_TYPE_V256: + /* + * The gvec infrastructure only requires 16-byte alignment, + * so here we must use an unaligned load. + */ tcg_debug_assert(ret >= 16); tcg_out_vex_modrm_offset(s, OPC_MOVDQU_VxWx | P_VEXL, ret, 0, arg1, arg2); @@ -1116,14 +1127,24 @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, } /* FALLTHRU */ case TCG_TYPE_V64: + /* There is no instruction that can validate 8-byte alignment. */ tcg_debug_assert(arg >= 16); tcg_out_vex_modrm_offset(s, OPC_MOVQ_WqVq, arg, 0, arg1, arg2); break; case TCG_TYPE_V128: + /* + * The gvec infrastructure is asserts that v128 vector loads + * and stores use a 16-byte aligned offset. Validate that the + * final pointer is aligned by using an insn that will SIGSEGV. + */ tcg_debug_assert(arg >= 16); - tcg_out_vex_modrm_offset(s, OPC_MOVDQU_WxVx, arg, 0, arg1, arg2); + tcg_out_vex_modrm_offset(s, OPC_MOVDQA_WxVx, arg, 0, arg1, arg2); break; case TCG_TYPE_V256: + /* + * The gvec infrastructure only requires 16-byte alignment, + * so here we must use an unaligned store. + */ tcg_debug_assert(arg >= 16); tcg_out_vex_modrm_offset(s, OPC_MOVDQU_WxVx | P_VEXL, arg, 0, arg1, arg2); @@ -3245,6 +3266,7 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) case INDEX_op_andc_vec: return 1; case INDEX_op_cmp_vec: + case INDEX_op_cmpsel_vec: return -1; case INDEX_op_shli_vec: @@ -3295,7 +3317,6 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) case INDEX_op_smax_vec: case INDEX_op_umin_vec: case INDEX_op_umax_vec: - return vece <= MO_32 ? 1 : -1; case INDEX_op_abs_vec: return vece <= MO_32; @@ -3463,32 +3484,65 @@ static void expand_vec_mul(TCGType type, unsigned vece, } } -static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0, - TCGv_vec v1, TCGv_vec v2, TCGCond cond) +static bool expand_vec_cmp_noinv(TCGType type, unsigned vece, TCGv_vec v0, + TCGv_vec v1, TCGv_vec v2, TCGCond cond) { enum { - NEED_SWAP = 1, - NEED_INV = 2, - NEED_BIAS = 4 - }; - static const uint8_t fixups[16] = { - [0 ... 15] = -1, - [TCG_COND_EQ] = 0, - [TCG_COND_NE] = NEED_INV, - [TCG_COND_GT] = 0, - [TCG_COND_LT] = NEED_SWAP, - [TCG_COND_LE] = NEED_INV, - [TCG_COND_GE] = NEED_SWAP | NEED_INV, - [TCG_COND_GTU] = NEED_BIAS, - [TCG_COND_LTU] = NEED_BIAS | NEED_SWAP, - [TCG_COND_LEU] = NEED_BIAS | NEED_INV, - [TCG_COND_GEU] = NEED_BIAS | NEED_SWAP | NEED_INV, + NEED_INV = 1, + NEED_SWAP = 2, + NEED_BIAS = 4, + NEED_UMIN = 8, + NEED_UMAX = 16, }; TCGv_vec t1, t2; uint8_t fixup; - fixup = fixups[cond & 15]; - tcg_debug_assert(fixup != 0xff); + switch (cond) { + case TCG_COND_EQ: + case TCG_COND_GT: + fixup = 0; + break; + case TCG_COND_NE: + case TCG_COND_LE: + fixup = NEED_INV; + break; + case TCG_COND_LT: + fixup = NEED_SWAP; + break; + case TCG_COND_GE: + fixup = NEED_SWAP | NEED_INV; + break; + case TCG_COND_LEU: + if (vece <= MO_32) { + fixup = NEED_UMIN; + } else { + fixup = NEED_BIAS | NEED_INV; + } + break; + case TCG_COND_GTU: + if (vece <= MO_32) { + fixup = NEED_UMIN | NEED_INV; + } else { + fixup = NEED_BIAS; + } + break; + case TCG_COND_GEU: + if (vece <= MO_32) { + fixup = NEED_UMAX; + } else { + fixup = NEED_BIAS | NEED_SWAP | NEED_INV; + } + break; + case TCG_COND_LTU: + if (vece <= MO_32) { + fixup = NEED_UMAX | NEED_INV; + } else { + fixup = NEED_BIAS | NEED_SWAP; + } + break; + default: + g_assert_not_reached(); + } if (fixup & NEED_INV) { cond = tcg_invert_cond(cond); @@ -3499,7 +3553,16 @@ static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0, } t1 = t2 = NULL; - if (fixup & NEED_BIAS) { + if (fixup & (NEED_UMIN | NEED_UMAX)) { + t1 = tcg_temp_new_vec(type); + if (fixup & NEED_UMIN) { + tcg_gen_umin_vec(vece, t1, v1, v2); + } else { + tcg_gen_umax_vec(vece, t1, v1, v2); + } + v2 = t1; + cond = TCG_COND_EQ; + } else if (fixup & NEED_BIAS) { t1 = tcg_temp_new_vec(type); t2 = tcg_temp_new_vec(type); tcg_gen_dupi_vec(vece, t2, 1ull << ((8 << vece) - 1)); @@ -3521,28 +3584,32 @@ static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0, tcg_temp_free_vec(t2); } } - if (fixup & NEED_INV) { + return fixup & NEED_INV; +} + +static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0, + TCGv_vec v1, TCGv_vec v2, TCGCond cond) +{ + if (expand_vec_cmp_noinv(type, vece, v0, v1, v2, cond)) { tcg_gen_not_vec(vece, v0, v0); } } -static void expand_vec_minmax(TCGType type, unsigned vece, - TCGCond cond, bool min, - TCGv_vec v0, TCGv_vec v1, TCGv_vec v2) +static void expand_vec_cmpsel(TCGType type, unsigned vece, TCGv_vec v0, + TCGv_vec c1, TCGv_vec c2, + TCGv_vec v3, TCGv_vec v4, TCGCond cond) { - TCGv_vec t1 = tcg_temp_new_vec(type); - - tcg_debug_assert(vece == MO_64); + TCGv_vec t = tcg_temp_new_vec(type); - tcg_gen_cmp_vec(cond, vece, t1, v1, v2); - if (min) { - TCGv_vec t2; - t2 = v1, v1 = v2, v2 = t2; + if (expand_vec_cmp_noinv(type, vece, t, c1, c2, cond)) { + /* Invert the sense of the compare by swapping arguments. */ + TCGv_vec x; + x = v3, v3 = v4, v4 = x; } vec_gen_4(INDEX_op_x86_vpblendvb_vec, type, vece, - tcgv_vec_arg(v0), tcgv_vec_arg(v1), - tcgv_vec_arg(v2), tcgv_vec_arg(t1)); - tcg_temp_free_vec(t1); + tcgv_vec_arg(v0), tcgv_vec_arg(v4), + tcgv_vec_arg(v3), tcgv_vec_arg(t)); + tcg_temp_free_vec(t); } void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, @@ -3550,7 +3617,7 @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, { va_list va; TCGArg a2; - TCGv_vec v0, v1, v2; + TCGv_vec v0, v1, v2, v3, v4; va_start(va, a0); v0 = temp_tcgv_vec(arg_temp(a0)); @@ -3577,21 +3644,11 @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg)); break; - case INDEX_op_smin_vec: - v2 = temp_tcgv_vec(arg_temp(a2)); - expand_vec_minmax(type, vece, TCG_COND_GT, true, v0, v1, v2); - break; - case INDEX_op_smax_vec: - v2 = temp_tcgv_vec(arg_temp(a2)); - expand_vec_minmax(type, vece, TCG_COND_GT, false, v0, v1, v2); - break; - case INDEX_op_umin_vec: - v2 = temp_tcgv_vec(arg_temp(a2)); - expand_vec_minmax(type, vece, TCG_COND_GTU, true, v0, v1, v2); - break; - case INDEX_op_umax_vec: + case INDEX_op_cmpsel_vec: v2 = temp_tcgv_vec(arg_temp(a2)); - expand_vec_minmax(type, vece, TCG_COND_GTU, false, v0, v1, v2); + v3 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); + v4 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); + expand_vec_cmpsel(type, vece, v0, v1, v2, v3, v4, va_arg(va, TCGArg)); break; default: diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c index 338ddd9d9e..f18464cf07 100644 --- a/tcg/tcg-op-gvec.c +++ b/tcg/tcg-op-gvec.c @@ -1446,36 +1446,35 @@ void tcg_gen_gvec_dup_i64(unsigned vece, uint32_t dofs, uint32_t oprsz, void tcg_gen_gvec_dup_mem(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t maxsz) { + check_size_align(oprsz, maxsz, dofs); if (vece <= MO_64) { - TCGType type = choose_vector_type(0, vece, oprsz, 0); + TCGType type = choose_vector_type(NULL, vece, oprsz, 0); if (type != 0) { TCGv_vec t_vec = tcg_temp_new_vec(type); tcg_gen_dup_mem_vec(vece, t_vec, cpu_env, aofs); do_dup_store(type, dofs, oprsz, maxsz, t_vec); tcg_temp_free_vec(t_vec); - return; - } - } - if (vece <= MO_32) { - TCGv_i32 in = tcg_temp_new_i32(); - switch (vece) { - case MO_8: - tcg_gen_ld8u_i32(in, cpu_env, aofs); - break; - case MO_16: - tcg_gen_ld16u_i32(in, cpu_env, aofs); - break; - case MO_32: - tcg_gen_ld_i32(in, cpu_env, aofs); - break; + } else if (vece <= MO_32) { + TCGv_i32 in = tcg_temp_new_i32(); + switch (vece) { + case MO_8: + tcg_gen_ld8u_i32(in, cpu_env, aofs); + break; + case MO_16: + tcg_gen_ld16u_i32(in, cpu_env, aofs); + break; + default: + tcg_gen_ld_i32(in, cpu_env, aofs); + break; + } + do_dup(vece, dofs, oprsz, maxsz, in, NULL, 0); + tcg_temp_free_i32(in); + } else { + TCGv_i64 in = tcg_temp_new_i64(); + tcg_gen_ld_i64(in, cpu_env, aofs); + do_dup(vece, dofs, oprsz, maxsz, NULL, in, 0); + tcg_temp_free_i64(in); } - tcg_gen_gvec_dup_i32(vece, dofs, oprsz, maxsz, in); - tcg_temp_free_i32(in); - } else if (vece == MO_64) { - TCGv_i64 in = tcg_temp_new_i64(); - tcg_gen_ld_i64(in, cpu_env, aofs); - tcg_gen_gvec_dup_i64(MO_64, dofs, oprsz, maxsz, in); - tcg_temp_free_i64(in); } else { /* 128-bit duplicate. */ /* ??? Dup to 256-bit vector. */ @@ -1504,6 +1503,9 @@ void tcg_gen_gvec_dup_mem(unsigned vece, uint32_t dofs, uint32_t aofs, tcg_temp_free_i64(in0); tcg_temp_free_i64(in1); } + if (oprsz < maxsz) { + expand_clr(dofs + oprsz, maxsz - oprsz); + } } } @@ -3193,3 +3195,26 @@ void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs, expand_clr(dofs + oprsz, maxsz - oprsz); } } + +static void tcg_gen_bitsel_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, TCGv_i64 c) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_and_i64(t, b, a); + tcg_gen_andc_i64(d, c, a); + tcg_gen_or_i64(d, d, t); + tcg_temp_free_i64(t); +} + +void tcg_gen_gvec_bitsel(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t cofs, + uint32_t oprsz, uint32_t maxsz) +{ + static const GVecGen4 g = { + .fni8 = tcg_gen_bitsel_i64, + .fniv = tcg_gen_bitsel_vec, + .fno = gen_helper_gvec_bitsel, + }; + + tcg_gen_gvec_4(dofs, aofs, bofs, cofs, oprsz, maxsz, &g); +} diff --git a/tcg/tcg-op-gvec.h b/tcg/tcg-op-gvec.h index 52a398c190..2a9e0c7c0a 100644 --- a/tcg/tcg-op-gvec.h +++ b/tcg/tcg-op-gvec.h @@ -343,6 +343,13 @@ void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs, uint32_t oprsz, uint32_t maxsz); /* + * Perform vector bit select: d = (b & a) | (c & ~a). + */ +void tcg_gen_gvec_bitsel(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t cofs, + uint32_t oprsz, uint32_t maxsz); + +/* * 64-bit vector operations. Use these when the register has been allocated * with tcg_global_mem_new_i64, and so we cannot also address it via pointer. * OPRSZ = MAXSZ = 8. diff --git a/tcg/tcg-op-vec.c b/tcg/tcg-op-vec.c index 543508d545..501d9630a2 100644 --- a/tcg/tcg-op-vec.c +++ b/tcg/tcg-op-vec.c @@ -88,6 +88,7 @@ bool tcg_can_emit_vecop_list(const TCGOpcode *list, case INDEX_op_dup2_vec: case INDEX_op_ld_vec: case INDEX_op_st_vec: + case INDEX_op_bitsel_vec: /* These opcodes are mandatory and should not be listed. */ g_assert_not_reached(); default: @@ -118,6 +119,15 @@ bool tcg_can_emit_vecop_list(const TCGOpcode *list, continue; } break; + case INDEX_op_cmpsel_vec: + case INDEX_op_smin_vec: + case INDEX_op_smax_vec: + case INDEX_op_umin_vec: + case INDEX_op_umax_vec: + if (tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) { + continue; + } + break; default: break; } @@ -158,6 +168,20 @@ void vec_gen_4(TCGOpcode opc, TCGType type, unsigned vece, op->args[3] = c; } +static void vec_gen_6(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, + TCGArg a, TCGArg b, TCGArg c, TCGArg d, TCGArg e) +{ + TCGOp *op = tcg_emit_op(opc); + TCGOP_VECL(op) = type - TCG_TYPE_V64; + TCGOP_VECE(op) = vece; + op->args[0] = r; + op->args[1] = a; + op->args[2] = b; + op->args[3] = c; + op->args[4] = d; + op->args[5] = e; +} + static void vec_gen_op2(TCGOpcode opc, unsigned vece, TCGv_vec r, TCGv_vec a) { TCGTemp *rt = tcgv_vec_temp(r); @@ -542,7 +566,7 @@ void tcg_gen_cmp_vec(TCGCond cond, unsigned vece, } } -static void do_op3(unsigned vece, TCGv_vec r, TCGv_vec a, +static bool do_op3(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b, TCGOpcode opc) { TCGTemp *rt = tcgv_vec_temp(r); @@ -560,82 +584,99 @@ static void do_op3(unsigned vece, TCGv_vec r, TCGv_vec a, can = tcg_can_emit_vec_op(opc, type, vece); if (can > 0) { vec_gen_3(opc, type, vece, ri, ai, bi); - } else { + } else if (can < 0) { const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL); - tcg_debug_assert(can < 0); tcg_expand_vec_op(opc, type, vece, ri, ai, bi); tcg_swap_vecop_list(hold_list); + } else { + return false; } + return true; +} + +static void do_op3_nofail(unsigned vece, TCGv_vec r, TCGv_vec a, + TCGv_vec b, TCGOpcode opc) +{ + bool ok = do_op3(vece, r, a, b, opc); + tcg_debug_assert(ok); } void tcg_gen_add_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) { - do_op3(vece, r, a, b, INDEX_op_add_vec); + do_op3_nofail(vece, r, a, b, INDEX_op_add_vec); } void tcg_gen_sub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) { - do_op3(vece, r, a, b, INDEX_op_sub_vec); + do_op3_nofail(vece, r, a, b, INDEX_op_sub_vec); } void tcg_gen_mul_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) { - do_op3(vece, r, a, b, INDEX_op_mul_vec); + do_op3_nofail(vece, r, a, b, INDEX_op_mul_vec); } void tcg_gen_ssadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) { - do_op3(vece, r, a, b, INDEX_op_ssadd_vec); + do_op3_nofail(vece, r, a, b, INDEX_op_ssadd_vec); } void tcg_gen_usadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) { - do_op3(vece, r, a, b, INDEX_op_usadd_vec); + do_op3_nofail(vece, r, a, b, INDEX_op_usadd_vec); } void tcg_gen_sssub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) { - do_op3(vece, r, a, b, INDEX_op_sssub_vec); + do_op3_nofail(vece, r, a, b, INDEX_op_sssub_vec); } void tcg_gen_ussub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) { - do_op3(vece, r, a, b, INDEX_op_ussub_vec); + do_op3_nofail(vece, r, a, b, INDEX_op_ussub_vec); +} + +static void do_minmax(unsigned vece, TCGv_vec r, TCGv_vec a, + TCGv_vec b, TCGOpcode opc, TCGCond cond) +{ + if (!do_op3(vece, r, a, b, opc)) { + tcg_gen_cmpsel_vec(cond, vece, r, a, b, a, b); + } } void tcg_gen_smin_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) { - do_op3(vece, r, a, b, INDEX_op_smin_vec); + do_minmax(vece, r, a, b, INDEX_op_smin_vec, TCG_COND_LT); } void tcg_gen_umin_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) { - do_op3(vece, r, a, b, INDEX_op_umin_vec); + do_minmax(vece, r, a, b, INDEX_op_umin_vec, TCG_COND_LTU); } void tcg_gen_smax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) { - do_op3(vece, r, a, b, INDEX_op_smax_vec); + do_minmax(vece, r, a, b, INDEX_op_smax_vec, TCG_COND_GT); } void tcg_gen_umax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) { - do_op3(vece, r, a, b, INDEX_op_umax_vec); + do_minmax(vece, r, a, b, INDEX_op_umax_vec, TCG_COND_GTU); } void tcg_gen_shlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) { - do_op3(vece, r, a, b, INDEX_op_shlv_vec); + do_op3_nofail(vece, r, a, b, INDEX_op_shlv_vec); } void tcg_gen_shrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) { - do_op3(vece, r, a, b, INDEX_op_shrv_vec); + do_op3_nofail(vece, r, a, b, INDEX_op_shrv_vec); } void tcg_gen_sarv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) { - do_op3(vece, r, a, b, INDEX_op_sarv_vec); + do_op3_nofail(vece, r, a, b, INDEX_op_sarv_vec); } static void do_shifts(unsigned vece, TCGv_vec r, TCGv_vec a, @@ -671,7 +712,7 @@ static void do_shifts(unsigned vece, TCGv_vec r, TCGv_vec a, } else { tcg_gen_dup_i32_vec(vece, vec_s, s); } - do_op3(vece, r, a, vec_s, opc_v); + do_op3_nofail(vece, r, a, vec_s, opc_v); tcg_temp_free_vec(vec_s); } tcg_swap_vecop_list(hold_list); @@ -691,3 +732,68 @@ void tcg_gen_sars_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b) { do_shifts(vece, r, a, b, INDEX_op_sars_vec, INDEX_op_sarv_vec); } + +void tcg_gen_bitsel_vec(unsigned vece, TCGv_vec r, TCGv_vec a, + TCGv_vec b, TCGv_vec c) +{ + TCGTemp *rt = tcgv_vec_temp(r); + TCGTemp *at = tcgv_vec_temp(a); + TCGTemp *bt = tcgv_vec_temp(b); + TCGTemp *ct = tcgv_vec_temp(c); + TCGType type = rt->base_type; + + tcg_debug_assert(at->base_type >= type); + tcg_debug_assert(bt->base_type >= type); + tcg_debug_assert(ct->base_type >= type); + + if (TCG_TARGET_HAS_bitsel_vec) { + vec_gen_4(INDEX_op_bitsel_vec, type, MO_8, + temp_arg(rt), temp_arg(at), temp_arg(bt), temp_arg(ct)); + } else { + TCGv_vec t = tcg_temp_new_vec(type); + tcg_gen_and_vec(MO_8, t, a, b); + tcg_gen_andc_vec(MO_8, r, c, a); + tcg_gen_or_vec(MO_8, r, r, t); + tcg_temp_free_vec(t); + } +} + +void tcg_gen_cmpsel_vec(TCGCond cond, unsigned vece, TCGv_vec r, + TCGv_vec a, TCGv_vec b, TCGv_vec c, TCGv_vec d) +{ + TCGTemp *rt = tcgv_vec_temp(r); + TCGTemp *at = tcgv_vec_temp(a); + TCGTemp *bt = tcgv_vec_temp(b); + TCGTemp *ct = tcgv_vec_temp(c); + TCGTemp *dt = tcgv_vec_temp(d); + TCGArg ri = temp_arg(rt); + TCGArg ai = temp_arg(at); + TCGArg bi = temp_arg(bt); + TCGArg ci = temp_arg(ct); + TCGArg di = temp_arg(dt); + TCGType type = rt->base_type; + const TCGOpcode *hold_list; + int can; + + tcg_debug_assert(at->base_type >= type); + tcg_debug_assert(bt->base_type >= type); + tcg_debug_assert(ct->base_type >= type); + tcg_debug_assert(dt->base_type >= type); + + tcg_assert_listed_vecop(INDEX_op_cmpsel_vec); + hold_list = tcg_swap_vecop_list(NULL); + can = tcg_can_emit_vec_op(INDEX_op_cmpsel_vec, type, vece); + + if (can > 0) { + vec_gen_6(INDEX_op_cmpsel_vec, type, vece, ri, ai, bi, ci, di, cond); + } else if (can < 0) { + tcg_expand_vec_op(INDEX_op_cmpsel_vec, type, vece, + ri, ai, bi, ci, di, cond); + } else { + TCGv_vec t = tcg_temp_new_vec(type); + tcg_gen_cmp_vec(cond, vece, t, a, b); + tcg_gen_bitsel_vec(vece, r, t, c, d); + tcg_temp_free_vec(t); + } + tcg_swap_vecop_list(hold_list); +} diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h index 660fe205d0..2d4dd5cd7d 100644 --- a/tcg/tcg-op.h +++ b/tcg/tcg-op.h @@ -1000,6 +1000,11 @@ void tcg_gen_sarv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s); void tcg_gen_cmp_vec(TCGCond cond, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b); +void tcg_gen_bitsel_vec(unsigned vece, TCGv_vec r, TCGv_vec a, + TCGv_vec b, TCGv_vec c); +void tcg_gen_cmpsel_vec(TCGCond cond, unsigned vece, TCGv_vec r, + TCGv_vec a, TCGv_vec b, TCGv_vec c, TCGv_vec d); + void tcg_gen_ld_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset); void tcg_gen_st_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset); void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t); diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h index 4a2dd116eb..242d608e6d 100644 --- a/tcg/tcg-opc.h +++ b/tcg/tcg-opc.h @@ -35,7 +35,7 @@ DEF(call, 0, 0, 3, TCG_OPF_CALL_CLOBBER | TCG_OPF_NOT_PRESENT) DEF(br, 0, 0, 1, TCG_OPF_BB_END) -#define IMPL(X) (__builtin_constant_p(X) && !(X) ? TCG_OPF_NOT_PRESENT : 0) +#define IMPL(X) (__builtin_constant_p(X) && (X) <= 0 ? TCG_OPF_NOT_PRESENT : 0) #if TCG_TARGET_REG_BITS == 32 # define IMPL64 TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT #else @@ -256,6 +256,9 @@ DEF(sarv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec)) DEF(cmp_vec, 1, 2, 1, IMPLVEC) +DEF(bitsel_vec, 1, 3, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_bitsel_vec)) +DEF(cmpsel_vec, 1, 4, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_cmpsel_vec)) + DEF(last_generic, 0, 0, 0, TCG_OPF_NOT_PRESENT) #if TCG_TARGET_MAYBE_vec diff --git a/tcg/tcg.c b/tcg/tcg.c index 24083b8c00..02a2680169 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1646,6 +1646,10 @@ bool tcg_op_supported(TCGOpcode op) case INDEX_op_smax_vec: case INDEX_op_umax_vec: return have_vec && TCG_TARGET_HAS_minmax_vec; + case INDEX_op_bitsel_vec: + return have_vec && TCG_TARGET_HAS_bitsel_vec; + case INDEX_op_cmpsel_vec: + return have_vec && TCG_TARGET_HAS_cmpsel_vec; default: tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS); @@ -2026,6 +2030,7 @@ static void tcg_dump_ops(TCGContext *s, bool have_prefs) case INDEX_op_setcond_i64: case INDEX_op_movcond_i64: case INDEX_op_cmp_vec: + case INDEX_op_cmpsel_vec: if (op->args[k] < ARRAY_SIZE(cond_name) && cond_name[op->args[k]]) { col += qemu_log(",%s", cond_name[op->args[k++]]); diff --git a/tcg/tcg.h b/tcg/tcg.h index 0e01a70d66..21cd6f1249 100644 --- a/tcg/tcg.h +++ b/tcg/tcg.h @@ -187,6 +187,8 @@ typedef uint64_t TCGRegSet; #define TCG_TARGET_HAS_mul_vec 0 #define TCG_TARGET_HAS_sat_vec 0 #define TCG_TARGET_HAS_minmax_vec 0 +#define TCG_TARGET_HAS_bitsel_vec 0 +#define TCG_TARGET_HAS_cmpsel_vec 0 #else #define TCG_TARGET_MAYBE_vec 1 #endif |