diff options
| -rw-r--r-- | hw/m68k/bootinfo.h | 48 | ||||
| -rw-r--r-- | hw/m68k/q800.c | 76 | ||||
| -rw-r--r-- | hw/m68k/virt.c | 57 | ||||
| -rw-r--r-- | include/standard-headers/asm-m68k/bootinfo-virt.h | 4 | ||||
| -rw-r--r-- | include/standard-headers/asm-m68k/bootinfo.h | 8 | ||||
| -rw-r--r-- | target/i386/cpu.c | 8 | ||||
| -rw-r--r-- | target/i386/cpu.h | 3 | ||||
| -rw-r--r-- | target/i386/ops_sse.h | 116 | ||||
| -rw-r--r-- | target/i386/ops_sse_header.h | 17 | ||||
| -rw-r--r-- | target/i386/tcg/decode-new.c.inc | 48 | ||||
| -rw-r--r-- | target/i386/tcg/decode-new.h | 3 | ||||
| -rw-r--r-- | target/i386/tcg/emit.c.inc | 68 | ||||
| -rw-r--r-- | target/i386/tcg/fpu_helper.c | 60 | ||||
| -rw-r--r-- | target/i386/tcg/translate.c | 1 | ||||
| -rw-r--r-- | tests/tcg/i386/test-avx.c | 17 | ||||
| -rwxr-xr-x | tests/tcg/i386/test-avx.py | 8 |
16 files changed, 377 insertions, 165 deletions
diff --git a/hw/m68k/bootinfo.h b/hw/m68k/bootinfo.h index 897162b818..a3d37e3c80 100644 --- a/hw/m68k/bootinfo.h +++ b/hw/m68k/bootinfo.h @@ -12,66 +12,66 @@ #ifndef HW_M68K_BOOTINFO_H #define HW_M68K_BOOTINFO_H -#define BOOTINFO0(as, base, id) \ +#define BOOTINFO0(base, id) \ do { \ - stw_phys(as, base, id); \ + stw_p(base, id); \ base += 2; \ - stw_phys(as, base, sizeof(struct bi_record)); \ + stw_p(base, sizeof(struct bi_record)); \ base += 2; \ } while (0) -#define BOOTINFO1(as, base, id, value) \ +#define BOOTINFO1(base, id, value) \ do { \ - stw_phys(as, base, id); \ + stw_p(base, id); \ base += 2; \ - stw_phys(as, base, sizeof(struct bi_record) + 4); \ + stw_p(base, sizeof(struct bi_record) + 4); \ base += 2; \ - stl_phys(as, base, value); \ + stl_p(base, value); \ base += 4; \ } while (0) -#define BOOTINFO2(as, base, id, value1, value2) \ +#define BOOTINFO2(base, id, value1, value2) \ do { \ - stw_phys(as, base, id); \ + stw_p(base, id); \ base += 2; \ - stw_phys(as, base, sizeof(struct bi_record) + 8); \ + stw_p(base, sizeof(struct bi_record) + 8); \ base += 2; \ - stl_phys(as, base, value1); \ + stl_p(base, value1); \ base += 4; \ - stl_phys(as, base, value2); \ + stl_p(base, value2); \ base += 4; \ } while (0) -#define BOOTINFOSTR(as, base, id, string) \ +#define BOOTINFOSTR(base, id, string) \ do { \ int i; \ - stw_phys(as, base, id); \ + stw_p(base, id); \ base += 2; \ - stw_phys(as, base, \ + stw_p(base, \ (sizeof(struct bi_record) + strlen(string) + \ 1 /* null termination */ + 3 /* padding */) & ~3); \ base += 2; \ for (i = 0; string[i]; i++) { \ - stb_phys(as, base++, string[i]); \ + stb_p(base++, string[i]); \ } \ - stb_phys(as, base++, 0); \ - base = (base + 3) & ~3; \ + stb_p(base++, 0); \ + base = QEMU_ALIGN_PTR_UP(base, 4); \ } while (0) -#define BOOTINFODATA(as, base, id, data, len) \ +#define BOOTINFODATA(base, id, data, len) \ do { \ int i; \ - stw_phys(as, base, id); \ + stw_p(base, id); \ base += 2; \ - stw_phys(as, base, \ + stw_p(base, \ (sizeof(struct bi_record) + len + \ 2 /* length field */ + 3 /* padding */) & ~3); \ base += 2; \ - stw_phys(as, base, len); \ + stw_p(base, len); \ base += 2; \ for (i = 0; i < len; ++i) { \ - stb_phys(as, base++, data[i]); \ + stb_p(base++, data[i]); \ } \ - base = (base + 3) & ~3; \ + base = QEMU_ALIGN_PTR_UP(base, 4); \ } while (0) #endif diff --git a/hw/m68k/q800.c b/hw/m68k/q800.c index 101ab0f803..e09e244ddc 100644 --- a/hw/m68k/q800.c +++ b/hw/m68k/q800.c @@ -23,6 +23,7 @@ #include "qemu/osdep.h" #include "qemu/units.h" #include "qemu/datadir.h" +#include "qemu/guest-random.h" #include "sysemu/sysemu.h" #include "cpu.h" #include "hw/boards.h" @@ -320,11 +321,22 @@ static const TypeInfo glue_info = { }, }; +typedef struct { + M68kCPU *cpu; + struct bi_record *rng_seed; +} ResetInfo; + static void main_cpu_reset(void *opaque) { - M68kCPU *cpu = opaque; + ResetInfo *reset_info = opaque; + M68kCPU *cpu = reset_info->cpu; CPUState *cs = CPU(cpu); + if (reset_info->rng_seed) { + qemu_guest_getrandom_nofail((void *)reset_info->rng_seed->data + 2, + be16_to_cpu(*(uint16_t *)reset_info->rng_seed->data)); + } + cpu_reset(cs); cpu->env.aregs[7] = ldl_phys(cs->as, 0); cpu->env.pc = ldl_phys(cs->as, 4); @@ -385,6 +397,8 @@ static void q800_init(MachineState *machine) NubusBus *nubus; DeviceState *glue; DriveInfo *dinfo; + ResetInfo *reset_info; + uint8_t rng_seed[32]; linux_boot = (kernel_filename != NULL); @@ -394,9 +408,12 @@ static void q800_init(MachineState *machine) exit(1); } + reset_info = g_new0(ResetInfo, 1); + /* init CPUs */ cpu = M68K_CPU(cpu_create(machine->cpu_type)); - qemu_register_reset(main_cpu_reset, cpu); + reset_info->cpu = cpu; + qemu_register_reset(main_cpu_reset, reset_info); /* RAM */ memory_region_add_subregion(get_system_memory(), 0, machine->ram); @@ -596,6 +613,14 @@ static void q800_init(MachineState *machine) cs = CPU(cpu); if (linux_boot) { uint64_t high; + void *param_blob, *param_ptr, *param_rng_seed; + + if (kernel_cmdline) { + param_blob = g_malloc(strlen(kernel_cmdline) + 1024); + } else { + param_blob = g_malloc(1024); + } + kernel_size = load_elf(kernel_filename, NULL, NULL, NULL, &elf_entry, NULL, &high, NULL, 1, EM_68K, 0, 0); @@ -605,23 +630,24 @@ static void q800_init(MachineState *machine) } stl_phys(cs->as, 4, elf_entry); /* reset initial PC */ parameters_base = (high + 1) & ~1; - - BOOTINFO1(cs->as, parameters_base, BI_MACHTYPE, MACH_MAC); - BOOTINFO1(cs->as, parameters_base, BI_FPUTYPE, FPU_68040); - BOOTINFO1(cs->as, parameters_base, BI_MMUTYPE, MMU_68040); - BOOTINFO1(cs->as, parameters_base, BI_CPUTYPE, CPU_68040); - BOOTINFO1(cs->as, parameters_base, BI_MAC_CPUID, CPUB_68040); - BOOTINFO1(cs->as, parameters_base, BI_MAC_MODEL, MAC_MODEL_Q800); - BOOTINFO1(cs->as, parameters_base, + param_ptr = param_blob; + + BOOTINFO1(param_ptr, BI_MACHTYPE, MACH_MAC); + BOOTINFO1(param_ptr, BI_FPUTYPE, FPU_68040); + BOOTINFO1(param_ptr, BI_MMUTYPE, MMU_68040); + BOOTINFO1(param_ptr, BI_CPUTYPE, CPU_68040); + BOOTINFO1(param_ptr, BI_MAC_CPUID, CPUB_68040); + BOOTINFO1(param_ptr, BI_MAC_MODEL, MAC_MODEL_Q800); + BOOTINFO1(param_ptr, BI_MAC_MEMSIZE, ram_size >> 20); /* in MB */ - BOOTINFO2(cs->as, parameters_base, BI_MEMCHUNK, 0, ram_size); - BOOTINFO1(cs->as, parameters_base, BI_MAC_VADDR, + BOOTINFO2(param_ptr, BI_MEMCHUNK, 0, ram_size); + BOOTINFO1(param_ptr, BI_MAC_VADDR, VIDEO_BASE + macfb_mode->offset); - BOOTINFO1(cs->as, parameters_base, BI_MAC_VDEPTH, graphic_depth); - BOOTINFO1(cs->as, parameters_base, BI_MAC_VDIM, + BOOTINFO1(param_ptr, BI_MAC_VDEPTH, graphic_depth); + BOOTINFO1(param_ptr, BI_MAC_VDIM, (graphic_height << 16) | graphic_width); - BOOTINFO1(cs->as, parameters_base, BI_MAC_VROW, macfb_mode->stride); - BOOTINFO1(cs->as, parameters_base, BI_MAC_SCCBASE, SCC_BASE); + BOOTINFO1(param_ptr, BI_MAC_VROW, macfb_mode->stride); + BOOTINFO1(param_ptr, BI_MAC_SCCBASE, SCC_BASE); rom = g_malloc(sizeof(*rom)); memory_region_init_ram_ptr(rom, NULL, "m68k_fake_mac.rom", @@ -630,10 +656,16 @@ static void q800_init(MachineState *machine) memory_region_add_subregion(get_system_memory(), MACROM_ADDR, rom); if (kernel_cmdline) { - BOOTINFOSTR(cs->as, parameters_base, BI_COMMAND_LINE, + BOOTINFOSTR(param_ptr, BI_COMMAND_LINE, kernel_cmdline); } + /* Pass seed to RNG. */ + param_rng_seed = param_ptr; + qemu_guest_getrandom_nofail(rng_seed, sizeof(rng_seed)); + BOOTINFODATA(param_ptr, BI_RNG_SEED, + rng_seed, sizeof(rng_seed)); + /* load initrd */ if (initrd_filename) { initrd_size = get_image_size(initrd_filename); @@ -646,13 +678,19 @@ static void q800_init(MachineState *machine) initrd_base = (ram_size - initrd_size) & TARGET_PAGE_MASK; load_image_targphys(initrd_filename, initrd_base, ram_size - initrd_base); - BOOTINFO2(cs->as, parameters_base, BI_RAMDISK, initrd_base, + BOOTINFO2(param_ptr, BI_RAMDISK, initrd_base, initrd_size); } else { initrd_base = 0; initrd_size = 0; } - BOOTINFO0(cs->as, parameters_base, BI_LAST); + BOOTINFO0(param_ptr, BI_LAST); + rom_add_blob_fixed_as("bootinfo", param_blob, param_ptr - param_blob, + parameters_base, cs->as); + reset_info->rng_seed = rom_ptr_for_as(cs->as, parameters_base, + param_ptr - param_blob) + + (param_rng_seed - param_blob); + g_free(param_blob); } else { uint8_t *ptr; /* allocate and load BIOS */ diff --git a/hw/m68k/virt.c b/hw/m68k/virt.c index 2f3ffc0de6..89c4108eb5 100644 --- a/hw/m68k/virt.c +++ b/hw/m68k/virt.c @@ -89,6 +89,7 @@ typedef struct { M68kCPU *cpu; hwaddr initial_pc; hwaddr initial_stack; + struct bi_record *rng_seed; } ResetInfo; static void main_cpu_reset(void *opaque) @@ -97,6 +98,11 @@ static void main_cpu_reset(void *opaque) M68kCPU *cpu = reset_info->cpu; CPUState *cs = CPU(cpu); + if (reset_info->rng_seed) { + qemu_guest_getrandom_nofail((void *)reset_info->rng_seed->data + 2, + be16_to_cpu(*(uint16_t *)reset_info->rng_seed->data)); + } + cpu_reset(cs); cpu->env.aregs[7] = reset_info->initial_stack; cpu->env.pc = reset_info->initial_pc; @@ -212,6 +218,13 @@ static void virt_init(MachineState *machine) if (kernel_filename) { CPUState *cs = CPU(cpu); uint64_t high; + void *param_blob, *param_ptr, *param_rng_seed; + + if (kernel_cmdline) { + param_blob = g_malloc(strlen(kernel_cmdline) + 1024); + } else { + param_blob = g_malloc(1024); + } kernel_size = load_elf(kernel_filename, NULL, NULL, NULL, &elf_entry, NULL, &high, NULL, 1, @@ -222,36 +235,38 @@ static void virt_init(MachineState *machine) } reset_info->initial_pc = elf_entry; parameters_base = (high + 1) & ~1; + param_ptr = param_blob; - BOOTINFO1(cs->as, parameters_base, BI_MACHTYPE, MACH_VIRT); - BOOTINFO1(cs->as, parameters_base, BI_FPUTYPE, FPU_68040); - BOOTINFO1(cs->as, parameters_base, BI_MMUTYPE, MMU_68040); - BOOTINFO1(cs->as, parameters_base, BI_CPUTYPE, CPU_68040); - BOOTINFO2(cs->as, parameters_base, BI_MEMCHUNK, 0, ram_size); + BOOTINFO1(param_ptr, BI_MACHTYPE, MACH_VIRT); + BOOTINFO1(param_ptr, BI_FPUTYPE, FPU_68040); + BOOTINFO1(param_ptr, BI_MMUTYPE, MMU_68040); + BOOTINFO1(param_ptr, BI_CPUTYPE, CPU_68040); + BOOTINFO2(param_ptr, BI_MEMCHUNK, 0, ram_size); - BOOTINFO1(cs->as, parameters_base, BI_VIRT_QEMU_VERSION, + BOOTINFO1(param_ptr, BI_VIRT_QEMU_VERSION, ((QEMU_VERSION_MAJOR << 24) | (QEMU_VERSION_MINOR << 16) | (QEMU_VERSION_MICRO << 8))); - BOOTINFO2(cs->as, parameters_base, BI_VIRT_GF_PIC_BASE, + BOOTINFO2(param_ptr, BI_VIRT_GF_PIC_BASE, VIRT_GF_PIC_MMIO_BASE, VIRT_GF_PIC_IRQ_BASE); - BOOTINFO2(cs->as, parameters_base, BI_VIRT_GF_RTC_BASE, + BOOTINFO2(param_ptr, BI_VIRT_GF_RTC_BASE, VIRT_GF_RTC_MMIO_BASE, VIRT_GF_RTC_IRQ_BASE); - BOOTINFO2(cs->as, parameters_base, BI_VIRT_GF_TTY_BASE, + BOOTINFO2(param_ptr, BI_VIRT_GF_TTY_BASE, VIRT_GF_TTY_MMIO_BASE, VIRT_GF_TTY_IRQ_BASE); - BOOTINFO2(cs->as, parameters_base, BI_VIRT_CTRL_BASE, + BOOTINFO2(param_ptr, BI_VIRT_CTRL_BASE, VIRT_CTRL_MMIO_BASE, VIRT_CTRL_IRQ_BASE); - BOOTINFO2(cs->as, parameters_base, BI_VIRT_VIRTIO_BASE, + BOOTINFO2(param_ptr, BI_VIRT_VIRTIO_BASE, VIRT_VIRTIO_MMIO_BASE, VIRT_VIRTIO_IRQ_BASE); if (kernel_cmdline) { - BOOTINFOSTR(cs->as, parameters_base, BI_COMMAND_LINE, + BOOTINFOSTR(param_ptr, BI_COMMAND_LINE, kernel_cmdline); } - /* Pass seed to RNG. */ - qemu_guest_getrandom_nofail(rng_seed, sizeof(rng_seed)); - BOOTINFODATA(cs->as, parameters_base, BI_VIRT_RNG_SEED, - rng_seed, sizeof(rng_seed)); + /* Pass seed to RNG. */ + param_rng_seed = param_ptr; + qemu_guest_getrandom_nofail(rng_seed, sizeof(rng_seed)); + BOOTINFODATA(param_ptr, BI_RNG_SEED, + rng_seed, sizeof(rng_seed)); /* load initrd */ if (initrd_filename) { @@ -265,13 +280,19 @@ static void virt_init(MachineState *machine) initrd_base = (ram_size - initrd_size) & TARGET_PAGE_MASK; load_image_targphys(initrd_filename, initrd_base, ram_size - initrd_base); - BOOTINFO2(cs->as, parameters_base, BI_RAMDISK, initrd_base, + BOOTINFO2(param_ptr, BI_RAMDISK, initrd_base, initrd_size); } else { initrd_base = 0; initrd_size = 0; } - BOOTINFO0(cs->as, parameters_base, BI_LAST); + BOOTINFO0(param_ptr, BI_LAST); + rom_add_blob_fixed_as("bootinfo", param_blob, param_ptr - param_blob, + parameters_base, cs->as); + reset_info->rng_seed = rom_ptr_for_as(cs->as, parameters_base, + param_ptr - param_blob) + + (param_rng_seed - param_blob); + g_free(param_blob); } } diff --git a/include/standard-headers/asm-m68k/bootinfo-virt.h b/include/standard-headers/asm-m68k/bootinfo-virt.h index 1b1ffd4705..75ac6bbd7d 100644 --- a/include/standard-headers/asm-m68k/bootinfo-virt.h +++ b/include/standard-headers/asm-m68k/bootinfo-virt.h @@ -12,7 +12,9 @@ #define BI_VIRT_GF_TTY_BASE 0x8003 #define BI_VIRT_VIRTIO_BASE 0x8004 #define BI_VIRT_CTRL_BASE 0x8005 -#define BI_VIRT_RNG_SEED 0x8006 + +/* No longer used -- replaced with BI_RNG_SEED -- but don't reuse this index: + * #define BI_VIRT_RNG_SEED 0x8006 */ #define VIRT_BOOTI_VERSION MK_BI_VERSION(2, 0) diff --git a/include/standard-headers/asm-m68k/bootinfo.h b/include/standard-headers/asm-m68k/bootinfo.h index 7b790e8ec8..b7a8dd2514 100644 --- a/include/standard-headers/asm-m68k/bootinfo.h +++ b/include/standard-headers/asm-m68k/bootinfo.h @@ -57,7 +57,13 @@ struct mem_info { /* (struct mem_info) */ #define BI_COMMAND_LINE 0x0007 /* kernel command line parameters */ /* (string) */ - +/* + * A random seed used to initialize the RNG. Record format: + * + * - length [ 2 bytes, 16-bit big endian ] + * - seed data [ `length` bytes, padded to preserve 4-byte struct alignment ] + */ +#define BI_RNG_SEED 0x0008 /* * Linux/m68k Architectures (BI_MACHTYPE) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 0ebd610faa..22b681ca37 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -625,13 +625,13 @@ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, CPUID_EXT_SSE41 | CPUID_EXT_SSE42 | CPUID_EXT_POPCNT | \ CPUID_EXT_XSAVE | /* CPUID_EXT_OSXSAVE is dynamic */ \ CPUID_EXT_MOVBE | CPUID_EXT_AES | CPUID_EXT_HYPERVISOR | \ - CPUID_EXT_RDRAND | CPUID_EXT_AVX) + CPUID_EXT_RDRAND | CPUID_EXT_AVX | CPUID_EXT_F16C | \ + CPUID_EXT_FMA) /* missing: CPUID_EXT_DTES64, CPUID_EXT_DSCPL, CPUID_EXT_VMX, CPUID_EXT_SMX, - CPUID_EXT_EST, CPUID_EXT_TM2, CPUID_EXT_CID, CPUID_EXT_FMA, + CPUID_EXT_EST, CPUID_EXT_TM2, CPUID_EXT_CID, CPUID_EXT_XTPR, CPUID_EXT_PDCM, CPUID_EXT_PCID, CPUID_EXT_DCA, - CPUID_EXT_X2APIC, CPUID_EXT_TSC_DEADLINE_TIMER, - CPUID_EXT_F16C */ + CPUID_EXT_X2APIC, CPUID_EXT_TSC_DEADLINE_TIMER */ #ifdef TARGET_X86_64 #define TCG_EXT2_X86_64_FEATURES (CPUID_EXT2_SYSCALL | CPUID_EXT2_LM) diff --git a/target/i386/cpu.h b/target/i386/cpu.h index dad2b2db8d..d4bc19577a 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1258,6 +1258,7 @@ typedef union ZMMReg { uint16_t _w_ZMMReg[512 / 16]; uint32_t _l_ZMMReg[512 / 32]; uint64_t _q_ZMMReg[512 / 64]; + float16 _h_ZMMReg[512 / 16]; float32 _s_ZMMReg[512 / 32]; float64 _d_ZMMReg[512 / 64]; XMMReg _x_ZMMReg[512 / 128]; @@ -1282,6 +1283,7 @@ typedef struct BNDCSReg { #define ZMM_B(n) _b_ZMMReg[63 - (n)] #define ZMM_W(n) _w_ZMMReg[31 - (n)] #define ZMM_L(n) _l_ZMMReg[15 - (n)] +#define ZMM_H(n) _h_ZMMReg[31 - (n)] #define ZMM_S(n) _s_ZMMReg[15 - (n)] #define ZMM_Q(n) _q_ZMMReg[7 - (n)] #define ZMM_D(n) _d_ZMMReg[7 - (n)] @@ -1301,6 +1303,7 @@ typedef struct BNDCSReg { #define ZMM_B(n) _b_ZMMReg[n] #define ZMM_W(n) _w_ZMMReg[n] #define ZMM_L(n) _l_ZMMReg[n] +#define ZMM_H(n) _h_ZMMReg[n] #define ZMM_S(n) _s_ZMMReg[n] #define ZMM_Q(n) _q_ZMMReg[n] #define ZMM_D(n) _d_ZMMReg[n] diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h index d35fc15c65..3cbc36a59d 100644 --- a/target/i386/ops_sse.h +++ b/target/i386/ops_sse.h @@ -586,6 +586,35 @@ void glue(helper_cvtpd2ps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) } } +#if SHIFT >= 1 +void glue(helper_cvtph2ps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) +{ + int i; + + for (i = 2 << SHIFT; --i >= 0; ) { + d->ZMM_S(i) = float16_to_float32(s->ZMM_H(i), true, &env->sse_status); + } +} + +void glue(helper_cvtps2ph, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, int mode) +{ + int i; + FloatRoundMode prev_rounding_mode = env->sse_status.float_rounding_mode; + if (!(mode & (1 << 2))) { + set_x86_rounding_mode(mode & 3, &env->sse_status); + } + + for (i = 0; i < 2 << SHIFT; i++) { + d->ZMM_H(i) = float32_to_float16(s->ZMM_S(i), true, &env->sse_status); + } + for (i >>= 2; i < 1 << SHIFT; i++) { + d->Q(i) = 0; + } + + env->sse_status.float_rounding_mode = prev_rounding_mode; +} +#endif + #if SHIFT == 1 void helper_cvtss2sd(CPUX86State *env, Reg *d, Reg *v, Reg *s) { @@ -1684,20 +1713,7 @@ void glue(helper_roundps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, prev_rounding_mode = env->sse_status.float_rounding_mode; if (!(mode & (1 << 2))) { - switch (mode & 3) { - case 0: - set_float_rounding_mode(float_round_nearest_even, &env->sse_status); - break; - case 1: - set_float_rounding_mode(float_round_down, &env->sse_status); - break; - case 2: - set_float_rounding_mode(float_round_up, &env->sse_status); - break; - case 3: - set_float_rounding_mode(float_round_to_zero, &env->sse_status); - break; - } + set_x86_rounding_mode(mode & 3, &env->sse_status); } for (i = 0; i < 2 << SHIFT; i++) { @@ -1721,20 +1737,7 @@ void glue(helper_roundpd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, prev_rounding_mode = env->sse_status.float_rounding_mode; if (!(mode & (1 << 2))) { - switch (mode & 3) { - case 0: - set_float_rounding_mode(float_round_nearest_even, &env->sse_status); - break; - case 1: - set_float_rounding_mode(float_round_down, &env->sse_status); - break; - case 2: - set_float_rounding_mode(float_round_up, &env->sse_status); - break; - case 3: - set_float_rounding_mode(float_round_to_zero, &env->sse_status); - break; - } + set_x86_rounding_mode(mode & 3, &env->sse_status); } for (i = 0; i < 1 << SHIFT; i++) { @@ -1759,20 +1762,7 @@ void glue(helper_roundss, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s, prev_rounding_mode = env->sse_status.float_rounding_mode; if (!(mode & (1 << 2))) { - switch (mode & 3) { - case 0: - set_float_rounding_mode(float_round_nearest_even, &env->sse_status); - break; - case 1: - set_float_rounding_mode(float_round_down, &env->sse_status); - break; - case 2: - set_float_rounding_mode(float_round_up, &env->sse_status); - break; - case 3: - set_float_rounding_mode(float_round_to_zero, &env->sse_status); - break; - } + set_x86_rounding_mode(mode & 3, &env->sse_status); } d->ZMM_S(0) = float32_round_to_int(s->ZMM_S(0), &env->sse_status); @@ -1797,20 +1787,7 @@ void glue(helper_roundsd, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s, prev_rounding_mode = env->sse_status.float_rounding_mode; if (!(mode & (1 << 2))) { - switch (mode & 3) { - case 0: - set_float_rounding_mode(float_round_nearest_even, &env->sse_status); - break; - case 1: - set_float_rounding_mode(float_round_down, &env->sse_status); - break; - case 2: - set_float_rounding_mode(float_round_up, &env->sse_status); - break; - case 3: - set_float_rounding_mode(float_round_to_zero, &env->sse_status); - break; - } + set_x86_rounding_mode(mode & 3, &env->sse_status); } d->ZMM_D(0) = float64_round_to_int(s->ZMM_D(0), &env->sse_status); @@ -2545,6 +2522,33 @@ void helper_vpermd_ymm(Reg *d, Reg *v, Reg *s) } #endif +/* FMA3 op helpers */ +#if SHIFT == 1 +#define SSE_HELPER_FMAS(name, elem, F) \ + void name(CPUX86State *env, Reg *d, Reg *a, Reg *b, Reg *c, int flags) \ + { \ + d->elem(0) = F(a->elem(0), b->elem(0), c->elem(0), flags, &env->sse_status); \ + } +#define SSE_HELPER_FMAP(name, elem, num, F) \ + void glue(name, SUFFIX)(CPUX86State *env, Reg *d, Reg *a, Reg *b, Reg *c, \ + int flags, int flip) \ + { \ + int i; \ + for (i = 0; i < num; i++) { \ + d->elem(i) = F(a->elem(i), b->elem(i), c->elem(i), flags, &env->sse_status); \ + flags ^= flip; \ + } \ + } + +SSE_HELPER_FMAS(helper_fma4ss, ZMM_S, float32_muladd) +SSE_HELPER_FMAS(helper_fma4sd, ZMM_D, float64_muladd) +#endif + +#if SHIFT >= 1 +SSE_HELPER_FMAP(helper_fma4ps, ZMM_S, 2 << SHIFT, float32_muladd) +SSE_HELPER_FMAP(helper_fma4pd, ZMM_D, 1 << SHIFT, float64_muladd) +#endif + #undef SSE_HELPER_S #undef LANE_WIDTH diff --git a/target/i386/ops_sse_header.h b/target/i386/ops_sse_header.h index 2f1f811f9f..8a7b2f4e2f 100644 --- a/target/i386/ops_sse_header.h +++ b/target/i386/ops_sse_header.h @@ -353,6 +353,23 @@ DEF_HELPER_4(glue(aeskeygenassist, SUFFIX), void, env, Reg, Reg, i32) DEF_HELPER_5(glue(pclmulqdq, SUFFIX), void, env, Reg, Reg, Reg, i32) #endif +/* F16C helpers */ +#if SHIFT >= 1 +DEF_HELPER_3(glue(cvtph2ps, SUFFIX), void, env, Reg, Reg) +DEF_HELPER_4(glue(cvtps2ph, SUFFIX), void, env, Reg, Reg, int) +#endif + +/* FMA3 helpers */ +#if SHIFT == 1 +DEF_HELPER_6(fma4ss, void, env, Reg, Reg, Reg, Reg, int) +DEF_HELPER_6(fma4sd, void, env, Reg, Reg, Reg, Reg, int) +#endif + +#if SHIFT >= 1 +DEF_HELPER_7(glue(fma4ps, SUFFIX), void, env, Reg, Reg, Reg, Reg, int, int) +DEF_HELPER_7(glue(fma4pd, SUFFIX), void, env, Reg, Reg, Reg, Reg, int, int) +#endif + /* AVX helpers */ #if SHIFT >= 1 DEF_HELPER_4(glue(vpermilpd, SUFFIX), void, env, Reg, Reg, Reg) diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc index 8e1eb9db42..e4878b967f 100644 --- a/target/i386/tcg/decode-new.c.inc +++ b/target/i386/tcg/decode-new.c.inc @@ -336,6 +336,7 @@ static const X86OpEntry opcodes_0F38_00toEF[240] = { [0x07] = X86_OP_ENTRY3(PHSUBSW, V,x, H,x, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66), [0x10] = X86_OP_ENTRY2(PBLENDVB, V,x, W,x, vex4 cpuid(SSE41) avx2_256 p_66), + [0x13] = X86_OP_ENTRY2(VCVTPH2PS, V,x, W,ph, vex11 cpuid(F16C) p_66), [0x14] = X86_OP_ENTRY2(BLENDVPS, V,x, W,x, vex4 cpuid(SSE41) p_66), [0x15] = X86_OP_ENTRY2(BLENDVPD, V,x, W,x, vex4 cpuid(SSE41) p_66), /* Listed incorrectly as type 4 */ @@ -375,6 +376,16 @@ static const X86OpEntry opcodes_0F38_00toEF[240] = { [0x92] = X86_OP_ENTRY3(VPGATHERD, V,x, H,x, M,d, vex12 cpuid(AVX2) p_66), /* vgatherdps/d */ [0x93] = X86_OP_ENTRY3(VPGATHERQ, V,x, H,x, M,q, vex12 cpuid(AVX2) p_66), /* vgatherqps/d */ + /* Should be exception type 2 but they do not have legacy SSE equivalents? */ + [0x96] = X86_OP_ENTRY3(VFMADDSUB132Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + [0x97] = X86_OP_ENTRY3(VFMSUBADD132Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + + [0xa6] = X86_OP_ENTRY3(VFMADDSUB213Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + [0xa7] = X86_OP_ENTRY3(VFMSUBADD213Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + + [0xb6] = X86_OP_ENTRY3(VFMADDSUB231Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + [0xb7] = X86_OP_ENTRY3(VFMSUBADD231Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + [0x08] = X86_OP_ENTRY3(PSIGNB, V,x, H,x, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66), [0x09] = X86_OP_ENTRY3(PSIGNW, V,x, H,x, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66), [0x0a] = X86_OP_ENTRY3(PSIGND, V,x, H,x, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66), @@ -420,6 +431,34 @@ static const X86OpEntry opcodes_0F38_00toEF[240] = { [0x8c] = X86_OP_ENTRY3(VPMASKMOV, V,x, H,x, WM,x, vex6 cpuid(AVX2) p_66), [0x8e] = X86_OP_ENTRY3(VPMASKMOV_st, M,x, V,x, H,x, vex6 cpuid(AVX2) p_66), + /* Should be exception type 2 or 3 but they do not have legacy SSE equivalents? */ + [0x98] = X86_OP_ENTRY3(VFMADD132Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + [0x99] = X86_OP_ENTRY3(VFMADD132Sx, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + [0x9a] = X86_OP_ENTRY3(VFMSUB132Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + [0x9b] = X86_OP_ENTRY3(VFMSUB132Sx, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + [0x9c] = X86_OP_ENTRY3(VFNMADD132Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + [0x9d] = X86_OP_ENTRY3(VFNMADD132Sx, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + [0x9e] = X86_OP_ENTRY3(VFNMSUB132Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + [0x9f] = X86_OP_ENTRY3(VFNMSUB132Sx, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + + [0xa8] = X86_OP_ENTRY3(VFMADD213Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + [0xa9] = X86_OP_ENTRY3(VFMADD213Sx, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + [0xaa] = X86_OP_ENTRY3(VFMSUB213Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + [0xab] = X86_OP_ENTRY3(VFMSUB213Sx, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + [0xac] = X86_OP_ENTRY3(VFNMADD213Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + [0xad] = X86_OP_ENTRY3(VFNMADD213Sx, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + [0xae] = X86_OP_ENTRY3(VFNMSUB213Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + [0xaf] = X86_OP_ENTRY3(VFNMSUB213Sx, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + + [0xb8] = X86_OP_ENTRY3(VFMADD231Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + [0xb9] = X86_OP_ENTRY3(VFMADD231Sx, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + [0xba] = X86_OP_ENTRY3(VFMSUB231Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + [0xbb] = X86_OP_ENTRY3(VFMSUB231Sx, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + [0xbc] = X86_OP_ENTRY3(VFNMADD231Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + [0xbd] = X86_OP_ENTRY3(VFNMADD231Sx, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + [0xbe] = X86_OP_ENTRY3(VFNMSUB231Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + [0xbf] = X86_OP_ENTRY3(VFNMSUB231Sx, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + [0xdb] = X86_OP_ENTRY3(VAESIMC, V,dq, None,None, W,dq, vex4 cpuid(AES) p_66), [0xdc] = X86_OP_ENTRY3(VAESENC, V,x, H,x, W,x, vex4 cpuid(AES) p_66), [0xdd] = X86_OP_ENTRY3(VAESENCLAST, V,x, H,x, W,x, vex4 cpuid(AES) p_66), @@ -525,6 +564,7 @@ static const X86OpEntry opcodes_0F3A[256] = { [0x15] = X86_OP_ENTRY3(PEXTRW, E,w, V,dq, I,b, vex5 cpuid(SSE41) zext0 p_66), [0x16] = X86_OP_ENTRY3(PEXTR, E,y, V,dq, I,b, vex5 cpuid(SSE41) p_66), [0x17] = X86_OP_ENTRY3(VEXTRACTPS, E,d, V,dq, I,b, vex5 cpuid(SSE41) p_66), + [0x1d] = X86_OP_ENTRY3(VCVTPS2PH, W,ph, V,x, I,b, vex11 cpuid(F16C) p_66), [0x20] = X86_OP_ENTRY4(PINSRB, V,dq, H,dq, E,b, vex5 cpuid(SSE41) zext2 p_66), [0x21] = X86_OP_GROUP0(VINSERTPS), @@ -1051,6 +1091,10 @@ static bool decode_op_size(DisasContext *s, X86OpEntry *e, X86OpSize size, MemOp *ot = s->vex_l ? MO_256 : MO_128; return true; + case X86_SIZE_ph: /* SSE/AVX packed half precision */ + *ot = s->vex_l ? MO_128 : MO_64; + return true; + case X86_SIZE_d64: /* Default to 64-bit in 64-bit mode */ *ot = CODE64(s) && s->dflag == MO_32 ? MO_64 : s->dflag; return true; @@ -1342,6 +1386,10 @@ static bool has_cpuid_feature(DisasContext *s, X86CPUIDFeature cpuid) switch (cpuid) { case X86_FEAT_None: return true; + case X86_FEAT_F16C: + return (s->cpuid_ext_features & CPUID_EXT_F16C); + case X86_FEAT_FMA: + return (s->cpuid_ext_features & CPUID_EXT_FMA); case X86_FEAT_MOVBE: return (s->cpuid_ext_features & CPUID_EXT_MOVBE); case X86_FEAT_PCLMULQDQ: diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h index f159c26850..cb6b8bcf67 100644 --- a/target/i386/tcg/decode-new.h +++ b/target/i386/tcg/decode-new.h @@ -92,6 +92,7 @@ typedef enum X86OpSize { /* Custom */ X86_SIZE_d64, X86_SIZE_f64, + X86_SIZE_ph, /* SSE/AVX packed half precision */ } X86OpSize; typedef enum X86CPUIDFeature { @@ -103,6 +104,8 @@ typedef enum X86CPUIDFeature { X86_FEAT_AVX2, X86_FEAT_BMI1, X86_FEAT_BMI2, + X86_FEAT_F16C, + X86_FEAT_FMA, X86_FEAT_MOVBE, X86_FEAT_PCLMULQDQ, X86_FEAT_SSE, diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index 27eca591a9..7037ff91c6 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -39,6 +39,11 @@ typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv val); typedef void (*SSEFunc_0_epppti)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_ptr reg_c, TCGv a0, TCGv_i32 scale); +typedef void (*SSEFunc_0_eppppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b, + TCGv_ptr reg_c, TCGv_ptr reg_d, TCGv_i32 flags); +typedef void (*SSEFunc_0_eppppii)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b, + TCGv_ptr reg_c, TCGv_ptr reg_d, TCGv_i32 even, + TCGv_i32 odd); static inline TCGv_i32 tcg_constant8u_i32(uint8_t val) { @@ -296,7 +301,7 @@ static void gen_writeback(DisasContext *s, X86DecodedInsn *decode, int opn, TCGv case X86_OP_MMX: break; case X86_OP_SSE: - if ((s->prefix & PREFIX_VEX) && op->ot == MO_128) { + if (!op->has_ea && (s->prefix & PREFIX_VEX) && op->ot <= MO_128) { tcg_gen_gvec_dup_imm(MO_64, offsetof(CPUX86State, xmm_regs[op->n].ZMM_X(1)), 16, 16, 0); @@ -491,6 +496,52 @@ FP_SSE(VMIN, min) FP_SSE(VDIV, div) FP_SSE(VMAX, max) +#define FMA_SSE_PACKED(uname, ptr0, ptr1, ptr2, even, odd) \ +static void gen_##uname##Px(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ +{ \ + SSEFunc_0_eppppii xmm = s->vex_w ? gen_helper_fma4pd_xmm : gen_helper_fma4ps_xmm; \ + SSEFunc_0_eppppii ymm = s->vex_w ? gen_helper_fma4pd_ymm : gen_helper_fma4ps_ymm; \ + SSEFunc_0_eppppii fn = s->vex_l ? ymm : xmm; \ + \ + fn(cpu_env, OP_PTR0, ptr0, ptr1, ptr2, \ + tcg_constant_i32(even), \ + tcg_constant_i32((even) ^ (odd))); \ +} + +#define FMA_SSE(uname, ptr0, ptr1, ptr2, flags) \ +FMA_SSE_PACKED(uname, ptr0, ptr1, ptr2, flags, flags) \ +static void gen_##uname##Sx(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ +{ \ + SSEFunc_0_eppppi fn = s->vex_w ? gen_helper_fma4sd : gen_helper_fma4ss; \ + \ + fn(cpu_env, OP_PTR0, ptr0, ptr1, ptr2, \ + tcg_constant_i32(flags)); \ +} \ + +FMA_SSE(VFMADD231, OP_PTR1, OP_PTR2, OP_PTR0, 0) +FMA_SSE(VFMADD213, OP_PTR1, OP_PTR0, OP_PTR2, 0) +FMA_SSE(VFMADD132, OP_PTR0, OP_PTR2, OP_PTR1, 0) + +FMA_SSE(VFNMADD231, OP_PTR1, OP_PTR2, OP_PTR0, float_muladd_negate_product) +FMA_SSE(VFNMADD213, OP_PTR1, OP_PTR0, OP_PTR2, float_muladd_negate_product) +FMA_SSE(VFNMADD132, OP_PTR0, OP_PTR2, OP_PTR1, float_muladd_negate_product) + +FMA_SSE(VFMSUB231, OP_PTR1, OP_PTR2, OP_PTR0, float_muladd_negate_c) +FMA_SSE(VFMSUB213, OP_PTR1, OP_PTR0, OP_PTR2, float_muladd_negate_c) +FMA_SSE(VFMSUB132, OP_PTR0, OP_PTR2, OP_PTR1, float_muladd_negate_c) + +FMA_SSE(VFNMSUB231, OP_PTR1, OP_PTR2, OP_PTR0, float_muladd_negate_c|float_muladd_negate_product) +FMA_SSE(VFNMSUB213, OP_PTR1, OP_PTR0, OP_PTR2, float_muladd_negate_c|float_muladd_negate_product) +FMA_SSE(VFNMSUB132, OP_PTR0, OP_PTR2, OP_PTR1, float_muladd_negate_c|float_muladd_negate_product) + +FMA_SSE_PACKED(VFMADDSUB231, OP_PTR1, OP_PTR2, OP_PTR0, float_muladd_negate_c, 0) +FMA_SSE_PACKED(VFMADDSUB213, OP_PTR1, OP_PTR0, OP_PTR2, float_muladd_negate_c, 0) +FMA_SSE_PACKED(VFMADDSUB132, OP_PTR0, OP_PTR2, OP_PTR1, float_muladd_negate_c, 0) + +FMA_SSE_PACKED(VFMSUBADD231, OP_PTR1, OP_PTR2, OP_PTR0, 0, float_muladd_negate_c) +FMA_SSE_PACKED(VFMSUBADD213, OP_PTR1, OP_PTR0, OP_PTR2, 0, float_muladd_negate_c) +FMA_SSE_PACKED(VFMSUBADD132, OP_PTR0, OP_PTR2, OP_PTR1, 0, float_muladd_negate_c) + #define FP_UNPACK_SSE(uname, lname) \ static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ { \ @@ -852,6 +903,7 @@ UNARY_INT_SSE(VCVTTPD2DQ, cvttpd2dq) UNARY_INT_SSE(VCVTDQ2PS, cvtdq2ps) UNARY_INT_SSE(VCVTPS2DQ, cvtps2dq) UNARY_INT_SSE(VCVTTPS2DQ, cvttps2dq) +UNARY_INT_SSE(VCVTPH2PS, cvtph2ps) static inline void gen_unary_imm_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, @@ -1868,6 +1920,20 @@ static void gen_VCVTfp2fp(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec gen_helper_cvtsd2ss, gen_helper_cvtss2sd); } +static void gen_VCVTPS2PH(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_unary_imm_fp_sse(s, env, decode, + gen_helper_cvtps2ph_xmm, + gen_helper_cvtps2ph_ymm); + /* + * VCVTPS2PH is the only instruction that performs an operation on a + * register source and then *stores* into memory. + */ + if (decode->op[0].has_ea) { + gen_store_sse(s, decode, decode->op[0].offset); + } +} + static void gen_VCVTSI2Sx(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { int vec_len = vector_len(s, decode); diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c index a6a90a1817..6f3741b635 100644 --- a/target/i386/tcg/fpu_helper.c +++ b/target/i386/tcg/fpu_helper.c @@ -32,7 +32,8 @@ #define ST(n) (env->fpregs[(env->fpstt + (n)) & 7].d) #define ST1 ST(1) -#define FPU_RC_MASK 0xc00 +#define FPU_RC_SHIFT 10 +#define FPU_RC_MASK (3 << FPU_RC_SHIFT) #define FPU_RC_NEAR 0x000 #define FPU_RC_DOWN 0x400 #define FPU_RC_UP 0x800 @@ -685,28 +686,26 @@ uint32_t helper_fnstcw(CPUX86State *env) return env->fpuc; } +static void set_x86_rounding_mode(unsigned mode, float_status *status) +{ + static FloatRoundMode x86_round_mode[4] = { + float_round_nearest_even, + float_round_down, + float_round_up, + float_round_to_zero + }; + assert(mode < ARRAY_SIZE(x86_round_mode)); + set_float_rounding_mode(x86_round_mode[mode], status); +} + void update_fp_status(CPUX86State *env) { - FloatRoundMode rnd_mode; + int rnd_mode; FloatX80RoundPrec rnd_prec; /* set rounding mode */ - switch (env->fpuc & FPU_RC_MASK) { - default: - case FPU_RC_NEAR: - rnd_mode = float_round_nearest_even; - break; - case FPU_RC_DOWN: - rnd_mode = float_round_down; - break; - case FPU_RC_UP: - rnd_mode = float_round_up; - break; - case FPU_RC_CHOP: - rnd_mode = float_round_to_zero; - break; - } - set_float_rounding_mode(rnd_mode, &env->fp_status); + rnd_mode = (env->fpuc & FPU_RC_MASK) >> FPU_RC_SHIFT; + set_x86_rounding_mode(rnd_mode, &env->fp_status); switch ((env->fpuc >> 8) & 3) { case 0: @@ -3038,11 +3037,8 @@ void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask) /* XXX: optimize by storing fptt and fptags in the static cpu state */ #define SSE_DAZ 0x0040 -#define SSE_RC_MASK 0x6000 -#define SSE_RC_NEAR 0x0000 -#define SSE_RC_DOWN 0x2000 -#define SSE_RC_UP 0x4000 -#define SSE_RC_CHOP 0x6000 +#define SSE_RC_SHIFT 13 +#define SSE_RC_MASK (3 << SSE_RC_SHIFT) #define SSE_FZ 0x8000 void update_mxcsr_status(CPUX86State *env) @@ -3051,22 +3047,8 @@ void update_mxcsr_status(CPUX86State *env) int rnd_type; /* set rounding mode */ - switch (mxcsr & SSE_RC_MASK) { - default: - case SSE_RC_NEAR: - rnd_type = float_round_nearest_even; - break; - case SSE_RC_DOWN: - rnd_type = float_round_down; - break; - case SSE_RC_UP: - rnd_type = float_round_up; - break; - case SSE_RC_CHOP: - rnd_type = float_round_to_zero; - break; - } - set_float_rounding_mode(rnd_type, &env->sse_status); + rnd_type = (mxcsr & SSE_RC_MASK) >> SSE_RC_SHIFT; + set_x86_rounding_mode(rnd_type, &env->sse_status); /* Set exception flags. */ set_float_exception_flags((mxcsr & FPUS_IE ? float_flag_invalid : 0) | diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index e19d5c1c64..85be2e58c2 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -26,6 +26,7 @@ #include "tcg/tcg-op-gvec.h" #include "exec/cpu_ldst.h" #include "exec/translator.h" +#include "fpu/softfloat.h" #include "exec/helper-proto.h" #include "exec/helper-gen.h" diff --git a/tests/tcg/i386/test-avx.c b/tests/tcg/i386/test-avx.c index 953e2906fe..c39c0e5bce 100644 --- a/tests/tcg/i386/test-avx.c +++ b/tests/tcg/i386/test-avx.c @@ -28,6 +28,7 @@ typedef struct { } TestDef; reg_state initI; +reg_state initF16; reg_state initF32; reg_state initF64; @@ -221,6 +222,7 @@ static void run_all(void) #define ARRAY_LEN(x) (sizeof(x) / sizeof(x[0])) +uint16_t val_f16[] = { 0x4000, 0xbc00, 0x44cd, 0x3a66, 0x4200, 0x7a1a, 0x4780, 0x4826 }; float val_f32[] = {2.0, -1.0, 4.8, 0.8, 3, -42.0, 5e6, 7.5, 8.3}; double val_f64[] = {2.0, -1.0, 4.8, 0.8, 3, -42.0, 5e6, 7.5}; v4di val_i64[] = { @@ -241,6 +243,12 @@ v4di indexd = {0x00000002000000efull, 0xfffffff500000010ull, v4di gather_mem[0x20]; +void init_f16reg(v4di *r) +{ + memset(r, 0, sizeof(*r)); + memcpy(r, val_f16, sizeof(val_f16)); +} + void init_f32reg(v4di *r) { static int n; @@ -315,6 +323,15 @@ int main(int argc, char *argv[]) printf("Int:\n"); dump_regs(&initI); + init_all(&initF16); + init_f16reg(&initF16.ymm[10]); + init_f16reg(&initF16.ymm[11]); + init_f16reg(&initF16.ymm[12]); + init_f16reg(&initF16.mem0[1]); + initF16.ff = 16; + printf("F16:\n"); + dump_regs(&initF16); + init_all(&initF32); init_f32reg(&initF32.ymm[10]); init_f32reg(&initF32.ymm[11]); diff --git a/tests/tcg/i386/test-avx.py b/tests/tcg/i386/test-avx.py index 02982329f1..d9ca00a49e 100755 --- a/tests/tcg/i386/test-avx.py +++ b/tests/tcg/i386/test-avx.py @@ -9,6 +9,7 @@ from fnmatch import fnmatch archs = [ "SSE", "SSE2", "SSE3", "SSSE3", "SSE4_1", "SSE4_2", "AES", "AVX", "AVX2", "AES+AVX", "VAES+AVX", + "F16C", "FMA", ] ignore = set(["FISTTP", @@ -19,6 +20,7 @@ imask = { 'vBLENDPS': 0x0f, 'CMP[PS][SD]': 0x07, 'VCMP[PS][SD]': 0x1f, + 'vCVTPS2PH': 0x7, 'vDPPD': 0x33, 'vDPPS': 0xff, 'vEXTRACTPS': 0x03, @@ -221,8 +223,10 @@ def ArgGenerator(arg, op): class InsnGenerator: def __init__(self, op, args): self.op = op - if op[-2:] in ["PS", "PD", "SS", "SD"]: - if op[-1] == 'S': + if op[-2:] in ["PH", "PS", "PD", "SS", "SD"]: + if op[-1] == 'H': + self.optype = 'F16' + elif op[-1] == 'S': self.optype = 'F32' else: self.optype = 'F64' |