diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2024-06-06 13:47:47 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2024-06-06 13:47:47 +0200 |
| commit | 0cedc70eb36e0c136bfa90be1e275c8792052195 (patch) | |
| tree | 266012e48b5960bd8b3b21e88b705d587b05530a /src | |
| parent | 1612687acfd6200515fc38fd46180f09f5a41661 (diff) | |
| download | box64-0cedc70eb36e0c136bfa90be1e275c8792052195.tar.gz box64-0cedc70eb36e0c136bfa90be1e275c8792052195.zip | |
[ARM64_DYNAREC] Added a new small batch of AVX/BMI2 opcodes
Diffstat (limited to 'src')
| -rw-r--r-- | src/core.c | 5 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx.c | 2 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_0f38.c | 100 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c | 49 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_f3_0f.c | 18 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_f20f.c | 14 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_f30f.c | 23 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_helper.h | 2 | ||||
| -rw-r--r-- | src/include/debug.h | 1 | ||||
| -rw-r--r-- | src/tools/my_cpuid.c | 3 |
10 files changed, 207 insertions, 10 deletions
diff --git a/src/core.c b/src/core.c index 01e40fd9..6391b302 100644 --- a/src/core.c +++ b/src/core.c @@ -92,6 +92,7 @@ int arm64_aes = 0; int arm64_pmull = 0; int arm64_crc32 = 0; int arm64_atomics = 0; +int arm64_asimdhp = 0; int arm64_sha1 = 0; int arm64_sha2 = 0; int arm64_uscat = 0; @@ -404,6 +405,8 @@ HWCAP2_ECV // ATOMIC use are disable for now. They crashes Batman Arkham Knight, bossibly other (also seems to make steamwebhelper unstable) if(hwcap&HWCAP_ATOMICS) arm64_atomics = 1; + if(hwcap&HWCAP_ASIMDHP) + arm64_asimdhp = 1; #ifdef HWCAP_SHA1 if(hwcap&HWCAP_SHA1) arm64_sha1 = 1; @@ -442,6 +445,8 @@ HWCAP2_ECV printf_log(LOG_INFO, " PMULL"); if(arm64_atomics) printf_log(LOG_INFO, " ATOMICS"); + if(arm64_asimdhp) + printf_log(LOG_INFO, " ASIMDHP"); if(arm64_sha1) printf_log(LOG_INFO, " SHA1"); if(arm64_sha2) diff --git a/src/dynarec/arm64/dynarec_arm64_avx.c b/src/dynarec/arm64/dynarec_arm64_avx.c index 4a89afd1..ec00dddd 100644 --- a/src/dynarec/arm64/dynarec_arm64_avx.c +++ b/src/dynarec/arm64/dynarec_arm64_avx.c @@ -51,6 +51,8 @@ uintptr_t dynarec64_AVX(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ni if( (vex.m==VEX_M_0F) && (vex.p==VEX_P_NONE)) addr = dynarec64_AVX_0F(dyn, addr, ip, ninst, vex, ok, need_epilog); + else if( (vex.m==VEX_M_0F38) && (vex.p==VEX_P_NONE)) + addr = dynarec64_AVX_0F38(dyn, addr, ip, ninst, vex, ok, need_epilog); else if( (vex.m==VEX_M_0F) && (vex.p==VEX_P_66)) addr = dynarec64_AVX_66_0F(dyn, addr, ip, ninst, vex, ok, need_epilog); else if( (vex.m==VEX_M_0F) && (vex.p==VEX_P_F2)) diff --git a/src/dynarec/arm64/dynarec_arm64_avx_0f38.c b/src/dynarec/arm64/dynarec_arm64_avx_0f38.c new file mode 100644 index 00000000..8894405d --- /dev/null +++ b/src/dynarec/arm64/dynarec_arm64_avx_0f38.c @@ -0,0 +1,100 @@ +#include <stdio.h> +#include <stdlib.h> +#include <stddef.h> +#include <errno.h> + +#include "debug.h" +#include "box64context.h" +#include "dynarec.h" +#include "emu/x64emu_private.h" +#include "emu/x64run_private.h" 
+#include "x64run.h"
+#include "x64emu.h"
+#include "box64stack.h"
+#include "callback.h"
+#include "emu/x64run_private.h"
+#include "x64trace.h"
+#include "dynarec_native.h"
+#include "my_cpuid.h"
+#include "emu/x87emu_private.h"
+#include "emu/x64shaext.h"
+
+#include "arm64_printer.h"
+#include "dynarec_arm64_private.h"
+#include "dynarec_arm64_functions.h"
+#include "dynarec_arm64_helper.h"
+
+uintptr_t dynarec64_AVX_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog)
+{   // VEX map 0F38 with no mandatory prefix: reads the opcode byte, emits ARM64 for it (only BZHI so far, anything else goes to DEFAULT) and returns addr
+    (void)ip; (void)need_epilog;
+
+    uint8_t opcode = F8;
+    uint8_t nextop, u8;
+    uint8_t gd, ed, vd;
+    uint8_t wback, wb1, wb2;
+    uint8_t eb1, eb2, gb1, gb2;
+    int32_t i32, i32_;
+    int cacheupd = 0;
+    int v0, v1, v2;
+    int q0, q1, q2;
+    int d0, d1, d2;
+    int s0;
+    uint64_t tmp64u;
+    int64_t j64;
+    int64_t fixedaddress;
+    int unscaled;
+    MAYUSE(wb1);
+    MAYUSE(wb2);
+    MAYUSE(eb1);
+    MAYUSE(eb2);
+    MAYUSE(gb1);
+    MAYUSE(gb2);
+    MAYUSE(q0);
+    MAYUSE(q1);
+    MAYUSE(d0);
+    MAYUSE(d1);
+    MAYUSE(s0);
+    MAYUSE(j64);
+    MAYUSE(cacheupd);
+
+    rex_t rex = vex.rex;    // REX-equivalent bits carried by the VEX prefix; rex.w picks 64 vs 32-bit operand size below
+
+    switch(opcode) {
+
+        case 0xF5:
+            INST_NAME("BZHI Gd, Ed, Vd");   // Gd = Ed with every bit from position Vd[7:0] upward cleared
+            nextop = F8;
+            SETFLAGS(X_ALL, SF_SET);
+            GETGD;
+            GETED(0);
+            GETVD;
+            UXTBw(x1, vd);                  // x1 = index = low 8 bits of Vd. NOTE(review): if GETED loads a memory operand into x1 this overwrites ed - confirm
+            CMPSw_U12(x1, rex.w?64:32);     // flags from this compare are consumed by CSETxw and by B_MARK below
+            CSETxw(x2, cPL);                // x2 = 1 when index >= operand size, else 0
+            IFX(X_CF) BFIw(xFlags, x2, F_CF, 1);    // was IFX(F_CF): IFX is tested with X_* masks elsewhere here, F_* is only the bit position
+            // x2 becomes the mask of bits to clear: 0 when the index is out of range (Gd = Ed), else all-ones << index
+            MOV32w(x2, 0);                  // presumably a plain move that leaves the compare flags untouched - confirm
+            B_MARK(cPL);                    // out of range: keep the empty mask and skip the shift
+            MVNxw_REG(x2, x2);              // 0 -> all ones
+            LSLxw_REG(x2, x2, x1);
+            MARK;
+            IFX(X_ZF) {
+                BICSxw(gd, ed, x2);         // flag-setting form so ZF can be taken from the result
+                CSETw(x3, cEQ);
+                BFIw(xFlags, x3, F_ZF, 1);
+            } else
+                BICxw(gd, ed, x2);
+            IFX(X_SF) {
+                LSRxw(x3, gd, rex.w?63:31); // top bit of the result
+                BFIw(xFlags, x3, F_SF, 1);
+            }
+            IFX(X_AF) BFCw(xFlags, F_AF, 1);    // AF and PF are undefined after BZHI per the ISA; cleared here
+            IFX(X_PF) BFCw(xFlags, F_PF, 1);
+            IFX(X_OF) BFCw(xFlags, F_OF, 1);    // OF is architecturally cleared
+            break;
+
+        default:
+            DEFAULT;
+    }
+    return addr;
+}
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
index 783b77c2..a2a45435 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
+++ 
b/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c @@ -840,6 +840,23 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip YMM0(gd); break; + case 0x9D: + INST_NAME("VFNMADD132SS/D Gx, Vx, Ex"); + nextop = F8; + GETGX(v0, 1); + GETVX(v2, 0); + if(rex.w) {GETEXSD(v1, 0, 0);} else {GETEXSS(v1, 0, 0);} + q0 = fpu_get_scratch(dyn, ninst); + if(rex.w) { + FMSUB_64(q0, v2, v1, v0); + VMOVeD(v0, 0, q0, 0); + } else { + FMSUB_32(q0, v2, v1, v0); + VMOVeS(v0, 0, q0, 0); + } + YMM0(gd); + break; + case 0xA8: INST_NAME("VFMADD213PS/D Gx, Vx, Ex"); nextop = F8; @@ -886,7 +903,39 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip } if(!vex.l) YMM0(gd); break; + case 0xAB: + INST_NAME("VFMSUB213SS/D Gx, Vx, Ex"); + nextop = F8; + GETGX(v0, 1); + GETVX(v2, 0); + if(rex.w) {GETEXSD(v1, 0, 0);} else {GETEXSS(v1, 0, 0);} + q0 = fpu_get_scratch(dyn, ninst); + if(rex.w) { + FNMSUB_64(q0, v1, v0, v2); + VMOVeD(v0, 0, q0, 0); + } else { + FNMSUB_32(q0, v1, v0, v2); + VMOVeS(v0, 0, q0, 0); + } + YMM0(gd); + break; + case 0xAD: + INST_NAME("VFMNADD213SS/D Gx, Vx, Ex"); + nextop = F8; + GETGX(v0, 1); + GETVX(v2, 0); + if(rex.w) {GETEXSD(v1, 0, 0);} else {GETEXSS(v1, 0, 0);} + q0 = fpu_get_scratch(dyn, ninst); + if(rex.w) { + FMSUB_64(q0, v1, v0, v2); + VMOVeD(v0, 0, q0, 0); + } else { + FMSUB_32(q0, v1, v0, v2); + VMOVeS(v0, 0, q0, 0); + } + YMM0(gd); + break; case 0xAE: INST_NAME("VFNMSUB213PS/D Gx, Vx, Ex"); nextop = F8; diff --git a/src/dynarec/arm64/dynarec_arm64_avx_f3_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_f3_0f.c index 8e2ed65c..08a04825 100644 --- a/src/dynarec/arm64/dynarec_arm64_avx_f3_0f.c +++ b/src/dynarec/arm64/dynarec_arm64_avx_f3_0f.c @@ -448,15 +448,15 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, GETEXSS(v1, 0, 1); GETGX_empty_VX(v0, v2); u8 = F8; - if(((u8&15)!=12) && ((u8&15)!=15)) { - if(((u8&15)==12)||((u8&15)==13)||((u8&15)==9)||((u8&15)==10)) + if(((u8&15)!=11) && 
((u8&15)!=15)) { + if((u8&15)>7) FCMPS(v1, v2); else FCMPS(v2, v1); } // TODO: create a test for this one, there might be an issue with cases 9, 10 and 13 if(v0!=v2) VMOVQ(v0, v2); - switch(u8&7) { + switch(u8&15) { case 0x00: CSETMw(x2, cEQ); break; // Equal case 0x01: CSETMw(x2, cCC); break; // Less than case 0x02: CSETMw(x2, cLS); break; // Less or equal @@ -465,13 +465,13 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, case 0x05: CSETMw(x2, cCS); break; // Greater or equal or unordered case 0x06: CSETMw(x2, cHI); break; // Greater or unordered case 0x07: CSETMw(x2, cVC); break; // not NaN - case 0x08: CSETMw(x2, cEQ); CSETMw(x3, cVS); ORRw_REG(x2, x2, x3); break; // Equal than or ordered - case 0x09: CSETMw(x2, cCS); break; // Less than or unordered - case 0x0a: CSETMw(x2, cHI); break; // Less or equal or unordered + case 0x08: CSETMw(x2, cEQ); CSETMw(x3, cVS); ORRw_REG(x2, x2, x3); break; // Equal or unordered + case 0x09: CSETMw(x2, cHI); break; // Less than or unordered + case 0x0a: CSETMw(x2, cCS); break; // Less or equal or unordered case 0x0b: MOV32w(x2, 0); break; // false - case 0x0c: CSETMw(x2, cNE); CSETMw(x3, cVC); ANDw_REG(x2, x2, x3); break; // Not Equal not unordered - case 0x0d: CSETMw(x2, cCC); break; // Greater or equal not unordered - case 0x0e: CSETMw(x2, cLS); break; // Greater not unordered + case 0x0c: CSETMw(x2, cNE); CSETMw(x3, cVS); BICw(x2, x2, x3); break; // Not Equal not unordered + case 0x0d: CSETMw(x2, cLS); break; // Greater or equal not unordered + case 0x0e: CSETMw(x2, cCC); break; // Greater not unordered case 0x0f: MOV32w(x2, 0xffffffff); break; // true } VMOVQSfrom(v0, 0, x2); diff --git a/src/dynarec/arm64/dynarec_arm64_f20f.c b/src/dynarec/arm64/dynarec_arm64_f20f.c index d7eefb17..8c0cb3b9 100644 --- a/src/dynarec/arm64/dynarec_arm64_f20f.c +++ b/src/dynarec/arm64/dynarec_arm64_f20f.c @@ -413,6 +413,20 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n 
VUZP2Q_32(v0, v0, v1); VFSUBQS(v0, d0, v0); break; + + case 0xAE: + nextop = F8; + switch((nextop>>3)&7) { + case 6: + INST_NAME("(unsupported) UWAIT Ed"); + FAKEED; + UDF(0); + break; + default: + DEFAULT; + } + break; + case 0xC2: INST_NAME("CMPSD Gx, Ex, Ib"); nextop = F8; diff --git a/src/dynarec/arm64/dynarec_arm64_f30f.c b/src/dynarec/arm64/dynarec_arm64_f30f.c index a3144f21..7787e4d8 100644 --- a/src/dynarec/arm64/dynarec_arm64_f30f.c +++ b/src/dynarec/arm64/dynarec_arm64_f30f.c @@ -410,6 +410,29 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } break; + case 0xAE: + nextop = F8; + switch((nextop>>3)&7) { + case 2: + INST_NAME("(unsupported) WRFSBASE Ed"); + FAKEED; + UDF(0); + break; + case 3: + INST_NAME("(unsupported) WRGSBASE Ed"); + FAKEED; + UDF(0); + break; + case 6: + INST_NAME("(unsupported) UMONITOR Ed"); + FAKEED; + UDF(0); + break; + default: + DEFAULT; + } + break; + case 0xB8: INST_NAME("POPCNT Gd, Ed"); SETFLAGS(X_ALL, SF_SET); diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h index cb0e181c..34822661 100644 --- a/src/dynarec/arm64/dynarec_arm64_helper.h +++ b/src/dynarec/arm64/dynarec_arm64_helper.h @@ -1181,6 +1181,7 @@ void* arm64_next(x64emu_t* emu, uintptr_t addr); #define dynarec64_F30F STEPNAME(dynarec64_F30F) #define dynarec64_AVX STEPNAME(dynarec64_AVX) #define dynarec64_AVX_0F STEPNAME(dynarec64_AVX_0F) +#define dynarec64_AVX_0F38 STEPNAME(dynarec64_AVX_0F38) #define dynarec64_AVX_66_0F STEPNAME(dynarec64_AVX_66_0F) #define dynarec64_AVX_F2_0F STEPNAME(dynarec64_AVX_F2_0F) #define dynarec64_AVX_F3_0F STEPNAME(dynarec64_AVX_F3_0F) @@ -1608,6 +1609,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog); uintptr_t dynarec64_AVX(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, 
int* ok, int* need_epilog); uintptr_t dynarec64_AVX_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog); +uintptr_t dynarec64_AVX_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog); uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog); uintptr_t dynarec64_AVX_F2_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog); uintptr_t dynarec64_AVX_F3_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog); diff --git a/src/include/debug.h b/src/include/debug.h index be48b59e..52429f47 100644 --- a/src/include/debug.h +++ b/src/include/debug.h @@ -40,6 +40,7 @@ extern int arm64_aes; extern int arm64_pmull; extern int arm64_crc32; extern int arm64_atomics; +extern int arm64_asimdhp; extern int arm64_sha1; extern int arm64_sha2; extern int arm64_uscat; diff --git a/src/tools/my_cpuid.c b/src/tools/my_cpuid.c index 23f3d19b..0b71e4f7 100644 --- a/src/tools/my_cpuid.c +++ b/src/tools/my_cpuid.c @@ -264,6 +264,7 @@ void my_cpuid(x64emu_t* emu, uint32_t tmp32u) | 1<<26 // xsave | 1<<27 // osxsave | box64_avx<<28 // AVX + | box64_avx<<29 // F16C ; break; case 0x2: // TLB and Cache info. Sending 1st gen P4 info... @@ -325,7 +326,7 @@ void my_cpuid(x64emu_t* emu, uint32_t tmp32u) box64_avx<<3 | // BMI1 box64_avx2<<5 | //AVX2 box64_avx2<<8 | //BMI2 - box64_avx2<<9 | //VAES + box64_avx<<9 | //VAES box64_avx2<<19 | //ADX 1<<29| // SHA extension 0; |