diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2022-09-30 19:13:47 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2022-09-30 19:13:47 +0200 |
| commit | 7b67bbe6988d00d557d72c9867b99826320d1835 (patch) | |
| tree | c3f57cbd1749acebad85ddf296462103e0985efb /src | |
| parent | 7692371ccbdb050fdb758ac56f80822881e89e00 (diff) | |
| download | box64-7b67bbe6988d00d557d72c9867b99826320d1835.tar.gz box64-7b67bbe6988d00d557d72c9867b99826320d1835.zip | |
Added the PCLMULQDQ extension ([DYNAREC] too, using PMULL if present); this greatly improves the AES-XTS score of Geekbench 5
Diffstat (limited to 'src')
| -rwxr-xr-x | src/dynarec/arm64/arm64_emitter.h | 9 | ||||
| -rwxr-xr-x | src/dynarec/arm64/arm64_printer.c | 11 | ||||
| -rwxr-xr-x | src/dynarec/arm64/dynarec_arm64_660f.c | 45 | ||||
| -rwxr-xr-x | src/dynarec/arm64/dynarec_arm64_functions.c | 16 | ||||
| -rwxr-xr-x | src/dynarec/arm64/dynarec_arm64_functions.h | 1 | ||||
| -rw-r--r-- | src/emu/x64run660f.c | 19 | ||||
| -rwxr-xr-x | src/tools/box64stack.c | 18 | ||||
| -rw-r--r-- | src/tools/my_cpuid.c | 1 | ||||
| -rwxr-xr-x | src/wrapped/wrappedlibc.c | 2 |
9 files changed, 114 insertions, 8 deletions
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h index dcd6ee64..4b7d985a 100755 --- a/src/dynarec/arm64/arm64_emitter.h +++ b/src/dynarec/arm64/arm64_emitter.h @@ -1764,7 +1764,7 @@ #define SQDMULHQ_16(Vd, Vn, Vm) EMIT(QDMULH_vector(1, 0, 0b01, Vm, Vn, Vd)) #define SQDMULHQ_32(Vd, Vn, Vm) EMIT(QDMULH_vector(1, 0, 0b10, Vm, Vn, Vd)) -// AES extensions +// AES extension #define AES_gen(D, Rn, Rd) (0b01001110<<24 | 0b00<<22 | 0b10100<<17 | 0b0010<<13 | (D)<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) #define AESD(Vd, Vn) EMIT(AES_gen(1, Vn, Vd)) #define AESE(Vd, Vn) EMIT(AES_gen(0, Vn, Vd)) @@ -1773,4 +1773,11 @@ #define AESIMC(Vd, Vn) EMIT(AESMC_gen(1, Vn, Vd)) #define AESMC(Vd, Vn) EMIT(AESMC_gen(0, Vn, Vd)) +// PMULL extension is PMULL_128 +#define PMULL_gen(Q, size, Rm, Rn, Rd) (0<<31 | (Q)<<30 | 0b001110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 0b1110<<12 | (Rn)<<5 | (Rd)) +#define PMULL(Rd, Rn, Rm) EMIT(PMULL_gen(0, 0b00, Rm, Rn, Rd)) +#define PMULL2(Rd, Rn, Rm) EMIT(PMULL_gen(1, 0b00, Rm, Rn, Rd)) +#define PMULL_128(Rd, Rn, Rm) EMIT(PMULL_gen(0, 0b11, Rm, Rn, Rd)) +#define PMULL2_128(Rd, Rn, Rm) EMIT(PMULL_gen(1, 0b11, Rm, Rn, Rd)) + #endif //__ARM64_EMITTER_H__ diff --git a/src/dynarec/arm64/arm64_printer.c b/src/dynarec/arm64/arm64_printer.c index a156c00e..cccf33db 100755 --- a/src/dynarec/arm64/arm64_printer.c +++ b/src/dynarec/arm64/arm64_printer.c @@ -1351,6 +1351,17 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr) snprintf(buff, sizeof(buff), "AES%sMC V%d.16B, V%d.16B", sf?"I":"", Rd, Rn); return buff; } + // PMULL + if(isMask(opcode, "0Q001110ff1mmmmm111000nnnnnddddd", &a)) { + const char* Y[] = {"8B", "16B", "??", "??", "??", "??", "1D", "2D"}; + const char* Z[] = {"8H", "??", "??", "1Q"}; + int sz = sf; + const char* Vn = Y[(sz<<1)|a.Q]; + const char* Vd = Z[sz]; + snprintf(buff, sizeof(buff), "PMULL%s V%d.%s, V%d.%s, V%d.%s", a.Q?"2":"", Rd, Vd, Rn, Vn, Rm, Vn); + return buff; + } + // DMB ISH if(isMask(opcode, 
"11010101000000110011nnnn10111111", &a)) { snprintf(buff, sizeof(buff), "DMB %s", (Rn==0b1011)?"ISH":"???"); diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c index 4ff74907..19af27b9 100755 --- a/src/dynarec/arm64/dynarec_arm64_660f.c +++ b/src/dynarec/arm64/dynarec_arm64_660f.c @@ -676,6 +676,51 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } break; + case 0x44: + INST_NAME("PCLMULQDQ Gx, Ex, Ib"); + nextop = F8; + if(arm64_pmull) { + GETGX(q0, 1); + GETEX(q1, 0, 1); + u8 = F8; + switch (u8&0b00010001) { + case 0b00000000: + PMULL_128(q0, q0, q1); + break; + case 0b00010001: + PMULL2_128(q0, q0, q1); + break; + case 0b00000001: + VEXTQ_8(q0, q0, q0, 8); // Swap Up/Lower 64bits parts + PMULL_128(q0, q0, q1); + break; + case 0b00010000: + VEXTQ_8(q0, q0, q0, 8); // Swap Up/Lower 64bits parts + PMULL2_128(q0, q0, q1); + break; + } + } else { + GETG; + sse_forget_reg(dyn, ninst, gd); + MOV32w(x1, gd); // gx + if(MODREG) { + ed = (nextop&7)+(rex.b<<3); + sse_forget_reg(dyn, ninst, ed); + MOV32w(x2, ed); + MOV32w(x3, 0); //p = NULL + } else { + MOV32w(x2, 0); + addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, 0, 0, rex, NULL, 0, 1); + if(ed!=x3) { + MOVx_REG(x3, ed); + } + } + u8 = F8; + MOV32w(x4, u8); + CALL(arm_pclmul, -1); + } + break; + case 0xDF: INST_NAME("AESKEYGENASSIST Gx, Ex, Ib"); // AES-NI nextop = F8; diff --git a/src/dynarec/arm64/dynarec_arm64_functions.c b/src/dynarec/arm64/dynarec_arm64_functions.c index a41dfad7..d51f5a3a 100755 --- a/src/dynarec/arm64/dynarec_arm64_functions.c +++ b/src/dynarec/arm64/dynarec_arm64_functions.c @@ -332,6 +332,22 @@ void arm_aeskeygenassist(x64emu_t* emu, int gx, int ex, void* p, uint32_t u8) GX->ud[3] ^= u8; } +void arm_pclmul(x64emu_t* emu, int gx, int ex, void* p, uint32_t u8) +{ + sse_regs_t *EX = p?((sse_regs_t*)p):&emu->xmm[ex]; + sse_regs_t *GX = &emu->xmm[gx]; + int g = (u8&1)?1:0; + int e = (u8&0b10000)?1:0; + 
__int128 result = 0; + __int128 op2 = EX->q[e]; + for (int i=0; i<64; ++i) + if(GX->q[g]&(1LL<<i)) + result ^= (op2<<i); + + GX->q[0] = result&0xffffffffffffffffLL; + GX->q[1] = (result>>64)&0xffffffffffffffffLL; +} + void arm_clflush(x64emu_t* emu, void* p) { cleanDBFromAddressRange((uintptr_t)p, 8, 0); diff --git a/src/dynarec/arm64/dynarec_arm64_functions.h b/src/dynarec/arm64/dynarec_arm64_functions.h index a5e15a1e..7183fd6d 100755 --- a/src/dynarec/arm64/dynarec_arm64_functions.h +++ b/src/dynarec/arm64/dynarec_arm64_functions.h @@ -35,6 +35,7 @@ void arm_aesdlast(x64emu_t* emu, int xmm); void arm_aeselast(x64emu_t* emu, int xmm); void arm_aesimc(x64emu_t* emu, int xmm); void arm_aeskeygenassist(x64emu_t* emu, int gx, int ex, void* p, uint32_t u8); +void arm_pclmul(x64emu_t* emu, int gx, int ex, void* p, uint32_t u8); void arm_clflush(x64emu_t* emu, void* p); diff --git a/src/emu/x64run660f.c b/src/emu/x64run660f.c index f182e34f..7561fff2 100644 --- a/src/emu/x64run660f.c +++ b/src/emu/x64run660f.c @@ -797,6 +797,25 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr) GX->f[i] = (tmp8u&(1<<i))?tmpf:0.0f; break; + case 0x44: /* PCLMULQDQ Gx, Ex, Ib */ + nextop = F8; + GETEX(1); + GETGX; + tmp8u = F8; + { + int g = (tmp8u&1)?1:0; + int e = (tmp8u&0b10000)?1:0; + __int128 result = 0; + __int128 op2 = EX->q[e]; + for (int i=0; i<64; ++i) + if(GX->q[g]&(1LL<<i)) + result ^= (op2<<i); + + GX->q[0] = result&0xffffffffffffffffLL; + GX->q[1] = (result>>64)&0xffffffffffffffffLL; + } + break; + case 0xDF: // AESKEYGENASSIST Gx, Ex, u8 nextop = F8; GETEX(1); diff --git a/src/tools/box64stack.c b/src/tools/box64stack.c index a46fcbd6..ff0de001 100755 --- a/src/tools/box64stack.c +++ b/src/tools/box64stack.c @@ -155,12 +155,18 @@ void SetupInitialStack(x64emu_t *emu) Push(emu, real_getauxval(13)); Push(emu, 13); //AT_GID(13) Push(emu, real_getauxval(14)); Push(emu, 14); //AT_EGID(14) Push(emu, p_x86_64); Push(emu, 15); //AT_PLATFORM(15)=&"x86_64" - // Push 
HWCAP: - // FPU: 1<<0 ; VME: 1<<1 ; DE : 1<<2 ; PSE: 1<<3 ; TSC: 1<<4 ; MSR: 1<<5 ; PAE: 1<<6 ; MCE: 1<<7 - // CX8: 1<<8 ; APIC:1<<9 ; SEP: 1<<11; MTRR:1<<12; PGE: 1<<13; MCA: 1<<14; CMOV:1<<15 - // FCMOV:1<<16; ; MMX: 1<<23 - // OSFXR:1<<24; XMM: 1<<25;XMM2: 1<<26; AMD3D:1<<31 - Push(emu, (1<<0) | (1<<1) | (1<<2) | (1<<3) | (1<<4) | (1<<8) | (1<<15) | (1<<16) | (1<<23) | (1<<25) | (1<<26)); + // Push HWCAP: same as CPUID 1.EDX + Push(emu, 1 // fpu + | 1<<4 // rdtsc + | 1<<8 // cmpxchg8 + | 1<<11 // sep (sysenter & sysexit) + | 1<<15 // cmov + | 1<<19 // clflush (seems to be with SSE2) + | 1<<23 // mmx + | 1<<24 // fxsr (fxsave, fxrestore) + | 1<<25 // SSE + | 1<<26 // SSE2 + ); Push(emu, 16); //AT_HWCAP(16)=... //Push(emu, sysconf(_SC_CLK_TCK)); Push(emu, 17); //AT_CLKTCK(17)=times() frequency Push(emu, real_getauxval(23)); Push(emu, 23); //AT_SECURE(23) diff --git a/src/tools/my_cpuid.c b/src/tools/my_cpuid.c index 1c1a1871..7ab19db0 100644 --- a/src/tools/my_cpuid.c +++ b/src/tools/my_cpuid.c @@ -61,6 +61,7 @@ void my_cpuid(x64emu_t* emu, uint32_t tmp32u) | 1<<26 // SSE2 ; R_ECX = 1<<0 // SSE3 + | 1<<1 // PCLMULQDQ | 1<<9 // SSSE3 | 1<<12 // fma | 1<<13 // cx16 (cmpxchg16) diff --git a/src/wrapped/wrappedlibc.c b/src/wrapped/wrappedlibc.c index ad6e804a..37a6b175 100755 --- a/src/wrapped/wrappedlibc.c +++ b/src/wrapped/wrappedlibc.c @@ -1392,7 +1392,7 @@ void CreateCPUInfoFile(int fd) P; sprintf(buff, "bogomips\t: %g\n", bogoMips); P; - sprintf(buff, "flags\t\t: fpu cx8 sep cmov clflush mmx sse sse2 syscall tsc lahf_lm ssse3 ht tm lm fma fxsr cpuid cx16 aes movbe pni\n"); + sprintf(buff, "flags\t\t: fpu cx8 sep cmov clflush mmx sse sse2 syscall tsc lahf_lm ssse3 ht tm lm fma fxsr cpuid pclmulqdq cx16 aes movbe pni\n"); P; sprintf(buff, "address sizes\t: 46 bits physical, 48 bits virtual\n"); P; |