diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2022-09-30 19:13:47 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2022-09-30 19:13:47 +0200 |
| commit | 7b67bbe6988d00d557d72c9867b99826320d1835 (patch) | |
| tree | c3f57cbd1749acebad85ddf296462103e0985efb /src/dynarec | |
| parent | 7692371ccbdb050fdb758ac56f80822881e89e00 (diff) | |
| download | box64-7b67bbe6988d00d557d72c9867b99826320d1835.tar.gz box64-7b67bbe6988d00d557d72c9867b99826320d1835.zip | |
Added PCLMULQDQ extension ([DYNAREC] too, using PMULL if present); this greatly improves the AES-XTS score of Geekbench 5
Diffstat (limited to 'src/dynarec')
| -rwxr-xr-x | src/dynarec/arm64/arm64_emitter.h | 9 | ||||
| -rwxr-xr-x | src/dynarec/arm64/arm64_printer.c | 11 | ||||
| -rwxr-xr-x | src/dynarec/arm64/dynarec_arm64_660f.c | 45 | ||||
| -rwxr-xr-x | src/dynarec/arm64/dynarec_arm64_functions.c | 16 | ||||
| -rwxr-xr-x | src/dynarec/arm64/dynarec_arm64_functions.h | 1 |
5 files changed, 81 insertions, 1 deletions
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h index dcd6ee64..4b7d985a 100755 --- a/src/dynarec/arm64/arm64_emitter.h +++ b/src/dynarec/arm64/arm64_emitter.h @@ -1764,7 +1764,7 @@ #define SQDMULHQ_16(Vd, Vn, Vm) EMIT(QDMULH_vector(1, 0, 0b01, Vm, Vn, Vd)) #define SQDMULHQ_32(Vd, Vn, Vm) EMIT(QDMULH_vector(1, 0, 0b10, Vm, Vn, Vd)) -// AES extensions +// AES extension #define AES_gen(D, Rn, Rd) (0b01001110<<24 | 0b00<<22 | 0b10100<<17 | 0b0010<<13 | (D)<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) #define AESD(Vd, Vn) EMIT(AES_gen(1, Vn, Vd)) #define AESE(Vd, Vn) EMIT(AES_gen(0, Vn, Vd)) @@ -1773,4 +1773,11 @@ #define AESIMC(Vd, Vn) EMIT(AESMC_gen(1, Vn, Vd)) #define AESMC(Vd, Vn) EMIT(AESMC_gen(0, Vn, Vd)) +// PMULL extension is PMULL_128 +#define PMULL_gen(Q, size, Rm, Rn, Rd) (0<<31 | (Q)<<30 | 0b001110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 0b1110<<12 | (Rn)<<5 | (Rd)) +#define PMULL(Rd, Rn, Rm) EMIT(PMULL_gen(0, 0b00, Rm, Rn, Rd)) +#define PMULL2(Rd, Rn, Rm) EMIT(PMULL_gen(1, 0b00, Rm, Rn, Rd)) +#define PMULL_128(Rd, Rn, Rm) EMIT(PMULL_gen(0, 0b11, Rm, Rn, Rd)) +#define PMULL2_128(Rd, Rn, Rm) EMIT(PMULL_gen(1, 0b11, Rm, Rn, Rd)) + #endif //__ARM64_EMITTER_H__ diff --git a/src/dynarec/arm64/arm64_printer.c b/src/dynarec/arm64/arm64_printer.c index a156c00e..cccf33db 100755 --- a/src/dynarec/arm64/arm64_printer.c +++ b/src/dynarec/arm64/arm64_printer.c @@ -1351,6 +1351,17 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr) snprintf(buff, sizeof(buff), "AES%sMC V%d.16B, V%d.16B", sf?"I":"", Rd, Rn); return buff; } + // PMULL + if(isMask(opcode, "0Q001110ff1mmmmm111000nnnnnddddd", &a)) { + const char* Y[] = {"8B", "16B", "??", "??", "??", "??", "1D", "2D"}; + const char* Z[] = {"8H", "??", "??", "1Q"}; + int sz = sf; + const char* Vn = Y[(sz<<1)|a.Q]; + const char* Vd = Z[sz]; + snprintf(buff, sizeof(buff), "PMULL%s V%d.%s, V%d.%s, V%d.%s", a.Q?"2":"", Rd, Vd, Rn, Vn, Rm, Vn); + return buff; + } + // DMB ISH if(isMask(opcode, 
"11010101000000110011nnnn10111111", &a)) { snprintf(buff, sizeof(buff), "DMB %s", (Rn==0b1011)?"ISH":"???"); diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c index 4ff74907..19af27b9 100755 --- a/src/dynarec/arm64/dynarec_arm64_660f.c +++ b/src/dynarec/arm64/dynarec_arm64_660f.c @@ -676,6 +676,51 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } break; + case 0x44: + INST_NAME("PCLMULQDQ Gx, Ex, Ib"); + nextop = F8; + if(arm64_pmull) { + GETGX(q0, 1); + GETEX(q1, 0, 1); + u8 = F8; + switch (u8&0b00010001) { + case 0b00000000: + PMULL_128(q0, q0, q1); + break; + case 0b00010001: + PMULL2_128(q0, q0, q1); + break; + case 0b00000001: + VEXTQ_8(q0, q0, q0, 8); // Swap Up/Lower 64bits parts + PMULL_128(q0, q0, q1); + break; + case 0b00010000: + VEXTQ_8(q0, q0, q0, 8); // Swap Up/Lower 64bits parts + PMULL2_128(q0, q0, q1); + break; + } + } else { + GETG; + sse_forget_reg(dyn, ninst, gd); + MOV32w(x1, gd); // gx + if(MODREG) { + ed = (nextop&7)+(rex.b<<3); + sse_forget_reg(dyn, ninst, ed); + MOV32w(x2, ed); + MOV32w(x3, 0); //p = NULL + } else { + MOV32w(x2, 0); + addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, 0, 0, rex, NULL, 0, 1); + if(ed!=x3) { + MOVx_REG(x3, ed); + } + } + u8 = F8; + MOV32w(x4, u8); + CALL(arm_pclmul, -1); + } + break; + case 0xDF: INST_NAME("AESKEYGENASSIST Gx, Ex, Ib"); // AES-NI nextop = F8; diff --git a/src/dynarec/arm64/dynarec_arm64_functions.c b/src/dynarec/arm64/dynarec_arm64_functions.c index a41dfad7..d51f5a3a 100755 --- a/src/dynarec/arm64/dynarec_arm64_functions.c +++ b/src/dynarec/arm64/dynarec_arm64_functions.c @@ -332,6 +332,22 @@ void arm_aeskeygenassist(x64emu_t* emu, int gx, int ex, void* p, uint32_t u8) GX->ud[3] ^= u8; } +void arm_pclmul(x64emu_t* emu, int gx, int ex, void* p, uint32_t u8) +{ + sse_regs_t *EX = p?((sse_regs_t*)p):&emu->xmm[ex]; + sse_regs_t *GX = &emu->xmm[gx]; + int g = (u8&1)?1:0; + int e = (u8&0b10000)?1:0; + 
__int128 result = 0; + __int128 op2 = EX->q[e]; + for (int i=0; i<64; ++i) + if(GX->q[g]&(1LL<<i)) + result ^= (op2<<i); + + GX->q[0] = result&0xffffffffffffffffLL; + GX->q[1] = (result>>64)&0xffffffffffffffffLL; +} + void arm_clflush(x64emu_t* emu, void* p) { cleanDBFromAddressRange((uintptr_t)p, 8, 0); diff --git a/src/dynarec/arm64/dynarec_arm64_functions.h b/src/dynarec/arm64/dynarec_arm64_functions.h index a5e15a1e..7183fd6d 100755 --- a/src/dynarec/arm64/dynarec_arm64_functions.h +++ b/src/dynarec/arm64/dynarec_arm64_functions.h @@ -35,6 +35,7 @@ void arm_aesdlast(x64emu_t* emu, int xmm); void arm_aeselast(x64emu_t* emu, int xmm); void arm_aesimc(x64emu_t* emu, int xmm); void arm_aeskeygenassist(x64emu_t* emu, int gx, int ex, void* p, uint32_t u8); +void arm_pclmul(x64emu_t* emu, int gx, int ex, void* p, uint32_t u8); void arm_clflush(x64emu_t* emu, void* p); |