diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2023-11-21 18:01:41 +0100 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2023-11-21 18:01:41 +0100 |
| commit | 4c7ac85ae885b61fe8c30daf9b7aba7886ee3f82 (patch) | |
| tree | 52cc6c97e8aaf1dafab4ab251b6642723e49a737 /src | |
| parent | afe6fbe3b330929bde1a36712346bda2ffc7d9f9 (diff) | |
| download | box64-4c7ac85ae885b61fe8c30daf9b7aba7886ee3f82.tar.gz box64-4c7ac85ae885b61fe8c30daf9b7aba7886ee3f82.zip | |
[ARM64_DYNAREC] Added full support for the x86 SHA CPU extension, using the ARM SHA1/SHA2 extensions when present
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/arm64/arm64_emitter.h | 38 | ||||
| -rw-r--r-- | src/dynarec/arm64/arm64_printer.c | 57 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_0f.c | 248 |
3 files changed, 342 insertions, 1 deletions
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h index 23842f28..8a828d00 100644 --- a/src/dynarec/arm64/arm64_emitter.h +++ b/src/dynarec/arm64/arm64_emitter.h @@ -1497,7 +1497,6 @@ #define VTRNQ1_32(Vd, Vn, Vm) EMIT(TRN_gen(1, 0b10, Vm, 0, Vn, Vd)) #define VTRNQ1_16(Vd, Vn, Vm) EMIT(TRN_gen(1, 0b01, Vm, 0, Vn, Vd)) #define VTRNQ1_8(Vd, Vn, Vm) EMIT(TRN_gen(1, 0b00, Vm, 0, Vn, Vd)) -#define VSWP(Vd, Vn) VTRNQ1_64(Vd, Vn, Vn) #define VTRNQ2_64(Vd, Vn, Vm) EMIT(TRN_gen(1, 0b11, Vm, 1, Vn, Vd)) #define VTRNQ2_32(Vd, Vn, Vm) EMIT(TRN_gen(1, 0b10, Vm, 1, Vn, Vd)) #define VTRNQ2_16(Vd, Vn, Vm) EMIT(TRN_gen(1, 0b01, Vm, 1, Vn, Vd)) @@ -2167,4 +2166,41 @@ #define CRC32CX(Wd, Wn, Xm) EMIT(CRC32C_gen(1, Xm, 0b11, Wn, Wd)) #define CRC32Cxw(Wd, Wn, Rm) EMIT(CRC32C_gen(rex.w, Rm, 0b10|rex.w, Wn, Wd)) +// SHA1 extension +#define SHA1H_gen(Rn, Rd) (0b01011110<<24 | 0b10100<<17 | 0b10<<10 | (Rn)<<5 | (Rd)) +// SHA1 fixed rotate (ROL 30 of 32bits value) +#define SHA1H(Sd, Sn) EMIT(SHA1H_gen(Sn, Sd)) + +#define SHA1SU1_gen(Rn, Rd) (0b01011110<<24 | 0b10100<<17 | 0b00001<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) +// SHA1 schedule update 1 +#define SHA1SU1(Vd, Vn) EMIT(SHA1SU1_gen(Vn, Vd)) + +#define SHA1C_gen(Rm, Rn, Rd) (0b01011110<<24 | (Rm)<<16 | (Rn)<<5 | (Rd)) +// SHA1 hash update (choose) +#define SHA1C(Qd, Sn, Vm) EMIT(SHA1C_gen(Vm, Sn, Qd)) + +#define SHA1M_gen(Rm, Rn, Rd) (0b01011110<<24 | (Rm)<<16 | 0b010<<12 | (Rn)<<5 | (Rd)) +// SHA1 hash update (majority) +#define SHA1M(Qd, Sn, Vm) EMIT(SHA1M_gen(Vm, Sn, Qd)) + +#define SHA1P_gen(Rm, Rn, Rd) (0b01011110<<24 | (Rm)<<16 | 0b001<<12 | (Rn)<<5 | (Rd)) +// SHA1 hash update (parity) +#define SHA1P(Qd, Sn, Vm) EMIT(SHA1P_gen(Vm, Sn, Qd)) + +#define SHA256SU0_gen(Rn,Rd) (0b01011110<<24 | 0b10100<<17 | 0b00010<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) +//SHA256 schedule update 0 +#define SHA256SU0(Vd, Vn) EMIT(SHA256SU0_gen(Vn, Vd)) + +#define SHA256SU1_gen(Rm, Rn, Rd) (0b01011110<<24 | (Rm)<<16 | 0b110<<12 | 
(Rn)<<5 | (Rd)) +//SHA256 schedule update 1 +#define SHA256SU1(Vd, Vn, Vm) EMIT(SHA256SU1_gen(Vm, Vn, Vd)) + +#define SHA256H_gen(Rm, Rn, Rd) (0b01011110<<24 | (Rm)<<16 | 0b100<<12 | (Rn)<<5 | (Rd)) +//SHA256 hash update (part 1) +#define SHA256H(Vd, Vn, Vm) EMIT(SHA256H_gen(Vm, Vn, Vd)) + +#define SHA256H2_gen(Rm, Rn, Rd) (0b01011110<<24 | (Rm)<<16 | 0b101<<12 | (Rn)<<5 | (Rd)) +//SHA256 hash update (part 2) +#define SHA256H2(Vd, Vn, Vm) EMIT(SHA256H2_gen(Vm, Vn, Vd)) + #endif //__ARM64_EMITTER_H__ diff --git a/src/dynarec/arm64/arm64_printer.c b/src/dynarec/arm64/arm64_printer.c index f8d09c89..eca8c632 100644 --- a/src/dynarec/arm64/arm64_printer.c +++ b/src/dynarec/arm64/arm64_printer.c @@ -1692,6 +1692,63 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr) snprintf(buff, sizeof(buff), "SETF%d %s", 8<<sf, Xt[Rn]); return buff; } + // REV64 + if(isMask(opcode, "0Q001110ff100000000010nnnnnddddd", &a)) { + const char* T[] = {"8B", "16B", "4H", "8H", "2S", "4S", "??", "??"}; + int sz = sf; + const char* Vn = T[(sz<<1)|a.Q]; + const char* Vd = T[(sz<<1)|a.Q]; + snprintf(buff, sizeof(buff), "REV64 V%d.%s, V%d.%s",Rd, Vd, Rn, Vn); + return buff; + } + //TRNx + if(isMask(opcode, "0Q001110ff0mmmmm0o1010nnnnnddddd", &a)) { + const char* T[] = {"8B", "16B", "4H", "8H", "2S", "4S", "??", "2D"}; + int sz = sf; + const char* Vn = T[(sz<<1)|a.Q]; + const char* Vm = T[(sz<<1)|a.Q]; + const char* Vd = T[(sz<<1)|a.Q]; + snprintf(buff, sizeof(buff), "TRN%d V%d.%s, V%d.%s, V%d.%s", a.o+1, Rd, Vd, Rn, Vn, Rm, Vm); + return buff; + } + //SHA1 stuffs + if(isMask(opcode, "0101111000101000000010nnnnnddddd", &a)) { + snprintf(buff, sizeof(buff), "SHA1H S%d, S%d", Rd, Rn); + return buff; + } + if(isMask(opcode, "0101111000101000000110nnnnnddddd", &a)) { + snprintf(buff, sizeof(buff), "SHA1SU1 V%d.4S, V%d.4S", Rd, Rn); + return buff; + } + if(isMask(opcode, "01011110000mmmmm000000nnnnnddddd", &a)) { + snprintf(buff, sizeof(buff), "SHA1C Q%d, S%d, V%d.4S", Rd, Rn, Rm); + return 
buff; + } + if(isMask(opcode, "01011110000mmmmm001000nnnnnddddd", &a)) { + snprintf(buff, sizeof(buff), "SHA1M Q%d, S%d, V%d.4S", Rd, Rn, Rm); + return buff; + } + if(isMask(opcode, "01011110000mmmmm000100nnnnnddddd", &a)) { + snprintf(buff, sizeof(buff), "SHA1P Q%d, S%d, V%d.4S", Rd, Rn, Rm); + return buff; + } + //SHA256 stuffs + if(isMask(opcode, "0101111000101000001010nnnnnddddd", &a)) { + snprintf(buff, sizeof(buff), "SHA256SU0 V%d.4S, V%d.4S", Rd, Rn); + return buff; + } + if(isMask(opcode, "01011110000mmmmm011000nnnnnddddd", &a)) { + snprintf(buff, sizeof(buff), "SHA256SU1 V%d.4S, V%d.4S, V%d.4S", Rd, Rn, Rm); + return buff; + } + if(isMask(opcode, "01011110000mmmmm010000nnnnnddddd", &a)) { + snprintf(buff, sizeof(buff), "SHA256H Q%d, Q%d, V%d.4S", Rd, Rn, Rm); + return buff; + } + if(isMask(opcode, "01011110000mmmmm010100nnnnnddddd", &a)) { + snprintf(buff, sizeof(buff), "SHA256H2 Q%d, Q%d, V%d.4S", Rd, Rn, Rm); + return buff; + } snprintf(buff, sizeof(buff), "%08X ???", __builtin_bswap32(opcode)); return buff; diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c index b91128b9..7565c924 100644 --- a/src/dynarec/arm64/dynarec_arm64_0f.c +++ b/src/dynarec/arm64/dynarec_arm64_0f.c @@ -17,6 +17,7 @@ #include "dynarec_native.h" #include "my_cpuid.h" #include "emu/x87emu_private.h" +#include "emu/x64shaext.h" #include "arm64_printer.h" #include "dynarec_arm64_private.h" @@ -559,6 +560,178 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin GETEM(q1, 0); ABS_32(q0, q1); break; + + case 0xC8: + INST_NAME("SHA1NEXTE Gx, Ex"); + nextop = F8; + GETGX(q0, 1); + GETEX(q1, 0, 0); + v0 = fpu_get_scratch(dyn); + VEORQ(v0, v0, v0); + if(arm64_sha1) { + v1 = fpu_get_scratch(dyn); + VMOVeS(v1, 0, q0, 3); + SHA1H(v1, v1); + VMOVeS(v0, 3, v1, 0); + } else { + VMOVSto(x1, q0, 3); + RORw(x1, x1, 2); // i.e. 
ROL 30 + VMOVQSfrom(v0, 3, x1); + } + VADDQ_32(q0, v0, q1); + break; + case 0xC9: + INST_NAME("SHA1MSG1 Gx, Ex"); + nextop = F8; + GETGX(q0, 1); + GETEX(q1, 0, 0); + v0 = fpu_get_scratch(dyn); + VEXTQ_8(v0, q1, q0, 8); + VEORQ(q0, q0, v0); + break; + case 0xCA: + INST_NAME("SHA1MSG2 Gx, Ex"); + nextop = F8; + if(arm64_sha1) { + GETGX(q0, 1); + GETEX(q1, 0, 0); + VEXTQ_8(q0, q0, q0, 8); + VREV64Q_32(q0, q0); + if(MODREG) { + if(q0==q1) + v0 = q0; + else { + v0 = fpu_get_scratch(dyn); + VEXTQ_8(v0, q1, q1, 8); + VREV64Q_32(v0, v0); + } + } else { + v0 = q1; + VEXTQ_8(v0, q1, q1, 8); + VREV64Q_32(v0, v0); + } + SHA1SU1(q0, v0); + VEXTQ_8(q0, q0, q0, 8); + VREV64Q_32(q0, q0); + } else { + if(MODREG) { + ed = (nextop&7)+(rex.b<<3); + sse_reflect_reg(dyn, ninst, ed); + ADDx_U12(x2, xEmu, offsetof(x64emu_t, xmm[ed])); + } else { + SMREAD(); + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0); + if(wback!=x2) { + MOVx_REG(x2, wback); + } + } + GETG; + sse_forget_reg(dyn, ninst, gd); + ADDx_U12(x1, xEmu, offsetof(x64emu_t, xmm[gd])); + CALL(sha1msg2, -1); + } + break; + case 0xCB: + INST_NAME("SHA256RNDS2 Gx, Ex (, XMM0)"); + nextop = F8; + if(arm64_sha2) { + GETGX(q0, 1); + GETEX(q1, 0, 0); + d0 = sse_get_reg(dyn, ninst, x1, 0, 0); + v0 = fpu_get_scratch(dyn); + d1 = fpu_get_scratch(dyn); + if(MODREG) { + v1 = fpu_get_scratch(dyn); + } else + v1 = q1; + VREV64Q_32(q0, q0); + VREV64Q_32(v1, q1); + VZIP1Q_64(v0, v1, q0); + VZIP2Q_64(v1, v1, q0); + SHA256H(v1, v0, d0); + VREV64Q_32(d1, q1); + VZIP2Q_64(d1, d1, q0); + SHA256H2(v0, d1, d0); + VZIP2Q_64(q0, v0, v1); + VREV64Q_32(q0, q0); + } else { + if(MODREG) { + ed = (nextop&7)+(rex.b<<3); + sse_reflect_reg(dyn, ninst, ed); + ADDx_U12(x2, xEmu, offsetof(x64emu_t, xmm[ed])); + } else { + SMREAD(); + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0); + if(wback!=x2) { + MOVx_REG(x2, wback); + } + } + GETG; + sse_forget_reg(dyn, ninst, gd); 
+ ADDx_U12(x1, xEmu, offsetof(x64emu_t, xmm[gd])); + sse_reflect_reg(dyn, ninst, 0); + CALL(sha256rnds2, -1); + } + break; + case 0xCC: + INST_NAME("SHA256MSG1 Gx, Ex"); + nextop = F8; + if(arm64_sha2) { + GETGX(q0, 1); + GETEX(q1, 0, 0); + SHA256SU0(q0, q1); + } else { + if(MODREG) { + ed = (nextop&7)+(rex.b<<3); + sse_reflect_reg(dyn, ninst, ed); + ADDx_U12(x2, xEmu, offsetof(x64emu_t, xmm[ed])); + } else { + SMREAD(); + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0); + if(wback!=x2) { + MOVx_REG(x2, wback); + } + } + GETG; + sse_forget_reg(dyn, ninst, gd); + ADDx_U12(x1, xEmu, offsetof(x64emu_t, xmm[gd])); + CALL(sha256msg1, -1); + } + break; + case 0xCD: + INST_NAME("SHA256MSG2 Gx, Ex"); + nextop = F8; + if(arm64_sha2) { + GETGX(q0, 1); + GETEX(q1, 0, 0); + v0 = fpu_get_scratch(dyn); + v1 = fpu_get_scratch(dyn); + VEORQ(v1, v1, v1); + VMOVQ(v0, q0); + SHA256SU1(v0, v1, q1); // low v0 are ok + VTRNQ1_64(v0, v0, v0); // duplicate low to hi + VEXTQ_8(d0, q0, q0, 8); // swap high/low + SHA256SU1(d0, v1, v0); // low is destination high + VEXTQ_8(q0, v0, d0, 8); + } else { + if(MODREG) { + ed = (nextop&7)+(rex.b<<3); + sse_reflect_reg(dyn, ninst, ed); + ADDx_U12(x2, xEmu, offsetof(x64emu_t, xmm[ed])); + } else { + SMREAD(); + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0); + if(wback!=x2) { + MOVx_REG(x2, wback); + } + } + GETG; + sse_forget_reg(dyn, ninst, gd); + ADDx_U12(x1, xEmu, offsetof(x64emu_t, xmm[gd])); + CALL(sha256msg2, -1); + } + break; + case 0xF0: INST_NAME("MOVBE Gd, Ed"); nextop=F8; @@ -610,6 +783,81 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin VEXT_8(q0, q1, q0, u8); } break; + + case 0xCC: + INST_NAME("SHA1RNDS4 Gx, Ex, Ib"); + nextop = F8; + if(arm64_sha1) { + GETGX(q0, 1); + GETEX(q1, 0, 1); + u8 = F8&3; + d0 = fpu_get_scratch(dyn); + d1 = fpu_get_scratch(dyn); + v0 = fpu_get_scratch(dyn); + VEXTQ_8(v0, q0, q0, 8); 
+ VREV64Q_32(v0, v0); + VEORQ(d1, d1, d1); + if(MODREG) { + if(q0==q1) + v1 = v0; + else + v1 = fpu_get_scratch(dyn); + } else + v1 = q1; + if(v1!=v0) { + VEXTQ_8(v1, q1, q1, 8); + VREV64Q_32(v1, v1); + } + switch(u8) { + case 0: + MOV32w(x1, 0x5A827999); + VDUPQS(d0, x1); + VADDQ_32(v1, v1, d0); + SHA1C(v0, d1, v1); + break; + case 1: + MOV32w(x1, 0x6ED9EBA1); + VDUPQS(d0, x1); + VADDQ_32(v1, v1, d0); + SHA1P(v0, d1, v1); + break; + case 2: + MOV32w(x1, 0X8F1BBCDC); + VDUPQS(d0, x1); + VADDQ_32(v1, v1, d0); + SHA1M(v0, d1, v1); + break; + case 3: + MOV32w(x1, 0xCA62C1D6); + VDUPQS(d0, x1); + VADDQ_32(v1, v1, d0); + SHA1P(v0, d1, v1); + break; + } + VREV64Q_32(v0, v0); + VEXTQ_8(q0, v0, v0, 8); + break; + } else { + if(MODREG) { + ed = (nextop&7)+(rex.b<<3); + sse_reflect_reg(dyn, ninst, ed); + ADDx_U12(x2, xEmu, offsetof(x64emu_t, xmm[ed])); + } else { + SMREAD(); + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 1); + if(wback!=x2) { + MOVx_REG(x2, wback); + } + } + u8 = F8; + GETG; + sse_forget_reg(dyn, ninst, gd); + ADDx_U12(x1, xEmu, offsetof(x64emu_t, xmm[gd])); + MOV32w(x3, u8); + CALL(sha1rnds4, -1); + } + break; + default: DEFAULT; } |