author     ptitSeb <sebastien.chev@gmail.com>  2023-11-21 18:01:41 +0100
committer  ptitSeb <sebastien.chev@gmail.com>  2023-11-21 18:01:41 +0100
commit     4c7ac85ae885b61fe8c30daf9b7aba7886ee3f82 (patch)
tree       52cc6c97e8aaf1dafab4ab251b6642723e49a737 /src
parent     afe6fbe3b330929bde1a36712346bda2ffc7d9f9 (diff)
[ARM64_DYNAREC] Added full support for the x86 SHA CPU extension, using the ARM64 SHA1/SHA2 extensions if present
Diffstat (limited to 'src')
-rw-r--r--   src/dynarec/arm64/arm64_emitter.h      38
-rw-r--r--   src/dynarec/arm64/arm64_printer.c      57
-rw-r--r--   src/dynarec/arm64/dynarec_arm64_0f.c  248
3 files changed, 342 insertions, 1 deletion
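
The commit follows one pattern throughout: when the host CPU exposes the ARM64 SHA1/SHA2 extensions (the arm64_sha1/arm64_sha2 flags), each x86 SHA instruction is lowered to native crypto instructions; otherwise the dynarec falls back to a C helper from emu/x64shaext.h. The simplest case is SHA1NEXTE, where only the top dword needs a fixed rotate. A minimal stand-alone model of its semantics per the Intel SDM (hypothetical reference code, not part of the commit):

    #include <stdint.h>

    static inline uint32_t rol32(uint32_t v, int n) { return (v << n) | (v >> (32 - n)); }

    /* dst/src are the four 32-bit lanes of an XMM register, lane 0 = bits 31:0 */
    static void sha1nexte_ref(uint32_t dst[4], const uint32_t src[4])
    {
        dst[3] = rol32(dst[3], 30) + src[3]; /* ARM64 path: SHA1H (fixed ROL 30), then VADDQ_32 */
        dst[2] = src[2];
        dst[1] = src[1];
        dst[0] = src[0];
    }

This is why case 0xC8 below only rotates one lane (via SHA1H, or RORw by 2 on the scalar fallback) before a single vector add.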
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h
index 23842f28..8a828d00 100644
--- a/src/dynarec/arm64/arm64_emitter.h
+++ b/src/dynarec/arm64/arm64_emitter.h
@@ -1497,7 +1497,6 @@
 #define VTRNQ1_32(Vd, Vn, Vm)       EMIT(TRN_gen(1, 0b10, Vm, 0, Vn, Vd))
 #define VTRNQ1_16(Vd, Vn, Vm)       EMIT(TRN_gen(1, 0b01, Vm, 0, Vn, Vd))
 #define VTRNQ1_8(Vd, Vn, Vm)        EMIT(TRN_gen(1, 0b00, Vm, 0, Vn, Vd))
-#define VSWP(Vd, Vn)                VTRNQ1_64(Vd, Vn, Vn)
 #define VTRNQ2_64(Vd, Vn, Vm)       EMIT(TRN_gen(1, 0b11, Vm, 1, Vn, Vd))
 #define VTRNQ2_32(Vd, Vn, Vm)       EMIT(TRN_gen(1, 0b10, Vm, 1, Vn, Vd))
 #define VTRNQ2_16(Vd, Vn, Vm)       EMIT(TRN_gen(1, 0b01, Vm, 1, Vn, Vd))
@@ -2167,4 +2166,41 @@
 #define CRC32CX(Wd, Wn, Xm)         EMIT(CRC32C_gen(1, Xm, 0b11, Wn, Wd))
 #define CRC32Cxw(Wd, Wn, Rm)        EMIT(CRC32C_gen(rex.w, Rm, 0b10|rex.w, Wn, Wd))
 
+// SHA1 extension
+#define SHA1H_gen(Rn, Rd)       (0b01011110<<24 | 0b10100<<17 | 0b10<<10 | (Rn)<<5 | (Rd))
+// SHA1 fixed rotate (ROL by 30 of a 32-bit value)
+#define SHA1H(Sd, Sn)           EMIT(SHA1H_gen(Sn, Sd))
+
+#define SHA1SU1_gen(Rn, Rd)     (0b01011110<<24 | 0b10100<<17 | 0b00001<<12 | 0b10<<10 | (Rn)<<5 | (Rd))
+// SHA1 schedule update 1
+#define SHA1SU1(Vd, Vn)         EMIT(SHA1SU1_gen(Vn, Vd))
+
+#define SHA1C_gen(Rm, Rn, Rd)   (0b01011110<<24 | (Rm)<<16 | (Rn)<<5 | (Rd))
+// SHA1 hash update (choose)
+#define SHA1C(Qd, Sn, Vm)       EMIT(SHA1C_gen(Vm, Sn, Qd))
+
+#define SHA1M_gen(Rm, Rn, Rd)   (0b01011110<<24 | (Rm)<<16 | 0b010<<12 | (Rn)<<5 | (Rd))
+// SHA1 hash update (majority)
+#define SHA1M(Qd, Sn, Vm)       EMIT(SHA1M_gen(Vm, Sn, Qd))
+
+#define SHA1P_gen(Rm, Rn, Rd)   (0b01011110<<24 | (Rm)<<16 | 0b001<<12 | (Rn)<<5 | (Rd))
+// SHA1 hash update (parity)
+#define SHA1P(Qd, Sn, Vm)       EMIT(SHA1P_gen(Vm, Sn, Qd))
+
+#define SHA256SU0_gen(Rn,Rd)    (0b01011110<<24 | 0b10100<<17 | 0b00010<<12 | 0b10<<10 | (Rn)<<5 | (Rd))
+// SHA256 schedule update 0
+#define SHA256SU0(Vd, Vn)       EMIT(SHA256SU0_gen(Vn, Vd))
+
+#define SHA256SU1_gen(Rm, Rn, Rd)   (0b01011110<<24 | (Rm)<<16 | 0b110<<12 | (Rn)<<5 | (Rd))
+// SHA256 schedule update 1
+#define SHA256SU1(Vd, Vn, Vm)       EMIT(SHA256SU1_gen(Vm, Vn, Vd))
+
+#define SHA256H_gen(Rm, Rn, Rd) (0b01011110<<24 | (Rm)<<16 | 0b100<<12 | (Rn)<<5 | (Rd))
+// SHA256 hash update (part 1)
+#define SHA256H(Vd, Vn, Vm)     EMIT(SHA256H_gen(Vm, Vn, Vd))
+
+#define SHA256H2_gen(Rm, Rn, Rd)    (0b01011110<<24 | (Rm)<<16 | 0b101<<12 | (Rn)<<5 | (Rd))
+// SHA256 hash update (part 2)
+#define SHA256H2(Vd, Vn, Vm)        EMIT(SHA256H2_gen(Vm, Vn, Vd))
+
 #endif  //__ARM64_EMITTER_H__
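
These macros build fixed 32-bit A64 encodings by OR-ing bit fields together, so each one can be sanity-checked against the encoding tables in the Arm ARM. A quick stand-alone check for SHA1H (EMIT() is box64-internal, replaced here by a plain print; the expected value is the documented encoding):

    #include <stdint.h>
    #include <stdio.h>

    #define SHA1H_gen(Rn, Rd)   (0b01011110<<24 | 0b10100<<17 | 0b10<<10 | (Rn)<<5 | (Rd))

    int main(void)
    {
        uint32_t op = SHA1H_gen(1, 0);  /* SHA1H S0, S1 */
        printf("%08X\n", op);           /* prints 5E280820, matching the Arm ARM encoding */
        return 0;
    }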
diff --git a/src/dynarec/arm64/arm64_printer.c b/src/dynarec/arm64/arm64_printer.c
index f8d09c89..eca8c632 100644
--- a/src/dynarec/arm64/arm64_printer.c
+++ b/src/dynarec/arm64/arm64_printer.c
@@ -1692,6 +1692,63 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
         snprintf(buff, sizeof(buff), "SETF%d %s", 8<<sf, Xt[Rn]);
         return buff;
     }
+    // REV64
+    if(isMask(opcode, "0Q001110ff100000000010nnnnnddddd", &a)) {
+        const char* T[] = {"8B", "16B", "4H", "8H", "2S", "4S", "??", "??"};
+        int sz = sf;
+        const char* Vn = T[(sz<<1)|a.Q];
+        const char* Vd = T[(sz<<1)|a.Q];
+        snprintf(buff, sizeof(buff), "REV64 V%d.%s, V%d.%s", Rd, Vd, Rn, Vn);
+        return buff;
+    }
+    // TRNx
+    if(isMask(opcode, "0Q001110ff0mmmmm0o1010nnnnnddddd", &a)) {
+        const char* T[] = {"8B", "16B", "4H", "8H", "2S", "4S", "??", "2D"};
+        int sz = sf;
+        const char* Vn = T[(sz<<1)|a.Q];
+        const char* Vm = T[(sz<<1)|a.Q];
+        const char* Vd = T[(sz<<1)|a.Q];
+        snprintf(buff, sizeof(buff), "TRN%d V%d.%s, V%d.%s, V%d.%s", a.o+1, Rd, Vd, Rn, Vn, Rm, Vm);
+        return buff;
+    }
+    // SHA1 instructions
+    if(isMask(opcode, "0101111000101000000010nnnnnddddd", &a)) {
+        snprintf(buff, sizeof(buff), "SHA1H S%d, S%d", Rd, Rn);
+        return buff;
+    }
+    if(isMask(opcode, "0101111000101000000110nnnnnddddd", &a)) {
+        snprintf(buff, sizeof(buff), "SHA1SU1 V%d.4S, V%d.4S", Rd, Rn);
+        return buff;
+    }
+    if(isMask(opcode, "01011110000mmmmm000000nnnnnddddd", &a)) {
+        snprintf(buff, sizeof(buff), "SHA1C Q%d, S%d, V%d.4S", Rd, Rn, Rm);
+        return buff;
+    }
+    if(isMask(opcode, "01011110000mmmmm001000nnnnnddddd", &a)) {
+        snprintf(buff, sizeof(buff), "SHA1M Q%d, S%d, V%d.4S", Rd, Rn, Rm);
+        return buff;
+    }
+    if(isMask(opcode, "01011110000mmmmm000100nnnnnddddd", &a)) {
+        snprintf(buff, sizeof(buff), "SHA1P Q%d, S%d, V%d.4S", Rd, Rn, Rm);
+        return buff;
+    }
+    // SHA256 instructions
+    if(isMask(opcode, "0101111000101000001010nnnnnddddd", &a)) {
+        snprintf(buff, sizeof(buff), "SHA256SU0 V%d.4S, V%d.4S", Rd, Rn);
+        return buff;
+    }
+    if(isMask(opcode, "01011110000mmmmm011000nnnnnddddd", &a)) {
+        snprintf(buff, sizeof(buff), "SHA256SU1 V%d.4S, V%d.4S, V%d.4S", Rd, Rn, Rm);
+        return buff;
+    }
+    if(isMask(opcode, "01011110000mmmmm010000nnnnnddddd", &a)) {
+        snprintf(buff, sizeof(buff), "SHA256H Q%d, Q%d, V%d.4S", Rd, Rn, Rm);
+        return buff;
+    }
+    if(isMask(opcode, "01011110000mmmmm010100nnnnnddddd", &a)) {
+        snprintf(buff, sizeof(buff), "SHA256H2 Q%d, Q%d, V%d.4S", Rd, Rn, Rm);
+        return buff;
+    }
 
     snprintf(buff, sizeof(buff), "%08X ???", __builtin_bswap32(opcode));
     return buff;
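
Each new printer entry matches the opcode against a 32-character pattern where '0'/'1' are fixed bits and letters name extracted fields (Rn, Rd, Q, size, ...). isMask() is box64-internal; a simplified stand-in showing the idea (illustrative sketch only, not the real implementation):

    #include <stdint.h>

    /* '0'/'1' must match literally; 'n' and 'd' bits accumulate into Rn/Rd. */
    static int match_pattern(uint32_t opcode, const char pat[32], int* rn, int* rd)
    {
        *rn = *rd = 0;
        for (int i = 0; i < 32; ++i) {
            int bit = (opcode >> (31 - i)) & 1;
            switch (pat[i]) {
                case '0': if (bit != 0) return 0; break;
                case '1': if (bit != 1) return 0; break;
                case 'n': *rn = (*rn << 1) | bit; break;
                case 'd': *rd = (*rd << 1) | bit; break;
                default: break;  /* other field letters ignored in this sketch */
            }
        }
        return 1;
    }

    /* match_pattern(0x5E280820, "0101111000101000000010nnnnnddddd", &rn, &rd)
       succeeds with rn == 1, rd == 0, i.e. SHA1H S0, S1. */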

diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c
index b91128b9..7565c924 100644
--- a/src/dynarec/arm64/dynarec_arm64_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_0f.c
@@ -17,6 +17,7 @@
 #include "dynarec_native.h"
 #include "my_cpuid.h"
 #include "emu/x87emu_private.h"
+#include "emu/x64shaext.h"
 
 #include "arm64_printer.h"
 #include "dynarec_arm64_private.h"
@@ -559,6 +560,178 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     GETEM(q1, 0);
                     ABS_32(q0, q1);
                     break;
+
+                case 0xC8:
+                    INST_NAME("SHA1NEXTE Gx, Ex");
+                    nextop = F8;
+                    GETGX(q0, 1);
+                    GETEX(q1, 0, 0);
+                    v0 = fpu_get_scratch(dyn);
+                    VEORQ(v0, v0, v0);
+                    if(arm64_sha1) {
+                        v1 = fpu_get_scratch(dyn);
+                        VMOVeS(v1, 0, q0, 3);
+                        SHA1H(v1, v1);
+                        VMOVeS(v0, 3, v1, 0);
+                    } else {
+                        VMOVSto(x1, q0, 3);
+                        RORw(x1, x1, 2);    // i.e. ROL 30
+                        VMOVQSfrom(v0, 3, x1);
+                    }
+                    VADDQ_32(q0, v0, q1);
+                    break;
+                case 0xC9:
+                    INST_NAME("SHA1MSG1 Gx, Ex");
+                    nextop = F8;
+                    GETGX(q0, 1);
+                    GETEX(q1, 0, 0);
+                    v0 = fpu_get_scratch(dyn);
+                    VEXTQ_8(v0, q1, q0, 8);
+                    VEORQ(q0, q0, v0);
+                    break;
+                case 0xCA:
+                    INST_NAME("SHA1MSG2 Gx, Ex");
+                    nextop = F8;
+                    if(arm64_sha1) {
+                        GETGX(q0, 1);
+                        GETEX(q1, 0, 0);
+                        VEXTQ_8(q0, q0, q0, 8);
+                        VREV64Q_32(q0, q0);
+                        if(MODREG) {
+                            if(q0==q1)
+                                v0 = q0;
+                            else {
+                                v0 = fpu_get_scratch(dyn);
+                                VEXTQ_8(v0, q1, q1, 8);
+                                VREV64Q_32(v0, v0);
+                            }
+                        } else {
+                            v0 = q1;
+                            VEXTQ_8(v0, q1, q1, 8);
+                            VREV64Q_32(v0, v0);
+                        }
+                        SHA1SU1(q0, v0);
+                        VEXTQ_8(q0, q0, q0, 8);
+                        VREV64Q_32(q0, q0);
+                    } else {
+                        if(MODREG) {
+                            ed = (nextop&7)+(rex.b<<3);
+                            sse_reflect_reg(dyn, ninst, ed);
+                            ADDx_U12(x2, xEmu, offsetof(x64emu_t, xmm[ed]));
+                        } else {
+                            SMREAD();
+                            addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
+                            if(wback!=x2) {
+                                MOVx_REG(x2, wback);
+                            }
+                        }
+                        GETG;
+                        sse_forget_reg(dyn, ninst, gd);
+                        ADDx_U12(x1, xEmu, offsetof(x64emu_t, xmm[gd]));
+                        CALL(sha1msg2, -1);
+                    }
+                    break;
+                case 0xCB:
+                    INST_NAME("SHA256RNDS2 Gx, Ex (, XMM0)");
+                    nextop = F8;
+                    if(arm64_sha2) {
+                        GETGX(q0, 1);
+                        GETEX(q1, 0, 0);
+                        d0 = sse_get_reg(dyn, ninst, x1, 0, 0);
+                        v0 = fpu_get_scratch(dyn);
+                        d1 = fpu_get_scratch(dyn);
+                        if(MODREG) {
+                            v1 = fpu_get_scratch(dyn);
+                        } else
+                            v1 = q1;
+                        VREV64Q_32(q0, q0);
+                        VREV64Q_32(v1, q1);
+                        VZIP1Q_64(v0, v1, q0);
+                        VZIP2Q_64(v1, v1, q0);
+                        SHA256H(v1, v0, d0);
+                        VREV64Q_32(d1, q1);
+                        VZIP2Q_64(d1, d1, q0);
+                        SHA256H2(v0, d1, d0);
+                        VZIP2Q_64(q0, v0, v1);
+                        VREV64Q_32(q0, q0);
+                    } else {
+                        if(MODREG) {
+                            ed = (nextop&7)+(rex.b<<3);
+                            sse_reflect_reg(dyn, ninst, ed);
+                            ADDx_U12(x2, xEmu, offsetof(x64emu_t, xmm[ed]));
+                        } else {
+                            SMREAD();
+                            addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
+                            if(wback!=x2) {
+                                MOVx_REG(x2, wback);
+                            }
+                        }
+                        GETG;
+                        sse_forget_reg(dyn, ninst, gd);
+                        ADDx_U12(x1, xEmu, offsetof(x64emu_t, xmm[gd]));
+                        sse_reflect_reg(dyn, ninst, 0);
+                        CALL(sha256rnds2, -1);
+                    }
+                    break;
+                case 0xCC:
+                    INST_NAME("SHA256MSG1 Gx, Ex");
+                    nextop = F8;
+                    if(arm64_sha2) {
+                        GETGX(q0, 1);
+                        GETEX(q1, 0, 0);
+                        SHA256SU0(q0, q1);
+                    } else {
+                        if(MODREG) {
+                            ed = (nextop&7)+(rex.b<<3);
+                            sse_reflect_reg(dyn, ninst, ed);
+                            ADDx_U12(x2, xEmu, offsetof(x64emu_t, xmm[ed]));
+                        } else {
+                            SMREAD();
+                            addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
+                            if(wback!=x2) {
+                                MOVx_REG(x2, wback);
+                            }
+                        }
+                        GETG;
+                        sse_forget_reg(dyn, ninst, gd);
+                        ADDx_U12(x1, xEmu, offsetof(x64emu_t, xmm[gd]));
+                        CALL(sha256msg1, -1);
+                    }
+                    break;
+                case 0xCD:
+                    INST_NAME("SHA256MSG2 Gx, Ex");
+                    nextop = F8;
+                    if(arm64_sha2) {
+                        GETGX(q0, 1);
+                        GETEX(q1, 0, 0);
+                        v0 = fpu_get_scratch(dyn);
+                        v1 = fpu_get_scratch(dyn);
+                        d0 = fpu_get_scratch(dyn); // scratch for the high half
+                        VEORQ(v1, v1, v1);
+                        VMOVQ(v0, q0);
+                        SHA256SU1(v0, v1, q1);  // low half of v0 is correct
+                        VTRNQ1_64(v0, v0, v0);  // duplicate low to hi
+                        VEXTQ_8(d0, q0, q0, 8); // swap high/low
+                        SHA256SU1(d0, v1, v0);  // its low half is the destination's high half
+                        VEXTQ_8(q0, v0, d0, 8);
+                    } else {
+                        if(MODREG) {
+                            ed = (nextop&7)+(rex.b<<3);
+                            sse_reflect_reg(dyn, ninst, ed);
+                            ADDx_U12(x2, xEmu, offsetof(x64emu_t, xmm[ed]));
+                        } else {
+                            SMREAD();
+                            addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
+                            if(wback!=x2) {
+                                MOVx_REG(x2, wback);
+                            }
+                        }
+                        GETG;
+                        sse_forget_reg(dyn, ninst, gd);
+                        ADDx_U12(x1, xEmu, offsetof(x64emu_t, xmm[gd]));
+                        CALL(sha256msg2, -1);
+                    }
+                    break;
+
                 case 0xF0:
                     INST_NAME("MOVBE Gd, Ed");
                     nextop=F8;
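
A recurring idiom above is VEXTQ_8(v, v, v, 8) followed by VREV64Q_32(v, v): swapping the 64-bit halves and then reversing the 32-bit elements inside each half reverses all four dwords, which converts between the x86 lane order and the order the ARM64 SHA1 instructions expect. A plain-C model of just that lane shuffle (sketch, not box64 code):

    #include <stdint.h>

    static void ext8_then_rev64_32(uint32_t w[4])
    {
        /* VEXTQ_8(v, v, v, 8): rotate the 128-bit vector by 8 bytes (swap halves) */
        uint32_t t[4] = { w[2], w[3], w[0], w[1] };
        /* VREV64Q_32: swap the two 32-bit elements within each 64-bit lane */
        w[0] = t[1]; w[1] = t[0]; w[2] = t[3]; w[3] = t[2];
        /* net effect: { w0, w1, w2, w3 } -> { w3, w2, w1, w0 } */
    }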

@@ -610,6 +783,81 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         VEXT_8(q0, q1, q0, u8);
                     }
                     break;
+
+                case 0xCC:
+                    INST_NAME("SHA1RNDS4 Gx, Ex, Ib");
+                    nextop = F8;
+                    if(arm64_sha1) {
+                        GETGX(q0, 1);
+                        GETEX(q1, 0, 1);
+                        u8 = F8&3;
+                        d0 = fpu_get_scratch(dyn);
+                        d1 = fpu_get_scratch(dyn);
+                        v0 = fpu_get_scratch(dyn);
+                        VEXTQ_8(v0, q0, q0, 8);
+                        VREV64Q_32(v0, v0);
+                        VEORQ(d1, d1, d1);
+                        if(MODREG) {
+                            if(q0==q1)
+                                v1 = v0;
+                            else
+                                v1 = fpu_get_scratch(dyn);
+                        } else
+                            v1 = q1;
+                        if(v1!=v0) {
+                            VEXTQ_8(v1, q1, q1, 8);
+                            VREV64Q_32(v1, v1);
+                        }
+                        switch(u8) {
+                            case 0:
+                                MOV32w(x1, 0x5A827999);
+                                VDUPQS(d0, x1);
+                                VADDQ_32(v1, v1, d0);
+                                SHA1C(v0, d1, v1);
+                                break;
+                            case 1:
+                                MOV32w(x1, 0x6ED9EBA1);
+                                VDUPQS(d0, x1);
+                                VADDQ_32(v1, v1, d0);
+                                SHA1P(v0, d1, v1);
+                                break;
+                            case 2:
+                                MOV32w(x1, 0x8F1BBCDC);
+                                VDUPQS(d0, x1);
+                                VADDQ_32(v1, v1, d0);
+                                SHA1M(v0, d1, v1);
+                                break;
+                            case 3:
+                                MOV32w(x1, 0xCA62C1D6);
+                                VDUPQS(d0, x1);
+                                VADDQ_32(v1, v1, d0);
+                                SHA1P(v0, d1, v1);
+                                break;
+                        }
+                        VREV64Q_32(v0, v0);
+                        VEXTQ_8(q0, v0, v0, 8);
+                        break;
+                    } else {
+                        if(MODREG) {
+                            ed = (nextop&7)+(rex.b<<3);
+                            sse_reflect_reg(dyn, ninst, ed);
+                            ADDx_U12(x2, xEmu, offsetof(x64emu_t, xmm[ed]));
+                        } else {
+                            SMREAD();
+                            addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 1);
+                            if(wback!=x2) {
+                                MOVx_REG(x2, wback);
+                            }
+                        }
+                        u8 = F8;
+                        GETG;
+                        sse_forget_reg(dyn, ninst, gd);
+                        ADDx_U12(x1, xEmu, offsetof(x64emu_t, xmm[gd]));
+                        MOV32w(x3, u8);
+                        CALL(sha1rnds4, -1);
+                    }
+                    break;
+
                 default:
                     DEFAULT;
             }
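
On CPUs without the SHA extensions, every case above funnels into CALL(helper, -1) with pointers to the emulated XMM registers passed in x1/x2. What such a helper has to compute is fully specified by the Intel SDM; for example SHA1MSG2 (hedged reference model; the actual helper in emu/x64shaext.h may be shaped differently):

    #include <stdint.h>

    static inline uint32_t rol1(uint32_t v) { return (v << 1) | (v >> 31); }

    /* Lane 0 = bits 31:0 of the XMM register; dst is SRC1/DEST, src is SRC2. */
    static void sha1msg2_ref(uint32_t dst[4], const uint32_t src[4])
    {
        uint32_t w16 = rol1(dst[3] ^ src[2]);  /* (SRC1[127:96] ^ W13) rol 1 */
        uint32_t w17 = rol1(dst[2] ^ src[1]);  /* (SRC1[95:64]  ^ W14) rol 1 */
        uint32_t w18 = rol1(dst[1] ^ src[0]);  /* (SRC1[63:32]  ^ W15) rol 1 */
        uint32_t w19 = rol1(dst[0] ^ w16);     /* (SRC1[31:0]   ^ W16) rol 1 */
        dst[3] = w16; dst[2] = w17; dst[1] = w18; dst[0] = w19;
    }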