about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2023-11-21 18:00:43 +0100
committer ptitSeb <sebastien.chev@gmail.com> 2023-11-21 18:00:58 +0100
commit afe6fbe3b330929bde1a36712346bda2ffc7d9f9 (patch)
tree e79e73f8bf46cf7cee4c667954d53fd0047147fe /src
parent 39568bff2fe3dc8d907d0738ccca4c14501cd808 (diff)
download box64-afe6fbe3b330929bde1a36712346bda2ffc7d9f9.tar.gz
box64-afe6fbe3b330929bde1a36712346bda2ffc7d9f9.zip
Added full support for the SHA cpu extension
Diffstat (limited to 'src')
-rw-r--r-- src/emu/x64run0f.c 46
-rw-r--r-- src/emu/x64shaext.c 194
-rw-r--r-- src/emu/x64shaext.h 18
-rw-r--r-- src/include/debug.h 2
-rw-r--r-- src/main.c 14
-rw-r--r-- src/tools/my_cpuid.c 9
6 files changed, 279 insertions, 4 deletions
diff --git a/src/emu/x64run0f.c b/src/emu/x64run0f.c
index 36482ca3..739e35b1 100644
--- a/src/emu/x64run0f.c
+++ b/src/emu/x64run0f.c
@@ -22,6 +22,7 @@
 #include "my_cpuid.h"

 #include "bridge.h"

 #include "signals.h"

+#include "x64shaext.h"

 #ifdef DYNAREC

 #include "custommem.h"

 #include "../dynarec/native_lock.h"

@@ -340,6 +341,43 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step)
                     }

                     break;

 

+                case 0xC8:  /* SHA1NEXTE Gx, Ex */

+                    nextop = F8;

+                    GETGX;

+                    GETEX(0);

+                    sha1nexte(emu, GX, EX);

+                    break;

+                case 0xC9:  /* SHA1MSG1 Gx, Ex */

+                    nextop = F8;

+                    GETGX;

+                    GETEX(0);

+                    sha1msg1(emu, GX, EX);

+                    break;

+                case 0xCA:  /* SHA1MSG2 Gx, Ex */

+                    nextop = F8;

+                    GETGX;

+                    GETEX(0);

+                    sha1msg2(emu, GX, EX);

+                    break;

+                case 0xCB:  /* SHA256RNDS2 Gx, Ex (, XMM0) */

+                    nextop = F8;

+                    GETGX;

+                    GETEX(0);

+                    sha256rnds2(emu, GX, EX);

+                    break;

+                case 0xCC:  /* SHA256MSG1 Gx, Ex */

+                    nextop = F8;

+                    GETGX;

+                    GETEX(0);

+                    sha256msg1(emu, GX, EX);

+                    break;

+                case 0xCD:  /* SHA256MSG2 Gx, Ex */

+                    nextop = F8;

+                    GETGX;

+                    GETEX(0);

+                    sha256msg2(emu, GX, EX);

+                    break;

+

                 case 0xF0: /* MOVBE Gd, Ed*/

                     nextop = F8;

                     GETGD;

@@ -389,6 +427,14 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step)
                     }

                     break;

 

+                case 0xCC:  /* SHA1RNDS4 Gx, Ex, Ib */

+                    nextop = F8;

+                    GETGX;

+                    GETEX(1);

+                    tmp8u = F8;

+                    sha1rnds4(emu, GX, EX, tmp8u);

+                    break;

+

                 default:

                     return 0;

             }

diff --git a/src/emu/x64shaext.c b/src/emu/x64shaext.c
new file mode 100644
index 00000000..a0533bb3
--- /dev/null
+++ b/src/emu/x64shaext.c
@@ -0,0 +1,194 @@
+#include <stdint.h>
+
+#include "box64stack.h"
+#include "x64emu.h"
+#include "x64run_private.h"
+#include "x64emu_private.h"
+#include "x64shaext.h"
+
+static uint32_t rol(uint32_t a, int n)  // rotate the 32-bit value a left by n bit positions
+{
+    n = n&31;  // keep only the low 5 bits, so the count is reduced modulo 32
+    if(!n)  // a zero count returns a as-is; otherwise a>>(32-n) below would shift by the full width
+        return a;
+    return (a<<n) | (a>>(32-n));  // bits pushed out of the top re-enter at the bottom
+}
+
+static uint32_t ror(uint32_t a, int n)  // rotate the 32-bit value a right by n bit positions
+{
+    n = n&31;  // keep only the low 5 bits, so the count is reduced modulo 32
+    if(!n)  // a zero count returns a as-is; otherwise a<<(32-n) below would shift by the full width
+        return a;
+    return (a>>n) | (a<<(32-n));  // bits pushed out of the bottom re-enter at the top
+}
+
+static uint32_t f0(uint32_t B, uint32_t C, uint32_t D)  // bitwise select; sha1rnds4 picks it when (ib&3)==0
+{
+    return (B & C) ^ ((~B) & D);  // each result bit comes from C where B is 1 and from D where B is 0
+}
+
+static uint32_t f1(uint32_t B, uint32_t C, uint32_t D)  // bitwise parity; sha1rnds4 picks it when (ib&3)==1
+{
+    return B ^ C ^ D;  // a result bit is 1 when an odd number of the three input bits are 1
+}
+
+static uint32_t f2(uint32_t B, uint32_t C, uint32_t D)  // bitwise majority; sha1rnds4 picks it when (ib&3)==2
+{
+    return (B & C) ^ (B & D) ^ (C & D);  // a result bit is 1 when at least two of the three input bits are 1
+}
+
+static uint32_t f3(uint32_t B, uint32_t C, uint32_t D)  // bitwise parity; sha1rnds4 picks it when (ib&3)==3
+{
+    return B ^ C ^ D;  // same expression as f1
+}
+
+static uint32_t Ch(uint32_t E, uint32_t F, uint32_t G)  // bitwise select used by both sums in sha256rnds2
+{
+    return (E & F) ^ ((~E) & G);  // each result bit comes from F where E is 1 and from G where E is 0
+}
+
+static uint32_t Maj(uint32_t A, uint32_t B, uint32_t C)  // bitwise majority used for new_A in sha256rnds2
+{
+    return (A & B) ^ (A & C) ^ (B & C);  // a result bit is 1 when at least two of the three input bits are 1
+}
+
+static uint32_t sigma0(uint32_t A)  // combines three right-rotations of A; sha256rnds2 adds it into new_A
+{
+    return ror(A, 2) ^ ror(A, 13) ^ ror(A, 22);  // XOR of A rotated right by 2, 13 and 22
+}
+static uint32_t sigma1(uint32_t E)  // combines three right-rotations of E; used by both sums in sha256rnds2
+{
+    return ror(E, 6) ^ ror(E, 11) ^ ror(E, 25);  // XOR of E rotated right by 6, 11 and 25
+}
+static uint32_t tho0(uint32_t W)  // word-mixing helper used by sha256msg1
+{
+    return ror(W, 7) ^ ror(W, 18) ^ (W>>3);  // two rotations plus a plain right shift (the shift discards bits)
+}
+static uint32_t tho1(uint32_t W)  // word-mixing helper used by sha256msg2
+{
+    return ror(W, 17) ^ ror(W, 19) ^ (W>>10);  // two rotations plus a plain right shift (the shift discards bits)
+}
+
+static const uint32_t Ks[] = { 0x5A827999, 0x6ED9EBA1, 0X8F1BBCDC, 0xCA62C1D6 };  // per-round additive constants; sha1rnds4 picks one with ib&3
+
+void sha1nexte(x64emu_t* emu, sse_regs_t* xmm1, sse_regs_t* xmm2)  // SHA1NEXTE: xmm1 is read and overwritten, xmm2 is only read; emu is unused here
+{
+    uint32_t tmp = rol(xmm1->ud[3], 30);  // taken from xmm1 before any element of xmm1 is overwritten
+    xmm1->ud[3] = xmm2->ud[3] + tmp;  // element 3 is xmm2's element 3 plus the rotated old value
+    xmm1->ud[2] = xmm2->ud[2];  // elements 2..0 are copied from xmm2 unchanged
+    xmm1->ud[1] = xmm2->ud[1];
+    xmm1->ud[0] = xmm2->ud[0];
+}
+
+void sha1msg1(x64emu_t* emu, sse_regs_t* xmm1, sse_regs_t* xmm2)  // SHA1MSG1: xmm1 is read and overwritten, xmm2 is only read; emu is unused here
+{
+    uint32_t w0 = xmm1->ud[3];  // w0..w3 are xmm1's elements 3 down to 0
+    uint32_t w1 = xmm1->ud[2];
+    uint32_t w2 = xmm1->ud[1];
+    uint32_t w3 = xmm1->ud[0];
+    uint32_t w4 = xmm2->ud[3];  // w4 and w5 are xmm2's elements 3 and 2; elements 1 and 0 of xmm2 are not read
+    uint32_t w5 = xmm2->ud[2];
+    xmm1->ud[3] = w2 ^ w0;  // every input was copied to a local above, so these writes cannot disturb later reads
+    xmm1->ud[2] = w3 ^ w1;  // each result is the XOR of two words whose indices differ by two
+    xmm1->ud[1] = w4 ^ w2;
+    xmm1->ud[0] = w5 ^ w3;
+}
+
+void sha1msg2(x64emu_t* emu, sse_regs_t* xmm1, sse_regs_t* xmm2)  // SHA1MSG2: xmm1 is read and overwritten, xmm2 is only read; emu is unused here
+{
+    uint32_t w13 = xmm2->ud[2];  // w13..w15 are xmm2's elements 2 down to 0; element 3 of xmm2 is not read
+    uint32_t w14 = xmm2->ud[1];
+    uint32_t w15 = xmm2->ud[0];
+    uint32_t w16 = rol(xmm1->ud[3] ^ w13, 1);  // each new word is an xmm1 element XORed with another word, rotated left by 1
+    uint32_t w17 = rol(xmm1->ud[2] ^ w14, 1);
+    uint32_t w18 = rol(xmm1->ud[1] ^ w15, 1);
+    uint32_t w19 = rol(xmm1->ud[0] ^ w16, 1);  // depends on w16 computed just above, not on xmm2
+    xmm1->ud[3] = w16;  // xmm1 is written only after all four words are computed
+    xmm1->ud[2] = w17;
+    xmm1->ud[1] = w18;
+    xmm1->ud[0] = w19;
+}
+
+void sha256msg1(x64emu_t* emu, sse_regs_t* xmm1, sse_regs_t* xmm2)  // SHA256MSG1: xmm1 is read and overwritten, xmm2 is only read; emu is unused here
+{
+    uint32_t w4 = xmm2->ud[0];  // only element 0 of xmm2 is read
+    uint32_t w3 = xmm1->ud[3];  // w3..w0 are xmm1's elements 3 down to 0
+    uint32_t w2 = xmm1->ud[2];
+    uint32_t w1 = xmm1->ud[1];
+    uint32_t w0 = xmm1->ud[0];
+    xmm1->ud[3] = w3 + tho0(w4);  // each element gets tho0 of the next-higher word added to it
+    xmm1->ud[2] = w2 + tho0(w3);
+    xmm1->ud[1] = w1 + tho0(w2);
+    xmm1->ud[0] = w0 + tho0(w1);
+}
+
+void sha256msg2(x64emu_t* emu, sse_regs_t* xmm1, sse_regs_t* xmm2)  // SHA256MSG2: xmm1 is read and overwritten, xmm2 is only read; emu is unused here
+{
+    uint32_t w14 = xmm2->ud[2];  // only elements 2 and 3 of xmm2 are read
+    uint32_t w15 = xmm2->ud[3];
+    uint32_t w16 = xmm1->ud[0] + tho1(w14);  // the first two new words use the words taken from xmm2
+    uint32_t w17 = xmm1->ud[1] + tho1(w15);
+    uint32_t w18 = xmm1->ud[2] + tho1(w16);  // the last two use w16 and w17 computed just above
+    uint32_t w19 = xmm1->ud[3] + tho1(w17);
+    xmm1->ud[3] = w19;  // xmm1 is written only after all four words are computed
+    xmm1->ud[2] = w18;
+    xmm1->ud[1] = w17;
+    xmm1->ud[0] = w16;
+}
+
+void sha1rnds4(x64emu_t* emu, sse_regs_t* xmm1, sse_regs_t* xmm2, uint8_t ib)  // SHA1RNDS4: runs four rounds on the state in xmm1 using the words in xmm2; ib is the immediate byte; emu is unused here
+{
+    uint32_t K = Ks[ib&3];  // only the low two bits of ib are used, for both the constant and the function
+    uint32_t(*f)(uint32_t , uint32_t , uint32_t) = NULL;  // round function, chosen by the switch below
+    switch (ib&3) {  // the four cases cover every value of ib&3, so f is always assigned before the loop
+        case 0: f = f0; break;
+        case 1: f = f1; break;
+        case 2: f = f2; break;
+        case 3: f = f3; break;
+    }
+    uint32_t A = xmm1->ud[3];  // A..D are xmm1's elements 3 down to 0
+    uint32_t B = xmm1->ud[2];
+    uint32_t C = xmm1->ud[1];
+    uint32_t D = xmm1->ud[0];
+    uint32_t E = 0;  // starts at zero, so the first round adds no E contribution
+    for(int i=0; i<4; ++i) {  // round i consumes xmm2->ud[3-i], i.e. elements 3, 2, 1, 0 in that order
+        uint32_t new_A = f(B, C, D) + rol(A, 5) + xmm2->ud[3-i] + E + K;  // computed from the current values before they shift
+        E = D;  // shift the working values down one position
+        D = C;
+        C = rol(B, 30);  // B is rotated left by 30 as it moves into C
+        B = A;
+        A = new_A;
+    }
+    xmm1->ud[3] = A;  // E is not stored; only A..D are written back to xmm1
+    xmm1->ud[2] = B;
+    xmm1->ud[1] = C;
+    xmm1->ud[0] = D;
+}
+
+void sha256rnds2(x64emu_t* emu, sse_regs_t* xmm1, sse_regs_t* xmm2)  // SHA256RNDS2: runs two rounds; the eight working values come from xmm1 and xmm2, and emu->xmm[0] supplies one extra word per round
+{
+    uint32_t A = xmm2->ud[3];  // A, B, E, F are loaded from xmm2
+    uint32_t B = xmm2->ud[2];
+    uint32_t C = xmm1->ud[3];  // C, D, G, H are loaded from xmm1
+    uint32_t D = xmm1->ud[2];
+    uint32_t E = xmm2->ud[1];
+    uint32_t F = xmm2->ud[0];
+    uint32_t G = xmm1->ud[1];
+    uint32_t H = xmm1->ud[0];
+    for(int i=0; i<2; ++i) {  // round i reads emu->xmm[0].ud[i]; xmm1 is not written until after the loop
+        uint32_t new_A = Ch(E, F, G) + sigma1(E) + emu->xmm[0].ud[i] + H + Maj(A, B, C) + sigma0(A);  // shares its first four terms with new_E
+        uint32_t new_E = Ch(E, F, G) + sigma1(E) + emu->xmm[0].ud[i] + H + D;
+        H = G;  // shift the working values one position; both new values were computed from the old state above
+        G = F;
+        F = E;
+        E = new_E;
+        D = C;
+        C = B;
+        B = A;
+        A = new_A;
+    }
+    xmm1->ud[3] = A;  // only A, B, E, F are stored back; C, D, G, H are discarded
+    xmm1->ud[2] = B;
+    xmm1->ud[1] = E;
+    xmm1->ud[0] = F;
+}
\ No newline at end of file
diff --git a/src/emu/x64shaext.h b/src/emu/x64shaext.h
new file mode 100644
index 00000000..a86102dc
--- /dev/null
+++ b/src/emu/x64shaext.h
@@ -0,0 +1,18 @@
+#ifndef __X64_SHAEXT_H__
+#define __X64_SHAEXT_H__
+
+#include <stdint.h>
+
+#include "regs.h"
+
+typedef struct x64emu_s x64emu_t;  // forward declaration: the prototypes below only need a pointer to the emulator state
+
+void sha1nexte(x64emu_t* emu, sse_regs_t* xmm1, sse_regs_t* xmm2);  // SHA1NEXTE; xmm1 is the operand that gets overwritten
+void sha1msg1(x64emu_t* emu, sse_regs_t* xmm1, sse_regs_t* xmm2);  // SHA1MSG1; xmm1 is the operand that gets overwritten
+void sha1msg2(x64emu_t* emu, sse_regs_t* xmm1, sse_regs_t* xmm2);  // SHA1MSG2; xmm1 is the operand that gets overwritten
+void sha256msg1(x64emu_t* emu, sse_regs_t* xmm1, sse_regs_t* xmm2);  // SHA256MSG1; xmm1 is the operand that gets overwritten
+void sha256msg2(x64emu_t* emu, sse_regs_t* xmm1, sse_regs_t* xmm2);  // SHA256MSG2; xmm1 is the operand that gets overwritten
+void sha1rnds4(x64emu_t* emu, sse_regs_t* xmm1, sse_regs_t* xmm2, uint8_t ib);  // SHA1RNDS4; ib is the immediate byte, of which the low two bits are used
+void sha256rnds2(x64emu_t* emu, sse_regs_t* xmm1, sse_regs_t* xmm2);  // SHA256RNDS2; also reads emu->xmm[0] as an implicit operand
+
+#endif //__X64_SHAEXT_H__
\ No newline at end of file
diff --git a/src/include/debug.h b/src/include/debug.h
index ee4d7700..d5e4bb91 100644
--- a/src/include/debug.h
+++ b/src/include/debug.h
@@ -37,6 +37,8 @@ extern int arm64_aes;
 extern int arm64_pmull;
 extern int arm64_crc32;
 extern int arm64_atomics;
+extern int arm64_sha1;
+extern int arm64_sha2;
 extern int arm64_flagm;
 extern int arm64_flagm2;
 #elif defined(RV64)
diff --git a/src/main.c b/src/main.c
index 022b92df..4fae8d7f 100644
--- a/src/main.c
+++ b/src/main.c
@@ -77,6 +77,8 @@ int arm64_aes = 0;
 int arm64_pmull = 0;
 int arm64_crc32 = 0;
 int arm64_atomics = 0;
+int arm64_sha1 = 0;
+int arm64_sha2 = 0;
 int arm64_uscat = 0;
 int arm64_flagm = 0;
 int arm64_flagm2 = 0;
@@ -377,6 +379,14 @@ HWCAP2_ECV
         arm64_aes = 1;
     if(hwcap&HWCAP_ATOMICS)
         arm64_atomics = 1;
+    #ifdef HWCAP_SHA1
+    if(hwcap&HWCAP_SHA1)
+        arm64_sha1 = 1;
+    #endif
+    #ifdef HWCAP_SHA2
+    if(hwcap&HWCAP_SHA2)
+        arm64_sha2 = 1;
+    #endif
     #ifdef HWCAP_USCAT
     if(hwcap&HWCAP_USCAT)
         arm64_uscat = 1;
@@ -407,6 +417,10 @@ HWCAP2_ECV
         printf_log(LOG_INFO, " PMULL");
     if(arm64_atomics)
         printf_log(LOG_INFO, " ATOMICS");
+    if(arm64_sha1)
+        printf_log(LOG_INFO, " SHA1");
+    if(arm64_sha2)
+        printf_log(LOG_INFO, " SHA2");
     if(arm64_uscat)
         printf_log(LOG_INFO, " USCAT");
     if(arm64_flagm)
diff --git a/src/tools/my_cpuid.c b/src/tools/my_cpuid.c
index 81399633..b4bcbcbb 100644
--- a/src/tools/my_cpuid.c
+++ b/src/tools/my_cpuid.c
@@ -270,10 +270,11 @@ void my_cpuid(x64emu_t* emu, uint32_t tmp32u)
             R_EDX = 0;
             break;
         case 0x7:   // extended bits...
-            if(R_ECX==1) {
-                R_EAX = 0; // Bit 5 is avx512_bf16
-            } else 
-                R_EAX = R_ECX = R_EBX = R_EDX = 0; // TODO
+            if(R_ECX==0) {
+                R_EAX = 0;
+                R_EBX = 0 |
+                        1<<29;  // SHA extension
+            } else {R_EAX = R_ECX = R_EBX = R_EDX = 0;}
             break;
         case 0xB:   // Extended Topology Enumeration Leaf
             //TODO!