about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2022-09-30 19:13:47 +0200
committer ptitSeb <sebastien.chev@gmail.com> 2022-09-30 19:13:47 +0200
commit 7b67bbe6988d00d557d72c9867b99826320d1835 (patch)
tree c3f57cbd1749acebad85ddf296462103e0985efb /src
parent 7692371ccbdb050fdb758ac56f80822881e89e00 (diff)
download box64-7b67bbe6988d00d557d72c9867b99826320d1835.tar.gz
box64-7b67bbe6988d00d557d72c9867b99826320d1835.zip
Added PCLMULQDQ ([DYNAREC] Too, using PMULL if present) extension (improve a lot AES-XTS score of Geekbench 5)
Diffstat (limited to 'src')
-rwxr-xr-x src/dynarec/arm64/arm64_emitter.h 9
-rwxr-xr-x src/dynarec/arm64/arm64_printer.c 11
-rwxr-xr-x src/dynarec/arm64/dynarec_arm64_660f.c 45
-rwxr-xr-x src/dynarec/arm64/dynarec_arm64_functions.c 16
-rwxr-xr-x src/dynarec/arm64/dynarec_arm64_functions.h 1
-rw-r--r-- src/emu/x64run660f.c 19
-rwxr-xr-x src/tools/box64stack.c 18
-rw-r--r-- src/tools/my_cpuid.c 1
-rwxr-xr-x src/wrapped/wrappedlibc.c 2
9 files changed, 114 insertions, 8 deletions
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h
index dcd6ee64..4b7d985a 100755
--- a/src/dynarec/arm64/arm64_emitter.h
+++ b/src/dynarec/arm64/arm64_emitter.h
@@ -1764,7 +1764,7 @@
 #define SQDMULHQ_16(Vd, Vn, Vm)     EMIT(QDMULH_vector(1, 0, 0b01, Vm, Vn, Vd))
 #define SQDMULHQ_32(Vd, Vn, Vm)     EMIT(QDMULH_vector(1, 0, 0b10, Vm, Vn, Vd))
 
-// AES extensions
+// AES extension
 #define AES_gen(D, Rn, Rd)      (0b01001110<<24 | 0b00<<22 | 0b10100<<17 | 0b0010<<13 | (D)<<12 | 0b10<<10 | (Rn)<<5 | (Rd))
 #define AESD(Vd, Vn)    EMIT(AES_gen(1, Vn, Vd))
 #define AESE(Vd, Vn)    EMIT(AES_gen(0, Vn, Vd))
@@ -1773,4 +1773,11 @@
 #define AESIMC(Vd, Vn)  EMIT(AESMC_gen(1, Vn, Vd))
 #define AESMC(Vd, Vn)   EMIT(AESMC_gen(0, Vn, Vd))
 
+// PMULL/PMULL2 (polynomial multiply long); only the 64x64->128 forms (PMULL_128/PMULL2_128, size=0b11) are part of the PMULL extension
+#define PMULL_gen(Q, size, Rm, Rn, Rd)  (0<<31 | (Q)<<30 | 0b001110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 0b1110<<12 | (Rn)<<5 | (Rd))
+#define PMULL(Rd, Rn, Rm)   EMIT(PMULL_gen(0, 0b00, Rm, Rn, Rd))   // Q=0, size=00: Vd.8H, Vn.8B, Vm.8B
+#define PMULL2(Rd, Rn, Rm)  EMIT(PMULL_gen(1, 0b00, Rm, Rn, Rd))   // Q=1, size=00: Vd.8H, Vn.16B, Vm.16B
+#define PMULL_128(Rd, Rn, Rm)   EMIT(PMULL_gen(0, 0b11, Rm, Rn, Rd))   // Q=0, size=11: Vd.1Q, Vn.1D, Vm.1D
+#define PMULL2_128(Rd, Rn, Rm)  EMIT(PMULL_gen(1, 0b11, Rm, Rn, Rd))   // Q=1, size=11: Vd.1Q, Vn.2D, Vm.2D
+
 #endif  //__ARM64_EMITTER_H__
diff --git a/src/dynarec/arm64/arm64_printer.c b/src/dynarec/arm64/arm64_printer.c
index a156c00e..cccf33db 100755
--- a/src/dynarec/arm64/arm64_printer.c
+++ b/src/dynarec/arm64/arm64_printer.c
@@ -1351,6 +1351,17 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
         snprintf(buff, sizeof(buff), "AES%sMC V%d.16B, V%d.16B", sf?"I":"", Rd, Rn);

         return buff;

     }

+    // PMULL

+    if(isMask(opcode, "0Q001110ff1mmmmm111000nnnnnddddd", &a)) {

+        const char* Y[] = {"8B", "16B", "??", "??", "??", "??", "1D", "2D"};

+        const char* Z[] = {"8H", "??", "??", "1Q"};

+        int sz = sf;

+        const char* Vn = Y[(sz<<1)|a.Q];

+        const char* Vd = Z[sz];

+        snprintf(buff, sizeof(buff), "PMULL%s V%d.%s, V%d.%s, V%d.%s", a.Q?"2":"", Rd, Vd, Rn, Vn, Rm, Vn);   

+        return buff;

+    }

+

     // DMB ISH

     if(isMask(opcode, "11010101000000110011nnnn10111111", &a)) {

         snprintf(buff, sizeof(buff), "DMB %s", (Rn==0b1011)?"ISH":"???");

diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c
index 4ff74907..19af27b9 100755
--- a/src/dynarec/arm64/dynarec_arm64_660f.c
+++ b/src/dynarec/arm64/dynarec_arm64_660f.c
@@ -676,6 +676,51 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                     }

                     break;

 

+                case 0x44:

+                    INST_NAME("PCLMULQDQ Gx, Ex, Ib");

+                    nextop = F8;

+                    if(arm64_pmull) {

+                        GETGX(q0, 1);

+                        GETEX(q1, 0, 1);

+                        u8 = F8;

+                        switch (u8&0b00010001) {

+                            case 0b00000000:

+                                PMULL_128(q0, q0, q1);

+                                break;

+                            case 0b00010001:

+                                PMULL2_128(q0, q0, q1);

+                                break;

+                            case 0b00000001:

+                                VEXTQ_8(q0, q0, q0, 8); // Swap Up/Lower 64bits parts

+                                PMULL_128(q0, q0, q1);

+                                break;

+                            case 0b00010000:

+                                VEXTQ_8(q0, q0, q0, 8); // Swap Up/Lower 64bits parts

+                                PMULL2_128(q0, q0, q1);

+                                break;

+                        }

+                    } else {

+                        GETG;

+                        sse_forget_reg(dyn, ninst, gd);

+                        MOV32w(x1, gd); // gx

+                        if(MODREG) {

+                            ed = (nextop&7)+(rex.b<<3); 

+                            sse_forget_reg(dyn, ninst, ed);

+                            MOV32w(x2, ed);

+                            MOV32w(x3, 0);  //p = NULL

+                        } else {

+                            MOV32w(x2, 0);

+                            addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, 0, 0, rex, NULL, 0, 1);

+                            if(ed!=x3) {

+                                MOVx_REG(x3, ed);

+                            }

+                        }

+                        u8 = F8;

+                        MOV32w(x4, u8);

+                        CALL(arm_pclmul, -1);

+                    }

+                    break;

+

                 case 0xDF:

                     INST_NAME("AESKEYGENASSIST Gx, Ex, Ib");  // AES-NI

                     nextop = F8;

diff --git a/src/dynarec/arm64/dynarec_arm64_functions.c b/src/dynarec/arm64/dynarec_arm64_functions.c
index a41dfad7..d51f5a3a 100755
--- a/src/dynarec/arm64/dynarec_arm64_functions.c
+++ b/src/dynarec/arm64/dynarec_arm64_functions.c
@@ -332,6 +332,22 @@ void arm_aeskeygenassist(x64emu_t* emu, int gx, int ex, void* p, uint32_t u8)
     GX->ud[3] ^= u8;
 }
 
+// Software PCLMULQDQ: xmm[gx] = carry-less (XOR, no carries) 64x64->128 product of one qword of GX and one of EX (*p if p is non-NULL, else xmm[ex])
+void arm_pclmul(x64emu_t* emu, int gx, int ex, void* p, uint32_t u8)
+{
+    sse_regs_t *EX = p?((sse_regs_t*)p):&emu->xmm[ex];
+    sse_regs_t *GX = &emu->xmm[gx];
+    uint64_t op1 = GX->q[u8&1];                 // imm8 bit 0 selects the GX qword (read once: loop-invariant)
+    unsigned __int128 op2 = EX->q[(u8>>4)&1];   // imm8 bit 4 selects the EX qword (read before GX is overwritten, EX may alias GX)
+    unsigned __int128 result = 0;               // unsigned: every shift below is well defined
+    for (int i=0; i<64; ++i)
+        if(op1&(1ULL<<i))   // 1ULL, not 1LL: 1LL<<63 overflows a signed long long (undefined behaviour)
+            result ^= (op2<<i);
+
+    GX->q[0] = (uint64_t)result;        // low 64 bits of the product
+    GX->q[1] = (uint64_t)(result>>64);  // high 64 bits of the product
+}
+
 void arm_clflush(x64emu_t* emu, void* p)
 {
     cleanDBFromAddressRange((uintptr_t)p, 8, 0);
diff --git a/src/dynarec/arm64/dynarec_arm64_functions.h b/src/dynarec/arm64/dynarec_arm64_functions.h
index a5e15a1e..7183fd6d 100755
--- a/src/dynarec/arm64/dynarec_arm64_functions.h
+++ b/src/dynarec/arm64/dynarec_arm64_functions.h
@@ -35,6 +35,7 @@ void arm_aesdlast(x64emu_t* emu, int xmm);
 void arm_aeselast(x64emu_t* emu, int xmm);
 void arm_aesimc(x64emu_t* emu, int xmm);
 void arm_aeskeygenassist(x64emu_t* emu, int gx, int ex, void* p, uint32_t u8);
+void arm_pclmul(x64emu_t* emu, int gx, int ex, void* p, uint32_t u8);  // PCLMULQDQ helper: xmm[gx] = carry-less product of the qwords picked by u8 bit 0 (in xmm[gx]) and bit 4 (in *p if non-NULL, else xmm[ex])
 
 void arm_clflush(x64emu_t* emu, void* p);
 
diff --git a/src/emu/x64run660f.c b/src/emu/x64run660f.c
index f182e34f..7561fff2 100644
--- a/src/emu/x64run660f.c
+++ b/src/emu/x64run660f.c
@@ -797,6 +797,25 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
                     GX->f[i] = (tmp8u&(1<<i))?tmpf:0.0f;

                 break;

 

+            case 0x44:  /* PCLMULQDQ Gx, Ex, Ib */

+                nextop = F8;

+                GETEX(1);

+                GETGX;

+                tmp8u = F8;

+                {

+                    int g = (tmp8u&1)?1:0;

+                    int e = (tmp8u&0b10000)?1:0;

+                    __int128 result = 0;

+                    __int128 op2 = EX->q[e];

+                    for (int i=0; i<64; ++i)

+                        if(GX->q[g]&(1LL<<i))

+                            result ^= (op2<<i);

+

+                    GX->q[0] = result&0xffffffffffffffffLL;

+                    GX->q[1] = (result>>64)&0xffffffffffffffffLL;

+                }

+                break;

+

             case 0xDF:      // AESKEYGENASSIST Gx, Ex, u8

                 nextop = F8;

                 GETEX(1);

diff --git a/src/tools/box64stack.c b/src/tools/box64stack.c
index a46fcbd6..ff0de001 100755
--- a/src/tools/box64stack.c
+++ b/src/tools/box64stack.c
@@ -155,12 +155,18 @@ void SetupInitialStack(x64emu_t *emu)
     Push(emu, real_getauxval(13)); Push(emu, 13);       //AT_GID(13)
     Push(emu, real_getauxval(14)); Push(emu, 14);       //AT_EGID(14)
     Push(emu, p_x86_64); Push(emu, 15);                 //AT_PLATFORM(15)=&"x86_64"
-    // Push HWCAP:
-    //  FPU: 1<<0 ; VME: 1<<1 ; DE : 1<<2 ; PSE: 1<<3 ; TSC: 1<<4 ; MSR: 1<<5 ; PAE: 1<<6 ; MCE: 1<<7
-    //  CX8: 1<<8 ; APIC:1<<9 ;             SEP: 1<<11; MTRR:1<<12; PGE: 1<<13; MCA: 1<<14; CMOV:1<<15
-    // FCMOV:1<<16;                                                                       ; MMX: 1<<23
-    // OSFXR:1<<24; XMM: 1<<25;XMM2: 1<<26;                                                AMD3D:1<<31
-    Push(emu, (1<<0) | (1<<1) | (1<<2) | (1<<3) | (1<<4) | (1<<8)  | (1<<15) | (1<<16) | (1<<23) | (1<<25) | (1<<26));
+    // Push HWCAP: same as CPUID 1.EDX
+    Push(emu,   1         // fpu 
+              | 1<<4      // rdtsc
+              | 1<<8      // cmpxchg8
+              | 1<<11     // sep (sysenter & sysexit)
+              | 1<<15     // cmov
+              | 1<<19     // clflush (seems to be with SSE2)
+              | 1<<23     // mmx
+              | 1<<24     // fxsr (fxsave, fxrestore)
+              | 1<<25     // SSE
+              | 1<<26     // SSE2
+        );
     Push(emu, 16);                                      //AT_HWCAP(16)=...
     //Push(emu, sysconf(_SC_CLK_TCK)); Push(emu, 17);     //AT_CLKTCK(17)=times() frequency
     Push(emu, real_getauxval(23)); Push(emu, 23);       //AT_SECURE(23)
diff --git a/src/tools/my_cpuid.c b/src/tools/my_cpuid.c
index 1c1a1871..7ab19db0 100644
--- a/src/tools/my_cpuid.c
+++ b/src/tools/my_cpuid.c
@@ -61,6 +61,7 @@ void my_cpuid(x64emu_t* emu, uint32_t tmp32u)
                     | 1<<26     // SSE2
                     ;
             R_ECX =   1<<0      // SSE3
+                    | 1<<1      // PCLMULQDQ
                     | 1<<9      // SSSE3
                     | 1<<12     // fma
                     | 1<<13     // cx16 (cmpxchg16)
diff --git a/src/wrapped/wrappedlibc.c b/src/wrapped/wrappedlibc.c
index ad6e804a..37a6b175 100755
--- a/src/wrapped/wrappedlibc.c
+++ b/src/wrapped/wrappedlibc.c
@@ -1392,7 +1392,7 @@ void CreateCPUInfoFile(int fd)
         P;
         sprintf(buff, "bogomips\t: %g\n", bogoMips);
         P;
-        sprintf(buff, "flags\t\t: fpu cx8 sep cmov clflush mmx sse sse2 syscall tsc lahf_lm ssse3 ht tm lm fma fxsr cpuid cx16 aes movbe pni\n");
+        sprintf(buff, "flags\t\t: fpu cx8 sep cmov clflush mmx sse sse2 syscall tsc lahf_lm ssse3 ht tm lm fma fxsr cpuid pclmulqdq cx16 aes movbe pni\n");
         P;
         sprintf(buff, "address sizes\t: 46 bits physical, 48 bits virtual\n");
         P;