about summary refs log tree commit diff stats
path: root/src/dynarec
diff options
context:
space:
mode:
authorptitSeb <sebastien.chev@gmail.com>2022-09-30 19:13:47 +0200
committerptitSeb <sebastien.chev@gmail.com>2022-09-30 19:13:47 +0200
commit7b67bbe6988d00d557d72c9867b99826320d1835 (patch)
treec3f57cbd1749acebad85ddf296462103e0985efb /src/dynarec
parent7692371ccbdb050fdb758ac56f80822881e89e00 (diff)
downloadbox64-7b67bbe6988d00d557d72c9867b99826320d1835.tar.gz
box64-7b67bbe6988d00d557d72c9867b99826320d1835.zip
Added PCLMULQDQ extension ([DYNAREC] too, using PMULL if present); this greatly improves the AES-XTS score of Geekbench 5
Diffstat (limited to 'src/dynarec')
-rwxr-xr-xsrc/dynarec/arm64/arm64_emitter.h9
-rwxr-xr-xsrc/dynarec/arm64/arm64_printer.c11
-rwxr-xr-xsrc/dynarec/arm64/dynarec_arm64_660f.c45
-rwxr-xr-xsrc/dynarec/arm64/dynarec_arm64_functions.c16
-rwxr-xr-xsrc/dynarec/arm64/dynarec_arm64_functions.h1
5 files changed, 81 insertions, 1 deletions
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h
index dcd6ee64..4b7d985a 100755
--- a/src/dynarec/arm64/arm64_emitter.h
+++ b/src/dynarec/arm64/arm64_emitter.h
@@ -1764,7 +1764,7 @@
 #define SQDMULHQ_16(Vd, Vn, Vm)     EMIT(QDMULH_vector(1, 0, 0b01, Vm, Vn, Vd))
 #define SQDMULHQ_32(Vd, Vn, Vm)     EMIT(QDMULH_vector(1, 0, 0b10, Vm, Vn, Vd))
 
-// AES extensions
+// AES extension
 #define AES_gen(D, Rn, Rd)      (0b01001110<<24 | 0b00<<22 | 0b10100<<17 | 0b0010<<13 | (D)<<12 | 0b10<<10 | (Rn)<<5 | (Rd))
 #define AESD(Vd, Vn)    EMIT(AES_gen(1, Vn, Vd))
 #define AESE(Vd, Vn)    EMIT(AES_gen(0, Vn, Vd))
@@ -1773,4 +1773,11 @@
 #define AESIMC(Vd, Vn)  EMIT(AESMC_gen(1, Vn, Vd))
 #define AESMC(Vd, Vn)   EMIT(AESMC_gen(0, Vn, Vd))
 
+// PMULL/PMULL2 opcode builders; what the "PMULL extension" refers to is the 1Q form (PMULL_128/PMULL2_128)
+#define PMULL_gen(Q, size, Rm, Rn, Rd)  (0<<31 | (Q)<<30 | 0b001110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 0b1110<<12 | (Rn)<<5 | (Rd)) // Q: bit 30, size: bits 23:22, Rm: bits 20:16, Rn: bits 9:5, Rd: bits 4:0
+#define PMULL(Rd, Rn, Rm)   EMIT(PMULL_gen(0, 0b00, Rm, Rn, Rd)) // Q=0, size=0b00: Vd.8H, Vn.8B, Vm.8B
+#define PMULL2(Rd, Rn, Rm)  EMIT(PMULL_gen(1, 0b00, Rm, Rn, Rd)) // Q=1, size=0b00: Vd.8H, Vn.16B, Vm.16B
+#define PMULL_128(Rd, Rn, Rm)   EMIT(PMULL_gen(0, 0b11, Rm, Rn, Rd)) // Q=0, size=0b11: Vd.1Q, Vn.1D, Vm.1D
+#define PMULL2_128(Rd, Rn, Rm)  EMIT(PMULL_gen(1, 0b11, Rm, Rn, Rd)) // Q=1, size=0b11: Vd.1Q, Vn.2D, Vm.2D
+
 #endif  //__ARM64_EMITTER_H__
diff --git a/src/dynarec/arm64/arm64_printer.c b/src/dynarec/arm64/arm64_printer.c
index a156c00e..cccf33db 100755
--- a/src/dynarec/arm64/arm64_printer.c
+++ b/src/dynarec/arm64/arm64_printer.c
@@ -1351,6 +1351,17 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
         snprintf(buff, sizeof(buff), "AES%sMC V%d.16B, V%d.16B", sf?"I":"", Rd, Rn);

         return buff;

     }

+    // PMULL

+    if(isMask(opcode, "0Q001110ff1mmmmm111000nnnnnddddd", &a)) {

+        const char* Y[] = {"8B", "16B", "??", "??", "??", "??", "1D", "2D"};

+        const char* Z[] = {"8H", "??", "??", "1Q"};

+        int sz = sf;

+        const char* Vn = Y[(sz<<1)|a.Q];

+        const char* Vd = Z[sz];

+        snprintf(buff, sizeof(buff), "PMULL%s V%d.%s, V%d.%s, V%d.%s", a.Q?"2":"", Rd, Vd, Rn, Vn, Rm, Vn);   

+        return buff;

+    }

+

     // DMB ISH

     if(isMask(opcode, "11010101000000110011nnnn10111111", &a)) {

         snprintf(buff, sizeof(buff), "DMB %s", (Rn==0b1011)?"ISH":"???");

diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c
index 4ff74907..19af27b9 100755
--- a/src/dynarec/arm64/dynarec_arm64_660f.c
+++ b/src/dynarec/arm64/dynarec_arm64_660f.c
@@ -676,6 +676,51 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                     }

                     break;

 

+                case 0x44:

+                    INST_NAME("PCLMULQDQ Gx, Ex, Ib");

+                    nextop = F8;

+                    if(arm64_pmull) {

+                        GETGX(q0, 1);

+                        GETEX(q1, 0, 1);

+                        u8 = F8;

+                        switch (u8&0b00010001) {

+                            case 0b00000000:

+                                PMULL_128(q0, q0, q1);

+                                break;

+                            case 0b00010001:

+                                PMULL2_128(q0, q0, q1);

+                                break;

+                            case 0b00000001:

+                                VEXTQ_8(q0, q0, q0, 8); // Swap Up/Lower 64bits parts

+                                PMULL_128(q0, q0, q1);

+                                break;

+                            case 0b00010000:

+                                VEXTQ_8(q0, q0, q0, 8); // Swap Up/Lower 64bits parts

+                                PMULL2_128(q0, q0, q1);

+                                break;

+                        }

+                    } else {

+                        GETG;

+                        sse_forget_reg(dyn, ninst, gd);

+                        MOV32w(x1, gd); // gx

+                        if(MODREG) {

+                            ed = (nextop&7)+(rex.b<<3); 

+                            sse_forget_reg(dyn, ninst, ed);

+                            MOV32w(x2, ed);

+                            MOV32w(x3, 0);  //p = NULL

+                        } else {

+                            MOV32w(x2, 0);

+                            addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, 0, 0, rex, NULL, 0, 1);

+                            if(ed!=x3) {

+                                MOVx_REG(x3, ed);

+                            }

+                        }

+                        u8 = F8;

+                        MOV32w(x4, u8);

+                        CALL(arm_pclmul, -1);

+                    }

+                    break;

+

                 case 0xDF:

                     INST_NAME("AESKEYGENASSIST Gx, Ex, Ib");  // AES-NI

                     nextop = F8;

diff --git a/src/dynarec/arm64/dynarec_arm64_functions.c b/src/dynarec/arm64/dynarec_arm64_functions.c
index a41dfad7..d51f5a3a 100755
--- a/src/dynarec/arm64/dynarec_arm64_functions.c
+++ b/src/dynarec/arm64/dynarec_arm64_functions.c
@@ -332,6 +332,22 @@ void arm_aeskeygenassist(x64emu_t* emu, int gx, int ex, void* p, uint32_t u8)
     GX->ud[3] ^= u8;
 }
 
+// Software PCLMULQDQ: carry-less multiply of one qword of xmm[gx] (u8 bit 0) by one qword of the source (u8 bit 4; *p if p is non-NULL, else xmm[ex]); the 128-bit product is stored in xmm[gx]
+void arm_pclmul(x64emu_t* emu, int gx, int ex, void* p, uint32_t u8)
+{
+    sse_regs_t *EX = p?((sse_regs_t*)p):&emu->xmm[ex];
+    sse_regs_t *GX = &emu->xmm[gx];
+    uint64_t op1 = (uint64_t)GX->q[(u8&1)?1:0];  // multiplier, read once before GX is overwritten
+    unsigned __int128 op2 = (uint64_t)EX->q[(u8&0b10000)?1:0];   // multiplicand, zero-extended to 128 bits
+    unsigned __int128 result = 0;    // XOR-accumulates op2<<i for every set bit i of op1 (no carries)
+    for (int i=0; i<64; ++i)
+        if((op1>>i)&1)  // unsigned bit test: the former (1LL<<i) mask is undefined behaviour for i==63
+            result ^= (op2<<i);
+
+    GX->q[0] = (uint64_t)result;
+    GX->q[1] = (uint64_t)(result>>64);
+}
+
 void arm_clflush(x64emu_t* emu, void* p)
 {
     cleanDBFromAddressRange((uintptr_t)p, 8, 0);
diff --git a/src/dynarec/arm64/dynarec_arm64_functions.h b/src/dynarec/arm64/dynarec_arm64_functions.h
index a5e15a1e..7183fd6d 100755
--- a/src/dynarec/arm64/dynarec_arm64_functions.h
+++ b/src/dynarec/arm64/dynarec_arm64_functions.h
@@ -35,6 +35,7 @@ void arm_aesdlast(x64emu_t* emu, int xmm);
 void arm_aeselast(x64emu_t* emu, int xmm);
 void arm_aesimc(x64emu_t* emu, int xmm);
 void arm_aeskeygenassist(x64emu_t* emu, int gx, int ex, void* p, uint32_t u8);
+void arm_pclmul(x64emu_t* emu, int gx, int ex, void* p, uint32_t u8);
 
 void arm_clflush(x64emu_t* emu, void* p);