about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author: wannacu <76616478+wannacu@users.noreply.github.com> 2025-02-14 21:29:43 +0800
committer: GitHub <noreply@github.com> 2025-02-14 14:29:43 +0100
commit: efa1cbd49aa57ccc2c168fed2c4ec93e876f8e86 (patch)
tree: da7f0ec7bc7a658e8b888b9f207c2fae93d6a194 /src
parent: 91798194d51fe51eb236d91f9814c31511b5036a (diff)
download: box64-efa1cbd49aa57ccc2c168fed2c4ec93e876f8e86.tar.gz
          box64-efa1cbd49aa57ccc2c168fed2c4ec93e876f8e86.zip
[ARM64_DYNAREC] Add some opcodes (#2358)
* [ARM64_DYNAREC] Added 66 0F 3A 41 opcode

* [ARM64_DYNAREC] Added AVX.66.0F38 DB opcode

* [ARM64_DYNAREC] Added AVX.66.0F3A DF opcode

* [ARM64_DYNAREC] Added AVX.F2.0F38 F5 opcode

* [ARM64_DYNAREC] Added 66 F3 0F BC,B8 opcode
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_660f.c | 21
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_66f30f.c | 57
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c | 17
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c | 44
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_avx_f2_0f38.c | 31
5 files changed, 167 insertions, 3 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c
index 2a30bc9c..ec5223ac 100644
--- a/src/dynarec/arm64/dynarec_arm64_660f.c
+++ b/src/dynarec/arm64/dynarec_arm64_660f.c
@@ -1238,6 +1238,27 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                         }

                     break;

 

+                case 0x41:

+                    INST_NAME("DPPD Gx, Ex, Ib");

+                    nextop = F8;

+                    GETGX(q0, 1);

+                    GETEX(q1, 0, 1);

+                    u8 = F8;

+                    v0 = fpu_get_scratch(dyn, ninst);

+                    VFMULQD(v0, q0, q1);

+                    // mask some, duplicate all, mask some

+                    for(int i=0; i<2; ++i)

+                        if(!(u8&(1<<(4+i)))) {

+                            VMOVQDfrom(v0, i, xZR);

+                        }

+                    FADDPD(v0, v0);

+                    VDUPQ_64(q0, v0, 0);

+                    for(int i=0; i<2; ++i)

+                        if(!(u8&(1<<i))) {

+                            VMOVQDfrom(q0, i, xZR);

+                        }

+                    break;

+

                 case 0x44:

                     INST_NAME("PCLMULQDQ Gx, Ex, Ib");

                     nextop = F8;

diff --git a/src/dynarec/arm64/dynarec_arm64_66f30f.c b/src/dynarec/arm64/dynarec_arm64_66f30f.c
index 432a6646..88421c9f 100644
--- a/src/dynarec/arm64/dynarec_arm64_66f30f.c
+++ b/src/dynarec/arm64/dynarec_arm64_66f30f.c
@@ -55,6 +55,59 @@ uintptr_t dynarec64_66F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int
     #endif
 
     switch(opcode) {
+        case 0xB8:
+            INST_NAME("POPCNT Gw, Ew");
+            SETFLAGS(X_ALL, SF_SET);
+            SET_DFNONE();
+            nextop = F8;
+            GETGW(x2);
+            GETEW(x1, 0);
+            v1 = fpu_get_scratch(dyn, ninst);
+            VEORQ(v1, v1, v1);
+            VMOVQDfrom(v1, 0, ed);
+            CNT_8(v1, v1);
+            UADDLV_8(v1, v1);
+            VMOVHto(gd, v1, 0);
+            IFX(X_ALL) {
+                IFX(X_AF|X_PF|X_SF|X_OF|X_CF) {
+                    MOV32w(x1, (1<<F_OF) | (1<<F_SF) | (1<<F_ZF) | (1<<F_AF) | (1<<F_CF) | (1<<F_PF));
+                    BICw(xFlags, xFlags, x1);
+                }
+                IFX(X_ZF) {
+                    CMPSw_U12(gd, 0);
+                    IFNATIVE(NF_EQ) {}
+                    else {
+                        CSETw(x1, cEQ);
+                        BFIw(xFlags, x1, F_ZF, 1);
+                    }
+                }
+            }
+            GWBACK;
+            break;
+
+        case 0xBC:
+            INST_NAME("TZCNT Gw, Ew");
+            SETFLAGS(X_CF|X_ZF, SF_SUBSET);
+            SET_DFNONE();
+            nextop = F8;
+            GETEW(x1, 0);
+            GETGW(x2);
+            TSTxw_REG(ed, ed);
+            IFX(X_CF) {
+                CSETw(x3, cEQ);
+                BFIw(xFlags, x3, F_CF, 1);  // CF = is source 0?
+            }
+            RBITw(x3, ed);   // reverse
+            CLZw(gd, x3);    // x2 gets leading 0 == TZCNT
+            MOV32w(x3, 16);
+            CSELw(gd, x3, gd, cEQ); // if src is zero, use bit width as res
+            IFX(X_ZF) {
+                TSTxw_REG(gd, gd);
+                CSETw(x3, cEQ);
+                BFIw(xFlags, x3, F_ZF, 1);  // ZF = is dest 0?
+            }
+            GWBACK;
+            break;
 
         case 0xBD:
             INST_NAME("LZCNT Gw, Ew");
@@ -68,14 +121,14 @@ uintptr_t dynarec64_66F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int
                 CSETw(x3, cEQ);
                 BFIw(xFlags, x3, F_CF, 1);  // CF = is source 0?
             }
-            LSLw_IMM(ed, ed, 16);
             CLZw(gd, ed);
+            SUBw_U12(gd, gd, 16);   // sub zero cnt of high word
             IFX(X_ZF) {
                 TSTxw_REG(gd, gd);
                 CSETw(x3, cEQ);
                 BFIw(xFlags, x3, F_ZF, 1);  // ZF = is dest 0?
             }
-            EWBACK;
+            GWBACK;
             break;
 
         default:
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
index 781fc2a9..4f2b1c28 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
@@ -1866,6 +1866,23 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
             if(!vex.l) YMM0(gd);
             break;
 
+        case 0xDB:
+            INST_NAME("VAESIMC Gx, Ex");
+            nextop = F8;
+            GETGX_empty_EX(v0, v1, 0);
+            if(arm64_aes) {
+                AESIMC(v0, v1);
+            } else {
+                if(v0!=v1) {
+                    VMOVQ(v0, v1);
+                }
+                sse_forget_reg(dyn, ninst, gd);
+                MOV32w(x1, gd);
+                CALL(native_aesimc, -1);
+            }
+            if(!vex.l) YMM0(gd);
+            break;
+
         case 0xDC:
             INST_NAME("VAESENC Gx, Vx, Ex");  // AES-NI
             nextop = F8;
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c
index a2111a4e..1096b528 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c
@@ -632,6 +632,26 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
             if(!vex.l) YMM0(gd);
             break;
 
+        case 0x41:
+            INST_NAME("VDPPD Gx, Vx, Ex, Ib");
+            nextop = F8;
+            GETGX_empty_VXEX(v0, v1, v2, 0);
+            u8 = F8;
+            VFMULQD(v0, v1, v2);
+            // mask some, duplicate all, mask some
+            for(int i=0; i<2; ++i)
+                if(!(u8&(1<<(4+i)))) {
+                    VMOVQDfrom(v0, i, xZR);
+                }
+            FADDPD(v0, v0);
+            VDUPQ_64(v0, v0, 0);
+            for(int i=0; i<2; ++i)
+                if(!(u8&(1<<i))) {
+                    VMOVQDfrom(v0, i, xZR);
+                }
+            if(!vex.l) YMM0(gd);
+            break;
+
         case 0x44:
             INST_NAME("PCLMULQDQ Gx, Vx, Ex, Ib");
             nextop = F8;
@@ -784,6 +804,30 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
             if(!vex.l) YMM0(gd);
             break;
 
+       case 0xDF:
+            INST_NAME("VAESKEYGENASSIST Gx, Ex, Ib");
+            nextop = F8;
+            GETG;
+            sse_forget_reg(dyn, ninst, gd);
+            MOV32w(x1, gd); // gx
+            if(MODREG) {
+                ed = (nextop&7)+(rex.b<<3);
+                sse_forget_reg(dyn, ninst, ed);
+                MOV32w(x2, ed);
+                MOV32w(x3, 0);  //p = NULL
+            } else {
+                MOV32w(x2, 0);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 1);
+                if(ed!=x3) {
+                    MOVx_REG(x3, ed);
+                }
+            }
+            u8 = F8;
+            MOV32w(x4, u8);
+            CALL(native_aeskeygenassist, -1);
+            if(!vex.l) YMM0(gd);
+            break;
+
         default:
             DEFAULT;
     }
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_f2_0f38.c b/src/dynarec/arm64/dynarec_arm64_avx_f2_0f38.c
index 191b3581..fbee03e3 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_f2_0f38.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_f2_0f38.c
@@ -60,7 +60,36 @@ uintptr_t dynarec64_AVX_F2_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
     rex_t rex = vex.rex;
 
     switch(opcode) {
-
+        case 0xF5:
+            INST_NAME("PDEP Gd, Ed, Vd");
+            nextop = F8;
+            GETGD;
+            GETED(0);
+            GETVD;
+            if(gd==ed || gd==vd) {
+                gb1 = gd;
+                gd = x4;
+            } else {
+                gb1 = 0;
+            }
+            // x3 = mask of mask, loop while not 0
+            MOV32w(gd, 0);
+            MOV64x(x2, 1);
+            MOV64x(x3, 1);
+            MARK;
+            TSTxw_REG(ed, x3);
+            B_MARK2(cEQ);
+            TSTxw_REG(vd, x2);
+            B_MARK3(cEQ);
+            ORRxw_REG(gd, gd, x3);
+            MARK3;
+            LSLxw_IMM(x2, x2, 1);
+            MARK2;
+            LSLxw_IMM(x3, x3, 1);
+            CBNZxw_MARK(x3);
+            if(gb1)
+                MOVxw_REG(gb1, gd);
+            break;
         case 0xF6:
             INST_NAME("MULX Gd, Vd, Ed (,RDX)");
             nextop = F8;