author     ptitSeb <sebastien.chev@gmail.com>    2023-10-30 19:00:44 +0100
committer  ptitSeb <sebastien.chev@gmail.com>    2023-10-30 19:00:44 +0100
commit     5c13f8f10ac82ca642a6c930585989cc0d75a664 (patch)
tree       269e1c1acb78487caf49fb1e31407f87e4b65a4d /src
parent     20cf990bf7e2c37a565dedc03890c9df1ea8b602 (diff)
[ARM64_DYNAREC] Added 66 0F 3A 60..63 opcodes
Diffstat (limited to 'src')
-rw-r--r--  src/dynarec/arm64/arm64_emitter.h            3
-rw-r--r--  src/dynarec/arm64/arm64_printer.c           14
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_660f.c     178
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_helper.c    13
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_helper.h     4
5 files changed, 208 insertions, 4 deletions
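
For context, opcodes 66 0F 3A 60..63 are the SSE4.2 string-comparison instructions PCMPESTRM, PCMPESTRI, PCMPISTRM and PCMPISTRI. The dynarec below calls a C helper to compute the intermediate result mask, then either expands that mask into a vector (the *STRM forms) or turns it into an element index in RCX (the *STRI forms). The index selection that the RBIT/CLZ sequences implement can be sketched in plain C as follows (illustrative only, not box64 code; imm8 bit 0 selects word vs byte elements, bit 6 selects most- vs least-significant match):

    #include <stdint.h>

    /* Sketch of PCMPESTRI/PCMPISTRI index selection from the result mask. */
    static uint32_t pcmpstri_index(uint32_t res_mask, uint8_t imm8)
    {
        int nelem = (imm8 & 1) ? 8 : 16;           /* word or byte elements       */
        if (!res_mask)
            return nelem;                          /* no match: index = #elements */
        if (imm8 & 0x40)
            return 31 - __builtin_clz(res_mask);   /* most significant set bit    */
        return __builtin_ctz(res_mask);            /* least significant set bit   */
    }
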
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h
index 886ecfa0..4a939eb8 100644
--- a/src/dynarec/arm64/arm64_emitter.h
+++ b/src/dynarec/arm64/arm64_emitter.h
@@ -1766,8 +1766,9 @@
 // MOV Immediate
 #define MOVI_vector(Q, op, abc, cmode, defgh, Rd)   ((Q)<<30 | (op)<<29 | 0b0111100000<<19 | (abc)<<16 | (cmode)<<12 | 1<<10 | (defgh)<<5 | (Rd))
 #define MOVIQ_8(Rd, imm8)           EMIT(MOVI_vector(1, 0, (((imm8)>>5)&0b111), 0b1110, ((imm8)&0b11111), Rd))
+#define MOVIQ_16(Rd, imm8, lsl8)    EMIT(MOVI_vector(1, 0, (((imm8)>>5)&0b111), 0b1000|((lsl8)?0b10:0), ((imm8)&0b11111), Rd))
 #define MOVI_8(Rd, imm8)            EMIT(MOVI_vector(0, 0, (((imm8)>>5)&0b111), 0b1110, ((imm8)&0b11111), Rd))
-#define MOVI_16(Rd, imm8)           EMIT(MOVI_vector(0, 0, (((imm8)>>5)&0b111), 0b1000, ((imm8)&0b11111), Rd))
+#define MOVI_16(Rd, imm8, lsl8)     EMIT(MOVI_vector(0, 0, (((imm8)>>5)&0b111), 0b1000|((lsl8)?0b10:0), ((imm8)&0b11111), Rd))
 #define MOVI_32(Rd, imm8)           EMIT(MOVI_vector(0, 0, (((imm8)>>5)&0b111), 0b0000, ((imm8)&0b11111), Rd))
 #define MOVI_64(Rd, imm8)           EMIT(MOVI_vector(0, 1, (((imm8)>>5)&0b111), 0b1110, ((imm8)&0b11111), Rd))
 
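The extra lsl8 argument drives the cmode field of the AArch64 MOVI (16-bit) encoding: cmode 0b1000 keeps the 8-bit immediate in the low byte of each halfword, while cmode 0b1010 shifts it left by 8 first, which is exactly what 0b1000|((lsl8)?0b10:0) selects. The per-lane value it produces can be summarised as (illustrative helper, not part of the patch):

    #include <stdint.h>

    /* Value every 16-bit lane receives from MOVI_16/MOVIQ_16(Rd, imm8, lsl8). */
    static uint16_t movi16_lane_value(uint8_t imm8, int lsl8)
    {
        return (uint16_t)((uint16_t)imm8 << (lsl8 ? 8 : 0));
    }

This is what lets MOVIQ_16(q1, 0x80, 1) further down load the sign-bit mask 0x8000 into every halfword lane.
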
diff --git a/src/dynarec/arm64/arm64_printer.c b/src/dynarec/arm64/arm64_printer.c
index 4a889a28..70f96d34 100644
--- a/src/dynarec/arm64/arm64_printer.c
+++ b/src/dynarec/arm64/arm64_printer.c
@@ -983,11 +983,21 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
         snprintf(buff, sizeof(buff), "MOVI V%d.%s, #0x%x", Rd, Vd, imm);
         return buff;
     }
-    // MOV immediate (not)shifted 16bits & 32bits
+    // MOV immediate notshifted 16bits & 32bits
     if(isMask(opcode, "0Q00111100000iiif00001iiiiiddddd", &a)) {
         const char* Y[] = {"2S", "4S", "4H", "8H"};
         const char* Vd = Y[(sf<<1)| a.Q];
-        snprintf(buff, sizeof(buff), "MOVI V%d.%s, #0x%x", Rd, Vd, imm);
+        int sh = 0;
+
+        snprintf(buff, sizeof(buff), "MOVI V%d.%s, #0x%x", Rd, Vd, imm<<sh);
+        return buff;
+    }
+    // MOV immediate shifted 16bits
+    if(isMask(opcode, "0Q00111100000iii101001iiiiiddddd", &a)) {
+        const char* Y[] = {"4H", "8H"};
+        const char* Vd = Y[a.Q];
+
+        snprintf(buff, sizeof(buff), "MOVI V%d.%s, #0x%x", Rd, Vd, imm<<8);
         return buff;
     }
 
diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c
index 800436f3..224ea138 100644
--- a/src/dynarec/arm64/dynarec_arm64_660f.c
+++ b/src/dynarec/arm64/dynarec_arm64_660f.c
@@ -20,6 +20,7 @@
 #include "dynarec_arm64_private.h"
 #include "dynarec_arm64_functions.h"
 #include "dynarec_arm64_helper.h"
+#include "emu/x64compstrings.h"
 
 uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog)
 {
@@ -48,6 +49,8 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
     MAYUSE(j64);
     #if STEP > 1
     static const int8_t mask_shift8[] = { -7, -6, -5, -4, -3, -2, -1, 0 };
+    static const int8_t mask_string8[] = { 7, 6, 5, 4, 3, 2, 1, 0 };
+    static const int8_t mask_string16[] = { 15, 14, 13, 12, 11, 10, 9, 8 };
     static const int8_t round_round[] = { 0, 2, 1, 3};
     #endif
 
@@ -1237,6 +1240,181 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                     }
                     break;
 
+                case 0x60:
+                    INST_NAME("PCMPESTRM Gx, Ex, Ib");
+                    SETFLAGS(X_OF|X_CF|X_AF|X_ZF|X_SF|X_PF, SF_SET);
+                    nextop = F8;
+                    GETG;
+                    sse_forget_reg(dyn, ninst, gd);
+                    ADDx_U12(x3, xEmu, offsetof(x64emu_t, xmm[gd]));
+                    if(MODREG) {
+                        ed = (nextop&7)+(rex.b<<3);
+                        sse_reflect_reg(dyn, ninst, ed);
+                        ADDx_U12(x1, xEmu, offsetof(x64emu_t, xmm[ed]));
+                    } else {
+                        addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 1);
+                        if(ed!=x1) {
+                            MOVx_REG(x1, ed);
+                        }
+                    }
+                    MOVx_REG(x2, xRDX);
+                    MOVx_REG(x4, xRAX);
+                    u8 = F8;
+                    MOV32w(x5, u8);
+                    CALL(sse42_compare_string_explicit_len, x1);
+                    q0 = sse_get_reg_empty(dyn, ninst, x2, gd);
+                    q1 = fpu_get_scratch(dyn);
+                    if(u8&0b1000000) {
+                        switch(u8&1) {
+                            case 0b00:
+                                VDUPQB(q0, x1); // load the low 8bits of the mask
+                                LSRw_IMM(x1, x1, 8);
+                                VDUPQB(q1, x1); // load the high 8bits of the mask
+                                VEXTQ_8(q0, q0, q1, 8); // low and high bits mask
+                                TABLE64(x2, (uintptr_t)&mask_string8);
+                                VLDR64_U12(q1, x2, 0);     // load shift
+                                VDUPQ_64(q1, q1, 0);
+                                USHLQ_8(q0, q0, q1); // extract 1 bit
+                                MOVIQ_8(q1, 0x80);   // load mask
+                                VANDQ(q0, q0, q1);
+                                VSSHRQ_8(q0, q0, 7);    // saturate the mask
+                                break;
+                            case 0b01:
+                                VDUPQH(q0, x1); // load the 8bits of the mask
+                                TABLE64(x2, (uintptr_t)&mask_string16);
+                                VLDR64_U12(q1, x2, 0);     // load shift
+                                UXTL_8(q1, q1);     // extend mask to 16bits
+                                USHLQ_16(q0, q0, q1); // extract 1 bit
+                                MOVIQ_16(q1, 0x80, 1);   // load mask
+                                VANDQ(q0, q0, q1);
+                                VSSHRQ_16(q0, q0, 15);    // saturate the mask
+                        }
+                    } else {
+                        VEORQ(q0, q0, q0);
+                        VMOVQHfrom(q0, 0, x1);
+                    }
+                    break;
+                case 0x61:
+                    INST_NAME("PCMPESTRI Gx, Ex, Ib");
+                    SETFLAGS(X_OF|X_CF|X_AF|X_ZF|X_SF|X_PF, SF_SET);
+                    nextop = F8;
+                    GETG;
+                    sse_reflect_reg(dyn, ninst, gd);
+                    ADDx_U12(x3, xEmu, offsetof(x64emu_t, xmm[gd]));
+                    if(MODREG) {
+                        ed = (nextop&7)+(rex.b<<3);
+                        sse_reflect_reg(dyn, ninst, ed);
+                        ADDx_U12(x1, xEmu, offsetof(x64emu_t, xmm[ed]));
+                    } else {
+                        addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 1);
+                        if(ed!=x1) {
+                            MOVx_REG(x1, ed);
+                        }
+                    }
+                    MOVx_REG(x2, xRDX);
+                    MOVx_REG(x4, xRAX);
+                    u8 = F8;
+                    MOV32w(x5, u8);
+                    CALL(sse42_compare_string_explicit_len, x1);
+                    CBNZw_MARK(x1);
+                    MOV32w(xRCX, (u8&1)?8:16);
+                    B_NEXT_nocond;
+                    MARK;
+                    if(u8&0b1000000) {
+                        CLZw(xRCX, x1);
+                        MOV32w(x2, 31);
+                        SUBw_REG(xRCX, x2, xRCX);
+                    } else {
+                        RBITxw(xRCX, x1);
+                        CLZw(xRCX, xRCX);
+                    }
+                    break;
+                case 0x62:
+                    INST_NAME("PCMPISTRM Gx, Ex, Ib");
+                    SETFLAGS(X_OF|X_CF|X_AF|X_ZF|X_SF|X_PF, SF_SET);
+                    nextop = F8;
+                    GETG;
+                    sse_forget_reg(dyn, ninst, gd);
+                    ADDx_U12(x2, xEmu, offsetof(x64emu_t, xmm[gd]));
+                    if(MODREG) {
+                        ed = (nextop&7)+(rex.b<<3);
+                        sse_reflect_reg(dyn, ninst, ed);
+                        ADDx_U12(x1, xEmu, offsetof(x64emu_t, xmm[ed]));
+                    } else {
+                        addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 1);
+                        if(ed!=x1) {
+                            MOVx_REG(x1, ed);
+                        }
+                    }
+                    u8 = F8;
+                    MOV32w(x3, u8);
+                    CALL(sse42_compare_string_implicit_len, x1);
+                    q0 = sse_get_reg_empty(dyn, ninst, x2, gd);
+                    q1 = fpu_get_scratch(dyn);
+                    if(u8&0b1000000) {
+                        switch(u8&1) {
+                            case 0b00:
+                                VDUPQB(q0, x1); // load the low 8bits of the mask
+                                LSRw_IMM(x1, x1, 8);
+                                VDUPQB(q1, x1); // load the high 8bits of the mask
+                                VEXTQ_8(q0, q0, q1, 8); // low and high bits mask
+                                TABLE64(x2, (uintptr_t)&mask_string8);
+                                VLDR64_U12(q1, x2, 0);     // load shift
+                                VDUPQ_64(q1, q1, 0);
+                                USHLQ_8(q0, q0, q1); // extract 1 bit
+                                MOVIQ_8(q1, 0x80);   // load mask
+                                VANDQ(q0, q0, q1);
+                                VSSHRQ_8(q0, q0, 7);    // saturate the mask
+                                break;
+                            case 0b01:
+                                VDUPQH(q0, x1); // load the 8bits of the mask
+                                TABLE64(x2, (uintptr_t)&mask_string16);
+                                VLDR64_U12(q1, x2, 0);     // load shift
+                                UXTL_8(q1, q1);     // extend mask to 16bits
+                                USHLQ_16(q0, q0, q1); // extract 1 bit
+                                MOVIQ_16(q1, 0x80, 1);   // load mask
+                                VANDQ(q0, q0, q1);
+                                VSSHRQ_16(q0, q0, 15);    // saturate the mask
+                        }
+                    } else {
+                        VEORQ(q0, q0, q0);
+                        VMOVQHfrom(q0, 0, x1);
+                    }
+                    break;
+                case 0x63:
+                    INST_NAME("PCMPISTRI Gx, Ex, Ib");
+                    SETFLAGS(X_OF|X_CF|X_AF|X_ZF|X_SF|X_PF, SF_SET);
+                    nextop = F8;
+                    GETG;
+                    sse_reflect_reg(dyn, ninst, gd);
+                    ADDx_U12(x2, xEmu, offsetof(x64emu_t, xmm[gd]));
+                    if(MODREG) {
+                        ed = (nextop&7)+(rex.b<<3);
+                        sse_reflect_reg(dyn, ninst, ed);
+                        ADDx_U12(x1, xEmu, offsetof(x64emu_t, xmm[ed]));
+                    } else {
+                        addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 1);
+                        if(ed!=x1) {
+                            MOVx_REG(x1, ed);
+                        }
+                    }
+                    u8 = F8;
+                    MOV32w(x3, u8);
+                    CALL(sse42_compare_string_implicit_len, x1);
+                    CBNZw_MARK(x1);
+                    MOV32w(xRCX, (u8&1)?8:16);
+                    B_NEXT_nocond;
+                    MARK;
+                    if(u8&0b1000000) {
+                        CLZw(xRCX, x1);
+                        MOV32w(x2, 31);
+                        SUBw_REG(xRCX, x2, xRCX);
+                    } else {
+                        RBITxw(xRCX, x1);
+                        CLZw(xRCX, xRCX);
+                    }
+                    break;
+
                 case 0xDF:
                     INST_NAME("AESKEYGENASSIST Gx, Ex, Ib");  // AES-NI
                     nextop = F8;

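The mask-expansion sequence used by the *STRM cases above turns the 16-bit intermediate result into a per-lane NEON mask: the result is broadcast to every lane, each lane is shifted left by a per-lane amount taken from mask_string8/mask_string16 so that "its" bit lands on the lane's sign bit, the sign bit is isolated with 0x80/0x8000, and a signed right shift then saturates it to all-ones or all-zeros. A standalone sketch of the byte-element path using NEON intrinsics (illustrative only; box64 emits the equivalent instructions directly):

    #include <arm_neon.h>
    #include <stdint.h>

    /* Expand a 16-bit result mask into a 0x00/0xFF per-byte vector mask. */
    static uint8x16_t expand_mask8(uint16_t mask)
    {
        /* low mask byte in lanes 0..7, high mask byte in lanes 8..15 */
        uint8x16_t m = vcombine_u8(vdup_n_u8((uint8_t)mask),
                                   vdup_n_u8((uint8_t)(mask >> 8)));
        /* per-lane left shifts {7,6,...,0} move bit i of the mask to bit 7 of lane i */
        static const int8_t shifts[16] = { 7,6,5,4,3,2,1,0, 7,6,5,4,3,2,1,0 };
        m = vshlq_u8(m, vld1q_s8(shifts));
        /* keep only the sign bit, then smear it across the whole lane */
        m = vandq_u8(m, vdupq_n_u8(0x80));
        return vreinterpretq_u8_s8(vshrq_n_s8(vreinterpretq_s8_u8(m), 7));
    }
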
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.c b/src/dynarec/arm64/dynarec_arm64_helper.c
index 006e2c3c..f623061f 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.c
+++ b/src/dynarec/arm64/dynarec_arm64_helper.c
@@ -1609,6 +1609,17 @@ static void sse_reflectcache(dynarec_arm_t* dyn, int ninst, int s1)
         }
 }
 
+void sse_reflect_reg(dynarec_arm_t* dyn, int ninst, int a)
+{
+    if(dyn->n.ssecache[a].v==-1)
+        return;
+    if(dyn->n.neoncache[dyn->n.ssecache[a].reg].t == NEON_CACHE_XMMW) {
+        VSTR128_U12(dyn->n.ssecache[a].reg, xEmu, offsetof(x64emu_t, xmm[a]));
+        /*dyn->n.neoncache[dyn->n.ssecache[a].reg].t = NEON_CACHE_XMMR;
+        dyn->n.ssecache[a].write = 0;*/
+    }
+}
+
 void fpu_pushcache(dynarec_arm_t* dyn, int ninst, int s1, int not07)
 {
     int start = not07?8:0;
@@ -1641,6 +1652,8 @@ void fpu_popcache(dynarec_arm_t* dyn, int ninst, int s1, int not07)
     for (int i=start; i<16; ++i)
         if(dyn->n.ssecache[i].v!=-1) {
             VLDR128_U12(dyn->n.ssecache[i].reg, xEmu, offsetof(x64emu_t, xmm[i]));
+            /*dyn->n.ssecache[i].write = 0;   // OPTIM: it's sync, so not write anymore
+            dyn->n.neoncache[dyn->n.ssecache[i].reg].t = NEON_CACHE_XMMR;*/
         }
     MESSAGE(LOG_DUMP, "\t------- Pop XMM Cache (%d)\n", n);
 }
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h
index 268f8ee5..298f106a 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.h
+++ b/src/dynarec/arm64/dynarec_arm64_helper.h
@@ -1041,6 +1041,7 @@ void* arm64_next(x64emu_t* emu, uintptr_t addr);
 #define sse_get_reg_empty STEPNAME(sse_get_reg_empty)
 #define sse_forget_reg   STEPNAME(sse_forget_reg)
 #define sse_purge07cache STEPNAME(sse_purge07cache)
+#define sse_reflect_reg  STEPNAME(sse_reflect_reg)
 
 #define fpu_pushcache   STEPNAME(fpu_pushcache)
 #define fpu_popcache    STEPNAME(fpu_popcache)
@@ -1232,7 +1233,8 @@ int sse_get_reg_empty(dynarec_arm_t* dyn, int ninst, int s1, int a);
 void sse_forget_reg(dynarec_arm_t* dyn, int ninst, int a);
 // purge the XMM0..XMM7 cache (before function call)
 void sse_purge07cache(dynarec_arm_t* dyn, int ninst, int s1);
-
+// Push current value to the cache
+void sse_reflect_reg(dynarec_arm_t* dyn, int ninst, int a);
 // common coproc helpers
 // reset the cache
 void fpu_reset(dynarec_arm_t* dyn);