about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2021-06-28 13:19:56 +0200
committer ptitSeb <sebastien.chev@gmail.com> 2021-06-28 13:19:56 +0200
commit 99ba7707f63d906f5e22a44ae2cbbc8dbd5e3f3e (patch)
tree a02f388c9f30cdeec9e54bb3118c02cef672bab4
parent ea1ef4e602af2f0e92181b96cff5ffa66186972f (diff)
download box64-99ba7707f63d906f5e22a44ae2cbbc8dbd5e3f3e.tar.gz
box64-99ba7707f63d906f5e22a44ae2cbbc8dbd5e3f3e.zip
[DYNAREC] Added CF opcode and optimized native call
-rwxr-xr-x src/dynarec/arm64_epilog.S 3
-rwxr-xr-x src/dynarec/arm64_next.S 9
-rwxr-xr-x src/dynarec/arm64_prolog.S 3
-rwxr-xr-x src/dynarec/dynarec_arm64_00.c 87
-rwxr-xr-x src/dynarec/dynarec_arm64_helper.c 62
-rwxr-xr-x src/dynarec/dynarec_arm64_helper.h 8
-rwxr-xr-x src/emu/x64emu_private.h 7
7 files changed, 127 insertions, 52 deletions
diff --git a/src/dynarec/arm64_epilog.S b/src/dynarec/arm64_epilog.S
index 4b73803a..c4dd0043 100755
--- a/src/dynarec/arm64_epilog.S
+++ b/src/dynarec/arm64_epilog.S
@@ -18,6 +18,9 @@ arm64_epilog:
     stp     x22, x23, [x0, (8 * 12)]
     stp     x24, x25, [x0, (8 * 14)]
     stp     x26, x27, [x0, (8 * 16)] // put back reg value in emu, including EIP (so x27 must be EIP now)
+    // and the 4 first SSE regs too
+    stp     q0, q1, [x0, (8 * 18)]
+    stp     q2, q3, [x0, (8 * 22)]
     //restore all used register
     //vpop     {d8-d15}
     ldp     x19, x20, [sp, (8 * 0)]
diff --git a/src/dynarec/arm64_next.S b/src/dynarec/arm64_next.S
index 834c1a89..3baae986 100755
--- a/src/dynarec/arm64_next.S
+++ b/src/dynarec/arm64_next.S
@@ -12,13 +12,16 @@
 arm64_next:
     // emu is r0
     // IP address is r1
-    sub     sp,  sp,  (8 * 12)
+    sub     sp,  sp,  (8 * 12 + 16 * 4)
     stp     x0,  x1,  [sp, (8 *  0)]
     stp     x10, x11, [sp, (8 *  2)]
     stp     x12, x13, [sp, (8 *  4)]
     stp     x14, x15, [sp, (8 *  6)]
     stp     x16, x17, [sp, (8 *  8)]
     str     x18, [sp, (8 * 10)]
+    stp     q0, q1, [sp, (8 * 12)]
+    stp     q2, q3, [sp, (8 * 16)]
+
     mov     x2, lr      // "from" is in lr, so put in x2
     // call the function
     bl      LinkNext
@@ -31,7 +34,9 @@ arm64_next:
     ldp     x14, x15, [sp, (8 *  6)]
     ldp     x16, x17, [sp, (8 *  8)]
     ldr     x18, [sp, (8 * 10)]
-    add     sp,  sp, (8 * 12)
+    ldp     q0, q1, [sp, (8 * 12)]
+    ldp     q2, q3, [sp, (8 * 16)]
+    add     sp,  sp, (8 * 12 + 16 * 4)
     // return offset is jump address
     br      x3
 
diff --git a/src/dynarec/arm64_prolog.S b/src/dynarec/arm64_prolog.S
index 21961e5b..5c6a92cd 100755
--- a/src/dynarec/arm64_prolog.S
+++ b/src/dynarec/arm64_prolog.S
@@ -31,5 +31,8 @@ arm64_prolog:
     ldp     x22, x23, [x0, (8 * 12)]
     ldp     x24, x25, [x0, (8 * 14)]
     ldp     x26, x27, [x0, (8 * 16)]
+    // grab 4 first SSE regs too
+    ldp     q0, q1, [x0, (8 * 18)]
+    ldp     q2, q3, [x0, (8 * 22)]
     //jump to function
     br       x1
diff --git a/src/dynarec/dynarec_arm64_00.c b/src/dynarec/dynarec_arm64_00.c
index df621729..31605840 100755
--- a/src/dynarec/dynarec_arm64_00.c
+++ b/src/dynarec/dynarec_arm64_00.c
@@ -24,6 +24,8 @@
 #include "dynarec_arm64_functions.h"
 #include "dynarec_arm64_helper.h"
 
+int isSimpleWrapper(wrapper_t fun);
+
 uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog)
 {
     uint8_t nextop, opcode;
@@ -1556,19 +1558,25 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     MESSAGE(LOG_DUMP, "Native Call to %s\n", GetNativeName(GetNativeFnc(ip)));
                     x87_forget(dyn, ninst, x3, x4, 0);
                     sse_purge07cache(dyn, ninst, x3);
-                    GETIP(ip+1); // read the 0xCC
-                    STORE_XEMU_CALL(xRIP);
-                    CALL_S(x64Int3, -1);
-                    LOAD_XEMU_CALL(xRIP);
-                    addr+=8+8;
-                    TABLE64(x3, addr); // expected return address
-                    CMPSx_REG(xRIP, x3);
-                    B_MARK(cNE);
-                    LDRw_U12(w1, xEmu, offsetof(x64emu_t, quit));
-                    CBZw_NEXT(w1);
-                    MARK;
-                    LOAD_XEMU_REM();
-                    jump_to_epilog(dyn, 0, xRIP, ninst);
+                    if(box64_log<2 && isSimpleWrapper(*(wrapper_t*)(addr))) {
+                        //GETIP(ip+3+8+8); // read the 0xCC
+                        call_n(dyn, ninst, *(void**)(addr+8));
+                        addr+=8+8;
+                    } else {
+                        GETIP(ip+1); // read the 0xCC
+                        STORE_XEMU_CALL(xRIP);
+                        CALL_S(x64Int3, -1);
+                        LOAD_XEMU_CALL(xRIP);
+                        addr+=8+8;
+                        TABLE64(x3, addr); // expected return address
+                        CMPSx_REG(xRIP, x3);
+                        B_MARK(cNE);
+                        LDRw_U12(w1, xEmu, offsetof(x64emu_t, quit));
+                        CBZw_NEXT(w1);
+                        MARK;
+                        LOAD_XEMU_REM();
+                        jump_to_epilog(dyn, 0, xRIP, ninst);
+                    }
                 }
             } else {
                 #if 1
@@ -1587,6 +1595,15 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 #endif
             }
             break;
+
+        case 0xCF:
+            INST_NAME("IRET");
+            SETFLAGS(X_ALL, SF_SET);    // Not a hack, EFLAGS are restored
+            BARRIER(2);
+            iret_to_epilog(dyn, ninst, rex.w);
+            *need_epilog = 0;
+            *ok = 0;
+            break;
         case 0xD0:
         case 0xD2:  // TODO: Jump if CL is 0
             nextop = F8;
@@ -1979,26 +1996,32 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     MESSAGE(LOG_DUMP, "Native Call to %s (retn=%d)\n", GetNativeName(GetNativeFnc(dyn->insts[ninst].natcall-1)), dyn->insts[ninst].retn);
                     // calling a native function
                     sse_purge07cache(dyn, ninst, x3);
-                    GETIP_(dyn->insts[ninst].natcall); // read the 0xCC already
-                    STORE_XEMU_CALL(xRIP);
-                    CALL_S(x64Int3, -1);
-                    LOAD_XEMU_CALL(xRIP);
-                    TABLE64(x3, dyn->insts[ninst].natcall);
-                    ADDx_U12(x3, x3, 2+8+8);
-                    CMPSx_REG(xRIP, x3);
-                    B_MARK(cNE);    // Not the expected address, exit dynarec block
-                    POP1(xRIP);   // pop the return address
-                    if(dyn->insts[ninst].retn) {
-                        ADDx_U12(xRSP, xRSP, dyn->insts[ninst].retn);
+                    if(box64_log<2 && dyn->insts && isSimpleWrapper(*(wrapper_t*)(dyn->insts[ninst].natcall+2))) {
+                        //GETIP(ip+3+8+8); // read the 0xCC
+                        call_n(dyn, ninst, *(void**)(dyn->insts[ninst].natcall+2+8));
+                        POP1(xRIP);   // pop the return address
+                    } else {
+                        GETIP_(dyn->insts[ninst].natcall); // read the 0xCC already
+                        STORE_XEMU_CALL(xRIP);
+                        CALL_S(x64Int3, -1);
+                        LOAD_XEMU_CALL(xRIP);
+                        TABLE64(x3, dyn->insts[ninst].natcall);
+                        ADDx_U12(x3, x3, 2+8+8);
+                        CMPSx_REG(xRIP, x3);
+                        B_MARK(cNE);    // Not the expected address, exit dynarec block
+                        POP1(xRIP);   // pop the return address
+                        if(dyn->insts[ninst].retn) {
+                            ADDx_U12(xRSP, xRSP, dyn->insts[ninst].retn);
+                        }
+                        TABLE64(x3, addr);
+                        CMPSx_REG(xRIP, x3);
+                        B_MARK(cNE);    // Not the expected address again
+                        LDRw_U12(w1, xEmu, offsetof(x64emu_t, quit));
+                        CBZw_NEXT(w1);  // not quitting, so lets continue
+                        MARK;
+                        LOAD_XEMU_REM();    // load remaining register, has they have changed
+                        jump_to_epilog(dyn, 0, xRIP, ninst);
                     }
-                    TABLE64(x3, addr);
-                    CMPSx_REG(xRIP, x3);
-                    B_MARK(cNE);    // Not the expected address again
-                    LDRw_U12(w1, xEmu, offsetof(x64emu_t, quit));
-                    CBZw_NEXT(w1);  // not quitting, so lets continue
-                    MARK;
-                    LOAD_XEMU_REM();    // load remaining register, has they have changed
-                    jump_to_epilog(dyn, 0, xRIP, ninst);
                     break;
                 default:
                     if(ninst && dyn->insts && dyn->insts[ninst-1].x64.set_flags) {
diff --git a/src/dynarec/dynarec_arm64_helper.c b/src/dynarec/dynarec_arm64_helper.c
index 3365cc6b..add6f1d1 100755
--- a/src/dynarec/dynarec_arm64_helper.c
+++ b/src/dynarec/dynarec_arm64_helper.c
@@ -450,8 +450,9 @@ void retn_to_epilog(dynarec_arm_t* dyn, int ninst, int n)
     BLR(x2); // save LR
 }
 
-void iret_to_epilog(dynarec_arm_t* dyn, int ninst)
+void iret_to_epilog(dynarec_arm_t* dyn, int ninst, int is64bits)
 {
+    #warning TODO: is64bits
     MAYUSE(ninst);
     MESSAGE(LOG_DUMP, "IRet to epilog\n");
     // POP IP
@@ -480,7 +481,7 @@ void call_c(dynarec_arm_t* dyn, int ninst, void* fnc, int reg, int ret, int save
     if(saveflags) {
         STRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags));
     }
-    fpu_pushcache(dyn, ninst, reg);
+    fpu_pushcache(dyn, ninst, reg, 0);
     if(ret!=-2) {
         STPx_S7_preindex(xEmu, savereg, xSP, -16);   // ARM64 stack needs to be 16byte aligned
         STPx_S7_offset(xRAX, xRCX, xEmu, offsetof(x64emu_t, regs[_AX]));    // x9..x15, x16,x17,x18 those needs to be saved by caller
@@ -510,13 +511,48 @@ void call_c(dynarec_arm_t* dyn, int ninst, void* fnc, int reg, int ret, int save
         GO(R8, R9);
         #undef GO
     }
-    fpu_popcache(dyn, ninst, reg);
+    fpu_popcache(dyn, ninst, reg, 0);
     if(saveflags) {
         LDRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags));
     }
     SET_NODF();
 }
 
+void call_n(dynarec_arm_t* dyn, int ninst, void* fnc)
+{
+    MAYUSE(fnc);
+    STRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags));
+    fpu_pushcache(dyn, ninst, x3, 1);
+    // x9..x15, x16,x17,x18 those needs to be saved by caller
+    // RDI, RSI, RDX, RCX, R8, R9 are used for function call
+    STPx_S7_preindex(xEmu, xRBX, xSP, -16);   // ARM64 stack needs to be 16byte aligned
+    STPx_S7_offset(xRSP, xRBP, xEmu, offsetof(x64emu_t, regs[_SP]));
+    STPx_S7_offset(xRSI, xRDI, xEmu, offsetof(x64emu_t, regs[_SI]));
+    // prepare regs for native call
+    MOVx_REG(0, xRDI);
+    MOVx_REG(x1, xRSI);
+    MOVx_REG(x2, xRDX);
+    MOVx_REG(x3, xRCX);
+    MOVx_REG(x4, xR8);
+    MOVx_REG(x5, xR9);
+    // native call
+    TABLE64(16, (uintptr_t)fnc);    // using x16 as scratch regs for call address
+    BLR(16);
+    // put return value in x86 regs
+    MOVx_REG(xRAX, 0);
+    MOVx_REG(xRDX, x1);
+    // all done, restore all regs
+    LDPx_S7_postindex(xEmu, xRBX, xSP, 16);
+    #define GO(A, B) LDPx_S7_offset(x##A, x##B, xEmu, offsetof(x64emu_t, regs[_##A]))
+    GO(RSP, RBP);
+    GO(RSI, RDI);
+    #undef GO
+
+    fpu_popcache(dyn, ninst, x3, 1);
+    LDRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags));
+    SET_NODF();
+}
+
 void grab_segdata(dynarec_arm_t* dyn, uintptr_t addr, int ninst, int reg, int segment)
 {
     (void)addr;
@@ -1025,7 +1061,7 @@ static void sse_reset(dynarec_arm_t* dyn, int ninst)
     (void)ninst;
 #if STEP > 1
     for (int i=0; i<16; ++i)
-        dyn->ssecache[i] = -1;
+        dyn->ssecache[i] = (i<4)?i:-1;
 #else
     (void)dyn;
 #endif
@@ -1065,7 +1101,7 @@ void sse_purge07cache(dynarec_arm_t* dyn, int ninst, int s1)
     (void) ninst; (void)s1;
 #if STEP > 1
     int old = -1;
-    for (int i=0; i<8; ++i)
+    for (int i=4; i<8; ++i)
         if(dyn->ssecache[i]!=-1) {
             if (old==-1) {
                 MESSAGE(LOG_DUMP, "\tPurge XMM0..7 Cache ------\n");
@@ -1089,7 +1125,7 @@ static void sse_purgecache(dynarec_arm_t* dyn, int ninst, int s1)
     (void) ninst; (void)s1;
 #if STEP > 1
     int old = -1;
-    for (int i=0; i<16; ++i)
+    for (int i=4; i<16; ++i)
         if(dyn->ssecache[i]!=-1) {
             if (old==-1) {
                 MESSAGE(LOG_DUMP, "\tPurge SSE Cache ------\n");
@@ -1121,19 +1157,20 @@ static void sse_reflectcache(dynarec_arm_t* dyn, int ninst, int s1)
 }
 #endif
 
-void fpu_pushcache(dynarec_arm_t* dyn, int ninst, int s1)
+void fpu_pushcache(dynarec_arm_t* dyn, int ninst, int s1, int not03)
 {
     (void) ninst; (void)s1;
 #if STEP > 1
+    int start = not03?4:0;
     // only SSE regs needs to be push back to xEmu
     int n=0;
-    for (int i=0; i<16; i++)
+    for (int i=start; i<16; i++)
         if(dyn->ssecache[i]!=-1)
             ++n;
     if(!n)
         return;
     MESSAGE(LOG_DUMP, "\tPush XMM Cache (%d)------\n", n);
-    for (int i=0; i<16; ++i)
+    for (int i=start; i<16; ++i)
         if(dyn->ssecache[i]!=-1) {
             VSTR128_U12(dyn->ssecache[i], xEmu, offsetof(x64emu_t, xmm[i]));
         }
@@ -1143,19 +1180,20 @@ void fpu_pushcache(dynarec_arm_t* dyn, int ninst, int s1)
 #endif
 }
 
-void fpu_popcache(dynarec_arm_t* dyn, int ninst, int s1)
+void fpu_popcache(dynarec_arm_t* dyn, int ninst, int s1, int not03)
 {
     (void) ninst; (void)s1;
 #if STEP > 1
+    int start = not03?4:0;
     // only SSE regs needs to be pop back from xEmu
     int n=0;
-    for (int i=0; i<16; i++)
+    for (int i=start; i<16; i++)
         if(dyn->ssecache[i]!=-1)
             ++n;
     if(!n)
         return;
     MESSAGE(LOG_DUMP, "\tPop XMM Cache (%d)------\n", n);
-    for (int i=0; i<16; ++i)
+    for (int i=start; i<16; ++i)
         if(dyn->ssecache[i]!=-1) {
             VLDR128_U12(dyn->ssecache[i], xEmu, offsetof(x64emu_t, xmm[i]));
         }
diff --git a/src/dynarec/dynarec_arm64_helper.h b/src/dynarec/dynarec_arm64_helper.h
index aae92a2d..48bc1f1e 100755
--- a/src/dynarec/dynarec_arm64_helper.h
+++ b/src/dynarec/dynarec_arm64_helper.h
@@ -635,6 +635,7 @@ void* arm64_next(x64emu_t* emu, uintptr_t addr);
 #define retn_to_epilog  STEPNAME(retn_to_epilog)
 #define iret_to_epilog  STEPNAME(iret_to_epilog)
 #define call_c          STEPNAME(call_c)
+#define call_n          STEPNAME(call_n)
 #define grab_segdata    STEPNAME(grab_segdata)
 #define emit_cmp8       STEPNAME(emit_cmp8)
 #define emit_cmp16      STEPNAME(emit_cmp16)
@@ -751,8 +752,9 @@ void jump_to_epilog(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst);
 void jump_to_next(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst);
 void ret_to_epilog(dynarec_arm_t* dyn, int ninst);
 void retn_to_epilog(dynarec_arm_t* dyn, int ninst, int n);
-void iret_to_epilog(dynarec_arm_t* dyn, int ninst);
+void iret_to_epilog(dynarec_arm_t* dyn, int ninst, int is64bits);
 void call_c(dynarec_arm_t* dyn, int ninst, void* fnc, int reg, int ret, int saveflags, int save_reg);
+void call_n(dynarec_arm_t* dyn, int ninst, void* fnc);
 void grab_segdata(dynarec_arm_t* dyn, uintptr_t addr, int ninst, int reg, int segment);
 void emit_cmp8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
 void emit_cmp16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
@@ -878,8 +880,8 @@ void x87_purgecache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3);
 #ifdef HAVE_TRACE
 void fpu_reflectcache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3);
 #endif
-void fpu_pushcache(dynarec_arm_t* dyn, int ninst, int s1);
-void fpu_popcache(dynarec_arm_t* dyn, int ninst, int s1);
+void fpu_pushcache(dynarec_arm_t* dyn, int ninst, int s1, int not03);
+void fpu_popcache(dynarec_arm_t* dyn, int ninst, int s1, int not03);
 
 uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
 uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
diff --git a/src/emu/x64emu_private.h b/src/emu/x64emu_private.h
index 70d9576b..73fe8a7a 100755
--- a/src/emu/x64emu_private.h
+++ b/src/emu/x64emu_private.h
@@ -30,7 +30,8 @@ typedef struct x64emu_s {
 	reg64_t     regs[16];
 	x64flags_t  eflags;
     reg64_t     ip;
-    uintptr_t   old_ip;
+    // sse
+    sse_regs_t  xmm[16];
     // fpu / mmx
 	mmx87_regs_t mmx87[8];
 	uint16_t    cw,cw_mask_all;
@@ -42,8 +43,8 @@ typedef struct x64emu_s {
     fpu_ld_t    fpu_ld[8]; // for long double emulation / 80bits fld fst
     fpu_ll_t    fpu_ll[8]; // for 64bits fild / fist sequence
 	fpu_p_reg_t p_regs[8];
-    // sse
-    sse_regs_t  xmm[16];
+    // old ip
+    uintptr_t   old_ip;
     // defered flags
     int         dummy1;     // to align on 64bits with df
     defered_flags_t df;