about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Yang Liu <numbksco@gmail.com> 2024-03-03 01:10:44 +0800
committer GitHub <noreply@github.com> 2024-03-02 18:10:44 +0100
commit a86f5972398dcebcfd718fccc1a956d7045c503b (patch)
tree 00abf5c0acd76f7f3e2c2cd475d8986b38ba63ca
parent b72d3a77e91b4a80576c97b3da5cae34ef072c2d (diff)
download box64-a86f5972398dcebcfd718fccc1a956d7045c503b.tar.gz
box64-a86f5972398dcebcfd718fccc1a956d7045c503b.zip
[LA64_DYNAREC] Added CC native call support, fixed call_c (#1312)
-rw-r--r-- src/dynarec/la64/dynarec_la64_00.c 50
-rw-r--r-- src/dynarec/la64/dynarec_la64_helper.c 50
-rw-r--r-- src/dynarec/la64/dynarec_la64_helper.h 81
-rw-r--r-- src/dynarec/la64/la64_epilog.S 8
4 files changed, 163 insertions, 26 deletions
diff --git a/src/dynarec/la64/dynarec_la64_00.c b/src/dynarec/la64/dynarec_la64_00.c
index f4562f4e..7a5eb0f6 100644
--- a/src/dynarec/la64/dynarec_la64_00.c
+++ b/src/dynarec/la64/dynarec_la64_00.c
@@ -314,6 +314,56 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             *need_epilog = 0;
             *ok = 0;
             break;
+        case 0xCC:
+            SETFLAGS(X_ALL, SF_SET);
+            SKIPTEST(x1);
+            if (PK(0) == 'S' && PK(1) == 'C') {
+                addr += 2;
+                BARRIER(BARRIER_FLOAT);
+                INST_NAME("Special Box64 instruction");
+                if (PK64(0) == 0) {
+                    addr += 8;
+                    MESSAGE(LOG_DEBUG, "Exit x64 Emu\n");
+                    MOV64x(x1, 1);
+                    ST_W(x1, xEmu, offsetof(x64emu_t, quit));
+                    *ok = 0;
+                    *need_epilog = 1;
+                } else {
+                    MESSAGE(LOG_DUMP, "Native Call to %s\n", GetNativeName(GetNativeFnc(ip)));
+                    x87_forget(dyn, ninst, x3, x4, 0);
+                    sse_purge07cache(dyn, ninst, x3);
+
+                    // FIXME: Even the basic support of isSimpleWrapper is disabled for now.
+
+                    GETIP(ip + 1); // read the 0xCC
+                    STORE_XEMU_CALL(x3);
+                    ADDI_D(x1, xEmu, (uint32_t)offsetof(x64emu_t, ip)); // setup addr as &emu->ip
+                    CALL_S(x64Int3, -1);
+                    LOAD_XEMU_CALL();
+                    addr += 8 + 8;
+                    TABLE64(x3, addr); // expected return address
+                    BNE_MARK(xRIP, x3);
+                    LD_W(w1, xEmu, offsetof(x64emu_t, quit));
+                    CBZ_NEXT(w1);
+                    MARK;
+                    jump_to_epilog_fast(dyn, 0, xRIP, ninst);
+                }
+            } else {
+                if (!box64_ignoreint3) {
+                    INST_NAME("INT 3");
+                    // check if TRAP signal is handled
+                    LD_D(x1, xEmu, offsetof(x64emu_t, context));
+                    MOV64x(x2, offsetof(box64context_t, signals[SIGTRAP]));
+                    ADD_D(x2, x2, x1);
+                    LD_D(x3, x2, 0);
+                    CBZ_NEXT(x3);
+                    GETIP(ip);
+                    STORE_XEMU_CALL(x3);
+                    CALL(native_int3, -1);
+                    LOAD_XEMU_CALL();
+                }
+            }
+            break;
         case 0xFF:
             nextop = F8;
             switch ((nextop >> 3) & 7) {
diff --git a/src/dynarec/la64/dynarec_la64_helper.c b/src/dynarec/la64/dynarec_la64_helper.c
index ba18e76d..f8cd8de1 100644
--- a/src/dynarec/la64/dynarec_la64_helper.c
+++ b/src/dynarec/la64/dynarec_la64_helper.c
@@ -340,6 +340,25 @@ void jump_to_epilog(dynarec_la64_t* dyn, uintptr_t ip, int reg, int ninst)
     BR(x2);
 }
 
+void jump_to_epilog_fast(dynarec_la64_t* dyn, uintptr_t ip, int reg, int ninst)
+{
+    MAYUSE(dyn);
+    MAYUSE(ip);
+    MAYUSE(ninst);
+    MESSAGE(LOG_DUMP, "Jump to epilog\n");
+
+    if (reg) {
+        if (reg != xRIP) {
+            MV(xRIP, reg);
+        }
+    } else {
+        GETIP_(ip);
+    }
+    TABLE64(x2, (uintptr_t)la64_epilog_fast);
+    SMEND();
+    BR(x2);
+}
+
 void jump_to_next(dynarec_la64_t* dyn, uintptr_t ip, int reg, int ninst, int is32bits)
 {
     MAYUSE(dyn);
@@ -465,14 +484,15 @@ void call_c(dynarec_la64_t* dyn, int ninst, void* fnc, int reg, int ret, int sav
         ADDI_D(xSP, xSP, -16); // RV64 stack needs to be 16byte aligned
         ST_D(xEmu, xSP, 0);
         ST_D(savereg, xSP, 8);
-        // x5..x8, x10..x17, x28..x31 those needs to be saved by caller
+        // $r4..$r20 needs to be saved by caller
         STORE_REG(RAX);
         STORE_REG(RCX);
         STORE_REG(RDX);
-        STORE_REG(R12);
-        STORE_REG(R13);
-        STORE_REG(R14);
-        STORE_REG(R15);
+        STORE_REG(RBX);
+        STORE_REG(RSP);
+        STORE_REG(RBP);
+        STORE_REG(RSI);
+        STORE_REG(RDI);
         ST_D(xRIP, xEmu, offsetof(x64emu_t, ip));
     }
     TABLE64(reg, (uintptr_t)fnc);
@@ -489,10 +509,11 @@ void call_c(dynarec_la64_t* dyn, int ninst, void* fnc, int reg, int ret, int sav
         GO(RAX);
         GO(RCX);
         GO(RDX);
-        GO(R12);
-        GO(R13);
-        GO(R14);
-        GO(R15);
+        GO(RBX);
+        GO(RSP);
+        GO(RBP);
+        GO(RSI);
+        GO(RDI);
         if (ret != xRIP)
             LD_D(xRIP, xEmu, offsetof(x64emu_t, ip));
 #undef GO
@@ -509,6 +530,17 @@ void call_c(dynarec_la64_t* dyn, int ninst, void* fnc, int reg, int ret, int sav
     dyn->last_ip = 0;
 }
 
+void x87_forget(dynarec_la64_t* dyn, int ninst, int s1, int s2, int st)
+{
+    // TODO
+}
+
+// purge the SSE cache for XMM0..XMM7 (to use before function native call)
+void sse_purge07cache(dynarec_la64_t* dyn, int ninst, int s1)
+{
+    // TODO
+}
+
 void fpu_pushcache(dynarec_la64_t* dyn, int ninst, int s1, int not07)
 {
     // TODO
diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h
index f790ca9d..9bd7ef04 100644
--- a/src/dynarec/la64/dynarec_la64_helper.h
+++ b/src/dynarec/la64/dynarec_la64_helper.h
@@ -222,6 +222,22 @@
 #define MARKLOCK    dyn->insts[ninst].marklock = dyn->native_size
 #define GETMARKLOCK dyn->insts[ninst].marklock
 
+#define Bxx_gen(OP, M, reg1, reg2)   \
+    j64 = GET##M - dyn->native_size; \
+    B##OP(reg1, reg2, j64)
+
+#define BxxZ_gen(OP, M, reg1, reg2)  \
+    j64 = GET##M - dyn->native_size; \
+    B##OP##Z(reg1, j64)
+
+// Branch to MARK if reg1!=reg2 (use j64)
+#define BNE_MARK(reg1, reg2) Bxx_gen(NE, MARK, reg1, reg2)
+
+// Branch to NEXT if reg1==0 (use j64)
+#define CBZ_NEXT(reg1)                                                        \
+    j64 = (dyn->insts) ? (dyn->insts[ninst].epilog - (dyn->native_size)) : 0; \
+    BEQZ(reg1, j64)
+
 #define IFX(A)      if ((dyn->insts[ninst].x64.gen_flags & (A)))
 #define IFX_PENDOR0 if ((dyn->insts[ninst].x64.gen_flags & (X_PEND) || !dyn->insts[ninst].x64.gen_flags))
 #define IFXX(A)     if ((dyn->insts[ninst].x64.gen_flags == (A)))
@@ -231,6 +247,22 @@
 #define STORE_REG(A) ST_D(x##A, xEmu, offsetof(x64emu_t, regs[_##A]))
 #define LOAD_REG(A)  LD_D(x##A, xEmu, offsetof(x64emu_t, regs[_##A]))
 
+// Need to also store current value of some register, as they may be used by functions like setjmp
+#define STORE_XEMU_CALL(s0) \
+    STORE_REG(RBX);         \
+    STORE_REG(RDX);         \
+    STORE_REG(RSP);         \
+    STORE_REG(RBP);         \
+    STORE_REG(RDI);         \
+    STORE_REG(RSI);         \
+    STORE_REG(R8);          \
+    STORE_REG(R9);          \
+    STORE_REG(R10);         \
+    STORE_REG(R11);
+
+#define LOAD_XEMU_CALL()
+
+
 #define SET_DFNONE()                             \
     if (!dyn->f.dfnone) {                        \
         ST_W(xZR, xEmu, offsetof(x64emu_t, df)); \
@@ -389,6 +421,7 @@
 #define MODREG ((nextop & 0xC0) == 0xC0)
 
 void la64_epilog(void);
+void la64_epilog_fast(void);
 void* la64_next(x64emu_t* emu, uintptr_t addr);
 
 #ifndef STEPNAME
@@ -401,24 +434,29 @@ void* la64_next(x64emu_t* emu, uintptr_t addr);
 
 #define dynarec64_00 STEPNAME(dynarec64_00)
 
-#define geted          STEPNAME(geted)
-#define geted32        STEPNAME(geted32)
-#define jump_to_epilog STEPNAME(jump_to_epilog)
-#define jump_to_next   STEPNAME(jump_to_next)
-#define ret_to_epilog  STEPNAME(ret_to_epilog)
-#define call_c         STEPNAME(call_c)
-#define emit_test32    STEPNAME(emit_test32)
-#define emit_add32     STEPNAME(emit_add32)
-#define emit_add32c    STEPNAME(emit_add32c)
-#define emit_add8      STEPNAME(emit_add8)
-#define emit_add8c     STEPNAME(emit_add8c)
-#define emit_sub32     STEPNAME(emit_sub32)
-#define emit_sub32c    STEPNAME(emit_sub32c)
-#define emit_sub8      STEPNAME(emit_sub8)
-#define emit_sub8c     STEPNAME(emit_sub8c)
+#define geted               STEPNAME(geted)
+#define geted32             STEPNAME(geted32)
+#define jump_to_epilog      STEPNAME(jump_to_epilog)
+#define jump_to_epilog_fast STEPNAME(jump_to_epilog_fast)
+#define jump_to_next        STEPNAME(jump_to_next)
+#define ret_to_epilog       STEPNAME(ret_to_epilog)
+#define call_c              STEPNAME(call_c)
+#define emit_test32         STEPNAME(emit_test32)
+#define emit_add32          STEPNAME(emit_add32)
+#define emit_add32c         STEPNAME(emit_add32c)
+#define emit_add8           STEPNAME(emit_add8)
+#define emit_add8c          STEPNAME(emit_add8c)
+#define emit_sub32          STEPNAME(emit_sub32)
+#define emit_sub32c         STEPNAME(emit_sub32c)
+#define emit_sub8           STEPNAME(emit_sub8)
+#define emit_sub8c          STEPNAME(emit_sub8c)
 
 #define emit_pf STEPNAME(emit_pf)
 
+
+#define x87_forget       STEPNAME(x87_forget)
+#define sse_purge07cache STEPNAME(sse_purge07cache)
+
 #define fpu_pushcache       STEPNAME(fpu_pushcache)
 #define fpu_popcache        STEPNAME(fpu_popcache)
 #define fpu_reset_cache     STEPNAME(fpu_reset_cache)
@@ -437,6 +475,7 @@ uintptr_t geted32(dynarec_la64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop
 
 // generic x64 helper
 void jump_to_epilog(dynarec_la64_t* dyn, uintptr_t ip, int reg, int ninst);
+void jump_to_epilog_fast(dynarec_la64_t* dyn, uintptr_t ip, int reg, int ninst);
 void jump_to_next(dynarec_la64_t* dyn, uintptr_t ip, int reg, int ninst, int is32bits);
 void ret_to_epilog(dynarec_la64_t* dyn, int ninst, rex_t rex);
 void call_c(dynarec_la64_t* dyn, int ninst, void* fnc, int reg, int ret, int saveflags, int save_reg);
@@ -464,6 +503,13 @@ void fpu_unreflectcache(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3);
 void fpu_pushcache(dynarec_la64_t* dyn, int ninst, int s1, int not07);
 void fpu_popcache(dynarec_la64_t* dyn, int ninst, int s1, int not07);
 
+// refresh a value from the cache ->emu and then forget the cache (nothing done if value is not cached)
+void x87_forget(dynarec_la64_t* dyn, int ninst, int s1, int s2, int st);
+
+// SSE/SSE2 helpers
+// purge the XMM0..XMM7 cache (before function call)
+void sse_purge07cache(dynarec_la64_t* dyn, int ninst, int s1);
+
 void CacheTransform(dynarec_la64_t* dyn, int ninst, int cacheupd, int s1, int s2, int s3);
 
 #if STEP < 2
@@ -572,6 +618,11 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
         ST_W(xZR, xEmu, offsetof(x64emu_t, test.clean)); \
     }
 
+#define SKIPTEST(s1)                                     \
+    if (box64_dynarec_test) {                            \
+        ST_W(xZR, xEmu, offsetof(x64emu_t, test.clean)); \
+    }
+
 #define GOTEST(s1, s2)                                 \
     if (box64_dynarec_test) {                          \
         MOV32w(s2, 1);                                 \
diff --git a/src/dynarec/la64/la64_epilog.S b/src/dynarec/la64/la64_epilog.S
index 97f5e9ac..14f5dc4e 100644
--- a/src/dynarec/la64/la64_epilog.S
+++ b/src/dynarec/la64/la64_epilog.S
@@ -7,8 +7,10 @@
 .align 4
 
 .global la64_epilog
+.global la64_epilog_fast
+
 la64_epilog:
-    //update register -> emu
+    // update register -> emu
     st.d   $r12, $r4, (8 * 0)
     st.d   $r13, $r4, (8 * 1)
     st.d   $r14, $r4, (8 * 2)
@@ -26,7 +28,9 @@ la64_epilog:
     st.d   $r29, $r4, (8 * 14)
     st.d   $r30, $r4, (8 * 15)
     st.d   $r31, $r4, (8 * 16) // xFlags
-    st.d   $r20, $r4, (8 * 17) // put back reg value in emu, including EIP (so x27 must be EIP now)
+    st.d   $r20, $r4, (8 * 17) // put back reg value in emu, including EIP (so $r20 must be EIP now)
+    // fallback to epilog_fast now, just restoring saved regs
+la64_epilog_fast:
     addi.d $sp, $r22, 0       // restore save sp from xSavedSP
     // restore all used register
     ld.d   $r1,  $sp, (8 * 0) // load ra