about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author: Yang Liu <numbksco@gmail.com> 2024-03-02 20:11:20 +0800
committer: GitHub <noreply@github.com> 2024-03-02 13:11:20 +0100
commit: c5828803c68528eaf26684c0e0a9d8f4b416ae70 (patch)
tree: 09d1d7622a4d9137fb22de2d005e68e70934f191 /src
parent: 9883c6464e3df077322d00295a64b63d546d1dc8 (diff)
download: box64-c5828803c68528eaf26684c0e0a9d8f4b416ae70.tar.gz
          box64-c5828803c68528eaf26684c0e0a9d8f4b416ae70.zip
[LA64_DYNAREC] Added more opcodes with CALL/RET optimization (#1310)
* [LA64_DYNAREC] Added FF /2 CALL Ed opcode

* [LA64_DYNAREC] Added 81/83 /0 ADD opcode

* [LA64_DYNAREC] Added C3 RET opcode
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/la64/dynarec_la64_00.c 73
-rw-r--r-- src/dynarec/la64/dynarec_la64_helper.c 54
-rw-r--r-- src/dynarec/la64/dynarec_la64_helper.h 26
-rw-r--r-- src/dynarec/la64/dynarec_la64_pass0.h 1
-rw-r--r-- src/dynarec/la64/la64_emitter.h 6
5 files changed, 159 insertions, 1 deletions
diff --git a/src/dynarec/la64/dynarec_la64_00.c b/src/dynarec/la64/dynarec_la64_00.c
index d74b3165..f4562f4e 100644
--- a/src/dynarec/la64/dynarec_la64_00.c
+++ b/src/dynarec/la64/dynarec_la64_00.c
@@ -181,6 +181,18 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
         case 0x83:
             nextop = F8;
             switch ((nextop >> 3) & 7) {
+                case 0: // ADD
+                    if (opcode == 0x81) {
+                        INST_NAME("ADD Ed, Id");
+                    } else {
+                        INST_NAME("ADD Ed, Ib");
+                    }
+                    SETFLAGS(X_ALL, SF_SET_PENDING);
+                    GETED((opcode == 0x81) ? 4 : 1);
+                    if (opcode == 0x81) i64 = F32S; else i64 = F8S;
+                    emit_add32c(dyn, ninst, rex, ed, i64, x3, x4, x5, x6);
+                    WBACK;
+                    break;
                 case 5: // SUB
                     if (opcode == 0x81) {
                         INST_NAME("SUB Ed, Id");
@@ -239,7 +251,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
 
             GOCOND(0x70, "J", "ib");
 
-#undef GO
+        #undef GO
 
         case 0x85:
             INST_NAME("TEST Ed, Gd");
@@ -291,6 +303,65 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 }
             }
             break;
+        case 0xC3:
+            INST_NAME("RET");
+            // SETFLAGS(X_ALL, SF_SET);    // Hack, set all flags (to an unknown state...)
+            if (box64_dynarec_safeflags) {
+                READFLAGS(X_PEND); // so instead, force the deferred flags, so it's not too slow, and flags are not lost
+            }
+            BARRIER(BARRIER_FLOAT);
+            ret_to_epilog(dyn, ninst, rex);
+            *need_epilog = 0;
+            *ok = 0;
+            break;
+        case 0xFF:
+            nextop = F8;
+            switch ((nextop >> 3) & 7) {
+                case 2:
+                    INST_NAME("CALL Ed");
+                    PASS2IF((box64_dynarec_safeflags > 1) || ((ninst && dyn->insts[ninst - 1].x64.set_flags) || ((ninst > 1) && dyn->insts[ninst - 2].x64.set_flags)), 1)
+                    {
+                        READFLAGS(X_PEND); // that's suspicious
+                    }
+                    else
+                    {
+                        SETFLAGS(X_ALL, SF_SET); // Hack to put flag in "don't care" state
+                    }
+                    GETEDz(0);
+                    if (box64_dynarec_callret && box64_dynarec_bigblock > 1) {
+                        BARRIER(BARRIER_FULL);
+                    } else {
+                        BARRIER(BARRIER_FLOAT);
+                        *need_epilog = 0;
+                        *ok = 0;
+                    }
+                    GETIP_(addr);
+                    if (box64_dynarec_callret) {
+                        SET_HASCALLRET();
+                        // Push actual return address
+                        if (addr < (dyn->start + dyn->isize)) {
+                            // there is a next
+                            j64 = (dyn->insts) ? (dyn->insts[ninst].epilog - (dyn->native_size)) : 0;
+                            PCADDU12I(x4, ((j64 + 0x800) >> 12) & 0xfffff);
+                            ADDI_D(x4, x4, j64 & 0xfff);
+                            MESSAGE(LOG_NONE, "\tCALLRET set return to +%di\n", j64 >> 2);
+                        } else {
+                            MESSAGE(LOG_NONE, "\tCALLRET set return to Jmptable(%p)\n", (void*)addr);
+                            j64 = getJumpTableAddress64(addr);
+                            TABLE64(x4, j64);
+                            LD_D(x4, x4, 0);
+                        }
+                        ADDI_D(xSP, xSP, -16);
+                        ST_D(x4, xSP, 0);
+                        ST_D(xRIP, xSP, 8);
+                    }
+                    PUSH1z(xRIP);
+                    jump_to_next(dyn, 0, ed, ninst, rex.is32bits);
+                    break;
+                default:
+                    DEFAULT;
+            }
+            break;
         default:
             DEFAULT;
     }
diff --git a/src/dynarec/la64/dynarec_la64_helper.c b/src/dynarec/la64/dynarec_la64_helper.c
index 77c19950..ba18e76d 100644
--- a/src/dynarec/la64/dynarec_la64_helper.c
+++ b/src/dynarec/la64/dynarec_la64_helper.c
@@ -398,6 +398,60 @@ void jump_to_next(dynarec_la64_t* dyn, uintptr_t ip, int reg, int ninst, int is3
     JIRL(xRA, x2, 0x0); // save LR...
 }
 
+// Emit the native code that ends an x86 RET: pop the return address into xRIP, try the
+// CALL/RET fast path, and otherwise jump through the jump-table entry selected by xRIP.
+void ret_to_epilog(dynarec_la64_t* dyn, int ninst, rex_t rex)
+{
+    MAYUSE(dyn);
+    MAYUSE(ninst);
+    MESSAGE(LOG_DUMP, "Ret to epilog\n");
+    POP1z(xRIP);
+    MVz(x1, xRIP);
+    SMEND();
+    if (box64_dynarec_callret) {
+        // pop the (native, x86) return address pair that CALL stored on the native stack
+        LD_D(x2, xSP, 0);     // native addr
+        LD_D(x6, xSP, 8);     // x86 addr
+        ADDI_D(xSP, xSP, 16); // pop
+        BNE(x6, xRIP, 2 * 4); // is it the right address? if not, skip the BR below
+        BR(x2);               // yes: continue at the saved native address
+        // not the correct return address, regular jump, but purge the stack first, it's unsync now...
+        ADDI_D(xSP, xSavedSP, -16);
+    }
+
+    uintptr_t tbl = rex.is32bits ? getJumpTable32() : getJumpTable64();
+    MOV64x(x3, tbl); // x3 walks the jump table, one level per slice of xRIP
+    if (!rex.is32bits) {
+        SRLI_D(x2, xRIP, JMPTABL_START3);
+        SLLI_D(x2, x2, 3);
+        ADD_D(x3, x3, x2);
+        LD_D(x3, x3, 0);
+    }
+    MOV64x(x4, JMPTABLE_MASK2 << 3); // x4 = mask
+    SRLI_D(x2, xRIP, JMPTABL_START2 - 3);
+    AND(x2, x2, x4);
+    ADD_D(x3, x3, x2);
+    LD_D(x3, x3, 0);
+    if (JMPTABLE_MASK2 != JMPTABLE_MASK1) {
+        MOV64x(x4, JMPTABLE_MASK1 << 3); // x4 = mask
+    }
+    SRLI_D(x2, xRIP, JMPTABL_START1 - 3);
+    AND(x2, x2, x4);
+    ADD_D(x3, x3, x2);
+    LD_D(x3, x3, 0);
+    if (JMPTABLE_MASK0 < 2048) {
+        ANDI(x2, xRIP, JMPTABLE_MASK0);
+    } else {
+        MOV64x(x4, JMPTABLE_MASK0); // always reload: x4 still holds a mask pre-shifted by 3
+        AND(x2, xRIP, x4);
+    }
+    SLLI_D(x2, x2, 3);
+    ADD_D(x3, x3, x2);
+    LD_D(x2, x3, 0);
+    BR(x2); // jump to the address read from the table
+    CLEARIP();
+}
+
 void call_c(dynarec_la64_t* dyn, int ninst, void* fnc, int reg, int ret, int saveflags, int savereg)
 {
     MAYUSE(fnc);
diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h
index 5a1ea5cb..f790ca9d 100644
--- a/src/dynarec/la64/dynarec_la64_helper.h
+++ b/src/dynarec/la64/dynarec_la64_helper.h
@@ -96,6 +96,17 @@
         ed = x1;                                                                                \
     }
 
+#define GETEDz(D) /* fetch Ed into ed; wback is 0 for a register operand */                     \
+    if (MODREG) { /* register operand: index from nextop low bits plus rex.b */                 \
+        ed = TO_LA64((nextop & 7) + (rex.b << 3));                                              \
+        wback = 0;                                                                              \
+    } else { /* memory operand: address via geted, value loaded into x1 */                      \
+        SMREAD();                                                                               \
+        addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, D); \
+        LDz(x1, wback, fixedaddress);                                                           \
+        ed = x1;                                                                                \
+    }
+
 // Write back ed in wback (if wback not 0)
 #define WBACK                              \
     if (wback) {                           \
@@ -318,6 +329,9 @@
 #ifndef BARRIER
 #define BARRIER(A)
 #endif
+#ifndef SET_HASCALLRET // fallback: expands to nothing unless a step header defined it first
+#define SET_HASCALLRET()
+#endif
 #ifndef DEFAULT
 #define DEFAULT \
     *ok = -1;   \
@@ -362,6 +376,16 @@
 #endif
 #define CLEARIP() dyn->last_ip = 0
 
+#if STEP < 2 // before step 2: plain test of the condition A
+#define PASS2IF(A, B) if (A)
+#elif STEP == 2 // step 2: when A holds, record choice B for this instruction, then test the record
+#define PASS2IF(A, B)                         \
+    if (A) dyn->insts[ninst].pass2choice = B; \
+    if (dyn->insts[ninst].pass2choice == B)
+#else // later steps: A is not evaluated again, only the choice recorded at step 2 is tested
+#define PASS2IF(A, B) if (dyn->insts[ninst].pass2choice == B)
+#endif // NOTE(review): expands to a bare `if` head (two statements at step 2); the call site must supply the body
+
 #define MODREG ((nextop & 0xC0) == 0xC0)
 
 void la64_epilog(void);
@@ -381,6 +405,7 @@ void* la64_next(x64emu_t* emu, uintptr_t addr);
 #define geted32        STEPNAME(geted32)
 #define jump_to_epilog STEPNAME(jump_to_epilog)
 #define jump_to_next   STEPNAME(jump_to_next)
+#define ret_to_epilog  STEPNAME(ret_to_epilog)
 #define call_c         STEPNAME(call_c)
 #define emit_test32    STEPNAME(emit_test32)
 #define emit_add32     STEPNAME(emit_add32)
@@ -413,6 +438,7 @@ uintptr_t geted32(dynarec_la64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop
 // generic x64 helper
 void jump_to_epilog(dynarec_la64_t* dyn, uintptr_t ip, int reg, int ninst);
 void jump_to_next(dynarec_la64_t* dyn, uintptr_t ip, int reg, int ninst, int is32bits);
+void ret_to_epilog(dynarec_la64_t* dyn, int ninst, rex_t rex);
 void call_c(dynarec_la64_t* dyn, int ninst, void* fnc, int reg, int ret, int saveflags, int save_reg);
 void emit_test32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5);
 void emit_add32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5);
diff --git a/src/dynarec/la64/dynarec_la64_pass0.h b/src/dynarec/la64/dynarec_la64_pass0.h
index fd934d09..af2c29ba 100644
--- a/src/dynarec/la64/dynarec_la64_pass0.h
+++ b/src/dynarec/la64/dynarec_la64_pass0.h
@@ -22,6 +22,7 @@
         dyn->insts[ninst].x64.barrier = A;         \
     } else                                         \
         dyn->insts[ninst].barrier_maybe = 1
+#define SET_HASCALLRET() dyn->insts[ninst].x64.has_callret = 1
 #define NEW_INST                                                                                                  \
     ++dyn->size;                                                                                                  \
     if (dyn->size + 3 >= dyn->cap) {                                                                              \
diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h
index b840b496..f8d8a20e 100644
--- a/src/dynarec/la64/la64_emitter.h
+++ b/src/dynarec/la64/la64_emitter.h
@@ -628,6 +628,12 @@ f24-f31  fs0-fs7   Static registers                Callee
     else                      \
         LD_WU(rd, rj, imm12);
 
+#define LDz(rd, rj, imm12) /* load whose width follows rex.is32bits */ \
+    if (rex.is32bits) /* 32-bit mode: LD_WU */                          \
+        LD_WU(rd, rj, imm12);                                           \
+    else /* otherwise: LD_D */                                          \
+        LD_D(rd, rj, imm12);
+
 #define SDxw(rd, rj, imm12)  \
     if (rex.w)               \
         ST_D(rd, rj, imm12); \