about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2023-10-25 15:09:17 +0200
committer ptitSeb <sebastien.chev@gmail.com> 2023-10-25 15:09:17 +0200
commit 0900cc2f5907b55c7cc6d29058765e1acd414e01 (patch)
tree 8cdb65aedc797896308fb188e575758170d19785 /src
parent 87bf751b115267d7c388c849c43fca6d3f0d0881 (diff)
download box64-0900cc2f5907b55c7cc6d29058765e1acd414e01.tar.gz
box64-0900cc2f5907b55c7cc6d29058765e1acd414e01.zip
[DYNAREC] Various improvements to x87 code and segment handling
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_00.c 7
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_66.c 2
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_d9.c 24
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_dd.c 3
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_df.c 11
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_helper.c 49
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_helper.h 7
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_00_2.c 2
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_00_3.c 2
-rw-r--r-- src/emu/x64emu_private.h 3
10 files changed, 95 insertions, 15 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_00.c b/src/dynarec/arm64/dynarec_arm64_00.c
index d078029b..2233c27f 100644
--- a/src/dynarec/arm64/dynarec_arm64_00.c
+++ b/src/dynarec/arm64/dynarec_arm64_00.c
@@ -1213,7 +1213,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             if((nextop&0xC0)==0xC0) {   // reg <= seg
                 LDRw_U12(xRAX+(nextop&7)+(rex.b<<3), xEmu, offsetof(x64emu_t, segs[u8]));
             } else {                    // mem <= seg
-                LDRw_U12(x3, xEmu, offsetof(x64emu_t, segs[u8]));
+                LDRH_U12(x3, xEmu, offsetof(x64emu_t, segs[u8]));
                 addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, &unscaled, 0xfff<<1, 1, rex, NULL, 0, 0);
                 STH(x3, wback, fixedaddress);
                 SMWRITE2();
@@ -1247,7 +1247,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 LDH(x1, wback, fixedaddress);
                 ed = x1;
             }
-            STRw_U12(ed, xEmu, offsetof(x64emu_t, segs[u8]));
+            STRH_U12(ed, xEmu, offsetof(x64emu_t, segs[u8]));
             STRw_U12(wZR, xEmu, offsetof(x64emu_t, segs_serial[u8]));
             break;
         case 0x8F:
@@ -1998,6 +1998,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     *need_epilog = 1;
                 } else {
                     MESSAGE(LOG_DUMP, "Native Call to %s\n", GetNativeName(GetNativeFnc(ip)));
+                    x87_stackcount(dyn, ninst, x1);
                     x87_forget(dyn, ninst, x3, x4, 0);
                     sse_purge07cache(dyn, ninst, x3);
                     SMEND();
@@ -2976,7 +2977,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         LDxw(x1, wback, 0);
                         ed = x1;
                         LDH(x3, wback, rex.w?8:4);
-                        STW(x3, xEmu, offsetof(x64emu_t, segs[_CS]));
+                        STH(x3, xEmu, offsetof(x64emu_t, segs[_CS]));
                         STW(xZR, xEmu, offsetof(x64emu_t, segs_serial[_CS]));
                         jump_to_epilog(dyn, 0, ed, ninst);
                         *need_epilog = 0;
diff --git a/src/dynarec/arm64/dynarec_arm64_66.c b/src/dynarec/arm64/dynarec_arm64_66.c
index c41c19ef..84427d20 100644
--- a/src/dynarec/arm64/dynarec_arm64_66.c
+++ b/src/dynarec/arm64/dynarec_arm64_66.c
@@ -639,7 +639,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             INST_NAME("MOV EW, Seg");

             nextop=F8;

             u8 = (nextop&0x38)>>3;

-            LDRw_U12(x3, xEmu, offsetof(x64emu_t, segs[u8]));

+            LDRH_U12(x3, xEmu, offsetof(x64emu_t, segs[u8]));

             if((nextop&0xC0)==0xC0) {   // reg <= seg

                 UXTHw(xRAX+(nextop&7)+(rex.b<<3), x3);

             } else {                    // mem <= seg

diff --git a/src/dynarec/arm64/dynarec_arm64_d9.c b/src/dynarec/arm64/dynarec_arm64_d9.c
index 7b695e02..20563efd 100644
--- a/src/dynarec/arm64/dynarec_arm64_d9.c
+++ b/src/dynarec/arm64/dynarec_arm64_d9.c
@@ -179,22 +179,28 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
         case 0xF0:
             INST_NAME("F2XM1");
             MESSAGE(LOG_DUMP, "Need Optimization\n");
+            i1 = x87_stackcount(dyn, ninst, x1);
             x87_forget(dyn, ninst, x1, x2, 0);
             CALL(native_f2xm1, -1);
+            x87_unstackcount(dyn, ninst, x1, i1);
             break;
         case 0xF1:
             INST_NAME("FYL2X");
             MESSAGE(LOG_DUMP, "Need Optimization\n");
+            i1 = x87_stackcount(dyn, ninst, x1);
             x87_forget(dyn, ninst, x1, x2, 0);
             x87_forget(dyn, ninst, x1, x2, 1);
             CALL(native_fyl2x, -1);
+            x87_unstackcount(dyn, ninst, x1, i1);
             X87_POP_OR_FAIL(dyn, ninst, x3);
             break;
         case 0xF2:
             INST_NAME("FPTAN");
             MESSAGE(LOG_DUMP, "Need Optimization\n");
+            i1 = x87_stackcount(dyn, ninst, x1);
             x87_forget(dyn, ninst, x1, x2, 0);
             CALL(native_ftan, -1);
+            x87_unstackcount(dyn, ninst, x1, i1);
             if(PK(0)==0xdd && PK(1)==0xd8) {
                 MESSAGE(LOG_DUMP, "Optimized next DD D8 fstp st0, st0, not emiting 1\n");
                 u8 = F8;
@@ -211,24 +217,30 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
         case 0xF3:
             INST_NAME("FPATAN");
             MESSAGE(LOG_DUMP, "Need Optimization\n");
+            i1 = x87_stackcount(dyn, ninst, x1);
             x87_forget(dyn, ninst, x1, x2, 0);
             x87_forget(dyn, ninst, x1, x2, 1);
             CALL(native_fpatan, -1);
+            x87_unstackcount(dyn, ninst, x1, i1);
             X87_POP_OR_FAIL(dyn, ninst, x3);
             break;
         case 0xF4:
             INST_NAME("FXTRACT");
             MESSAGE(LOG_DUMP, "Need Optimization\n");
             X87_PUSH_EMPTY_OR_FAIL(dyn, ninst, 0);
+            i1 = x87_stackcount(dyn, ninst, x1);
             x87_forget(dyn, ninst, x1, x2, 1);
             CALL(native_fxtract, -1);
+            x87_unstackcount(dyn, ninst, x1, i1);
             break;
         case 0xF5:
             INST_NAME("FPREM1");
             MESSAGE(LOG_DUMP, "Need Optimization\n");
+            i1 = x87_stackcount(dyn, ninst, x1);
             x87_forget(dyn, ninst, x1, x2, 0);
             x87_forget(dyn, ninst, x1, x2, 1);
             CALL(native_fprem1, -1);
+            x87_unstackcount(dyn, ninst, x1, i1);
             break;
         case 0xF6:
             INST_NAME("FDECSTP");
@@ -249,16 +261,20 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
         case 0xF8:
             INST_NAME("FPREM");
             MESSAGE(LOG_DUMP, "Need Optimization\n");
+            i1 = x87_stackcount(dyn, ninst, x1);
             x87_forget(dyn, ninst, x1, x2, 0);
             x87_forget(dyn, ninst, x1, x2, 1);
             CALL(native_fprem, -1);
+            x87_unstackcount(dyn, ninst, x1, i1);
             break;
         case 0xF9:
             INST_NAME("FYL2XP1");
             MESSAGE(LOG_DUMP, "Need Optimization\n");
+            i1 = x87_stackcount(dyn, ninst, x1);
             x87_forget(dyn, ninst, x1, x2, 0);
             x87_forget(dyn, ninst, x1, x2, 1);
             CALL(native_fyl2xp1, -1);
+            x87_unstackcount(dyn, ninst, x1, i1);
             X87_POP_OR_FAIL(dyn, ninst, x3);
             break;
         case 0xFA:
@@ -274,8 +290,10 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             INST_NAME("FSINCOS");
             MESSAGE(LOG_DUMP, "Need Optimization\n");
             X87_PUSH_EMPTY_OR_FAIL(dyn, ninst, 0);
+            i1 = x87_stackcount(dyn, ninst, x1);
             x87_forget(dyn, ninst, x1, x2, 1);
             CALL(native_fsincos, -1);
+            x87_unstackcount(dyn, ninst, x1, i1);
             break;
         case 0xFC:
             INST_NAME("FRNDINT");
@@ -298,21 +316,27 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
         case 0xFD:
             INST_NAME("FSCALE");
             MESSAGE(LOG_DUMP, "Need Optimization\n");
+            i1 = x87_stackcount(dyn, ninst, x1);
             x87_forget(dyn, ninst, x1, x2, 0);
             x87_forget(dyn, ninst, x1, x2, 1);
             CALL(native_fscale, -1);
+            x87_unstackcount(dyn, ninst, x1, i1);
             break;
         case 0xFE:
             INST_NAME("FSIN");
             MESSAGE(LOG_DUMP, "Need Optimization\n");
+            i1 = x87_stackcount(dyn, ninst, x1);
             x87_forget(dyn, ninst, x1, x2, 0);
             CALL(native_fsin, -1);
+            x87_unstackcount(dyn, ninst, x1, i1);
             break;
         case 0xFF:
             INST_NAME("FCOS");
             MESSAGE(LOG_DUMP, "Need Optimization\n");
+            i1 = x87_stackcount(dyn, ninst, x1);
             x87_forget(dyn, ninst, x1, x2, 0);
             CALL(native_fcos, -1);
+            x87_unstackcount(dyn, ninst, x1, i1);
             break;
 
 
diff --git a/src/dynarec/arm64/dynarec_arm64_dd.c b/src/dynarec/arm64/dynarec_arm64_dd.c
index 7c689bbf..c56258d1 100644
--- a/src/dynarec/arm64/dynarec_arm64_dd.c
+++ b/src/dynarec/arm64/dynarec_arm64_dd.c
@@ -220,7 +220,7 @@ uintptr_t dynarec64_DD(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     break;
                 case 7:
                     INST_NAME("FNSTSW m2byte");
-                    fpu_purgecache(dyn, ninst, 0, x1, x2, x3);
+                    //fpu_purgecache(dyn, ninst, 0, x1, x2, x3);
                     addr = geted(dyn, addr, ninst, nextop, &ed, x4, &fixedaddress, &unscaled, 0xfff<<1, 1, rex, NULL, 0, 0);
                     LDRw_U12(x2, xEmu, offsetof(x64emu_t, top));
                     LDRH_U12(x3, xEmu, offsetof(x64emu_t, sw));
@@ -234,6 +234,7 @@ uintptr_t dynarec64_DD(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         ANDw_mask(x2, x2, 0, 2);
                     }
                     BFIw(x3, x2, 11, 3); // inject TOP at bit 11 (3 bits)
+                    STRH_U12(x3, xEmu, offsetof(x64emu_t, sw));
                     STH(x3, ed, fixedaddress);   // store whole sw flags
                     break;
                 default:
diff --git a/src/dynarec/arm64/dynarec_arm64_df.c b/src/dynarec/arm64/dynarec_arm64_df.c
index 40ad5066..b81c4128 100644
--- a/src/dynarec/arm64/dynarec_arm64_df.c
+++ b/src/dynarec/arm64/dynarec_arm64_df.c
@@ -34,6 +34,7 @@ uintptr_t dynarec64_DF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
     int64_t j64;
     int64_t fixedaddress;
     int unscaled;
+    int i1;
 
     MAYUSE(s0);
     MAYUSE(v2);
@@ -57,6 +58,14 @@ uintptr_t dynarec64_DF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
         case 0xE0:
             INST_NAME("FNSTSW AX");
             LDRw_U12(x2, xEmu, offsetof(x64emu_t, top));
+            if(dyn->n.x87stack) {
+                if(dyn->n.x87stack>0) {
+                    SUBw_U12(x2, x2, dyn->n.x87stack);
+                } else {
+                    ADDw_U12(x2, x2, -dyn->n.x87stack);
+                }
+                ANDw_mask(x2, x2, 0, 2);  //mask=7
+            }
             LDRH_U12(x1, xEmu, offsetof(x64emu_t, sw));
             BFIw(x1, x2, 11, 3); // inject top
             STRH_U12(x1, xEmu, offsetof(x64emu_t, sw));
@@ -315,10 +324,12 @@ uintptr_t dynarec64_DF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     break;
                 case 6:
                     INST_NAME("FBSTP tbytes, ST0");
+                    i1 = x87_stackcount(dyn, ninst, x1);
                     x87_forget(dyn, ninst, x1, x2, 0);
                     addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
                     if(ed!=x1) {MOVx_REG(x1, ed);}
                     CALL(fpu_fbst, -1);
+                    x87_unstackcount(dyn, ninst, x1, i1);
                     X87_POP_OR_FAIL(dyn, ninst, x3);
                     break;
                 case 7:
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.c b/src/dynarec/arm64/dynarec_arm64_helper.c
index a2a4d590..f886e75a 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.c
+++ b/src/dynarec/arm64/dynarec_arm64_helper.c
@@ -866,11 +866,11 @@ static void x87_reset(dynarec_arm_t* dyn)
             dyn->n.neoncache[i].v = 0;
 }
 
-void x87_stackcount(dynarec_arm_t* dyn, int ninst, int scratch)
+int x87_stackcount(dynarec_arm_t* dyn, int ninst, int scratch)
 {
     MAYUSE(scratch);
     if(!dyn->n.x87stack)
-        return;
+        return 0;
     if(dyn->n.mmxcount)
         mmx_purgecache(dyn, ninst, 0, scratch);
     MESSAGE(LOG_DUMP, "\tSynch x87 Stackcount (%d)\n", dyn->n.x87stack);
@@ -893,10 +893,45 @@ void x87_stackcount(dynarec_arm_t* dyn, int ninst, int scratch)
     ANDw_mask(scratch, scratch, 0, 2);  //mask=7
     STRw_U12(scratch, xEmu, offsetof(x64emu_t, top));
     // reset x87stack, but not the stack count of neoncache
+    int ret = dyn->n.x87stack;
     dyn->n.x87stack = 0;
     dyn->n.stack_next -= dyn->n.stack;
     dyn->n.stack = 0;
     MESSAGE(LOG_DUMP, "\t------x87 Stackcount\n");
+    return ret;
+}
+
+void x87_unstackcount(dynarec_arm_t* dyn, int ninst, int scratch, int count)
+{
+    MAYUSE(scratch);
+    if(!count)
+        return;
+    if(dyn->n.mmxcount)
+        mmx_purgecache(dyn, ninst, 0, scratch);
+    MESSAGE(LOG_DUMP, "\tUnsynch x87 Stackcount (%d)\n", count);
+    int a = -count;
+    // Subtract the restored count from emu fpu_stack (a is -count)
+    LDRw_U12(scratch, xEmu, offsetof(x64emu_t, fpu_stack));
+    if(a>0) {
+        ADDw_U12(scratch, scratch, a);
+    } else {
+        SUBw_U12(scratch, scratch, -a);
+    }
+    STRw_U12(scratch, xEmu, offsetof(x64emu_t, fpu_stack));
+    // Add the restored count back to top, with and 7
+    LDRw_U12(scratch, xEmu, offsetof(x64emu_t, top));
+    if(a>0) {
+        SUBw_U12(scratch, scratch, a);
+    } else {
+        ADDw_U12(scratch, scratch, -a);
+    }
+    ANDw_mask(scratch, scratch, 0, 2);  //mask=7
+    STRw_U12(scratch, xEmu, offsetof(x64emu_t, top));
+    // restore x87stack and stack to the saved count, and re-add it to stack_next
+    dyn->n.x87stack = count;
+    dyn->n.stack = count;
+    dyn->n.stack_next += dyn->n.stack;
+    MESSAGE(LOG_DUMP, "\t------x87 Unstackcount\n");
 }
 
 int neoncache_st_coherency(dynarec_arm_t* dyn, int ninst, int a, int b)
@@ -1252,7 +1287,6 @@ void x87_refresh(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st)
 
 void x87_forget(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st)
 {
-    x87_stackcount(dyn, ninst, s1);
     int ret = -1;
     for (int i=0; (i<8) && (ret==-1); ++i)
         if(dyn->n.x87cache[i] == st)
@@ -1270,8 +1304,13 @@ void x87_forget(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st)
     // Get top
     LDRw_U12(s2, xEmu, offsetof(x64emu_t, top));
     // Update
-    if(st) {
-        ADDw_U12(s2, s2, st);
+    int ast = st - dyn->n.x87stack;
+    if(ast) {
+        if(ast>0) {
+            ADDw_U12(x2, x2, ast);
+        } else {
+            SUBw_U12(x2, x2, -ast);
+        }
         ANDw_mask(s2, s2, 0, 2); //mask=7    // (emu->top + i)&7
     }
     if(dyn->n.neoncache[reg].t==NEON_CACHE_ST_F) {
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h
index c794a161..b3f02ee5 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.h
+++ b/src/dynarec/arm64/dynarec_arm64_helper.h
@@ -1030,6 +1030,7 @@ void* arm64_next(x64emu_t* emu, uintptr_t addr);
 #define x87_forget      STEPNAME(x87_forget)
 #define x87_reget_st    STEPNAME(x87_reget_st)
 #define x87_stackcount  STEPNAME(x87_stackcount)
+#define x87_unstackcount  STEPNAME(x87_unstackcount)
 #define x87_swapreg     STEPNAME(x87_swapreg)
 #define x87_setround    STEPNAME(x87_setround)
 #define x87_restoreround STEPNAME(x87_restoreround)
@@ -1146,8 +1147,10 @@ void emit_shld32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, uint
 void emit_pf(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4);
 
 // x87 helper
-// cache of the local stack counter, to avoid update at every call
-void x87_stackcount(dynarec_arm_t* dyn, int ninst, int scratch);
+// cache of the local stack counter, to avoid update at every call, return old internal stack counter
+int x87_stackcount(dynarec_arm_t* dyn, int ninst, int scratch);
+// revert local stack counter to previous version (return from x87_stackcount)
+void x87_unstackcount(dynarec_arm_t* dyn, int ninst, int scratch, int count);
 // fpu push. Return the Dd value to be used
 int x87_do_push(dynarec_arm_t* dyn, int ninst, int s1, int t);
 // fpu push. Do not allocate a cache register. Needs a scratch register to do x87stack synch (or 0 to not do it)
diff --git a/src/dynarec/rv64/dynarec_rv64_00_2.c b/src/dynarec/rv64/dynarec_rv64_00_2.c
index 08945265..b2a0c420 100644
--- a/src/dynarec/rv64/dynarec_rv64_00_2.c
+++ b/src/dynarec/rv64/dynarec_rv64_00_2.c
@@ -418,7 +418,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 LHU(x1, ed, fixedaddress);
                 ed = x1;
             }
-            SW(ed, xEmu, offsetof(x64emu_t, segs[(nextop&0x38)>>3]));
+            SH(ed, xEmu, offsetof(x64emu_t, segs[(nextop&0x38)>>3]));
             SW(xZR, xEmu, offsetof(x64emu_t, segs_serial[(nextop&0x38)>>3]));
             break;
         case 0x8F:
diff --git a/src/dynarec/rv64/dynarec_rv64_00_3.c b/src/dynarec/rv64/dynarec_rv64_00_3.c
index 9e8b4511..39a1aab9 100644
--- a/src/dynarec/rv64/dynarec_rv64_00_3.c
+++ b/src/dynarec/rv64/dynarec_rv64_00_3.c
@@ -1173,7 +1173,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                         LDxw(x1, wback, 0);
                         ed = x1;
                         LHU(x3, wback, rex.w?8:4);
-                        SW(x3, xEmu, offsetof(x64emu_t, segs[_CS]));
+                        SH(x3, xEmu, offsetof(x64emu_t, segs[_CS]));
                         SW(xZR, xEmu, offsetof(x64emu_t, segs_serial[_CS]));
                         jump_to_epilog(dyn, 0, ed, ninst);
                         *need_epilog = 0;
diff --git a/src/emu/x64emu_private.h b/src/emu/x64emu_private.h
index 25601105..c5c4f4d9 100644
--- a/src/emu/x64emu_private.h
+++ b/src/emu/x64emu_private.h
@@ -86,7 +86,8 @@ typedef struct x64emu_s {
     uintptr_t   prev2_ip;
     #endif
     // segments
-    uint32_t    segs[6];        // only 32bits value?
+    uint16_t    segs[6];        // only 32bits value?
+    uint16_t    dummy_seg6, dummy_seg7; // to stay aligned
     uintptr_t   segs_offs[6];   // computed offset associate with segment
     uint32_t    segs_serial[6];  // are seg offset clean (not 0) or does they need to be re-computed (0)? For GS, serial need to be the same as context->sel_serial
     // parent context