about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2021-07-11 17:22:27 +0200
committer ptitSeb <sebastien.chev@gmail.com> 2021-07-11 17:22:27 +0200
commit a7422d11edc92f16e8a0d6a479d5efb016a70c19 (patch)
tree c67c3478dcde19f781b692cdaad844f8c4231147
parent b39fbc0cfddea724f877756c587571127bd22edf (diff)
download box64-a7422d11edc92f16e8a0d6a479d5efb016a70c19.tar.gz
         box64-a7422d11edc92f16e8a0d6a479d5efb016a70c19.zip
Split mmx and x87 registers (from box86)
-rwxr-xr-x src/dynarec/dynarec_arm64_functions.c 12
-rwxr-xr-x src/dynarec/dynarec_arm64_helper.c 20
-rwxr-xr-x src/emu/x64emu.c 8
-rwxr-xr-x src/emu/x64emu_private.h 5
-rwxr-xr-x src/emu/x64run_private.c 4
-rw-r--r-- src/emu/x64rundb.c 4
-rw-r--r-- src/emu/x64rundf.c 8
-rwxr-xr-x src/emu/x87emu_private.c 34
-rwxr-xr-x src/emu/x87emu_private.h 6
-rwxr-xr-x src/include/regs.h 6
10 files changed, 62 insertions, 45 deletions
diff --git a/src/dynarec/dynarec_arm64_functions.c b/src/dynarec/dynarec_arm64_functions.c
index 0751ae71..9333d2b4 100755
--- a/src/dynarec/dynarec_arm64_functions.c
+++ b/src/dynarec/dynarec_arm64_functions.c
@@ -28,7 +28,7 @@
 
 void arm_fstp(x64emu_t* emu, void* p)
 {
-    if(ST0.q!=STld(0).ref)
+    if(ST0.q!=STld(0).uref)
         D2LD(&ST0.d, p);
     else
         memcpy(p, &STld(0).ld, 10);
@@ -108,8 +108,8 @@ void arm_fild64(x64emu_t* emu, int64_t* ed)
     int64_t tmp;
     memcpy(&tmp, ed, sizeof(tmp));
     ST0.d = tmp;
-    STll(0).ll = tmp;
-    STll(0).ref = ST0.q;
+    STll(0).sq = tmp;
+    STll(0).sref = ST0.sq;
 }
 
 void arm_fbstp(x64emu_t* emu, uint8_t* ed)
@@ -120,8 +120,8 @@ void arm_fbstp(x64emu_t* emu, uint8_t* ed)
 void arm_fistp64(x64emu_t* emu, int64_t* ed)
 {
     // used of memcpy to avoid aligments issues
-    if((uint64_t)STll(0).ref==ST(0).q) {
-        memcpy(ed, &STll(0).ll, sizeof(int64_t));
+    if(STll(0).sref==ST(0).sq) {
+        memcpy(ed, &STll(0).sq, sizeof(int64_t));
     } else {
         int64_t tmp;
         if(isgreater(ST0.d, (double)(int64_t)0x7fffffffffffffffLL) || isless(ST0.d, (double)(int64_t)0x8000000000000000LL) || !isfinite(ST0.d))
@@ -143,7 +143,7 @@ void arm_fld(x64emu_t* emu, uint8_t* ed)
 {
     memcpy(&STld(0).ld, ed, 10);
     LD2D(&STld(0), &ST(0).d);
-    STld(0).ref = ST0.q;
+    STld(0).uref = ST0.q;
 }
 
 void arm_ud(x64emu_t* emu)
diff --git a/src/dynarec/dynarec_arm64_helper.c b/src/dynarec/dynarec_arm64_helper.c
index 8cac4c0a..ea8274ae 100755
--- a/src/dynarec/dynarec_arm64_helper.c
+++ b/src/dynarec/dynarec_arm64_helper.c
@@ -741,7 +741,7 @@ void x87_purgecache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3)
     if(ret!=0) {
         // --- set values
         // prepare offset to fpu => s1
-        ADDx_U12(s1, xEmu, offsetof(x64emu_t, mmx87));
+        ADDx_U12(s1, xEmu, offsetof(x64emu_t, x87));
         // Get top
         // loop all cache entries
         for (int i=0; i<8; ++i)
@@ -772,7 +772,7 @@ static void x87_reflectcache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int
     if(!ret)    // nothing to do
         return;
     // prepare offset to fpu => s1
-    ADDx_U12(s1, xEmu, offsetof(x64emu_t, mmx87));
+    ADDx_U12(s1, xEmu, offsetof(x64emu_t, x87));
     // Get top
     LDRw_U12(s2, xEmu, offsetof(x64emu_t, top));
     // loop all cache entries
@@ -806,7 +806,7 @@ int x87_get_cache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st)
     // found, setup and grab the value
     dyn->x87cache[ret] = st;
     dyn->x87reg[ret] = fpu_get_reg_x87(dyn);
-    ADDx_U12(s1, xEmu, offsetof(x64emu_t, mmx87));
+    ADDx_U12(s1, xEmu, offsetof(x64emu_t, x87));
     LDRw_U12(s2, xEmu, offsetof(x64emu_t, top));
     int a = st - dyn->x87stack;
     if(a) {
@@ -851,7 +851,7 @@ void x87_refresh(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st)
         return;
     MESSAGE(LOG_DUMP, "\tRefresh x87 Cache for ST%d\n", st);
     // prepare offset to fpu => s1
-    ADDx_U12(s1, xEmu, offsetof(x64emu_t, mmx87));
+    ADDx_U12(s1, xEmu, offsetof(x64emu_t, x87));
     // Get top
     LDRw_U12(s2, xEmu, offsetof(x64emu_t, top));
     // Update
@@ -879,7 +879,7 @@ void x87_forget(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st)
         return;
     MESSAGE(LOG_DUMP, "\tForget x87 Cache for ST%d\n", st);
     // prepare offset to fpu => s1
-    ADDx_U12(s1, xEmu, offsetof(x64emu_t, mmx87));
+    ADDx_U12(s1, xEmu, offsetof(x64emu_t, x87));
     // Get top
     LDRw_U12(s2, xEmu, offsetof(x64emu_t, top));
     // Update
@@ -908,7 +908,7 @@ void x87_reget_st(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st)
         if(dyn->x87cache[i]==st) {
             // refresh the value
             MESSAGE(LOG_DUMP, "\tRefresh x87 Cache for ST%d\n", st);
-            ADDx_U12(s1, xEmu, offsetof(x64emu_t, mmx87));
+            ADDx_U12(s1, xEmu, offsetof(x64emu_t, x87));
             LDRw_U12(s2, xEmu, offsetof(x64emu_t, top));
             int a = st - dyn->x87stack;
             if(a<0) {
@@ -932,7 +932,7 @@ void x87_reget_st(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st)
     // found, setup and grab the value
     dyn->x87cache[ret] = st;
     dyn->x87reg[ret] = fpu_get_reg_x87(dyn);
-    ADDx_U12(s1, xEmu, offsetof(x64emu_t, mmx87));
+    ADDx_U12(s1, xEmu, offsetof(x64emu_t, x87));
     LDRw_U12(s2, xEmu, offsetof(x64emu_t, top));
     int a = st - dyn->x87stack;
     if(a<0) {
@@ -1006,7 +1006,7 @@ int mmx_get_reg(dynarec_arm_t* dyn, int ninst, int s1, int a)
     if(dyn->mmxcache[a]!=-1)
         return dyn->mmxcache[a];
     int ret = dyn->mmxcache[a] = fpu_get_reg_emm(dyn, a);
-    VLDR64_U12(ret, xEmu, offsetof(x64emu_t, mmx87[a]));
+    VLDR64_U12(ret, xEmu, offsetof(x64emu_t, mmx[a]));
     return ret;
 #else
     (void)dyn; (void)a;
@@ -1039,7 +1039,7 @@ void mmx_purgecache(dynarec_arm_t* dyn, int ninst, int s1)
                 MESSAGE(LOG_DUMP, "\tPurge MMX Cache ------\n");
                 ++old;
             }
-            VSTR64_U12(dyn->mmxcache[i], xEmu, offsetof(x64emu_t, mmx87[i]));
+            VSTR64_U12(dyn->mmxcache[i], xEmu, offsetof(x64emu_t, mmx[i]));
             fpu_free_reg(dyn, dyn->mmxcache[i]);
             dyn->mmxcache[i] = -1;
         }
@@ -1057,7 +1057,7 @@ static void mmx_reflectcache(dynarec_arm_t* dyn, int ninst, int s1)
 #if STEP > 1
     for (int i=0; i<8; ++i)
         if(dyn->mmxcache[i]!=-1) {
-            VLDR64_U12(dyn->mmxcache[i], xEmu, offsetof(x64emu_t, mmx87[i]));
+            VLDR64_U12(dyn->mmxcache[i], xEmu, offsetof(x64emu_t, mmx[i]));
         }
 #else
     (void)dyn;
diff --git a/src/emu/x64emu.c b/src/emu/x64emu.c
index 75ce18fd..c5ef1b10 100755
--- a/src/emu/x64emu.c
+++ b/src/emu/x64emu.c
@@ -218,12 +218,12 @@ void CloneEmu(x64emu_t *newemu, const x64emu_t* emu)
     newemu->old_ip = emu->old_ip;
     memcpy(newemu->segs, emu->segs, sizeof(emu->segs));
     memset(newemu->segs_serial, 0, sizeof(newemu->segs_serial));
-	memcpy(newemu->mmx87, emu->mmx87, sizeof(emu->mmx87));
+	memcpy(newemu->x87, emu->x87, sizeof(emu->x87));
+	memcpy(newemu->mmx, emu->mmx, sizeof(emu->mmx));
     memcpy(newemu->fpu_ld, emu->fpu_ld, sizeof(emu->fpu_ld));
     memcpy(newemu->fpu_ll, emu->fpu_ll, sizeof(emu->fpu_ll));
 	memcpy(newemu->p_regs, emu->p_regs, sizeof(emu->p_regs));
 	newemu->cw = emu->cw;
-    newemu->cw_mask_all = emu->cw_mask_all;
 	memcpy(&newemu->sw, &emu->sw, sizeof(emu->sw));
 	newemu->top = emu->top;
     newemu->fpu_stack = emu->fpu_stack;
@@ -353,7 +353,7 @@ const char* DumpCPURegs(x64emu_t* emu, uintptr_t ip)
     if(trace_emm) {
         // do emm reg if needed
         for(int i=0; i<8; ++i) {
-            sprintf(tmp, "mm%d:%016lx", i, emu->mmx87[i].q);
+            sprintf(tmp, "mm%d:%016lx", i, emu->mmx[i].q);
             strcat(buff, tmp);
             if ((i&3)==3) strcat(buff, "\n"); else strcat(buff, " ");
         }
@@ -370,7 +370,7 @@ const char* DumpCPURegs(x64emu_t* emu, uintptr_t ip)
     // start with FPU regs...
     if(emu->fpu_stack) {
         for (int i=0; i<emu->fpu_stack; i++) {
-            sprintf(tmp, "ST%d=%f", i, emu->mmx87[(emu->top+i)&7].d);
+            sprintf(tmp, "ST%d=%f", i, ST(i).d);
             strcat(buff, tmp);
             int c = 10-strlen(tmp);
             if(c<1) c=1;
diff --git a/src/emu/x64emu_private.h b/src/emu/x64emu_private.h
index 73fe8a7a..cf81d316 100755
--- a/src/emu/x64emu_private.h
+++ b/src/emu/x64emu_private.h
@@ -33,8 +33,9 @@ typedef struct x64emu_s {
     // sse
     sse_regs_t  xmm[16];
     // fpu / mmx
-	mmx87_regs_t mmx87[8];
-	uint16_t    cw,cw_mask_all;
+	mmx87_regs_t x87[8];
+	mmx87_regs_t mmx[8];
+	uint16_t    cw;
 	x87flags_t  sw;
 	uint32_t    top;        // top is part of sw, but it's faster to have it separatly
     int         fpu_stack;
diff --git a/src/emu/x64run_private.c b/src/emu/x64run_private.c
index 4eeddd06..ef697968 100755
--- a/src/emu/x64run_private.c
+++ b/src/emu/x64run_private.c
@@ -1241,7 +1241,7 @@ mmx87_regs_t* GetEm(x64emu_t *emu, rex_t rex, uint8_t v, uint8_t delta)
 {
     uint8_t m = v&0xC7;    // filter Ed
     if(m>=0xC0) {
-         return &emu->mmx87[m&0x07];
+         return &emu->mmx[m&0x07];
     } else return (mmx87_regs_t*)GetECommon(emu, rex, m, delta);
 }
 
@@ -1289,7 +1289,7 @@ mmx87_regs_t* GetGm(x64emu_t *emu, rex_t rex, uint8_t v)
     (void)rex;
 
     uint8_t m = (v&0x38)>>3;
-    return &emu->mmx87[m&7];
+    return &emu->mmx[m&7];
 }
 
 sse_regs_t* GetGx(x64emu_t *emu, rex_t rex, uint8_t v)
diff --git a/src/emu/x64rundb.c b/src/emu/x64rundb.c
index 0cd9c5dd..cb5c180c 100644
--- a/src/emu/x64rundb.c
+++ b/src/emu/x64rundb.c
@@ -163,11 +163,11 @@ int RunDB(x64emu_t *emu, rex_t rex)
                 fpu_do_push(emu);

                 memcpy(&STld(0).ld, ED, 10);

                 LD2D(&STld(0), &ST(0).d);

-                STld(0).ref = ST0.q;

+                STld(0).uref = ST0.q;

                 break;

             case 7: /* FSTP tbyte */

                 GETED(0);

-                if(ST0.q!=STld(0).ref)

+                if(ST0.q!=STld(0).uref)

                     D2LD(&ST0.d, ED);

                 else

                     memcpy(ED, &STld(0).ld, 10);

diff --git a/src/emu/x64rundf.c b/src/emu/x64rundf.c
index 7abf665a..a059cfd8 100644
--- a/src/emu/x64rundf.c
+++ b/src/emu/x64rundf.c
@@ -152,8 +152,8 @@ int RunDF(x64emu_t *emu, rex_t rex)
             tmp64s = ED->sq[0];

             fpu_do_push(emu);

             ST0.d = tmp64s;

-            STll(0).ll = tmp64s;

-            STll(0).ref = ST0.q;

+            STll(0).sq = tmp64s;

+            STll(0).sref = ST0.sq;

             break;

         case 6: /* FBSTP tbytes, ST0 */

             GETED(0);

@@ -162,8 +162,8 @@ int RunDF(x64emu_t *emu, rex_t rex)
             break;

         case 7: /* FISTP i64 */

             GETED(0);

-            if(STll(0).ref==ST(0).sq)

-                ED->sq[0] = STll(0).ll;

+            if(STll(0).sref==ST(0).sq)

+                ED->sq[0] = STll(0).sq;

             else {

                 if(isgreater(ST0.d, (double)(int64_t)0x7fffffffffffffffLL) || isless(ST0.d, -(double)(int64_t)0x7fffffffffffffffLL) || !isfinite(ST0.d))

                     ED->sq[0] = 0x8000000000000000LL;

diff --git a/src/emu/x87emu_private.c b/src/emu/x87emu_private.c
index 0393ac01..0ab63a6e 100755
--- a/src/emu/x87emu_private.c
+++ b/src/emu/x87emu_private.c
@@ -21,7 +21,7 @@ void fpu_do_free(x64emu_t* emu, int i)
 
 void reset_fpu(x64emu_t* emu)
 {
-    memset(emu->mmx87, 0, sizeof(emu->mmx87));
+    memset(emu->x87, 0, sizeof(emu->x87));
     memset(emu->fpu_ld, 0, sizeof(emu->fpu_ld));
     emu->cw = 0x37F;
     emu->sw.x16 = 0x0000;
@@ -177,8 +177,8 @@ void D2LD(void* d, void* ld)
 	int32_t sign80 = (s.ud[1]&0x80000000)?1:0;
 	int32_t exp80 =  s.ud[1]&0x7ff00000;
 	int32_t exp80final = (exp80>>20);
-	int64_t mant80 = s.q&0x000fffffffffffffL;
-	int64_t mant80final = (mant80 << 11);
+	uint64_t mant80 = s.q&0x000fffffffffffffL;
+	uint64_t mant80final = (mant80 << 11);
     if(exp80final==0x7ff) {
         // NaN and Infinite
         exp80final = 0x7fff;
@@ -294,7 +294,11 @@ void fpu_fxsave32(x64emu_t* emu, void* ed)
 {
     xsave32_t *p = (xsave32_t*)ed;
     // should save flags & all
-    emu->sw.f.F87_TOP = emu->top&7;
+    int top = emu->top&7;
+    int stack = 8-top;
+    if(top==0)  // check if stack is full or empty, based on tag[0]
+        stack = (emu->p_regs[0].tag)?8:0;
+    emu->sw.f.F87_TOP = top;
     p->ControlWord = emu->cw;
     p->StatusWord = emu->sw.x16;
     uint8_t tags = 0;
@@ -310,7 +314,7 @@ void fpu_fxsave32(x64emu_t* emu, void* ed)
     p->MxCsr_Mask = 0;
     // copy FPU/MMX regs...
     for(int i=0; i<8; ++i)
-        memcpy(&p->FloatRegisters[i].q[0], &emu->mmx87[0], sizeof(emu->mmx87[0]));
+        memcpy(&p->FloatRegisters[i].q[0], (i<stack)?&ST(i):&emu->mmx[i], sizeof(mmx87_regs_t));
     // copy SSE regs
     memcpy(&p->XmmRegisters[0], &emu->xmm[0], sizeof(emu->xmm));
 }
@@ -319,7 +323,11 @@ void fpu_fxsave64(x64emu_t* emu, void* ed)
 {
     xsave64_t *p = (xsave64_t*)ed;
     // should save flags & all
-    emu->sw.f.F87_TOP = emu->top&7;
+    int top = emu->top&7;
+    int stack = 8-top;
+    if(top==0)  // check if stack is full or empty, based on tag[0]
+        stack = (emu->p_regs[0].tag)?8:0;
+    emu->sw.f.F87_TOP = top;
     p->ControlWord = emu->cw;
     p->StatusWord = emu->sw.x16;
     uint8_t tags = 0;
@@ -333,7 +341,7 @@ void fpu_fxsave64(x64emu_t* emu, void* ed)
     p->MxCsr_Mask = 0;
     // copy FPU/MMX regs...
     for(int i=0; i<8; ++i)
-        memcpy(&p->FloatRegisters[i].q[0], &emu->mmx87[0], sizeof(emu->mmx87[0]));
+        memcpy(&p->FloatRegisters[i].q[0], (i<stack)?&ST(i):&emu->mmx[i], sizeof(mmx87_regs_t));
     // copy SSE regs
     memcpy(&p->XmmRegisters[0], &emu->xmm[0], sizeof(emu->xmm));
 }
@@ -347,9 +355,13 @@ void fpu_fxrstor32(x64emu_t* emu, void* ed)
     uint8_t tags = p->TagWord;
     for(int i=0; i<8; ++i)
         emu->p_regs[i].tag = (tags>>(i*2))?0:0b11;
+    int top = emu->top&7;
+    int stack = 8-top;
+    if(top==0)  // check if stack is full or empty, based on tag[0]
+        stack = (emu->p_regs[0].tag)?8:0;
     // copy back MMX regs...
     for(int i=0; i<8; ++i)
-        memcpy(&emu->mmx87[i], &p->FloatRegisters[i].q[0], sizeof(emu->mmx87[0]));
+        memcpy((i<stack)?&ST(i):&emu->mmx[i], &p->FloatRegisters[i].q[0], sizeof(mmx87_regs_t));
     // copy SSE regs
     memcpy(&emu->xmm[0], &p->XmmRegisters[0], sizeof(emu->xmm));
 }
@@ -363,9 +375,13 @@ void fpu_fxrstor64(x64emu_t* emu, void* ed)
     uint8_t tags = p->TagWord;
     for(int i=0; i<8; ++i)
         emu->p_regs[i].tag = (tags>>(i*2))?0:0b11;
+    int top = emu->top&7;
+    int stack = 8-top;
+    if(top==0)  // check if stack is full or empty, based on tag[0]
+        stack = (emu->p_regs[0].tag)?8:0;
     // copy back MMX regs...
     for(int i=0; i<8; ++i)
-        memcpy(&emu->mmx87[i], &p->FloatRegisters[i].q[0], sizeof(emu->mmx87[0]));
+        memcpy((i<stack)?&ST(i):&emu->mmx[i], &p->FloatRegisters[i].q[0], sizeof(mmx87_regs_t));
     // copy SSE regs
     memcpy(&emu->xmm[0], &p->XmmRegisters[0], sizeof(emu->xmm));
 }
diff --git a/src/emu/x87emu_private.h b/src/emu/x87emu_private.h
index 333d74c1..7d64bf27 100755
--- a/src/emu/x87emu_private.h
+++ b/src/emu/x87emu_private.h
@@ -14,9 +14,9 @@ typedef struct x64emu_s x64emu_t;
 #define LN2		0.69314718055994531
 #define LG2		0.3010299956639812
 
-#define ST0 emu->mmx87[emu->top]
-#define ST1 emu->mmx87[(emu->top+1)&7]
-#define ST(a) emu->mmx87[(emu->top+(a))&7]
+#define ST0 emu->x87[emu->top]
+#define ST1 emu->x87[(emu->top+1)&7]
+#define ST(a) emu->x87[(emu->top+(a))&7]
 
 #define STld(a)  emu->fpu_ld[(emu->top+(a))&7]
 #define STll(a)  emu->fpu_ll[(emu->top+(a))&7]
diff --git a/src/include/regs.h b/src/include/regs.h
index b7da3078..fc32abee 100755
--- a/src/include/regs.h
+++ b/src/include/regs.h
@@ -150,12 +150,12 @@ typedef struct {
 	#else
 	longdouble_t 	ld;
 	#endif
-	uint64_t		ref;
+	uint64_t		uref;
 } fpu_ld_t;
 
 typedef struct {
-	int64_t			ll;
-	int64_t			ref;
+	int64_t			sq;
+	int64_t			sref;
 } fpu_ll_t;
 
 typedef union {