diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2021-07-11 17:22:27 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2021-07-11 17:22:27 +0200 |
| commit | a7422d11edc92f16e8a0d6a479d5efb016a70c19 (patch) | |
| tree | c67c3478dcde19f781b692cdaad844f8c4231147 | |
| parent | b39fbc0cfddea724f877756c587571127bd22edf (diff) | |
| download | box64-a7422d11edc92f16e8a0d6a479d5efb016a70c19.tar.gz box64-a7422d11edc92f16e8a0d6a479d5efb016a70c19.zip | |
Split mmx and x87 registers (from box86)
| -rwxr-xr-x | src/dynarec/dynarec_arm64_functions.c | 12 | ||||
| -rwxr-xr-x | src/dynarec/dynarec_arm64_helper.c | 20 | ||||
| -rwxr-xr-x | src/emu/x64emu.c | 8 | ||||
| -rwxr-xr-x | src/emu/x64emu_private.h | 5 | ||||
| -rwxr-xr-x | src/emu/x64run_private.c | 4 | ||||
| -rw-r--r-- | src/emu/x64rundb.c | 4 | ||||
| -rw-r--r-- | src/emu/x64rundf.c | 8 | ||||
| -rwxr-xr-x | src/emu/x87emu_private.c | 34 | ||||
| -rwxr-xr-x | src/emu/x87emu_private.h | 6 | ||||
| -rwxr-xr-x | src/include/regs.h | 6 |
10 files changed, 62 insertions, 45 deletions
diff --git a/src/dynarec/dynarec_arm64_functions.c b/src/dynarec/dynarec_arm64_functions.c index 0751ae71..9333d2b4 100755 --- a/src/dynarec/dynarec_arm64_functions.c +++ b/src/dynarec/dynarec_arm64_functions.c @@ -28,7 +28,7 @@ void arm_fstp(x64emu_t* emu, void* p) { - if(ST0.q!=STld(0).ref) + if(ST0.q!=STld(0).uref) D2LD(&ST0.d, p); else memcpy(p, &STld(0).ld, 10); @@ -108,8 +108,8 @@ void arm_fild64(x64emu_t* emu, int64_t* ed) int64_t tmp; memcpy(&tmp, ed, sizeof(tmp)); ST0.d = tmp; - STll(0).ll = tmp; - STll(0).ref = ST0.q; + STll(0).sq = tmp; + STll(0).sref = ST0.sq; } void arm_fbstp(x64emu_t* emu, uint8_t* ed) @@ -120,8 +120,8 @@ void arm_fbstp(x64emu_t* emu, uint8_t* ed) void arm_fistp64(x64emu_t* emu, int64_t* ed) { // used of memcpy to avoid aligments issues - if((uint64_t)STll(0).ref==ST(0).q) { - memcpy(ed, &STll(0).ll, sizeof(int64_t)); + if(STll(0).sref==ST(0).sq) { + memcpy(ed, &STll(0).sq, sizeof(int64_t)); } else { int64_t tmp; if(isgreater(ST0.d, (double)(int64_t)0x7fffffffffffffffLL) || isless(ST0.d, (double)(int64_t)0x8000000000000000LL) || !isfinite(ST0.d)) @@ -143,7 +143,7 @@ void arm_fld(x64emu_t* emu, uint8_t* ed) { memcpy(&STld(0).ld, ed, 10); LD2D(&STld(0), &ST(0).d); - STld(0).ref = ST0.q; + STld(0).uref = ST0.q; } void arm_ud(x64emu_t* emu) diff --git a/src/dynarec/dynarec_arm64_helper.c b/src/dynarec/dynarec_arm64_helper.c index 8cac4c0a..ea8274ae 100755 --- a/src/dynarec/dynarec_arm64_helper.c +++ b/src/dynarec/dynarec_arm64_helper.c @@ -741,7 +741,7 @@ void x87_purgecache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3) if(ret!=0) { // --- set values // prepare offset to fpu => s1 - ADDx_U12(s1, xEmu, offsetof(x64emu_t, mmx87)); + ADDx_U12(s1, xEmu, offsetof(x64emu_t, x87)); // Get top // loop all cache entries for (int i=0; i<8; ++i) @@ -772,7 +772,7 @@ static void x87_reflectcache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int if(!ret) // nothing to do return; // prepare offset to fpu => s1 - ADDx_U12(s1, xEmu, 
offsetof(x64emu_t, mmx87)); + ADDx_U12(s1, xEmu, offsetof(x64emu_t, x87)); // Get top LDRw_U12(s2, xEmu, offsetof(x64emu_t, top)); // loop all cache entries @@ -806,7 +806,7 @@ int x87_get_cache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st) // found, setup and grab the value dyn->x87cache[ret] = st; dyn->x87reg[ret] = fpu_get_reg_x87(dyn); - ADDx_U12(s1, xEmu, offsetof(x64emu_t, mmx87)); + ADDx_U12(s1, xEmu, offsetof(x64emu_t, x87)); LDRw_U12(s2, xEmu, offsetof(x64emu_t, top)); int a = st - dyn->x87stack; if(a) { @@ -851,7 +851,7 @@ void x87_refresh(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st) return; MESSAGE(LOG_DUMP, "\tRefresh x87 Cache for ST%d\n", st); // prepare offset to fpu => s1 - ADDx_U12(s1, xEmu, offsetof(x64emu_t, mmx87)); + ADDx_U12(s1, xEmu, offsetof(x64emu_t, x87)); // Get top LDRw_U12(s2, xEmu, offsetof(x64emu_t, top)); // Update @@ -879,7 +879,7 @@ void x87_forget(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st) return; MESSAGE(LOG_DUMP, "\tForget x87 Cache for ST%d\n", st); // prepare offset to fpu => s1 - ADDx_U12(s1, xEmu, offsetof(x64emu_t, mmx87)); + ADDx_U12(s1, xEmu, offsetof(x64emu_t, x87)); // Get top LDRw_U12(s2, xEmu, offsetof(x64emu_t, top)); // Update @@ -908,7 +908,7 @@ void x87_reget_st(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st) if(dyn->x87cache[i]==st) { // refresh the value MESSAGE(LOG_DUMP, "\tRefresh x87 Cache for ST%d\n", st); - ADDx_U12(s1, xEmu, offsetof(x64emu_t, mmx87)); + ADDx_U12(s1, xEmu, offsetof(x64emu_t, x87)); LDRw_U12(s2, xEmu, offsetof(x64emu_t, top)); int a = st - dyn->x87stack; if(a<0) { @@ -932,7 +932,7 @@ void x87_reget_st(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st) // found, setup and grab the value dyn->x87cache[ret] = st; dyn->x87reg[ret] = fpu_get_reg_x87(dyn); - ADDx_U12(s1, xEmu, offsetof(x64emu_t, mmx87)); + ADDx_U12(s1, xEmu, offsetof(x64emu_t, x87)); LDRw_U12(s2, xEmu, offsetof(x64emu_t, top)); int a = st - dyn->x87stack; if(a<0) { @@ -1006,7 +1006,7 @@ 
int mmx_get_reg(dynarec_arm_t* dyn, int ninst, int s1, int a) if(dyn->mmxcache[a]!=-1) return dyn->mmxcache[a]; int ret = dyn->mmxcache[a] = fpu_get_reg_emm(dyn, a); - VLDR64_U12(ret, xEmu, offsetof(x64emu_t, mmx87[a])); + VLDR64_U12(ret, xEmu, offsetof(x64emu_t, mmx[a])); return ret; #else (void)dyn; (void)a; @@ -1039,7 +1039,7 @@ void mmx_purgecache(dynarec_arm_t* dyn, int ninst, int s1) MESSAGE(LOG_DUMP, "\tPurge MMX Cache ------\n"); ++old; } - VSTR64_U12(dyn->mmxcache[i], xEmu, offsetof(x64emu_t, mmx87[i])); + VSTR64_U12(dyn->mmxcache[i], xEmu, offsetof(x64emu_t, mmx[i])); fpu_free_reg(dyn, dyn->mmxcache[i]); dyn->mmxcache[i] = -1; } @@ -1057,7 +1057,7 @@ static void mmx_reflectcache(dynarec_arm_t* dyn, int ninst, int s1) #if STEP > 1 for (int i=0; i<8; ++i) if(dyn->mmxcache[i]!=-1) { - VLDR64_U12(dyn->mmxcache[i], xEmu, offsetof(x64emu_t, mmx87[i])); + VLDR64_U12(dyn->mmxcache[i], xEmu, offsetof(x64emu_t, mmx[i])); } #else (void)dyn; diff --git a/src/emu/x64emu.c b/src/emu/x64emu.c index 75ce18fd..c5ef1b10 100755 --- a/src/emu/x64emu.c +++ b/src/emu/x64emu.c @@ -218,12 +218,12 @@ void CloneEmu(x64emu_t *newemu, const x64emu_t* emu) newemu->old_ip = emu->old_ip; memcpy(newemu->segs, emu->segs, sizeof(emu->segs)); memset(newemu->segs_serial, 0, sizeof(newemu->segs_serial)); - memcpy(newemu->mmx87, emu->mmx87, sizeof(emu->mmx87)); + memcpy(newemu->x87, emu->x87, sizeof(emu->x87)); + memcpy(newemu->mmx, emu->mmx, sizeof(emu->mmx)); memcpy(newemu->fpu_ld, emu->fpu_ld, sizeof(emu->fpu_ld)); memcpy(newemu->fpu_ll, emu->fpu_ll, sizeof(emu->fpu_ll)); memcpy(newemu->p_regs, emu->p_regs, sizeof(emu->p_regs)); newemu->cw = emu->cw; - newemu->cw_mask_all = emu->cw_mask_all; memcpy(&newemu->sw, &emu->sw, sizeof(emu->sw)); newemu->top = emu->top; newemu->fpu_stack = emu->fpu_stack; @@ -353,7 +353,7 @@ const char* DumpCPURegs(x64emu_t* emu, uintptr_t ip) if(trace_emm) { // do emm reg if needed for(int i=0; i<8; ++i) { - sprintf(tmp, "mm%d:%016lx", i, emu->mmx87[i].q); + 
sprintf(tmp, "mm%d:%016lx", i, emu->mmx[i].q); strcat(buff, tmp); if ((i&3)==3) strcat(buff, "\n"); else strcat(buff, " "); } @@ -370,7 +370,7 @@ const char* DumpCPURegs(x64emu_t* emu, uintptr_t ip) // start with FPU regs... if(emu->fpu_stack) { for (int i=0; i<emu->fpu_stack; i++) { - sprintf(tmp, "ST%d=%f", i, emu->mmx87[(emu->top+i)&7].d); + sprintf(tmp, "ST%d=%f", i, ST(i).d); strcat(buff, tmp); int c = 10-strlen(tmp); if(c<1) c=1; diff --git a/src/emu/x64emu_private.h b/src/emu/x64emu_private.h index 73fe8a7a..cf81d316 100755 --- a/src/emu/x64emu_private.h +++ b/src/emu/x64emu_private.h @@ -33,8 +33,9 @@ typedef struct x64emu_s { // sse sse_regs_t xmm[16]; // fpu / mmx - mmx87_regs_t mmx87[8]; - uint16_t cw,cw_mask_all; + mmx87_regs_t x87[8]; + mmx87_regs_t mmx[8]; + uint16_t cw; x87flags_t sw; uint32_t top; // top is part of sw, but it's faster to have it separatly int fpu_stack; diff --git a/src/emu/x64run_private.c b/src/emu/x64run_private.c index 4eeddd06..ef697968 100755 --- a/src/emu/x64run_private.c +++ b/src/emu/x64run_private.c @@ -1241,7 +1241,7 @@ mmx87_regs_t* GetEm(x64emu_t *emu, rex_t rex, uint8_t v, uint8_t delta) { uint8_t m = v&0xC7; // filter Ed if(m>=0xC0) { - return &emu->mmx87[m&0x07]; + return &emu->mmx[m&0x07]; } else return (mmx87_regs_t*)GetECommon(emu, rex, m, delta); } @@ -1289,7 +1289,7 @@ mmx87_regs_t* GetGm(x64emu_t *emu, rex_t rex, uint8_t v) (void)rex; uint8_t m = (v&0x38)>>3; - return &emu->mmx87[m&7]; + return &emu->mmx[m&7]; } sse_regs_t* GetGx(x64emu_t *emu, rex_t rex, uint8_t v) diff --git a/src/emu/x64rundb.c b/src/emu/x64rundb.c index 0cd9c5dd..cb5c180c 100644 --- a/src/emu/x64rundb.c +++ b/src/emu/x64rundb.c @@ -163,11 +163,11 @@ int RunDB(x64emu_t *emu, rex_t rex) fpu_do_push(emu); memcpy(&STld(0).ld, ED, 10); LD2D(&STld(0), &ST(0).d); - STld(0).ref = ST0.q; + STld(0).uref = ST0.q; break; case 7: /* FSTP tbyte */ GETED(0); - if(ST0.q!=STld(0).ref) + if(ST0.q!=STld(0).uref) D2LD(&ST0.d, ED); else memcpy(ED, &STld(0).ld, 
10); diff --git a/src/emu/x64rundf.c b/src/emu/x64rundf.c index 7abf665a..a059cfd8 100644 --- a/src/emu/x64rundf.c +++ b/src/emu/x64rundf.c @@ -152,8 +152,8 @@ int RunDF(x64emu_t *emu, rex_t rex) tmp64s = ED->sq[0]; fpu_do_push(emu); ST0.d = tmp64s; - STll(0).ll = tmp64s; - STll(0).ref = ST0.q; + STll(0).sq = tmp64s; + STll(0).sref = ST0.sq; break; case 6: /* FBSTP tbytes, ST0 */ GETED(0); @@ -162,8 +162,8 @@ int RunDF(x64emu_t *emu, rex_t rex) break; case 7: /* FISTP i64 */ GETED(0); - if(STll(0).ref==ST(0).sq) - ED->sq[0] = STll(0).ll; + if(STll(0).sref==ST(0).sq) + ED->sq[0] = STll(0).sq; else { if(isgreater(ST0.d, (double)(int64_t)0x7fffffffffffffffLL) || isless(ST0.d, -(double)(int64_t)0x7fffffffffffffffLL) || !isfinite(ST0.d)) ED->sq[0] = 0x8000000000000000LL; diff --git a/src/emu/x87emu_private.c b/src/emu/x87emu_private.c index 0393ac01..0ab63a6e 100755 --- a/src/emu/x87emu_private.c +++ b/src/emu/x87emu_private.c @@ -21,7 +21,7 @@ void fpu_do_free(x64emu_t* emu, int i) void reset_fpu(x64emu_t* emu) { - memset(emu->mmx87, 0, sizeof(emu->mmx87)); + memset(emu->x87, 0, sizeof(emu->x87)); memset(emu->fpu_ld, 0, sizeof(emu->fpu_ld)); emu->cw = 0x37F; emu->sw.x16 = 0x0000; @@ -177,8 +177,8 @@ void D2LD(void* d, void* ld) int32_t sign80 = (s.ud[1]&0x80000000)?1:0; int32_t exp80 = s.ud[1]&0x7ff00000; int32_t exp80final = (exp80>>20); - int64_t mant80 = s.q&0x000fffffffffffffL; - int64_t mant80final = (mant80 << 11); + uint64_t mant80 = s.q&0x000fffffffffffffL; + uint64_t mant80final = (mant80 << 11); if(exp80final==0x7ff) { // NaN and Infinite exp80final = 0x7fff; @@ -294,7 +294,11 @@ void fpu_fxsave32(x64emu_t* emu, void* ed) { xsave32_t *p = (xsave32_t*)ed; // should save flags & all - emu->sw.f.F87_TOP = emu->top&7; + int top = emu->top&7; + int stack = 8-top; + if(top==0) // check if stack is full or empty, based on tag[0] + stack = (emu->p_regs[0].tag)?8:0; + emu->sw.f.F87_TOP = top; p->ControlWord = emu->cw; p->StatusWord = emu->sw.x16; uint8_t tags = 0; @@ 
-310,7 +314,7 @@ void fpu_fxsave32(x64emu_t* emu, void* ed) p->MxCsr_Mask = 0; // copy FPU/MMX regs... for(int i=0; i<8; ++i) - memcpy(&p->FloatRegisters[i].q[0], &emu->mmx87[0], sizeof(emu->mmx87[0])); + memcpy(&p->FloatRegisters[i].q[0], (i<stack)?&ST(i):&emu->mmx[i], sizeof(mmx87_regs_t)); // copy SSE regs memcpy(&p->XmmRegisters[0], &emu->xmm[0], sizeof(emu->xmm)); } @@ -319,7 +323,11 @@ void fpu_fxsave64(x64emu_t* emu, void* ed) { xsave64_t *p = (xsave64_t*)ed; // should save flags & all - emu->sw.f.F87_TOP = emu->top&7; + int top = emu->top&7; + int stack = 8-top; + if(top==0) // check if stack is full or empty, based on tag[0] + stack = (emu->p_regs[0].tag)?8:0; + emu->sw.f.F87_TOP = top; p->ControlWord = emu->cw; p->StatusWord = emu->sw.x16; uint8_t tags = 0; @@ -333,7 +341,7 @@ void fpu_fxsave64(x64emu_t* emu, void* ed) p->MxCsr_Mask = 0; // copy FPU/MMX regs... for(int i=0; i<8; ++i) - memcpy(&p->FloatRegisters[i].q[0], &emu->mmx87[0], sizeof(emu->mmx87[0])); + memcpy(&p->FloatRegisters[i].q[0], (i<stack)?&ST(i):&emu->mmx[i], sizeof(mmx87_regs_t)); // copy SSE regs memcpy(&p->XmmRegisters[0], &emu->xmm[0], sizeof(emu->xmm)); } @@ -347,9 +355,13 @@ void fpu_fxrstor32(x64emu_t* emu, void* ed) uint8_t tags = p->TagWord; for(int i=0; i<8; ++i) emu->p_regs[i].tag = (tags>>(i*2))?0:0b11; + int top = emu->top&7; + int stack = 8-top; + if(top==0) // check if stack is full or empty, based on tag[0] + stack = (emu->p_regs[0].tag)?8:0; // copy back MMX regs... 
for(int i=0; i<8; ++i) - memcpy(&emu->mmx87[i], &p->FloatRegisters[i].q[0], sizeof(emu->mmx87[0])); + memcpy((i<stack)?&ST(i):&emu->mmx[i], &p->FloatRegisters[i].q[0], sizeof(mmx87_regs_t)); // copy SSE regs memcpy(&emu->xmm[0], &p->XmmRegisters[0], sizeof(emu->xmm)); } @@ -363,9 +375,13 @@ void fpu_fxrstor64(x64emu_t* emu, void* ed) uint8_t tags = p->TagWord; for(int i=0; i<8; ++i) emu->p_regs[i].tag = (tags>>(i*2))?0:0b11; + int top = emu->top&7; + int stack = 8-top; + if(top==0) // check if stack is full or empty, based on tag[0] + stack = (emu->p_regs[0].tag)?8:0; // copy back MMX regs... for(int i=0; i<8; ++i) - memcpy(&emu->mmx87[i], &p->FloatRegisters[i].q[0], sizeof(emu->mmx87[0])); + memcpy((i<stack)?&ST(i):&emu->mmx[i], &p->FloatRegisters[i].q[0], sizeof(mmx87_regs_t)); // copy SSE regs memcpy(&emu->xmm[0], &p->XmmRegisters[0], sizeof(emu->xmm)); } diff --git a/src/emu/x87emu_private.h b/src/emu/x87emu_private.h index 333d74c1..7d64bf27 100755 --- a/src/emu/x87emu_private.h +++ b/src/emu/x87emu_private.h @@ -14,9 +14,9 @@ typedef struct x64emu_s x64emu_t; #define LN2 0.69314718055994531 #define LG2 0.3010299956639812 -#define ST0 emu->mmx87[emu->top] -#define ST1 emu->mmx87[(emu->top+1)&7] -#define ST(a) emu->mmx87[(emu->top+(a))&7] +#define ST0 emu->x87[emu->top] +#define ST1 emu->x87[(emu->top+1)&7] +#define ST(a) emu->x87[(emu->top+(a))&7] #define STld(a) emu->fpu_ld[(emu->top+(a))&7] #define STll(a) emu->fpu_ll[(emu->top+(a))&7] diff --git a/src/include/regs.h b/src/include/regs.h index b7da3078..fc32abee 100755 --- a/src/include/regs.h +++ b/src/include/regs.h @@ -150,12 +150,12 @@ typedef struct { #else longdouble_t ld; #endif - uint64_t ref; + uint64_t uref; } fpu_ld_t; typedef struct { - int64_t ll; - int64_t ref; + int64_t sq; + int64_t sref; } fpu_ll_t; typedef union { |