diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2023-10-25 15:09:17 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2023-10-25 15:09:17 +0200 |
| commit | 0900cc2f5907b55c7cc6d29058765e1acd414e01 (patch) | |
| tree | 8cdb65aedc797896308fb188e575758170d19785 /src | |
| parent | 87bf751b115267d7c388c849c43fca6d3f0d0881 (diff) | |
| download | box64-0900cc2f5907b55c7cc6d29058765e1acd414e01.tar.gz box64-0900cc2f5907b55c7cc6d29058765e1acd414e01.zip | |
[DYNAREC] Various improvements to x87 code and segment handling
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_00.c | 7 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_66.c | 2 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_d9.c | 24 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_dd.c | 3 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_df.c | 11 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_helper.c | 49 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_helper.h | 7 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_00_2.c | 2 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_00_3.c | 2 | ||||
| -rw-r--r-- | src/emu/x64emu_private.h | 3 |
10 files changed, 95 insertions, 15 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_00.c b/src/dynarec/arm64/dynarec_arm64_00.c index d078029b..2233c27f 100644 --- a/src/dynarec/arm64/dynarec_arm64_00.c +++ b/src/dynarec/arm64/dynarec_arm64_00.c @@ -1213,7 +1213,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin if((nextop&0xC0)==0xC0) { // reg <= seg LDRw_U12(xRAX+(nextop&7)+(rex.b<<3), xEmu, offsetof(x64emu_t, segs[u8])); } else { // mem <= seg - LDRw_U12(x3, xEmu, offsetof(x64emu_t, segs[u8])); + LDRH_U12(x3, xEmu, offsetof(x64emu_t, segs[u8])); addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, &unscaled, 0xfff<<1, 1, rex, NULL, 0, 0); STH(x3, wback, fixedaddress); SMWRITE2(); @@ -1247,7 +1247,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin LDH(x1, wback, fixedaddress); ed = x1; } - STRw_U12(ed, xEmu, offsetof(x64emu_t, segs[u8])); + STRH_U12(ed, xEmu, offsetof(x64emu_t, segs[u8])); STRw_U12(wZR, xEmu, offsetof(x64emu_t, segs_serial[u8])); break; case 0x8F: @@ -1998,6 +1998,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin *need_epilog = 1; } else { MESSAGE(LOG_DUMP, "Native Call to %s\n", GetNativeName(GetNativeFnc(ip))); + x87_stackcount(dyn, ninst, x1); x87_forget(dyn, ninst, x3, x4, 0); sse_purge07cache(dyn, ninst, x3); SMEND(); @@ -2976,7 +2977,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin LDxw(x1, wback, 0); ed = x1; LDH(x3, wback, rex.w?8:4); - STW(x3, xEmu, offsetof(x64emu_t, segs[_CS])); + STH(x3, xEmu, offsetof(x64emu_t, segs[_CS])); STW(xZR, xEmu, offsetof(x64emu_t, segs_serial[_CS])); jump_to_epilog(dyn, 0, ed, ninst); *need_epilog = 0; diff --git a/src/dynarec/arm64/dynarec_arm64_66.c b/src/dynarec/arm64/dynarec_arm64_66.c index c41c19ef..84427d20 100644 --- a/src/dynarec/arm64/dynarec_arm64_66.c +++ b/src/dynarec/arm64/dynarec_arm64_66.c @@ -639,7 +639,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t 
ip, int nin INST_NAME("MOV EW, Seg"); nextop=F8; u8 = (nextop&0x38)>>3; - LDRw_U12(x3, xEmu, offsetof(x64emu_t, segs[u8])); + LDRH_U12(x3, xEmu, offsetof(x64emu_t, segs[u8])); if((nextop&0xC0)==0xC0) { // reg <= seg UXTHw(xRAX+(nextop&7)+(rex.b<<3), x3); } else { // mem <= seg diff --git a/src/dynarec/arm64/dynarec_arm64_d9.c b/src/dynarec/arm64/dynarec_arm64_d9.c index 7b695e02..20563efd 100644 --- a/src/dynarec/arm64/dynarec_arm64_d9.c +++ b/src/dynarec/arm64/dynarec_arm64_d9.c @@ -179,22 +179,28 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin case 0xF0: INST_NAME("F2XM1"); MESSAGE(LOG_DUMP, "Need Optimization\n"); + i1 = x87_stackcount(dyn, ninst, x1); x87_forget(dyn, ninst, x1, x2, 0); CALL(native_f2xm1, -1); + x87_unstackcount(dyn, ninst, x1, i1); break; case 0xF1: INST_NAME("FYL2X"); MESSAGE(LOG_DUMP, "Need Optimization\n"); + i1 = x87_stackcount(dyn, ninst, x1); x87_forget(dyn, ninst, x1, x2, 0); x87_forget(dyn, ninst, x1, x2, 1); CALL(native_fyl2x, -1); + x87_unstackcount(dyn, ninst, x1, i1); X87_POP_OR_FAIL(dyn, ninst, x3); break; case 0xF2: INST_NAME("FPTAN"); MESSAGE(LOG_DUMP, "Need Optimization\n"); + i1 = x87_stackcount(dyn, ninst, x1); x87_forget(dyn, ninst, x1, x2, 0); CALL(native_ftan, -1); + x87_unstackcount(dyn, ninst, x1, i1); if(PK(0)==0xdd && PK(1)==0xd8) { MESSAGE(LOG_DUMP, "Optimized next DD D8 fstp st0, st0, not emiting 1\n"); u8 = F8; @@ -211,24 +217,30 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin case 0xF3: INST_NAME("FPATAN"); MESSAGE(LOG_DUMP, "Need Optimization\n"); + i1 = x87_stackcount(dyn, ninst, x1); x87_forget(dyn, ninst, x1, x2, 0); x87_forget(dyn, ninst, x1, x2, 1); CALL(native_fpatan, -1); + x87_unstackcount(dyn, ninst, x1, i1); X87_POP_OR_FAIL(dyn, ninst, x3); break; case 0xF4: INST_NAME("FXTRACT"); MESSAGE(LOG_DUMP, "Need Optimization\n"); X87_PUSH_EMPTY_OR_FAIL(dyn, ninst, 0); + i1 = x87_stackcount(dyn, ninst, x1); x87_forget(dyn, ninst, x1, x2, 1); 
CALL(native_fxtract, -1); + x87_unstackcount(dyn, ninst, x1, i1); break; case 0xF5: INST_NAME("FPREM1"); MESSAGE(LOG_DUMP, "Need Optimization\n"); + i1 = x87_stackcount(dyn, ninst, x1); x87_forget(dyn, ninst, x1, x2, 0); x87_forget(dyn, ninst, x1, x2, 1); CALL(native_fprem1, -1); + x87_unstackcount(dyn, ninst, x1, i1); break; case 0xF6: INST_NAME("FDECSTP"); @@ -249,16 +261,20 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin case 0xF8: INST_NAME("FPREM"); MESSAGE(LOG_DUMP, "Need Optimization\n"); + i1 = x87_stackcount(dyn, ninst, x1); x87_forget(dyn, ninst, x1, x2, 0); x87_forget(dyn, ninst, x1, x2, 1); CALL(native_fprem, -1); + x87_unstackcount(dyn, ninst, x1, i1); break; case 0xF9: INST_NAME("FYL2XP1"); MESSAGE(LOG_DUMP, "Need Optimization\n"); + i1 = x87_stackcount(dyn, ninst, x1); x87_forget(dyn, ninst, x1, x2, 0); x87_forget(dyn, ninst, x1, x2, 1); CALL(native_fyl2xp1, -1); + x87_unstackcount(dyn, ninst, x1, i1); X87_POP_OR_FAIL(dyn, ninst, x3); break; case 0xFA: @@ -274,8 +290,10 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("FSINCOS"); MESSAGE(LOG_DUMP, "Need Optimization\n"); X87_PUSH_EMPTY_OR_FAIL(dyn, ninst, 0); + i1 = x87_stackcount(dyn, ninst, x1); x87_forget(dyn, ninst, x1, x2, 1); CALL(native_fsincos, -1); + x87_unstackcount(dyn, ninst, x1, i1); break; case 0xFC: INST_NAME("FRNDINT"); @@ -298,21 +316,27 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin case 0xFD: INST_NAME("FSCALE"); MESSAGE(LOG_DUMP, "Need Optimization\n"); + i1 = x87_stackcount(dyn, ninst, x1); x87_forget(dyn, ninst, x1, x2, 0); x87_forget(dyn, ninst, x1, x2, 1); CALL(native_fscale, -1); + x87_unstackcount(dyn, ninst, x1, i1); break; case 0xFE: INST_NAME("FSIN"); MESSAGE(LOG_DUMP, "Need Optimization\n"); + i1 = x87_stackcount(dyn, ninst, x1); x87_forget(dyn, ninst, x1, x2, 0); CALL(native_fsin, -1); + x87_unstackcount(dyn, ninst, x1, i1); break; case 0xFF: 
INST_NAME("FCOS"); MESSAGE(LOG_DUMP, "Need Optimization\n"); + i1 = x87_stackcount(dyn, ninst, x1); x87_forget(dyn, ninst, x1, x2, 0); CALL(native_fcos, -1); + x87_unstackcount(dyn, ninst, x1, i1); break; diff --git a/src/dynarec/arm64/dynarec_arm64_dd.c b/src/dynarec/arm64/dynarec_arm64_dd.c index 7c689bbf..c56258d1 100644 --- a/src/dynarec/arm64/dynarec_arm64_dd.c +++ b/src/dynarec/arm64/dynarec_arm64_dd.c @@ -220,7 +220,7 @@ uintptr_t dynarec64_DD(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin break; case 7: INST_NAME("FNSTSW m2byte"); - fpu_purgecache(dyn, ninst, 0, x1, x2, x3); + //fpu_purgecache(dyn, ninst, 0, x1, x2, x3); addr = geted(dyn, addr, ninst, nextop, &ed, x4, &fixedaddress, &unscaled, 0xfff<<1, 1, rex, NULL, 0, 0); LDRw_U12(x2, xEmu, offsetof(x64emu_t, top)); LDRH_U12(x3, xEmu, offsetof(x64emu_t, sw)); @@ -234,6 +234,7 @@ uintptr_t dynarec64_DD(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin ANDw_mask(x2, x2, 0, 2); } BFIw(x3, x2, 11, 3); // inject TOP at bit 11 (3 bits) + STRH_U12(x3, xEmu, offsetof(x64emu_t, sw)); STH(x3, ed, fixedaddress); // store whole sw flags break; default: diff --git a/src/dynarec/arm64/dynarec_arm64_df.c b/src/dynarec/arm64/dynarec_arm64_df.c index 40ad5066..b81c4128 100644 --- a/src/dynarec/arm64/dynarec_arm64_df.c +++ b/src/dynarec/arm64/dynarec_arm64_df.c @@ -34,6 +34,7 @@ uintptr_t dynarec64_DF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin int64_t j64; int64_t fixedaddress; int unscaled; + int i1; MAYUSE(s0); MAYUSE(v2); @@ -57,6 +58,14 @@ uintptr_t dynarec64_DF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin case 0xE0: INST_NAME("FNSTSW AX"); LDRw_U12(x2, xEmu, offsetof(x64emu_t, top)); + if(dyn->n.x87stack) { + if(dyn->n.x87stack>0) { + SUBw_U12(x2, x2, dyn->n.x87stack); + } else { + ADDw_U12(x2, x2, -dyn->n.x87stack); + } + ANDw_mask(x2, x2, 0, 2); //mask=7 + } LDRH_U12(x1, xEmu, offsetof(x64emu_t, sw)); BFIw(x1, x2, 11, 3); // inject top STRH_U12(x1, xEmu, 
offsetof(x64emu_t, sw)); @@ -315,10 +324,12 @@ uintptr_t dynarec64_DF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin break; case 6: INST_NAME("FBSTP tbytes, ST0"); + i1 = x87_stackcount(dyn, ninst, x1); x87_forget(dyn, ninst, x1, x2, 0); addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0); if(ed!=x1) {MOVx_REG(x1, ed);} CALL(fpu_fbst, -1); + x87_unstackcount(dyn, ninst, x1, i1); X87_POP_OR_FAIL(dyn, ninst, x3); break; case 7: diff --git a/src/dynarec/arm64/dynarec_arm64_helper.c b/src/dynarec/arm64/dynarec_arm64_helper.c index a2a4d590..f886e75a 100644 --- a/src/dynarec/arm64/dynarec_arm64_helper.c +++ b/src/dynarec/arm64/dynarec_arm64_helper.c @@ -866,11 +866,11 @@ static void x87_reset(dynarec_arm_t* dyn) dyn->n.neoncache[i].v = 0; } -void x87_stackcount(dynarec_arm_t* dyn, int ninst, int scratch) +int x87_stackcount(dynarec_arm_t* dyn, int ninst, int scratch) { MAYUSE(scratch); if(!dyn->n.x87stack) - return; + return 0; if(dyn->n.mmxcount) mmx_purgecache(dyn, ninst, 0, scratch); MESSAGE(LOG_DUMP, "\tSynch x87 Stackcount (%d)\n", dyn->n.x87stack); @@ -893,10 +893,45 @@ void x87_stackcount(dynarec_arm_t* dyn, int ninst, int scratch) ANDw_mask(scratch, scratch, 0, 2); //mask=7 STRw_U12(scratch, xEmu, offsetof(x64emu_t, top)); // reset x87stack, but not the stack count of neoncache + int ret = dyn->n.x87stack; dyn->n.x87stack = 0; dyn->n.stack_next -= dyn->n.stack; dyn->n.stack = 0; MESSAGE(LOG_DUMP, "\t------x87 Stackcount\n"); + return ret; +} + +void x87_unstackcount(dynarec_arm_t* dyn, int ninst, int scratch, int count) +{ + MAYUSE(scratch); + if(!count) + return; + if(dyn->n.mmxcount) + mmx_purgecache(dyn, ninst, 0, scratch); + MESSAGE(LOG_DUMP, "\tUnsynch x87 Stackcount (%d)\n", count); + int a = -count; + // Add x87stack to emu fpu_stack + LDRw_U12(scratch, xEmu, offsetof(x64emu_t, fpu_stack)); + if(a>0) { + ADDw_U12(scratch, scratch, a); + } else { + SUBw_U12(scratch, scratch, -a); + } + STRw_U12(scratch, 
xEmu, offsetof(x64emu_t, fpu_stack)); + // Sub x87stack to top, with and 7 + LDRw_U12(scratch, xEmu, offsetof(x64emu_t, top)); + if(a>0) { + SUBw_U12(scratch, scratch, a); + } else { + ADDw_U12(scratch, scratch, -a); + } + ANDw_mask(scratch, scratch, 0, 2); //mask=7 + STRw_U12(scratch, xEmu, offsetof(x64emu_t, top)); + // reset x87stack, but not the stack count of neoncache + dyn->n.x87stack = count; + dyn->n.stack = count; + dyn->n.stack_next += dyn->n.stack; + MESSAGE(LOG_DUMP, "\t------x87 Unstackcount\n"); } int neoncache_st_coherency(dynarec_arm_t* dyn, int ninst, int a, int b) @@ -1252,7 +1287,6 @@ void x87_refresh(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st) void x87_forget(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st) { - x87_stackcount(dyn, ninst, s1); int ret = -1; for (int i=0; (i<8) && (ret==-1); ++i) if(dyn->n.x87cache[i] == st) @@ -1270,8 +1304,13 @@ void x87_forget(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st) // Get top LDRw_U12(s2, xEmu, offsetof(x64emu_t, top)); // Update - if(st) { - ADDw_U12(s2, s2, st); + int ast = st - dyn->n.x87stack; + if(ast) { + if(ast>0) { + ADDw_U12(x2, x2, ast); + } else { + SUBw_U12(x2, x2, -ast); + } ANDw_mask(s2, s2, 0, 2); //mask=7 // (emu->top + i)&7 } if(dyn->n.neoncache[reg].t==NEON_CACHE_ST_F) { diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h index c794a161..b3f02ee5 100644 --- a/src/dynarec/arm64/dynarec_arm64_helper.h +++ b/src/dynarec/arm64/dynarec_arm64_helper.h @@ -1030,6 +1030,7 @@ void* arm64_next(x64emu_t* emu, uintptr_t addr); #define x87_forget STEPNAME(x87_forget) #define x87_reget_st STEPNAME(x87_reget_st) #define x87_stackcount STEPNAME(x87_stackcount) +#define x87_unstackcount STEPNAME(x87_unstackcount) #define x87_swapreg STEPNAME(x87_swapreg) #define x87_setround STEPNAME(x87_setround) #define x87_restoreround STEPNAME(x87_restoreround) @@ -1146,8 +1147,10 @@ void emit_shld32c(dynarec_arm_t* dyn, int ninst, rex_t rex, 
int s1, int s2, uint void emit_pf(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4); // x87 helper -// cache of the local stack counter, to avoid update at every call -void x87_stackcount(dynarec_arm_t* dyn, int ninst, int scratch); +// cache of the local stack counter, to avoid update at every call, return old internal stack counter +int x87_stackcount(dynarec_arm_t* dyn, int ninst, int scratch); +// revert local stack counter to previous version (return from x87_stackcount) +void x87_unstackcount(dynarec_arm_t* dyn, int ninst, int scratch, int count); // fpu push. Return the Dd value to be used int x87_do_push(dynarec_arm_t* dyn, int ninst, int s1, int t); // fpu push. Do not allocate a cache register. Needs a scratch register to do x87stack synch (or 0 to not do it) diff --git a/src/dynarec/rv64/dynarec_rv64_00_2.c b/src/dynarec/rv64/dynarec_rv64_00_2.c index 08945265..b2a0c420 100644 --- a/src/dynarec/rv64/dynarec_rv64_00_2.c +++ b/src/dynarec/rv64/dynarec_rv64_00_2.c @@ -418,7 +418,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int LHU(x1, ed, fixedaddress); ed = x1; } - SW(ed, xEmu, offsetof(x64emu_t, segs[(nextop&0x38)>>3])); + SH(ed, xEmu, offsetof(x64emu_t, segs[(nextop&0x38)>>3])); SW(xZR, xEmu, offsetof(x64emu_t, segs_serial[(nextop&0x38)>>3])); break; case 0x8F: diff --git a/src/dynarec/rv64/dynarec_rv64_00_3.c b/src/dynarec/rv64/dynarec_rv64_00_3.c index 9e8b4511..39a1aab9 100644 --- a/src/dynarec/rv64/dynarec_rv64_00_3.c +++ b/src/dynarec/rv64/dynarec_rv64_00_3.c @@ -1173,7 +1173,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int LDxw(x1, wback, 0); ed = x1; LHU(x3, wback, rex.w?8:4); - SW(x3, xEmu, offsetof(x64emu_t, segs[_CS])); + SH(x3, xEmu, offsetof(x64emu_t, segs[_CS])); SW(xZR, xEmu, offsetof(x64emu_t, segs_serial[_CS])); jump_to_epilog(dyn, 0, ed, ninst); *need_epilog = 0; diff --git a/src/emu/x64emu_private.h b/src/emu/x64emu_private.h index 25601105..c5c4f4d9 100644 --- 
a/src/emu/x64emu_private.h +++ b/src/emu/x64emu_private.h @@ -86,7 +86,8 @@ typedef struct x64emu_s { uintptr_t prev2_ip; #endif // segments - uint32_t segs[6]; // only 32bits value? + uint16_t segs[6]; // only 32bits value? + uint16_t dummy_seg6, dummy_seg7; // to stay aligned uintptr_t segs_offs[6]; // computed offset associate with segment uint32_t segs_serial[6]; // are seg offset clean (not 0) or does they need to be re-computed (0)? For GS, serial need to be the same as context->sel_serial // parent context |