diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2023-03-16 19:48:58 +0100 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2023-03-16 19:48:58 +0100 |
| commit | 848763bb5cbd5c87bf108a655873622835cd60a2 (patch) | |
| tree | 30d9250e1d931eafc936641e4dc9e7abe0a5f979 /src | |
| parent | 076357a2c4bfc9bae226617cfa9cfec95262e85b (diff) | |
| download | box64-848763bb5cbd5c87bf108a655873622835cd60a2.tar.gz box64-848763bb5cbd5c87bf108a655873622835cd60a2.zip | |
[DYNAREC] Grouped common function in dynarec_native_functions.c
Diffstat (limited to 'src')
| -rwxr-xr-x | src/dynarec/arm64/dynarec_arm64_00.c | 2 | ||||
| -rwxr-xr-x | src/dynarec/arm64/dynarec_arm64_0f.c | 10 | ||||
| -rwxr-xr-x | src/dynarec/arm64/dynarec_arm64_660f.c | 14 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_d9.c | 26 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_db.c | 4 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_dd.c | 4 | ||||
| -rwxr-xr-x | src/dynarec/arm64/dynarec_arm64_f20f.c | 4 | ||||
| -rwxr-xr-x | src/dynarec/arm64/dynarec_arm64_functions.c | 399 | ||||
| -rwxr-xr-x | src/dynarec/arm64/dynarec_arm64_functions.h | 55 | ||||
| -rwxr-xr-x | src/dynarec/arm64/dynarec_arm64_helper.c | 4 | ||||
| -rwxr-xr-x | src/dynarec/dynarec_arch.h | 30 | ||||
| -rw-r--r-- | src/dynarec/dynarec_native_functions.c | 425 | ||||
| -rw-r--r-- | src/dynarec/dynarec_native_functions.h | 70 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_0f.c | 8 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_functions.c | 178 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_functions.h | 23 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.c | 4 |
17 files changed, 562 insertions, 698 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_00.c b/src/dynarec/arm64/dynarec_arm64_00.c index 583d734a..1974a5a8 100755 --- a/src/dynarec/arm64/dynarec_arm64_00.c +++ b/src/dynarec/arm64/dynarec_arm64_00.c @@ -2332,7 +2332,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin SETFLAGS(X_ALL, SF_SET); // Hack to set flags in "don't care" state GETIP(ip); STORE_XEMU_CALL(xRIP); - CALL(arm_priv, -1); + CALL(native_priv, -1); LOAD_XEMU_CALL(xRIP); jump_to_epilog(dyn, 0, xRIP, ninst); *need_epilog = 0; diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c index 90ee5020..8b817c9e 100755 --- a/src/dynarec/arm64/dynarec_arm64_0f.c +++ b/src/dynarec/arm64/dynarec_arm64_0f.c @@ -66,7 +66,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin SETFLAGS(X_ALL, SF_SET); // Hack to set flags in "don't care" state GETIP(ip); STORE_XEMU_CALL(xRIP); - CALL(arm_ud, -1); + CALL(native_ud, -1); LOAD_XEMU_CALL(xRIP); jump_to_epilog(dyn, 0, xRIP, ninst); *need_epilog = 0; @@ -95,7 +95,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin SETFLAGS(X_ALL, SF_SET); // Hack to set flags in "don't care" state GETIP(ip); STORE_XEMU_CALL(xRIP); - CALL(arm_ud, -1); + CALL(native_ud, -1); LOAD_XEMU_CALL(xRIP); jump_to_epilog(dyn, 0, xRIP, ninst); *need_epilog = 0; @@ -107,7 +107,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin SETFLAGS(X_ALL, SF_SET); // Hack to set flags in "don't care" state GETIP(ip); STORE_XEMU_CALL(xRIP); - CALL(arm_ud, -1); + CALL(native_ud, -1); LOAD_XEMU_CALL(xRIP); jump_to_epilog(dyn, 0, xRIP, ninst); *need_epilog = 0; @@ -1242,11 +1242,11 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin case 7: INST_NAME("CLFLUSH Ed"); MESSAGE(LOG_DUMP, "Need Optimization?\n"); - GETED(0); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0, 0, rex, NULL, 0, 0); if(ed!=x1) { MOVx_REG(x1, ed); } - CALL_(arm_clflush, -1, 0); + CALL_(native_clflush, -1, 0); break; default: DEFAULT; diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c index 821d3ae0..6d6e26e6 100755 --- a/src/dynarec/arm64/dynarec_arm64_660f.c +++ b/src/dynarec/arm64/dynarec_arm64_660f.c @@ -546,7 +546,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } sse_forget_reg(dyn, ninst, gd); MOV32w(x1, gd); - CALL(arm_aesimc, -1); + CALL(native_aesimc, -1); } break; case 0xDC: @@ -564,7 +564,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n GETG; sse_forget_reg(dyn, ninst, gd); MOV32w(x1, gd); - CALL(arm_aese, -1); + CALL(native_aese, -1); GETGX(q0, 1); GETEX(q1, 0, 0); VEORQ(q0, q0, q1); @@ -584,7 +584,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n GETG; sse_forget_reg(dyn, ninst, gd); MOV32w(x1, gd); - CALL(arm_aeselast, -1); + CALL(native_aeselast, -1); GETGX(q0, 1); GETEX(q1, 0, 0); VEORQ(q0, q0, q1); @@ -605,7 +605,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n GETG; sse_forget_reg(dyn, ninst, gd); MOV32w(x1, gd); - CALL(arm_aesd, -1); + CALL(native_aesd, -1); GETGX(q0, 1); GETEX(q1, 0, 0); VEORQ(q0, q0, q1); @@ -625,7 +625,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n GETG; sse_forget_reg(dyn, ninst, gd); MOV32w(x1, gd); - CALL(arm_aesdlast, -1); + CALL(native_aesdlast, -1); GETGX(q0, 1); GETEX(q1, 0, 0); VEORQ(q0, q0, q1); @@ -806,7 +806,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } u8 = F8; MOV32w(x4, u8); - CALL(arm_pclmul, -1); + CALL(native_pclmul, -1); } break; @@ -830,7 +830,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } u8 = F8; MOV32w(x4, u8); - CALL(arm_aeskeygenassist, -1); + CALL(native_aeskeygenassist, -1); break; default: diff --git a/src/dynarec/arm64/dynarec_arm64_d9.c b/src/dynarec/arm64/dynarec_arm64_d9.c index e512cc1f..051f82ed 100644 --- a/src/dynarec/arm64/dynarec_arm64_d9.c +++ b/src/dynarec/arm64/dynarec_arm64_d9.c @@ -181,21 +181,21 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("F2XM1"); MESSAGE(LOG_DUMP, "Need Optimization\n"); x87_forget(dyn, ninst, x1, x2, 0); - CALL(arm_f2xm1, -1); + CALL(native_f2xm1, -1); break; case 0xF1: INST_NAME("FYL2X"); MESSAGE(LOG_DUMP, "Need Optimization\n"); x87_forget(dyn, ninst, x1, x2, 0); x87_forget(dyn, ninst, x1, x2, 1); - CALL(arm_fyl2x, -1); + CALL(native_fyl2x, -1); x87_do_pop(dyn, ninst, x3); break; case 0xF2: INST_NAME("FPTAN"); MESSAGE(LOG_DUMP, "Need Optimization\n"); x87_forget(dyn, ninst, x1, x2, 0); - CALL(arm_ftan, -1); + CALL(native_ftan, -1); v1 = x87_do_push(dyn, ninst, x1, NEON_CACHE_ST_F); if(ST_IS_F(0)) { FMOVS_8(v1, 0b01110000); @@ -208,7 +208,7 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MESSAGE(LOG_DUMP, "Need Optimization\n"); x87_forget(dyn, ninst, x1, x2, 0); x87_forget(dyn, ninst, x1, x2, 1); - CALL(arm_fpatan, -1); + CALL(native_fpatan, -1); x87_do_pop(dyn, ninst, x3); break; case 0xF4: @@ -216,14 +216,14 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MESSAGE(LOG_DUMP, "Need Optimization\n"); x87_do_push_empty(dyn, ninst, 0); x87_forget(dyn, ninst, x1, x2, 1); - CALL(arm_fxtract, -1); + CALL(native_fxtract, -1); break; case 0xF5: INST_NAME("FPREM1"); MESSAGE(LOG_DUMP, "Need Optimization\n"); x87_forget(dyn, ninst, x1, x2, 0); x87_forget(dyn, ninst, x1, x2, 1); - CALL(arm_fprem1, -1); + CALL(native_fprem1, -1); break; case 0xF6: INST_NAME("FDECSTP"); @@ -246,14 +246,14 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MESSAGE(LOG_DUMP, "Need Optimization\n"); x87_forget(dyn, ninst, x1, x2, 0); x87_forget(dyn, ninst, x1, x2, 1); - CALL(arm_fprem, -1); + CALL(native_fprem, -1); break; case 0xF9: INST_NAME("FYL2XP1"); MESSAGE(LOG_DUMP, "Need Optimization\n"); x87_forget(dyn, ninst, x1, x2, 0); x87_forget(dyn, ninst, x1, x2, 1); - CALL(arm_fyl2xp1, -1); + CALL(native_fyl2xp1, -1); x87_do_pop(dyn, ninst, x3); break; case 0xFA: @@ -270,7 +270,7 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MESSAGE(LOG_DUMP, "Need Optimization\n"); x87_do_push_empty(dyn, ninst, 0); x87_forget(dyn, ninst, x1, x2, 1); - CALL(arm_fsincos, -1); + CALL(native_fsincos, -1); break; case 0xFC: INST_NAME("FRNDINT"); @@ -278,7 +278,7 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MESSAGE(LOG_DUMP, "Need Optimization\n"); // use C helper for now, nothing staightforward is available x87_forget(dyn, ninst, x1, x2, 0); - CALL(arm_frndint, -1); + CALL(native_frndint, -1); #else v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0); u8 = x87_setround(dyn, ninst, x1, x2, x3); @@ -295,19 +295,19 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MESSAGE(LOG_DUMP, "Need Optimization\n"); x87_forget(dyn, ninst, x1, x2, 0); x87_forget(dyn, ninst, x1, x2, 1); - CALL(arm_fscale, -1); + CALL(native_fscale, -1); break; case 0xFE: INST_NAME("FSIN"); MESSAGE(LOG_DUMP, "Need Optimization\n"); x87_forget(dyn, ninst, x1, x2, 0); - CALL(arm_fsin, -1); + CALL(native_fsin, -1); break; case 0xFF: INST_NAME("FCOS"); MESSAGE(LOG_DUMP, "Need Optimization\n"); x87_forget(dyn, ninst, x1, x2, 0); - CALL(arm_fcos, -1); + CALL(native_fcos, -1); break; diff --git a/src/dynarec/arm64/dynarec_arm64_db.c b/src/dynarec/arm64/dynarec_arm64_db.c index d8dc86a4..51a759fd 100644 --- a/src/dynarec/arm64/dynarec_arm64_db.c +++ b/src/dynarec/arm64/dynarec_arm64_db.c @@ -304,7 +304,7 @@ uintptr_t dynarec64_DB(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MOVx_REG(x1, ed); } x87_do_push_empty(dyn, ninst, x3); - CALL(arm_fld, -1); + CALL(native_fld, -1); } } break; @@ -321,7 +321,7 @@ uintptr_t dynarec64_DB(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin if(ed!=x1) { MOVx_REG(x1, ed); } - CALL(arm_fstp, -1); + CALL(native_fstp, -1); #else // Painfully long, straight conversion from the C code, shoud be optimized v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D); diff --git a/src/dynarec/arm64/dynarec_arm64_dd.c b/src/dynarec/arm64/dynarec_arm64_dd.c index 36581009..38fa7d46 100644 --- a/src/dynarec/arm64/dynarec_arm64_dd.c +++ b/src/dynarec/arm64/dynarec_arm64_dd.c @@ -209,7 +209,7 @@ uintptr_t dynarec64_DD(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin fpu_purgecache(dyn, ninst, 0, x1, x2, x3); addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0); if(ed!=x1) {MOVx_REG(x1, ed);} - CALL(arm_frstor, -1); + CALL(native_frstor, -1); break; case 6: INST_NAME("FSAVE m108byte"); @@ -217,7 +217,7 @@ uintptr_t dynarec64_DD(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin fpu_purgecache(dyn, ninst, 0, x1, x2, x3); addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0); if(ed!=x1) {MOVx_REG(x1, ed);} - CALL(arm_fsave, -1); + CALL(native_fsave, -1); break; case 7: INST_NAME("FNSTSW m2byte"); diff --git a/src/dynarec/arm64/dynarec_arm64_f20f.c b/src/dynarec/arm64/dynarec_arm64_f20f.c index a5e81729..144ae8d3 100755 --- a/src/dynarec/arm64/dynarec_arm64_f20f.c +++ b/src/dynarec/arm64/dynarec_arm64_f20f.c @@ -166,7 +166,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n SETFLAGS(X_ALL, SF_SET); // Hack to set flags in "don't care" state GETIP(ip); STORE_XEMU_CALL(xRIP); - CALL(arm_ud, -1); + CALL(native_ud, -1); LOAD_XEMU_CALL(xRIP); jump_to_epilog(dyn, 0, xRIP, ninst); *need_epilog = 0; @@ -179,7 +179,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n SETFLAGS(X_ALL, SF_SET); // Hack to set flags in "don't care" state GETIP(ip); STORE_XEMU_CALL(xRIP); - CALL(arm_ud, -1); + CALL(native_ud, -1); LOAD_XEMU_CALL(xRIP); jump_to_epilog(dyn, 0, xRIP, ninst); *need_epilog = 0; diff --git a/src/dynarec/arm64/dynarec_arm64_functions.c b/src/dynarec/arm64/dynarec_arm64_functions.c index bfa17d6e..f1751f14 100755 --- a/src/dynarec/arm64/dynarec_arm64_functions.c +++ b/src/dynarec/arm64/dynarec_arm64_functions.c @@ -28,332 +28,6 @@ #include "custommem.h" #include "bridge.h" -void arm_fstp(x64emu_t* emu, void* p) -{ - if(ST0.q!=STld(0).uref) - D2LD(&ST0.d, p); - else - memcpy(p, &STld(0).ld, 10); -} - -void arm_print_armreg(x64emu_t* emu, uintptr_t reg, uintptr_t n) -{ - (void)emu; - dynarec_log(LOG_DEBUG, "R%lu=0x%lx (%lu)\n", n, reg, reg); -} - -void arm_f2xm1(x64emu_t* emu) -{ - ST0.d = exp2(ST0.d) - 1.0; -} -void arm_fyl2x(x64emu_t* emu) -{ - ST(1).d = log2(ST0.d)*ST(1).d; -} -void arm_ftan(x64emu_t* emu) -{ - ST0.d = tan(ST0.d); - emu->sw.f.F87_C2 = 0; -} -void arm_fpatan(x64emu_t* emu) -{ - ST1.d = atan2(ST1.d, ST0.d); -} -void arm_fxtract(x64emu_t* emu) -{ - int32_t tmp32s = (ST1.q&0x7ff0000000000000LL)>>52; - tmp32s -= 1023; - ST1.d /= exp2(tmp32s); - ST0.d = tmp32s; -} -void arm_fprem(x64emu_t* emu) -{ - int32_t tmp32s = ST0.d / ST1.d; - ST0.d -= ST1.d * tmp32s; - emu->sw.f.F87_C2 = 0; - emu->sw.f.F87_C0 = (tmp32s&1); - emu->sw.f.F87_C3 = ((tmp32s>>1)&1); - emu->sw.f.F87_C1 = ((tmp32s>>2)&1); -} -void arm_fyl2xp1(x64emu_t* emu) -{ - ST(1).d = log2(ST0.d + 1.0)*ST(1).d; -} -void arm_fsincos(x64emu_t* emu) -{ - sincos(ST1.d, &ST1.d, &ST0.d); - emu->sw.f.F87_C2 = 0; -} -void arm_frndint(x64emu_t* emu) -{ - ST0.d = fpu_round(emu, ST0.d); -} -void arm_fscale(x64emu_t* emu) -{ - if(ST0.d!=0.0) - ST0.d *= exp2(trunc(ST1.d)); -} -void arm_fsin(x64emu_t* emu) -{ - ST0.d = sin(ST0.d); - emu->sw.f.F87_C2 = 0; -} -void arm_fcos(x64emu_t* emu) -{ - ST0.d = cos(ST0.d); - emu->sw.f.F87_C2 = 0; -} - -void arm_fbld(x64emu_t* emu, uint8_t* ed) -{ - fpu_fbld(emu, ed); -} - -void arm_fild64(x64emu_t* emu, int64_t* ed) -{ - int64_t tmp; - memcpy(&tmp, ed, sizeof(tmp)); - ST0.d = tmp; - STll(0).sq = tmp; - STll(0).sref = ST0.sq; -} - -void arm_fbstp(x64emu_t* emu, uint8_t* ed) -{ - fpu_fbst(emu, ed); -} - -void arm_fistp64(x64emu_t* emu, int64_t* ed) -{ - // used of memcpy to avoid aligments issues - if(STll(0).sref==ST(0).sq) { - memcpy(ed, &STll(0).sq, sizeof(int64_t)); - } else { - int64_t tmp; - if(isgreater(ST0.d, (double)(int64_t)0x7fffffffffffffffLL) || isless(ST0.d, (double)(int64_t)0x8000000000000000LL) || !isfinite(ST0.d)) - tmp = 0x8000000000000000LL; - else - tmp = fpu_round(emu, ST0.d); - memcpy(ed, &tmp, sizeof(tmp)); - } -} - -void arm_fistt64(x64emu_t* emu, int64_t* ed) -{ - // used of memcpy to avoid aligments issues - int64_t tmp = ST0.d; - memcpy(ed, &tmp, sizeof(tmp)); -} - -void arm_fld(x64emu_t* emu, uint8_t* ed) -{ - memcpy(&STld(0).ld, ed, 10); - LD2D(&STld(0), &ST(0).d); - STld(0).uref = ST0.q; -} - -void arm_ud(x64emu_t* emu) -{ - emit_signal(emu, SIGILL, (void*)R_RIP, 0); -} - -void arm_priv(x64emu_t* emu) -{ - emit_signal(emu, SIGSEGV, (void*)R_RIP, 0); -} - -void arm_fsave(x64emu_t* emu, uint8_t* ed) -{ - fpu_savenv(emu, (char*)ed, 0); - - uint8_t* p = ed; - p += 28; - for (int i=0; i<8; ++i) { - LD2D(p, &ST(i).d); - p+=10; - } -} -void arm_frstor(x64emu_t* emu, uint8_t* ed) -{ - fpu_loadenv(emu, (char*)ed, 0); - - uint8_t* p = ed; - p += 28; - for (int i=0; i<8; ++i) { - D2LD(&ST(i).d, p); - p+=10; - } - -} - -void arm_fprem1(x64emu_t* emu) -{ - // simplified version - int32_t tmp32s = round(ST0.d / ST1.d); - ST0.d -= ST1.d*tmp32s; - emu->sw.f.F87_C2 = 0; - emu->sw.f.F87_C0 = (tmp32s&1); - emu->sw.f.F87_C3 = ((tmp32s>>1)&1); - emu->sw.f.F87_C1 = ((tmp32s>>2)&1); -} - -static uint8_t ff_mult(uint8_t a, uint8_t b) -{ - int retval = 0; - - for(int i = 0; i < 8; i++) { - if((b & 1) == 1) - retval ^= a; - - if((a & 0x80)) { - a <<= 1; - a ^= 0x1b; - } else { - a <<= 1; - } - - b >>= 1; - } - - return retval; -} - -void arm_aesimc(x64emu_t* emu, int xmm) -{ - sse_regs_t eax1 = emu->xmm[xmm]; - - for(int j=0; j<4; ++j) { - emu->xmm[xmm].ub[0+j*4] = ff_mult(0x0E, eax1.ub[0+j*4]) ^ ff_mult(0x0B, eax1.ub[1+j*4]) ^ ff_mult(0x0D, eax1.ub[2+j*4]) ^ ff_mult(0x09, eax1.ub[3+j*4]); - emu->xmm[xmm].ub[1+j*4] = ff_mult(0x09, eax1.ub[0+j*4]) ^ ff_mult(0x0E, eax1.ub[1+j*4]) ^ ff_mult(0x0B, eax1.ub[2+j*4]) ^ ff_mult(0x0D, eax1.ub[3+j*4]); - emu->xmm[xmm].ub[2+j*4] = ff_mult(0x0D, eax1.ub[0+j*4]) ^ ff_mult(0x09, eax1.ub[1+j*4]) ^ ff_mult(0x0E, eax1.ub[2+j*4]) ^ ff_mult(0x0B, eax1.ub[3+j*4]); - emu->xmm[xmm].ub[3+j*4] = ff_mult(0x0B, eax1.ub[0+j*4]) ^ ff_mult(0x0D, eax1.ub[1+j*4]) ^ ff_mult(0x09, eax1.ub[2+j*4]) ^ ff_mult(0x0E, eax1.ub[3+j*4]); - } -} -void arm_aesmc(x64emu_t* emu, int xmm) -{ - sse_regs_t eax1 = emu->xmm[xmm]; - - for(int j=0; j<4; ++j) { - emu->xmm[xmm].ub[0+j*4] = ff_mult(0x02, eax1.ub[0+j*4]) ^ ff_mult(0x03, eax1.ub[1+j*4]) ^ eax1.ub[2+j*4] ^ eax1.ub[3+j*4] ; - emu->xmm[xmm].ub[1+j*4] = eax1.ub[0+j*4] ^ ff_mult(0x02, eax1.ub[1+j*4]) ^ ff_mult(0x03, eax1.ub[2+j*4]) ^ eax1.ub[3+j*4] ; - emu->xmm[xmm].ub[2+j*4] = eax1.ub[0+j*4] ^ eax1.ub[1+j*4] ^ ff_mult(0x02, eax1.ub[2+j*4]) ^ ff_mult(0x03, eax1.ub[3+j*4]); - emu->xmm[xmm].ub[3+j*4] = ff_mult(0x03, eax1.ub[0+j*4]) ^ eax1.ub[1+j*4] ^ eax1.ub[2+j*4] ^ ff_mult(0x02, eax1.ub[3+j*4]); - } -} -void arm_aesdlast(x64emu_t* emu, int xmm) -{ - // A0 B1 C2 D3 E4 F5 G6 H7 I8 J9 Ka Lb Mc Nd Oe Pf - // A N K H E B O L I F C P M J G D - const uint8_t invshiftrows[] = {0,13,10, 7, 4, 1,14,11, 8, 5, 2,15,12, 9, 6, 3}; - const uint8_t invsubbytes[256] = { - 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, - 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, - 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, - 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, - 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, - 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, - 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, - 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, - 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, - 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, - 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, - 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, - 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, - 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, - 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, - 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d, - }; - - sse_regs_t eax1; - for(int i=0; i<16; ++i) - eax1.ub[i] = emu->xmm[xmm].ub[invshiftrows[i]]; - //STATE ← InvSubBytes( STATE ); - for(int i=0; i<16; ++i) - emu->xmm[xmm].ub[i] = invsubbytes[eax1.ub[i]]; - -} -static const uint8_t shiftrows[] = {0, 5,10,15, 4, 9,14, 3, 8,13, 2, 7,12, 1, 6,11}; -static const uint8_t subbytes[256] = { - 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, - 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, - 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, - 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, - 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, - 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, - 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, - 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, - 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, - 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, - 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, - 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, - 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, - 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, - 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, - 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16, -}; -void arm_aeselast(x64emu_t* emu, int xmm) -{ - // A0 B1 C2 D3 E4 F5 G6 H7 I8 J9 Ka Lb Mc Nd Oe Pf - // A F K P E J O D I N C H M B G L - sse_regs_t eax1; - for(int i=0; i<16; ++i) - eax1.ub[i] = emu->xmm[xmm].ub[shiftrows[i]]; - //STATE ← SubBytes( STATE ); - for(int i=0; i<16; ++i) - emu->xmm[xmm].ub[i] = subbytes[eax1.ub[i]]; -} -void arm_aesd(x64emu_t* emu, int xmm) -{ - arm_aesdlast(emu, xmm); - arm_aesimc(emu, xmm); -} -void arm_aese(x64emu_t* emu, int xmm) -{ - arm_aeselast(emu, xmm); - arm_aesmc(emu, xmm); -} -void arm_aeskeygenassist(x64emu_t* emu, int gx, int ex, void* p, uint32_t u8) -{ - sse_regs_t *EX = p?((sse_regs_t*)p):&emu->xmm[ex]; - sse_regs_t *GX = &emu->xmm[gx]; - for (int i = 4; i < 8; ++i) - GX->ub[i] = subbytes[EX->ub[i]]; - for (int i = 12; i < 16; ++i) - GX->ub[i] = subbytes[EX->ub[i]]; - GX->ud[0] = GX->ud[1]; - uint8_t tmp8u = GX->ub[4]; - GX->ud[1] = GX->ud[1] >> 8; - GX->ub[7] = tmp8u; - GX->ud[1] ^= u8; - GX->ud[2] = GX->ud[3]; - tmp8u = GX->ub[12]; - GX->ud[3] = GX->ud[3] >> 8; - GX->ub[15] = tmp8u; - GX->ud[3] ^= u8; -} - -void arm_pclmul(x64emu_t* emu, int gx, int ex, void* p, uint32_t u8) -{ - sse_regs_t *EX = p?((sse_regs_t*)p):&emu->xmm[ex]; - sse_regs_t *GX = &emu->xmm[gx]; - int g = (u8&1)?1:0; - int e = (u8&0b10000)?1:0; - __int128 result = 0; - __int128 op2 = EX->q[e]; - for (int i=0; i<64; ++i) - if(GX->q[g]&(1LL<<i)) - result ^= (op2<<i); - - GX->q[0] = result&0xffffffffffffffffLL; - GX->q[1] = (result>>64)&0xffffffffffffffffLL; -} - -void arm_clflush(x64emu_t* emu, void* p) -{ - cleanDBFromAddressRange((uintptr_t)p, 8, 0); -} - - #define XMM0 0 #define XMM8 16 #define X870 8 @@ -602,36 +276,7 @@ int neoncache_combine_st(dynarec_arm_t* dyn, int ninst, int a, int b) return NEON_CACHE_ST_D; } -int isPred(dynarec_arm_t* dyn, int ninst, int pred) { - for(int i=0; i<dyn->insts[ninst].pred_sz; ++i) - if(dyn->insts[ninst].pred[i]==pred) - return pred; - return -1; -} -int getNominalPred(dynarec_arm_t* dyn, int ninst) { - if((ninst<=0) || !dyn->insts[ninst].pred_sz) - return -1; - if(isPred(dyn, ninst, ninst-1)!=-1) - return ninst-1; - return dyn->insts[ninst].pred[0]; -} - -int isCacheEmpty(dynarec_arm_t* dyn, int ninst) { - if(dyn->insts[ninst].n.stack_next) { - return 0; - } - for(int i=0; i<24; ++i) - if(dyn->insts[ninst].n.neoncache[i].v) { // there is something at ninst for i - if(!( - (dyn->insts[ninst].n.neoncache[i].t==NEON_CACHE_ST_F || dyn->insts[ninst].n.neoncache[i].t==NEON_CACHE_ST_D) - && dyn->insts[ninst].n.neoncache[i].n<dyn->insts[ninst].n.stack_pop)) - return 0; - } - return 1; - -} - -static int fpuCacheNeedsTransform(dynarec_arm_t* dyn, int ninst) { +int fpuCacheNeedsTransform(dynarec_arm_t* dyn, int ninst) { int i2 = dyn->insts[ninst].x64.jmp_insts; if(i2<0) return 1; @@ -678,48 +323,6 @@ static int fpuCacheNeedsTransform(dynarec_arm_t* dyn, int ninst) { return ret; } -static int flagsCacheNeedsTransform(dynarec_arm_t* dyn, int ninst) { - int jmp = dyn->insts[ninst].x64.jmp_insts; - if(jmp<0) - return 0; - if(dyn->insts[ninst].f_exit.dfnone) // flags are fully known, nothing we can do more - return 0; -/* if((dyn->f.pending!=SF_SET) - && (dyn->f.pending!=SF_SET_PENDING)) { - if(dyn->f.pending!=SF_PENDING) {*/ - switch (dyn->insts[jmp].f_entry.pending) { - case SF_UNKNOWN: return 0; - case SF_SET: - if(dyn->insts[ninst].f_exit.pending!=SF_SET && dyn->insts[ninst].f_exit.pending!=SF_SET_PENDING) - return 1; - else - return 0; - case SF_SET_PENDING: - if(dyn->insts[ninst].f_exit.pending!=SF_SET - && dyn->insts[ninst].f_exit.pending!=SF_SET_PENDING - && dyn->insts[ninst].f_exit.pending!=SF_PENDING) - return 1; - else - return 0; - case SF_PENDING: - if(dyn->insts[ninst].f_exit.pending!=SF_SET - && dyn->insts[ninst].f_exit.pending!=SF_SET_PENDING - && dyn->insts[ninst].f_exit.pending!=SF_PENDING) - return 1; - else - return (dyn->insts[jmp].f_entry.dfnone == dyn->insts[ninst].f_exit.dfnone)?0:1; - } - if(dyn->insts[jmp].f_entry.dfnone && !dyn->insts[ninst].f_exit.dfnone) - return 1; - return 0; -} -int CacheNeedsTransform(dynarec_arm_t* dyn, int ninst) { - int ret = 0; - if (fpuCacheNeedsTransform(dyn, ninst)) ret|=1; - if (flagsCacheNeedsTransform(dyn, ninst)) ret|=2; - return ret; -} - void neoncacheUnwind(neoncache_t* cache) { if(cache->swapped) { diff --git a/src/dynarec/arm64/dynarec_arm64_functions.h b/src/dynarec/arm64/dynarec_arm64_functions.h index e65c2cde..c5b729ad 100755 --- a/src/dynarec/arm64/dynarec_arm64_functions.h +++ b/src/dynarec/arm64/dynarec_arm64_functions.h @@ -1,46 +1,7 @@ #ifndef __DYNAREC_ARM_FUNCTIONS_H__ #define __DYNAREC_ARM_FUNCTIONS_H__ -typedef struct x64emu_s x64emu_t; - -void arm_fstp(x64emu_t* emu, void* p); - -void arm_print_armreg(x64emu_t* emu, uintptr_t reg, uintptr_t n); - -void arm_f2xm1(x64emu_t* emu); -void arm_fyl2x(x64emu_t* emu); -void arm_ftan(x64emu_t* emu); -void arm_fpatan(x64emu_t* emu); -void arm_fxtract(x64emu_t* emu); -void arm_fprem(x64emu_t* emu); -void arm_fyl2xp1(x64emu_t* emu); -void arm_fsincos(x64emu_t* emu); -void arm_frndint(x64emu_t* emu); -void arm_fscale(x64emu_t* emu); -void arm_fsin(x64emu_t* emu); -void arm_fcos(x64emu_t* emu); -void arm_fbld(x64emu_t* emu, uint8_t* ed); -void arm_fild64(x64emu_t* emu, int64_t* ed); -void arm_fbstp(x64emu_t* emu, uint8_t* ed); -void arm_fistp64(x64emu_t* emu, int64_t* ed); -void arm_fistt64(x64emu_t* emu, int64_t* ed); -void arm_fld(x64emu_t* emu, uint8_t* ed); -void arm_fsave(x64emu_t* emu, uint8_t* ed); -void arm_frstor(x64emu_t* emu, uint8_t* ed); -void arm_fprem1(x64emu_t* emu); - -void arm_aesd(x64emu_t* emu, int xmm); -void arm_aese(x64emu_t* emu, int xmm); -void arm_aesdlast(x64emu_t* emu, int xmm); -void arm_aeselast(x64emu_t* emu, int xmm); -void arm_aesimc(x64emu_t* emu, int xmm); -void arm_aeskeygenassist(x64emu_t* emu, int gx, int ex, void* p, uint32_t u8); -void arm_pclmul(x64emu_t* emu, int gx, int ex, void* p, uint32_t u8); - -void arm_clflush(x64emu_t* emu, void* p); - -void arm_ud(x64emu_t* emu); -void arm_priv(x64emu_t* emu); +#include "../dynarec_native_functions.h" // Get an FPU scratch reg int fpu_get_scratch(dynarec_arm_t* dyn); @@ -71,8 +32,8 @@ void neoncache_promote_double(dynarec_arm_t* dyn, int ninst, int a); // Combine and propagate if needed (pass 1 only) int neoncache_combine_st(dynarec_arm_t* dyn, int ninst, int a, int b); // with stack current dyn->n_stack* -// FPU Cache transformation (for loops) -int CacheNeedsTransform(dynarec_arm_t* dyn, int i1); +// FPU Cache transformation (for loops) // Specific, need to be writen par backend +int fpuCacheNeedsTransform(dynarec_arm_t* dyn, int ninst); // Undo the changes of a neoncache to get the status before the instruction void neoncacheUnwind(neoncache_t* cache); @@ -81,14 +42,4 @@ void neoncacheUnwind(neoncache_t* cache); int isPred(dynarec_arm_t* dyn, int ninst, int pred); int getNominalPred(dynarec_arm_t* dyn, int ninst); -// Get if ED will have the correct parity. Not emiting anything. Parity is 2 for DWORD or 3 for QWORD -int getedparity(dynarec_arm_t* dyn, int ninst, uintptr_t addr, uint8_t nextop, int parity, int delta); -// Do the GETED, but don't emit anything... -uintptr_t fakeed(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop); - -// Is what pointed at addr a native call? And if yes, to what function? -int isNativeCall(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t* calladdress, int* retn); - -const char* getCacheName(int t, int n); - #endif //__DYNAREC_ARM_FUNCTIONS_H__ \ No newline at end of file diff --git a/src/dynarec/arm64/dynarec_arm64_helper.c b/src/dynarec/arm64/dynarec_arm64_helper.c index 18dde4ba..8eb41e45 100755 --- a/src/dynarec/arm64/dynarec_arm64_helper.c +++ b/src/dynarec/arm64/dynarec_arm64_helper.c @@ -1744,9 +1744,9 @@ static void flagsCacheTransform(dynarec_arm_t* dyn, int ninst, int s1) } void CacheTransform(dynarec_arm_t* dyn, int ninst, int cacheupd, int s1, int s2, int s3) { - if(cacheupd&1) - fpuCacheTransform(dyn, ninst, s1, s2, s3); if(cacheupd&2) + fpuCacheTransform(dyn, ninst, s1, s2, s3); + if(cacheupd&1) flagsCacheTransform(dyn, ninst, s1); } diff --git a/src/dynarec/dynarec_arch.h b/src/dynarec/dynarec_arch.h index d1d30599..d88f591b 100755 --- a/src/dynarec/dynarec_arch.h +++ b/src/dynarec/dynarec_arch.h @@ -2,31 +2,43 @@ #define __DYNAREC_ARCH__H_ #ifdef ARM64 -#include "arm64/arm64_printer.h" -#include "arm64/dynarec_arm64_private.h" -#include "arm64/dynarec_arm64_functions.h" #define instruction_native_t instruction_arm64_t #define dynarec_native_t dynarec_arm_t +#define ADDITIONNAL_DEFINITION() \ + int fpuCacheNeedsTransform(dynarec_native_t* dyn, int ninst); + +#define OTHER_CACHE() \ + if (fpuCacheNeedsTransform(dyn, ninst)) ret|=2; + +#include "arm64/arm64_printer.h" +#include "arm64/dynarec_arm64_private.h" +#include "arm64/dynarec_arm64_functions.h" #elif defined(LA464) -#include "la464/la464_printer.h" -#include "la464/dynarec_la464_private.h" -#include "la464/dynarec_la464_functions.h" #define instruction_native_t instruction_la464_t #define dynarec_native_t dynarec_la464_t +#define ADDITIONNAL_DEFINITION() +#define OTHER_CACHE() + +#include "la464/la464_printer.h" +#include "la464/dynarec_la464_private.h" +#include "la464/dynarec_la464_functions.h" #elif defined(RV64) -#include "rv64/rv64_printer.h" -#include "rv64/dynarec_rv64_private.h" -#include "rv64/dynarec_rv64_functions.h" #define instruction_native_t instruction_rv64_t #define dynarec_native_t dynarec_rv64_t +#define ADDITIONNAL_DEFINITION() + +#define OTHER_CACHE() +#include "rv64/rv64_printer.h" +#include "rv64/dynarec_rv64_private.h" +#include "rv64/dynarec_rv64_functions.h" #else #error Unsupported platform #endif diff --git a/src/dynarec/dynarec_native_functions.c b/src/dynarec/dynarec_native_functions.c new file mode 100644 index 00000000..566ba2d1 --- /dev/null +++ b/src/dynarec/dynarec_native_functions.c @@ -0,0 +1,425 @@ +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <pthread.h> +#include <errno.h> +#include <string.h> +#include <math.h> +#include <signal.h> +#include <sys/types.h> +#include <unistd.h> + +#include "debug.h" +#include "box64context.h" +#include "dynarec.h" +#include "emu/x64emu_private.h" +#include "tools/bridge_private.h" +#include "x64run.h" +#include "x64emu.h" +#include "box64stack.h" +#include "callback.h" +#include "emu/x64run_private.h" +#include "emu/x87emu_private.h" +#include "x64trace.h" +#include "signals.h" +#include "dynarec_native.h" +#include "custommem.h" +#include "bridge.h" +#include "dynarec_native_functions.h" + +void native_fstp(x64emu_t* emu, void* p) +{ + if(ST0.q!=STld(0).uref) + D2LD(&ST0.d, p); + else + memcpy(p, &STld(0).ld, 10); +} + +void native_print_armreg(x64emu_t* emu, uintptr_t reg, uintptr_t n) +{ + (void)emu; + dynarec_log(LOG_DEBUG, "R%lu=0x%lx (%lu)\n", n, reg, reg); +} + +void native_f2xm1(x64emu_t* emu) +{ + ST0.d = exp2(ST0.d) - 1.0; +} +void native_fyl2x(x64emu_t* emu) +{ + ST(1).d = log2(ST0.d)*ST(1).d; +} +void native_ftan(x64emu_t* emu) +{ + ST0.d = tan(ST0.d); + emu->sw.f.F87_C2 = 0; +} +void native_fpatan(x64emu_t* emu) +{ + ST1.d = atan2(ST1.d, ST0.d); +} +void native_fxtract(x64emu_t* emu) +{ + int32_t tmp32s = (ST1.q&0x7ff0000000000000LL)>>52; + tmp32s -= 1023; + ST1.d /= exp2(tmp32s); + ST0.d = tmp32s; +} +void native_fprem(x64emu_t* emu) +{ + int32_t tmp32s = ST0.d / ST1.d; + ST0.d -= ST1.d * tmp32s; + emu->sw.f.F87_C2 = 0; + emu->sw.f.F87_C0 = (tmp32s&1); + emu->sw.f.F87_C3 = ((tmp32s>>1)&1); + emu->sw.f.F87_C1 = ((tmp32s>>2)&1); +} +void native_fyl2xp1(x64emu_t* emu) +{ + ST(1).d = log2(ST0.d + 1.0)*ST(1).d; +} +void native_fsincos(x64emu_t* emu) +{ + sincos(ST1.d, &ST1.d, &ST0.d); + emu->sw.f.F87_C2 = 0; +} +void native_frndint(x64emu_t* emu) +{ + ST0.d = fpu_round(emu, ST0.d); +} +void native_fscale(x64emu_t* emu) +{ + if(ST0.d!=0.0) + ST0.d *= exp2(trunc(ST1.d)); +} +void native_fsin(x64emu_t* emu) +{ + ST0.d = sin(ST0.d); + emu->sw.f.F87_C2 = 0; +} +void native_fcos(x64emu_t* emu) +{ + ST0.d = cos(ST0.d); + emu->sw.f.F87_C2 = 0; +} + +void native_fbld(x64emu_t* emu, uint8_t* ed) +{ + fpu_fbld(emu, ed); +} + +void native_fild64(x64emu_t* emu, int64_t* ed) +{ + int64_t tmp; + memcpy(&tmp, ed, sizeof(tmp)); + ST0.d = tmp; + STll(0).sq = tmp; + STll(0).sref = ST0.sq; +} + +void native_fbstp(x64emu_t* emu, uint8_t* ed) +{ + fpu_fbst(emu, ed); +} + +void native_fistp64(x64emu_t* emu, int64_t* ed) +{ + // used of memcpy to avoid aligments issues + if(STll(0).sref==ST(0).sq) { + memcpy(ed, &STll(0).sq, sizeof(int64_t)); + } else { + int64_t tmp; + if(isgreater(ST0.d, (double)(int64_t)0x7fffffffffffffffLL) || isless(ST0.d, (double)(int64_t)0x8000000000000000LL) || !isfinite(ST0.d)) + tmp = 0x8000000000000000LL; + else + tmp = fpu_round(emu, ST0.d); + memcpy(ed, &tmp, sizeof(tmp)); + } +} + +void native_fistt64(x64emu_t* emu, int64_t* ed) +{ + // used of memcpy to avoid aligments issues + int64_t tmp = ST0.d; + memcpy(ed, &tmp, sizeof(tmp)); +} + +void native_fld(x64emu_t* emu, uint8_t* ed) +{ + memcpy(&STld(0).ld, ed, 10); + LD2D(&STld(0), &ST(0).d); + STld(0).uref = ST0.q; +} + +void native_ud(x64emu_t* emu) +{ + emit_signal(emu, SIGILL, (void*)R_RIP, 0); +} + +void native_priv(x64emu_t* emu) +{ + emit_signal(emu, SIGSEGV, (void*)R_RIP, 0); +} + +void native_fsave(x64emu_t* emu, uint8_t* ed) +{ + fpu_savenv(emu, (char*)ed, 0); + + uint8_t* p = ed; + p += 28; + for (int i=0; i<8; ++i) { + LD2D(p, &ST(i).d); + p+=10; + } +} +void native_frstor(x64emu_t* emu, uint8_t* ed) +{ + fpu_loadenv(emu, (char*)ed, 0); + + uint8_t* p = ed; + p += 28; + for (int i=0; i<8; ++i) { + D2LD(&ST(i).d, p); + p+=10; + } + +} + +void native_fprem1(x64emu_t* emu) +{ + // simplified version + int32_t tmp32s = round(ST0.d / ST1.d); + ST0.d -= ST1.d*tmp32s; + emu->sw.f.F87_C2 = 0; + emu->sw.f.F87_C0 = (tmp32s&1); + emu->sw.f.F87_C3 = ((tmp32s>>1)&1); + emu->sw.f.F87_C1 = ((tmp32s>>2)&1); +} + +static uint8_t ff_mult(uint8_t a, uint8_t b) +{ + int retval = 0; + + for(int i = 0; i < 8; i++) { + if((b & 1) == 1) + retval ^= a; + + if((a & 0x80)) { + a <<= 1; + a ^= 0x1b; + } else { + a <<= 1; + } + + b >>= 1; + } + + return retval; +} + +void native_aesimc(x64emu_t* emu, int xmm) +{ + sse_regs_t eax1 = emu->xmm[xmm]; + + for(int j=0; j<4; ++j) { + emu->xmm[xmm].ub[0+j*4] = ff_mult(0x0E, eax1.ub[0+j*4]) ^ ff_mult(0x0B, eax1.ub[1+j*4]) ^ ff_mult(0x0D, eax1.ub[2+j*4]) ^ ff_mult(0x09, eax1.ub[3+j*4]); + emu->xmm[xmm].ub[1+j*4] = ff_mult(0x09, eax1.ub[0+j*4]) ^ ff_mult(0x0E, eax1.ub[1+j*4]) ^ ff_mult(0x0B, eax1.ub[2+j*4]) ^ ff_mult(0x0D, eax1.ub[3+j*4]); + emu->xmm[xmm].ub[2+j*4] = ff_mult(0x0D, eax1.ub[0+j*4]) ^ ff_mult(0x09, eax1.ub[1+j*4]) ^ ff_mult(0x0E, eax1.ub[2+j*4]) ^ ff_mult(0x0B, eax1.ub[3+j*4]); + emu->xmm[xmm].ub[3+j*4] = ff_mult(0x0B, eax1.ub[0+j*4]) ^ ff_mult(0x0D, eax1.ub[1+j*4]) ^ ff_mult(0x09, eax1.ub[2+j*4]) ^ ff_mult(0x0E, eax1.ub[3+j*4]); + } +} +void native_aesmc(x64emu_t* emu, int xmm) +{ + sse_regs_t eax1 = emu->xmm[xmm]; + + for(int j=0; j<4; ++j) { + emu->xmm[xmm].ub[0+j*4] = ff_mult(0x02, eax1.ub[0+j*4]) ^ ff_mult(0x03, eax1.ub[1+j*4]) ^ eax1.ub[2+j*4] ^ eax1.ub[3+j*4] ; + emu->xmm[xmm].ub[1+j*4] = eax1.ub[0+j*4] ^ ff_mult(0x02, eax1.ub[1+j*4]) ^ ff_mult(0x03, eax1.ub[2+j*4]) ^ eax1.ub[3+j*4] ; + emu->xmm[xmm].ub[2+j*4] = eax1.ub[0+j*4] ^ eax1.ub[1+j*4] ^ ff_mult(0x02, eax1.ub[2+j*4]) ^ ff_mult(0x03, eax1.ub[3+j*4]); + emu->xmm[xmm].ub[3+j*4] = ff_mult(0x03, eax1.ub[0+j*4]) ^ eax1.ub[1+j*4] ^ eax1.ub[2+j*4] ^ ff_mult(0x02, eax1.ub[3+j*4]); + } +} +void native_aesdlast(x64emu_t* emu, int xmm) +{ + // A0 B1 C2 D3 E4 F5 G6 H7 I8 J9 Ka Lb Mc Nd Oe Pf + // A N K H E B O L I F C P M J G D + const uint8_t invshiftrows[] = {0,13,10, 7, 4, 1,14,11, 8, 5, 2,15,12, 9, 6, 3}; + const uint8_t invsubbytes[256] = { + 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, + 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, + 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, + 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, + 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, + 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, + 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, + 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, + 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, + 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, + 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, + 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, + 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, + 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, + 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, + 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d, + }; + + sse_regs_t eax1; + for(int i=0; i<16; ++i) + eax1.ub[i] = emu->xmm[xmm].ub[invshiftrows[i]]; + //STATE ← InvSubBytes( STATE ); + for(int i=0; i<16; ++i) + emu->xmm[xmm].ub[i] = invsubbytes[eax1.ub[i]]; + +} +static const uint8_t shiftrows[] = {0, 5,10,15, 4, 9,14, 3, 8,13, 2, 7,12, 1, 6,11}; +static const uint8_t subbytes[256] = { + 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, + 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, + 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, + 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, + 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, + 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, + 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, + 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, + 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, + 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, + 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, + 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, + 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, + 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, + 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, + 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16, +}; +void native_aeselast(x64emu_t* emu, int xmm) +{ + // A0 B1 C2 D3 E4 F5 G6 H7 I8 J9 Ka Lb Mc Nd Oe Pf + // A F K P E J O D I N C H M B G L + sse_regs_t eax1; + for(int i=0; i<16; ++i) + eax1.ub[i] = emu->xmm[xmm].ub[shiftrows[i]]; + //STATE ← SubBytes( STATE ); + for(int i=0; i<16; ++i) + emu->xmm[xmm].ub[i] = subbytes[eax1.ub[i]]; +} +void native_aesd(x64emu_t* emu, int xmm) +{ + native_aesdlast(emu, xmm); + native_aesimc(emu, xmm); +} +void native_aese(x64emu_t* emu, int xmm) +{ + native_aeselast(emu, xmm); + native_aesmc(emu, xmm); +} +void native_aeskeygenassist(x64emu_t* emu, int gx, int ex, void* p, uint32_t u8) +{ + sse_regs_t *EX = p?((sse_regs_t*)p):&emu->xmm[ex]; + sse_regs_t *GX = &emu->xmm[gx]; + for (int i = 4; i < 8; ++i) + GX->ub[i] = subbytes[EX->ub[i]]; + for (int i = 12; i < 16; ++i) + GX->ub[i] = subbytes[EX->ub[i]]; + GX->ud[0] = GX->ud[1]; + uint8_t tmp8u = GX->ub[4]; + GX->ud[1] = GX->ud[1] >> 8; + GX->ub[7] = tmp8u; + GX->ud[1] ^= u8; + GX->ud[2] = GX->ud[3]; + tmp8u = GX->ub[12]; + GX->ud[3] = GX->ud[3] >> 8; + GX->ub[15] = tmp8u; + GX->ud[3] ^= u8; +} + +void native_pclmul(x64emu_t* emu, int gx, int ex, void* p, uint32_t u8) +{ + sse_regs_t *EX = p?((sse_regs_t*)p):&emu->xmm[ex]; + sse_regs_t *GX = &emu->xmm[gx]; + int g = (u8&1)?1:0; + int e = (u8&0b10000)?1:0; + __int128 result = 0; + __int128 op2 = EX->q[e]; + for (int i=0; i<64; ++i) + if(GX->q[g]&(1LL<<i)) + result ^= (op2<<i); + + GX->q[0] = result&0xffffffffffffffffLL; + GX->q[1] = (result>>64)&0xffffffffffffffffLL; +} + +void native_clflush(x64emu_t* emu, void* p) +{ + cleanDBFromAddressRange((uintptr_t)p, 8, 0); +} + +static int flagsCacheNeedsTransform(dynarec_native_t* dyn, int ninst) { + int jmp = dyn->insts[ninst].x64.jmp_insts; + if(jmp<0) + return 0; + if(dyn->insts[ninst].f_exit.dfnone) // flags are fully known, nothing we can do more + return 0; +/* if((dyn->f.pending!=SF_SET) + && (dyn->f.pending!=SF_SET_PENDING)) { + if(dyn->f.pending!=SF_PENDING) {*/ + switch (dyn->insts[jmp].f_entry.pending) { + case SF_UNKNOWN: return 0; + case SF_SET: + if(dyn->insts[ninst].f_exit.pending!=SF_SET && dyn->insts[ninst].f_exit.pending!=SF_SET_PENDING) + return 1; + else + return 0; + case SF_SET_PENDING: + if(dyn->insts[ninst].f_exit.pending!=SF_SET + && dyn->insts[ninst].f_exit.pending!=SF_SET_PENDING + && dyn->insts[ninst].f_exit.pending!=SF_PENDING) + return 1; + else + return 0; + case SF_PENDING: + if(dyn->insts[ninst].f_exit.pending!=SF_SET + && dyn->insts[ninst].f_exit.pending!=SF_SET_PENDING + && dyn->insts[ninst].f_exit.pending!=SF_PENDING) + return 1; + else + return (dyn->insts[jmp].f_entry.dfnone == dyn->insts[ninst].f_exit.dfnone)?0:1; + } + if(dyn->insts[jmp].f_entry.dfnone && !dyn->insts[ninst].f_exit.dfnone) + return 1; + return 0; +} + +int CacheNeedsTransform(dynarec_native_t* dyn, int ninst) { + int ret = 0; + if (flagsCacheNeedsTransform(dyn, ninst)) ret|=1; + OTHER_CACHE() + return ret; +} + +int isPred(dynarec_native_t* dyn, int ninst, int pred) { + for(int i=0; i<dyn->insts[ninst].pred_sz; ++i) + if(dyn->insts[ninst].pred[i]==pred) + return pred; + return -1; +} +int getNominalPred(dynarec_native_t* dyn, int ninst) { + if((ninst<=0) || !dyn->insts[ninst].pred_sz) + return -1; + if(isPred(dyn, ninst, ninst-1)!=-1) + return ninst-1; + return dyn->insts[ninst].pred[0]; +} + +int isCacheEmpty(dynarec_native_t* dyn, int ninst) { + if(dyn->insts[ninst].n.stack_next) { + return 0; + } + for(int i=0; i<24; ++i) + if(dyn->insts[ninst].n.neoncache[i].v) { // there is something at ninst for i + if(!( + (dyn->insts[ninst].n.neoncache[i].t==NEON_CACHE_ST_F || dyn->insts[ninst].n.neoncache[i].t==NEON_CACHE_ST_D) + && dyn->insts[ninst].n.neoncache[i].n<dyn->insts[ninst].n.stack_pop)) + return 0; + } + return 1; + +} \ No newline at end of file diff --git a/src/dynarec/dynarec_native_functions.h b/src/dynarec/dynarec_native_functions.h new file mode 100644 index 00000000..4cbd71a7 --- /dev/null +++ b/src/dynarec/dynarec_native_functions.h @@ -0,0 +1,70 @@ +#ifndef __DYNAREC_NATIVE_FUNCTIONS_H__ +#define __DYNAREC_NATIVE_FUNCTIONS_H__ + +#include <stdint.h> + +#include "dynarec_arch.h" + +typedef struct x64emu_s x64emu_t; + +void native_fstp(x64emu_t* emu, void* p); + +void native_print_armreg(x64emu_t* emu, uintptr_t reg, uintptr_t n); + +void native_f2xm1(x64emu_t* emu); +void native_fyl2x(x64emu_t* emu); +void native_ftan(x64emu_t* emu); +void native_fpatan(x64emu_t* emu); +void native_fxtract(x64emu_t* emu); +void native_fprem(x64emu_t* emu); +void native_fyl2xp1(x64emu_t* emu); +void native_fsincos(x64emu_t* emu); +void native_frndint(x64emu_t* emu); +void native_fscale(x64emu_t* emu); +void native_fsin(x64emu_t* emu); +void native_fcos(x64emu_t* emu); +void native_fbld(x64emu_t* emu, uint8_t* ed); +void native_fild64(x64emu_t* emu, int64_t* ed); +void native_fbstp(x64emu_t* emu, uint8_t* ed); +void native_fistp64(x64emu_t* emu, int64_t* ed); +void native_fistt64(x64emu_t* emu, int64_t* ed); +void native_fld(x64emu_t* emu, uint8_t* ed); +void native_fsave(x64emu_t* emu, uint8_t* ed); +void native_frstor(x64emu_t* emu, uint8_t* ed); +void native_fprem1(x64emu_t* emu); + +void native_aesd(x64emu_t* emu, int xmm); +void native_aese(x64emu_t* emu, int xmm); +void native_aesdlast(x64emu_t* emu, int xmm); +void native_aeselast(x64emu_t* emu, int xmm); +void native_aesimc(x64emu_t* emu, int xmm); +void native_aeskeygenassist(x64emu_t* emu, int gx, int ex, void* p, uint32_t u8); +void native_pclmul(x64emu_t* emu, int gx, int ex, void* p, uint32_t u8); + +void native_clflush(x64emu_t* emu, void* p); + +void native_ud(x64emu_t* emu); +void native_priv(x64emu_t* emu); + +// Caches transformation (for loops) // Specific, need to be writen par backend +int CacheNeedsTransform(dynarec_native_t* dyn, int i1); + +// predecessor access +int isPred(dynarec_native_t* dyn, int ninst, int pred); +int getNominalPred(dynarec_native_t* dyn, int ninst); + +// Get if ED will have the correct parity. Not emiting anything. Parity is 2 for DWORD or 3 for QWORD +int getedparity(dynarec_native_t* dyn, int ninst, uintptr_t addr, uint8_t nextop, int parity, int delta); +// Do the GETED, but don't emit anything... +uintptr_t fakeed(dynarec_native_t* dyn, uintptr_t addr, int ninst, uint8_t nextop); + +// Is what pointed at addr a native call? And if yes, to what function? +int isNativeCall(dynarec_native_t* dyn, uintptr_t addr, uintptr_t* calladdress, int* retn); + +int isCacheEmpty(dynarec_native_t* dyn, int ninst); + +const char* getCacheName(int t, int n); + +ADDITIONNAL_DEFINITION() + +#endif //__DYNAREC_NATIVE_FUNCTIONS_H__ \ No newline at end of file diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c index bbff1f78..ada4e23e 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f.c +++ b/src/dynarec/rv64/dynarec_rv64_0f.c @@ -63,7 +63,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SETFLAGS(X_ALL, SF_SET); // Hack to set flags in "don't care" state GETIP(ip); STORE_XEMU_CALL(); - CALL(rv64_ud, -1); + CALL(native_ud, -1); LOAD_XEMU_CALL(); jump_to_epilog(dyn, 0, xRIP, ninst); *need_epilog = 0; @@ -91,7 +91,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SETFLAGS(X_ALL, SF_SET); // Hack to set flags in "don't care" state GETIP(ip); STORE_XEMU_CALL(); - CALL(rv64_ud, -1); + CALL(native_ud, -1); LOAD_XEMU_CALL(); jump_to_epilog(dyn, 0, xRIP, ninst); *need_epilog = 0; @@ -103,7 +103,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SETFLAGS(X_ALL, SF_SET); // Hack to set flags in "don't care" state GETIP(ip); STORE_XEMU_CALL(); - CALL(rv64_ud, -1); + CALL(native_ud, -1); LOAD_XEMU_CALL(); jump_to_epilog(dyn, 0, xRIP, ninst); *need_epilog = 0; @@ -268,7 +268,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni if(wback!=A1) { MV(A1, wback); } - CALL_(arm_clflush, -1, 0); + CALL_(native_clflush, -1, 0); break; default: DEFAULT; diff --git a/src/dynarec/rv64/dynarec_rv64_functions.c b/src/dynarec/rv64/dynarec_rv64_functions.c index f7a0af69..2c3de1b5 100644 --- a/src/dynarec/rv64/dynarec_rv64_functions.c +++ b/src/dynarec/rv64/dynarec_rv64_functions.c @@ -29,190 +29,12 @@ #include "bridge.h" #include "rv64_lock.h" -void arm_clflush(x64emu_t* emu, void* p) -{ - cleanDBFromAddressRange((uintptr_t)p, 8, 0); -} - -void rv64_ud(x64emu_t* emu) -{ - emit_signal(emu, SIGILL, (void*)R_RIP, 0); -} - -void rv64_priv(x64emu_t* emu) -{ - emit_signal(emu, SIGSEGV, (void*)R_RIP, 0); -} - void fpu_reset_scratch(dynarec_rv64_t* dyn) { //TODO } -#define F8 *(uint8_t*)(addr++) -#define F32 *(uint32_t*)(addr+=4, addr-4) -#define F32S64 (uint64_t)(int64_t)*(int32_t*)(addr+=4, addr-4) -// Get if ED will have the correct parity. Not emiting anything. Parity is 2 for DWORD or 3 for QWORD -int getedparity(dynarec_rv64_t* dyn, int ninst, uintptr_t addr, uint8_t nextop, int parity, int delta) -{ - (void)dyn; (void)ninst; - - uint32_t tested = (1<<parity)-1; - if((nextop&0xC0)==0xC0) - return 0; // direct register, no parity... - if(!(nextop&0xC0)) { - if((nextop&7)==4) { - uint8_t sib = F8; - int sib_reg = (sib>>3)&7; - if((sib&0x7)==5) { - uint64_t tmp = F32S64; - if (sib_reg!=4) { - // if XXXXXX+reg<<N then check parity of XXXXX and N should be enough - return ((tmp&tested)==0 && (sib>>6)>=parity)?1:0; - } else { - // just a constant... - return (tmp&tested)?0:1; - } - } else { - if(sib_reg==4 && parity<3) - return 0; // simple [reg] - // don't try [reg1 + reg2<<N], unless reg1 is ESP - return ((sib&0x7)==4 && (sib>>6)>=parity)?1:0; - } - } else if((nextop&7)==5) { - uint64_t tmp = F32S64; - tmp+=addr+delta; - return (tmp&tested)?0:1; - } else { - return 0; - } - } else { - return 0; //Form [reg1 + reg2<<N + XXXXXX] - } -} - -// Do the GETED, but don't emit anything... -uintptr_t fakeed(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop) -{ - (void)dyn; (void)addr; (void)ninst; - - if((nextop&0xC0)==0xC0) - return addr; - if(!(nextop&0xC0)) { - if((nextop&7)==4) { - uint8_t sib = F8; - if((sib&0x7)==5) { - addr+=4; - } - } else if((nextop&7)==5) { - addr+=4; - } - } else { - if((nextop&7)==4) { - ++addr; - } - if(nextop&0x80) { - addr+=4; - } else { - ++addr; - } - } - return addr; -} -#undef F8 -#undef F32 - -static int fpuCacheNeedsTransform(dynarec_rv64_t* dyn, int ninst) -{ - // TODO - return 0; -} - -static int flagsCacheNeedsTransform(dynarec_rv64_t* dyn, int ninst) -{ - int jmp = dyn->insts[ninst].x64.jmp_insts; - if(jmp<0) - return 0; - if(dyn->insts[ninst].f_exit.dfnone) // flags are fully known, nothing we can do more - return 0; -/* if((dyn->f.pending!=SF_SET) - && (dyn->f.pending!=SF_SET_PENDING)) { - if(dyn->f.pending!=SF_PENDING) {*/ - switch (dyn->insts[jmp].f_entry.pending) { - case SF_UNKNOWN: return 0; - case SF_SET: - if(dyn->insts[ninst].f_exit.pending!=SF_SET && dyn->insts[ninst].f_exit.pending!=SF_SET_PENDING) - return 1; - else - return 0; - case SF_SET_PENDING: - if(dyn->insts[ninst].f_exit.pending!=SF_SET - && dyn->insts[ninst].f_exit.pending!=SF_SET_PENDING - && dyn->insts[ninst].f_exit.pending!=SF_PENDING) - return 1; - else - return 0; - case SF_PENDING: - if(dyn->insts[ninst].f_exit.pending!=SF_SET - && dyn->insts[ninst].f_exit.pending!=SF_SET_PENDING - && dyn->insts[ninst].f_exit.pending!=SF_PENDING) - return 1; - else - return (dyn->insts[jmp].f_entry.dfnone == dyn->insts[ninst].f_exit.dfnone)?0:1; - } - if(dyn->insts[jmp].f_entry.dfnone && !dyn->insts[ninst].f_exit.dfnone) - return 1; - return 0; -} -int CacheNeedsTransform(dynarec_rv64_t* dyn, int ninst) { - int ret = 0; - if (fpuCacheNeedsTransform(dyn, ninst)) ret|=1; - if (flagsCacheNeedsTransform(dyn, ninst)) ret|=2; - return ret; -} - -int isNativeCall(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t* calladdress, int* retn) -{ - (void)dyn; - -#define PK(a) *(uint8_t*)(addr+a) -#define PK32(a) *(int32_t*)(addr+a) - - if(!addr || !getProtection(addr)) - return 0; - if(PK(0)==0xff && PK(1)==0x25) { // "absolute" jump, maybe the GOT (well, RIP relative in fact) - uintptr_t a1 = addr + 6 + (PK32(2)); // need to add a check to see if the address is from the GOT ! - addr = (uintptr_t)getAlternate(*(void**)a1); - } - if(!addr || !getProtection(addr)) - return 0; - onebridge_t *b = (onebridge_t*)(addr); - if(b->CC==0xCC && b->S=='S' && b->C=='C' && b->w!=(wrapper_t)0 && b->f!=(uintptr_t)PltResolver) { - // found ! - if(retn) *retn = (b->C3==0xC2)?b->N:0; - if(calladdress) *calladdress = addr+1; - return 1; - } - return 0; -#undef PK32 -#undef PK -} - -int isPred(dynarec_rv64_t* dyn, int ninst, int pred) { - for(int i=0; i<dyn->insts[ninst].pred_sz; ++i) - if(dyn->insts[ninst].pred[i]==pred) - return pred; - return -1; -} -int getNominalPred(dynarec_rv64_t* dyn, int ninst) { - if((ninst<=0) || !dyn->insts[ninst].pred_sz) - return -1; - if(isPred(dyn, ninst, ninst-1)!=-1) - return ninst-1; - return dyn->insts[ninst].pred[0]; -} - uint8_t extract_byte(uint32_t val, void* address){ int idx = (((uintptr_t)address)&3)*8; return (val>>idx)&0xff; diff --git a/src/dynarec/rv64/dynarec_rv64_functions.h b/src/dynarec/rv64/dynarec_rv64_functions.h index 9e0fe7ab..af55ad81 100644 --- a/src/dynarec/rv64/dynarec_rv64_functions.h +++ b/src/dynarec/rv64/dynarec_rv64_functions.h @@ -1,31 +1,12 @@ #ifndef __DYNAREC_RV64_FUNCTIONS_H__ #define __DYNAREC_RV64_FUNCTIONS_H__ -#include <stdint.h> + +#include "../dynarec_native_functions.h" typedef struct x64emu_s x64emu_t; typedef struct dynarec_rv64_s dynarec_rv64_t; -void arm_clflush(x64emu_t* emu, void* p); - -void rv64_ud(x64emu_t* emu); -void rv64_priv(x64emu_t* emu); - // Reset scratch regs counter void fpu_reset_scratch(dynarec_rv64_t* dyn); -// Get if ED will have the correct parity. Not emiting anything. Parity is 2 for DWORD or 3 for QWORD -int getedparity(dynarec_rv64_t* dyn, int ninst, uintptr_t addr, uint8_t nextop, int parity, int delta); -// Do the GETED, but don't emit anything... -uintptr_t fakeed(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop); - -// Is what pointed at addr a native call? And if yes, to what function? -int isNativeCall(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t* calladdress, int* retn); - -// FPU Cache transformation (for loops) -int CacheNeedsTransform(dynarec_rv64_t* dyn, int i1); - -// predecessor access -int isPred(dynarec_rv64_t* dyn, int ninst, int pred); -int getNominalPred(dynarec_rv64_t* dyn, int ninst); - #endif //__DYNAREC_RV64_FUNCTIONS_H__ \ No newline at end of file diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c index 4ae11186..ca0628fe 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.c +++ b/src/dynarec/rv64/dynarec_rv64_helper.c @@ -568,9 +568,9 @@ static void flagsCacheTransform(dynarec_rv64_t* dyn, int ninst, int s1) } void CacheTransform(dynarec_rv64_t* dyn, int ninst, int cacheupd, int s1, int s2, int s3) { - if(cacheupd&1) - fpuCacheTransform(dyn, ninst, s1, s2, s3); if(cacheupd&2) + fpuCacheTransform(dyn, ninst, s1, s2, s3); + if(cacheupd&1) flagsCacheTransform(dyn, ninst, s1); } |