diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2024-12-12 18:51:24 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-12-12 11:51:24 +0100 |
| commit | 55d6971a23d43f3f80919b47bc54b46192a89040 (patch) | |
| tree | 1c962ac0032a6d1794543616730d8c856966db77 /src | |
| parent | 7168167400d7fbe60e0d9034d95bdbcbf302af99 (diff) | |
| download | box64-55d6971a23d43f3f80919b47bc54b46192a89040.tar.gz box64-55d6971a23d43f3f80919b47bc54b46192a89040.zip | |
[RV64_DYNAREC] New register mapping (#2139)
* [RV64_DYNAREC] New register mapping * Fix
Diffstat (limited to 'src')
25 files changed, 639 insertions, 638 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_00_0.c b/src/dynarec/rv64/dynarec_rv64_00_0.c index ed2dfde0..1a0290d6 100644 --- a/src/dynarec/rv64/dynarec_rv64_00_0.c +++ b/src/dynarec/rv64/dynarec_rv64_00_0.c @@ -541,7 +541,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETEB(x1, 0); GETGB(x2); - emit_cmp8(dyn, ninst, x1, x2, x9, x4, x5, x6); + emit_cmp8(dyn, ninst, x1, x2, x7, x4, x5, x6); break; case 0x39: INST_NAME("CMP Ed, Gd"); @@ -557,7 +557,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETEB(x1, 0); GETGB(x2); - emit_cmp8(dyn, ninst, x2, x1, x9, x4, x5, x6); + emit_cmp8(dyn, ninst, x2, x1, x7, x4, x5, x6); break; case 0x3B: INST_NAME("CMP Gd, Ed"); diff --git a/src/dynarec/rv64/dynarec_rv64_00_1.c b/src/dynarec/rv64/dynarec_rv64_00_1.c index 9940d5ec..bee9fa33 100644 --- a/src/dynarec/rv64/dynarec_rv64_00_1.c +++ b/src/dynarec/rv64/dynarec_rv64_00_1.c @@ -288,7 +288,7 @@ uintptr_t dynarec64_00_1(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int SETFLAGS(X_ALL, SF_SET_NODF, NAT_FLAGS_NOFUSION); // Hack to set flags in "don't care" state GETIP(ip); STORE_XEMU_CALL(x3); - CALL(native_priv, -1); + CALL(native_priv, -1, 0, 0); LOAD_XEMU_CALL(); jump_to_epilog(dyn, 0, xRIP, ninst); *need_epilog = 0; @@ -300,7 +300,7 @@ uintptr_t dynarec64_00_1(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int SETFLAGS(X_ALL, SF_SET_NODF, NAT_FLAGS_NOFUSION); // Hack to set flags in "don't care" state GETIP(ip); STORE_XEMU_CALL(x3); - CALL(native_priv, -1); + CALL(native_priv, -1, 0, 0); LOAD_XEMU_CALL(); jump_to_epilog(dyn, 0, xRIP, ninst); *need_epilog = 0; diff --git a/src/dynarec/rv64/dynarec_rv64_00_2.c b/src/dynarec/rv64/dynarec_rv64_00_2.c index 6472d643..24968c74 100644 --- a/src/dynarec/rv64/dynarec_rv64_00_2.c +++ b/src/dynarec/rv64/dynarec_rv64_00_2.c @@ -118,7 +118,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int u8 = F8; if 
(u8) { ADDI(x2, xZR, u8); - emit_cmp8(dyn, ninst, x1, x2, x9, x4, x5, x6); + emit_cmp8(dyn, ninst, x1, x2, x7, x4, x5, x6); } else { emit_cmp8_0(dyn, ninst, x1, x3, x4); } @@ -175,7 +175,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int else i64 = F8S; MOV64xw(x5, i64); - emit_adc32(dyn, ninst, rex, ed, x5, x3, x4, x6, x9); + emit_adc32(dyn, ninst, rex, ed, x5, x3, x4, x6, x7); WBACK; break; case 3: // SBB @@ -308,13 +308,13 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ADDI(x4, xZR, 0xff); SLL(x4, x4, x1); NOT(x4, x4); - SLL(x9, gd, x1); + SLL(x7, gd, x1); // do aligned ll/sc sequence, reusing x2 (ed might be x2 but is no longer needed) MARKLOCK; LR_W(x2, x6, 1, 1); AND(x5, x2, x4); - OR(x5, x5, x9); + OR(x5, x5, x7); SC_W(x5, x5, x6, 1, 1); BNEZ_MARKLOCK(x5); diff --git a/src/dynarec/rv64/dynarec_rv64_00_3.c b/src/dynarec/rv64/dynarec_rv64_00_3.c index d4ff674a..8afa8442 100644 --- a/src/dynarec/rv64/dynarec_rv64_00_3.c +++ b/src/dynarec/rv64/dynarec_rv64_00_3.c @@ -63,7 +63,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETEB(x1, 1); u8 = F8; MOV32w(x2, u8); - CALL_(rol8, ed, x3); + CALL_(rol8, ed, x3, x1, x2); EBBACK(x5, 0); break; case 1: @@ -73,7 +73,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETEB(x1, 1); u8 = F8; MOV32w(x2, u8); - CALL_(ror8, ed, x3); + CALL_(ror8, ed, x3, x1, x2); EBBACK(x5, 0); break; case 2: @@ -84,7 +84,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETEB(x1, 1); u8 = F8; MOV32w(x2, u8); - CALL_(rcl8, ed, x3); + CALL_(rcl8, ed, x3, x1, x2); EBBACK(x5, 0); break; case 3: @@ -95,7 +95,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETEB(x1, 1); u8 = F8; MOV32w(x2, u8); - CALL_(rcr8, ed, x3); + CALL_(rcr8, ed, x3, x1, x2); EBBACK(x5, 0); break; case 4: @@ -196,7 +196,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* 
dyn, uintptr_t addr, uintptr_t ip, int u8 = (F8) & (rex.w ? 0x3f : 0x1f); MOV32w(x2, u8); GETEDW(x4, x1, 0); - CALL_(rex.w ? ((void*)rcl64) : ((void*)rcl32), ed, x4); + CALL_(rex.w ? ((void*)rcl64) : ((void*)rcl32), ed, x4, x1, x2); WBACK; if (!wback && !rex.w) ZEROUP(ed); break; @@ -208,7 +208,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int u8 = (F8) & (rex.w ? 0x3f : 0x1f); MOV32w(x2, u8); GETEDW(x4, x1, 0); - CALL_(rex.w ? ((void*)rcr64) : ((void*)rcr32), ed, x4); + CALL_(rex.w ? ((void*)rcr64) : ((void*)rcr32), ed, x4, x1, x2); WBACK; if (!wback && !rex.w) ZEROUP(ed); break; @@ -446,7 +446,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETIP(ip + 1); // read the 0xCC STORE_XEMU_CALL(x3); ADDI(x1, xEmu, (uint32_t)offsetof(x64emu_t, ip)); // setup addr as &emu->ip - CALL_S(x64Int3, -1); + CALL_S(x64Int3, -1, x1); LOAD_XEMU_CALL(); addr += 8 + 8; TABLE64(x3, addr); // expected return address @@ -468,7 +468,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int BEQZ_MARK(x3); GETIP(addr); STORE_XEMU_CALL(x3); - CALL(native_int3, -1); + CALL(native_int3, -1, 0, 0); LOAD_XEMU_CALL(); MARK; jump_to_epilog(dyn, addr, 0, ninst); @@ -487,7 +487,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETIP(ip); // priviledged instruction, IP not updated STORE_XEMU_CALL(x3); MOV32w(x1, u8); - CALL(native_int, -1); + CALL(native_int, -1, x1, 0); LOAD_XEMU_CALL(); } else if (u8 == 0x80) { INST_NAME("32bits SYSCALL"); @@ -495,7 +495,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int SMEND(); GETIP(addr); STORE_XEMU_CALL(x3); - CALL_S(x86Syscall, -1); + CALL_S(x86Syscall, -1, 0); LOAD_XEMU_CALL(); TABLE64(x3, addr); // expected return address BNE_MARK(xRIP, x3); @@ -509,7 +509,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int SETFLAGS(X_ALL, SF_SET_NODF, NAT_FLAGS_NOFUSION); // 
Hack to set flags in "don't care" state GETIP(addr); STORE_XEMU_CALL(x3); - CALL(native_int3, -1); + CALL(native_int3, -1, 0, 0); LOAD_XEMU_CALL(); jump_to_epilog(dyn, 0, xRIP, ninst); *need_epilog = 0; @@ -519,7 +519,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int SETFLAGS(X_ALL, SF_SET_NODF, NAT_FLAGS_NOFUSION); // Hack to set flags in "don't care" state GETIP(ip); // priviledged instruction, IP not updated STORE_XEMU_CALL(x3); - CALL(native_priv, -1); + CALL(native_priv, -1, 0, 0); LOAD_XEMU_CALL(); jump_to_epilog(dyn, 0, xRIP, ninst); *need_epilog = 0; @@ -550,7 +550,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int } MESSAGE(LOG_DUMP, "Need Optimization\n"); SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION); - CALL_(rol8, ed, x3); + CALL_(rol8, ed, x3, x1, x2); EBBACK(x5, 0); break; case 1: @@ -565,7 +565,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int } MESSAGE(LOG_DUMP, "Need Optimization\n"); SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION); - CALL_(ror8, ed, x3); + CALL_(ror8, ed, x3, x1, x2); EBBACK(x5, 0); break; case 2: @@ -581,7 +581,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int MESSAGE(LOG_DUMP, "Need Optimization\n"); READFLAGS(X_CF); SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION); - CALL_(rcl8, ed, x3); + CALL_(rcl8, ed, x3, x1, x2); EBBACK(x5, 0); break; case 3: @@ -597,7 +597,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int MESSAGE(LOG_DUMP, "Need Optimization\n"); READFLAGS(X_CF); SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION); - CALL_(rcr8, ed, x3); + CALL_(rcr8, ed, x3, x1, x2); EBBACK(x5, 0); break; case 4: @@ -682,7 +682,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION); MOV32w(x2, 1); GETEDW(x4, x1, 0); - CALL_(rex.w ? 
((void*)rcl64) : ((void*)rcl32), ed, x4); + CALL_(rex.w ? ((void*)rcl64) : ((void*)rcl32), ed, x4, x1, x2); WBACK; if (!wback && !rex.w) ZEROUP(ed); break; @@ -693,7 +693,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION); MOV32w(x2, 1); GETEDW(x4, x1, 0); - CALL_(rex.w ? ((void*)rcr64) : ((void*)rcr32), ed, x4); + CALL_(rex.w ? ((void*)rcr64) : ((void*)rcr32), ed, x4, x1, x2); WBACK; if (!wback && !rex.w) ZEROUP(ed); break; @@ -752,7 +752,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION); ANDI(x2, xRCX, rex.w ? 0x3f : 0x1f); GETEDW(x4, x1, 0); - CALL_(rex.w ? ((void*)rcl64) : ((void*)rcl32), ed, x4); + CALL_(rex.w ? ((void*)rcl64) : ((void*)rcl32), ed, x4, x1, x2); WBACK; if (!wback && !rex.w) ZEROUP(ed); break; @@ -763,7 +763,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION); ANDI(x2, xRCX, rex.w ? 0x3f : 0x1f); GETEDW(x4, x1, 0); - CALL_(rex.w ? ((void*)rcr64) : ((void*)rcr32), ed, x4); + CALL_(rex.w ? 
((void*)rcr64) : ((void*)rcr32), ed, x4, x1, x2); WBACK; if (!wback && !rex.w) ZEROUP(ed); break; @@ -946,7 +946,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETIP_(dyn->insts[ninst].natcall); // read the 0xCC already STORE_XEMU_CALL(x3); ADDI(x1, xEmu, (uint32_t)offsetof(x64emu_t, ip)); // setup addr as &emu->ip - CALL_S(x64Int3, -1); + CALL_S(x64Int3, -1, x1); LOAD_XEMU_CALL(); TABLE64(x3, dyn->insts[ninst].natcall); ADDI(x3, x3, 2 + 8 + 8); @@ -1074,7 +1074,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int SETFLAGS(X_ALL, SF_SET_NODF, NAT_FLAGS_NOFUSION); // Hack to set flags in "don't care" state GETIP(ip); STORE_XEMU_CALL(xRIP); - CALL(native_priv, -1); + CALL(native_priv, -1, 0, 0); LOAD_XEMU_CALL(); jump_to_epilog(dyn, 0, xRIP, ninst); *need_epilog = 0; @@ -1146,7 +1146,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int MESSAGE(LOG_DUMP, "Need Optimization\n"); SETFLAGS(X_ALL, SF_SET_DF, NAT_FLAGS_NOFUSION); GETEB(x1, 0); - CALL(div8, -1); + CALL(div8, -1, x1, 0); break; case 7: INST_NAME("IDIV Eb"); @@ -1154,7 +1154,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int MESSAGE(LOG_DUMP, "Need Optimization\n"); SETFLAGS(X_ALL, SF_SET_DF, NAT_FLAGS_NOFUSION); GETEB(x1, 0); - CALL(idiv8, -1); + CALL(idiv8, -1, x1, 0); break; } break; @@ -1268,14 +1268,14 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int MESSAGE(LOG_INFO, "Divide by 0 hack\n"); GETIP(ip); STORE_XEMU_CALL(x3); - CALL(native_div0, -1); + CALL(native_div0, -1, 0, 0); LOAD_XEMU_CALL(); } else { if (box64_dynarec_div0) { BNE_MARK3(ed, xZR); GETIP_(ip); STORE_XEMU_CALL(x3); - CALL(native_div0, -1); + CALL(native_div0, -1, 0, 0); CLEARIP(); LOAD_XEMU_CALL(); jump_to_epilog(dyn, 0, xRIP, ninst); @@ -1303,7 +1303,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int BNE_MARK3(ed, xZR); GETIP_(ip); 
STORE_XEMU_CALL(x3); - CALL(native_div0, -1); + CALL(native_div0, -1, 0, 0); CLEARIP(); LOAD_XEMU_CALL(); jump_to_epilog(dyn, 0, xRIP, ninst); @@ -1318,15 +1318,14 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int BNE_MARK3(ed, xZR); GETIP_(ip); STORE_XEMU_CALL(x3); - CALL(native_div0, -1); + CALL(native_div0, -1, 0, 0); CLEARIP(); LOAD_XEMU_CALL(); jump_to_epilog(dyn, 0, xRIP, ninst); MARK3; } BEQ_MARK(xRDX, xZR); - if (ed != x1) { MV(x1, ed); } - CALL(div64, -1); + CALL(div64, -1, ed, 0); B_NEXT_nocond; MARK; DIVU(x2, xRAX, ed); @@ -1346,7 +1345,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int BNE_MARK3(ed, xZR); GETIP_(ip); STORE_XEMU_CALL(x3); - CALL(native_div0, -1); + CALL(native_div0, -1, 0, 0); CLEARIP(); LOAD_XEMU_CALL(); jump_to_epilog(dyn, 0, xRIP, ninst); @@ -1369,7 +1368,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int BNE_MARK3(ed, xZR); GETIP_(ip); STORE_XEMU_CALL(x3); - CALL(native_div0, -1); + CALL(native_div0, -1, 0, 0); CLEARIP(); LOAD_XEMU_CALL(); jump_to_epilog(dyn, 0, xRIP, ninst); @@ -1384,7 +1383,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int BNE_MARK3(ed, xZR); GETIP_(ip); STORE_XEMU_CALL(x3); - CALL(native_div0, -1); + CALL(native_div0, -1, 0, 0); CLEARIP(); LOAD_XEMU_CALL(); jump_to_epilog(dyn, 0, xRIP, ninst); @@ -1399,8 +1398,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int BNE_MARK3(x2, xZR); BLT_MARK(xRAX, xZR); MARK3; - if (ed != x1) MV(x1, ed); - CALL((void*)idiv64, -1); + CALL((void*)idiv64, -1, ed, 0); B_NEXT_nocond; MARK; DIV(x2, xRAX, ed); diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c index 9838d9b8..12f7fb03 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f.c +++ b/src/dynarec/rv64/dynarec_rv64_0f.c @@ -91,7 +91,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("RDTSCP"); 
NOTEST(x1); if (box64_rdtsc) { - CALL(ReadTSC, x3); // will return the u64 in x3 + CALL(ReadTSC, x3, 0, 0); // will return the u64 in x3 } else { CSRRS(x3, xZR, 0xC01); // RDTIME } @@ -118,7 +118,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SMEND(); GETIP(addr); STORE_XEMU_CALL(x3); - CALL_S(x64Syscall, -1); + CALL_S(x64Syscall, -1, 0); LOAD_XEMU_CALL(); TABLE64(x3, addr); // expected return address BNE_MARK(xRIP, x3); @@ -134,7 +134,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SETFLAGS(X_ALL, SF_SET_NODF, NAT_FLAGS_NOFUSION); // Hack to set flags in "don't care" state GETIP(ip); STORE_XEMU_CALL(x3); - CALL(native_ud, -1); + CALL(native_ud, -1, 0, 0); LOAD_XEMU_CALL(); jump_to_epilog(dyn, 0, xRIP, ninst); *need_epilog = 0; @@ -146,7 +146,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SETFLAGS(X_ALL, SF_SET_NODF, NAT_FLAGS_NOFUSION); // Hack to set flags in "don't care" state GETIP(ip); STORE_XEMU_CALL(x3); - CALL(native_ud, -1); + CALL(native_ud, -1, 0, 0); LOAD_XEMU_CALL(); jump_to_epilog(dyn, 0, xRIP, ninst); *need_epilog = 0; @@ -416,7 +416,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("RDTSC"); NOTEST(x1); if (box64_rdtsc) { - CALL(ReadTSC, x3); // will return the u64 in x3 + CALL(ReadTSC, x3, 0, 0); // will return the u64 in x3 } else { CSRRS(x3, xZR, 0xC01); // RDTIME } @@ -554,11 +554,11 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni for (int i = 0; i < 4; ++i) { LBU(x3, gback, gdoffset + i * 2); LB(x4, wback, fixedaddress + i * 2); - MUL(x9, x3, x4); + MUL(x7, x3, x4); LBU(x3, gback, gdoffset + i * 2 + 1); LB(x4, wback, fixedaddress + i * 2 + 1); MUL(x3, x3, x4); - ADD(x3, x3, x9); + ADD(x3, x3, x7); if (rv64_zbb) { MIN(x3, x3, x5); MAX(x3, x3, x6); @@ -791,12 +791,10 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni ed 
= (nextop & 7) + (rex.b << 3); sse_reflect_reg(dyn, ninst, x6, ed); ADDI(x2, xEmu, offsetof(x64emu_t, xmm[ed])); + ed = x2; } else { SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 0, 0); - if (ed != x2) { - MV(x2, ed); - } } GETG; sse_forget_reg(dyn, ninst, x6, gd); @@ -804,22 +802,22 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni sse_reflect_reg(dyn, ninst, x6, 0); switch (u8) { case 0xC8: - CALL(sha1nexte, -1); + CALL(sha1nexte, -1, x1, ed); break; case 0xC9: - CALL(sha1msg1, -1); + CALL(sha1msg1, -1, x1, ed); break; case 0xCA: - CALL(sha1msg2, -1); + CALL(sha1msg2, -1, x1, ed); break; case 0xCB: - CALL(sha256rnds2, -1); + CALL(sha256rnds2, -1, x1, ed); break; case 0xCC: - CALL(sha256msg1, -1); + CALL(sha256msg1, -1, x1, ed); break; case 0xCD: - CALL(sha256msg2, -1); + CALL(sha256msg2, -1, x1, ed); break; } break; @@ -883,17 +881,17 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni ed = (nextop & 7) + (rex.b << 3); sse_reflect_reg(dyn, ninst, x6, ed); ADDI(x2, xEmu, offsetof(x64emu_t, xmm[ed])); + wback = x2; } else { SMREAD(); addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 0, 1); - if (wback != x2) MV(x2, wback); } u8 = F8; GETG; sse_forget_reg(dyn, ninst, x6, gd); ADDI(x1, xEmu, offsetof(x64emu_t, xmm[gd])); MOV32w(x3, u8); - CALL(sha1rnds4, -1); + CALL4(sha1rnds4, -1, x1, wback, x3, 0); break; default: DEFAULT; @@ -1746,8 +1744,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0xA2: INST_NAME("CPUID"); NOTEST(x1); - MV(A1, xRAX); - CALL_(my_cpuid, -1, 0); + CALL_(my_cpuid, -1, 0, xRAX, 0); // BX and DX are not synchronized during the call, so need to force the update LD(xRDX, xEmu, offsetof(x64emu_t, regs[_DX])); LD(xRBX, xEmu, offsetof(x64emu_t, regs[_BX])); @@ -1890,8 +1887,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SKIPTEST(x1); 
fpu_purgecache(dyn, ninst, 0, x1, x2, x3); addr = geted(dyn, addr, ninst, nextop, &ed, x1, x3, &fixedaddress, rex, NULL, 0, 0); - if (ed != x1) { MV(x1, ed); } - CALL(rex.is32bits ? ((void*)fpu_fxsave32) : ((void*)fpu_fxsave64), -1); + CALL(rex.is32bits ? ((void*)fpu_fxsave32) : ((void*)fpu_fxsave64), -1, ed, 0); break; case 1: INST_NAME("FXRSTOR Ed"); @@ -1899,8 +1895,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SKIPTEST(x1); fpu_purgecache(dyn, ninst, 0, x1, x2, x3); addr = geted(dyn, addr, ninst, nextop, &ed, x1, x3, &fixedaddress, rex, NULL, 0, 0); - if (ed != x1) { MV(x1, ed); } - CALL(rex.is32bits ? ((void*)fpu_fxrstor32) : ((void*)fpu_fxrstor64), -1); + CALL(rex.is32bits ? ((void*)fpu_fxrstor32) : ((void*)fpu_fxrstor64), -1, ed, 0); break; case 2: INST_NAME("LDMXCSR Md"); @@ -1920,28 +1915,23 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("XSAVE Ed"); MESSAGE(LOG_DUMP, "Need Optimization\n"); fpu_purgecache(dyn, ninst, 0, x1, x2, x3); - addr = geted(dyn, addr, ninst, nextop, &wback, x1, x2, &fixedaddress, rex, NULL, 0, 0); - if (ed != x1) { MV(x1, ed); } + addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0); MOV32w(x2, rex.w ? 0 : 1); - CALL((void*)fpu_xsave, -1); + CALL((void*)fpu_xsave, -1, ed, x2); break; case 5: INST_NAME("XRSTOR Ed"); MESSAGE(LOG_DUMP, "Need Optimization\n"); fpu_purgecache(dyn, ninst, 0, x1, x2, x3); - addr = geted(dyn, addr, ninst, nextop, &wback, x1, x2, &fixedaddress, rex, NULL, 0, 0); - if (ed != x1) { MV(x1, ed); } + addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0); MOV32w(x2, rex.w ? 
0 : 1); - CALL((void*)fpu_xrstor, -1); + CALL((void*)fpu_xrstor, -1, ed, x2); break; case 7: INST_NAME("CLFLUSH Ed"); MESSAGE(LOG_DUMP, "Need Optimization?\n"); - addr = geted(dyn, addr, ninst, nextop, &wback, x1, x2, &fixedaddress, rex, NULL, 0, 0); - if (wback != A1) { - MV(A1, wback); - } - CALL_(native_clflush, -1, 0); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0); + CALL_(native_clflush, -1, 0, ed, 0); break; default: DEFAULT; @@ -2268,10 +2258,10 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETEB(x1, 0); GETGB(x2); if (!(MODREG && wback == gb1 && !!(wb2) == !!(gb2))) - MV(x9, ed); + MV(x7, ed); emit_add8(dyn, ninst, ed, gd, x4, x5, x6); if (!(MODREG && wback == gb1 && !!(wb2) == !!(gb2))) - MV(gd, x9); + MV(gd, x7); EBBACK(x5, 0); if (!(MODREG && wback == gb1 && !!(wb2) == !!(gb2))) GBBACK(x5); @@ -2283,10 +2273,10 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETGD; GETED(0); if (ed != gd) - MV(x9, ed); + MV(x7, ed); emit_add32(dyn, ninst, rex, ed, gd, x4, x5, x6); if (ed != gd) - MVxw(gd, x9); + MVxw(gd, x7); WBACK; break; case 0xC2: diff --git a/src/dynarec/rv64/dynarec_rv64_64.c b/src/dynarec/rv64/dynarec_rv64_64.c index 282f39ee..4b7aaa94 100644 --- a/src/dynarec/rv64/dynarec_rv64_64.c +++ b/src/dynarec/rv64/dynarec_rv64_64.c @@ -260,7 +260,7 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni i64 = F32S; else i64 = F8S; - emit_add32c(dyn, ninst, rex, ed, i64, x3, x4, x5, x9); + emit_add32c(dyn, ninst, rex, ed, i64, x3, x4, x5, x7); WBACKO(x6); break; case 1: // OR @@ -293,7 +293,7 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni i64 = F8S; MOV64xw(x5, i64); SD(x6, xEmu, offsetof(x64emu_t, scratch)); - emit_adc32(dyn, ninst, rex, ed, x5, x3, x4, x6, x9); + emit_adc32(dyn, ninst, rex, ed, x5, x3, x4, x6, x7); LD(x6, xEmu, offsetof(x64emu_t, scratch)); WBACKO(x6); break; @@ 
-311,7 +311,7 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni else i64 = F8S; MOV64xw(x5, i64); - emit_sbb32(dyn, ninst, rex, ed, x5, x3, x4, x9); + emit_sbb32(dyn, ninst, rex, ed, x5, x3, x4, x7); WBACKO(x6); break; case 4: // AND @@ -341,7 +341,7 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni i64 = F32S; else i64 = F8S; - emit_sub32c(dyn, ninst, rex, ed, i64, x3, x4, x5, x9); + emit_sub32c(dyn, ninst, rex, ed, i64, x3, x4, x5, x7); WBACKO(x6); break; case 6: // XOR diff --git a/src/dynarec/rv64/dynarec_rv64_66.c b/src/dynarec/rv64/dynarec_rv64_66.c index 80a28e13..b36614b8 100644 --- a/src/dynarec/rv64/dynarec_rv64_66.c +++ b/src/dynarec/rv64/dynarec_rv64_66.c @@ -344,7 +344,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni nextop = F8; GETGW(x2); GETEW(x1, 0); - emit_cmp16(dyn, ninst, x1, x2, x9, x4, x5, x6); + emit_cmp16(dyn, ninst, x1, x2, x7, x4, x5, x6); break; case 0x3B: INST_NAME("CMP Gw, Ew"); @@ -352,7 +352,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni nextop = F8; GETGW(x1); GETEW(x2, 0); - emit_cmp16(dyn, ninst, x1, x2, x9, x4, x5, x6); + emit_cmp16(dyn, ninst, x1, x2, x7, x4, x5, x6); break; case 0x3D: INST_NAME("CMP AX, Iw"); @@ -603,9 +603,9 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni u64 = (uint16_t)(int16_t)F8S; if (u64) { MOV64x(x2, u64); - emit_cmp16(dyn, ninst, x1, x2, x9, x4, x5, x6); + emit_cmp16(dyn, ninst, x1, x2, x7, x4, x5, x6); } else - emit_cmp16_0(dyn, ninst, x1, x9, x4); + emit_cmp16_0(dyn, ninst, x1, x7, x4); break; default: DEFAULT; @@ -1052,7 +1052,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETEW(x1, 1); u8 = F8; MOV32w(x2, u8); - CALL_(rol16, x1, x3); + CALL_(rol16, x1, x3, x1, x2); EWBACK; break; case 1: @@ -1062,7 +1062,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int 
ni GETEW(x1, 1); u8 = F8; MOV32w(x2, u8); - CALL_(ror16, x1, x3); + CALL_(ror16, x1, x3, x1, x2); EWBACK; break; case 2: @@ -1073,7 +1073,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETEW(x1, 1); u8 = F8; MOV32w(x2, u8); - CALL_(rcl16, x1, x3); + CALL_(rcl16, x1, x3, x1, x2); EWBACK; break; case 3: @@ -1084,7 +1084,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETEW(x1, 1); u8 = F8; MOV32w(x2, u8); - CALL_(rcr16, x1, x3); + CALL_(rcr16, x1, x3, x1, x2); EWBACK; break; case 4: @@ -1161,7 +1161,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni MESSAGE(LOG_DUMP, "Need Optimization\n"); SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION); GETEW(x1, 1); - CALL_(rol16, x1, x3); + CALL_(rol16, x1, x3, x1, x2); EWBACK; break; case 1: @@ -1175,7 +1175,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni MESSAGE(LOG_DUMP, "Need Optimization\n"); SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION); GETEW(x1, 1); - CALL_(ror16, x1, x3); + CALL_(ror16, x1, x3, x1, x2); EWBACK; break; case 2: @@ -1190,7 +1190,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni READFLAGS(X_CF); SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION); GETEW(x1, 1); - CALL_(rcl16, x1, x3); + CALL_(rcl16, x1, x3, x1, x2); EWBACK; break; case 3: @@ -1205,7 +1205,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni READFLAGS(X_CF); SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION); GETEW(x1, 1); - CALL_(rcr16, x1, x3); + CALL_(rcr16, x1, x3, x1, x2); EWBACK; break; case 5: @@ -1326,22 +1326,22 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SET_DFNONE(); GETEW(x1, 0); ZEXTH(x2, xRAX); - SLLI(x9, xRDX, 48); - SRLI(x9, x9, 32); - OR(x2, x2, x9); + SLLI(x7, xRDX, 48); + SRLI(x7, x7, 32); + OR(x2, x2, x7); if (box64_dynarec_div0) { BNE_MARK3(ed, xZR); 
GETIP_(ip); STORE_XEMU_CALL(x6); - CALL(native_div0, -1); + CALL(native_div0, -1, 0, 0); CLEARIP(); LOAD_XEMU_CALL(); jump_to_epilog(dyn, 0, xRIP, ninst); MARK3; } - DIVUW(x9, x2, ed); + DIVUW(x7, x2, ed); REMUW(x4, x2, ed); - INSHz(xRAX, x9, x5, x6, 1, 1); + INSHz(xRAX, x7, x5, x6, 1, 1); INSHz(xRDX, x4, x5, x6, 0, 1); break; case 7: @@ -1354,7 +1354,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni BNE_MARK3(ed, xZR); GETIP_(ip); STORE_XEMU_CALL(x6); - CALL(native_div0, -1); + CALL(native_div0, -1, 0, 0); CLEARIP(); LOAD_XEMU_CALL(); jump_to_epilog(dyn, 0, xRIP, ninst); diff --git a/src/dynarec/rv64/dynarec_rv64_660f.c b/src/dynarec/rv64/dynarec_rv64_660f.c index f9547d8c..2bad6621 100644 --- a/src/dynarec/rv64/dynarec_rv64_660f.c +++ b/src/dynarec/rv64/dynarec_rv64_660f.c @@ -1451,10 +1451,10 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETGW(x1); GETEW(x2, 0); if (!(MODREG && wback == TO_NAT(((nextop & 0x38) >> 3) + (rex.r << 3)))) - MV(x9, ed); + MV(x7, ed); emit_add16(dyn, ninst, ed, gd, x4, x5, x6); if (!(MODREG && wback == TO_NAT(((nextop & 0x38) >> 3) + (rex.r << 3)))) - MV(gd, x9); + MV(gd, x7); EWBACK; if (!(MODREG && wback == TO_NAT(((nextop & 0x38) >> 3) + (rex.r << 3)))) GWBACK; diff --git a/src/dynarec/rv64/dynarec_rv64_660f38.c b/src/dynarec/rv64/dynarec_rv64_660f38.c index 5ac15061..ca1146f0 100644 --- a/src/dynarec/rv64/dynarec_rv64_660f38.c +++ b/src/dynarec/rv64/dynarec_rv64_660f38.c @@ -197,11 +197,11 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode, for (int i = 0; i < 8; ++i) { LBU(x3, gback, gdoffset + i * 2); LB(x4, wback, fixedaddress + i * 2); - MUL(x9, x3, x4); + MUL(x7, x3, x4); LBU(x3, gback, gdoffset + i * 2 + 1); LB(x4, wback, fixedaddress + i * 2 + 1); MUL(x3, x3, x4); - ADD(x3, x3, x9); + ADD(x3, x3, x7); if (rv64_zbb) { MIN(x3, x3, x5); MAX(x3, x3, x6); @@ -742,16 +742,16 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, 
uintptr_t addr, uint8_t opcode, ed = (nextop & 7) + (rex.b << 3); sse_reflect_reg(dyn, ninst, x6, ed); ADDI(x1, xEmu, offsetof(x64emu_t, xmm[ed])); + ed = x1; } else { addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 1); - if (ed != x1) MV(x1, ed); } // prepare rest arguments MV(x2, xRDX); MV(x4, xRAX); u8 = F8; MOV32w(x5, u8); - CALL(sse42_compare_string_explicit_len, x1); + CALL6(sse42_compare_string_explicit_len, x1, ed, x2, x3, x4, x5, 0); ZEROUP(x1); BNEZ_MARK(x1); MOV32w(xRCX, (u8 & 1) ? 8 : 16); @@ -773,7 +773,7 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode, SSE_LOOP_MV_Q(x3); sse_forget_reg(dyn, ninst, x6, gd); MOV32w(x1, gd); - CALL(native_aesimc, -1); + CALL(native_aesimc, -1, x1, 0); break; case 0xDC: INST_NAME("AESENC Gx, Ex"); // AES-NI @@ -781,7 +781,7 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode, GETG; sse_forget_reg(dyn, ninst, x6, gd); MOV32w(x1, gd); - CALL(native_aese, -1); + CALL(native_aese, -1, x1, 0); GETGX(); GETEX(x2, 0, 8); SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4)); @@ -792,7 +792,7 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode, GETG; sse_forget_reg(dyn, ninst, x6, gd); MOV32w(x1, gd); - CALL(native_aeselast, -1); + CALL(native_aeselast, -1, x1, 0); GETGX(); GETEX(x2, 0, 8); SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4)); @@ -803,7 +803,7 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode, GETG; sse_forget_reg(dyn, ninst, x6, gd); MOV32w(x1, gd); - CALL(native_aesd, -1); + CALL(native_aesd, -1, x1, 0); GETGX(); GETEX(x2, 0, 8); SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4)); @@ -815,7 +815,7 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode, GETG; sse_forget_reg(dyn, ninst, x6, gd); MOV32w(x1, gd); - CALL(native_aesdlast, -1); + CALL(native_aesdlast, -1, x1, 0); GETGX(); GETEX(x2, 0, 8); SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4)); @@ -1218,7 +1218,7 @@ uintptr_t 
dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode, } u8 = F8; MOV32w(x4, u8); - CALL(native_pclmul, -1); + CALL4(native_pclmul, -1, x1, x2, x3, x4); break; case 0x63: INST_NAME("PCMPISTRI Gx, Ex, Ib"); @@ -1231,13 +1231,13 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode, ed = (nextop & 7) + (rex.b << 3); sse_reflect_reg(dyn, ninst, x6, ed); ADDI(x1, xEmu, offsetof(x64emu_t, xmm[ed])); + ed = x1; } else { addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 1); - if (ed != x1) MV(x1, ed); } u8 = F8; MOV32w(x3, u8); - CALL(sse42_compare_string_implicit_len, x1); + CALL4(sse42_compare_string_implicit_len, x1, ed, x2, x3, 0); ZEROUP(x1); BNEZ_MARK(x1); MOV32w(xRCX, (u8 & 1) ? 8 : 16); @@ -1271,7 +1271,7 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode, } u8 = F8; MOV32w(x4, u8); - CALL(native_aeskeygenassist, -1); + CALL4(native_aeskeygenassist, -1, x1, x2, x3, x4); break; default: DEFAULT; diff --git a/src/dynarec/rv64/dynarec_rv64_d9.c b/src/dynarec/rv64/dynarec_rv64_d9.c index 44fe914d..4ea79815 100644 --- a/src/dynarec/rv64/dynarec_rv64_d9.c +++ b/src/dynarec/rv64/dynarec_rv64_d9.c @@ -210,7 +210,7 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni MESSAGE(LOG_DUMP, "Need Optimization\n"); x87_refresh(dyn, ninst, x1, x2, 0); s0 = x87_stackcount(dyn, ninst, x1); - CALL(fpu_fxam, -1); // should be possible inline, but is it worth it? + CALL(fpu_fxam, -1, 0, 0); // should be possible inline, but is it worth it? 
x87_unstackcount(dyn, ninst, x1, s0); #endif break; @@ -266,7 +266,7 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni MESSAGE(LOG_DUMP, "Need Optimization\n"); x87_forget(dyn, ninst, x1, x2, 0); s0 = x87_stackcount(dyn, ninst, x3); - CALL(native_f2xm1, -1); + CALL(native_f2xm1, -1, 0, 0); x87_unstackcount(dyn, ninst, x3, s0); break; case 0xF1: @@ -275,7 +275,7 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni x87_forget(dyn, ninst, x1, x2, 0); x87_forget(dyn, ninst, x1, x2, 1); s0 = x87_stackcount(dyn, ninst, x3); - CALL(native_fyl2x, -1); + CALL(native_fyl2x, -1, 0, 0); x87_unstackcount(dyn, ninst, x3, s0); X87_POP_OR_FAIL(dyn, ninst, x3); break; @@ -284,7 +284,7 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni MESSAGE(LOG_DUMP, "Need Optimization\n"); x87_forget(dyn, ninst, x1, x2, 0); s0 = x87_stackcount(dyn, ninst, x3); - CALL(native_ftan, -1); + CALL(native_ftan, -1, 0, 0); x87_unstackcount(dyn, ninst, x3, s0); X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_F); if (ST_IS_F(0)) { @@ -301,7 +301,7 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni x87_forget(dyn, ninst, x1, x2, 0); x87_forget(dyn, ninst, x1, x2, 1); s0 = x87_stackcount(dyn, ninst, x3); - CALL(native_fpatan, -1); + CALL(native_fpatan, -1, 0, 0); x87_unstackcount(dyn, ninst, x3, s0); X87_POP_OR_FAIL(dyn, ninst, x3); break; @@ -311,7 +311,7 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni X87_PUSH_EMPTY_OR_FAIL(dyn, ninst, 0); x87_forget(dyn, ninst, x1, x2, 1); s0 = x87_stackcount(dyn, ninst, x3); - CALL(native_fxtract, -1); + CALL(native_fxtract, -1, 0, 0); x87_unstackcount(dyn, ninst, x3, s0); break; case 0xF5: @@ -320,7 +320,7 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni x87_forget(dyn, ninst, x1, x2, 0); x87_forget(dyn, ninst, x1, x2, 1); s0 = x87_stackcount(dyn, ninst, x3); - 
CALL(native_fprem1, -1); + CALL(native_fprem1, -1, 0, 0); x87_unstackcount(dyn, ninst, x3, s0); break; case 0xF6: @@ -345,7 +345,7 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni x87_forget(dyn, ninst, x1, x2, 0); x87_forget(dyn, ninst, x1, x2, 1); s0 = x87_stackcount(dyn, ninst, x3); - CALL(native_fprem, -1); + CALL(native_fprem, -1, 0, 0); x87_unstackcount(dyn, ninst, x3, s0); break; case 0xF9: @@ -354,7 +354,7 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni x87_forget(dyn, ninst, x1, x2, 0); x87_forget(dyn, ninst, x1, x2, 1); s0 = x87_stackcount(dyn, ninst, x3); - CALL(native_fyl2xp1, -1); + CALL(native_fyl2xp1, -1, 0, 0); x87_unstackcount(dyn, ninst, x3, s0); X87_POP_OR_FAIL(dyn, ninst, x3); break; @@ -373,7 +373,7 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni X87_PUSH_EMPTY_OR_FAIL(dyn, ninst, 0); x87_forget(dyn, ninst, x1, x2, 1); s0 = x87_stackcount(dyn, ninst, x3); - CALL(native_fsincos, -1); + CALL(native_fsincos, -1, 0, 0); x87_unstackcount(dyn, ninst, x3, s0); break; case 0xFC: @@ -422,7 +422,7 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni x87_forget(dyn, ninst, x1, x2, 0); x87_forget(dyn, ninst, x1, x2, 1); s0 = x87_stackcount(dyn, ninst, x3); - CALL(native_fscale, -1); + CALL(native_fscale, -1, 0, 0); x87_unstackcount(dyn, ninst, x3, s0); break; case 0xFE: @@ -430,7 +430,7 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni MESSAGE(LOG_DUMP, "Need Optimization\n"); x87_forget(dyn, ninst, x1, x2, 0); s0 = x87_stackcount(dyn, ninst, x3); - CALL(native_fsin, -1); + CALL(native_fsin, -1, 0, 0); x87_unstackcount(dyn, ninst, x3, s0); break; case 0xFF: @@ -438,7 +438,7 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni MESSAGE(LOG_DUMP, "Need Optimization\n"); x87_forget(dyn, ninst, x1, x2, 0); s0 = x87_stackcount(dyn, ninst, x3); - CALL(native_fcos, 
-1); + CALL(native_fcos, -1, 0, 0); x87_unstackcount(dyn, ninst, x3, s0); break; @@ -494,11 +494,8 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni MESSAGE(LOG_DUMP, "Need Optimization\n"); fpu_purgecache(dyn, ninst, 0, x1, x2, x3); // maybe only x87, not SSE? addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0); - if (ed != x1) { - MV(x1, ed); - } MOV32w(x2, 0); - CALL(fpu_loadenv, -1); + CALL(fpu_loadenv, -1, ed, x2); break; case 5: INST_NAME("FLDCW Ew"); @@ -510,11 +507,8 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni MESSAGE(LOG_DUMP, "Need Optimization\n"); fpu_purgecache(dyn, ninst, 0, x1, x2, x3); // maybe only x87, not SSE? addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0); - if (ed != x1) { - MV(x1, ed); - } MOV32w(x2, 0); - CALL(fpu_savenv, -1); + CALL(fpu_savenv, -1, ed, x2); break; case 7: INST_NAME("FNSTCW Ew"); diff --git a/src/dynarec/rv64/dynarec_rv64_db.c b/src/dynarec/rv64/dynarec_rv64_db.c index 70c77965..6943eac7 100644 --- a/src/dynarec/rv64/dynarec_rv64_db.c +++ b/src/dynarec/rv64/dynarec_rv64_db.c @@ -140,7 +140,7 @@ uintptr_t dynarec64_DB(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("FNINIT"); MESSAGE(LOG_DUMP, "Need Optimization\n"); x87_purgecache(dyn, ninst, 0, x1, x2, x3); - CALL(reset_fpu, -1); + CALL(reset_fpu, -1, 0, 0); break; case 0xE8: case 0xE9: @@ -268,7 +268,7 @@ uintptr_t dynarec64_DB(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni ADDI(x1, ed, fixedaddress); } X87_PUSH_EMPTY_OR_FAIL(dyn, ninst, x3); - CALL(native_fld, -1); + CALL(native_fld, -1, x1, 0); } } break; @@ -281,11 +281,8 @@ uintptr_t dynarec64_DB(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { x87_forget(dyn, ninst, x1, x3, 0); addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0); - if (ed != x1) { - MV(x1, ed); - } s0 = x87_stackcount(dyn, ninst, 
x3); - CALL(native_fstp, -1); + CALL(native_fstp, -1, ed, 0); x87_unstackcount(dyn, ninst, x3, s0); } X87_POP_OR_FAIL(dyn, ninst, x3); diff --git a/src/dynarec/rv64/dynarec_rv64_dd.c b/src/dynarec/rv64/dynarec_rv64_dd.c index fccc97d5..2d2c22d4 100644 --- a/src/dynarec/rv64/dynarec_rv64_dd.c +++ b/src/dynarec/rv64/dynarec_rv64_dd.c @@ -55,7 +55,7 @@ uintptr_t dynarec64_DD(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni MESSAGE(LOG_DUMP, "Need Optimization\n"); x87_purgecache(dyn, ninst, 0, x1, x2, x3); MOV32w(x1, nextop & 7); - CALL(fpu_do_free, -1); + CALL(fpu_do_free, -1, x1, 0); break; case 0xD0: case 0xD1: @@ -202,8 +202,7 @@ uintptr_t dynarec64_DD(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni MESSAGE(LOG_DUMP, "Need Optimization\n"); fpu_purgecache(dyn, ninst, 0, x1, x2, x3); addr = geted(dyn, addr, ninst, nextop, &ed, x4, x6, &fixedaddress, rex, NULL, 0, 0); - if (ed != x1) { MV(x1, ed); } - CALL(native_fsave, -1); + CALL(native_fsave, -1, ed, 0); break; case 7: INST_NAME("FNSTSW m2byte"); diff --git a/src/dynarec/rv64/dynarec_rv64_df.c b/src/dynarec/rv64/dynarec_rv64_df.c index 7a775c3c..119a2910 100644 --- a/src/dynarec/rv64/dynarec_rv64_df.c +++ b/src/dynarec/rv64/dynarec_rv64_df.c @@ -216,9 +216,8 @@ uintptr_t dynarec64_DF(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("FBLD ST0, tbytes"); X87_PUSH_EMPTY_OR_FAIL(dyn, ninst, x1); addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0); - if (ed != x1) { MV(x1, ed); } s0 = x87_stackcount(dyn, ninst, x3); - CALL(fpu_fbld, -1); + CALL(fpu_fbld, -1, ed, 0); x87_unstackcount(dyn, ninst, x3, s0); break; case 5: @@ -253,9 +252,8 @@ uintptr_t dynarec64_DF(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("FBSTP tbytes, ST0"); x87_forget(dyn, ninst, x1, x2, 0); addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0); - if (ed != x1) { MV(x1, ed); } s0 = x87_stackcount(dyn, ninst, x3); - 
CALL(fpu_fbst, -1); + CALL(fpu_fbst, -1, ed, 0); x87_unstackcount(dyn, ninst, x3, s0); X87_POP_OR_FAIL(dyn, ninst, x3); break; diff --git a/src/dynarec/rv64/dynarec_rv64_f0.c b/src/dynarec/rv64/dynarec_rv64_f0.c index 26664a03..b4e529a7 100644 --- a/src/dynarec/rv64/dynarec_rv64_f0.c +++ b/src/dynarec/rv64/dynarec_rv64_f0.c @@ -168,8 +168,8 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SLL(x1, x1, x5); OR(x1, x1, x2); ANDI(x2, wback, ~0b11); // align to 32bit again - SC_W(x9, x1, x2, 1, 1); - BNEZ_MARKLOCK(x9); + SC_W(x7, x1, x2, 1, 1); + BNEZ_MARKLOCK(x7); // done MARK; UFLAG_IF { emit_cmp8(dyn, ninst, x6, x4, x1, x2, x3, x5); } @@ -283,11 +283,11 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni ANDI(xFlags, xFlags, ~(1 << F_ZF)); if (rex.w) { // there is no atomic move on 16bytes, so implement it with mutex - LD(x9, xEmu, offsetof(x64emu_t, context)); - ADDI(x9, x9, offsetof(box64context_t, mutex_16b)); + LD(x7, xEmu, offsetof(x64emu_t, context)); + ADDI(x7, x7, offsetof(box64context_t, mutex_16b)); ADDI(x4, xZR, 1); MARK2; - AMOSWAP_W(x4, x4, x9, 1, 1); + AMOSWAP_W(x4, x4, x7, 1, 1); // x4 == 1 if locked BNEZ_MARK2(x4); @@ -309,7 +309,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SMDMB(); // unlock - AMOSWAP_W(xZR, xZR, x9, 1, 1); + AMOSWAP_W(xZR, xZR, x7, 1, 1); } else { SMDMB(); AND(x3, xRAX, xMASK); @@ -375,12 +375,12 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni ANDI(x4, x5, 0xff); // x4 = Ed.b[0] ANDI(x5, x5, ~0xff); // x5 = clear Ed.b[0] ADDW(x6, x4, x2); - ANDI(x9, xFlags, 1 << F_CF); - ADDW(x6, x6, x9); // x6 = adc + ANDI(x7, xFlags, 1 << F_CF); + ADDW(x6, x6, x7); // x6 = adc ANDI(x6, x6, 0xff); OR(x5, x5, x6); - SC_W(x9, x5, wback, 1, 1); - BNEZ_MARKLOCK(x9); + SC_W(x7, x5, wback, 1, 1); + BNEZ_MARKLOCK(x7); B_MARK3_nocond; MARK; SLLI(x3, x3, 3); @@ -391,9 +391,9 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* 
dyn, uintptr_t addr, uintptr_t ip, int ni SLL(x2, x2, x3); // x2 = extented Gb MARK2; LR_W(x6, wback, 1, 1); // x6 = Ed - AND(x9, x6, x4); // x9 = extended Ed.b[dest] + AND(x7, x6, x4); // x7 = extended Ed.b[dest] AND(x6, x6, x5); // x6 = clear Ed.b[dest] - ADDW(x5, x9, x2); + ADDW(x5, x7, x2); ANDI(x4, xFlags, 1 << F_CF); SLL(x4, x4, x3); // extented ADDW(x5, x5, x4); // x5 = adc @@ -402,7 +402,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni BNEZ_MARK2(x4); IFXORNAT (X_ALL | X_PEND) { SRLI(x2, x2, x3); // Gb - SRLI(x4, x9, x3); // Eb + SRLI(x4, x7, x3); // Eb } MARK3; IFXORNAT (X_ALL | X_PEND) { @@ -533,11 +533,11 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni i64 = F32S; else i64 = F8S; - MOV64xw(x9, i64); + MOV64xw(x7, i64); ANDI(x1, wback, (1 << (rex.w + 2)) - 1); BNEZ_MARK3(x1); // Aligned - AMOADDxw(x1, x9, wback, 1, 1); + AMOADDxw(x1, x7, wback, 1, 1); B_MARK_nocond; MARK3; // Unaligned @@ -545,7 +545,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni MARK2; // Use MARK2 as a "MARKLOCK" since we're running out of marks. 
LDxw(x1, wback, 0); LRxw(x6, x5, 1, 1); - ADDxw(x4, x1, x9); + ADDxw(x4, x1, x7); SCxw(x3, x6, x5, 1, 1); BNEZ_MARK2(x3); SDxw(x4, wback, 0); @@ -601,8 +601,8 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni i64 = F32S; else i64 = F8S; - MOV64xw(x9, i64); - AMOANDxw(x1, x9, wback, 1, 1); + MOV64xw(x7, i64); + AMOANDxw(x1, x7, wback, 1, 1); IFXORNAT (X_ALL | X_PEND) emit_and32c(dyn, ninst, rex, x1, i64, x3, x4); } @@ -627,11 +627,11 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni i64 = F32S; else i64 = F8S; - MOV64xw(x9, i64); + MOV64xw(x7, i64); ANDI(x1, wback, (1 << (rex.w + 2)) - 1); BNEZ_MARK3(x1); // Aligned - SUB(x4, xZR, x9); + SUB(x4, xZR, x7); AMOADDxw(x1, x4, wback, 1, 1); B_MARK_nocond; MARK3; @@ -640,7 +640,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni MARK2; // Use MARK2 as a "MARKLOCK" since we're running out of marks. LDxw(x1, wback, 0); LRxw(x6, x5, 1, 1); - SUBxw(x4, x1, x9); + SUBxw(x4, x1, x7); SCxw(x3, x6, x5, 1, 1); BNEZ_MARK2(x3); SDxw(x4, wback, 0); @@ -670,8 +670,8 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni i64 = F32S; else i64 = F8S; - MOV64xw(x9, i64); - AMOXORxw(x1, x9, wback, 1, 1); + MOV64xw(x7, i64); + AMOXORxw(x1, x7, wback, 1, 1); IFXORNAT (X_ALL | X_PEND) emit_xor32c(dyn, ninst, rex, x1, i64, x3, x4); } diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c index 4b53e81f..af55ce5e 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.c +++ b/src/dynarec/rv64/dynarec_rv64_helper.c @@ -585,24 +585,22 @@ void jump_to_next(dynarec_rv64_t* dyn, uintptr_t ip, int reg, int ninst, int is3 SLLI(x2, x2, 3); ADD(x3, x3, x2); } - LD(x2, x3, 0); // LR_D(x2, x3, 1, 1); + LD(x2, x3, 0); } } else { uintptr_t p = getJumpTableAddress64(ip); MAYUSE(p); TABLE64(x3, p); GETIP_(ip); - LD(x2, x3, 0); // LR_D(x2, x3, 1, 1); - } - if (reg != A1) { - MV(A1, xRIP); + LD(x2, 
x3, 0); } CLEARIP(); -#ifdef HAVE_TRACE -// MOVx(x3, 15); no access to PC reg -#endif SMEND(); +#ifdef HAVE_TRACE + JALR(xRA, x2); +#else JALR((dyn->insts[ninst].x64.has_callret ? xRA : xZR), x2); +#endif } void ret_to_epilog(dynarec_rv64_t* dyn, int ninst, rex_t rex) @@ -779,7 +777,7 @@ void iret_to_epilog(dynarec_rv64_t* dyn, int ninst, int is64bits) CLEARIP(); } -void call_c(dynarec_rv64_t* dyn, int ninst, void* fnc, int reg, int ret, int saveflags, int savereg) +void call_c(dynarec_rv64_t* dyn, int ninst, void* fnc, int reg, int ret, int saveflags, int savereg, int arg1, int arg2, int arg3, int arg4, int arg5, int arg6) { MAYUSE(fnc); if (savereg == 0) @@ -791,40 +789,44 @@ void call_c(dynarec_rv64_t* dyn, int ninst, void* fnc, int reg, int ret, int sav fpu_pushcache(dyn, ninst, reg, 0); if (ret != -2) { SUBI(xSP, xSP, 16); // RV64 stack needs to be 16byte aligned - SD(xEmu, xSP, 0); - SD(savereg, xSP, 8); - // x5..x8, x10..x17, x28..x31 those needs to be saved by caller - STORE_REG(RAX); - STORE_REG(RCX); + SD(savereg, xSP, 0); + STORE_REG(RDI); + STORE_REG(RSI); STORE_REG(RDX); - STORE_REG(R12); - STORE_REG(R13); - STORE_REG(R14); - STORE_REG(R15); + STORE_REG(RCX); + STORE_REG(R8); + STORE_REG(R9); + STORE_REG(RAX); SD(xRIP, xEmu, offsetof(x64emu_t, ip)); } TABLE64(reg, (uintptr_t)fnc); + MV(A0, xEmu); + if (arg1) MV(A1, arg1); + if (arg2) MV(A2, arg2); + if (arg3) MV(A3, arg3); + if (arg4) MV(A4, arg4); + if (arg5) MV(A5, arg5); + if (arg6) MV(A6, arg6); JALR(xRA, reg); if (ret >= 0) { - MV(ret, xEmu); + MV(ret, A0); } - if (ret != -2) { - LD(xEmu, xSP, 0); - LD(savereg, xSP, 8); - ADDI(xSP, xSP, 16); + + LD(savereg, xSP, 0); + ADDI(xSP, xSP, 16); #define GO(A) \ if (ret != x##A) { LOAD_REG(A); } - GO(RAX); - GO(RCX); - GO(RDX); - GO(R12); - GO(R13); - GO(R14); - GO(R15); - if (ret != xRIP) - LD(xRIP, xEmu, offsetof(x64emu_t, ip)); + GO(RDI); + GO(RSI); + GO(RDX); + GO(RCX); + GO(R8); + GO(R9); + GO(RAX); #undef GO - } + if (ret != xRIP) + LD(xRIP, xEmu, 
offsetof(x64emu_t, ip)); + // regenerate mask XORI(xMASK, xZR, -1); SRLI(xMASK, xMASK, 32); @@ -845,54 +847,16 @@ void call_c(dynarec_rv64_t* dyn, int ninst, void* fnc, int reg, int ret, int sav void call_n(dynarec_rv64_t* dyn, int ninst, void* fnc, int w) { MAYUSE(fnc); - FLAGS_ADJUST_TO11(xFlags, xFlags, x3); - SD(xFlags, xEmu, offsetof(x64emu_t, eflags)); fpu_pushcache(dyn, ninst, x3, 1); - // x5..x8, x10..x17, x28..x31 those needs to be saved by caller - // RDI, RSI, RDX, RCX, R8, R9 are used for function call - SUBI(xSP, xSP, 16); - SD(xEmu, xSP, 0); - SD(xRIP, xSP, 8); // RV64 stack needs to be 16byte aligned - STORE_REG(R12); - STORE_REG(R13); - STORE_REG(R14); - STORE_REG(R15); - /* - // float and double args - if (abs(w) > 1) { - MESSAGE(LOG_DUMP, "Getting %d XMM args\n", abs(w) - 1); - for (int i = 0; i < abs(w) - 1; ++i) { - sse_get_reg(dyn, ninst, x6, i, 0); - } - } - if (w < 0) { - MESSAGE(LOG_DUMP, "Return in XMM0\n"); - sse_get_reg_empty(dyn, ninst, x6, 0, 0); - } - */ - // prepare regs for native call - MV(A0, xRDI); - MV(A1, xRSI); - MV(A2, xRDX); - MV(A3, xRCX); - MV(A4, xR8); - MV(A5, xR9); // native call - TABLE64(xRAX, (uintptr_t)fnc); // using xRAX as scratch regs for call address - JALR(xRA, xRAX); + TABLE64(x3, (uintptr_t)fnc); + JALR(xRA, x3); // put return value in x64 regs if (w > 0) { MV(xRAX, A0); MV(xRDX, A1); } // all done, restore all regs - LD(xEmu, xSP, 0); - LD(xRIP, xSP, 8); - ADDI(xSP, xSP, 16); - LOAD_REG(R12); - LOAD_REG(R13); - LOAD_REG(R14); - LOAD_REG(R15); // regenerate mask XORI(xMASK, xZR, -1); SRLI(xMASK, xMASK, 32); @@ -902,8 +866,6 @@ void call_n(dynarec_rv64_t* dyn, int ninst, void* fnc, int w) vector_vsetvli(dyn, ninst, x3, dyn->vector_sew, VECTOR_LMUL1, 1); fpu_popcache(dyn, ninst, x3, 1); - LD(xFlags, xEmu, offsetof(x64emu_t, eflags)); - FLAGS_ADJUST_FROM11(xFlags, xFlags, x3); // SET_NODF(); } @@ -927,7 +889,7 @@ void grab_segdata(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, int reg, int s CBZ_MARKSEG(t1); } 
MOV64x(x1, segment); - call_c(dyn, ninst, GetSegmentBaseEmu, t2, reg, 0, xFlags); + call_c(dyn, ninst, GetSegmentBaseEmu, t2, reg, 0, xFlags, x1, 0, 0, 0, 0, 0); MARKSEG; MESSAGE(LOG_DUMP, "----%s Offset\n", (segment == _FS) ? "FS" : "GS"); } @@ -2813,7 +2775,7 @@ static void flagsCacheTransform(dynarec_rv64_t* dyn, int ninst, int s1) j64 = (GETMARKF2) - (dyn->native_size); BEQZ(s1, j64); } - CALL_(UpdateFlags, -1, 0); + CALL_(UpdateFlags, -1, 0, 0, 0); MARKF2; } } diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h index ee61c13d..d78761cc 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.h +++ b/src/dynarec/rv64/dynarec_rv64_helper.h @@ -679,13 +679,15 @@ // CALL will use x6 for the call address. Return value can be put in ret (unless ret is -1) // R0 will not be pushed/popd if ret is -2 -#define CALL(F, ret) call_c(dyn, ninst, F, x6, ret, 1, 0) +#define CALL(F, ret, arg1, arg2) call_c(dyn, ninst, F, x6, ret, 1, 0, arg1, arg2, 0, 0, 0, 0) +#define CALL4(F, ret, arg1, arg2, arg3, arg4) call_c(dyn, ninst, F, x6, ret, 1, 0, arg1, arg2, arg3, arg4, 0, 0) +#define CALL6(F, ret, arg1, arg2, arg3, arg4, arg5, arg6) call_c(dyn, ninst, F, x6, ret, 1, 0, arg1, arg2, arg3, arg4, arg5, arg6) // CALL_ will use x6 for the call address. Return value can be put in ret (unless ret is -1) // R0 will not be pushed/popd if ret is -2 -#define CALL_(F, ret, reg) call_c(dyn, ninst, F, x6, ret, 1, reg) +#define CALL_(F, ret, reg, arg1, arg2) call_c(dyn, ninst, F, x6, ret, 1, reg, arg1, arg2, 0, 0, 0, 0) // CALL_S will use x6 for the call address. Return value can be put in ret (unless ret is -1) // R0 will not be pushed/popd if ret is -2. 
Flags are not save/restored -#define CALL_S(F, ret) call_c(dyn, ninst, F, x6, ret, 0, 0) +#define CALL_S(F, ret, arg1) call_c(dyn, ninst, F, x6, ret, 0, 0, arg1, 0, 0, 0, 0, 0) #define MARKi(i) dyn->insts[ninst].mark[i] = dyn->native_size #define GETMARKi(i) dyn->insts[ninst].mark[i] @@ -810,49 +812,29 @@ #define LOAD_REG(A) LD(x##A, xEmu, offsetof(x64emu_t, regs[_##A])) // Need to also store current value of some register, as they may be used by functions like setjmp -#define STORE_XEMU_CALL(s0) \ - if (rv64_xtheadmempair) { \ - ADDI(s0, xEmu, offsetof(x64emu_t, regs[_RSP])); \ - TH_SDD(xRDX, xRBX, xEmu, 1); \ - TH_SDD(xRSP, xRBP, s0, 0); \ - TH_SDD(xRSI, xRDI, s0, 1); \ - TH_SDD(xR8, xR9, s0, 2); \ - TH_SDD(xR10, xR11, s0, 3); \ - } else { \ - STORE_REG(RBX); \ - STORE_REG(RDX); \ - STORE_REG(RSP); \ - STORE_REG(RBP); \ - STORE_REG(RDI); \ - STORE_REG(RSI); \ - STORE_REG(R8); \ - STORE_REG(R9); \ - STORE_REG(R10); \ - STORE_REG(R11); \ - } +#define STORE_XEMU_CALL(s0) \ + STORE_REG(RBX); \ + STORE_REG(RSP); \ + STORE_REG(RBP); \ + STORE_REG(R10); \ + STORE_REG(R11); \ + STORE_REG(R12); \ + STORE_REG(R13); \ + STORE_REG(R14); \ + STORE_REG(R15); #define LOAD_XEMU_CALL() -#define LOAD_XEMU_REM(s0) \ - if (rv64_xtheadmempair) { \ - ADDI(s0, xEmu, offsetof(x64emu_t, regs[_RSP])); \ - TH_LDD(xRDX, xRBX, xEmu, 1); \ - TH_LDD(xRSP, xRBP, s0, 0); \ - TH_LDD(xRSI, xRDI, s0, 1); \ - TH_LDD(xR8, xR9, s0, 2); \ - TH_LDD(xR10, xR11, s0, 3); \ - } else { \ - LOAD_REG(RBX); \ - LOAD_REG(RDX); \ - LOAD_REG(RSP); \ - LOAD_REG(RBP); \ - LOAD_REG(RDI); \ - LOAD_REG(RSI); \ - LOAD_REG(R8); \ - LOAD_REG(R9); \ - LOAD_REG(R10); \ - LOAD_REG(R11); \ - } +#define LOAD_XEMU_REM(s0) \ + LOAD_REG(RBX); \ + LOAD_REG(RSP); \ + LOAD_REG(RBP); \ + LOAD_REG(R10); \ + LOAD_REG(R11); \ + LOAD_REG(R12); \ + LOAD_REG(R13); \ + LOAD_REG(R14); \ + LOAD_REG(R15); #define SET_DFNONE() \ @@ -869,7 +851,7 @@ MOV_U12(S, (N)); \ SW(S, xEmu, offsetof(x64emu_t, df)); \ if (dyn->f.pending == SF_PENDING && 
dyn->insts[ninst].x64.need_after && !(dyn->insts[ninst].x64.need_after & X_PEND)) { \ - CALL_(UpdateFlags, -1, 0); \ + CALL_(UpdateFlags, -1, 0, 0, 0); \ dyn->f.pending = SF_SET; \ SET_NODF(); \ } \ @@ -1013,7 +995,7 @@ j64 = (GETMARKF) - (dyn->native_size); \ BEQ(x3, xZR, j64); \ } \ - CALL_(UpdateFlags, -1, 0); \ + CALL_(UpdateFlags, -1, 0, 0, 0); \ MARKF; \ dyn->f.pending = SF_SET; \ SET_DFOK(); \ @@ -1169,7 +1151,7 @@ void rv64_epilog(void); void rv64_epilog_fast(void); -void* rv64_next(x64emu_t* emu, uintptr_t addr); +void* rv64_next(void); #ifndef STEPNAME #define STEPNAME3(N, M) N##M @@ -1394,7 +1376,7 @@ void jump_to_next(dynarec_rv64_t* dyn, uintptr_t ip, int reg, int ninst, int is3 void ret_to_epilog(dynarec_rv64_t* dyn, int ninst, rex_t rex); void retn_to_epilog(dynarec_rv64_t* dyn, int ninst, rex_t rex, int n); void iret_to_epilog(dynarec_rv64_t* dyn, int ninst, int is64bits); -void call_c(dynarec_rv64_t* dyn, int ninst, void* fnc, int reg, int ret, int saveflags, int save_reg); +void call_c(dynarec_rv64_t* dyn, int ninst, void* fnc, int reg, int ret, int saveflags, int savereg, int arg1, int arg2, int arg3, int arg4, int arg5, int arg6); void call_n(dynarec_rv64_t* dyn, int ninst, void* fnc, int w); void grab_segdata(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, int reg, int segment); void emit_cmp8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5, int s6); diff --git a/src/dynarec/rv64/dynarec_rv64_private.h b/src/dynarec/rv64/dynarec_rv64_private.h index 126ce630..9e876fd3 100644 --- a/src/dynarec/rv64/dynarec_rv64_private.h +++ b/src/dynarec/rv64/dynarec_rv64_private.h @@ -200,12 +200,12 @@ int Table64(dynarec_rv64_t *dyn, uint64_t val, int pass); // add a value to tab void CreateJmpNext(void* addr, void* next); -#define GO_TRACE(A, B, s0) \ - GETIP(addr); \ - MV(A1, xRIP); \ - STORE_XEMU_CALL(s0); \ - MOV64x(A2, B); \ - CALL(A, -1); \ +#define GO_TRACE(A, B, s0) \ + GETIP(addr); \ + MV(x1, xRIP); \ + STORE_XEMU_CALL(s0); \ + 
MOV64x(x2, B); \ + CALL(A, -1, x1, x2); \ LOAD_XEMU_CALL() #endif //__DYNAREC_RV64_PRIVATE_H_ diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h index 39e0b7bd..ecac8a46 100644 --- a/src/dynarec/rv64/rv64_emitter.h +++ b/src/dynarec/rv64/rv64_emitter.h @@ -1,85 +1,9 @@ #ifndef __RV64_EMITTER_H__ #define __RV64_EMITTER_H__ -/* - RV64 Emitter - -*/ - -// RV64 ABI -/* -reg name description saver ------------------------------------------------------- -x0 zero Hard-wired zero — -x1 ra Return address Caller -x2 sp Stack pointer Callee -x3 gp Global pointer — -x4 tp Thread pointer — -x5–7 t0–2 Temporaries Caller -x8 s0/fp Saved register/frame pointer Callee -x9 s1 Saved register Callee -x10–11 a0–1 Function arguments/return val. Caller -x12–17 a2–7 Function arguments Caller -x18–27 s2–11 Saved registers Callee -x28–31 t3–6 Temporaries Caller -------------------------------------------------------- -f0–7 ft0–7 FP temporaries Caller -f8–9 fs0–1 FP saved registers Callee -f10–11 fa0–1 FP arguments/return values Caller -f12–17 fa2–7 FP arguments Caller -f18–27 fs2–11 FP saved registers Callee -f28–31 ft8–11 FP temporaries Caller -*/ -// x86 Register mapping -#define xRAX 16 -#define xRCX 17 -#define xRDX 18 -#define xRBX 19 -#define xRSP 20 -#define xRBP 21 -#define xRSI 22 -#define xRDI 23 -#define xR8 24 -#define xR9 25 -#define xR10 26 -#define xR11 27 -#define xR12 28 -#define xR13 29 -#define xR14 30 -#define xR15 31 -#define xFlags 8 -#define xRIP 7 - -// convert a x86 register to native according to the register mapping -#define TO_NAT(A) (xRAX + (A)) - -// scratch registers -#define x1 11 -#define x2 12 -#define x3 13 -#define x4 14 -#define x5 15 -#define x6 6 -#define x9 9 -// used to clear the upper 32bits -#define xMASK 5 - -// emu is r10 -#define xEmu 10 -// RV64 RA -#define xRA 1 -#define xSP 2 -// RV64 args -#define A0 10 -#define A1 11 -#define A2 12 -#define A3 13 -#define A4 14 -#define A5 15 -#define A6 16 -#define A7 17 -// xZR 
reg is 0 -#define xZR 0 -#define wZR xZR + +#include "rv64_mapping.h" + +// RV64 Emitter // replacement for F_OF internaly, using a reserved bit. Need to use F_OF2 internaly, never F_OF directly! #define F_OF2 F_res3 diff --git a/src/dynarec/rv64/rv64_epilog.S b/src/dynarec/rv64/rv64_epilog.S index 1f819114..5c6c495a 100644 --- a/src/dynarec/rv64/rv64_epilog.S +++ b/src/dynarec/rv64/rv64_epilog.S @@ -1,7 +1,6 @@ -//riscv epilog for dynarec -//Save stuff, prepare stack and register -//called with pointer to emu as 1st parameter -//and address to jump to as 2nd parameter +#define ASM_MAPPING 1 +#include "rv64_mapping.h" +#undef ASM_MAPPING .text .align 4 @@ -9,60 +8,70 @@ .global rv64_epilog .global rv64_epilog_fast +// rv64_epilog(void) rv64_epilog: - //update register -> emu - sd x16, (a0) - sd x17, 8(a0) - sd x18, 16(a0) - sd x19, 24(a0) - sd x20, 32(a0) - sd x21, 40(a0) - sd x22, 48(a0) - sd x23, 56(a0) - sd x24, 64(a0) - sd x25, 72(a0) - sd x26, 80(a0) - sd x27, 88(a0) - sd x28, 96(a0) - sd x29, 104(a0) - sd x30, 112(a0) - sd x31, 120(a0) + // adjust flags bit 5 -> bit 11 - li x5, ~(1<<11) - and x8, x8, x5 - andi x5, x8, 1<<5 - slli x5, x5, 11-5 - or x8, x8, x5 - sd x8, 128(a0) //xFlags - sd x7, 136(a0) // put back reg value in emu, including EIP (so x7 must be EIP now) + li t3, ~(1<<11) + and Flags, Flags, t3 + andi t3, Flags, 1<<5 + slli t3, t3, 11-5 + or Flags, Flags, t3 + + // spill x86 registers to emu + sd RAX, 0(Emu) + sd RCX, 8(Emu) + sd RDX, 16(Emu) + sd RBX, 24(Emu) + sd RSP, 32(Emu) + sd RBP, 40(Emu) + sd RSI, 48(Emu) + sd RDI, 56(Emu) + sd R8, 64(Emu) + sd R9, 72(Emu) + sd R10, 80(Emu) + sd R11, 88(Emu) + sd R12, 96(Emu) + sd R13, 104(Emu) + sd R14, 112(Emu) + sd R15, 120(Emu) + sd Flags, 128(Emu) + sd RIP, 136(Emu) + + /*** switch to native register naming convention ***/ + + // fallback to epilog_fast now, just restoring saved regs rv64_epilog_fast: - ld sp, 808(a0) // restore saved sp from emu->xSPSave, see rv64_prolog - ld x9, -8(sp) - sd x9,
808(a0) // put back old value - ld ra, (sp) // save ra - ld x8, 8(sp) // save fp - ld x18, (2*8)(sp) - ld x19, (3*8)(sp) - ld x20, (4*8)(sp) - ld x21, (5*8)(sp) - ld x22, (6*8)(sp) - ld x23, (7*8)(sp) - ld x24, (8*8)(sp) - ld x25, (9*8)(sp) - ld x26, (10*8)(sp) - ld x27, (11*8)(sp) - ld x9, (12*8)(sp) - fld f18, (13*8)(sp) - fld f19, (14*8)(sp) - fld f20, (15*8)(sp) - fld f21, (16*8)(sp) - fld f22, (17*8)(sp) - fld f23, (19*8)(sp) - fld f24, (19*8)(sp) - fld f25, (20*8)(sp) - fld f26, (21*8)(sp) - fld f27, (22*8)(sp) + // restore saved sp from emu->xSPSave + ld sp, 808(Emu) + ld t3, -8(sp) + // put back old value + sd t3, 808(Emu) + + ld ra, (0*8)(sp) + ld fp, (1*8)(sp) + ld s1, (2*8)(sp) + ld s2, (3*8)(sp) + ld s3, (4*8)(sp) + ld s4, (5*8)(sp) + ld s5, (6*8)(sp) + ld s6, (7*8)(sp) + ld s7, (8*8)(sp) + ld s8, (9*8)(sp) + ld s9, (10*8)(sp) + ld s10, (11*8)(sp) + ld s11, (12*8)(sp) + fld fs2, (13*8)(sp) + fld fs3, (14*8)(sp) + fld fs4, (15*8)(sp) + fld fs5, (16*8)(sp) + fld fs6, (17*8)(sp) + fld fs7, (19*8)(sp) + fld fs8, (19*8)(sp) + fld fs9, (20*8)(sp) + fld fs10, (21*8)(sp) + fld fs11, (22*8)(sp) + + // 16 bytes aligned addi sp, sp, (8 * 24) - //end, return... 
ret diff --git a/src/dynarec/rv64/rv64_mapping.h b/src/dynarec/rv64/rv64_mapping.h new file mode 100644 index 00000000..bc40184d --- /dev/null +++ b/src/dynarec/rv64/rv64_mapping.h @@ -0,0 +1,119 @@ +#ifndef __RV64_MAPPING_H__ +#define __RV64_MAPPING_H__ + +// RV64 Register Mapping Scheme +/***************************************************************************************** +reg name mapping native description Box64 description saver +****************************************************************************************** +x0 zero native zero Hard-wired zero N/A — +x1 ra native ra Return address N/A Caller +x2 sp native sp Stack pointer N/A Callee +x3 gp native gp Global pointer N/A — +x4 tp native tp Thread pointer N/A — +x5 t0 xMask Temporary Always 0xFFFFFFFF Caller +x6 t1 x1 Temporary Scratch Caller +x7 t2 x2 Temporary Scratch Caller +x8 s0/fp RBP Saved register/frame pointer - Callee +x9 s1 RSP Saved register - Callee +x10 a0 RDI Function argument/return val. - Caller +x11 a1 RSI Function argument/return val. 
- Caller +x12 a2 RDX Function argument - Caller +x13 a3 RCX Function argument - Caller +x14 a4 R8 Function argument - Caller +x15 a5 R9 Function argument - Caller +x16 a6 RAX Function argument - Caller +x17 a7 x7 Function argument The Emu struct Caller +x18 s2 R12 Saved register - Callee +x19 s3 R13 Saved register - Callee +x20 s4 R14 Saved register - Callee +x21 s5 R15 Saved register - Callee +x22 s6 RIP Saved register - Callee +x23 s7 FLAGS Saved register - Callee +x24 s8 RBX Saved register - Callee +x25 s9 xEmu Saved register Scratch Callee +x26 s10 R10 Saved register - Callee +x27 s11 R11 Saved register - Callee +x28 t3 x3 Temporary Scratch Caller +x29 t4 x4 Temporary Scratch Caller +x30 t5 x5 Temporary Scratch Caller +x31 t6 x6 Temporary Scratch Caller +******************************************************************************************/ + +#ifndef ASM_MAPPING + +#include <stdint.h> + +// x86 Register mapping +#define xRAX 16 +#define xRCX 13 +#define xRDX 12 +#define xRBX 24 +#define xRSP 9 +#define xRBP 8 +#define xRSI 11 +#define xRDI 10 +#define xR8 14 +#define xR9 15 +#define xR10 26 +#define xR11 27 +#define xR12 18 +#define xR13 19 +#define xR14 20 +#define xR15 21 +#define xRIP 22 +#define xFlags 23 + +// convert a x86 register to native according to the register mapping +#define TO_NAT(A) (((uint8_t[]) { 16, 13, 12, 24, 9, 8, 11, 10, 14, 15, 26, 27, 18, 19, 20, 21 })[(A)]) + +#define x1 6 +#define x2 7 +#define x3 28 +#define x4 29 +#define x5 30 +#define x6 31 +#define xEmu 25 + +#define xMASK 5 +#define x7 17 + +#define xRA 1 +#define xSP 2 +#define A0 10 +#define A1 11 +#define A2 12 +#define A3 13 +#define A4 14 +#define A5 15 +#define A6 16 +#define A7 17 + +#define xZR 0 + +#else + +// x86 Register mapping +#define RAX x16 +#define RCX x13 +#define RDX x12 +#define RBX x24 +#define RSP x9 +#define RBP x8 +#define RSI x11 +#define RDI x10 +#define R8 x14 +#define R9 x15 +#define R10 x26 +#define R11 x27 +#define R12 x18 +#define R13 x19 
+#define R14 x20 +#define R15 x21 +#define RIP x22 +#define Flags x23 +#define Emu x25 +#define MASK x5 + +#endif + +#endif // __RV64_MAPPING_H__ diff --git a/src/dynarec/rv64/rv64_next.S b/src/dynarec/rv64/rv64_next.S index ce34bb7f..a631aac3 100644 --- a/src/dynarec/rv64/rv64_next.S +++ b/src/dynarec/rv64/rv64_next.S @@ -1,7 +1,6 @@ -//riscv update linker table for dynarec -//called with pointer to emu as 1st parameter -//and address of table to as 2nd parameter -//ip is at r12 +#define ASM_MAPPING 1 +#include "rv64_mapping.h" +#undef ASM_MAPPING .text .align 4 @@ -10,42 +9,56 @@ .global rv64_next - .8byte 0 // NULL pointer before rv64_next, for getDB + // NULL pointer before rv64_next, for getDB + .8byte 0 + +// rv64_next(void) rv64_next: - // emu is a0 - // IP address is a1 + + // 16 bytes aligned addi sp, sp, -(8 * 10) - sd a0, (sp) - sd a1, 8(sp) - sd x5, 16(sp) - sd x7, 24(sp) - sd x16, 32(sp) - sd x17, 40(sp) - sd x28, 48(sp) - sd x29, 56(sp) - sd x30, 64(sp) - sd x31, 72(sp) - - mv a2, ra // "from" is in ra, so put in a2 - addi a3, sp, 24 // a3 is address to change rip + + // push regs we care that might be destroyed + sd RDI, (0*8)(sp) + sd RSI, (1*8)(sp) + sd RDX, (2*8)(sp) + sd RCX, (3*8)(sp) + sd R8, (4*8)(sp) + sd R9, (5*8)(sp) + sd RAX, (6*8)(sp) + sd RIP, (8*8)(sp) + + mv a0, Emu + mv a1, RIP +#ifdef HAVE_TRACE + mv a2, ra // "from" is in ra, so put in a2 +#endif + addi a3, sp, 8*8 // a3 is address to change rip + // call the function 1: - auipc a4, %pcrel_hi(LinkNext) - jalr a4, %pcrel_lo(1b) - // preserve return value - mv a3, a0 + auipc t4, %pcrel_hi(LinkNext) + jalr t4, %pcrel_lo(1b) + + // preserve return value + mv t3, a0 + + // pop regs - ld a0, (sp) - ld a1, 8(sp) - ld x5, 16(sp) - ld x7, 24(sp) - ld x16, 32(sp) - ld x17, 40(sp) - ld x28, 48(sp) - ld x29, 56(sp) - ld x30, 64(sp) - ld x31, 72(sp) + ld RDI, (0*8)(sp) + ld RSI, (1*8)(sp) + ld RDX, (2*8)(sp) + ld RCX, (3*8)(sp) + ld R8, (4*8)(sp) + ld R9, (5*8)(sp) + ld RAX, (6*8)(sp) + ld RIP,
(8*8)(sp) + addi sp, sp, (8 * 10) + + // setup MASK + xori MASK, zero, -1 + srli MASK, MASK, 32 + // return offset is jump address - jr a3 + jr t3 diff --git a/src/dynarec/rv64/rv64_printer.c b/src/dynarec/rv64/rv64_printer.c index 6f7af4d2..3dc619ab 100644 --- a/src/dynarec/rv64/rv64_printer.c +++ b/src/dynarec/rv64/rv64_printer.c @@ -23,68 +23,68 @@ static const char gpr[32][9] = { "sp", "gp", "tp", - "t0_mask", + "mask_t0", "t1", - "t2_rip", - "s0_flags", - "s1", - "a0", - "a1", - "a2", - "a3", - "a4", - "a5", - "a6_rax", - "a7_rcx", - "s2_rdx", - "s3_rbx", - "s4_rsp", - "s5_rbp", - "s6_rsi", - "s7_rdi", - "s8_r8", - "s9_r9", - "s10_r10", - "s11_r11", - "t3_r12", - "t4_r13", - "t5_r14", - "t6_r15", + "t2", + "rbp_s0", + "rsp_s1", + "rdi_a0", + "rsi_a1", + "rdx_a2", + "rcx_a3", + "r8_a4", + "r9_a5", + "rax_a6", + "x7_a7", + "r12_s2", + "r13_s3", + "r14_s4", + "r15_s5", + "rip_s6", + "flags_s7", + "rbx_s8", + "emu_s9", + "r10_s10", + "r11_s11", + "t3", + "t4", + "t5", + "t6", }; static const char fpr[32][5] = { - "ft0", - "ft1", - "ft2", - "ft3", - "ft4", - "ft5", - "ft6", - "ft7", - "fs0", - "fs1", - "fa0", - "fa1", - "fa2", - "fa3", - "fa4", - "fa5", - "fa6", - "fa7", - "fs2", - "fs3", - "fs4", - "fs5", - "fs6", - "fs7", - "fs8", - "fs9", - "fs10", - "fs11", - "ft8", - "ft9", - "ft10", - "ft11", + "f0", + "f1", + "f2", + "f3", + "f4", + "f5", + "f6", + "f7", + "f8", + "f9", + "f10", + "f11", + "f12", + "f13", + "f14", + "f15", + "f16", + "f17", + "f18", + "f19", + "f20", + "f21", + "f22", + "f23", + "f24", + "f25", + "f26", + "f27", + "f28", + "f29", + "f30", + "f31", }; static const char vpr[32][4] = { diff --git a/src/dynarec/rv64/rv64_prolog.S b/src/dynarec/rv64/rv64_prolog.S index 67af8253..7440b905 100644 --- a/src/dynarec/rv64/rv64_prolog.S +++ b/src/dynarec/rv64/rv64_prolog.S @@ -1,70 +1,86 @@ -//riscv prologue for dynarec -//Save stuff, prepare stack and register -//called with pointer to emu as 1st parameter -//and address to jump to as 2nd parameter 
+#define ASM_MAPPING 1 +#include "rv64_mapping.h" +#undef ASM_MAPPING .text .align 4 .global rv64_prolog + +// rv64_prolog(emu, jump_address) rv64_prolog: - //save all 18 used register - addi sp, sp, -(8 * 24) // 16 bytes aligned - sd ra, (sp) // save ra - sd x8, 8(sp) // save fp - sd x18, (2*8)(sp) - sd x19, (3*8)(sp) - sd x20, (4*8)(sp) - sd x21, (5*8)(sp) - sd x22, (6*8)(sp) - sd x23, (7*8)(sp) - sd x24, (8*8)(sp) - sd x25, (9*8)(sp) - sd x26, (10*8)(sp) - sd x27, (11*8)(sp) - sd x9, (12*8)(sp) - fsd f18, (13*8)(sp) - fsd f19, (14*8)(sp) - fsd f20, (15*8)(sp) - fsd f21, (16*8)(sp) - fsd f22, (17*8)(sp) - fsd f23, (18*8)(sp) - fsd f24, (19*8)(sp) - fsd f25, (20*8)(sp) - fsd f26, (21*8)(sp) - fsd f27, (22*8)(sp) - //setup emu -> register - ld x16, (a0) - ld x17, 8(a0) - ld x18, 16(a0) - ld x19, 24(a0) - ld x20, 32(a0) - ld x21, 40(a0) - ld x22, 48(a0) - ld x23, 56(a0) - ld x24, 64(a0) - ld x25, 72(a0) - ld x26, 80(a0) - ld x27, 88(a0) - ld x28, 96(a0) - ld x29, 104(a0) - ld x30, 112(a0) - ld x31, 120(a0) - ld x8, 128(a0) //xFlags - ld x7, 136(a0) // xRIP - // // adjust flags bit 11 -> bit 5 - andi x8, x8, ~(1<<5) // probably not usefull? 
- srli x5, x8, 11-5 - andi x5, x5, 1<<5 - or x8, x8, x5 - ld x5, 808(a0) // grab an old value of emu->xSPSave - sd sp, 808(a0) // save current sp to emu->xSPSave + + // 16 bytes aligned + addi sp, sp, -(8 * 24) + + // save callee-saved registers + sd ra, (0*8)(sp) + sd fp, (1*8)(sp) + sd s1, (2*8)(sp) + sd s2, (3*8)(sp) + sd s3, (4*8)(sp) + sd s4, (5*8)(sp) + sd s5, (6*8)(sp) + sd s6, (7*8)(sp) + sd s7, (8*8)(sp) + sd s8, (9*8)(sp) + sd s9, (10*8)(sp) + sd s10, (11*8)(sp) + sd s11, (12*8)(sp) + fsd fs2, (13*8)(sp) + fsd fs3, (14*8)(sp) + fsd fs4, (15*8)(sp) + fsd fs5, (16*8)(sp) + fsd fs6, (17*8)(sp) + fsd fs7, (18*8)(sp) + fsd fs8, (19*8)(sp) + fsd fs9, (20*8)(sp) + fsd fs10, (21*8)(sp) + fsd fs11, (22*8)(sp) + + // save a1 + mv t6, a1 + + /*** switch to box64 register naming convection ***/ + + // load x86 registers from emu + mv Emu, a0 + ld RAX, 0(Emu) + ld RCX, 8(Emu) + ld RDX, 16(Emu) + ld RBX, 24(Emu) + ld RSP, 32(Emu) + ld RBP, 40(Emu) + ld RSI, 48(Emu) + ld RDI, 56(Emu) + ld R8, 64(Emu) + ld R9, 72(Emu) + ld R10, 80(Emu) + ld R11, 88(Emu) + ld R12, 96(Emu) + ld R13, 104(Emu) + ld R14, 112(Emu) + ld R15, 120(Emu) + ld Flags, 128(Emu) + ld RIP, 136(Emu) + + // adjust flags bit 11 -> bit 5 + andi Flags, Flags, ~(1<<5) // probably not usefull? 
+ srli t3, Flags, 11-5 + andi t3, t3, 1<<5 + or Flags, Flags, t3 + + ld t3, 808(Emu) // grab an old value of emu->xSPSave + sd sp, 808(Emu) // save current sp to emu->xSPSave + // push sentinel onto the stack - sd x5, -16(sp) + sd t3, -16(sp) sd zero, -8(sp) addi sp, sp, -16 - // setup xMASK - xori x5, x0, -1 - srli x5, x5, 32 + + // setup MASK + xori MASK, zero, -1 + srli MASK, MASK, 32 + // jump to block - jr a1 + jr t6 diff --git a/src/libtools/signal32.c b/src/libtools/signal32.c index 6dace456..3742d0c7 100644 --- a/src/libtools/signal32.c +++ b/src/libtools/signal32.c @@ -487,7 +487,7 @@ void my_sigactionhandler_oldcode_32(int32_t sig, int simple, siginfo_t* info, vo if(p) { pc = (void*)p->uc_mcontext.__gregs[0]; if(db) - frame = (uintptr_t)p->uc_mcontext.__gregs[16+_SP]; + frame = (uintptr_t)p->uc_mcontext.__gregs[9]; } #else #error Unsupported architecture @@ -573,13 +573,13 @@ void my_sigactionhandler_oldcode_32(int32_t sig, int simple, siginfo_t* info, vo #elif defined(RV64) if(db && p) { sigcontext->uc_mcontext.gregs[I386_EAX] = p->uc_mcontext.__gregs[16]; - sigcontext->uc_mcontext.gregs[I386_ECX] = p->uc_mcontext.__gregs[17]; - sigcontext->uc_mcontext.gregs[I386_EDX] = p->uc_mcontext.__gregs[18]; - sigcontext->uc_mcontext.gregs[I386_EBX] = p->uc_mcontext.__gregs[19]; - sigcontext->uc_mcontext.gregs[I386_ESP] = p->uc_mcontext.__gregs[20]; - sigcontext->uc_mcontext.gregs[I386_EBP] = p->uc_mcontext.__gregs[21]; - sigcontext->uc_mcontext.gregs[I386_ESI] = p->uc_mcontext.__gregs[22]; - sigcontext->uc_mcontext.gregs[I386_EDI] = p->uc_mcontext.__gregs[23]; + sigcontext->uc_mcontext.gregs[I386_ECX] = p->uc_mcontext.__gregs[13]; + sigcontext->uc_mcontext.gregs[I386_EDX] = p->uc_mcontext.__gregs[12]; + sigcontext->uc_mcontext.gregs[I386_EBX] = p->uc_mcontext.__gregs[24]; + sigcontext->uc_mcontext.gregs[I386_ESP] = p->uc_mcontext.__gregs[9]; + sigcontext->uc_mcontext.gregs[I386_EBP] = p->uc_mcontext.__gregs[8]; + sigcontext->uc_mcontext.gregs[I386_ESI] = 
p->uc_mcontext.__gregs[11]; + sigcontext->uc_mcontext.gregs[I386_EDI] = p->uc_mcontext.__gregs[10]; sigcontext->uc_mcontext.gregs[I386_EIP] = getX64Address(db, (uintptr_t)pc); } #else diff --git a/src/libtools/signals.c b/src/libtools/signals.c index db4a5709..290452a2 100644 --- a/src/libtools/signals.c +++ b/src/libtools/signals.c @@ -501,8 +501,8 @@ x64emu_t* getEmuSignal(x64emu_t* emu, ucontext_t* p, dynablock_t* db) emu = (x64emu_t*)p->uc_mcontext.__gregs[4]; } #elif defined(RV64) - if(db && p->uc_mcontext.__gregs[10]>0x10000) { - emu = (x64emu_t*)p->uc_mcontext.__gregs[10]; + if(db && p->uc_mcontext.__gregs[25]>0x10000) { + emu = (x64emu_t*)p->uc_mcontext.__gregs[25]; } #else #error Unsupported Architecture @@ -604,23 +604,23 @@ void copyUCTXreg2Emu(x64emu_t* emu, ucontext_t* p, uintptr_t ip) { emu->eflags.x64 = p->uc_mcontext.__gregs[31]; #elif defined(RV64) emu->regs[_AX].q[0] = p->uc_mcontext.__gregs[16]; - emu->regs[_CX].q[0] = p->uc_mcontext.__gregs[17]; - emu->regs[_DX].q[0] = p->uc_mcontext.__gregs[18]; - emu->regs[_BX].q[0] = p->uc_mcontext.__gregs[19]; - emu->regs[_SP].q[0] = p->uc_mcontext.__gregs[20]; - emu->regs[_BP].q[0] = p->uc_mcontext.__gregs[21]; - emu->regs[_SI].q[0] = p->uc_mcontext.__gregs[22]; - emu->regs[_DI].q[0] = p->uc_mcontext.__gregs[23]; - emu->regs[_R8].q[0] = p->uc_mcontext.__gregs[24]; - emu->regs[_R9].q[0] = p->uc_mcontext.__gregs[25]; + emu->regs[_CX].q[0] = p->uc_mcontext.__gregs[13]; + emu->regs[_DX].q[0] = p->uc_mcontext.__gregs[12]; + emu->regs[_BX].q[0] = p->uc_mcontext.__gregs[24]; + emu->regs[_SP].q[0] = p->uc_mcontext.__gregs[9]; + emu->regs[_BP].q[0] = p->uc_mcontext.__gregs[8]; + emu->regs[_SI].q[0] = p->uc_mcontext.__gregs[11]; + emu->regs[_DI].q[0] = p->uc_mcontext.__gregs[10]; + emu->regs[_R8].q[0] = p->uc_mcontext.__gregs[14]; + emu->regs[_R9].q[0] = p->uc_mcontext.__gregs[15]; emu->regs[_R10].q[0] = p->uc_mcontext.__gregs[26]; emu->regs[_R11].q[0] = p->uc_mcontext.__gregs[27]; - emu->regs[_R12].q[0] = 
p->uc_mcontext.__gregs[28]; - emu->regs[_R13].q[0] = p->uc_mcontext.__gregs[29]; - emu->regs[_R14].q[0] = p->uc_mcontext.__gregs[30]; - emu->regs[_R15].q[0] = p->uc_mcontext.__gregs[31]; - emu->ip.q[0] = ip; - emu->eflags.x64 = p->uc_mcontext.__gregs[8]; + emu->regs[_R12].q[0] = p->uc_mcontext.__gregs[18]; + emu->regs[_R13].q[0] = p->uc_mcontext.__gregs[19]; + emu->regs[_R14].q[0] = p->uc_mcontext.__gregs[20]; + emu->regs[_R15].q[0] = p->uc_mcontext.__gregs[21]; + emu->ip.q[0] = ip; + emu->eflags.x64 = p->uc_mcontext.__gregs[23]; #else #error Unsupported architecture #endif @@ -972,7 +972,7 @@ void my_sigactionhandler_oldcode(x64emu_t* emu, int32_t sig, int simple, siginfo if(p) { pc = (void*)p->uc_mcontext.__gregs[0]; if(db) - frame = (uintptr_t)p->uc_mcontext.__gregs[16+_SP]; + frame = (uintptr_t)p->uc_mcontext.__gregs[9]; } #else #error Unsupported architecture @@ -1075,21 +1075,21 @@ void my_sigactionhandler_oldcode(x64emu_t* emu, int32_t sig, int simple, siginfo #elif defined(RV64) if(db && p) { sigcontext->uc_mcontext.gregs[X64_RAX] = p->uc_mcontext.__gregs[16]; - sigcontext->uc_mcontext.gregs[X64_RCX] = p->uc_mcontext.__gregs[17]; - sigcontext->uc_mcontext.gregs[X64_RDX] = p->uc_mcontext.__gregs[18]; - sigcontext->uc_mcontext.gregs[X64_RBX] = p->uc_mcontext.__gregs[19]; - sigcontext->uc_mcontext.gregs[X64_RSP] = p->uc_mcontext.__gregs[20]; - sigcontext->uc_mcontext.gregs[X64_RBP] = p->uc_mcontext.__gregs[21]; - sigcontext->uc_mcontext.gregs[X64_RSI] = p->uc_mcontext.__gregs[22]; - sigcontext->uc_mcontext.gregs[X64_RDI] = p->uc_mcontext.__gregs[23]; - sigcontext->uc_mcontext.gregs[X64_R8] = p->uc_mcontext.__gregs[24]; - sigcontext->uc_mcontext.gregs[X64_R9] = p->uc_mcontext.__gregs[25]; + sigcontext->uc_mcontext.gregs[X64_RCX] = p->uc_mcontext.__gregs[13]; + sigcontext->uc_mcontext.gregs[X64_RDX] = p->uc_mcontext.__gregs[12]; + sigcontext->uc_mcontext.gregs[X64_RBX] = p->uc_mcontext.__gregs[24]; + sigcontext->uc_mcontext.gregs[X64_RSP] = 
p->uc_mcontext.__gregs[9]; + sigcontext->uc_mcontext.gregs[X64_RBP] = p->uc_mcontext.__gregs[8]; + sigcontext->uc_mcontext.gregs[X64_RSI] = p->uc_mcontext.__gregs[11]; + sigcontext->uc_mcontext.gregs[X64_RDI] = p->uc_mcontext.__gregs[10]; + sigcontext->uc_mcontext.gregs[X64_R8] = p->uc_mcontext.__gregs[14]; + sigcontext->uc_mcontext.gregs[X64_R9] = p->uc_mcontext.__gregs[15]; sigcontext->uc_mcontext.gregs[X64_R10] = p->uc_mcontext.__gregs[26]; sigcontext->uc_mcontext.gregs[X64_R11] = p->uc_mcontext.__gregs[27]; - sigcontext->uc_mcontext.gregs[X64_R12] = p->uc_mcontext.__gregs[28]; - sigcontext->uc_mcontext.gregs[X64_R13] = p->uc_mcontext.__gregs[29]; - sigcontext->uc_mcontext.gregs[X64_R14] = p->uc_mcontext.__gregs[30]; - sigcontext->uc_mcontext.gregs[X64_R15] = p->uc_mcontext.__gregs[31]; + sigcontext->uc_mcontext.gregs[X64_R12] = p->uc_mcontext.__gregs[18]; + sigcontext->uc_mcontext.gregs[X64_R13] = p->uc_mcontext.__gregs[19]; + sigcontext->uc_mcontext.gregs[X64_R14] = p->uc_mcontext.__gregs[20]; + sigcontext->uc_mcontext.gregs[X64_R15] = p->uc_mcontext.__gregs[21]; sigcontext->uc_mcontext.gregs[X64_RIP] = getX64Address(db, (uintptr_t)pc); } #else @@ -1676,12 +1676,12 @@ dynarec_log(/*LOG_DEBUG*/LOG_INFO, "Repeated SIGSEGV with Access error on %p for rsp = (void*)p->uc_mcontext.__gregs[12+_SP]; } #elif defined(RV64) - if(db && p->uc_mcontext.__gregs[10]>0x10000) { - emu = (x64emu_t*)p->uc_mcontext.__gregs[10]; + if(db && p->uc_mcontext.__gregs[25]>0x10000) { + emu = (x64emu_t*)p->uc_mcontext.__gregs[25]; } if(db) { x64pc = getX64Address(db, (uintptr_t)pc); - rsp = (void*)p->uc_mcontext.__gregs[16+_SP]; + rsp = (void*)p->uc_mcontext.__gregs[9]; } #else #error Unsupported Architecture @@ -1837,7 +1837,7 @@ dynarec_log(/*LOG_DEBUG*/LOG_INFO, "Repeated SIGSEGV with Access error on %p for shown_regs = 1; for (int i=0; i<16; ++i) { if(!(i%4)) printf_log(log_minimum, "\n"); - printf_log(log_minimum, "%s:0x%016llx ", reg_name[i], p->uc_mcontext.__gregs[16+i]); + 
printf_log(log_minimum, "%s:0x%016llx ", reg_name[i], p->uc_mcontext.__gregs[(((uint8_t[]) { 16, 13, 12, 24, 9, 8, 11, 10, 14, 15, 26, 27, 18, 19, 20, 21 })[i])]); } printf_log(log_minimum, "\n"); for (int i=0; i<6; ++i) |