diff options
| author | Leslie Zhai <zhaixiang@loongson.cn> | 2025-09-19 14:10:41 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-09-19 08:10:41 +0200 |
| commit | 5bfb27d3fa867c5d204d5f7e23507e36b87c82f7 (patch) | |
| tree | bcfabe4007a9e9d01a928fc79f42bdde0af83aee /src | |
| parent | 0d355aaf01e0e76968a0b9ea7e4478e9b4228948 (diff) | |
| download | box64-5bfb27d3fa867c5d204d5f7e23507e36b87c82f7.tar.gz box64-5bfb27d3fa867c5d204d5f7e23507e36b87c82f7.zip | |
[LA64_DYNAREC] Refactor register mapping (#2940)
* [LA64_DYNAREC] Refactor register mapping
* [LA64_DYNAREC] Fix typo
* [LA64_DYNAREC] Remapping xSavedSP to fp ($r22)
* [LA64_DYNAREC] Fix VPCLMULQDQ x3 and x4 issue
* [LA64_DYNAREC] Fix typo
* [LA64_DYNAREC] Fix typo
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_00.c | 34 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_0f.c | 44 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_66.c | 10 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_660f.c | 24 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_avx_66_0f38.c | 10 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_avx_66_0f3a.c | 4 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_helper.c | 48 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_helper.h | 25 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_private.h | 12 | ||||
| -rw-r--r-- | src/dynarec/la64/la64_emitter.h | 2 | ||||
| -rw-r--r-- | src/dynarec/la64/la64_epilog.S | 58 | ||||
| -rw-r--r-- | src/dynarec/la64/la64_lock.S | 6 | ||||
| -rw-r--r-- | src/dynarec/la64/la64_mapping.h | 161 | ||||
| -rw-r--r-- | src/dynarec/la64/la64_next.S | 66 | ||||
| -rw-r--r-- | src/dynarec/la64/la64_printer.c | 2 | ||||
| -rw-r--r-- | src/dynarec/la64/la64_prolog.S | 59 | ||||
| -rw-r--r-- | src/emu/x64emu_private.h | 2 |
17 files changed, 318 insertions, 249 deletions
diff --git a/src/dynarec/la64/dynarec_la64_00.c b/src/dynarec/la64/dynarec_la64_00.c index 7faaa01a..e2bc02d6 100644 --- a/src/dynarec/la64/dynarec_la64_00.c +++ b/src/dynarec/la64/dynarec_la64_00.c @@ -674,7 +674,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } GETIP(ip, x7); STORE_XEMU_CALL(); - CALL(const_native_priv, -1); + CALL(const_native_priv, -1, 0, 0); LOAD_XEMU_CALL(); jump_to_epilog(dyn, 0, xRIP, ninst); *need_epilog = 0; @@ -690,7 +690,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } GETIP(ip, x7); STORE_XEMU_CALL(); - CALL(const_native_priv, -1); + CALL(const_native_priv, -1, 0, 0); LOAD_XEMU_CALL(); jump_to_epilog(dyn, 0, xRIP, ninst); *need_epilog = 0; @@ -1671,7 +1671,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETEB(x1, 1); u8 = F8; MOV32w(x2, u8); - CALL_(const_rol8, ed, x3); + CALL_(const_rol8, ed, x3, x1, x2); EBBACK(); break; case 4: @@ -1983,7 +1983,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni STORE_XEMU_CALL(); ADDI_D(x3, xRIP, 8 + 8 + 2); // expected return address ADDI_D(x1, xEmu, (uint32_t)offsetof(x64emu_t, ip)); // setup addr as &emu->ip - CALL_(const_int3, -1, x3); + CALL_(const_int3, -1, x3, x1, 0); LOAD_XEMU_CALL(); addr += 8 + 8; BNE_MARK(xRIP, x3); @@ -2002,7 +2002,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni BEQZ_MARK(x3); GETIP(addr, x7); STORE_XEMU_CALL(); - CALL(const_native_int3, -1); + CALL(const_native_int3, -1, 0, 0); LOAD_XEMU_CALL(); MARK; jump_to_epilog(dyn, addr, 0, ninst); @@ -2020,7 +2020,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETIP(ip, x7); // priviledged instruction, IP not updated STORE_XEMU_CALL(); MOV32w(x1, u8); - CALL(const_native_int, -1); + CALL(const_native_int, -1, x1, 0); LOAD_XEMU_CALL(); } else if (u8 == 0x80) { INST_NAME("32bits SYSCALL"); @@ -2028,7 +2028,7 
@@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SMEND(); GETIP(addr, x7); STORE_XEMU_CALL(); - CALL_S(const_x86syscall, -1); + CALL_S(const_x86syscall, -1, 0); LOAD_XEMU_CALL(); TABLE64(x3, addr); // expected return address BNE_MARK(xRIP, x3); @@ -2046,7 +2046,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } GETIP(addr, x7); STORE_XEMU_CALL(); - CALL(const_native_int3, -1); + CALL(const_native_int3, -1, 0, 0); LOAD_XEMU_CALL(); jump_to_epilog(dyn, 0, xRIP, ninst); *need_epilog = 0; @@ -2060,7 +2060,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } GETIP(ip, x7); // priviledged instruction, IP not updated STORE_XEMU_CALL(); - CALL(const_native_priv, -1); + CALL(const_native_priv, -1, 0, 0); LOAD_XEMU_CALL(); jump_to_epilog(dyn, 0, xRIP, ninst); *need_epilog = 0; @@ -2091,7 +2091,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } MESSAGE(LOG_DUMP, "Need Optimization\n"); SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION); - CALL_(const_rol8, ed, x3); + CALL_(const_rol8, ed, x3, x1, x2); EBBACK(); break; case 4: @@ -2173,7 +2173,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION); MOV32w(x2, 1); GETEDW(x4, x1, 0); - CALL_(rex.w ? const_rcr64 : const_rcr32, ed, x4); + CALL_(rex.w ? 
const_rcr64 : const_rcr32, ed, x4, x1, x2); WBACK; if (!wback && !rex.w) ZEROUP(ed); break; @@ -2413,7 +2413,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETIP_(dyn->insts[ninst].natcall, x7); // read the 0xCC already STORE_XEMU_CALL(); ADDI_D(x1, xEmu, (uint32_t)offsetof(x64emu_t, ip)); // setup addr as &emu->ip - CALL_S(const_int3, -1); + CALL_S(const_int3, -1, x1); LOAD_XEMU_CALL(); MOV64x(x3, dyn->insts[ninst].natcall); ADDI_D(x3, x3, 2 + 8 + 8); @@ -2545,7 +2545,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } GETIP(ip, x7); STORE_XEMU_CALL(); - CALL(const_native_priv, -1); + CALL(const_native_priv, -1, 0, 0); LOAD_XEMU_CALL(); jump_to_epilog(dyn, 0, xRIP, ninst); *need_epilog = 0; @@ -2611,7 +2611,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni MESSAGE(LOG_DUMP, "Need Optimization\n"); SETFLAGS(X_ALL, SF_SET_DF, NAT_FLAGS_NOFUSION); GETEB(x1, 0); - CALL(const_div8, -1); + CALL(const_div8, -1, x1, 0); break; default: DEFAULT; @@ -2721,8 +2721,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { GETEDH(x4, x1, 0); // get edd changed addr, so cannot be called 2 times for same op... 
BEQ_MARK(xRDX, xZR); - if (ed != x1) { MV(x1, ed); } - CALL(const_div64, -1); + CALL(const_div64, -1, ed, 0); B_NEXT_nocond; MARK; DIV_DU(x2, xRAX, ed); @@ -2767,8 +2766,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni BNE_MARK3(x2, xZR); BLT_MARK(xRAX, xZR); MARK3; - if (ed != x1) MV(x1, ed); - CALL(const_idiv64, -1); + CALL(const_idiv64, -1, ed, 0); B_NEXT_nocond; MARK; DIV_D(x2, xRAX, ed); diff --git a/src/dynarec/la64/dynarec_la64_0f.c b/src/dynarec/la64/dynarec_la64_0f.c index cb815e9a..6e27e59b 100644 --- a/src/dynarec/la64/dynarec_la64_0f.c +++ b/src/dynarec/la64/dynarec_la64_0f.c @@ -88,7 +88,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SMEND(); GETIP(addr, x7); STORE_XEMU_CALL(); - CALL_S(const_x64syscall, -1); + CALL_S(const_x64syscall, -1, 0); LOAD_XEMU_CALL(); TABLE64(x3, addr); // expected return address BNE_MARK(xRIP, x3); @@ -107,7 +107,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } GETIP(ip, x7); STORE_XEMU_CALL(); - CALL(const_native_ud, -1); + CALL(const_native_ud, -1, 0, 0); LOAD_XEMU_CALL(); jump_to_epilog(dyn, 0, xRIP, ninst); *need_epilog = 0; @@ -420,7 +420,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("RDTSC"); NOTEST(x1); if (box64_rdtsc) { - CALL(const_readtsc, x3); // will return the u64 in x3 + CALL(const_readtsc, x3, 0, 0); // will return the u64 in x3 } else { RDTIME_D(x3, xZR); } @@ -620,12 +620,10 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni ed = (nextop & 7) + (rex.b << 3); sse_reflect_reg(dyn, ninst, ed); ADDI_D(x2, xEmu, offsetof(x64emu_t, xmm[ed])); + ed = x2; } else { SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 0, 0); - if (ed != x2) { - MV(x2, ed); - } } GETG; sse_forget_reg(dyn, ninst, gd); @@ -633,22 +631,22 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, 
uintptr_t ip, int ni sse_reflect_reg(dyn, ninst, 0); switch (u8) { case 0xC8: - CALL(const_sha1nexte, -1); + CALL(const_sha1nexte, -1, x1, ed); break; case 0xC9: - CALL(const_sha1msg1, -1); + CALL(const_sha1msg1, -1, x1, ed); break; case 0xCA: - CALL(const_sha1msg2, -1); + CALL(const_sha1msg2, -1, x1, ed); break; case 0xCB: - CALL(const_sha256rnds2, -1); + CALL(const_sha256rnds2, -1, x1, ed); break; case 0xCC: - CALL(const_sha256msg1, -1); + CALL(const_sha256msg1, -1, x1, ed); break; case 0xCD: - CALL(const_sha256msg2, -1); + CALL(const_sha256msg2, -1, x1, ed); break; } break; @@ -710,17 +708,17 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni ed = (nextop & 7) + (rex.b << 3); sse_reflect_reg(dyn, ninst, ed); ADDI_D(x2, xEmu, offsetof(x64emu_t, xmm[ed])); + wback = x2; } else { SMREAD(); addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 0, 1); - if (wback != x2) MV(x2, wback); } u8 = F8; GETG; sse_forget_reg(dyn, ninst, gd); ADDI_D(x1, xEmu, offsetof(x64emu_t, xmm[gd])); MOV32w(x3, u8); - CALL(const_sha1rnds4, -1); + CALL4(const_sha1rnds4, -1, x1, wback, x3, 0); break; default: DEFAULT; @@ -1368,8 +1366,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0xA2: INST_NAME("CPUID"); NOTEST(x1); - MV(A1, xRAX); - CALL_(const_cpuid, -1, 0); + CALL_(const_cpuid, -1, 0, xRAX, 0); // BX and DX are not synchronized durring the call, so need to force the update LD_D(xRDX, xEmu, offsetof(x64emu_t, regs[_DX])); LD_D(xRBX, xEmu, offsetof(x64emu_t, regs[_BX])); @@ -1497,8 +1494,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni DEFAULT; } else { addr = geted(dyn, addr, ninst, nextop, &ed, x1, x3, &fixedaddress, rex, NULL, 0, 0); - if (ed != x1) { MV(x1, ed); } - CALL(rex.is32bits ? const_fpu_fxsave32 : const_fpu_fxsave64, -1); + CALL(rex.is32bits ? 
const_fpu_fxsave32 : const_fpu_fxsave64, -1, ed, 0); } break; case 1: @@ -1507,8 +1503,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SKIPTEST(x1); fpu_purgecache(dyn, ninst, 0, x1, x2, x3); addr = geted(dyn, addr, ninst, nextop, &ed, x1, x3, &fixedaddress, rex, NULL, 0, 0); - if (ed != x1) { MV(x1, ed); } - CALL(rex.is32bits ? const_fpu_fxrstor32 : const_fpu_fxrstor64, -1); + CALL(rex.is32bits ? const_fpu_fxrstor32 : const_fpu_fxrstor64, -1, ed, 0); break; case 2: INST_NAME("LDMXCSR Md"); @@ -1563,25 +1558,22 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni MESSAGE(LOG_DUMP, "Need Optimization\n"); fpu_purgecache(dyn, ninst, 0, x1, x2, x3); addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0); - if (ed != x1) { MV(x1, ed); } MOV32w(x2, rex.w ? 0 : 1); - CALL(const_fpu_xsave, -1); + CALL(const_fpu_xsave, -1, ed, x2); break; case 5: INST_NAME("XRSTOR Ed"); MESSAGE(LOG_DUMP, "Need Optimization\n"); fpu_purgecache(dyn, ninst, 0, x1, x2, x3); addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0); - if (ed != x1) { MV(x1, ed); } MOV32w(x2, rex.w ? 
0 : 1); - CALL(const_fpu_xrstor, -1); + CALL(const_fpu_xrstor, -1, ed, x2); break; case 7: INST_NAME("CLFLUSH Ed"); MESSAGE(LOG_DUMP, "Need Optimization?\n"); addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0); - if (ed != x1) { MV(x1, ed); } - CALL_(const_native_clflush, -1, 0); + CALL_(const_native_clflush, -1, 0, ed, 0); break; default: DEFAULT; diff --git a/src/dynarec/la64/dynarec_la64_66.c b/src/dynarec/la64/dynarec_la64_66.c index cc29e3bc..95ef995d 100644 --- a/src/dynarec/la64/dynarec_la64_66.c +++ b/src/dynarec/la64/dynarec_la64_66.c @@ -782,7 +782,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETEW(x1, 1); u8 = F8; MOV32w(x2, u8); - CALL_(const_rol16, x1, x3); + CALL_(const_rol16, x1, x3, x1, x2); EWBACK; break; case 1: @@ -792,7 +792,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETEW(x1, 1); u8 = F8; MOV32w(x2, u8); - CALL_(const_ror16, x1, x3); + CALL_(const_ror16, x1, x3, x1, x2); EWBACK; break; case 4: @@ -872,7 +872,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION); if (BOX64DRENV(dynarec_safeflags) > 1) MAYSETFLAGS(); GETEW(x1, 1); - CALL_(const_rol16, x1, x3); + CALL_(const_rol16, x1, x3, x1, x2); EWBACK; break; case 5: @@ -970,7 +970,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni BNE_MARK3(ed, xZR); GETIP_(ip, x6); STORE_XEMU_CALL(); - CALL(const_native_div0, -1); + CALL(const_native_div0, -1, 0, 0); CLEARIP(); LOAD_XEMU_CALL(); jump_to_epilog(dyn, 0, xRIP, ninst); @@ -991,7 +991,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni BNE_MARK3(ed, xZR); GETIP_(ip, x7); STORE_XEMU_CALL(); - CALL(const_native_div0, -1); + CALL(const_native_div0, -1, 0, 0); CLEARIP(); LOAD_XEMU_CALL(); jump_to_epilog(dyn, 0, xRIP, ninst); diff --git a/src/dynarec/la64/dynarec_la64_660f.c 
b/src/dynarec/la64/dynarec_la64_660f.c index ef2a1e3f..2a82cea1 100644 --- a/src/dynarec/la64/dynarec_la64_660f.c +++ b/src/dynarec/la64/dynarec_la64_660f.c @@ -797,7 +797,7 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int } sse_forget_reg(dyn, ninst, gd); MOV32w(x1, gd); - CALL(const_native_aesimc, -1); + CALL(const_native_aesimc, -1, x1, 0); break; case 0xDC: INST_NAME("AESENC Gx, Ex"); // AES-NI @@ -811,7 +811,7 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int d0 = -1; sse_forget_reg(dyn, ninst, gd); MOV32w(x1, gd); - CALL(const_native_aese, -1); + CALL(const_native_aese, -1, x1, 0); GETGX(q0, 1); VXOR_V(q0, q0, (d0 != -1) ? d0 : q1); break; @@ -827,7 +827,7 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int d0 = -1; sse_forget_reg(dyn, ninst, gd); MOV32w(x1, gd); - CALL(const_native_aeselast, -1); + CALL(const_native_aeselast, -1, x1, 0); GETGX(q0, 1); VXOR_V(q0, q0, (d0 != -1) ? d0 : q1); break; @@ -843,7 +843,7 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int d0 = -1; sse_forget_reg(dyn, ninst, gd); MOV32w(x1, gd); - CALL(const_native_aesd, -1); + CALL(const_native_aesd, -1, x1, 0); GETGX(q0, 1); VXOR_V(q0, q0, (d0 != -1) ? d0 : q1); break; @@ -859,7 +859,7 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int d0 = -1; sse_forget_reg(dyn, ninst, gd); MOV32w(x1, gd); - CALL(const_native_aesdlast, -1); + CALL(const_native_aesdlast, -1, x1, 0); GETGX(q0, 1); VXOR_V(q0, q0, (d0 != -1) ? 
d0 : q1); break; @@ -1256,7 +1256,7 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int } u8 = F8; MOV32w(x4, u8); - CALL(const_native_pclmul, -1); + CALL4(const_native_pclmul, -1, x1, x2, x3, x4); break; case 0x61: INST_NAME("PCMPESTRI Gx, Ex, Ib"); @@ -1272,15 +1272,15 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int if (ed > 7) sse_reflect_reg(dyn, ninst, ed); ADDI_D(x1, xEmu, offsetof(x64emu_t, xmm[ed])); + ed = x1; } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x1, x5, &fixedaddress, rex, NULL, 0, 1); - if (ed != x1) MV(x1, ed); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 1); } MV(x2, xRDX); MV(x4, xRAX); u8 = F8; MOV32w(x5, u8); - CALL(const_sse42_compare_string_explicit_len, x1); + CALL6(const_sse42_compare_string_explicit_len, x1, ed, x2, x3, x4, x5, 0); ZEROUP(x1); BNEZ_MARK(x1); MOV32w(xRCX, (u8 & 1) ? 8 : 16); @@ -1305,13 +1305,13 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ed = (nextop & 7) + (rex.b << 3); if (ed > 7) sse_reflect_reg(dyn, ninst, ed); ADDI_D(x1, xEmu, offsetof(x64emu_t, xmm[ed])); + ed = x1; } else { addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 1); - if (ed != x1) MV(x1, ed); } u8 = F8; MOV32w(x3, u8); - CALL(const_sse42_compare_string_implicit_len, x1); + CALL4(const_sse42_compare_string_implicit_len, x1, ed, x2, x3, 0); BNEZ_MARK(x1); MOV32w(xRCX, (u8 & 1) ? 
8 : 16); B_NEXT_nocond; @@ -1344,7 +1344,7 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int } u8 = F8; MOV32w(x4, u8); - CALL(const_native_aeskeygenassist, -1); + CALL4(const_native_aeskeygenassist, -1, x1, x2, x3, x4); break; default: DEFAULT; diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f38.c b/src/dynarec/la64/dynarec_la64_avx_66_0f38.c index d2afd48c..0a04c586 100644 --- a/src/dynarec/la64/dynarec_la64_avx_66_0f38.c +++ b/src/dynarec/la64/dynarec_la64_avx_66_0f38.c @@ -1278,7 +1278,7 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i } avx_forget_reg(dyn, ninst, gd); MOV32w(x1, gd); - CALL(const_native_aesimc, -1); + CALL(const_native_aesimc, -1, x1, 0); if (!vex.l) { ST_D(xZR, xEmu, offsetof(x64emu_t, ymm[gd])); ST_D(xZR, xEmu, offsetof(x64emu_t, ymm[gd]) + 8); @@ -1296,7 +1296,7 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i d0 = -1; avx_forget_reg(dyn, ninst, gd); MOV32w(x1, gd); - CALL(const_native_aese, -1); + CALL(const_native_aese, -1, x1, 0); GETGYx(q0, 1); VXOR_V(q0, q0, (d0 != -1) ? d0 : q1); if (!vex.l) { @@ -1316,7 +1316,7 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i d0 = -1; avx_forget_reg(dyn, ninst, gd); MOV32w(x1, gd); - CALL(const_native_aeselast, -1); + CALL(const_native_aeselast, -1, x1, 0); GETGYx(q0, 1); VXOR_V(q0, q0, (d0 != -1) ? d0 : q1); if (!vex.l) { @@ -1336,7 +1336,7 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i d0 = -1; avx_forget_reg(dyn, ninst, gd); MOV32w(x1, gd); - CALL(const_native_aesd, -1); + CALL(const_native_aesd, -1, x1, 0); GETGYx(q0, 1); VXOR_V(q0, q0, (d0 != -1) ? 
d0 : q1); if (!vex.l) { @@ -1356,7 +1356,7 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i d0 = -1; avx_forget_reg(dyn, ninst, gd); MOV32w(x1, gd); - CALL(const_native_aesdlast, -1); + CALL(const_native_aesdlast, -1, x1, 0); GETGYx(q0, 1); VXOR_V(q0, q0, (d0 != -1) ? d0 : q1); if (!vex.l) { diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f3a.c b/src/dynarec/la64/dynarec_la64_avx_66_0f3a.c index c91eb1bc..691b2c1e 100644 --- a/src/dynarec/la64/dynarec_la64_avx_66_0f3a.c +++ b/src/dynarec/la64/dynarec_la64_avx_66_0f3a.c @@ -737,7 +737,7 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i } u8 = F8; MOV32w(x4, u8); - CALL_(vex.l ? const_native_pclmul_y : const_native_pclmul_x, -1, x3); + CALL4_(vex.l ? const_native_pclmul_y : const_native_pclmul_x, -1, x3, x1, x2, x3, x4); if (!vex.l) { ST_D(xZR, xEmu, offsetof(x64emu_t, ymm[gd])); ST_D(xZR, xEmu, offsetof(x64emu_t, ymm[gd]) + 8); @@ -796,7 +796,7 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i } u8 = F8; MOV32w(x4, u8); - CALL(const_native_aeskeygenassist, -1); + CALL4(const_native_aeskeygenassist, -1, x1, x2, x3, x4); if (!vex.l) { ST_D(xZR, xEmu, offsetof(x64emu_t, ymm[gd])); ST_D(xZR, xEmu, offsetof(x64emu_t, ymm[gd]) + 8); diff --git a/src/dynarec/la64/dynarec_la64_helper.c b/src/dynarec/la64/dynarec_la64_helper.c index 049803ce..d283c09f 100644 --- a/src/dynarec/la64/dynarec_la64_helper.c +++ b/src/dynarec/la64/dynarec_la64_helper.c @@ -597,7 +597,7 @@ void ret_to_epilog(dynarec_la64_t* dyn, uintptr_t ip, int ninst, rex_t rex) MVz(x1, xRIP); SMEND(); if (BOX64DRENV(dynarec_callret)) { - // pop the actual return address from RV64 stack + // pop the actual return address from LA64 stack LD_D(xRA, xSP, 0); // native addr LD_D(x6, xSP, 8); // x86 addr ADDI_D(xSP, xSP, 16); // pop @@ -627,7 +627,7 @@ void retn_to_epilog(dynarec_la64_t* dyn, uintptr_t ip, int ninst, rex_t rex, int MVz(x1, xRIP); SMEND(); if 
(BOX64DRENV(dynarec_callret)) { - // pop the actual return address from RV64 stack + // pop the actual return address from LA64 stack LD_D(xRA, xSP, 0); // native addr LD_D(x6, xSP, 8); // x86 addr ADDI_D(xSP, xSP, 16); // pop @@ -691,7 +691,7 @@ void iret_to_epilog(dynarec_la64_t* dyn, uintptr_t ip, int ninst, int is64bits) CLEARIP(); } -void call_c(dynarec_la64_t* dyn, int ninst, la64_consts_t fnc, int reg, int ret, int saveflags, int savereg) +void call_c(dynarec_la64_t* dyn, int ninst, la64_consts_t fnc, int reg, int ret, int saveflags, int savereg, int arg1, int arg2, int arg3, int arg4, int arg5, int arg6) { MAYUSE(fnc); if (savereg == 0) @@ -702,39 +702,47 @@ void call_c(dynarec_la64_t* dyn, int ninst, la64_consts_t fnc, int reg, int ret, } fpu_pushcache(dyn, ninst, reg, 0); if (ret != -2) { - ADDI_D(xSP, xSP, -16); // RV64 stack needs to be 16byte aligned - ST_D(xEmu, xSP, 0); - ST_D(savereg, xSP, 8); - // $r4..$r20 needs to be saved by caller - STORE_REG(RAX); - STORE_REG(RCX); + ADDI_D(xSP, xSP, -16); // LA64 stack needs to be 16byte aligned + ST_D(savereg, xSP, 0); + STORE_REG(RDI); + STORE_REG(RSI); STORE_REG(RDX); + STORE_REG(RCX); + STORE_REG(R8); + STORE_REG(R9); + STORE_REG(RAX); STORE_REG(RBX); STORE_REG(RSP); STORE_REG(RBP); - STORE_REG(RSI); - STORE_REG(RDI); ST_D(xRIP, xEmu, offsetof(x64emu_t, ip)); } TABLE64C(reg, fnc); + MV(A0, xEmu); + if (arg1) MV(A1, arg1); + if (arg2) MV(A2, arg2); + if (arg3) MV(A3, arg3); + if (arg4) MV(A4, arg4); + if (arg5) MV(A5, arg5); + if (arg6) MV(A6, arg6); JIRL(xRA, reg, 0); if (ret >= 0) { - MV(ret, xEmu); + MV(ret, A0); } if (ret != -2) { - LD_D(xEmu, xSP, 0); - LD_D(savereg, xSP, 8); + LD_D(savereg, xSP, 0); ADDI_D(xSP, xSP, 16); #define GO(A) \ if (ret != x##A) { LOAD_REG(A); } - GO(RAX); - GO(RCX); + GO(RDI); + GO(RSI); GO(RDX); + GO(RCX); + GO(R8); + GO(R9); + GO(RAX); GO(RBX); GO(RSP); GO(RBP); - GO(RSI); - GO(RDI); if (ret != xRIP) LD_D(xRIP, xEmu, offsetof(x64emu_t, ip)); #undef GO @@ -770,7 +778,7 @@ 
void grab_segdata(dynarec_la64_t* dyn, uintptr_t addr, int ninst, int reg, int s CBZ_MARKSEG(t1); } MOV64x(x1, segment); - call_c(dyn, ninst, const_getsegmentbase, t2, reg, 0, xFlags); + call_c(dyn, ninst, const_getsegmentbase, t2, reg, 0, xFlags, x1, 0, 0, 0, 0, 0); MARKSEG; MESSAGE(LOG_DUMP, "----%s Offset\n", (segment == _FS) ? "FS" : "GS"); } @@ -1689,7 +1697,7 @@ static void flagsCacheTransform(dynarec_la64_t* dyn, int ninst, int s1) j64 = (GETMARKF2) - (dyn->native_size); BEQZ(s1, j64); } - CALL_(const_updateflags, -1, 0); + CALL_(const_updateflags, -1, 0, 0, 0); MARKF2; } } diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h index ba375b7f..740c6909 100644 --- a/src/dynarec/la64/dynarec_la64_helper.h +++ b/src/dynarec/la64/dynarec_la64_helper.h @@ -674,13 +674,16 @@ // CALL will use x6 for the call address. Return value can be put in ret (unless ret is -1) // R0 will not be pushed/popd if ret is -2 -#define CALL(F, ret) call_c(dyn, ninst, F, x6, ret, 1, 0) +#define CALL(F, ret, arg1, arg2) call_c(dyn, ninst, F, x6, ret, 1, 0, arg1, arg2, 0, 0, 0, 0) +#define CALL4(F, ret, arg1, arg2, arg3, arg4) call_c(dyn, ninst, F, x6, ret, 1, 0, arg1, arg2, arg3, arg4, 0, 0) +#define CALL6(F, ret, arg1, arg2, arg3, arg4, arg5, arg6) call_c(dyn, ninst, F, x6, ret, 1, 0, arg1, arg2, arg3, arg4, arg5, arg6) // CALL_ will use x6 for the call address. Return value can be put in ret (unless ret is -1) // R0 will not be pushed/popd if ret is -2 -#define CALL_(F, ret, reg) call_c(dyn, ninst, F, x6, ret, 1, reg) +#define CALL_(F, ret, reg, arg1, arg2) call_c(dyn, ninst, F, x6, ret, 1, reg, arg1, arg2, 0, 0, 0, 0) +#define CALL4_(F, ret, reg, arg1, arg2, arg3, arg4) call_c(dyn, ninst, F, x6, ret, 1, reg, arg1, arg2, arg3, arg4, 0, 0) // CALL_S will use x6 for the call address. Return value can be put in ret (unless ret is -1) // R0 will not be pushed/popd if ret is -2. 
Flags are not save/restored -#define CALL_S(F, ret) call_c(dyn, ninst, F, x6, ret, 0, 0) +#define CALL_S(F, ret, arg1) call_c(dyn, ninst, F, x6, ret, 0, 0, arg1, 0, 0, 0, 0, 0) #define MARKi(i) dyn->insts[ninst].mark[i] = dyn->native_size #define GETMARKi(i) dyn->insts[ninst].mark[i] @@ -848,8 +851,9 @@ // Need to also store current value of some register, as they may be used by functions like setjmp #define STORE_XEMU_CALL() \ - STORE_REG(R8); \ - STORE_REG(R9); \ + STORE_REG(RBX); \ + STORE_REG(RSP); \ + STORE_REG(RBP); \ STORE_REG(R10); \ STORE_REG(R11); \ STORE_REG(R12); \ @@ -860,8 +864,9 @@ #define LOAD_XEMU_CALL() #define LOAD_XEMU_REM() \ - LOAD_REG(R8); \ - LOAD_REG(R9); \ + LOAD_REG(RBX); \ + LOAD_REG(RSP); \ + LOAD_REG(RBP); \ LOAD_REG(R10); \ LOAD_REG(R11); \ LOAD_REG(R12); \ @@ -888,7 +893,7 @@ if (dyn->f.pending == SF_PENDING \ && dyn->insts[ninst].x64.need_after \ && !(dyn->insts[ninst].x64.need_after & X_PEND)) { \ - CALL_(const_updateflags, -1, 0); \ + CALL_(const_updateflags, -1, 0, 0, 0); \ dyn->f.pending = SF_SET; \ SET_NODF(); \ } \ @@ -958,7 +963,7 @@ j64 = (GETMARKF) - (dyn->native_size); \ BEQ(x3, xZR, j64); \ } \ - CALL_(const_updateflags, -1, 0); \ + CALL_(const_updateflags, -1, 0, 0, 0); \ MARKF; \ dyn->f.pending = SF_SET; \ SET_DFOK(); \ @@ -1272,7 +1277,7 @@ void jump_to_next(dynarec_la64_t* dyn, uintptr_t ip, int reg, int ninst, int is3 void ret_to_epilog(dynarec_la64_t* dyn, uintptr_t ip, int ninst, rex_t rex); void retn_to_epilog(dynarec_la64_t* dyn, uintptr_t ip, int ninst, rex_t rex, int n); void iret_to_epilog(dynarec_la64_t* dyn, uintptr_t ip, int ninst, int is64bits); -void call_c(dynarec_la64_t* dyn, int ninst, la64_consts_t fnc, int reg, int ret, int saveflags, int save_reg); +void call_c(dynarec_la64_t* dyn, int ninst, la64_consts_t fnc, int reg, int ret, int saveflags, int save_reg, int arg1, int arg2, int arg3, int arg4, int arg5, int arg6); void grab_segdata(dynarec_la64_t* dyn, uintptr_t addr, int ninst, int reg, int 
segment, int modreg); void emit_cmp8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5, int s6); void emit_cmp16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5, int s6); diff --git a/src/dynarec/la64/dynarec_la64_private.h b/src/dynarec/la64/dynarec_la64_private.h index 00f19bc6..120fc14e 100644 --- a/src/dynarec/la64/dynarec_la64_private.h +++ b/src/dynarec/la64/dynarec_la64_private.h @@ -179,12 +179,12 @@ int Table64(dynarec_la64_t *dyn, uint64_t val, int pass); // add a value to tab void CreateJmpNext(void* addr, void* next); -#define GO_TRACE(A, B, s0) \ - GETIP(addr, s0); \ - MV(A1, xRIP); \ - STORE_XEMU_CALL(); \ - MOV64x(A2, B); \ - CALL(const_##A, -1); \ +#define GO_TRACE(A, B, s0) \ + GETIP(addr, s0); \ + MV(x1, xRIP); \ + STORE_XEMU_CALL(); \ + MOV64x(x2, B); \ + CALL(const_##A, -1, x1, x2); \ LOAD_XEMU_CALL() #endif //__DYNAREC_ARM_PRIVATE_H_ diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h index 466180f7..7a00c0cc 100644 --- a/src/dynarec/la64/la64_emitter.h +++ b/src/dynarec/la64/la64_emitter.h @@ -131,6 +131,8 @@ #define DMB_ISHLD() DBAR_R_RW() #define DMB_ISHST() DBAR_W_RW() +#define BRK(hint) EMIT(type_hint(0b00000000001010100, hint)) + // GR[rd] = GR[rj] & GR[rk] #define AND(rd, rj, rk) EMIT(type_3R(0b00000000000101001, rk, rj, rd)) // GR[rd] = GR[rj] | GR[rk] diff --git a/src/dynarec/la64/la64_epilog.S b/src/dynarec/la64/la64_epilog.S index ab6a80d6..701b8898 100644 --- a/src/dynarec/la64/la64_epilog.S +++ b/src/dynarec/la64/la64_epilog.S @@ -1,3 +1,7 @@ +#define ASM_MAPPING 1 +#include "la64_mapping.h" +#undef ASM_MAPPING + //la64 epilog for dynarec //Save stuff, prepare stack and register //called with pointer to emu as 1st parameter @@ -11,37 +15,37 @@ la64_epilog: // update register -> emu - st.d $r12, $r4, (8 * 0) - st.d $r13, $r4, (8 * 1) - st.d $r14, $r4, (8 * 2) - st.d $r15, $r4, (8 * 3) - st.d $r16, $r4, (8 * 4) - st.d $r17, $r4, (8 * 5) - st.d $r18, $r4, (8 * 6) - 
st.d $r19, $r4, (8 * 7) - st.d $r23, $r4, (8 * 8) - st.d $r24, $r4, (8 * 9) - st.d $r25, $r4, (8 * 10) - st.d $r26, $r4, (8 * 11) - st.d $r27, $r4, (8 * 12) - st.d $r28, $r4, (8 * 13) - st.d $r29, $r4, (8 * 14) - st.d $r30, $r4, (8 * 15) + st.d RAX, Emu, (8 * 0) + st.d RCX, Emu, (8 * 1) + st.d RDX, Emu, (8 * 2) + st.d RBX, Emu, (8 * 3) + st.d RSP, Emu, (8 * 4) + st.d RBP, Emu, (8 * 5) + st.d RSI, Emu, (8 * 6) + st.d RDI, Emu, (8 * 7) + st.d R8, Emu, (8 * 8) + st.d R9, Emu, (8 * 9) + st.d R10, Emu, (8 * 10) + st.d R11, Emu, (8 * 11) + st.d R12, Emu, (8 * 12) + st.d R13, Emu, (8 * 13) + st.d R14, Emu, (8 * 14) + st.d R15, Emu, (8 * 15) // restore xFlags from LBT.eflags - la.global $r12, cpuext - ldptr.d $r12, $r12, 0 - andi $r12, $r12, 1 - beqz $r12, 1f - ori $r13, $r0, 0b100011010101 - andn $r31, $r31, $r13 - x86mfflag $r13, 0b111111 - or $r31, $r31, $r13 + la.global $r19, cpuext + ldptr.d $r19, $r19, 0 + andi $r19, $r19, 1 + beqz $r19, 1f + ori $r19, $r0, 0b100011010101 + andn Flags, Flags, $r19 + x86mfflag $r19, 0b111111 + or Flags, Flags, $r19 1: - st.d $r31, $r4, (8 * 16) // xFlags - st.d $r20, $r4, (8 * 17) // put back reg value in emu, including EIP (so $r20 must be EIP now) + st.d Flags, Emu, (8 * 16) // xFlags + st.d RIP, Emu, (8 * 17) // put back reg value in emu, including EIP (so $r29 must be EIP now) // fallback to epilog_fast now, just restoring saved regs la64_epilog_fast: - addi.d $sp, $r22, 0 // restore save sp from xSavedSP + addi.d $sp, SavedSP, 0 // restore save sp from xSavedSP // restore all used register ld.d $r1, $sp, (8 * 0) // load ra ld.d $r22, $sp, (8 * 1) // load fp diff --git a/src/dynarec/la64/la64_lock.S b/src/dynarec/la64/la64_lock.S index df7cfd83..fe025261 100644 --- a/src/dynarec/la64/la64_lock.S +++ b/src/dynarec/la64/la64_lock.S @@ -1,3 +1,7 @@ +#define ASM_MAPPING 1 +#include "la64_mapping.h" +#undef ASM_MAPPING + // LA64 lock helper // there is 2 part: read and write // write return 0 on success, 1 on fail (value has been 
changed) @@ -226,4 +230,4 @@ la64_lock_get_d: la64_lock_get_dd: dbar 0 ld.d $a0, $a0, 0 - ret \ No newline at end of file + ret diff --git a/src/dynarec/la64/la64_mapping.h b/src/dynarec/la64/la64_mapping.h index c373fe0c..3bb6c1d4 100644 --- a/src/dynarec/la64/la64_mapping.h +++ b/src/dynarec/la64/la64_mapping.h @@ -1,76 +1,87 @@ #ifndef __LA64_MAPPING_H__ #define __LA64_MAPPING_H__ +// LA64 Register Mapping Scheme +/***************************************************************************************** +name alias mapping native description Box64 description saver +****************************************************************************************** +r0 zero native zero Hard-wired zero N/A - +r1 ra native ra Return address N/A Caller +r2 tp - Thread pointer N/A - +r3 sp native sp Stack pointer N/A Callee +r4 a0 RDI Function argument/return val. - Caller +r5 a1 RSI Function argument/return val. - Caller +r6 a2 RDX Function argument - Caller +r7 a3 RCX Function argument - Caller +r8 a4 R8 Function argument - Caller +r9 a5 R9 Function argument - Caller +r10 a6 RBX Function argument - Caller +r11 a7 RSP Function argument - Caller +r12 t0 RAX Temporary - Caller +r13 t1 RBP Temporary - Caller +r14 t2 x1 Temporary Scratch Caller +r15 t3 x2 Temporary Scratch Caller +r16 t4 x3 Temporary Scratch Caller +r17 t5 x4 Temporary Scratch Caller +r18 t6 x5 Temporary Scratch Caller +r19 t7 x6 Temporary Scratch Caller +r20 t8 x7 Temporary Scratch Caller +r21 rx - Reserved N/A - +r22 fp SavedSP Saved register/frame pointer - Callee +r23 s0 R10 Saved register - Callee +r24 s1 R11 Saved register - Callee +r25 s2 R12 Saved register - Callee +r26 s3 R13 Saved register - Callee +r27 s4 R14 Saved register - Callee +r28 s5 R15 Saved register - Callee +r29 s6 RIP Saved register - Callee +r30 s7 FLAGS Saved register - Callee +r31 s8 xEmu Saved register The Emu struct Callee +******************************************************************************************/ + +#ifndef 
ASM_MAPPING -// LA64 ABI -/* -Name Alias Meaning saver ---------------------------------------------------------- -r0 zero Zero register - -r1 ra Return address Callee -r2 tp Thread pointer - -r3 sp Stack pointer Callee -r4-r5 a0-a1 Function arguments,Return val. Caller -r6-r11 a2-a7 Function arguments Caller -r12-r20 t0-t8 Temp registers Caller -r21 Reserved Non-allocatable - -r22 fp/s9 Frame pointer/Static register Callee -r23-31 s0-s8 Static registers Callee ---------------------------------------------------------- -f0-f1 fa0-fa1 Function arguments,Return val. Caller -f2-f7 fa2-fa7 Function arguments Caller -f8-f23 ft0-ft15 Temp registers Caller -f24-f31 fs0-fs7 Static registers Callee -*/ -/* - LA64 GPR mapping - There is no 15 registers free, so split the regs in 2 part - AX..DI : r12-r19 - R8..R15: r23-r30 - flags in r31 - ip in r20 -*/ // x86 Register mapping #define xRAX 12 -#define xRCX 13 -#define xRDX 14 -#define xRBX 15 -#define xRSP 16 -#define xRBP 17 -#define xRSI 18 -#define xRDI 19 -#define xR8 23 -#define xR9 24 -#define xR10 25 -#define xR11 26 -#define xR12 27 -#define xR13 28 -#define xR14 29 -#define xR15 30 -#define xFlags 31 -#define xRIP 20 +#define xRCX 7 +#define xRDX 6 +#define xRBX 10 +#define xRSP 11 +#define xRBP 13 +#define xRSI 5 +#define xRDI 4 +#define xR8 8 +#define xR9 9 +#define xR10 23 +#define xR11 24 +#define xR12 25 +#define xR13 26 +#define xR14 27 +#define xR15 28 +#define xFlags 30 +#define xRIP 29 #define xSavedSP 22 // convert a x86 register to native according to the register mapping -#define TO_NAT(A) (xRAX + (A) + (((A) > 7) ? 
3 : 0)) +#define TO_NAT(A) (((uint8_t[]) { 12, 7, 6, 10, 11, 13, 5, 4, 8, 9, 23, 24, 25, 26, 27, 28 })[(A)]) // scratch registers -#define x1 5 -#define x2 6 -#define x3 7 -#define x4 8 -#define x5 9 -#define x6 10 -#define x7 11 - -// emu is r0 -#define xEmu 4 +#define x1 14 +#define x2 15 +#define x3 16 +#define x4 17 +#define x5 18 +#define x6 19 +#define x7 20 + +// emu is $r31 +#define xEmu 31 // LA64 RA #define xRA 1 #define ra xRA // LA64 SP #define xSP 3 -// RV64 args +// LA64 args #define A0 4 #define A1 5 #define A2 6 @@ -127,4 +138,38 @@ f24-f31 fs0-fs7 Static registers Callee #define FR_U 25 #define FR_I 24 -#endif //__LA64_MAPPING_H__ \ No newline at end of file +#else + +// x86 Register mapping +#define RAX $r12 +#define RCX $r7 +#define RDX $r6 +#define RBX $r10 +#define RSP $r11 +#define RBP $r13 +#define RSI $r5 +#define RDI $r4 +#define R8 $r8 +#define R9 $r9 +#define R10 $r23 +#define R11 $r24 +#define R12 $r25 +#define R13 $r26 +#define R14 $r27 +#define R15 $r28 +#define Flags $r30 +#define RIP $r29 +#define Emu $r31 +#define SavedSP $r22 + +#ifdef LA64_ABI_1 + +.macro ret + jr $ra +.endm + +#endif // LA64_ABI_1 + +#endif // ASM_MAPPING + +#endif //__LA64_MAPPING_H__ diff --git a/src/dynarec/la64/la64_next.S b/src/dynarec/la64/la64_next.S index f98558b0..fd33ad00 100644 --- a/src/dynarec/la64/la64_next.S +++ b/src/dynarec/la64/la64_next.S @@ -1,3 +1,7 @@ +#define ASM_MAPPING 1 +#include "la64_mapping.h" +#undef ASM_MAPPING + //la64 update linker table for dynarec //called with pointer to emu as 1st parameter //and address of table to as 2nd parameter @@ -12,41 +16,41 @@ .8byte 0 // NULL pointer before la64_next, for getDB la64_next: - // emu is a0 - // IP address is a1 - addi.d $sp, $sp, -(8 * 12) - st.d $a0, $sp, 0 - st.d $a1, $sp, 8 - st.d $r11, $sp, 16 - st.d $r12, $sp, 24 - st.d $r13, $sp, 32 - st.d $r14, $sp, 40 - st.d $r15, $sp, 48 - st.d $r16, $sp, 56 - st.d $r17, $sp, 64 - st.d $r18, $sp, 72 - st.d $r19, $sp, 80 - st.d $r20, $sp, 88 // 
also save r20(rip) to allow change in LinkNext + // move emu to a0 + // move IP address to a1 + addi.d $sp, $sp, -(8 * 11) + st.d RDI, $sp, 0 + st.d RSI, $sp, 8 + st.d RDX, $sp, 16 + st.d RCX, $sp, 24 + st.d R8, $sp, 32 + st.d R9, $sp, 40 + st.d RAX, $sp, 48 + st.d RBX, $sp, 56 + st.d RSP, $sp, 64 + st.d RBP, $sp, 72 + st.d RIP, $sp, 80 // also save r29(rip) to allow change in LinkNext + move $a0, Emu + move $a1, RIP move $a2, $ra // "from" is in ra, so put in a2 - addi.d $a3, $sp, 88 // a3 is address to change rip + addi.d $a3, $sp, 80 // a3 is address to change rip // call the function bl LinkNext // preserve return value - move $a3, $a0 + move $r16, $a0 // pop regs - ld.d $a0, $sp, 0 - ld.d $a1, $sp, 8 - ld.d $r11, $sp, 16 - ld.d $r12, $sp, 24 - ld.d $r13, $sp, 32 - ld.d $r14, $sp, 40 - ld.d $r15, $sp, 48 - ld.d $r16, $sp, 56 - ld.d $r17, $sp, 64 - ld.d $r18, $sp, 72 - ld.d $r19, $sp, 80 - ld.d $r20, $sp, 88 - addi.d $sp, $sp, (8 * 12) + ld.d RDI, $sp, 0 + ld.d RSI, $sp, 8 + ld.d RDX, $sp, 16 + ld.d RCX, $sp, 24 + ld.d R8, $sp, 32 + ld.d R9, $sp, 40 + ld.d RAX, $sp, 48 + ld.d RBX, $sp, 56 + ld.d RSP, $sp, 64 + ld.d RBP, $sp, 72 + ld.d RIP, $sp, 80 + addi.d $sp, $sp, (8 * 11) // return offset is jump address - jr $a3 \ No newline at end of file + jr $r16 diff --git a/src/dynarec/la64/la64_printer.c b/src/dynarec/la64/la64_printer.c index 3b984d32..ba86aacf 100644 --- a/src/dynarec/la64/la64_printer.c +++ b/src/dynarec/la64/la64_printer.c @@ -6,7 +6,7 @@ #include "la64_printer.h" #include "debug.h" -static const char* Xt[] = { "xZR", "r1", "r2", "sp", "xEmu", "x1_r5", "x2_r6", "x3_r7", "x4_r8", "x5_r9", "x6_r10", "x7_r11", "xRAX_r12", "xRCX_r13", "xRDX_r14", "xRBX_r15", "xRSP_r16", "xRBP_r17", "xRSI_r18", "xRDI_r19", "xRIP_r20", "r21", "r22", "xR8_r23", "xR9_r24", "xR10_r25", "xR11_r26", "xR12_r27", "xR13_r28", "xR14_r29", "xR15_r30", "xFlags_r31" }; +static const char* Xt[] = { "xZR", "r1", "r2", "sp", "xRDI_r4", "xRSI_r5", "xRDX_r6", "xRCX_r7", "xR8_r8", 
"xR9_r9", "xRBX_r10", "xRSP_r11", "xRAX_r12", "xRBP_r13", "x1_r14", "x2_r15", "x3_r16", "x4_r17", "x5_r18", "x6_r19", "x7_r20", "r21", "xSavedSP_r22", "xR10_r23", "xR11_r24", "xR12_r25", "xR13_r26", "xR14_r27", "xR15_r28", "xRIP_r29", "xFlags_r30", "xEmu_r31" }; static const char* Ft[] = { "fa0", "fa1", "fa2", "fa3", "fa4", "fa5", "fa6", "fa7", "ft0", "ft1", "ft2", "ft3", "ft4", "ft5", "ft6", "ft7", "ft8", "ft9", "ft10", "ft11", "ft12", "ft13", "ft14", "ft15", "fs0", "fs1", "fs2", "fs3", "fs4", "fs5", "fs6", "fs7" }; static const char* Vt[] = { "vra0", "vra1", "vra2", "vra3", "vra4", "vra5", "vra6", "vra7", "vrt0", "vrt1", "vrt2", "vrt3", "vrt4", "vrt5", "vrt6", "vrt7", "vrt8", "vrt9", "vrt10", "vrt11", "vrt12", "vrt13", "vrt14", "vrt15", "vrs0", "vrs1", "vrs2", "vrs3", "vrs4", "vrs5", "vrs6", "vrs7" }; static const char* XVt[] = { "xvra0", "xvra1", "xvra2", "xvra3", "xvra4", "xvra5", "xvra6", "xvra7", "xvrt0", "xvrt1", "xvrt2", "xvrt3", "xvrt4", "xvrt5", "xvrt6", "xvrt7", "xvrt8", "xvrt9", "xvrt10", "xvrt11", "xvrt12", "xvrt13", "xvrt14", "xvrt15", "xvrs0", "xvrs1", "xvrs2", "xvrs3", "xvrs4", "xvrs5", "xvrs6", "xvrs7" }; diff --git a/src/dynarec/la64/la64_prolog.S b/src/dynarec/la64/la64_prolog.S index fc6bf34c..2cad9457 100644 --- a/src/dynarec/la64/la64_prolog.S +++ b/src/dynarec/la64/la64_prolog.S @@ -1,3 +1,7 @@ +#define ASM_MAPPING 1 +#include "la64_mapping.h" +#undef ASM_MAPPING + //loongarch prologue for dynarec //Save stuff, prepare stack and register //called with pointer to emu as 1st parameter @@ -33,37 +37,40 @@ la64_prolog: fst.d $f29, $sp, (8 * 16) fst.d $f30, $sp, (8 * 17) fst.d $f31, $sp, (8 * 18) + // save a1 + move $r16, $a1 // setup emu -> register - ld.d $r12, $r4, (8 * 0) - ld.d $r13, $r4, (8 * 1) - ld.d $r14, $r4, (8 * 2) - ld.d $r15, $r4, (8 * 3) - ld.d $r16, $r4, (8 * 4) - ld.d $r17, $r4, (8 * 5) - ld.d $r18, $r4, (8 * 6) - ld.d $r19, $r4, (8 * 7) - ld.d $r23, $r4, (8 * 8) - ld.d $r24, $r4, (8 * 9) - ld.d $r25, $r4, (8 * 10) - ld.d $r26, 
$r4, (8 * 11) - ld.d $r27, $r4, (8 * 12) - ld.d $r28, $r4, (8 * 13) - ld.d $r29, $r4, (8 * 14) - ld.d $r30, $r4, (8 * 15) - ld.d $r31, $r4, (8 * 16) // xFlags - ld.d $r20, $r4, (8 * 17) // xRIP + move Emu, $a0 + ld.d RAX, Emu, (8 * 0) + ld.d RCX, Emu, (8 * 1) + ld.d RDX, Emu, (8 * 2) + ld.d RBX, Emu, (8 * 3) + ld.d RSP, Emu, (8 * 4) + ld.d RBP, Emu, (8 * 5) + ld.d RSI, Emu, (8 * 6) + ld.d RDI, Emu, (8 * 7) + ld.d R8, Emu, (8 * 8) + ld.d R9, Emu, (8 * 9) + ld.d R10, Emu, (8 * 10) + ld.d R11, Emu, (8 * 11) + ld.d R12, Emu, (8 * 12) + ld.d R13, Emu, (8 * 13) + ld.d R14, Emu, (8 * 14) + ld.d R15, Emu, (8 * 15) + ld.d Flags, Emu, (8 * 16) // xFlags + ld.d RIP, Emu, (8 * 17) // xRIP // spill xFlags to LBT.eflags - la.global $a6, cpuext - ldptr.d $a6, $a6, 0 - andi $a6, $a6, 1 - beqz $a6, 1f - x86mtflag $r31, 0b111111 + la.global $r19, cpuext + ldptr.d $r19, $r19, 0 + andi $r19, $r19, 1 + beqz $r19, 1f + x86mtflag Flags, 0b111111 1: // push sentinel onto the stack - st.d $r0, $sp, -16 + st.d $r0, $sp, -16 st.d $r0, $sp, -8 addi.d $sp, $sp, -16 // save old sp into xSavedSP - addi.d $r22, $sp, 16 + addi.d SavedSP, $sp, 16 //jump to function - jirl $r0, $a1, 0 + jirl $r0, $r16, 0 diff --git a/src/emu/x64emu_private.h b/src/emu/x64emu_private.h index 15e85fa4..aba2b3ed 100644 --- a/src/emu/x64emu_private.h +++ b/src/emu/x64emu_private.h @@ -71,7 +71,7 @@ typedef struct x64emu_s { x87control_t cw; uint16_t dummy_cw; // align... mmxcontrol_t mxcsr; - #ifdef RV64 // it would be better to use a dedicated register for this like arm64 xSavedSP, but we're running of of free registers. + #ifdef RV64 // it would be better to use a dedicated register for this like arm64 xSavedSP, but we're running out of free registers. uintptr_t xSPSave; // sp base value of current dynarec frame, used by call/ret optimization to reset stack when unmatch. #endif fpu_ld_t fpu_ld[8]; // for long double emulation / 80bits fld fst |