diff options
| author | Yang Liu <numbksco@gmail.com> | 2024-03-04 04:41:31 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-03-03 21:41:31 +0100 |
| commit | d0edf2e8ddd264067d6c635fcd042ac9eadc4c78 (patch) | |
| tree | 5a7267dcab63f87f96ae3e1e1999d48cff2072d1 | |
| parent | a159cb5efeb8fe6b848692f86977fda27aa0703b (diff) | |
| download | box64-d0edf2e8ddd264067d6c635fcd042ac9eadc4c78.tar.gz box64-d0edf2e8ddd264067d6c635fcd042ac9eadc4c78.zip | |
[LA64_DYNAREC] Added more opcodes and more fixes, SuperHexagon is working (#1319)
* Made printer slightly better * [SIGNAL] Fixed a typo * Removed useless comma * Another typo * Keep'em in order * Added more opcodes and more fixes * A big fix of GOCOND macro * [LA64_DYNAREC] Added E8 CALL opcode * [LA64_DYNAREC] Added C6 MOV opcode * [LA64_DYNAREC] Added 39 CMP opcode * [LA64_DYNAREC] Added 0F 1F NOP opcode * BEWARE: shift instructions are NOT sign-extended! * More fixes * Try to be safe * Fixed printer * Fixed emitter & printer for B, bring superhexagon back
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_00.c | 310 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_0f.c | 5 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_emit_math.c | 6 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_emit_tests.c | 181 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_helper.c | 2 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_helper.h | 45 | ||||
| -rw-r--r-- | src/dynarec/la64/la64_emitter.h | 55 | ||||
| -rw-r--r-- | src/dynarec/la64/la64_next.S | 2 | ||||
| -rw-r--r-- | src/dynarec/la64/la64_printer.c | 16 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.h | 2 | ||||
| -rw-r--r-- | src/libtools/signals.c | 8 |
11 files changed, 535 insertions, 97 deletions
diff --git a/src/dynarec/la64/dynarec_la64_00.c b/src/dynarec/la64/dynarec_la64_00.c index 351ec85c..d4b79011 100644 --- a/src/dynarec/la64/dynarec_la64_00.c +++ b/src/dynarec/la64/dynarec_la64_00.c @@ -94,7 +94,8 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni u8 = F8; ANDI(x1, xRAX, 0xff); emit_add8c(dyn, ninst, x1, u8, x3, x4, x5); - ANDI(xRAX, xRAX, ~0xff); + ADDI_W(x3, xZR, 0xf00); + AND(xRAX, xRAX, x3); OR(xRAX, xRAX, x1); break; case 0x05: @@ -156,7 +157,8 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni u8 = F8; ANDI(x1, xRAX, 0xff); emit_sub8c(dyn, ninst, x1, u8, x2, x3, x4, x5); - ANDI(xRAX, xRAX, ~0xff); + ADDI_W(x3, xZR, 0xf00); + AND(xRAX, xRAX, x3); OR(xRAX, xRAX, x1); break; case 0x2D: @@ -165,6 +167,24 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni i64 = F32S; emit_sub32c(dyn, ninst, rex, xRAX, i64, x2, x3, x4, x5); break; + case 0x39: + INST_NAME("CMP Ed, Gd"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGD; + GETED(0); + emit_cmp32(dyn, ninst, rex, ed, gd, x3, x4, x5, x6); + break; + case 0x3D: + INST_NAME("CMP EAX, Id"); + SETFLAGS(X_ALL, SF_SET_PENDING); + i64 = F32S; + if(i64) { + MOV64xw(x2, i64); + emit_cmp32(dyn, ninst, rex, xRAX, x2, x3, x4, x5, x6); + } else + emit_cmp32_0(dyn, ninst, rex, xRAX, x3, x4); + break; case 0x50: case 0x51: case 0x52: @@ -193,6 +213,66 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x66: addr = dynarec64_66(dyn, addr, ip, ninst, rex, rep, ok, need_epilog); break; + + #define GO(GETFLAGS, NO, YES, F, I) \ + READFLAGS(F); \ + i8 = F8S; \ + BARRIER(BARRIER_MAYBE); \ + JUMP(addr + i8, 1); \ + if (la64_lbt && (opcode - 0x70) >= 0xC) { \ + X64_SET_EFLAGS(xFlags, F); \ + X64_SETJ(x1, I); \ + } else { \ + GETFLAGS; \ + } \ + if (dyn->insts[ninst].x64.jmp_insts == -1 || CHECK_CACHE()) { \ + /* out of block */ \ + i32 = dyn->insts[ninst].epilog - 
(dyn->native_size); \ + if (la64_lbt && (opcode - 0x70) >= 0xC) \ + BEQZ_safe(x1, i32); \ + else \ + B##NO##_safe(x1, i32); \ + if (dyn->insts[ninst].x64.jmp_insts == -1) { \ + if (!(dyn->insts[ninst].x64.barrier & BARRIER_FLOAT)) \ + fpu_purgecache(dyn, ninst, 1, x1, x2, x3); \ + jump_to_next(dyn, addr + i8, 0, ninst, rex.is32bits); \ + } else { \ + CacheTransform(dyn, ninst, cacheupd, x1, x2, x3); \ + i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address - (dyn->native_size); \ + B(i32); \ + } \ + } else { \ + /* inside the block */ \ + i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address - (dyn->native_size); \ + if (la64_lbt && (opcode - 0x70) >= 0xC) \ + BNEZ_safe(x1, i32); \ + else \ + B##YES##_safe(x1, i32); \ + } + + GOCOND(0x70, "J", "ib"); + + #undef GO + + case 0x80: + nextop = F8; + switch((nextop>>3)&7) { + case 7: // CMP + INST_NAME("CMP Eb, Ib"); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEB(x1, 1); + u8 = F8; + if(u8) { + ADDI_D(x2, xZR, u8); + emit_cmp8(dyn, ninst, x1, x2, x3, x4, x5, x6); + } else { + emit_cmp8_0(dyn, ninst, x1, x3, x4); + } + break; + default: + DEFAULT; + } + break; case 0x81: case 0x83: nextop = F8; @@ -224,51 +304,30 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni emit_sub32c(dyn, ninst, rex, ed, i64, x3, x4, x5, x6); WBACK; break; + case 7: // CMP + if (opcode == 0x81) { + INST_NAME("CMP Ed, Id"); + } else { + INST_NAME("CMP Ed, Ib"); + } + SETFLAGS(X_ALL, SF_SET_PENDING); + GETED((opcode == 0x81) ? 
4 : 1); + if (opcode == 0x81) i64 = F32S; else i64 = F8S; + if (i64) { + MOV64xw(x2, i64); + emit_cmp32(dyn, ninst, rex, ed, x2, x3, x4, x5, x6); + } else { + if (!rex.w && MODREG) { + AND(x1, ed, xMASK); + ed = x1; + } + emit_cmp32_0(dyn, ninst, rex, ed, x3, x4); + } + break; default: DEFAULT; } break; - - #define GO(GETFLAGS, NO, YES, F, I) \ - READFLAGS(F); \ - i8 = F8S; \ - BARRIER(BARRIER_MAYBE); \ - JUMP(addr + i8, 1); \ - if (la64_lbt && I >= 0xC) { \ - X64_SET_EFLAGS(xFlags, F); \ - X64_SETJ(x1, I); \ - } else { \ - GETFLAGS; \ - } \ - if (dyn->insts[ninst].x64.jmp_insts == -1 || CHECK_CACHE()) { \ - /* out of block */ \ - i32 = dyn->insts[ninst].epilog - (dyn->native_size); \ - if (la64_lbt && I >= 0xC) \ - BEQZ(x1, i32); \ - else \ - B##NO(x1, i32); \ - if (dyn->insts[ninst].x64.jmp_insts == -1) { \ - if (!(dyn->insts[ninst].x64.barrier & BARRIER_FLOAT)) \ - fpu_purgecache(dyn, ninst, 1, x1, x2, x3); \ - jump_to_next(dyn, addr + i8, 0, ninst, rex.is32bits); \ - } else { \ - CacheTransform(dyn, ninst, cacheupd, x1, x2, x3); \ - i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address - (dyn->native_size); \ - B(i32); \ - } \ - } else { \ - /* inside the block */ \ - i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address - (dyn->native_size); \ - if (la64_lbt && I >= 0xC) \ - BNEZ(x1, i32); \ - else \ - B##YES(x1, i32); \ - } - - GOCOND(0x70, "J", "ib"); - - #undef GO - case 0x85: INST_NAME("TEST Ed, Gd"); SETFLAGS(X_ALL, SF_SET_PENDING); @@ -371,6 +430,51 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni *need_epilog = 0; *ok = 0; break; + case 0xC6: + INST_NAME("MOV Eb, Ib"); + nextop = F8; + if (MODREG) { // reg <= u8 + u8 = F8; + if (!rex.rex) { + ed = (nextop & 7); + eb1 = TO_LA64((ed & 3)); // Ax, Cx, Dx or Bx + eb2 = (ed & 4) >> 2; // L or H + } else { + eb1 = TO_LA64((nextop & 7) + (rex.b << 3)); + eb2 = 0; + } + if (eb2) { + // load a mask to x3 (ffffffffffff00ff) + LU12I_W(x3, 0xffff0); + ORI(x3, x3, 0xff); + 
// apply mask + AND(eb1, eb1, x3); + if (u8) { + if ((u8 << 8) < 2048) { + ADDI_D(x4, xZR, u8 << 8); + } else { + ADDI_D(x4, xZR, u8); + SLLI_D(x4, x4, 8); + } + OR(eb1, eb1, x4); + } + } else { + ADDI_W(x3, xZR, 0xf00); // mask ffffffffffffff00 + AND(eb1, eb1, x3); + ORI(eb1, eb1, u8); + } + } else { // mem <= u8 + addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, &lock, 0, 1); + u8 = F8; + if (u8) { + ADDI_D(x3, xZR, u8); + ed = x3; + } else + ed = xZR; + ST_B(ed, wback, fixedaddress); + SMWRITELOCK(lock); + } + break; case 0xC7: INST_NAME("MOV Ed, Id"); nextop = F8; @@ -461,6 +565,126 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni DEFAULT; } break; + case 0xE8: + INST_NAME("CALL Id"); + i32 = F32S; + if (addr + i32 == 0) { + #if STEP == 3 + printf_log(LOG_INFO, "Warning, CALL to 0x0 at %p (%p)\n", (void*)addr, (void*)(addr - 1)); + #endif + } + #if STEP < 2 + if (!rex.is32bits && isNativeCall(dyn, addr + i32, &dyn->insts[ninst].natcall, &dyn->insts[ninst].retn)) + tmp = dyn->insts[ninst].pass2choice = 3; + else + tmp = dyn->insts[ninst].pass2choice = 0; + #else + tmp = dyn->insts[ninst].pass2choice; + #endif + switch (tmp) { + case 3: + SETFLAGS(X_ALL, SF_SET); // Hack to set flags to "dont'care" state + SKIPTEST(x1); + BARRIER(BARRIER_FULL); + // BARRIER_NEXT(BARRIER_FULL); + if (dyn->last_ip && (addr - dyn->last_ip < 0x1000)) { + ADDI_D(x2, xRIP, addr - dyn->last_ip); + } else { + TABLE64(x2, addr); + } + PUSH1(x2); + MESSAGE(LOG_DUMP, "Native Call to %s (retn=%d)\n", GetNativeName(GetNativeFnc(dyn->insts[ninst].natcall - 1)), dyn->insts[ninst].retn); + // calling a native function + sse_purge07cache(dyn, ninst, x3); + if ((box64_log < 2 && !cycle_log) && dyn->insts[ninst].natcall) { + // FIXME: Add basic support for isSimpleWrapper + tmp = 0; // isSimpleWrapper(*(wrapper_t*)(dyn->insts[ninst].natcall + 2)); + } else + tmp = 0; + if (tmp < 0 || tmp > 1) + tmp = 0; // TODO: removed when FP is in 
place + // FIXME: if (dyn->insts[ninst].natcall && isRetX87Wrapper(*(wrapper_t*)(dyn->insts[ninst].natcall + 2))) + // // return value will be on the stack, so the stack depth needs to be updated + // x87_purgecache(dyn, ninst, 0, x3, x1, x4); + if ((box64_log < 2 && !cycle_log) && dyn->insts[ninst].natcall && tmp) { + // GETIP(ip+3+8+8); // read the 0xCC + // FIXME: call_n(dyn, ninst, *(void**)(dyn->insts[ninst].natcall + 2 + 8), tmp); + POP1(xRIP); // pop the return address + dyn->last_ip = addr; + } else { + GETIP_(dyn->insts[ninst].natcall); // read the 0xCC already + STORE_XEMU_CALL(); + ADDI_D(x1, xEmu, (uint32_t)offsetof(x64emu_t, ip)); // setup addr as &emu->ip + CALL_S(x64Int3, -1); + LOAD_XEMU_CALL(); + TABLE64(x3, dyn->insts[ninst].natcall); + ADDI_D(x3, x3, 2 + 8 + 8); + BNE_MARK(xRIP, x3); // Not the expected address, exit dynarec block + POP1(xRIP); // pop the return address + if (dyn->insts[ninst].retn) { + if (dyn->insts[ninst].retn < 0x1000) { + ADDI_D(xRSP, xRSP, dyn->insts[ninst].retn); + } else { + MOV64x(x3, dyn->insts[ninst].retn); + ADD_D(xRSP, xRSP, x3); + } + } + TABLE64(x3, addr); + BNE_MARK(xRIP, x3); // Not the expected address again + LD_W(w1, xEmu, offsetof(x64emu_t, quit)); + CBZ_NEXT(w1); + MARK; + jump_to_epilog_fast(dyn, 0, xRIP, ninst); + dyn->last_ip = addr; + } + break; + default: + if ((box64_dynarec_safeflags > 1) || (ninst && dyn->insts[ninst - 1].x64.set_flags)) { + READFLAGS(X_PEND); // that's suspicious + } else { + SETFLAGS(X_ALL, SF_SET); // Hack to set flags to "dont'care" state + } + // regular call + if (box64_dynarec_callret && box64_dynarec_bigblock > 1) { + BARRIER(BARRIER_FULL); + } else { + BARRIER(BARRIER_FLOAT); + *need_epilog = 0; + *ok = 0; + } + + if (rex.is32bits) { + MOV32w(x2, addr); + } else { + TABLE64(x2, addr); + } + PUSH1z(x2); + if (box64_dynarec_callret) { + SET_HASCALLRET(); + // Push actual return address + if (addr < (dyn->start + dyn->isize)) { + // there is a next... + j64 = (dyn->insts) ? 
(dyn->insts[ninst].epilog - (dyn->native_size)) : 0; + PCADDU12I(x4, ((j64 + 0x800) >> 12) & 0xfffff); + ADDI_D(x4, x4, j64 & 0xfff); + MESSAGE(LOG_NONE, "\tCALLRET set return to +%di\n", j64 >> 2); + } else { + MESSAGE(LOG_NONE, "\tCALLRET set return to Jmptable(%p)\n", (void*)addr); + j64 = getJumpTableAddress64(addr); + TABLE64(x4, j64); + LD_D(x4, x4, 0); + } + ADDI_D(xSP, xSP, -16); + ST_D(x4, xSP, 0); + ST_D(x2, xSP, 8); + } else { + *ok = 0; + *need_epilog = 0; + } + jump_to_next(dyn, addr + i32, 0, ninst, rex.is32bits); + break; + } + break; case 0xE9: case 0xEB: BARRIER(BARRIER_MAYBE); diff --git a/src/dynarec/la64/dynarec_la64_0f.c b/src/dynarec/la64/dynarec_la64_0f.c index 4c1a2aba..92f09657 100644 --- a/src/dynarec/la64/dynarec_la64_0f.c +++ b/src/dynarec/la64/dynarec_la64_0f.c @@ -75,6 +75,11 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni LOAD_XEMU_REM(); jump_to_epilog(dyn, 0, xRIP, ninst); break; + case 0x1F: + INST_NAME("NOP (multibyte)"); + nextop = F8; + FAKEED; + break; default: DEFAULT; } diff --git a/src/dynarec/la64/dynarec_la64_emit_math.c b/src/dynarec/la64/dynarec_la64_emit_math.c index ffcc9301..30dc3313 100644 --- a/src/dynarec/la64/dynarec_la64_emit_math.c +++ b/src/dynarec/la64/dynarec_la64_emit_math.c @@ -441,7 +441,7 @@ void emit_sub8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i IFX(X_AF | X_CF | X_OF) { // for later flag calculation - NOT(s5, s1); + NOR(s5, xZR, s1); } SUB_D(s1, s1, s2); @@ -499,7 +499,7 @@ void emit_sub32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s IFX(X_AF | X_CF | X_OF) { // for later flag calculation - NOT(s5, s1); + NOR(s5, xZR, s1); } SUBxw(s1, s1, s2); @@ -565,7 +565,7 @@ void emit_sub32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i IFX(X_AF | X_CF | X_OF) { // for later flag calculation - NOT(s5, s1); + NOR(s5, xZR, s1); } if (c > -2048 && c <= 2048) { diff --git a/src/dynarec/la64/dynarec_la64_emit_tests.c 
b/src/dynarec/la64/dynarec_la64_emit_tests.c index c425d057..e6ab543e 100644 --- a/src/dynarec/la64/dynarec_la64_emit_tests.c +++ b/src/dynarec/la64/dynarec_la64_emit_tests.c @@ -22,6 +22,187 @@ #include "dynarec_la64_helper.h" +// emit CMP8 instruction, from cmp s1, s2, using s3, s4, s5 and s6 as scratch +void emit_cmp8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5, int s6) +{ + CLEAR_FLAGS(s3); + IFX_PENDOR0 { + ST_B(s1, xEmu, offsetof(x64emu_t, op1)); + ST_B(s2, xEmu, offsetof(x64emu_t, op2)); + SET_DF(s4, d_cmp8); + } else { + SET_DFNONE(); + } + + if (la64_lbt) { + IFX(X_ALL) { + X64_SUB_B(s1, s2); + X64_GET_EFLAGS(s3, X_ALL); + OR(xFlags, xFlags, s3); + } + + IFX_PENDOR0 { + SUB_D(s6, s1, s2); + ST_B(s6, xEmu, offsetof(x64emu_t, res)); + } + return; + } + + IFX(X_AF | X_CF | X_OF) { + // for later flag calculation + NOR(s5, xZR, s1); + } + + // It's a cmp, we can't store the result back to s1. + SUB_D(s6, s1, s2); + ANDI(s6, s6, 0xff); + IFX_PENDOR0 { + ST_B(s6, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_SF) { + SRLI_D(s3, s6, 7); + BEQZ(s3, 8); + ORI(xFlags, xFlags, 1 << F_SF); + } + CALC_SUB_FLAGS(s5, s2, s6, s3, s4, 8); + IFX(X_ZF) { + BNEZ(s6, 8); + ORI(xFlags, xFlags, 1 << F_ZF); + } + IFX(X_PF) { + emit_pf(dyn, ninst, s6, s3, s4); + } +} + +// emit CMP8 instruction, from cmp s1 , 0, using s3 and s4 as scratch +void emit_cmp8_0(dynarec_la64_t* dyn, int ninst, int s1, int s3, int s4) +{ + CLEAR_FLAGS(s3); + IFX_PENDOR0 { + ST_B(s1, xEmu, offsetof(x64emu_t, op1)); + ST_B(xZR, xEmu, offsetof(x64emu_t, op2)); + ST_B(s1, xEmu, offsetof(x64emu_t, res)); + SET_DF(s3, d_cmp8); + } else { + SET_DFNONE(); + } + + if (la64_lbt) { + IFX(X_ALL) { + X64_SUB_B(s1, xZR); + X64_GET_EFLAGS(s3, X_ALL); + OR(xFlags, xFlags, s3); + } + return; + } + + IFX(X_SF) { + SRLI_D(s3, s1, 7); + BEQZ(s3, 8); + ORI(xFlags, xFlags, 1 << F_SF); + } + IFX(X_ZF) { + BNEZ(s1, 8); + ORI(xFlags, xFlags, 1 << F_ZF); + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); 
+ } +} + +// emit CMP32 instruction, from cmp s1, s2, using s3 and s4 as scratch +void emit_cmp32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5, int s6) +{ + CLEAR_FLAGS(s3); + IFX_PENDOR0 { + SDxw(s1, xEmu, offsetof(x64emu_t, op1)); + SDxw(s2, xEmu, offsetof(x64emu_t, op2)); + SET_DF(s4, rex.w?d_cmp64:d_cmp32); + } else { + SET_DFNONE(); + } + + if (la64_lbt) { + IFX(X_ALL) { + if (rex.w) X64_SUB_D(s1, s2); else X64_SUB_W(s1, s2); + X64_GET_EFLAGS(s3, X_ALL); + OR(xFlags, xFlags, s3); + } + + IFX_PENDOR0 { + SUBxw(s6, s1, s2); + SDxw(s6, xEmu, offsetof(x64emu_t, res)); + } + return; + } + + IFX(X_AF | X_CF | X_OF) { + // for later flag calculation + NOR(s5, xZR, s1); + } + + // It's a cmp, we can't store the result back to s1. + SUBxw(s6, s1, s2); + IFX_PENDOR0 { + SDxw(s6, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_SF) { + BGE(s6, xZR, 8); + ORI(xFlags, xFlags, 1 << F_SF); + } + if (!rex.w) { + ZEROUP(s6); + } + CALC_SUB_FLAGS(s5, s2, s6, s3, s4, rex.w?64:32); + IFX(X_ZF) { + BNEZ(s6, 8); + ORI(xFlags, xFlags, 1 << F_ZF); + } + IFX(X_PF) { + emit_pf(dyn, ninst, s6, s3, s4); + } +} + +// emit CMP32 instruction, from cmp s1, 0, using s3 and s4 as scratch +void emit_cmp32_0(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4) +{ + CLEAR_FLAGS(s3); + IFX_PENDOR0 { + ST_D(s1, xEmu, offsetof(x64emu_t, op1)); + ST_D(xZR, xEmu, offsetof(x64emu_t, op2)); + ST_D(s1, xEmu, offsetof(x64emu_t, res)); + SET_DF(s4, rex.w?d_cmp64:d_cmp32); + } else { + SET_DFNONE(); + } + + if (la64_lbt) { + IFX(X_ALL) { + if (rex.w) X64_SUB_D(s1, xZR); else X64_SUB_W(s1, xZR); + X64_GET_EFLAGS(s3, X_ALL); + OR(xFlags, xFlags, s3); + } + return; + } + + IFX(X_SF) { + if (rex.w) { + BGE(s1, xZR, 8); + } else { + SRLI_D(s3, s1, 31); + BEQZ(s3, 8); + } + ORI(xFlags, xFlags, 1 << F_SF); + } + IFX(X_ZF) { + BNEZ(s1, 8); + ORI(xFlags, xFlags, 1 << F_ZF); + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} + // emit TEST32 instruction, from 
test s1, s2, using s3 and s4 as scratch void emit_test32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5) { diff --git a/src/dynarec/la64/dynarec_la64_helper.c b/src/dynarec/la64/dynarec_la64_helper.c index f8cd8de1..ea0f7d11 100644 --- a/src/dynarec/la64/dynarec_la64_helper.c +++ b/src/dynarec/la64/dynarec_la64_helper.c @@ -576,7 +576,7 @@ void emit_pf(dynarec_la64_t* dyn, int ninst, int s1, int s3, int s4) ANDI(s3, s3, 28); ADD_D(s4, s4, s3); LD_W(s4, s4, 0); - NOT(s4, s4); + NOR(s4, xZR, s4); SRL_W(s4, s4, s1); ANDI(s4, s4, 1); diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h index 67d562b2..c844c049 100644 --- a/src/dynarec/la64/dynarec_la64_helper.h +++ b/src/dynarec/la64/dynarec_la64_helper.h @@ -40,7 +40,6 @@ // All Write operation that might use a lock all have a memory barrier if strongmem is >= SMWRITE_MIN // Opcode will read #define SMREAD() \ - ; \ if ((dyn->smread == 0) && (box64_dynarec_strongmem > SMREAD_MIN)) { \ SMDMB(); \ } else \ @@ -177,7 +176,8 @@ SLLI_D(s1, gd, 8); \ OR(gb1, gb1, s1); \ } else { \ - ANDI(gb1, gb1, ~0xff); \ + ADDI_W(s1, xZR, 0xf00); \ + AND(gb1, gb1, s1); \ OR(gb1, gb1, gd); \ } @@ -193,7 +193,8 @@ SLLI_D(s1, ed, 8); \ OR(wback, wback, s1); \ } else { \ - ANDI(wback, wback, ~0xff); \ + ADDI_W(s1, xZR, 0xf00); \ + AND(wback, wback, s1); \ if (c) { ANDI(ed, ed, 0xff); } \ OR(wback, wback, ed); \ } @@ -459,6 +460,10 @@ void* la64_next(x64emu_t* emu, uintptr_t addr); #define jump_to_next STEPNAME(jump_to_next) #define ret_to_epilog STEPNAME(ret_to_epilog) #define call_c STEPNAME(call_c) +#define emit_cmp32 STEPNAME(emit_cmp32) +#define emit_cmp32_0 STEPNAME(emit_cmp32_0) +#define emit_cmp8 STEPNAME(emit_cmp8) +#define emit_cmp8_0 STEPNAME(emit_cmp8_0) #define emit_test32 STEPNAME(emit_test32) #define emit_add32 STEPNAME(emit_add32) #define emit_add32c STEPNAME(emit_add32c) @@ -499,6 +504,10 @@ void jump_to_epilog_fast(dynarec_la64_t* dyn, uintptr_t ip, 
int reg, int ninst); void jump_to_next(dynarec_la64_t* dyn, uintptr_t ip, int reg, int ninst, int is32bits); void ret_to_epilog(dynarec_la64_t* dyn, int ninst, rex_t rex); void call_c(dynarec_la64_t* dyn, int ninst, void* fnc, int reg, int ret, int saveflags, int save_reg); +void emit_cmp8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5, int s6); +void emit_cmp32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5, int s6); +void emit_cmp8_0(dynarec_la64_t* dyn, int ninst, int s1, int s3, int s4); +void emit_cmp32_0(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4); void emit_test32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5); void emit_add32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5); void emit_add32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s2, int s3, int s4, int s5); @@ -569,19 +578,19 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int break; \ case B + 0x2: \ INST_NAME(T1 "C " T2); \ - GO(ANDI(x1, xFlags, 1 << F_CF), EQZ, NEZ, X_CF, X64_JMP_JB); \ + GO(ANDI(x1, xFlags, 1 << F_CF), EQZ, NEZ, X_CF, X64_JMP_JC); \ break; \ case B + 0x3: \ INST_NAME(T1 "NC " T2); \ - GO(ANDI(x1, xFlags, 1 << F_CF), NEZ, EQZ, X_CF, X64_JMP_JNB); \ + GO(ANDI(x1, xFlags, 1 << F_CF), NEZ, EQZ, X_CF, X64_JMP_JNC); \ break; \ case B + 0x4: \ INST_NAME(T1 "Z " T2); \ - GO(ANDI(x1, xFlags, 1 << F_ZF), EQZ, NEZ, X_ZF, X64_JMP_JE); \ + GO(ANDI(x1, xFlags, 1 << F_ZF), EQZ, NEZ, X_ZF, X64_JMP_JZ); \ break; \ case B + 0x5: \ INST_NAME(T1 "NZ " T2); \ - GO(ANDI(x1, xFlags, 1 << F_ZF), NEZ, EQZ, X_ZF, X64_JMP_JNE); \ + GO(ANDI(x1, xFlags, 1 << F_ZF), NEZ, EQZ, X_ZF, X64_JMP_JNZ); \ break; \ case B + 0x6: \ INST_NAME(T1 "BE " T2); \ @@ -589,7 +598,7 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int break; \ case B + 0x7: \ INST_NAME(T1 "NBE " T2); \ - 
GO(ANDI(x1, xFlags, (1 << F_CF) | (1 << F_ZF)), NEZ, EQZ, X_CF | X_ZF, X64_JMP_JA); \ + GO(ANDI(x1, xFlags, (1 << F_CF) | (1 << F_ZF)), NEZ, EQZ, X_CF | X_ZF, X64_JMP_JNBE); \ break; \ case B + 0x8: \ INST_NAME(T1 "S " T2); \ @@ -609,33 +618,33 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int break; \ case B + 0xC: \ INST_NAME(T1 "L " T2); \ - GO(SRLI_D(x1, xFlags, F_SF - F_OF); \ + GO(SRLI_D(x1, xFlags, F_OF - F_SF); \ XOR(x1, x1, xFlags); \ - ANDI(x1, x1, 1 << F_OF), EQZ, NEZ, X_SF | X_OF, X64_JMP_JL); \ + ANDI(x1, x1, 1 << F_SF), EQZ, NEZ, X_SF | X_OF, X64_JMP_JL); \ break; \ case B + 0xD: \ INST_NAME(T1 "GE " T2); \ - GO(SRLI_D(x1, xFlags, F_SF - F_OF); \ + GO(SRLI_D(x1, xFlags, F_OF - F_SF); \ XOR(x1, x1, xFlags); \ - ANDI(x1, x1, 1 << F_OF), NEZ, EQZ, X_SF | X_OF, X64_JMP_JGE); \ + ANDI(x1, x1, 1 << F_SF), NEZ, EQZ, X_SF | X_OF, X64_JMP_JGE); \ break; \ case B + 0xE: \ INST_NAME(T1 "LE " T2); \ - GO(SRLI_D(x1, xFlags, F_SF - F_OF); \ + GO(SRLI_D(x1, xFlags, F_OF - F_SF); \ XOR(x1, x1, xFlags); \ - ANDI(x1, x1, 1 << F_OF); \ + ANDI(x1, x1, 1 << F_SF); \ ANDI(x3, xFlags, 1 << F_ZF); \ OR(x1, x1, x3); \ - ANDI(x1, x1, (1 << F_OF) | (1 << F_ZF)), EQZ, NEZ, X_SF | X_OF | X_ZF, X64_JMP_JLE); \ + ANDI(x1, x1, (1 << F_SF) | (1 << F_ZF)), EQZ, NEZ, X_SF | X_OF | X_ZF, X64_JMP_JLE); \ break; \ case B + 0xF: \ INST_NAME(T1 "G " T2); \ - GO(SRLI_D(x1, xFlags, F_SF - F_OF); \ + GO(SRLI_D(x1, xFlags, F_OF - F_SF); \ XOR(x1, x1, xFlags); \ - ANDI(x1, x1, 1 << F_OF); \ + ANDI(x1, x1, 1 << F_SF); \ ANDI(x3, xFlags, 1 << F_ZF); \ OR(x1, x1, x3); \ - ANDI(x1, x1, (1 << F_OF) | (1 << F_ZF)), NEZ, EQZ, X_SF | X_OF | X_ZF, X64_JMP_JG); \ + ANDI(x1, x1, (1 << F_SF) | (1 << F_ZF)), NEZ, EQZ, X_SF | X_OF | X_ZF, X64_JMP_JG); \ break #define NOTEST(s1) \ diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h index a92fd180..35b593ba 100644 --- a/src/dynarec/la64/la64_emitter.h +++ b/src/dynarec/la64/la64_emitter.h @@ -128,7 
+128,7 @@ f24-f31 fs0-fs7 Static registers Callee #define type_1RI20(opc, imm20, rd) ((opc) << 25 | ((imm20) & 0xFFFFF) << 5 | (rd)) #define type_1RI21(opc, imm21, rj) ((opc) << 26 | ((imm21) & 0xFFFF) << 10 | (rj) << 5 | ((imm21) & 0x1F0000) >> 16) #define type_hint(opc, imm15) ((opc) << 15 | ((imm15) & 0x7FFF)) -#define type_I26(opc, imm26) ((opc) << 26 | ((imm26) & 0xFFFF) << 10 | ((imm26) & 0x3FF0000)) +#define type_I26(opc, imm26) ((opc) << 26 | ((imm26) & 0xFFFF) << 10 | ((imm26 >> 16) & 0x3FF)) // Made-up formats not found in the spec. #define type_2RI3(opc, imm3, rj, rd) ((opc) << 13 | ((imm3) & 0x7 ) << 10 | (rj) << 5 | (rd)) @@ -354,6 +354,28 @@ f24-f31 fs0-fs7 Static registers Callee B(imm - 4); \ } +#define BEQZ_safe(rj, imm) \ + do { \ + if ((imm) > -0x70000 && (imm) < 0x70000) { \ + BEQZ(rj, imm); \ + NOP(); \ + } else { \ + BNEZ(rj, 8); \ + B(imm - 4); \ + } \ + } while (0) + +#define BNEZ_safe(rj, imm) \ + do { \ + if ((imm) > -0x70000 && (imm) < 0x70000) { \ + BNEZ(rj, imm); \ + NOP(); \ + } else { \ + BEQZ(rj, 8); \ + B(imm - 4); \ + } \ + } while (0) + // vaddr = GR[rj] + SignExtend(imm12, GRLEN) // AddressComplianceCheck(vaddr) // paddr = AddressTranslation(vaddr) @@ -456,20 +478,20 @@ f24-f31 fs0-fs7 Static registers Callee // Reads OF/SF/ZF/CF/PF, set rd based on imm. 
#define X64_SETJ(rd, imm) EMIT(type_2RI4(0b000000000011011010, imm, 0, rd)) // Here are the available enums: -#define X64_JMP_JO 0 /* OF=1 */ -#define X64_JMP_JNO 1 /* OF=0 */ -#define X64_JMP_JS 2 /* SF=1 */ -#define X64_JMP_JNS 3 /* SF=0 */ -#define X64_JMP_JE 4 /* ZF=1 */ -#define X64_JMP_JNE 5 /* ZF=0 */ -#define X64_JMP_JB 6 /* CF=1 */ -#define X64_JMP_JNB 7 /* CF=0 */ -#define X64_JMP_JBE 8 /* CF=1 || ZF=1 */ -#define X64_JMP_JA 9 /* CF=0 && ZF=0 */ -#define X64_JMP_JL 10 /* SF != OF */ -#define X64_JMP_JGE 11 /* SF == OF */ -#define X64_JMP_JLE 12 /* ZF=1 || SF != OF */ -#define X64_JMP_JG 13 /* ZF=0 && SF == OF */ +#define X64_JMP_JNBE 0 /* CF=0 && ZF=0 */ +#define X64_JMP_JNC 1 /* CF=0 */ +#define X64_JMP_JC 2 /* CF=1 */ +#define X64_JMP_JBE 3 /* CF=1 || ZF=1 */ +#define X64_JMP_JZ 4 /* ZF=1 */ +#define X64_JMP_JNZ 5 /* ZF=0 */ +#define X64_JMP_JG 6 /* ZF=0 && SF == OF */ +#define X64_JMP_JGE 7 /* SF == OF */ +#define X64_JMP_JL 8 /* SF != OF */ +#define X64_JMP_JLE 9 /* ZF=1 || SF != OF */ +#define X64_JMP_JS 10 /* SF=1 */ +#define X64_JMP_JNS 11 /* SF=0 */ +#define X64_JMP_JO 12 /* OF=1 */ +#define X64_JMP_JNO 13 /* OF=0 */ #define X64_JMP_JP 14 /* PF=1 */ #define X64_JMP_JNP 15 /* PF=0 */ @@ -632,9 +654,6 @@ f24-f31 fs0-fs7 Static registers Callee MV(rd, rj); \ } -// rd = !rs1 -#define NOT(rd, rs1) XORI(rd, rs1, -1) - #define ADDIxw(rd, rj, imm12) \ if (rex.w) \ ADDI_D(rd, rj, imm12); \ diff --git a/src/dynarec/la64/la64_next.S b/src/dynarec/la64/la64_next.S index d683ab80..f98558b0 100644 --- a/src/dynarec/la64/la64_next.S +++ b/src/dynarec/la64/la64_next.S @@ -26,7 +26,7 @@ la64_next: st.d $r17, $sp, 64 st.d $r18, $sp, 72 st.d $r19, $sp, 80 - st.d $r20, $sp, 88 // also save r30(rip) to allow change in LinkNext + st.d $r20, $sp, 88 // also save r20(rip) to allow change in LinkNext move $a2, $ra // "from" is in ra, so put in a2 addi.d $a3, $sp, 88 // a3 is address to change rip diff --git a/src/dynarec/la64/la64_printer.c 
b/src/dynarec/la64/la64_printer.c index 72ada67d..cdb9a4c6 100644 --- a/src/dynarec/la64/la64_printer.c +++ b/src/dynarec/la64/la64_printer.c @@ -112,7 +112,7 @@ const char* la64_print(uint32_t opcode, uintptr_t addr) } // LU12I.W if(isMask(opcode, "0001010iiiiiiiiiiiiiiiiiiiiddddd", &a)) { - snprintf(buff, sizeof(buff), "%-15s %s, %d", "LU12I.W", Xt[Rd], imm); + snprintf(buff, sizeof(buff), "%-15s %s, 0x%x", "LU12I.W", Xt[Rd], imm); return buff; } // LU32I.D @@ -130,19 +130,19 @@ const char* la64_print(uint32_t opcode, uintptr_t addr) snprintf(buff, sizeof(buff), "%-15s %s, %d", "PCADDI", Xt[Rd], imm); return buff; } - // PCADDU12I + // PCALAU12I if(isMask(opcode, "0001101iiiiiiiiiiiiiiiiiiiiddddd", &a)) { - snprintf(buff, sizeof(buff), "%-15s %s, %d", "PCADDU12I", Xt[Rd], imm); + snprintf(buff, sizeof(buff), "%-15s %s, %d", "PCALAU12I", Xt[Rd], imm); return buff; } - // PCADDU18I + // PCADDU12I if(isMask(opcode, "0001110iiiiiiiiiiiiiiiiiiiiddddd", &a)) { - snprintf(buff, sizeof(buff), "%-15s %s, %d", "PCADDU18I", Xt[Rd], imm); + snprintf(buff, sizeof(buff), "%-15s %s, %d", "PCADDU12I", Xt[Rd], imm); return buff; } - // PCALAU12I + // PCADDU18I if(isMask(opcode, "0001111iiiiiiiiiiiiiiiiiiiiddddd", &a)) { - snprintf(buff, sizeof(buff), "%-15s %s, %d", "PCALAU12I", Xt[Rd], imm); + snprintf(buff, sizeof(buff), "%-15s %s, %d", "PCADDU18I", Xt[Rd], imm); return buff; } // AND @@ -307,7 +307,7 @@ const char* la64_print(uint32_t opcode, uintptr_t addr) } // B if(isMask(opcode, "010100iiiiiiiiiiiiiiiiiiiiiiiiii", &a)) { - snprintf(buff, sizeof(buff), "%-15s 0x%x", "B", imm); + snprintf(buff, sizeof(buff), "%-15s 0x%x", "B", (((imm & 0x3FF) << 16) | ((uint32_t)imm >> 10)) << 6 >> 4); return buff; } // LD.B diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h index a7b3f5f1..bb2572ad 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.h +++ b/src/dynarec/rv64/dynarec_rv64_helper.h @@ -40,7 +40,7 @@ // Sequence of Write will trigger a 
DMB on "last" write if strongmem is >= 1 // All Write operation that might use a lock all have a memory barrier if strongmem is >= SMWRITE_MIN // Opcode will read -#define SMREAD(); if((dyn->smread==0) && (box64_dynarec_strongmem>SMREAD_MIN)) {SMDMB();} else dyn->smread=1 +#define SMREAD() if((dyn->smread==0) && (box64_dynarec_strongmem>SMREAD_MIN)) {SMDMB();} else dyn->smread=1 // Opcode will read with option forced lock #define SMREADLOCK(lock) if((lock) || ((dyn->smread==0) && (box64_dynarec_strongmem>SMREAD_MIN))) {SMDMB();} // Opcode might read (depend on nextop) diff --git a/src/libtools/signals.c b/src/libtools/signals.c index 1fe7a1de..f4c17edc 100644 --- a/src/libtools/signals.c +++ b/src/libtools/signals.c @@ -1571,8 +1571,8 @@ dynarec_log(/*LOG_DEBUG*/LOG_INFO, "Repeated SIGSEGV with Access error on %p for int nptrs; void *buffer[BT_BUF_SIZE]; char **strings; - - #ifndef ANDROID + +#ifndef ANDROID nptrs = backtrace(buffer, BT_BUF_SIZE); strings = backtrace_symbols(buffer, nptrs); if(strings) { @@ -1581,7 +1581,7 @@ dynarec_log(/*LOG_DEBUG*/LOG_INFO, "Repeated SIGSEGV with Access error on %p for free(strings); } else printf_log(log_minimum, "NativeBT: none (%d/%s)\n", errno, strerror(errno)); - #endif +#endif extern int my_backtrace_ip(x64emu_t* emu, void** buffer, int size); // in wrappedlibc extern char** my_backtrace_symbols(x64emu_t* emu, uintptr_t* buffer, int size); // save and set real RIP/RSP @@ -1689,7 +1689,7 @@ dynarec_log(/*LOG_DEBUG*/LOG_INFO, "Repeated SIGSEGV with Access error on %p for shown_regs = 1; for (int i=0; i<16; ++i) { if(!(i%4)) printf_log(log_minimum, "\n"); - printf_log(log_minimum, "%s:0x%016llx ", reg_name[i], p->uc_mcontext.__gregs[16+i]); + printf_log(log_minimum, "%s:0x%016llx ", reg_name[i], p->uc_mcontext.__gregs[12+i]); } printf_log(log_minimum, "\n"); for (int i=0; i<6; ++i) |