diff options
| author | Yang Liu <numbksco@gmail.com> | 2024-04-06 18:46:14 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-04-06 12:46:14 +0200 |
| commit | d84faf57ab384344017f57c1b1d261352a320bab (patch) | |
| tree | 7573c85513c886358cdec9545f8bc6dd86dd4f65 /src | |
| parent | 89f0c494e4e1d4eac32d839869e45f1ca1d384d8 (diff) | |
| download | box64-d84faf57ab384344017f57c1b1d261352a320bab.tar.gz box64-d84faf57ab384344017f57c1b1d261352a320bab.zip | |
[LA64_DYNAREC] Added more opcodes (#1424)
* [LA64_DYNAREC] Added 66 C1 /5 SHR opcode
* Added 69 IMUL opcode
* Added F7 /6 DIV opcode
* Added 6B IMUL opcode
* Added C1 /1 ROR opcode
* [LA64_DYNAREC] Added 87 XCHG opcode

Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_00.c | 157 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_66.c | 20 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_emit_shift.c | 61 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_helper.h | 24 |
4 files changed, 260 insertions, 2 deletions
diff --git a/src/dynarec/la64/dynarec_la64_00.c b/src/dynarec/la64/dynarec_la64_00.c index ea3dd6fa..2100ae73 100644 --- a/src/dynarec/la64/dynarec_la64_00.c +++ b/src/dynarec/la64/dynarec_la64_00.c @@ -405,13 +405,78 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni PUSH1z(x3); } break; - + case 0x69: + INST_NAME("IMUL Gd, Ed, Id"); + SETFLAGS(X_ALL, SF_PENDING); + nextop = F8; + GETGD; + GETED(4); + i64 = F32S; + MOV64xw(x4, i64); + if (rex.w) { + // 64bits imul + UFLAG_IF { + MULH_D(x3, ed, x4); + MUL_D(gd, ed, x4); + UFLAG_OP1(x3); + UFLAG_RES(gd); + UFLAG_DF(x3, d_imul64); + } else { + MULxw(gd, ed, x4); + } + } else { + // 32bits imul + UFLAG_IF { + MUL_D(gd, ed, x4); + UFLAG_RES(gd); + SRLI_D(x3, gd, 32); + UFLAG_OP1(x3); + UFLAG_DF(x3, d_imul32); + } else { + MULxw(gd, ed, x4); + } + ZEROUP(gd); + } + break; case 0x6A: INST_NAME("PUSH Ib"); i64 = F8S; MOV64z(x3, i64); PUSH1z(x3); break; + case 0x6B: + INST_NAME("IMUL Gd, Ed, Ib"); + SETFLAGS(X_ALL, SF_PENDING); + nextop = F8; + GETGD; + GETED(1); + i64 = F8S; + MOV64xw(x4, i64); + if (rex.w) { + // 64bits imul + UFLAG_IF { + MULH_D(x3, ed, x4); + MUL_D(gd, ed, x4); + UFLAG_OP1(x3); + UFLAG_RES(gd); + UFLAG_DF(x3, d_imul64); + } else { + MUL_D(gd, ed, x4); + } + } else { + // 32bits imul + UFLAG_IF { + MUL_D(gd, ed, x4); + UFLAG_RES(gd); + SRLI_D(x3, gd, 32); + UFLAG_OP1(x3); + UFLAG_DF(x3, d_imul32); + } else { + MUL_W(gd, ed, x4); + } + ZEROUP(gd); + } + break; #define GO(GETFLAGS, NO, YES, F, I) \ READFLAGS(F); \ @@ -589,6 +654,35 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETED(0); emit_test32(dyn, ninst, rex, ed, gd, x3, x4, x5); break; + case 0x87: + INST_NAME("(LOCK) XCHG Ed, Gd"); + nextop = F8; + if (MODREG) { + GETGD; + GETED(0); + MVxw(x1, gd); + MVxw(gd, ed); + MVxw(ed, x1); + } else { + GETGD; + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, 0); + SMDMB(); + ANDI(x3, ed, (1 << (2 + 
rex.w)) - 1); + BNEZ_MARK(x3); + MARKLOCK; + LLxw(x1, ed, 0); + MVxw(x3, gd); + SCxw(x3, ed, 0); + BEQZ_MARKLOCK(x3); + B_MARK2_nocond; + MARK; + LDxw(x1, ed, 0); + SDxw(gd, ed, 0); + MARK2; + SMDMB(); + MVxw(gd, x1); + } + break; case 0x88: INST_NAME("MOV Eb, Gb"); nextop = F8; @@ -914,6 +1008,26 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0xC1: nextop = F8; switch ((nextop >> 3) & 7) { + case 1: + INST_NAME("ROR Ed, Ib"); + u8 = geted_ib(dyn, addr, ninst, nextop) & (rex.w ? 0x3f : 0x1f); + // flags are not affected if count is 0, we make it a nop if possible. + if (u8) { + SETFLAGS(X_OF | X_CF, SF_SUBSET_PENDING); + GETED(1); + F8; + emit_ror32c(dyn, ninst, rex, ed, u8, x3, x4); + WBACK; + } else { + if (MODREG && !rex.w) { + GETED(1); + ZEROUP(ed); + } else { + FAKEED; + } + F8; + } + break; case 4: case 6: INST_NAME("SHL Ed, Ib"); @@ -1408,6 +1522,47 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni UFLAG_RES(xRAX); UFLAG_OP1(xRDX); break; + case 6: + INST_NAME("DIV Ed"); + SETFLAGS(X_ALL, SF_SET); + if (!rex.w) { + SET_DFNONE(); + GETED(0); + SLLI_D(x3, xRDX, 32); + AND(x2, xRAX, xMASK); + OR(x3, x3, x2); + if (MODREG) { + AND(x4, ed, xMASK); + ed = x4; + } + DIV_DU(x2, x3, ed); + MOD_DU(xRDX, x3, ed); + AND(xRAX, x2, xMASK); + ZEROUP(xRDX); + } else { + if (ninst + && dyn->insts[ninst - 1].x64.addr + && *(uint8_t*)(dyn->insts[ninst - 1].x64.addr) == 0x31 + && *(uint8_t*)(dyn->insts[ninst - 1].x64.addr + 1) == 0xD2) { + SET_DFNONE(); + GETED(0); + DIV_DU(x2, xRAX, ed); + MOD_DU(xRDX, xRAX, ed); + MV(xRAX, x2); + } else { + GETEDH(x1, 0); // get edd changed addr, so cannot be called 2 times for same op... 
+ BEQ_MARK(xRDX, xZR); + if (ed != x1) { MV(x1, ed); } + CALL(div64, -1); + B_NEXT_nocond; + MARK; + DIV_DU(x2, xRAX, ed); + MOD_DU(xRDX, xRAX, ed); + MV(xRAX, x2); + SET_DFNONE(); + } + } + break; default: DEFAULT; } diff --git a/src/dynarec/la64/dynarec_la64_66.c b/src/dynarec/la64/dynarec_la64_66.c index b4d35233..71c73324 100644 --- a/src/dynarec/la64/dynarec_la64_66.c +++ b/src/dynarec/la64/dynarec_la64_66.c @@ -133,6 +133,26 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni BSTRPICK_D(gd, x2, 15, 0); } break; + case 0xC1: + nextop = F8; + switch ((nextop >> 3) & 7) { + case 5: + INST_NAME("SHR Ew, Ib"); + UFLAG_IF { MESSAGE(LOG_DUMP, "Need Optimization for flags\n"); } + SETFLAGS(X_ALL, SF_PENDING); + GETEW(x1, 1); + u8 = F8; + UFLAG_IF { MOV32w(x2, (u8 & 15)); } + UFLAG_OP12(ed, x2) + SRLI_D(ed, ed, u8 & 15); + EWBACK; + UFLAG_RES(ed); + UFLAG_DF(x3, d_shr16); + break; + default: + DEFAULT; + } + break; default: DEFAULT; } diff --git a/src/dynarec/la64/dynarec_la64_emit_shift.c b/src/dynarec/la64/dynarec_la64_emit_shift.c index 377250d5..3cf6d41d 100644 --- a/src/dynarec/la64/dynarec_la64_emit_shift.c +++ b/src/dynarec/la64/dynarec_la64_emit_shift.c @@ -318,4 +318,63 @@ void emit_sar32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, IFX(X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } -} \ No newline at end of file +} + + +// emit ROR32 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch +void emit_ror32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4) +{ + if (!c) return; + + IFX (X_PEND) { + MOV32w(s3, c); + SDxw(s3, xEmu, offsetof(x64emu_t, op2)); + SET_DF(s4, rex.w ? 
d_ror64 : d_ror32); + } else IFX (X_ALL) { + SET_DFNONE(); + } + if (!c) { + IFX (X_PEND) { + SDxw(s1, xEmu, offsetof(x64emu_t, res)); + } + return; + } + + if (la64_lbt) { + IFX (X_ALL) { + if (rex.w) + X64_ROTRI_D(s1, c); + else + X64_ROTRI_W(s1, c); + } + } + + SRLIxw(s3, s1, c); + SLLIxw(s1, s1, (rex.w ? 64 : 32) - c); + OR(s1, s3, s1); + + if (!rex.w) ZEROUP(s1); + + IFX (X_PEND) { + SDxw(s1, xEmu, offsetof(x64emu_t, res)); + } + + if (la64_lbt) return; + + CLEAR_FLAGS(s3); + IFX (X_CF) { + SRLIxw(s3, s1, rex.w ? 63 : 31); + OR(xFlags, xFlags, s3); + } + IFX (X_OF) { + // the OF flag is set to the exclusive OR of the two most-significant bits of the result + if (c == 1) { + SRLI_D(s3, s1, rex.w ? 62 : 30); + SRLI_D(s4, s3, 1); + XOR(s3, s3, s4); + ANDI(s3, s3, 1); + SLLI_D(s3, s3, F_OF); + OR(xFlags, xFlags, s3); + } + } +} diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h index 664384a3..481dbb68 100644 --- a/src/dynarec/la64/dynarec_la64_helper.h +++ b/src/dynarec/la64/dynarec_la64_helper.h @@ -111,6 +111,17 @@ LDz(x1, wback, fixedaddress); \ ed = x1; \ } +// GETEDH can use hint for ed, and x1 or x2 for wback (depending on hint), might also use x3. wback is 0 if ed is xEAX..xEDI +#define GETEDH(hint, D) \ + if (MODREG) { \ + ed = TO_LA64((nextop & 7) + (rex.b << 3)); \ + wback = 0; \ + } else { \ + SMREAD(); \ + addr = geted(dyn, addr, ninst, nextop, &wback, (hint == x2) ? x1 : x2, (hint == x1) ? x1 : x3, &fixedaddress, rex, NULL, 1, D); \ + LDxw(hint, wback, fixedaddress); \ + ed = hint; \ + } // GETEWW will use i for ed, and can use w for wback. #define GETEWW(w, i, D) \ if (MODREG) { \ @@ -157,6 +168,17 @@ SMWRITE(); \ } +// Write w back to original register / memory (w needs to be 16bits only!) 
+#define EWBACKW(w) \ + if (wb1) { \ + ST_H(w, wback, fixedaddress); \ + SMWRITE(); \ + } else { \ + BSTRINS_D(wback, w, 15, 0); \ + } +// Write ed back to original register / memory +#define EWBACK EWBACKW(ed) + // GETEB will use i for ed, and can use r3 for wback. #define GETEB(i, D) \ if (MODREG) { \ @@ -640,6 +662,7 @@ void* la64_next(x64emu_t* emu, uintptr_t addr); #define emit_shl32c STEPNAME(emit_shl32c) #define emit_shr32c STEPNAME(emit_shr32c) #define emit_sar32c STEPNAME(emit_sar32c) +#define emit_ror32c STEPNAME(emit_ror32c) #define emit_pf STEPNAME(emit_pf) @@ -707,6 +730,7 @@ void emit_shl32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s void emit_shl32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4, int s5); void emit_shr32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4); void emit_sar32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4); +void emit_ror32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4); void emit_pf(dynarec_la64_t* dyn, int ninst, int s1, int s3, int s4); |