diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_00.c | 71 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_0f.c | 80 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_66.c | 26 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_660f.c | 23 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_f0.c | 73 |
5 files changed, 272 insertions, 1 deletion
diff --git a/src/dynarec/la64/dynarec_la64_00.c b/src/dynarec/la64/dynarec_la64_00.c index fbbad121..9f618456 100644 --- a/src/dynarec/la64/dynarec_la64_00.c +++ b/src/dynarec/la64/dynarec_la64_00.c @@ -1053,6 +1053,24 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } } break; + case 0x8F: + INST_NAME("POP Ed"); + nextop = F8; + if (MODREG) { + POP1z(xRAX + (nextop & 7) + (rex.b << 3)); + } else { + POP1z(x2); // so this can handle POP [ESP] and maybe some variant too + addr = geted(dyn, addr, ninst, nextop, &ed, x3, x1, &fixedaddress, rex, &lock, 1, 0); + if (ed == xRSP) { + SDz(x2, ed, fixedaddress); + } else { + // complicated to just allow a segfault that can be recovered correctly + ADDIz(xRSP, xRSP, rex.is32bits ? -4 : -8); + SDz(x2, ed, fixedaddress); + ADDIz(xRSP, xRSP, rex.is32bits ? 4 : 8); + } + } + break; case 0x90: case 0x91: case 0x92: @@ -1379,6 +1397,59 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; } break; + case 0xAF: + switch (rep) { + case 1: + case 2: + if (rep == 1) { + INST_NAME("REPNZ SCASD"); + } else { + INST_NAME("REPZ SCASD"); + } + MAYSETFLAGS(); + SETFLAGS(X_ALL, SF_SET_PENDING); + CBZ_NEXT(xRCX); + if (rex.w) { + MV(x1, xRAX); + } else { + ZEROUP2(x1, xRAX); + } + ANDI(x2, xFlags, 1 << F_DF); + BNEZ_MARK2(x2); + MARK; // Part with DF==0 + LDxw(x2, xRDI, 0); + ADDI_D(xRDI, xRDI, rex.w ? 8 : 4); + ADDI_D(xRCX, xRCX, -1); + if (rep == 1) { + BEQ_MARK3(x1, x2); + } else { + BNE_MARK3(x1, x2); + } + BNE_MARK(xRCX, xZR); + B_MARK3_nocond; + MARK2; // Part with DF==1 + LDxw(x2, xRDI, 0); + ADDI_D(xRDI, xRDI, rex.w ? -8 : -4); + ADDI_D(xRCX, xRCX, -1); + if (rep == 1) { + BEQ_MARK3(x1, x2); + } else { + BNE_MARK3(x1, x2); + } + BNE_MARK2(xRCX, xZR); + MARK3; // end + emit_cmp32(dyn, ninst, rex, x1, x2, x3, x4, x5, x6); + break; + default: + INST_NAME("SCASD"); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETDIR(x3, x1, rex.w ? 
8 : 4); + LDxw(x2, xRDI, 0); + ADD_D(xRDI, xRDI, x3); + emit_cmp32(dyn, ninst, rex, xRAX, x2, x3, x4, x5, x6); + break; + } + break; case 0xB0: case 0xB1: case 0xB2: diff --git a/src/dynarec/la64/dynarec_la64_0f.c b/src/dynarec/la64/dynarec_la64_0f.c index 0bc20158..bc584b86 100644 --- a/src/dynarec/la64/dynarec_la64_0f.c +++ b/src/dynarec/la64/dynarec_la64_0f.c @@ -110,6 +110,17 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni *need_epilog = 0; *ok = 0; break; + case 0x0D: + nextop = F8; + switch ((nextop >> 3) & 7) { + case 1: + INST_NAME("PREFETCHW"); + FAKEED; + break; + default: //??? + DEFAULT; + } + break; case 0x10: INST_NAME("MOVUPS Gx,Ex"); nextop = F8; @@ -325,6 +336,17 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SPILL_EFLAGS(); } break; + case 0x31: + INST_NAME("RDTSC"); + NOTEST(x1); + // TODO: how to read the wall-clock real time on LoongArch? + CALL(ReadTSC, x3); // will return the u64 in x3 + if (box64_rdtsc_shift) { + SRLI_D(x3, x3, box64_rdtsc_shift); + } + SRLI_D(xRDX, x3, 32); + ZEROUP2(xRDX, x3); + break; case 0x38: // SSE3 nextop = F8; @@ -771,6 +793,15 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni CALL(rex.is32bits ? ((void*)fpu_fxsave32) : ((void*)fpu_fxsave64), -1); } break; + case 1: + INST_NAME("FXRSTOR Ed"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + SKIPTEST(x1); + fpu_purgecache(dyn, ninst, 0, x1, x2, x3); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, x3, &fixedaddress, rex, NULL, 0, 0); + if (ed != x1) { MV(x1, ed); } + CALL(rex.is32bits ? 
((void*)fpu_fxrstor32) : ((void*)fpu_fxrstor64), -1); + break; case 2: INST_NAME("LDMXCSR Md"); GETED(0); @@ -785,6 +816,33 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni LD_WU(x4, xEmu, offsetof(x64emu_t, mxcsr)); ST_W(x4, wback, fixedaddress); break; + case 4: + INST_NAME("XSAVE Ed"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + fpu_purgecache(dyn, ninst, 0, x1, x2, x3); + addr = geted(dyn, addr, ninst, nextop, &wback, x1, x2, &fixedaddress, rex, NULL, 0, 0); + if (ed != x1) { MV(x1, ed); } + MOV32w(x2, rex.is32bits); + CALL((void*)fpu_xsave, -1); + break; + case 5: + INST_NAME("XRSTOR Ed"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + fpu_purgecache(dyn, ninst, 0, x1, x2, x3); + addr = geted(dyn, addr, ninst, nextop, &wback, x1, x2, &fixedaddress, rex, NULL, 0, 0); + if (ed != x1) { MV(x1, ed); } + MOV32w(x2, rex.is32bits); + CALL((void*)fpu_xrstor, -1); + break; + case 7: + INST_NAME("CLFLUSH Ed"); + MESSAGE(LOG_DUMP, "Need Optimization?\n"); + addr = geted(dyn, addr, ninst, nextop, &wback, x1, x2, &fixedaddress, rex, NULL, 0, 0); + if (wback != A1) { + MV(A1, wback); + } + CALL_(native_clflush, -1, 0); + break; default: DEFAULT; } @@ -955,6 +1013,28 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } MARK; break; + case 7: + INST_NAME("BTC Ed, Ib"); + SETFLAGS(X_CF, SF_SUBSET); + SET_DFNONE(); + GETED(1); + u8 = F8; + u8 &= rex.w ? 
0x3f : 0x1f; + BSTRPICK_D(x3, ed, u8, u8); + BSTRINS_D(xFlags, x3, 0, 0); + if (u8 <= 10) { + XORI(ed, ed, (1LL << u8)); + } else { + MOV64xw(x3, (1LL << u8)); + XOR(ed, ed, x3); + } + if (wback) { + SDxw(ed, wback, fixedaddress); + SMWRITE(); + } else if (!rex.w) { + ZEROUP(ed); + } + break; default: DEFAULT; } diff --git a/src/dynarec/la64/dynarec_la64_66.c b/src/dynarec/la64/dynarec_la64_66.c index 4af61163..bfe64ef1 100644 --- a/src/dynarec/la64/dynarec_la64_66.c +++ b/src/dynarec/la64/dynarec_la64_66.c @@ -695,6 +695,30 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni emit_neg16(dyn, ninst, ed, x2, x4); EWBACK; break; + case 6: + INST_NAME("DIV Ew"); + SETFLAGS(X_ALL, SF_SET); + SET_DFNONE(); + GETEW(x1, 0); + BSTRPICK_D(x2, xRAX, 15, 0); + SLLI_D(x7, xRDX, 48); + SRLI_D(x7, x7, 32); + OR(x2, x2, x7); + if(box64_dynarec_div0) { + BNE_MARK3(ed, xZR); + GETIP_(ip); + STORE_XEMU_CALL(); + CALL(native_div0, -1); + CLEARIP(); + LOAD_XEMU_CALL(); + jump_to_epilog(dyn, 0, xRIP, ninst); + MARK3; + } + DIV_WU(x7, x2, ed); + MOD_WU(x4, x2, ed); + BSTRINSz(xRAX, x7, 15, 0); + BSTRINSz(xRDX, x4, 15, 0); + break; case 7: INST_NAME("IDIV Ew"); NOTEST(x1); @@ -717,7 +741,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni DIV_W(x3, x2, ed); MOD_W(x4, x2, ed); BSTRINSz(xRAX, x3, 15, 0); - BSTRINSz(xRAX, x4, 15, 0); + BSTRINSz(xRDX, x4, 15, 0); break; default: DEFAULT; diff --git a/src/dynarec/la64/dynarec_la64_660f.c b/src/dynarec/la64/dynarec_la64_660f.c index c348a788..f0e382ae 100644 --- a/src/dynarec/la64/dynarec_la64_660f.c +++ b/src/dynarec/la64/dynarec_la64_660f.c @@ -107,6 +107,21 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int } VEXTRINS_D(v0, v1, 0x10); break; + case 0x16: + INST_NAME("MOVHPD Gx, Eq"); + nextop = F8; + GETGX(v0, 1); + if (MODREG) { + // access register instead of memory is bad opcode! 
+ DEFAULT; + return addr; + } + SMREAD(); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0); + v1 = fpu_get_scratch(dyn); + FLD_D(v1, ed, fixedaddress); + VEXTRINS_D(v0, v1, 0x10); + break; case 0x1F: INST_NAME("NOP (multibyte)"); nextop = F8; @@ -1217,6 +1232,14 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int GETEX(v1, 0, 0); VMUH_H(v0, v0, v1); break; + case 0xE6: + INST_NAME("CVTTPD2DQ Gx, Ex"); + nextop = F8; + GETEX(v1, 0, 0); + GETGX_empty(v0); + // TODO: fastround + VFTINTRZ_W_D(v0, v1, v1); + break; case 0xE7: INST_NAME("MOVNTDQ Ex, Gx"); nextop = F8; diff --git a/src/dynarec/la64/dynarec_la64_f0.c b/src/dynarec/la64/dynarec_la64_f0.c index e857999e..22457c0d 100644 --- a/src/dynarec/la64/dynarec_la64_f0.c +++ b/src/dynarec/la64/dynarec_la64_f0.c @@ -99,6 +99,79 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x0F: nextop = F8; switch (nextop) { + case 0xB0: + switch (rep) { + case 0: + INST_NAME("LOCK CMPXCHG Eb, Gb"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + ANDI(x6, xRAX, 0xff); // AL + SMDMB(); + if (MODREG) { + if (rex.rex) { + wback = TO_LA64((nextop & 7) + (rex.b << 3)); + wb2 = 0; + } else { + wback = (nextop & 7); + wb2 = (wback >> 2) * 8; + wback = TO_LA64(wback & 3); + } + BSTRPICK_D(x2, wback, wb2 + 7, wb2); + wb1 = 0; + ed = x2; + UFLAG_IF { + emit_cmp8(dyn, ninst, x6, ed, x3, x4, x5, x1); + } + BNE_MARK2(x6, x2); + BSTRPICK_D(wback, x2, wb2 + 7, wb2); + GETGB(x1); + MV(ed, gd); + MARK2; + BSTRINS_D(xRAX, x2, 7, 0); + B_NEXT_nocond; + } else { + if (rex.rex) { + gb1 = TO_LA64(((nextop & 0x38) >> 3) + (rex.r << 3)); + gb2 = 0; + } else { + gd = (nextop & 0x38) >> 3; + gb2 = ((gd & 4) >> 2) * 8; + gb1 = TO_LA64(gd & 3); + } + addr = geted(dyn, addr, ninst, nextop, &wback, x3, x2, &fixedaddress, rex, LOCK_LOCK, 0, 0); + ANDI(x5, wback, 0b11); + SLLI_D(x5, x5, 3); // shamt + MARKLOCK; + ADDI_D(x7, xZR, ~0b11); + AND(x7, wback, 
x7); // align to 32bit + LD_WU(x1, x7, 0); + LL_W(x4, x7, 0); + SRL_D(x4, x4, x5); + ANDI(x4, x4, 0xff); + BNE_MARK(x6, x4); // compare AL with m8 + // AL == m8, r8 is loaded into m8 + ADDI_D(x2, xZR, 0xff); + SLL_D(x2, x2, x5); + NOR(x2, x2, xZR); + AND(x2, x1, x2); + BSTRPICK_D(x1, gb1, gb2 + 7, gb2); + SLL_D(x1, x1, x5); + OR(x1, x1, x2); + SC_W(x1, x7, 0); + BEQZ_MARKLOCK(x1); + // done + MARK; + UFLAG_IF { emit_cmp8(dyn, ninst, x6, x4, x1, x2, x3, x5); } + // load m8 into AL + ANDI(xRAX, xRAX, ~0xff); + OR(xRAX, xRAX, x4); + } + SMDMB(); + break; + default: + DEFAULT; + } + break; case 0xB1: switch (rep) { case 0: |