From f44d60c58f418b8844be3dc4b2505ae097a71f3f Mon Sep 17 00:00:00 2001 From: Yang Liu Date: Sun, 2 Jun 2024 16:25:57 +0800 Subject: [LA64_DYNAREC] Added more opcodes (#1549) * [LA64_DYNAREC] Added more opcodes * fastnan handling and fixed PALIGNR... --- src/dynarec/la64/dynarec_la64_00.c | 62 +++++++++++ src/dynarec/la64/dynarec_la64_0f.c | 131 +++++++++++++++++++++++ src/dynarec/la64/dynarec_la64_66.c | 30 ++++++ src/dynarec/la64/dynarec_la64_660f.c | 70 ++++++++++++ src/dynarec/la64/dynarec_la64_67.c | 109 +++++++++++++++++++ src/dynarec/la64/dynarec_la64_emit_shift.c | 58 ++++++++++ src/dynarec/la64/dynarec_la64_helper.c | 165 +++++++++++++++++++++++++++++ src/dynarec/la64/dynarec_la64_helper.h | 21 ++++ src/dynarec/la64/la64_emitter.h | 8 ++ 9 files changed, 654 insertions(+) create mode 100644 src/dynarec/la64/dynarec_la64_67.c (limited to 'src') diff --git a/src/dynarec/la64/dynarec_la64_00.c b/src/dynarec/la64/dynarec_la64_00.c index 51818d83..a7207003 100644 --- a/src/dynarec/la64/dynarec_la64_00.c +++ b/src/dynarec/la64/dynarec_la64_00.c @@ -167,6 +167,25 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni DEFAULT; } break; + case 0x11: + INST_NAME("ADC Ed, Gd"); + READFLAGS(X_CF); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGD; + GETED(0); + emit_adc32(dyn, ninst, rex, ed, gd, x3, x4, x5, x6); + WBACK; + break; + case 0x13: + INST_NAME("ADC Gd, Ed"); + READFLAGS(X_CF); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGD; + GETED(0); + emit_adc32(dyn, ninst, rex, gd, ed, x3, x4, x5, x6); + break; case 0x18: INST_NAME("SBB Eb, Gb"); READFLAGS(X_CF); @@ -187,6 +206,15 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni emit_sbb32(dyn, ninst, rex, ed, gd, x3, x4, x5); WBACK; break; + case 0x1B: + INST_NAME("SBB Gd, Ed"); + READFLAGS(X_CF); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGD; + GETED(0); + emit_sbb32(dyn, ninst, rex, gd, ed, x3, x4, x5); + break; case 0x1C: 
INST_NAME("SBB AL, Ib"); READFLAGS(X_CF); @@ -460,6 +488,12 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x66: addr = dynarec64_66(dyn, addr, ip, ninst, rex, rep, ok, need_epilog); break; + case 0x67: + if (rex.is32bits) { + DEFAULT; + } else + addr = dynarec64_67(dyn, addr, ip, ninst, rex, rep, ok, need_epilog); + break; case 0x68: INST_NAME("PUSH Id"); i64 = F32S; @@ -1372,6 +1406,26 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0xC1: nextop = F8; switch ((nextop >> 3) & 7) { + case 0: + INST_NAME("ROL Ed, Ib"); + u8 = geted_ib(dyn, addr, ninst, nextop) & (rex.w ? 0x3f : 0x1f); + // flags are not affected if count is 0, we make it a nop if possible. + if (u8) { + SETFLAGS(X_OF | X_CF, SF_SUBSET_PENDING); + GETED(1); + F8; + emit_rol32c(dyn, ninst, rex, ed, u8, x3, x4); + WBACK; + } else { + if (MODREG && !rex.w) { + GETED(1); + ZEROUP(ed); + } else { + FAKEED; + } + F8; + } + break; case 1: INST_NAME("ROR Ed, Ib"); u8 = geted_ib(dyn, addr, ninst, nextop) & (rex.w ? 0x3f : 0x1f); @@ -1607,6 +1661,14 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0xD1: nextop = F8; switch ((nextop >> 3) & 7) { + case 0: + INST_NAME("ROL Ed, 1"); + SETFLAGS(X_OF | X_CF, SF_SUBSET_PENDING); + GETED(0); + emit_rol32c(dyn, ninst, rex, ed, 1, x3, x4); + WBACK; + if (!wback && !rex.w) ZEROUP(ed); + break; case 4: case 6: INST_NAME("SHL Ed, 1"); diff --git a/src/dynarec/la64/dynarec_la64_0f.c b/src/dynarec/la64/dynarec_la64_0f.c index 7255c32e..e96c3437 100644 --- a/src/dynarec/la64/dynarec_la64_0f.c +++ b/src/dynarec/la64/dynarec_la64_0f.c @@ -311,6 +311,74 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SPILL_EFLAGS(); } break; + case 0x38: + // SSE3 + nextop = F8; + switch (nextop) { + case 0xC8 ... 
0xCD: + u8 = nextop; + switch (u8) { + case 0xC8: + INST_NAME("SHA1NEXTE Gx, Ex"); + break; + case 0xC9: + INST_NAME("SHA1MSG1 Gx, Ex"); + break; + case 0xCA: + INST_NAME("SHA1MSG2 Gx, Ex"); + break; + case 0xCB: + INST_NAME("SHA256RNDS2 Gx, Ex"); + break; + case 0xCC: + INST_NAME("SHA256MSG1 Gx, Ex"); + break; + case 0xCD: + INST_NAME("SHA256MSG2 Gx, Ex"); + break; + } + nextop = F8; + if (MODREG) { + ed = (nextop & 7) + (rex.b << 3); + sse_reflect_reg(dyn, ninst, ed); + ADDI_D(x2, xEmu, offsetof(x64emu_t, xmm[ed])); + } else { + SMREAD(); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 0, 0); + if (ed != x2) { + MV(x2, ed); + } + } + GETG; + sse_forget_reg(dyn, ninst, gd); + ADDI_D(x1, xEmu, offsetof(x64emu_t, xmm[gd])); + sse_reflect_reg(dyn, ninst, 0); + switch (u8) { + case 0xC8: + CALL(sha1nexte, -1); + break; + case 0xC9: + CALL(sha1msg1, -1); + break; + case 0xCA: + CALL(sha1msg2, -1); + break; + case 0xCB: + CALL(sha256rnds2, -1); + break; + case 0xCC: + CALL(sha256msg1, -1); + break; + case 0xCD: + CALL(sha256msg2, -1); + break; + } + break; + default: + DEFAULT; + } + break; + #define GO(GETFLAGS, NO, YES, F, I) \ READFLAGS(F); \ if (la64_lbt) { \ @@ -341,6 +409,16 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni #undef GO + case 0x50: + INST_NAME("MOVMSKPS Gd, Ex"); + nextop = F8; + GETEX(q0, 0, 0); + GETGD; + q1 = fpu_get_scratch(dyn); + VMSKLTZ_W(q1, q0); // per-lane "less than zero" mask of the 32-bit lanes, i.e. the sign bits + MOVFR2GR_S(gd, q1); + BSTRPICK_D(gd, gd, 31, 0); // keep bits 31..0 only, upper half of gd is cleared + break; case 0x51: INST_NAME("SQRTPS Gx, Ex"); nextop = F8; @@ -356,6 +434,15 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETGX_empty(q1); VFRSQRT_S(q1, q0); break; + case 0x53: + INST_NAME("RCPPS Gx, Ex"); + nextop = F8; + SKIPTEST(x1); + GETEX(q0, 0, 0); + GETGX_empty(q1); + // TODO: use v1.1 vfrecipe when possible + VFRECIP_S(q1, q0); + break; case 0x54: INST_NAME("ANDPS Gx, Ex"); nextop = F8; @@ -426,6 +513,21 @@ uintptr_t 
dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETGX(v0, 1); VFSUB_S(v0, v0, q0); break; + case 0x5D: + INST_NAME("MINPS Gx, Ex"); + nextop = F8; + GETGX(v0, 1); + GETEX(v1, 0, 0); + if (!box64_dynarec_fastnan && v0 != v1) { + q0 = fpu_get_scratch(dyn); + // always copy from v1 if any operand is NaN: q0 is the cUN compare mask, used to blend v1 into v0 before the min + VFCMP_S(q0, v0, v1, cUN); + VANDN_V(v0, q0, v0); + VAND_V(q0, q0, v1); + VOR_V(v0, v0, q0); + } + VFMIN_S(v0, v0, v1); + break; case 0x5E: INST_NAME("DIVPS Gx, Ex"); nextop = F8; @@ -433,6 +535,21 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETGX(v0, 1); VFDIV_S(v0, v0, q0); break; + case 0x5F: + INST_NAME("MAXPS Gx, Ex"); + nextop = F8; + GETGX(v0, 1); + GETEX(v1, 0, 0); + if (!box64_dynarec_fastnan && v0 != v1) { + q0 = fpu_get_scratch(dyn); + // always copy from v1 if any operand is NaN: q0 is the cUN compare mask, used to blend v1 into v0 before the max + VFCMP_S(q0, v0, v1, cUN); + VANDN_V(v0, q0, v0); + VAND_V(q0, q0, v1); + VOR_V(v0, v0, q0); + } + VFMAX_S(v0, v0, v1); + break; #define GO(GETFLAGS, NO, YES, F, I) \ if (box64_dynarec_test == 2) { NOTEST(x1); } \ @@ -556,6 +673,20 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else switch ((nextop >> 3) & 7) { + case 2: + INST_NAME("LDMXCSR Md"); + GETED(0); + ST_W(ed, xEmu, offsetof(x64emu_t, mxcsr)); + if (box64_sse_flushto0) { + // TODO + } + break; + case 3: + INST_NAME("STMXCSR Md"); + addr = geted(dyn, addr, ninst, nextop, &wback, x1, x2, &fixedaddress, rex, NULL, 0, 0); + LD_WU(x4, xEmu, offsetof(x64emu_t, mxcsr)); + ST_W(x4, wback, fixedaddress); + break; default: DEFAULT; } diff --git a/src/dynarec/la64/dynarec_la64_66.c b/src/dynarec/la64/dynarec_la64_66.c index 8e7764d0..3d27d369 100644 --- a/src/dynarec/la64/dynarec_la64_66.c +++ b/src/dynarec/la64/dynarec_la64_66.c @@ -103,6 +103,15 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni emit_or16(dyn, ninst, x1, x2, x4, x5); GWBACK; break; + case 0x0D: + INST_NAME("OR AX, Iw"); + 
SETFLAGS(X_ALL, SF_SET_PENDING); + i32 = F16; + BSTRPICK_D(x1, xRAX, 15, 0); + MOV32w(x2, i32); + emit_or16(dyn, ninst, x1, x2, x3, x4); + BSTRINS_D(xRAX, x1, 15, 0); + break; case 0x0F: switch (rep) { case 0: addr = dynarec64_660F(dyn, addr, ip, ninst, rex, ok, need_epilog); break; @@ -211,6 +220,27 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni emit_cmp16_0(dyn, ninst, x1, x3, x4); } break; + case 0x69: + case 0x6B: + if (opcode == 0x69) { + INST_NAME("IMUL Gw,Ew,Iw"); + } else { + INST_NAME("IMUL Gw,Ew,Ib"); + } + SETFLAGS(X_ALL, SF_PENDING); + nextop = F8; + GETSEW(x1, (opcode == 0x69) ? 2 : 1); + if (opcode == 0x69) + i32 = F16S; + else + i32 = F8S; + MOV32w(x2, i32); + MUL_W(x2, x2, x1); + UFLAG_RES(x2); + gd = x2; + GWBACK; + UFLAG_DF(x1, d_imul16); + break; case 0x81: case 0x83: nextop = F8; diff --git a/src/dynarec/la64/dynarec_la64_660f.c b/src/dynarec/la64/dynarec_la64_660f.c index c9057c1e..5768cc62 100644 --- a/src/dynarec/la64/dynarec_la64_660f.c +++ b/src/dynarec/la64/dynarec_la64_660f.c @@ -277,6 +277,76 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int DEFAULT; } break; + case 0x3A: // these are some more SSSE3+ opcodes + opcode = F8; + switch (opcode) { + case 0x0F: + INST_NAME("PALIGNR Gx, Ex, Ib"); + nextop = F8; + GETGX(q0, 1); + GETEX(q1, 0, 1); + u8 = F8; + if (u8 > 31) { + VXOR_V(q0, q0, q0); + } else if (u8 > 15) { + VBSRL_V(q0, q0, u8 - 16); + } else if (!u8) { + VOR_V(q0, q1, q1); + } else { + d0 = fpu_get_scratch(dyn); + VBSLL_V(q0, q0, 16 - u8); + VBSRL_V(d0, q1, u8); + VOR_V(q0, q0, d0); + } + break; + case 0x44: + INST_NAME("PCLMULQDQ Gx, Ex, Ib"); + nextop = F8; + GETG; + sse_forget_reg(dyn, ninst, gd); + MOV32w(x1, gd); // gx + if (MODREG) { + ed = (nextop & 7) + (rex.b << 3); + sse_forget_reg(dyn, ninst, ed); + MOV32w(x2, ed); + MOV32w(x3, 0); // p = NULL + } else { + MOV32w(x2, 0); + addr = geted(dyn, addr, ninst, nextop, &ed, x3, x5, &fixedaddress, rex, NULL, 
0, 1); + if (ed != x3) { + MV(x3, ed); + } + } + u8 = F8; + MOV32w(x4, u8); + CALL(native_pclmul, -1); + break; + case 0xDF: + INST_NAME("AESKEYGENASSIST Gx, Ex, Ib"); // AES-NI + nextop = F8; + GETG; + sse_forget_reg(dyn, ninst, gd); + MOV32w(x1, gd); // gx + if (MODREG) { + ed = (nextop & 7) + (rex.b << 3); + sse_forget_reg(dyn, ninst, ed); + MOV32w(x2, ed); + MOV32w(x3, 0); // p = NULL + } else { + MOV32w(x2, 0); + addr = geted(dyn, addr, ninst, nextop, &ed, x3, x2, &fixedaddress, rex, NULL, 0, 1); + if (ed != x3) { + MV(x3, ed); + } + } + u8 = F8; + MOV32w(x4, u8); + CALL(native_aeskeygenassist, -1); + break; + default: + DEFAULT; + } + break; case 0x54: INST_NAME("ANDPD Gx, Ex"); nextop = F8; diff --git a/src/dynarec/la64/dynarec_la64_67.c b/src/dynarec/la64/dynarec_la64_67.c new file mode 100644 index 00000000..5b81eb9d --- /dev/null +++ b/src/dynarec/la64/dynarec_la64_67.c @@ -0,0 +1,109 @@ +#include +#include +#include +#include +#include + +#include "debug.h" +#include "box64context.h" +#include "dynarec.h" +#include "emu/x64emu_private.h" +#include "emu/x64run_private.h" +#include "la64_emitter.h" +#include "x64run.h" +#include "x64emu.h" +#include "box64stack.h" +#include "callback.h" +#include "emu/x64run_private.h" +#include "x64trace.h" +#include "dynarec_native.h" + +#include "la64_printer.h" +#include "dynarec_la64_private.h" +#include "dynarec_la64_helper.h" +#include "dynarec_la64_functions.h" + +uintptr_t dynarec64_67(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog) +{ + (void)ip; (void)need_epilog; + + uint8_t opcode = F8; + uint8_t nextop; + uint8_t gd, ed, wback, wb, wb1, wb2, gb1, gb2, eb1, eb2; + int64_t fixedaddress; + int unscaled; + int8_t i8; + uint8_t u8; + int32_t i32; + int64_t j64, i64; + int cacheupd = 0; + int lock; + int v0, v1, s0; + MAYUSE(i32); + MAYUSE(j64); + MAYUSE(v0); + MAYUSE(v1); + MAYUSE(s0); + MAYUSE(lock); + MAYUSE(cacheupd); + + if(rex.is32bits) { + // 
should do a different file + DEFAULT; + return addr; + } + + GETREX(); + + rep = 0; + while((opcode==0xF2) || (opcode==0xF3)) { + rep = opcode-0xF1; + opcode = F8; + } + + switch(opcode) { + case 0x89: + INST_NAME("MOV Ed, Gd"); + nextop = F8; + GETGD; + if (MODREG) { // reg <= reg + MVxw(TO_LA64((nextop & 7) + (rex.b << 3)), gd); + } else { // mem <= reg + addr = geted32(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, &lock, 1, 0); + SDxw(gd, ed, fixedaddress); + SMWRITELOCK(lock); + } + break; + case 0xF7: + nextop = F8; + switch ((nextop >> 3) & 7) { + case 4: + INST_NAME("MUL EAX, Ed"); + SETFLAGS(X_ALL, SF_PENDING); + GETED32(0); + if (rex.w) { + if (ed == xRDX) + gd = x3; + else + gd = xRDX; + MULH_DU(gd, xRAX, ed); + MUL_D(xRAX, xRAX, ed); + if (gd != xRDX) MV(xRDX, gd); + } else { + MUL_D(xRDX, xRAX, ed); // 64 <- 32x32 + AND(xRAX, xRDX, xMASK); + SRLI_W(xRDX, xRDX, 32); + } + UFLAG_RES(xRAX); + UFLAG_OP1(xRDX); + UFLAG_DF(x2, rex.w ? d_mul64 : d_mul32); + break; + default: + DEFAULT; + } + break; + default: + DEFAULT; + } + return addr; +} diff --git a/src/dynarec/la64/dynarec_la64_emit_shift.c b/src/dynarec/la64/dynarec_la64_emit_shift.c index 1fe8aba8..cff14cc8 100644 --- a/src/dynarec/la64/dynarec_la64_emit_shift.c +++ b/src/dynarec/la64/dynarec_la64_emit_shift.c @@ -751,3 +751,61 @@ void emit_rol32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s OR(xFlags, xFlags, s3); } } + + +// emit ROL32 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch +void emit_rol32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4) +{ + if (!c) return; + + IFX (X_PEND) { + MOV32w(s3, c); + SDxw(s3, xEmu, offsetof(x64emu_t, op2)); + SET_DF(s4, rex.w ? 
d_rol64 : d_rol32); + } else IFX (X_ALL) { + SET_DFNONE(); + } + if (!c) { // NOTE(review): unreachable, c == 0 already returned at function entry + IFX (X_PEND) { + SDxw(s1, xEmu, offsetof(x64emu_t, res)); + } + return; + } + + if (la64_lbt) { + IFX (X_CF | X_OF) { + if (rex.w) + X64_ROTLI_D(s1, c); + else + X64_ROTLI_W(s1, c); + } + } + + ROTRIxw(s1, s1, (rex.w ? 64 : 32) - c); + + if (!rex.w) ZEROUP(s1); + + IFX (X_PEND) { + SDxw(s1, xEmu, offsetof(x64emu_t, res)); + } + + if (la64_lbt) return; + + IFX (X_CF | X_OF) { + MOV64x(s3, (1UL << F_CF | 1UL << F_OF)); + ANDN(xFlags, xFlags, s3); + } + IFX (X_CF | X_OF) { + ANDI(s4, s1, 1 << F_CF); + IFX (X_CF) OR(xFlags, xFlags, s4); + } + IFX (X_OF) { + // the OF flag is set to the exclusive OR of the CF bit (after the rotate) and the most-significant bit of the result. + if (c == 1) { + SRLIxw(s3, s1, rex.w ? 63 : 31); + XOR(s3, s3, s4); + SLLI_D(s3, s3, F_OF); + OR(xFlags, xFlags, s3); + } + } +} \ No newline at end of file diff --git a/src/dynarec/la64/dynarec_la64_helper.c b/src/dynarec/la64/dynarec_la64_helper.c index 85bd950c..f6b6a72e 100644 --- a/src/dynarec/la64/dynarec_la64_helper.c +++ b/src/dynarec/la64/dynarec_la64_helper.c @@ -324,6 +324,150 @@ static uintptr_t geted_32(dynarec_la64_t* dyn, uintptr_t addr, int ninst, uint8_ return addr; } +/* geted32: decode the ModRM/SIB/displacement bytes of Ed with 32-bit address arithmetic (ADD_W / xMASK) and set *ed to the register holding the address: hint (x2 when hint is 0), or the guest register itself when no computation is needed. *fixaddress is an optional displacement the caller must add in its load/store; it stays 0 unless the displacement is 0 or i12 is set and the displacement fits the signed 12-bit immediate range. Returns the updated addr. */ +uintptr_t geted32(dynarec_la64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, uint8_t scratch, int64_t* fixaddress, rex_t rex, int* l, int i12, int delta) +{ + MAYUSE(dyn); + MAYUSE(ninst); + MAYUSE(delta); + + int lock = l ? ((l == LOCK_LOCK) ? 
1 : 2) : 0; + if (lock == 2) + *l = 0; + uint8_t ret = x2; + *fixaddress = 0; + if (hint > 0) ret = hint; + int maxval = 2047; + if (i12 > 1) + maxval -= i12; + MAYUSE(scratch); + if (!(nextop & 0xC0)) { + if ((nextop & 7) == 4) { + uint8_t sib = F8; + int sib_reg = ((sib >> 3) & 0x7) + (rex.x << 3); + int sib_reg2 = (sib & 0x7) + (rex.b << 3); + if ((sib & 0x7) == 5) { + int64_t tmp = F32S; + if (sib_reg != 4) { + if (tmp && ((tmp < -2048) || (tmp > maxval) || !i12)) { + MOV64x(scratch, tmp); + if ((sib >> 6)) { + SLLI_D(ret, TO_LA64(sib_reg), sib >> 6); + ADD_W(ret, ret, scratch); + } else + ADD_W(ret, TO_LA64(sib_reg), scratch); + } else { + if (sib >> 6) + SLLI_D(ret, TO_LA64(sib_reg), (sib >> 6)); + else + ret = TO_LA64(sib_reg); + *fixaddress = tmp; + } + } else { + switch (lock) { + case 1: addLockAddress(tmp); break; + case 2: + if (isLockAddress(tmp)) *l = 1; + break; + } + MOV64x(ret, tmp); + } + } else { + if (sib_reg != 4) { + if ((sib >> 6)) { + SLLI_D(ret, TO_LA64(sib_reg), (sib >> 6)); + ADD_W(ret, ret, TO_LA64(sib_reg2)); + } else + ADD_W(ret, TO_LA64(sib_reg2), TO_LA64(sib_reg)); + } else { + ret = TO_LA64(sib_reg2); + } + } + } else if ((nextop & 7) == 5) { + uint32_t tmp = F32; + MOV32w(ret, tmp); + GETIP(addr + delta); + ADD_W(ret, ret, xRIP); + switch (lock) { + case 1: addLockAddress(addr + delta + tmp); break; + case 2: + if (isLockAddress(addr + delta + tmp)) *l = 1; + break; + } + } else { + ret = TO_LA64((nextop & 7) + (rex.b << 3)); + if (ret == hint) { + AND(hint, ret, xMASK); // to clear upper part + } + } + } else { + int64_t i64; + uint8_t sib = 0; + int sib_reg = 0; + if ((nextop & 7) == 4) { + sib = F8; + sib_reg = ((sib >> 3) & 7) + (rex.x << 3); + } + int sib_reg2 = (sib & 0x07) + (rex.b << 3); + if (nextop & 0x80) + i64 = F32S; + else + i64 = F8S; + if (i64 == 0 || ((i64 >= -2048) && (i64 <= 2047) && i12)) { + *fixaddress = i64; + if ((nextop & 7) == 4) { + if (sib_reg != 4) { + if (sib >> 6) { + SLLI_D(ret, TO_LA64(sib_reg), 
(sib >> 6)); + ADD_W(ret, ret, TO_LA64(sib_reg2)); + } else + ADD_W(ret, TO_LA64(sib_reg2), TO_LA64(sib_reg)); + } else { + ret = TO_LA64(sib_reg2); + } + } else { + ret = TO_LA64((nextop & 0x07) + (rex.b << 3)); + } + } else { + if (i64 >= -2048 && i64 <= 2047) { + if ((nextop & 7) == 4) { + if (sib_reg != 4) { + if (sib >> 6) { + SLLI_D(scratch, TO_LA64(sib_reg), sib >> 6); + ADD_W(scratch, scratch, TO_LA64(sib_reg2)); + } else + ADD_W(scratch, TO_LA64(sib_reg2), TO_LA64(sib_reg)); + } else { + scratch = TO_LA64(sib_reg2); + } + } else + scratch = TO_LA64((nextop & 0x07) + (rex.b << 3)); + ADDI_W(ret, scratch, i64); + } else { + MOV32w(scratch, i64); + if ((nextop & 7) == 4) { + if (sib_reg != 4) { + ADD_W(scratch, scratch, TO_LA64(sib_reg2)); + if (sib >> 6) { + SLLI_D(ret, TO_LA64(sib_reg), (sib >> 6)); + ADD_W(ret, ret, scratch); + } else + ADD_W(ret, scratch, TO_LA64(sib_reg)); + } else { + PASS3(int tmp = TO_LA64(sib_reg2)); + ADD_W(ret, tmp, scratch); + } + } else { + PASS3(int tmp = TO_LA64((nextop & 0x07) + (rex.b << 3))); + ADD_W(ret, tmp, scratch); + } + } + } + } + *ed = ret; + return addr; +} + void jump_to_epilog(dynarec_la64_t* dyn, uintptr_t ip, int reg, int ninst) { MAYUSE(dyn); @@ -652,6 +796,27 @@ int sse_get_reg_empty(dynarec_la64_t* dyn, int ninst, int s1, int a) dyn->lsx.ssecache[a].write = 1; // it will be write... 
return dyn->lsx.ssecache[a].reg; } +// forget the LSX register caching SSE reg a (stored back to emu xmm[a] first when its cache type is LSX_CACHE_XMMW); does nothing if the reg is not cached +void sse_forget_reg(dynarec_la64_t* dyn, int ninst, int a) +{ + if (dyn->lsx.ssecache[a].v == -1) + return; + if (dyn->lsx.lsxcache[dyn->lsx.ssecache[a].reg].t == LSX_CACHE_XMMW) { + VST(dyn->lsx.ssecache[a].reg, xEmu, offsetof(x64emu_t, xmm[a])); + } + fpu_free_reg(dyn, dyn->lsx.ssecache[a].reg); + dyn->lsx.ssecache[a].v = -1; + return; +} + +void sse_reflect_reg(dynarec_la64_t* dyn, int ninst, int a) +{ + if (dyn->lsx.ssecache[a].v == -1) + return; + if (dyn->lsx.lsxcache[dyn->lsx.ssecache[a].reg].t == LSX_CACHE_XMMW) { + VST(dyn->lsx.ssecache[a].reg, xEmu, offsetof(x64emu_t, xmm[a])); + } +} // purge the SSE cache for XMM0..XMM7 (to use before function native call) void sse_purge07cache(dynarec_la64_t* dyn, int ninst, int s1) diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h index 14ad25d2..9302d252 100644 --- a/src/dynarec/la64/dynarec_la64_helper.h +++ b/src/dynarec/la64/dynarec_la64_helper.h @@ -111,6 +111,17 @@ LDz(x1, wback, fixedaddress); \ ed = x1; \ } +// GETED32 (Ed decoded through geted32) can use x1 for ed, and x2 for wback. wback is 0 if ed is xEAX..xEDI +#define GETED32(D) \ + if (MODREG) { \ + ed = TO_LA64((nextop & 7) + (rex.b << 3)); \ + wback = 0; \ + } else { \ + SMREAD(); \ + addr = geted32(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, D); \ + LDxw(x1, wback, fixedaddress); \ + ed = x1; \ + } // GETEDH can use hint for ed, and x1 or x2 for wback (depending on hint), might also use x3. 
wback is 0 if ed is xEAX..xEDI #define GETEDH(hint, D) \ if (MODREG) { \ @@ -727,6 +738,7 @@ void* la64_next(x64emu_t* emu, uintptr_t addr); #define dynarec64_0F STEPNAME(dynarec64_0F) #define dynarec64_64 STEPNAME(dynarec64_64) #define dynarec64_66 STEPNAME(dynarec64_66) +#define dynarec64_67 STEPNAME(dynarec64_67) #define dynarec64_F30F STEPNAME(dynarec64_F30F) #define dynarec64_660F STEPNAME(dynarec64_660F) #define dynarec64_F0 STEPNAME(dynarec64_F0) @@ -800,6 +812,7 @@ void* la64_next(x64emu_t* emu, uintptr_t addr); #define emit_sar32c STEPNAME(emit_sar32c) #define emit_ror32c STEPNAME(emit_ror32c) #define emit_rol32 STEPNAME(emit_rol32) +#define emit_rol32c STEPNAME(emit_rol32c) #define emit_pf STEPNAME(emit_pf) @@ -809,6 +822,8 @@ void* la64_next(x64emu_t* emu, uintptr_t addr); #define sse_purge07cache STEPNAME(sse_purge07cache) #define sse_get_reg STEPNAME(sse_get_reg) #define sse_get_reg_empty STEPNAME(sse_get_reg_empty) +#define sse_forget_reg STEPNAME(sse_forget_reg) +#define sse_reflect_reg STEPNAME(sse_reflect_reg) #define fpu_pushcache STEPNAME(fpu_pushcache) #define fpu_popcache STEPNAME(fpu_popcache) @@ -895,6 +910,7 @@ void emit_sar16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, void emit_sar32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4); void emit_ror32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4); void emit_rol32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); +void emit_rol32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4); void emit_pf(dynarec_la64_t* dyn, int ninst, int s1, int s3, int s4); @@ -923,6 +939,10 @@ void sse_purge07cache(dynarec_la64_t* dyn, int ninst, int s1); int sse_get_reg(dynarec_la64_t* dyn, int ninst, int s1, int a, int forwrite); // get lsx register for an SSE reg, but don't try to synch it if it needed to be created int sse_get_reg_empty(dynarec_la64_t* dyn, int ninst, int s1, 
int a); +// forget the LSX register caching an SSE reg (stored back to emu xmm[a] first when its cache type is LSX_CACHE_XMMW); no-op if the reg is not cached +void sse_forget_reg(dynarec_la64_t* dyn, int ninst, int a); +// store the cached value of an SSE reg back to emu xmm[a] (only when its cache type is LSX_CACHE_XMMW), keeping the cache entry +void sse_reflect_reg(dynarec_la64_t* dyn, int ninst, int a); void CacheTransform(dynarec_la64_t* dyn, int ninst, int cacheupd, int s1, int s2, int s3); @@ -940,6 +960,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni uintptr_t dynarec64_F30F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog); uintptr_t dynarec64_64(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int seg, int* ok, int* need_epilog); uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); +uintptr_t dynarec64_67(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog); uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); uintptr_t dynarec64_F20F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog); diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h index 6201eda8..83f5719c 100644 --- a/src/dynarec/la64/la64_emitter.h +++ b/src/dynarec/la64/la64_emitter.h @@ -1139,6 +1139,12 @@ LSX instruction starts with V, LASX instruction starts with XV. 
#define VSIGNCOV_H(vd, vj, vk) EMIT(type_3R(0b01110001001011101, vk, vj, vd)) #define VSIGNCOV_W(vd, vj, vk) EMIT(type_3R(0b01110001001011110, vk, vj, vd)) #define VSIGNCOV_D(vd, vj, vk) EMIT(type_3R(0b01110001001011111, vk, vj, vd)) +#define VMSKLTZ_B(vd, vj) EMIT(type_2R(0b0111001010011100010000, vj, vd)) +#define VMSKLTZ_H(vd, vj) EMIT(type_2R(0b0111001010011100010001, vj, vd)) +#define VMSKLTZ_W(vd, vj) EMIT(type_2R(0b0111001010011100010010, vj, vd)) +#define VMSKLTZ_D(vd, vj) EMIT(type_2R(0b0111001010011100010011, vj, vd)) +#define VMSKGEZ_B(vd, vj) EMIT(type_2R(0b0111001010011100010100, vj, vd)) +#define VMSKNZ_B(vd, vj) EMIT(type_2R(0b0111001010011100011000, vj, vd)) #define VAND_V(vd, vj, vk) EMIT(type_3R(0b01110001001001100, vk, vj, vd)) #define VLDI(vd, imm13) EMIT(type_1RI13(0b01110011111000, imm13, vd)) #define VOR_V(vd, vj, vk) EMIT(type_3R(0b01110001001001101, vk, vj, vd)) @@ -1694,6 +1700,8 @@ LSX instruction starts with V, LASX instruction starts with XV. #define XVSLT_HU(vd, vj, vk) EMIT(type_3R(0b01110100000010001, vk, vj, vd)) #define XVSLT_WU(vd, vj, vk) EMIT(type_3R(0b01110100000010010, vk, vj, vd)) #define XVSLT_DU(vd, vj, vk) EMIT(type_3R(0b01110100000010011, vk, vj, vd)) +#define XVBSLL_V(vd, vj, imm5) EMIT(type_2RI5(0b01110110100011100, imm5, vj, vd)) +#define XVBSRL_V(vd, vj, imm5) EMIT(type_2RI5(0b01110110100011101, imm5, vj, vd)) #define XVPACKEV_B(vd, vj, vk) EMIT(type_3R(0b01110101000101100, vk, vj, vd)) #define XVPACKEV_H(vd, vj, vk) EMIT(type_3R(0b01110101000101101, vk, vj, vd)) #define XVPACKEV_W(vd, vj, vk) EMIT(type_3R(0b01110101000101110, vk, vj, vd)) -- cgit 1.4.1