| | | |
|---|---|---|
| author | Yang Liu <numbksco@gmail.com> | 2024-05-04 03:18:42 +0800 |
| committer | GitHub <noreply@github.com> | 2024-05-03 21:18:42 +0200 |
| commit | 1501592fa87cde05f22ea7ec3b2587ffe51134f9 | |
| tree | 7fb03c7c64215393706098456a8ab7e897b9f8c7 /src | |
| parent | 12467916a4ce5bf8042324f0dbb10a93da41d834 | |
[LA64_DYNAREC] Added more opcodes (#1491)
Diffstat (limited to 'src')
| Mode | File | Lines |
|---|---|---|
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_0f.c | 6 |
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_00.c | 23 |
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_0f.c | 75 |
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_66.c | 76 |
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_660f.c | 33 |
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_emit_math.c | 98 |
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_emit_shift.c | 64 |
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_f20f.c | 68 |
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_helper.c | 37 |
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_helper.h | 32 |
| -rw-r--r-- | src/dynarec/la64/la64_emitter.h | 6 |
11 files changed, 512 insertions, 6 deletions
```diff
diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c
index 37ce5743..82669dcc 100644
--- a/src/dynarec/arm64/dynarec_arm64_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_0f.c
@@ -657,7 +657,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             GETEM(q1, 0);
             ABS_32(q0, q1);
             break;
-
+
         case 0xC8:
             INST_NAME("SHA1NEXTE Gx, Ex");
             nextop = F8;
@@ -900,7 +900,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     v1 = v0;
                 else
                     v1 = fpu_get_scratch(dyn);
-            } else
+            } else
                 v1 = q1;
             if(v1!=v0) {
                 VEXTQ_8(v1, q1, q1, 8);
@@ -1916,7 +1916,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 MOVxw_REG(xRAX, x1); // upper par of RAX will be erase on 32bits, no mater what
             }
             break;
-
+
         case 0xB3:
             INST_NAME("BTR Ed, Gd");
             SETFLAGS(X_CF, SF_SUBSET);
diff --git a/src/dynarec/la64/dynarec_la64_00.c b/src/dynarec/la64/dynarec_la64_00.c
index 6e5ec591..3eb172ab 100644
--- a/src/dynarec/la64/dynarec_la64_00.c
+++ b/src/dynarec/la64/dynarec_la64_00.c
@@ -655,6 +655,23 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     emit_or32c(dyn, ninst, rex, ed, i64, x3, x4);
                     WBACK;
                     break;
+                case 2: // ADC
+                    if (opcode == 0x81) {
+                        INST_NAME("ADC Ed, Id");
+                    } else {
+                        INST_NAME("ADC Ed, Ib");
+                    }
+                    READFLAGS(X_CF);
+                    SETFLAGS(X_ALL, SF_SET_PENDING);
+                    GETED((opcode == 0x81) ? 4 : 1);
+                    if (opcode == 0x81)
+                        i64 = F32S;
+                    else
+                        i64 = F8S;
+                    MOV64xw(x5, i64);
+                    emit_adc32(dyn, ninst, rex, ed, x5, x3, x4, x6, x1);
+                    WBACK;
+                    break;
                 case 4: // AND
                     if (opcode == 0x81) {
                         INST_NAME("AND Ed, Id");
@@ -1811,6 +1828,12 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     MOV32w(x2, u8);
                     emit_test8(dyn, ninst, x1, x2, x3, x4, x5);
                     break;
+                case 2:
+                    INST_NAME("NOT Eb");
+                    GETEB(x1, 0);
+                    NOR(x1, x1, xZR);
+                    EBBACK();
+                    break;
                 case 3:
                     INST_NAME("NEG Eb");
                     SETFLAGS(X_ALL, SF_SET_PENDING);
diff --git a/src/dynarec/la64/dynarec_la64_0f.c b/src/dynarec/la64/dynarec_la64_0f.c
index 6fb729f4..8ddae253 100644
--- a/src/dynarec/la64/dynarec_la64_0f.c
+++ b/src/dynarec/la64/dynarec_la64_0f.c
@@ -104,6 +104,22 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             LOAD_XEMU_REM();
             jump_to_epilog(dyn, 0, xRIP, ninst);
             break;
+        case 0x10:
+            INST_NAME("MOVUPS Gx,Ex");
+            nextop = F8;
+            GETG;
+            if (MODREG) {
+                ed = (nextop & 7) + (rex.b << 3);
+                v1 = sse_get_reg(dyn, ninst, x1, ed, 0);
+                v0 = sse_get_reg_empty(dyn, ninst, x1, gd);
+                VOR_V(v0, v1, v1);
+            } else {
+                v0 = sse_get_reg_empty(dyn, ninst, x1, gd);
+                SMREAD();
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0);
+                VLD(v0, ed, fixedaddress);
+            }
+            break;
         case 0x11:
             INST_NAME("MOVUPS Ex,Gx");
             nextop = F8;
@@ -142,6 +158,14 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             GETGX(v0, 1);
             VILVL_W(v0, q0, v0);
             break;
+        case 0x15:
+            INST_NAME("UNPCKHPS Gx, Ex");
+            nextop = F8;
+            SMREAD();
+            GETEX(q0, 0, 0);
+            GETGX(v0, 1);
+            VILVH_W(v0, q0, v0);
+            break;
         case 0x16:
             nextop = F8;
             if (MODREG) {
@@ -158,6 +182,21 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             }
             VILVL_D(v0, v1, v0); // v0[127:64] = v1[63:0]
             break;
+        case 0x17:
+            nextop = F8;
+            INST_NAME("MOVHPS Ex,Gx");
+            GETGX(v0, 0);
+            if(MODREG) {
+                v1 = sse_get_reg(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 1);
+                VEXTRINS_D(v1, v0, 0x01);
+            } else {
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0);
+                v1 = fpu_get_scratch(dyn);
+                VEXTRINS_D(v1, v0, 0x01);
+                FST_D(v1, ed, fixedaddress);
+                SMWRITE2();
+            }
+            break;
         case 0x18:
             nextop = F8;
             if ((nextop & 0xC0) == 0xC0) {
@@ -283,6 +322,21 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
 
 #undef GO
 
+        case 0x51:
+            INST_NAME("SQRTPS Gx, Ex");
+            nextop = F8;
+            GETEX(q0, 0, 0);
+            GETGX_empty(v0);
+            VFSQRT_S(v0, q0);
+            break;
+        case 0x52:
+            INST_NAME("RSQRTPS Gx, Ex");
+            nextop = F8;
+            SKIPTEST(x1);
+            GETEX(q0, 0, 0);
+            GETGX_empty(q1);
+            VFRSQRT_S(q1, q0);
+            break;
         case 0x54:
             INST_NAME("ANDPS Gx, Ex");
             nextop = F8;
@@ -318,6 +372,20 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 VXOR_V(q0, q0, q1);
             }
             break;
+        case 0x58:
+            INST_NAME("ADDPS Gx, Ex");
+            nextop = F8;
+            GETEX(q0, 0, 0);
+            GETGX(v0, 1);
+            VFADD_S(v0, v0, q0);
+            break;
+        case 0x59:
+            INST_NAME("MULPS Gx, Ex");
+            nextop = F8;
+            GETEX(q0, 0, 0);
+            GETGX(v0, 1);
+            VFMUL_S(v0, v0, q0);
+            break;
         case 0x5A:
             INST_NAME("CVTPS2PD Gx, Ex");
             nextop = F8;
@@ -325,6 +393,13 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             GETGX(q1, 1);
             VFCVTL_D_S(q1, q0);
             break;
+        case 0x5C:
+            INST_NAME("SUBPS Gx, Ex");
+            nextop = F8;
+            GETEX(q0, 0, 0);
+            GETGX(v0, 1);
+            VFSUB_S(v0, v0, q0);
+            break;
 
 #define GO(GETFLAGS, NO, YES, F, I) \
     if (box64_dynarec_test == 2) { NOTEST(x1); } \
diff --git a/src/dynarec/la64/dynarec_la64_66.c b/src/dynarec/la64/dynarec_la64_66.c
index fefd7056..3dd04ede 100644
--- a/src/dynarec/la64/dynarec_la64_66.c
+++ b/src/dynarec/la64/dynarec_la64_66.c
@@ -58,6 +58,15 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
         return dynarec64_00(dyn, addr - 1, ip, ninst, rex, rep, ok, need_epilog); // addr-1, to "put back" opcode
 
     switch (opcode) {
+        case 0x01:
+            INST_NAME("ADD Ew, Gw");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            nextop = F8;
+            GETGW(x2);
+            GETEW(x1, 0);
+            emit_add16(dyn, ninst, x1, x2, x4, x5, x6);
+            EWBACK;
+            break;
         case 0x03:
             INST_NAME("ADD Gw, Ew");
             SETFLAGS(X_ALL, SF_SET_PENDING);
@@ -67,6 +76,15 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             emit_add16(dyn, ninst, x1, x2, x3, x4, x6);
             GWBACK;
             break;
+        case 0x05:
+            INST_NAME("ADD AX, Iw");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            i32 = F16;
+            BSTRPICK_D(x1, xRAX, 15, 0);
+            MOV32w(x2, i32);
+            emit_add16(dyn, ninst, x1, x2, x3, x4, x6);
+            BSTRINS_D(xRAX, x1, 15, 0);
+            break;
         case 0x09:
             INST_NAME("OR Ew, Gw");
             SETFLAGS(X_ALL, SF_SET_PENDING);
@@ -93,6 +111,24 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             emit_sbb16(dyn, ninst, x1, x2, x4, x5, x6);
             EWBACK;
             break;
+        case 0x21:
+            INST_NAME("AND Ew, Gw");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            nextop = F8;
+            GETGW(x2);
+            GETEW(x1, 0);
+            emit_and16(dyn, ninst, x1, x2, x4, x5);
+            EWBACK;
+            break;
+        case 0x23:
+            INST_NAME("AND Gw, Ew");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            nextop = F8;
+            GETGW(x1);
+            GETEW(x2, 0);
+            emit_and16(dyn, ninst, x1, x2, x3, x4);
+            GWBACK;
+            break;
         case 0x25:
             INST_NAME("AND AX, Iw");
             SETFLAGS(X_ALL, SF_SET_PENDING);
@@ -411,6 +447,46 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 SMWRITELOCK(lock);
             }
             break;
+        case 0xD1:
+        case 0xD3:
+            nextop = F8;
+            switch ((nextop >> 3) & 7) {
+                case 4:
+                case 6:
+                    if (opcode == 0xD1) {
+                        INST_NAME("SHL Ew, 1");
+                        MOV32w(x2, 1);
+                    } else {
+                        INST_NAME("SHL Ew, CL");
+                        ANDI(x2, xRCX, 0x1f);
+                        BEQ_NEXT(x2, xZR);
+                    }
+                    SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined
+                    if (box64_dynarec_safeflags > 1)
+                        MAYSETFLAGS();
+                    GETEW(x1, 0);
+                    emit_shl16(dyn, ninst, x1, x2, x5, x4, x6);
+                    EWBACK;
+                    break;
+                default:
+                    DEFAULT;
+            }
+            break;
+        case 0xF7:
+            nextop = F8;
+            switch ((nextop >> 3) & 7) {
+                case 0:
+                case 1:
+                    INST_NAME("TEST Ew, Iw");
+                    SETFLAGS(X_ALL, SF_SET_PENDING);
+                    GETEW(x1, 2);
+                    u16 = F16;
+                    MOV32w(x2, u16);
+                    emit_test16(dyn, ninst, x1, x2, x3, x4, x5);
+                    break;
+                default:
+                    DEFAULT;
+            }
         default:
             DEFAULT;
     }
diff --git a/src/dynarec/la64/dynarec_la64_660f.c b/src/dynarec/la64/dynarec_la64_660f.c
index 3064397f..2929118a 100644
--- a/src/dynarec/la64/dynarec_la64_660f.c
+++ b/src/dynarec/la64/dynarec_la64_660f.c
@@ -248,6 +248,27 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     DEFAULT;
             }
             break;
+        case 0x54:
+            INST_NAME("ANDPD Gx, Ex");
+            nextop = F8;
+            GETEX(q0, 0, 0);
+            GETGX(v0, 1);
+            VAND_V(v0, v0, q0);
+            break;
+        case 0x57:
+            INST_NAME("XORPD Gx, Ex");
+            nextop = F8;
+            GETG;
+            if (MODREG && ((nextop & 7) + (rex.b << 3) == gd)) {
+                // special case for XORPD Gx, Gx
+                q0 = sse_get_reg_empty(dyn, ninst, x1, gd);
+                VXOR_V(q0, q0, q0);
+            } else {
+                q0 = sse_get_reg(dyn, ninst, x1, gd, 1);
+                GETEX(q1, 0, 0);
+                VXOR_V(q0, q0, q1);
+            }
+            break;
         case 0x5A:
             INST_NAME("CVTPD2PS Gx, Ex");
             nextop = F8;
@@ -601,6 +622,18 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 SMWRITE2();
             }
             break;
+        case 0xAF:
+            INST_NAME("IMUL Gw,Ew");
+            SETFLAGS(X_ALL, SF_PENDING);
+            nextop = F8;
+            UFLAG_DF(x1, d_imul16);
+            GETSEW(x1, 0);
+            GETSGW(x2);
+            MUL_W(x2, x2, x1);
+            UFLAG_RES(x2);
+            BSTRPICK_D(x2, x2, 15, 0);
+            GWBACK;
+            break;
         case 0xBE:
             INST_NAME("MOVSX Gw, Eb");
             nextop = F8;
diff --git a/src/dynarec/la64/dynarec_la64_emit_math.c b/src/dynarec/la64/dynarec_la64_emit_math.c
index d9958f27..bee0acfa 100644
--- a/src/dynarec/la64/dynarec_la64_emit_math.c
+++ b/src/dynarec/la64/dynarec_la64_emit_math.c
@@ -1024,3 +1024,101 @@ void emit_neg32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
         ORI(xFlags, xFlags, 1 << F_ZF);
     }
 }
+
+// emit ADC32 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch
+void emit_adc32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5, int s6)
+{
+    IFX (X_PEND) {
+        SDxw(s1, xEmu, offsetof(x64emu_t, op1));
+        SDxw(s2, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s3, rex.w ? d_adc64 : d_adc32b);
+    } else IFX (X_ALL) {
+        SET_DFNONE();
+    }
+
+    if (la64_lbt) {
+        IFX (X_ALL) {
+            if (rex.w)
+                X64_ADC_D(s1, s2);
+            else
+                X64_ADC_W(s1, s2);
+        }
+        if (rex.w)
+            ADC_D(s1, s1, s2);
+        else
+            ADC_W(s1, s1, s2);
+
+        IFX (X_PEND) {
+            SDxw(s1, xEmu, offsetof(x64emu_t, res));
+        }
+        return;
+    }
+
+    IFX (X_CF) {
+        if (rex.w) {
+            AND(s5, xMASK, s1);
+            AND(s4, xMASK, s2);
+            ADD_D(s5, s5, s4); // lo
+            SRLI_D(s3, s1, 0x20);
+            SRLI_D(s4, s2, 0x20);
+            ADD_D(s4, s4, s3);
+            SRLI_D(s5, s5, 0x20);
+            ADD_D(s5, s5, s4); // hi
+            SRAI_D(s6, s5, 0x20);
+        } else {
+            AND(s3, s1, xMASK);
+            AND(s4, s2, xMASK);
+            ADD_D(s5, s3, s4);
+            SRLI_D(s6, s5, 0x20);
+        }
+    }
+    IFX (X_AF | X_OF) {
+        OR(s4, s1, s2);  // s4 = op1 | op2
+        AND(s5, s1, s2); // s5 = op1 & op2
+    }
+
+    ADDxw(s1, s1, s2);
+    ANDI(s3, xFlags, 1 << F_CF);
+    ADDxw(s1, s1, s3);
+
+    IFX (X_PEND) {
+        SDxw(s1, xEmu, offsetof(x64emu_t, res));
+    }
+
+    CLEAR_FLAGS(s3);
+    IFX (X_CF) {
+        BEQZ(s6, 8);
+        ORI(xFlags, xFlags, 1 << F_CF);
+    }
+    IFX (X_AF | X_OF) {
+        ANDN(s3, s4, s1); // s3 = ~res & (op1 | op2)
+        OR(s3, s3, s5);   // cc = (~res & (op1 | op2)) | (op1 & op2)
+        IFX (X_AF) {
+            ANDI(s4, s3, 0x08); // AF: cc & 0x08
+            BEQZ(s4, 8);
+            ORI(xFlags, xFlags, 1 << F_AF);
+        }
+        IFX (X_OF) {
+            SRLI_D(s3, s3, rex.w ? 62 : 30);
+            SRLI_D(s4, s3, 1);
+            XOR(s3, s3, s4);
+            ANDI(s3, s3, 1); // OF: xor of two MSB's of cc
+            BEQZ(s3, 8);
+            ORI(xFlags, xFlags, 1 << F_OF);
+        }
+    }
+    IFX (X_SF) {
+        BGE(s1, xZR, 8);
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+    if (!rex.w) {
+        ZEROUP(s1);
+    }
+    IFX (X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+    IFX (X_ZF) {
+        BNEZ(s1, 8);
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+}
diff --git a/src/dynarec/la64/dynarec_la64_emit_shift.c b/src/dynarec/la64/dynarec_la64_emit_shift.c
index 6bea6301..9820a754 100644
--- a/src/dynarec/la64/dynarec_la64_emit_shift.c
+++ b/src/dynarec/la64/dynarec_la64_emit_shift.c
@@ -22,6 +22,70 @@
 #include "dynarec_la64_functions.h"
 #include "dynarec_la64_helper.h"
 
+// emit SHL16 instruction, from s1 , shift s2, store result in s1 using s3, s4 and s5 as scratch
+void emit_shl16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
+{
+    // s2 is not 0 here and is 1..1f/3f
+    IFX (X_PEND) {
+        ST_H(s1, xEmu, offsetof(x64emu_t, op1));
+        ST_H(s2, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s4, d_shl16);
+    } else IFX (X_ALL) {
+        SET_DFNONE();
+    }
+
+    if (la64_lbt) {
+        IFX (X_ALL) {
+            X64_SLL_H(s1, s2);
+        }
+        SLL_D(s1, s1, s2);
+        BSTRPICK_D(s1, s1, 15, 0);
+        IFX (X_PEND) {
+            ST_H(s1, xEmu, offsetof(x64emu_t, res));
+        }
+
+        return;
+    }
+
+    SLL_D(s1, s1, s2);
+
+    CLEAR_FLAGS(s3);
+    IFX (X_CF | X_OF) {
+        SRLI_D(s5, s1, 16);
+        ANDI(s5, s5, 1); // LSB == F_CF
+        IFX (X_CF) {
+            OR(xFlags, xFlags, s5);
+        }
+    }
+
+    SLLI_D(s1, s1, 48);
+    IFX (X_SF) {
+        BGE(s1, xZR, 8);
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+    SRLI_D(s1, s1, 48);
+
+    IFX (X_PEND) {
+        ST_H(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX (X_ZF) {
+        BNEZ(s1, 8);
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX (X_OF) {
+        // OF flag is affected only on 1-bit shifts
+        ADDI_D(s3, s2, -1);
+        BNEZ(s3, 4 + 4 * 4);
+        SRLI_D(s3, s1, 15);
+        XOR(s3, s3, s5);
+        SLLI_D(s3, s3, F_OF);
+        OR(xFlags, xFlags, s3);
+    }
+    IFX (X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
+
 // emit SHL32 instruction, from s1 , shift s2, store result in s1 using s3, s4 and s5 as scratch
 void emit_shl32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5)
 {
diff --git a/src/dynarec/la64/dynarec_la64_f20f.c b/src/dynarec/la64/dynarec_la64_f20f.c
index 903d7e58..e065c569 100644
--- a/src/dynarec/la64/dynarec_la64_f20f.c
+++ b/src/dynarec/la64/dynarec_la64_f20f.c
@@ -93,6 +93,65 @@ uintptr_t dynarec64_F20F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             }
             VEXTRINS_D(v0, d1, 0);
             break;
+        case 0x2C:
+            INST_NAME("CVTTSD2SI Gd, Ex");
+            nextop = F8;
+            GETGD;
+            GETEXSD(q0, 0, 0);
+            if (!box64_dynarec_fastround) {
+                MOVGR2FCSR(FCSR2, xZR); // reset all bits
+            }
+            d1 = fpu_get_scratch(dyn);
+            if (rex.w) {
+                FTINTRZ_L_D(d1, q0);
+                MOVFR2GR_D(gd, d1);
+            } else {
+                FTINTRZ_W_D(d1, q0);
+                MOVFR2GR_S(gd, d1);
+            }
+            if (!rex.w) ZEROUP(gd);
+            if (!box64_dynarec_fastround) {
+                MOVFCSR2GR(x5, FCSR2); // get back FPSR to check
+                MOV32w(x3, (1 << FR_V) | (1 << FR_O));
+                AND(x5, x5, x3);
+                CBZ_NEXT(x5);
+                if (rex.w) {
+                    MOV64x(gd, 0x8000000000000000LL);
+                } else {
+                    MOV32w(gd, 0x80000000);
+                }
+            }
+            break;
+        case 0x2D:
+            INST_NAME("CVTSD2SI Gd, Ex");
+            nextop = F8;
+            GETGD;
+            GETEXSD(q0, 0, 0);
+            if (!box64_dynarec_fastround) {
+                MOVGR2FCSR(FCSR2, xZR); // reset all bits
+            }
+            d1 = fpu_get_scratch(dyn);
+            u8 = sse_setround(dyn, ninst, x2, x3);
+            if (rex.w) {
+                FTINT_L_D(d1, q0);
+                MOVFR2GR_D(gd, d1);
+            } else {
+                FTINT_W_D(d1, q0);
+                MOVFR2GR_S(gd, d1);
+            }
+            x87_restoreround(dyn, ninst, u8);
+            if (!box64_dynarec_fastround) {
+                MOVFCSR2GR(x5, FCSR2); // get back FPSR to check
+                MOV32w(x3, (1 << FR_V) | (1 << FR_O));
+                AND(x5, x5, x3);
+                CBZ_NEXT(x5);
+                if (rex.w) {
+                    MOV64x(gd, 0x8000000000000000LL);
+                } else {
+                    MOV32w(gd, 0x80000000);
+                }
+            }
+            break;
         case 0x58:
             INST_NAME("ADDSD Gx, Ex");
             nextop = F8;
@@ -127,6 +186,15 @@ uintptr_t dynarec64_F20F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             MARK;
             VEXTRINS_D(v0, d0, 0); // v0[63:0] = d0[63:0]
             break;
+        case 0x5A:
+            INST_NAME("CVTSD2SS Gx, Ex");
+            nextop = F8;
+            GETGX(v0, 1);
+            GETEXSD(d0, 0, 0);
+            d1 = fpu_get_scratch(dyn);
+            FCVT_S_D(d1, d0);
+            VEXTRINS_W(v0, d1, 0);
+            break;
         case 0x5C:
             INST_NAME("SUBSD Gx, Ex");
             nextop = F8;
diff --git a/src/dynarec/la64/dynarec_la64_helper.c b/src/dynarec/la64/dynarec_la64_helper.c
index 8965002d..9c59e59c 100644
--- a/src/dynarec/la64/dynarec_la64_helper.c
+++ b/src/dynarec/la64/dynarec_la64_helper.c
@@ -585,6 +585,43 @@ void x87_forget(dynarec_la64_t* dyn, int ninst, int s1, int s2, int st)
     // TODO
 }
 
+// Set rounding according to mxcsr flags, return reg to restore flags
+int sse_setround(dynarec_la64_t* dyn, int ninst, int s1, int s2)
+{
+    MAYUSE(dyn);
+    MAYUSE(ninst);
+    MAYUSE(s1);
+    MAYUSE(s2);
+    LD_W(s1, xEmu, offsetof(x64emu_t, mxcsr));
+    SRLI_D(s1, s1, 13);
+    ANDI(s1, s1, 0b11);
+    // MMX/x87 Round mode: 0..3: Nearest, Down, Up, Chop
+    // LA64: 0..3: Nearest, TowardZero, TowardsPositive, TowardsNegative
+    // 0->0, 1->3, 2->2, 3->1
+    BEQ(s1, xZR, 32);
+    ADDI_D(s2, xZR, 2);
+    BEQ(s1, s2, 24);
+    ADDI_D(s2, xZR, 3);
+    BEQ(s1, s2, 12);
+    ADDI_D(s1, xZR, 3);
+    B(8);
+    ADDI_D(s1, xZR, 1);
+    // done
+    SLLI_D(s1, s1, 8);
+    MOVFCSR2GR(s2, FCSR3);
+    MOVGR2FCSR(FCSR3, s1); // exange RM with current
+    return s2;
+}
+
+// Restore round flag
+void x87_restoreround(dynarec_la64_t* dyn, int ninst, int s1)
+{
+    MAYUSE(dyn);
+    MAYUSE(ninst);
+    MAYUSE(s1);
+    MOVGR2FCSR(FCSR3, s1);
+}
+
 // SSE / SSE2 helpers
 // get lsx register for a SSE reg, create the entry if needed
 int sse_get_reg(dynarec_la64_t* dyn, int ninst, int s1, int a, int forwrite)
diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h
index d7bf5efd..bebc24c6 100644
--- a/src/dynarec/la64/dynarec_la64_helper.h
+++ b/src/dynarec/la64/dynarec_la64_helper.h
@@ -176,6 +176,11 @@
         addr = fakeed(dyn, addr, ninst, nextop); \
     }
 
+// GETGW extract x64 register in gd, that is i, Signed extented
+#define GETSGW(i)                                        \
+    gd = TO_LA64(((nextop & 0x38) >> 3) + (rex.r << 3)); \
+    EXT_W_H(i, gd);                                      \
+    gd = i;
 
 // Write back ed in wback (if wback not 0)
 #define WBACK \
@@ -186,7 +191,20 @@
         ST_W(ed, wback, fixedaddress); \
         SMWRITE(); \
     }
-
+// GETSEW will use i for ed, and can use r3 for wback. This is the Signed version
+#define GETSEW(i, D)                                                                           \
+    if (MODREG) {                                                                              \
+        wback = TO_LA64((nextop & 7) + (rex.b << 3));                                          \
+        EXT_W_H(i, wback);                                                                     \
+        ed = i;                                                                                \
+        wb1 = 0;                                                                               \
+    } else {                                                                                   \
+        SMREAD();                                                                              \
+        addr = geted(dyn, addr, ninst, nextop, &wback, x3, i, &fixedaddress, rex, NULL, 1, D); \
+        LD_H(i, wback, fixedaddress);                                                          \
+        ed = i;                                                                                \
+        wb1 = 1;                                                                               \
+    }
 // Write w back to original register / memory (w needs to be 16bits only!)
 #define EWBACKW(w) \
     if (wb1) { \
@@ -734,6 +752,7 @@ void* la64_next(x64emu_t* emu, uintptr_t addr);
 #define emit_add8 STEPNAME(emit_add8)
 #define emit_add8c STEPNAME(emit_add8c)
 #define emit_add16 STEPNAME(emit_add16)
+#define emit_adc32 STEPNAME(emit_adc32)
 #define emit_sub16 STEPNAME(emit_sub16)
 #define emit_sub32 STEPNAME(emit_sub32)
 #define emit_sub32c STEPNAME(emit_sub32c)
@@ -760,6 +779,7 @@ void* la64_next(x64emu_t* emu, uintptr_t addr);
 #define emit_and16 STEPNAME(emit_and16)
 #define emit_and32 STEPNAME(emit_and32)
 #define emit_and32c STEPNAME(emit_and32c)
+#define emit_shl16 STEPNAME(emit_shl16)
 #define emit_shl32 STEPNAME(emit_shl32)
 #define emit_shl32c STEPNAME(emit_shl32c)
 #define emit_shr8 STEPNAME(emit_shr8)
@@ -771,7 +791,8 @@ void* la64_next(x64emu_t* emu, uintptr_t addr);
 #define emit_pf STEPNAME(emit_pf)
-
+#define x87_restoreround STEPNAME(x87_restoreround)
+#define sse_setround STEPNAME(sse_setround)
 #define x87_forget STEPNAME(x87_forget)
 #define sse_purge07cache STEPNAME(sse_purge07cache)
 #define sse_get_reg STEPNAME(sse_get_reg)
@@ -818,6 +839,7 @@ void emit_add32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i
 void emit_add8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
 void emit_add8c(dynarec_la64_t* dyn, int ninst, int s1, int32_t c, int s2, int s3, int s4);
 void emit_add16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
+void emit_adc32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5, int s6);
 void emit_sub16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
 void emit_sub32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5);
 void emit_sub32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s2, int s3, int s4, int s5);
@@ -844,6 +866,7 @@ void emit_and8c(dynarec_la64_t* dyn, int ninst, int s1, int32_t c, int s3, int s
 void emit_and16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
 void emit_and32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
 void emit_and32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4);
+void emit_shl16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
 void emit_shl32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5);
 void emit_shl32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4, int s5);
 void emit_shr8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
@@ -866,7 +889,10 @@ void fpu_reflectcache(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3);
 void fpu_unreflectcache(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3);
 void fpu_pushcache(dynarec_la64_t* dyn, int ninst, int s1, int not07);
 void fpu_popcache(dynarec_la64_t* dyn, int ninst, int s1, int not07);
-
+// Restore round flag
+void x87_restoreround(dynarec_la64_t* dyn, int ninst, int s1);
+// Set rounding according to mxcsr flags, return reg to restore flags
+int sse_setround(dynarec_la64_t* dyn, int ninst, int s1, int s2);
 // refresh a value from the cache ->emu and then forget the cache (nothing done if value is not cached)
 void x87_forget(dynarec_la64_t* dyn, int ninst, int s1, int s2, int st);
diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h
index 9951f070..29713ca6 100644
--- a/src/dynarec/la64/la64_emitter.h
+++ b/src/dynarec/la64/la64_emitter.h
@@ -1246,6 +1246,12 @@ LSX instruction starts with V, LASX instruction starts with XV.
 #define VFMAXA_D(vd, vj, vk) EMIT(type_3R(0b01110001010000010, vk, vj, vd))
 #define VFMINA_S(vd, vj, vk) EMIT(type_3R(0b01110001010000101, vk, vj, vd))
 #define VFMINA_D(vd, vj, vk) EMIT(type_3R(0b01110001010000110, vk, vj, vd))
+#define VFSQRT_S(vd, vj)     EMIT(type_2R(0b0111001010011100111001, vj, vd))
+#define VFSQRT_D(vd, vj)     EMIT(type_2R(0b0111001010011100111010, vj, vd))
+#define VFRECIP_S(vd, vj)    EMIT(type_2R(0b0111001010011100111101, vj, vd))
+#define VFRECIP_D(vd, vj)    EMIT(type_2R(0b0111001010011100111110, vj, vd))
+#define VFRSQRT_S(vd, vj)    EMIT(type_2R(0b0111001010011101000001, vj, vd))
+#define VFRSQRT_D(vd, vj)    EMIT(type_2R(0b0111001010011101000010, vj, vd))
 #define VFCVTL_S_H(vd, vj)   EMIT(type_2R(0b0111001010011101111010, vj, vd))
 #define VFCVTH_S_H(vd, vj)   EMIT(type_2R(0b0111001010011101111011, vj, vd))
 #define VFCVTL_D_S(vd, vj)   EMIT(type_2R(0b0111001010011101111100, vj, vd))
```
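A note on the flag recovery in the new `emit_adc32`, which is the least obvious part of the patch. LoongArch has no EFLAGS equivalent, so on the non-LBT path the carry out of a 64-bit add is rebuilt from 32-bit halves, and AF/OF come from the carry-chain identity visible in the `ANDN`/`OR` sequence. A minimal host-side C sketch of the same arithmetic (plain C with illustrative names, not the emitter macros):

```c
#include <stdint.h>

/* Carry-out of a 64-bit a + b without 128-bit types: add the low
   32-bit halves, fold the resulting carry into the high-half sum,
   and read bit 32 of that sum -- the same AND/ADD_D/SRLI_D scheme
   emit_adc32 emits for the X_CF case (the incoming CF is added to
   the result separately). */
static int carry_out64(uint64_t a, uint64_t b)
{
    uint64_t lo = (a & 0xFFFFFFFFull) + (b & 0xFFFFFFFFull);
    uint64_t hi = (a >> 32) + (b >> 32) + (lo >> 32);
    return (int)(hi >> 32);
}

/* AF and OF via the carry-chain identity used after the add:
   bit i of ((~res & (a | b)) | (a & b)) is the carry out of bit i.
   AF is the carry out of bit 3; OF is carry-into XOR carry-out of
   the sign bit, i.e. bits 62 and 63 of the chain for a 64-bit op. */
static void af_of64(uint64_t a, uint64_t b, uint64_t res, int* af, int* of)
{
    uint64_t cc = (~res & (a | b)) | (a & b);
    *af = (int)((cc >> 3) & 1);
    *of = (int)(((cc >> 62) ^ (cc >> 63)) & 1);
}
```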
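`emit_shl16` follows a similar pattern: shift inside a 64-bit register wide enough that nothing is lost, then read the flags straight out of the wide result. Roughly, in plain C (a sketch; as in the 0x66 0xD1/0xD3 decoder above, the count is already masked and known non-zero):

```c
#include <stdint.h>

/* 16-bit SHL with x86 CF/OF semantics, mirroring emit_shl16:
   bit 16 of the widened result is the last bit shifted out (CF),
   and OF -- defined by x86 only for 1-bit shifts -- is CF XOR the
   new sign bit, which is what the trailing SRLI/XOR computes. */
static uint16_t shl16(uint16_t v, unsigned count, int* cf, int* of)
{
    uint64_t wide = (uint64_t)v << count; /* count in 1..31 */
    *cf = (int)((wide >> 16) & 1);
    uint16_t res = (uint16_t)wide;
    *of = (count == 1) ? (*cf ^ (res >> 15)) : 0; /* undefined for count > 1 */
    return res;
}
```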
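Finally, the new `sse_setround` helper maps the MXCSR rounding-control field onto the LoongArch FCSR rounding mode. The two encodings agree on "nearest" and "toward +inf" but swap the other two modes, hence the 0->0, 1->3, 2->2, 3->1 branch chain; FCSR3 aliases just the rounding-mode field, which is why the old value can be returned and later put back by `x87_restoreround`. The same table as a C sketch (hypothetical helper name):

```c
#include <stdint.h>

/* MXCSR RC (bits 13-14): 0 nearest, 1 down (-inf), 2 up (+inf), 3 chop.
   LA64 FCSR RM (bits 8-9): 0 nearest, 1 toward zero, 2 up, 3 down.
   Nearest and up map straight across; down and chop swap. */
static uint32_t la64_rm_from_mxcsr(uint32_t mxcsr)
{
    static const uint32_t map[4] = { 0, 3, 2, 1 };
    return map[(mxcsr >> 13) & 3] << 8; /* shifted into the FCSR RM field */
}
```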