diff options
| author | Yang Liu <numbksco@gmail.com> | 2024-05-03 18:41:49 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-05-03 12:41:49 +0200 |
| commit | 31dabf51aebe9825ab0c3110ebabd3e3125e927e (patch) | |
| tree | 523cdb915044e410bbc376e0571f3f99c3ad7bc9 /src | |
| parent | 22fd100b04409b3b9aebbdd6707e73802b22be50 (diff) | |
| download | box64-31dabf51aebe9825ab0c3110ebabd3e3125e927e.tar.gz box64-31dabf51aebe9825ab0c3110ebabd3e3125e927e.zip | |
[LA64_DYNAREC] Added more opcodes (#1490)
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_00.c | 72 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_0f.c | 18 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_66.c | 101 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_660f.c | 40 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_emit_shift.c | 63 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_f20f.c | 8 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_helper.h | 10 |
7 files changed, 296 insertions, 16 deletions
diff --git a/src/dynarec/la64/dynarec_la64_00.c b/src/dynarec/la64/dynarec_la64_00.c index 02bbbee2..6e5ec591 100644 --- a/src/dynarec/la64/dynarec_la64_00.c +++ b/src/dynarec/la64/dynarec_la64_00.c @@ -300,10 +300,19 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETGD; GETED(0); emit_xor32(dyn, ninst, rex, ed, gd, x3, x4); - if(ed!=gd) { + if (ed != gd) { WBACK; } break; + case 0x32: + INST_NAME("XOR Gb, Eb"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETEB(x1, 0); + GETGB(x2); + emit_xor8(dyn, ninst, x2, x1, x4, x5); + GBBACK(); + break; case 0x33: INST_NAME("XOR Gd, Ed"); SETFLAGS(X_ALL, SF_SET_PENDING); @@ -312,6 +321,12 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETED(0); emit_xor32(dyn, ninst, rex, gd, ed, x3, x4); break; + case 0x35: + INST_NAME("XOR EAX, Id"); + SETFLAGS(X_ALL, SF_SET_PENDING); + i64 = F32S; + emit_xor32c(dyn, ninst, rex, xRAX, i64, x3, x4); + break; case 0x38: INST_NAME("CMP Eb, Gb"); SETFLAGS(X_ALL, SF_SET_PENDING); @@ -320,12 +335,6 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETGB(x2); emit_cmp8(dyn, ninst, x1, x2, x3, x4, x5, x6); break; - case 0x35: - INST_NAME("XOR EAX, Id"); - SETFLAGS(X_ALL, SF_SET_PENDING); - i64 = F32S; - emit_xor32c(dyn, ninst, rex, xRAX, i64, x3, x4); - break; case 0x39: INST_NAME("CMP Ed, Gd"); SETFLAGS(X_ALL, SF_SET_PENDING); @@ -1048,6 +1057,31 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni MOV64xw(x2, i64); emit_test32(dyn, ninst, rex, xRAX, x2, x3, x4, x5); break; + case 0xAA: + if (rep) { + INST_NAME("REP STOSB"); + CBZ_NEXT(xRCX); + ANDI(x1, xFlags, 1 << F_DF); + BNEZ_MARK2(x1); + MARK; // Part with DF==0 + ST_B(xRAX, xRDI, 0); + ADDI_D(xRDI, xRDI, 1); + ADDI_D(xRCX, xRCX, -1); + BNEZ_MARK(xRCX); + B_NEXT_nocond; + MARK2; // Part with DF==1 + ST_B(xRAX, xRDI, 0); + ADDI_D(xRDI, xRDI, -1); + ADDI_D(xRCX, xRCX, -1); + BNEZ_MARK2(xRCX); + // done + } else { + INST_NAME("STOSB"); + GETDIR(x3, x1, 1); + ST_B(xRAX, xRDI, 0); + ADD_D(xRDI, xRDI, x3); + } + break; case 0xAB: if (rep) { INST_NAME("REP STOSD"); @@ -1450,6 +1484,30 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } } break; + case 0xD0: + case 0xD2: // TODO: Jump if CL is 0 + nextop = F8; + switch ((nextop >> 3) & 7) { + case 5: + if (opcode == 0xD0) { + INST_NAME("SHR Eb, 1"); + MOV32w(x2, 1); + } else { + INST_NAME("SHR Eb, CL"); + ANDI(x2, xRCX, 0x1F); + BEQ_NEXT(x2, xZR); + } + SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + if (box64_dynarec_safeflags > 1) + MAYSETFLAGS(); + GETEB(x1, 0); + emit_shr8(dyn, ninst, x1, x2, x5, x4, x6); + EBBACK(); + break; + default: + DEFAULT; + } + break; case 0xD1: nextop = F8; switch ((nextop >> 3) & 7) { diff --git a/src/dynarec/la64/dynarec_la64_0f.c b/src/dynarec/la64/dynarec_la64_0f.c index 5cc8ee71..6fb729f4 100644 --- a/src/dynarec/la64/dynarec_la64_0f.c +++ b/src/dynarec/la64/dynarec_la64_0f.c @@ -158,6 +158,24 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } VILVL_D(v0, v1, v0); // v0[127:64] = v1[63:0] break; + case 0x18: + nextop = F8; + if ((nextop & 0xC0) == 0xC0) { + INST_NAME("NOP (multibyte)"); + } else + switch ((nextop >> 3) & 7) { + case 0: + case 1: + case 2: + case 3: + INST_NAME("PREFETCHh Ed"); + FAKEED; + break; + default: + INST_NAME("NOP (multibyte)"); + FAKEED; + } + break; case 0x1F: INST_NAME("NOP (multibyte)"); nextop = F8; diff --git a/src/dynarec/la64/dynarec_la64_66.c b/src/dynarec/la64/dynarec_la64_66.c index 6c4e54f9..a9a5dc68 100644 --- a/src/dynarec/la64/dynarec_la64_66.c +++ b/src/dynarec/la64/dynarec_la64_66.c @@ -67,6 +67,15 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni emit_add16(dyn, ninst, x1, x2, x3, x4, x6); GWBACK; break; + case 0x09: + INST_NAME("OR Ew, Gw"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGW(x2); + GETEW(x1, 0); + emit_or16(dyn, ninst, x1, x2, x4, x2); + EWBACK; + break; case 0x0F: switch (rep) { case 0: addr = dynarec64_660F(dyn, addr, ip, ninst, rex, ok, need_epilog); break; @@ -145,6 +154,18 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETEW(x2, 0); emit_cmp16(dyn, ninst, x1, x2, x3, x4, x5, x6); break; + case 0x3D: + INST_NAME("CMP AX, Iw"); + SETFLAGS(X_ALL, SF_SET_PENDING); + i32 = F16; + BSTRPICK_D(x1, xRAX, 15, 0); + if (i32) { + MOV32w(x2, i32); + emit_cmp16(dyn, ninst, x1, x2, x3, x4, x5, x6); + } else { + emit_cmp16_0(dyn, ninst, x1, x3, x4); + } + break; case 0x81: case 0x83: nextop = F8; @@ -181,6 +202,22 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni emit_or16(dyn, ninst, x1, x5, x2, x4); EWBACK; break; + case 4: // AND + if (opcode == 0x81) { + INST_NAME("AND Ew, Iw"); + } else { + INST_NAME("AND Ew, Ib"); + } + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEW(x1, (opcode == 0x81) ? 2 : 1); + if (opcode == 0x81) + i16 = F16S; + else + i16 = F8S; + MOV64x(x5, i16); + emit_and16(dyn, ninst, x1, x5, x2, x4); + EWBACK; + break; case 5: // SUB if (opcode == 0x81) { INST_NAME("SUB Ew, Iw"); @@ -260,6 +297,31 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni BSTRINS_D(gd, x2, 15, 0); } break; + case 0xAB: + if (rep) { + INST_NAME("REP STOSW"); + CBZ_NEXT(xRCX); + ANDI(x1, xFlags, 1 << F_DF); + BNEZ_MARK2(x1); + MARK; // Part with DF==0 + ST_H(xRAX, xRDI, 0); + ADDI_D(xRDI, xRDI, 2); + ADDI_D(xRCX, xRCX, -1); + BNEZ_MARK(xRCX); + B_NEXT_nocond; + MARK2; // Part with DF==1 + ST_H(xRAX, xRDI, 0); + ADDI_D(xRDI, xRDI, -2); + ADDI_D(xRCX, xRCX, -1); + BNEZ_MARK2(xRCX); + // done + } else { + INST_NAME("STOSW"); + GETDIR(x3, x1, 2); + ST_H(xRAX, xRDI, 0); + ADD_D(xRDI, xRDI, x3); + } + break; case 0xB8: case 0xB9: case 0xBA: @@ -277,6 +339,45 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0xC1: nextop = F8; switch ((nextop >> 3) & 7) { + case 0: + INST_NAME("ROL Ew, Ib"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + SETFLAGS(X_OF | X_CF, SF_SET_DF); + GETEW(x1, 1); + u8 = F8; + MOV32w(x2, u8); + CALL_(rol16, x1, x3); + EWBACK; + break; + case 1: + INST_NAME("ROR Ew, Ib"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + SETFLAGS(X_OF | X_CF, SF_SET_DF); + GETEW(x1, 1); + u8 = F8; + MOV32w(x2, u8); + CALL_(ror16, x1, x3); + EWBACK; + break; + case 4: + case 6: + INST_NAME("SHL Ew, Ib"); + UFLAG_IF { MESSAGE(LOG_DUMP, "Need Optimization for flags\n"); } + SETFLAGS(X_ALL, SF_PENDING); + GETEW(x1, 1); + u8 = F8; + UFLAG_IF { MOV32w(x2, (u8 & 15)); } + UFLAG_OP12(ed, x2) + if (MODREG) { + SLLI_D(ed, ed, 48 + (u8 & 15)); + SRLI_D(ed, ed, 48); + } else { + SLLI_D(ed, ed, u8 & 15); + } + EWBACK; + UFLAG_RES(ed); + UFLAG_DF(x3, d_shl16); + break; case 5: INST_NAME("SHR Ew, Ib"); UFLAG_IF { MESSAGE(LOG_DUMP, "Need Optimization for flags\n"); } diff --git a/src/dynarec/la64/dynarec_la64_660f.c b/src/dynarec/la64/dynarec_la64_660f.c index 3bac9e1f..3064397f 100644 --- a/src/dynarec/la64/dynarec_la64_660f.c +++ b/src/dynarec/la64/dynarec_la64_660f.c @@ -86,6 +86,46 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int VLD(v0, ed, fixedaddress); } break; + case 0x2E: + // no special check... + case 0x2F: + if (opcode == 0x2F) { + INST_NAME("COMISD Gx, Ex"); + } else { + INST_NAME("UCOMISD Gx, Ex"); + } + SETFLAGS(X_ALL, SF_SET); + SET_DFNONE(); + nextop = F8; + GETGX(d0, 0); + GETEXSD(v0, 0, 0); + + CLEAR_FLAGS(x3); + // if isnan(d0) || isnan(v0) + IFX (X_ZF | X_PF | X_CF) { + FCMP_D(fcc0, d0, v0, cUN); + BCEQZ_MARK(fcc0); + ORI(xFlags, xFlags, (1 << F_ZF) | (1 << F_PF) | (1 << F_CF)); + B_MARK3_nocond; + } + MARK; + // else if isless(d0, v0) + IFX (X_CF) { + FCMP_D(fcc1, d0, v0, cLT); + BCEQZ_MARK2(fcc1); + ORI(xFlags, xFlags, 1 << F_CF); + B_MARK3_nocond; + } + MARK2; + // else if d0 == v0 + IFX (X_ZF) { + FCMP_D(fcc2, d0, v0, cEQ); + BCEQZ_MARK3(fcc2); + ORI(xFlags, xFlags, 1 << F_ZF); + } + MARK3; + SPILL_EFLAGS(); + break; case 0x38: // SSSE3 opcodes nextop = F8; switch (nextop) { diff --git a/src/dynarec/la64/dynarec_la64_emit_shift.c b/src/dynarec/la64/dynarec_la64_emit_shift.c index aa528b34..6bea6301 100644 --- a/src/dynarec/la64/dynarec_la64_emit_shift.c +++ b/src/dynarec/la64/dynarec_la64_emit_shift.c @@ -8,6 +8,7 @@ #include "dynarec.h" #include "emu/x64emu_private.h" #include "emu/x64run_private.h" +#include "la64_emitter.h" #include "x64run.h" #include "x64emu.h" #include "box64stack.h" @@ -169,6 +170,66 @@ void emit_shl32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, } } +// emit SHR8 instruction, from s1 , shift s2 (!0 and and'd already), store result in s1 using s3 and s4 as scratch +void emit_shr8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5) +{ + int64_t j64; + + + IFX (X_PEND) { + ST_B(s2, xEmu, offsetof(x64emu_t, op2)); + ST_B(s1, xEmu, offsetof(x64emu_t, op1)); + SET_DF(s4, d_shr8); + } else IFX (X_ALL) { + SET_DFNONE(); + } + + if (la64_lbt) { + IFX (X_ALL) { + X64_SRL_B(s1, s2); + } + SRL_D(s1, s1, s2); + ANDI(s1, s1, 0xff); + + IFX (X_PEND) { + ST_B(s1, xEmu, offsetof(x64emu_t, res)); + } + return; + } + + CLEAR_FLAGS(s3); + IFX (X_CF) { + ADDI_D(s3, s2, -1); + SRA_D(s3, s1, s3); + ANDI(s3, s3, 1); // LSB == F_CF + OR(xFlags, xFlags, s3); + } + IFX (X_OF) { + // OF flag is affected only on 1-bit shifts + // OF flag is set to the most-significant bit of the original operand + ADDI_D(s3, xZR, 1); + BNE(s2, s3, 4 + 3 * 4); + SRLI_D(s3, s1, 7); + SLLI_D(s3, s3, F_OF); + OR(xFlags, xFlags, s3); + } + + SRL_D(s1, s1, s2); + ANDI(s1, s1, 0xff); + + // SF should be unset + IFX (X_PEND) { + ST_B(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX (X_ZF) { + BNEZ(s1, 8); + ORI(xFlags, xFlags, 1 << F_ZF); + } + IFX (X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} + // emit SHR32 instruction, from s1 , shift s2 (!0 and and'd already), store result in s1 using s3 and s4 as scratch void emit_shr32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4) @@ -210,7 +271,7 @@ void emit_shr32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s // OF flag is affected only on 1-bit shifts // OF flag is set to the most-significant bit of the original operand ADDI_D(s3, xZR, 1); - BEQ(s2, s3, 4 + 4 * 4); + BNE(s2, s3, 4 + 4 * 4); SRLIxw(s3, s1, rex.w ? 63 : 31); SLLI_D(s3, s3, F_OF); OR(xFlags, xFlags, s3); diff --git a/src/dynarec/la64/dynarec_la64_f20f.c b/src/dynarec/la64/dynarec_la64_f20f.c index 6167883d..903d7e58 100644 --- a/src/dynarec/la64/dynarec_la64_f20f.c +++ b/src/dynarec/la64/dynarec_la64_f20f.c @@ -97,7 +97,7 @@ uintptr_t dynarec64_F20F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("ADDSD Gx, Ex"); nextop = F8; GETGX(v0, 1); - GETEXSD(v1, 0); + GETEXSD(v1, 0, 0); d0 = fpu_get_scratch(dyn); FADD_D(d0, v0, v1); if (!box64_dynarec_fastnan) { @@ -114,7 +114,7 @@ uintptr_t dynarec64_F20F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("MULSD Gx, Ex"); nextop = F8; GETGX(v0, 1); - GETEXSD(v1, 0); + GETEXSD(v1, 0, 0); d0 = fpu_get_scratch(dyn); FMUL_D(d0, v0, v1); if (!box64_dynarec_fastnan) { @@ -131,7 +131,7 @@ uintptr_t dynarec64_F20F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("SUBSD Gx, Ex"); nextop = F8; GETGX(v0, 1); - GETEXSD(v1, 0); + GETEXSD(v1, 0, 0); d0 = fpu_get_scratch(dyn); FSUB_D(d0, v0, v1); if (!box64_dynarec_fastnan) { @@ -148,7 +148,7 @@ uintptr_t dynarec64_F20F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("DIVSD Gx, Ex"); nextop = F8; GETGX(v0, 1); - GETEXSD(v1, 0); + GETEXSD(v1, 0, 0); d0 = fpu_get_scratch(dyn); FDIV_D(d0, v0, v1); if (!box64_dynarec_fastnan) { diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h index 5fa9d127..d7bf5efd 100644 --- a/src/dynarec/la64/dynarec_la64_helper.h +++ b/src/dynarec/la64/dynarec_la64_helper.h @@ -290,14 +290,14 @@ } // Get Ex as a double, not a quad (warning, x1 get used, x2 might too) -#define GETEXSD(a, D) \ +#define GETEXSD(a, w, D) \ if (MODREG) { \ - a = sse_get_reg(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0); \ + a = sse_get_reg(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), w); \ } else { \ - SMREAD(); \ + SMREAD(); /* TODO */ \ a = fpu_get_scratch(dyn); \ addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, D); \ - FLD_D(a, ed, fixedaddress); \ + FLD_D(a, ed, fixedaddress); \ } // Get Ex as a single, not a quad (warning, x1 get used) @@ -762,6 +762,7 @@ void* la64_next(x64emu_t* emu, uintptr_t addr); #define emit_and32c STEPNAME(emit_and32c) #define emit_shl32 STEPNAME(emit_shl32) #define emit_shl32c STEPNAME(emit_shl32c) +#define emit_shr8 STEPNAME(emit_shr8) #define emit_shr32 STEPNAME(emit_shr32) #define emit_shr32c STEPNAME(emit_shr32c) #define emit_sar32c STEPNAME(emit_sar32c) @@ -845,6 +846,7 @@ void emit_and32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s void emit_and32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4); void emit_shl32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5); void emit_shl32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4, int s5); +void emit_shr8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5); void emit_shr32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); void emit_shr32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4); void emit_sar32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4); |