diff options
| author | Yang Liu <numbksco@gmail.com> | 2024-04-05 23:56:24 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-04-05 17:56:24 +0200 |
| commit | 6c5d35df6b79c823640874229fcfab93ea3eb45e (patch) | |
| tree | c6270e3c4250162e9915cfe217c8236ff03bc921 /src | |
| parent | 4d260217054f16e4efc31c9bef7974898f43252e (diff) | |
| download | box64-6c5d35df6b79c823640874229fcfab93ea3eb45e.tar.gz box64-6c5d35df6b79c823640874229fcfab93ea3eb45e.zip | |
[LA64_DYNAREC] Added more opcodes (#1417)
* Added 64 8B MOV opcode * Added 64 33 XOR opcode * Added 19 SBB opcode * Smol fix
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_00.c | 16 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_64.c | 90 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_emit_math.c | 74 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_helper.c | 26 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_helper.h | 27 | ||||
| -rw-r--r-- | src/dynarec/la64/la64_emitter.h | 34 |
6 files changed, 265 insertions, 2 deletions
diff --git a/src/dynarec/la64/dynarec_la64_00.c b/src/dynarec/la64/dynarec_la64_00.c
index 56765fe1..d0cf7328 100644
--- a/src/dynarec/la64/dynarec_la64_00.c
+++ b/src/dynarec/la64/dynarec_la64_00.c
@@ -141,6 +141,16 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 DEFAULT;
             }
             break;
+        case 0x19:
+            INST_NAME("SBB Ed, Gd");
+            READFLAGS(X_CF);
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            nextop = F8;
+            GETGD;
+            GETED(0);
+            emit_sbb32(dyn, ninst, rex, ed, gd, x3, x4, x5);
+            WBACK;
+            break;
         case 0x20:
             INST_NAME("AND Eb, Gb");
             SETFLAGS(X_ALL, SF_SET_PENDING);
@@ -336,6 +346,12 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 }
             }
             break;
+        case 0x64:
+            addr = dynarec64_64(dyn, addr, ip, ninst, rex, rep, _FS, ok, need_epilog);
+            break;
+        case 0x65:
+            addr = dynarec64_64(dyn, addr, ip, ninst, rex, rep, _GS, ok, need_epilog);
+            break;
         case 0x66:
             addr = dynarec64_66(dyn, addr, ip, ninst, rex, rep, ok, need_epilog);
             break;
diff --git a/src/dynarec/la64/dynarec_la64_64.c b/src/dynarec/la64/dynarec_la64_64.c
new file mode 100644
index 00000000..9dd03c01
--- /dev/null
+++ b/src/dynarec/la64/dynarec_la64_64.c
@@ -0,0 +1,90 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <errno.h>
+
+#include "debug.h"
+#include "box64context.h"
+#include "dynarec.h"
+#include "emu/x64emu_private.h"
+#include "emu/x64run_private.h"
+#include "x64run.h"
+#include "x64emu.h"
+#include "box64stack.h"
+#include "callback.h"
+#include "emu/x64run_private.h"
+#include "x64trace.h"
+#include "dynarec_native.h"
+#include "custommem.h"
+
+#include "la64_printer.h"
+#include "dynarec_la64_private.h"
+#include "dynarec_la64_helper.h"
+#include "dynarec_la64_functions.h"
+
+uintptr_t dynarec64_64(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int seg, int* ok, int* need_epilog)
+{
+    (void)ip;
+    (void)rep;
+    (void)need_epilog;
+
+    uint8_t opcode = F8;
+    uint8_t nextop;
+    uint8_t u8;
+    uint8_t gd, ed, eb1, eb2, gb1, gb2;
+    uint8_t gback, wback, wb1, wb2, wb;
+    int64_t i64, j64;
+    uint64_t u64;
+    int v0, v1;
+    int q0;
+    int d0;
+    int64_t fixedaddress, gdoffset;
+    int unscaled;
+    MAYUSE(eb1);
+    MAYUSE(eb2);
+    MAYUSE(wb1);
+    MAYUSE(wb2);
+    MAYUSE(gb1);
+    MAYUSE(gb2);
+    MAYUSE(j64);
+    MAYUSE(d0);
+    MAYUSE(q0);
+    MAYUSE(v0);
+    MAYUSE(v1);
+
+    while ((opcode == 0xF2) || (opcode == 0xF3)) {
+        rep = opcode - 0xF1;
+        opcode = F8;
+    }
+
+    GETREX();
+
+    switch (opcode) {
+        case 0x33:
+            INST_NAME("XOR Gd, Seg:Ed");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            grab_segdata(dyn, addr, ninst, x4, seg); // x4 = segment base
+            nextop = F8;
+            GETGD;
+            GETEDO(x4, 0);
+            emit_xor32(dyn, ninst, rex, gd, ed, x3, x4);
+            break;
+        case 0x8B:
+            INST_NAME("MOV Gd, Seg:Ed");
+            grab_segdata(dyn, addr, ninst, x4, seg); // x4 = segment base
+            nextop = F8;
+            GETGD;
+            if (MODREG) { // reg <= reg
+                MVxw(gd, TO_LA64((nextop & 7) + (rex.b << 3)));
+            } else { // reg <= mem
+                SMREAD();
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
+                ADD_D(x4, ed, x4); // effective address + segment base
+                LDxw(gd, x4, fixedaddress);
+            }
+            break;
+        default:
+            DEFAULT;
+    }
+    return addr;
+}
diff --git a/src/dynarec/la64/dynarec_la64_emit_math.c b/src/dynarec/la64/dynarec_la64_emit_math.c
index f47eb082..ce928edd 100644
--- a/src/dynarec/la64/dynarec_la64_emit_math.c
+++ b/src/dynarec/la64/dynarec_la64_emit_math.c
@@ -155,7 +155,10 @@ void emit_add32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i
 
     if (la64_lbt) {
         IFX(X_ALL) {
-            X64_ADD_WU(s1, s2);
+            if (rex.w)
+                X64_ADD_DU(s1, s2);
+            else
+                X64_ADD_WU(s1, s2);
         }
         ADDxw(s1, s1, s2);
         if (!rex.w) ZEROUP(s1);
@@ -536,7 +539,11 @@ void emit_sub32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i
     if (la64_lbt) {
         IFX(X_PEND) {} else {MOV64xw(s2, c);}
         IFX(X_ALL) {
-            X64_SUB_WU(s1, s2);
+            if (rex.w) { // 64-bit operand: doubleword flag computation
+                X64_SUB_DU(s1, s2);
+            } else { // 32-bit operand: word flag computation
+                X64_SUB_WU(s1, s2);
+            }
         }
         SUBxw(s1, s1, s2);
         if (!rex.w) ZEROUP(s1);
@@ -585,6 +592,69 @@ void emit_sub32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i
 }
 
 
+// emit SBB32 instruction, from s1, s2, store result in s1 using s3, s4 and s5 as scratch
+void emit_sbb32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5)
+{
+    IFX (X_PEND) {
+        SDxw(s1, xEmu, offsetof(x64emu_t, op1));
+        SDxw(s2, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s3, rex.w ? d_sbb64 : d_sbb32);
+    } else IFX (X_ALL) {
+        SET_DFNONE();
+    }
+
+    if (la64_lbt) {
+        if (rex.w) { // 64-bit operand: doubleword subtract-with-borrow
+            SBC_D(s3, s1, s2);
+        } else { // 32-bit operand: word subtract-with-borrow
+            SBC_W(s3, s1, s2);
+        }
+        IFX (X_ALL) {
+            if (rex.w)
+                X64_SBC_D(s1, s2);
+            else
+                X64_SBC_W(s1, s2);
+        }
+        MVxw(s1, s3);
+
+        IFX (X_PEND)
+            SDxw(s1, xEmu, offsetof(x64emu_t, res));
+        return;
+    }
+
+    IFX (X_AF | X_CF | X_OF) {
+        // for later flag calculation
+        NOR(s5, xZR, s1);
+    }
+
+    SUBxw(s1, s1, s2);
+    ANDI(s3, xFlags, 1 << F_CF); // s3 = incoming carry (borrow) bit
+    SUBxw(s1, s1, s3);
+
+    CLEAR_FLAGS(s3);
+    IFX (X_SF) {
+        BGE(s1, xZR, 8);
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+    if (!rex.w) {
+        ZEROUP(s1);
+    }
+
+    IFX (X_PEND) {
+        SDxw(s1, xEmu, offsetof(x64emu_t, res));
+    }
+
+    CALC_SUB_FLAGS(s5, s2, s1, s3, s4, rex.w ? 64 : 32);
+    IFX (X_ZF) {
+        BNEZ(s1, 8);
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX (X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
+
+
 // emit NEG32 instruction, from s1, store result in s1 using s2 and s3 as scratch
 void emit_neg32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3)
 {
diff --git a/src/dynarec/la64/dynarec_la64_helper.c b/src/dynarec/la64/dynarec_la64_helper.c
index 7d8b9961..6c5b76b5 100644
--- a/src/dynarec/la64/dynarec_la64_helper.c
+++ b/src/dynarec/la64/dynarec_la64_helper.c
@@ -506,6 +506,32 @@ void call_c(dynarec_la64_t* dyn, int ninst, void* fnc, int reg, int ret, int sav
     dyn->last_ip = 0;
 }
 
+void grab_segdata(dynarec_la64_t* dyn, uintptr_t addr, int ninst, int reg, int segment)
+{
+    (void)addr;
+    int64_t j64;
+    MAYUSE(j64);
+    MESSAGE(LOG_DUMP, "Get %s Offset\n", (segment == _FS) ? "FS" : "GS");
+    int t1 = x1, t2 = x4;
+    if (reg == t1) ++t1; // keep the scratch registers distinct from reg
+    if (reg == t2) ++t2;
+    LD_WU(t2, xEmu, offsetof(x64emu_t, segs_serial[segment]));
+    LD_D(reg, xEmu, offsetof(x64emu_t, segs_offs[segment]));
+    if (segment == _GS) {
+        CBNZ_MARKSEG(t2); // fast check
+    } else {
+        LD_D(t1, xEmu, offsetof(x64emu_t, context));
+        LD_WU(t1, t1, offsetof(box64context_t, sel_serial));
+        SUB_W(t1, t1, t2);
+        CBZ_MARKSEG(t1); // serials match: value already in reg is used as-is
+    }
+    MOV64x(x1, segment);
+    call_c(dyn, ninst, GetSegmentBaseEmu, t2, reg, 0, xFlags);
+    MARKSEG;
+    MESSAGE(LOG_DUMP, "----%s Offset\n", (segment == _FS) ? "FS" : "GS");
+}
+
+
 void x87_forget(dynarec_la64_t* dyn, int ninst, int s1, int s2, int st)
 {
     // TODO
diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h
index d3acc05f..3d91d68e 100644
--- a/src/dynarec/la64/dynarec_la64_helper.h
+++ b/src/dynarec/la64/dynarec_la64_helper.h
@@ -122,6 +122,18 @@
 // GETEW will use i for ed, and can use r3 for wback.
 #define GETEW(i, D) GETEWW(x3, i, D)
 
+// GETEDO fetches Ed, loading memory operands from [address + O] (O is an offset register); uses x1 for ed and x2 for wback. wback is 0 if ed is a register
+#define GETEDO(O, D)                                                                            \
+    if (MODREG) {                                                                               \
+        ed = TO_LA64((nextop & 7) + (rex.b << 3));                                              \
+        wback = 0;                                                                              \
+    } else {                                                                                    \
+        SMREAD();                                                                               \
+        addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 0, D); \
+        LDXxw(x1, wback, O);                                                                    \
+        ed = x1;                                                                                \
+    }
+
 // FAKEED like GETED, but doesn't get anything
 #define FAKEED                                   \
     if (!MODREG) {                               \
@@ -304,6 +316,15 @@
     j64 = (dyn->insts) ? (dyn->insts[ninst].epilog - (dyn->native_size)) : 0; \
     B(j64)
 
+// Branch to MARKSEG if reg is 0 (use j64)
+#define CBZ_MARKSEG(reg)                   \
+    j64 = GETMARKSEG - (dyn->native_size); \
+    BEQZ(reg, j64);
+// Branch to MARKSEG if reg is not 0 (use j64)
+#define CBNZ_MARKSEG(reg)                  \
+    j64 = GETMARKSEG - (dyn->native_size); \
+    BNEZ(reg, j64);
+
 #define IFX(A)      if ((dyn->insts[ninst].x64.gen_flags & (A)))
 #define IFXA(A, B)  if ((dyn->insts[ninst].x64.gen_flags & (A)) && (B))
 #define IFX_PENDOR0 if ((dyn->insts[ninst].x64.gen_flags & (X_PEND) || !dyn->insts[ninst].x64.gen_flags))
@@ -525,6 +546,7 @@ void* la64_next(x64emu_t* emu, uintptr_t addr);
 
 #define dynarec64_00   STEPNAME(dynarec64_00)
 #define dynarec64_0F   STEPNAME(dynarec64_0F)
+#define dynarec64_64   STEPNAME(dynarec64_64)
 #define dynarec64_66   STEPNAME(dynarec64_66)
 #define dynarec64_F30F STEPNAME(dynarec64_F30F)
 #define dynarec64_660F STEPNAME(dynarec64_660F)
@@ -538,6 +560,7 @@ void* la64_next(x64emu_t* emu, uintptr_t addr);
 #define jump_to_next  STEPNAME(jump_to_next)
 #define ret_to_epilog STEPNAME(ret_to_epilog)
 #define call_c        STEPNAME(call_c)
+#define grab_segdata  STEPNAME(grab_segdata)
 #define emit_cmp16    STEPNAME(emit_cmp16)
 #define emit_cmp16_0  STEPNAME(emit_cmp16_0)
 #define emit_cmp32    STEPNAME(emit_cmp32)
@@ -555,6 +578,7 @@ void* la64_next(x64emu_t* emu, uintptr_t addr);
 #define emit_sub32c STEPNAME(emit_sub32c)
 #define emit_sub8   STEPNAME(emit_sub8)
 #define emit_sub8c  STEPNAME(emit_sub8c)
+#define emit_sbb32  STEPNAME(emit_sbb32)
 #define emit_neg32  STEPNAME(emit_neg32)
 #define emit_or32   STEPNAME(emit_or32)
 #define emit_or32c  STEPNAME(emit_or32c)
@@ -599,6 +623,7 @@ void jump_to_epilog_fast(dynarec_la64_t* dyn, uintptr_t ip, int reg, int ninst);
 void jump_to_next(dynarec_la64_t* dyn, uintptr_t ip, int reg, int ninst, int is32bits);
 void ret_to_epilog(dynarec_la64_t* dyn, int ninst, rex_t rex);
 void call_c(dynarec_la64_t* dyn, int ninst, void* fnc, int reg, int ret, int saveflags, int save_reg);
+void grab_segdata(dynarec_la64_t* dyn, uintptr_t addr, int ninst, int reg, int segment);
 void emit_cmp8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5, int s6);
 void emit_cmp16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5, int s6);
 void emit_cmp32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5, int s6);
@@ -616,6 +641,7 @@ void emit_sub32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
 void emit_sub32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s2, int s3, int s4, int s5);
 void emit_sub8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
 void emit_sub8c(dynarec_la64_t* dyn, int ninst, int s1, int32_t c, int s2, int s3, int s4, int s5);
+void emit_sbb32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5);
 void emit_neg32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3);
 void emit_or32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
 void emit_or32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4);
@@ -666,6 +692,7 @@ void CacheTransform(dynarec_la64_t* dyn, int ninst, int cacheupd, int s1, int s2
 uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
 uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog);
 uintptr_t dynarec64_F30F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog);
+uintptr_t dynarec64_64(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int seg, int* ok, int* need_epilog);
 uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
 uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog);
 uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h
index 291b28da..beca7c1b 100644
--- a/src/dynarec/la64/la64_emitter.h
+++ b/src/dynarec/la64/la64_emitter.h
@@ -594,6 +594,18 @@ f24-f31 fs0-fs7 Static registers Callee
 // MemoryStore(GR[rd][63:0], paddr, DOUBLEWORD)
 #define ST_D(rd, rj, imm12) EMIT(type_2RI12(0b0010100111, imm12, rj, rd))
 
+#define LDX_B(rd, rj, rk)  EMIT(type_3R(0b00111000000000000, rk, rj, rd))
+#define LDX_H(rd, rj, rk)  EMIT(type_3R(0b00111000000001000, rk, rj, rd))
+#define LDX_W(rd, rj, rk)  EMIT(type_3R(0b00111000000010000, rk, rj, rd))
+#define LDX_D(rd, rj, rk)  EMIT(type_3R(0b00111000000011000, rk, rj, rd))
+#define STX_B(rd, rj, rk)  EMIT(type_3R(0b00111000000100000, rk, rj, rd))
+#define STX_H(rd, rj, rk)  EMIT(type_3R(0b00111000000101000, rk, rj, rd))
+#define STX_W(rd, rj, rk)  EMIT(type_3R(0b00111000000110000, rk, rj, rd))
+#define STX_D(rd, rj, rk)  EMIT(type_3R(0b00111000000111000, rk, rj, rd))
+#define LDX_BU(rd, rj, rk) EMIT(type_3R(0b00111000001000000, rk, rj, rd))
+#define LDX_HU(rd, rj, rk) EMIT(type_3R(0b00111000001001000, rk, rj, rd))
+#define LDX_WU(rd, rj, rk) EMIT(type_3R(0b00111000001010000, rk, rj, rd))
+
 #define FLD_D(fd, rj, imm12) EMIT(type_2RI12(0b0010101110, imm12, rj, fd))
 #define FLD_S(fd, rj, imm12) EMIT(type_2RI12(0b0010101100, imm12, rj, fd))
 #define FST_D(fd, rj, imm12) EMIT(type_2RI12(0b0010101111, imm12, rj, fd))
@@ -1631,6 +1643,20 @@ LSX instruction starts with V, LASX instruction starts with XV.
 #define X64_ROTLI_D(rj, imm6) EMIT(type_2RI6(0x55, imm6, rj, 0x17))
 #define X64_RCLI_D(rj, imm6)  EMIT(type_2RI6(0x55, imm6, rj, 0x1b))
 
+// Warning, these are LBT addons that uses LBT4.eflags internally
+#define ADC_B(rd, rj, rk) EMIT(type_3R(0x60, rk, rj, rd))
+#define ADC_H(rd, rj, rk) EMIT(type_3R(0x61, rk, rj, rd))
+#define ADC_W(rd, rj, rk) EMIT(type_3R(0x62, rk, rj, rd))
+#define ADC_D(rd, rj, rk) EMIT(type_3R(0x63, rk, rj, rd))
+#define SBC_B(rd, rj, rk) EMIT(type_3R(0x64, rk, rj, rd))
+#define SBC_H(rd, rj, rk) EMIT(type_3R(0x65, rk, rj, rd))
+#define SBC_W(rd, rj, rk) EMIT(type_3R(0x66, rk, rj, rd))
+#define SBC_D(rd, rj, rk) EMIT(type_3R(0x67, rk, rj, rd))
+#define RCR_B(rd, rj, rk) EMIT(type_3R(0x68, rk, rj, rd))
+#define RCR_H(rd, rj, rk) EMIT(type_3R(0x69, rk, rj, rd))
+#define RCR_W(rd, rj, rk) EMIT(type_3R(0x6a, rk, rj, rd))
+#define RCR_D(rd, rj, rk) EMIT(type_3R(0x6b, rk, rj, rd))
+
 
 ////////////////////////////////////////////////////////////////////////////////
 
@@ -1738,6 +1764,14 @@ LSX instruction starts with V, LASX instruction starts with XV.
             LD_WU(rd, rj, imm12); \
     } while (0)
 
+#define LDXxw(rd, rj, rk)       \
+    do {                        \
+        if (rex.w)              \
+            LDX_D(rd, rj, rk);  \
+        else                    \
+            LDX_WU(rd, rj, rk); \
+    } while (0)
+
 #define LDz(rd, rj, imm12)        \
     do {                          \
         if (rex.is32bits)         \