diff options
| author | Yang Liu <numbksco@gmail.com> | 2024-04-06 20:45:22 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-04-06 14:45:22 +0200 |
| commit | b96139274fcb83be3e9085a1a06084364c938bc5 (patch) | |
| tree | 12df4a027bcc17a1f5ffab665eb67263c32d5655 /src | |
| parent | d84faf57ab384344017f57c1b1d261352a320bab (diff) | |
| download | box64-b96139274fcb83be3e9085a1a06084364c938bc5.tar.gz box64-b96139274fcb83be3e9085a1a06084364c938bc5.zip | |
[LA64_DYNAREC] Added more opcodes (#1425)
* Added 0B OR opcode * Added D3 /7 SAR opcode * Added D3 /5 SHR opcode * Added 80 /1 OR opcode * Added 66 0F BE MOVSX opcode * Fixed SRAxw * Fix * Added 0F C8..CF BSWAP opcode * Added more opcodes
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_00.c | 56 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_0f.c | 12 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_660f.c | 27 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_emit_logic.c | 49 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_emit_shift.c | 68 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_helper.h | 14 | ||||
| -rw-r--r-- | src/dynarec/la64/la64_emitter.h | 56 |
7 files changed, 272 insertions, 10 deletions
diff --git a/src/dynarec/la64/dynarec_la64_00.c b/src/dynarec/la64/dynarec_la64_00.c index 2100ae73..c32d1bc3 100644 --- a/src/dynarec/la64/dynarec_la64_00.c +++ b/src/dynarec/la64/dynarec_la64_00.c @@ -120,6 +120,14 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni emit_or32(dyn, ninst, rex, ed, gd, x3, x4); WBACK; break; + case 0x0B: + INST_NAME("OR Gd, Ed"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGD; + GETED(0); + emit_or32(dyn, ninst, rex, gd, ed, x3, x4); + break; case 0x0D: INST_NAME("OR EAX, Id"); SETFLAGS(X_ALL, SF_SET_PENDING); @@ -520,6 +528,14 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x80: nextop = F8; switch((nextop>>3)&7) { + case 1: // OR + INST_NAME("OR Eb, Ib"); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEB(x1, 1); + u8 = F8; + emit_or8c(dyn, ninst, x1, u8, x2, x4, x5); + EBBACK(); + break; case 4: // AND INST_NAME("AND Eb, Ib"); SETFLAGS(X_ALL, SF_SET_PENDING); @@ -536,6 +552,14 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni emit_sub8c(dyn, ninst, x1, u8, x2, x4, x5, x6); EBBACK(); break; + case 6: // XOR + INST_NAME("XOR Eb, Ib"); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEB(x1, 1); + u8 = F8; + emit_xor8c(dyn, ninst, x1, u8, x2, x4); + EBBACK(); + break; case 7: // CMP INST_NAME("CMP Eb, Ib"); SETFLAGS(X_ALL, SF_SET_PENDING); @@ -785,6 +809,15 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni ZEROUP(xRAX); } break; + case 0x99: + INST_NAME("CDQ"); + if (rex.w) { + SRAI_D(xRDX, xRAX, 63); + } else { + SRAI_W(xRDX, xRAX, 31); + BSTRPICK_D(xRDX, xRDX, 31, 0); + } + break; case 0xA0: INST_NAME("MOV AL,Ob"); if(rex.is32bits) u64 = F32; else u64 = F64; @@ -1239,6 +1272,29 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni emit_shl32(dyn, ninst, rex, ed, x3, x5, x4, x6); WBACK; break; + case 5: + INST_NAME("SHR Ed, CL"); + SETFLAGS(X_ALL, SF_SET_PENDING); // 
some flags are left undefined + ANDI(x3, xRCX, rex.w ? 0x3f : 0x1f); + GETED(0); + if (!rex.w && MODREG) { ZEROUP(ed); } + CBZ_NEXT(x3); + emit_shr32(dyn, ninst, rex, ed, x3, x5, x4); + WBACK; + break; + case 7: + INST_NAME("SAR Ed, CL"); + SETFLAGS(X_ALL, SF_PENDING); + ANDI(x3, xRCX, rex.w ? 0x3f : 0x1f); + GETED(0); + if (!rex.w && MODREG) { ZEROUP(ed); } + CBZ_NEXT(x3); + UFLAG_OP12(ed, x3); + SRAxw(ed, ed, x3); + WBACK; + UFLAG_RES(ed); + UFLAG_DF(x3, rex.w ? d_sar64 : d_sar32); + break; default: DEFAULT; } diff --git a/src/dynarec/la64/dynarec_la64_0f.c b/src/dynarec/la64/dynarec_la64_0f.c index c6879562..6bd7c3d4 100644 --- a/src/dynarec/la64/dynarec_la64_0f.c +++ b/src/dynarec/la64/dynarec_la64_0f.c @@ -411,6 +411,18 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } if (!rex.w) ZEROUP(gd); break; + case 0xC8: + case 0xC9: + case 0xCA: + case 0xCB: + case 0xCC: + case 0xCD: + case 0xCE: + case 0xCF: + INST_NAME("BSWAP Reg"); + gd = TO_LA64((opcode & 7) + (rex.b << 3)); + REVBxw(gd, gd); + break; default: DEFAULT; } diff --git a/src/dynarec/la64/dynarec_la64_660f.c b/src/dynarec/la64/dynarec_la64_660f.c index 8a8a06fa..cdafe2e1 100644 --- a/src/dynarec/la64/dynarec_la64_660f.c +++ b/src/dynarec/la64/dynarec_la64_660f.c @@ -106,6 +106,33 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int VLD(v0, ed, fixedaddress); } break; + case 0xBE: + INST_NAME("MOVSX Gw, Eb"); + nextop = F8; + GETGD; + if (MODREG) { + if (rex.rex) { + ed = TO_LA64((nextop & 7) + (rex.b << 3)); + eb1 = ed; + eb2 = 0; + } else { + ed = (nextop & 7); + eb1 = TO_LA64(ed & 3); // Ax, Cx, Dx or Bx + eb2 = (ed & 4) >> 2; // L or H + } + if (eb2) { + SRLI_D(x1, eb1, eb2 * 8); + EXT_W_B(x1, x1); + } else { + EXT_W_B(x1, eb1); + } + } else { + SMREAD(); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x4, &fixedaddress, rex, NULL, 1, 0); + LD_B(x1, ed, fixedaddress); + } + BSTRINS_D(gd, x1, 15, 0); + break; case 0xEF: 
INST_NAME("PXOR Gx,Ex"); nextop = F8; diff --git a/src/dynarec/la64/dynarec_la64_emit_logic.c b/src/dynarec/la64/dynarec_la64_emit_logic.c index 9f4ce0e1..b8f3e8be 100644 --- a/src/dynarec/la64/dynarec_la64_emit_logic.c +++ b/src/dynarec/la64/dynarec_la64_emit_logic.c @@ -22,6 +22,46 @@ #include "dynarec_la64_helper.h" +// emit XOR8 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch +void emit_xor8c(dynarec_la64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4) +{ + IFX (X_PEND) { + SET_DF(s4, d_xor8); + } else IFX (X_ALL) { + SET_DFNONE(); + } + + if (la64_lbt) { + IFX (X_ALL) { + ADDI_D(s3, xZR, c & 0xff); + X64_XOR_B(s1, s3); + } + XORI(s1, s1, c & 0xff); + IFX (X_PEND) + ST_B(s1, xEmu, offsetof(x64emu_t, res)); + return; + } + + XORI(s1, s1, c & 0xff); + ANDI(s1, s1, 0xff); + CLEAR_FLAGS(s3); + IFX (X_SF) { + SRLI_D(s3, s1, 7); + BEQZ(s3, 8); + ORI(xFlags, xFlags, 1 << F_SF); + } + IFX (X_PEND) { + ST_B(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX (X_ZF) { + BNEZ(s1, 8); + ORI(xFlags, xFlags, 1 << F_ZF); + } + IFX (X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} + // emit XOR32 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch void emit_xor32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4) { @@ -429,4 +469,11 @@ void emit_or8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4) IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } -} \ No newline at end of file +} + +// emit OR8 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch +void emit_or8c(dynarec_la64_t* dyn, int ninst, int s1, int32_t c, int s2, int s3, int s4) +{ + MOV32w(s2, c & 0xff); + emit_or8(dyn, ninst, s1, s2, s3, s4); +} diff --git a/src/dynarec/la64/dynarec_la64_emit_shift.c b/src/dynarec/la64/dynarec_la64_emit_shift.c index 3cf6d41d..883968f8 100644 --- a/src/dynarec/la64/dynarec_la64_emit_shift.c +++ b/src/dynarec/la64/dynarec_la64_emit_shift.c @@ -166,6 +166,74 @@ void 
emit_shl32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, } } + +// emit SHR32 instruction, from s1 , shift s2 (!0 and and'd already), store result in s1 using s3 and s4 as scratch +void emit_shr32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4) +{ + int64_t j64; + + + IFX (X_PEND) { + SDxw(s2, xEmu, offsetof(x64emu_t, op2)); + SDxw(s1, xEmu, offsetof(x64emu_t, op1)); + SET_DF(s4, rex.w ? d_shr64 : d_shr32); + } else IFX (X_ALL) { + SET_DFNONE(); + } + + if (la64_lbt) { + IFX (X_ALL) { + if (rex.w) + X64_SRL_D(s1, s2); + else + X64_SRL_W(s1, s2); + } + SRL_D(s1, s1, s2); + if (!rex.w) { ZEROUP(s1); } + IFX (X_PEND) { + SDxw(s1, xEmu, offsetof(x64emu_t, res)); + } + return; + } + + CLEAR_FLAGS(s3); + IFX (X_CF) { + ADDI_D(s3, s2, -1); + SRA_D(s3, s1, s3); + ANDI(s3, s3, 1); // LSB == F_CF + OR(xFlags, xFlags, s3); + } + IFX (X_OF) { + // OF flag is affected only on 1-bit shifts + // OF flag is set to the most-significant bit of the original operand + ADDI_D(s3, xZR, 1); + BEQ(s2, s3, 4 + 4 * 4); + SRLIxw(s3, s1, rex.w ? 
63 : 31); + SLLI_D(s3, s3, F_OF); + OR(xFlags, xFlags, s3); + } + + SRL_D(s1, s1, s2); + + IFX (X_SF) { + BGE(s1, xZR, 8); + ORI(xFlags, xFlags, 1 << F_SF); + } + if (!rex.w) { + ZEROUP(s1); + } + IFX (X_PEND) { + SDxw(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX (X_ZF) { + BNEZ(s1, 8); + ORI(xFlags, xFlags, 1 << F_ZF); + } + IFX (X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} + // emit SHR32 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch void emit_shr32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4) { diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h index 481dbb68..49229c09 100644 --- a/src/dynarec/la64/dynarec_la64_helper.h +++ b/src/dynarec/la64/dynarec_la64_helper.h @@ -211,8 +211,12 @@ wb2 = (wback >> 2) * 8; \ wback = TO_LA64(wback & 3); \ } \ - if (wb2) { SRLI_D(i, wback, wb2); } \ - EXT_W_B(i, i); \ + if (wb2) { \ + SRLI_D(i, wback, wb2); \ + EXT_W_B(i, i); \ + } else { \ + EXT_W_B(i, wback); \ + } \ wb1 = 0; \ ed = i; \ } else { \ @@ -652,6 +656,8 @@ void* la64_next(x64emu_t* emu, uintptr_t addr); #define emit_or32 STEPNAME(emit_or32) #define emit_or32c STEPNAME(emit_or32c) #define emit_or8 STEPNAME(emit_or8) +#define emit_or8c STEPNAME(emit_or8c) +#define emit_xor8c STEPNAME(emit_xor8c) #define emit_xor32 STEPNAME(emit_xor32) #define emit_xor32c STEPNAME(emit_xor32c) #define emit_and8 STEPNAME(emit_and8) @@ -660,6 +666,7 @@ void* la64_next(x64emu_t* emu, uintptr_t addr); #define emit_and32c STEPNAME(emit_and32c) #define emit_shl32 STEPNAME(emit_shl32) #define emit_shl32c STEPNAME(emit_shl32c) +#define emit_shr32 STEPNAME(emit_shr32) #define emit_shr32c STEPNAME(emit_shr32c) #define emit_sar32c STEPNAME(emit_sar32c) #define emit_ror32c STEPNAME(emit_ror32c) @@ -720,6 +727,8 @@ void emit_neg32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s void emit_or32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int 
s4); void emit_or32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4); void emit_or8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4); +void emit_or8c(dynarec_la64_t* dyn, int ninst, int s1, int32_t c, int s2, int s3, int s4); +void emit_xor8c(dynarec_la64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); void emit_xor32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4); void emit_xor32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); void emit_and8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4); @@ -728,6 +737,7 @@ void emit_and32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s void emit_and32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4); void emit_shl32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5); void emit_shl32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4, int s5); +void emit_shr32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); void emit_shr32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4); void emit_sar32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4); void emit_ror32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4); diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h index eea358b7..49b3b985 100644 --- a/src/dynarec/la64/la64_emitter.h +++ b/src/dynarec/la64/la64_emitter.h @@ -247,7 +247,7 @@ f24-f31 fs0-fs7 Static registers Callee // GR[rd] = SRL(GR[rj][63:0], GR[rk][5:0]) #define SRL_D(rd, rj, rk) EMIT(type_3R(0b00000000000110010, rk, rj, rd)) // GR[rd] = SRA(GR[rj][63:0], GR[rk][5:0]) -#define SLA_D(rd, rj, rk) EMIT(type_3R(0b00000000000110011, rk, rj, rd)) +#define SRA_D(rd, rj, rk) EMIT(type_3R(0b00000000000110011, rk, rj, rd)) // GR[rd] = ROTR(GR[rj][63:0], 
GR[rk][5:0]) #define ROTR_D(rd, rj, rk) EMIT(type_3R(0b00000000000110111, rk, rj, rd)) @@ -269,13 +269,25 @@ f24-f31 fs0-fs7 Static registers Callee // GR[rd] = ROTR(GR[rj][31:0], imm5) (Rotate To Right) #define ROTRI_W(rd, rj, imm5) EMIT(type_2RI5(0b00000000010011001, imm5, rj, rd)) +#define SRAxw(rd, rj, rk) \ + do { \ + if (rex.w) { \ + SRA_D(rd, rj, rk); \ + } else { \ + SRA_W(rd, rj, rk); \ + ZEROUP(rd); \ + } \ + } while (0) + // Shift Left Immediate -#define SLLIxw(rd, rs1, imm) \ - if (rex.w) { \ - SLLI_D(rd, rs1, imm); \ - } else { \ - SLLI_W(rd, rs1, imm); \ - } +#define SLLIxw(rd, rs1, imm) \ + do { \ + if (rex.w) { \ + SLLI_D(rd, rs1, imm); \ + } else { \ + SLLI_W(rd, rs1, imm); \ + } \ + } while (0) // Shift Right Logical Immediate #define SRLIxw(rd, rs1, imm) \ do { \ @@ -400,6 +412,36 @@ f24-f31 fs0-fs7 Static registers Callee // ZERO the upper part #define ZEROUP(rd) BSTRINS_D(rd, xZR, 63, 32); +#define CLO_W(rd, rj) EMIT(type_2R(0b0000000000000000000100, rj, rd)) +#define CLZ_W(rd, rj) EMIT(type_2R(0b0000000000000000000101, rj, rd)) +#define CTO_W(rd, rj) EMIT(type_2R(0b0000000000000000000110, rj, rd)) +#define CTZ_W(rd, rj) EMIT(type_2R(0b0000000000000000000111, rj, rd)) +#define CLO_D(rd, rj) EMIT(type_2R(0b0000000000000000001000, rj, rd)) +#define CLZ_D(rd, rj) EMIT(type_2R(0b0000000000000000001001, rj, rd)) +#define CTO_D(rd, rj) EMIT(type_2R(0b0000000000000000001010, rj, rd)) +#define CTZ_D(rd, rj) EMIT(type_2R(0b0000000000000000001011, rj, rd)) +#define REVB_2H(rd, rj) EMIT(type_2R(0b0000000000000000001100, rj, rd)) +#define REVB_4H(rd, rj) EMIT(type_2R(0b0000000000000000001101, rj, rd)) +#define REVB_2W(rd, rj) EMIT(type_2R(0b0000000000000000001110, rj, rd)) +#define REVB_D(rd, rj) EMIT(type_2R(0b0000000000000000001111, rj, rd)) +#define REVH_2W(rd, rj) EMIT(type_2R(0b0000000000000000010000, rj, rd)) +#define REVH_D(rd, rj) EMIT(type_2R(0b0000000000000000010001, rj, rd)) +#define BITREV_4B(rd, rj) EMIT(type_2R(0b0000000000000000010010, rj, 
rd)) +#define BITREV_8B(rd, rj) EMIT(type_2R(0b0000000000000000010011, rj, rd)) +#define BITREV_W(rd, rj) EMIT(type_2R(0b0000000000000000010100, rj, rd)) +#define BITREV_D(rd, rj) EMIT(type_2R(0b0000000000000000010101, rj, rd)) + +#define REVBxw(rd, rj) \ + do { \ + if (rex.w) { \ + REVB_D(rd, rj); \ + } else { \ + REVB_2W(rd, rj); \ + ZEROUP(rd); \ + } \ + } while (0) + + // GR[rd] = SignExtend(GR[rj][7:0], GRLEN) #define EXT_W_B(rd, rj) EMIT(type_2R(0b0000000000000000010111, rj, rd)) |