From 0ba63ea272b8bc4dff24377d238d082df83f520c Mon Sep 17 00:00:00 2001 From: Yang Liu Date: Tue, 5 Mar 2024 05:07:11 +0800 Subject: [LA64_DYNAREC] Added more opcodes (#1327) * [LA64_DYNAREC] Added F7 /2 NOT opcode * [LA64_DYNAREC] Added 09 OR opcode * [LA64_DYNAREC] Added 64 MOVSXD opcode * [LA64_DYNAREC] Added more instructions to the emitter and printer, and fixed typos * Remove unused code * [LA64_DYNAREC] Added D3 /4 /6 SHL opcode * [LA64_DYNAREC] Added 0D OR opcode * [LA64_DYNAREC] Fixed a bug when LBT is present * Revert --- src/dynarec/la64/dynarec_la64_00.c | 68 ++++++++++++++++++++++++++++++ src/dynarec/la64/dynarec_la64_emit_logic.c | 41 ++++++++++++++++++ src/dynarec/la64/dynarec_la64_emit_shift.c | 66 +++++++++++++++++++++++++++++ src/dynarec/la64/dynarec_la64_helper.h | 6 ++- src/dynarec/la64/la64_emitter.h | 13 +++++- src/dynarec/la64/la64_printer.c | 24 ++++++++++- src/dynarec/rv64/dynarec_rv64_00_3.c | 2 - 7 files changed, 213 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/dynarec/la64/dynarec_la64_00.c b/src/dynarec/la64/dynarec_la64_00.c index cdc3d621..0b807e01 100644 --- a/src/dynarec/la64/dynarec_la64_00.c +++ b/src/dynarec/la64/dynarec_la64_00.c @@ -102,6 +102,21 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni i64 = F32S; emit_add32c(dyn, ninst, rex, xRAX, i64, x3, x4, x5, x6); break; + case 0x09: + INST_NAME("OR Ed, Gd"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGD; + GETED(0); + emit_or32(dyn, ninst, rex, ed, gd, x3, x4); + WBACK; + break; + case 0x0D: + INST_NAME("OR EAX, Id"); + SETFLAGS(X_ALL, SF_SET_PENDING); + i64 = F32S; + emit_or32c(dyn, ninst, rex, xRAX, i64, x3, x4); + break; case 0x0F: switch (rep) { case 0: @@ -231,6 +246,33 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni POP1z(gd); break; break; + case 0x63: + if(rex.is32bits) { + // this is ARPL opcode + DEFAULT; + } else { + INST_NAME("MOVSXD Gd, Ed"); + nextop = F8; + 
GETGD; + if(rex.w) { + if(MODREG) { // reg <= reg + ADDI_W(gd, TO_LA64((nextop&7)+(rex.b<<3)), 0); + } else { // reg <= mem + SMREAD(); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); + LD_W(gd, ed, fixedaddress); + } + } else { + if(MODREG) { // reg <= reg, map the x86 register index the same way as the REX.W path + AND(gd, TO_LA64((nextop&7)+(rex.b<<3)), xMASK); + } else { // reg <= mem + SMREAD(); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); + LD_WU(gd, ed, fixedaddress); + } + } + } + break; case 0x66: addr = dynarec64_66(dyn, addr, ip, ninst, rex, rep, ok, need_epilog); break; @@ -633,6 +675,24 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni DEFAULT; } break; + case 0xD3: + nextop = F8; + switch((nextop>>3)&7) { + case 4: + case 6: + INST_NAME("SHL Ed, CL"); + SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + ANDI(x3, xRCX, rex.w?0x3f:0x1f); + GETED(0); + if(!rex.w && MODREG) { ZEROUP(ed); } + CBZ_NEXT(x3); + emit_shl32(dyn, ninst, rex, ed, x3, x5, x4, x6); + WBACK; + break; + default: + DEFAULT; + } + break; #define GO(Z) \ BARRIER(BARRIER_MAYBE); \ JUMP(addr + i8, 1); \ @@ -868,6 +928,14 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni i64 = F32S; emit_test32c(dyn, ninst, rex, ed, i64, x3, x4, x5); break; + case 2: + INST_NAME("NOT Ed"); + GETED(0); + NOR(ed, ed, xZR); + if(!rex.w && MODREG) + ZEROUP(ed); + WBACK; + break; default: DEFAULT; } diff --git a/src/dynarec/la64/dynarec_la64_emit_logic.c b/src/dynarec/la64/dynarec_la64_emit_logic.c index e7c31c72..ac1e1020 100644 --- a/src/dynarec/la64/dynarec_la64_emit_logic.c +++ b/src/dynarec/la64/dynarec_la64_emit_logic.c @@ -155,6 +155,47 @@ void emit_and32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i } } + +// emit OR32 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch +void emit_or32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4) +{ + 
CLEAR_FLAGS(s3); + IFX(X_PEND) { + SET_DF(s4, rex.w?d_or64:d_or32); + } else IFX(X_ALL) { + SET_DFNONE(); + } + + IFXA(X_ALL, la64_lbt) { + if (rex.w) X64_OR_D(s1, s2); else X64_OR_W(s1, s2); + X64_GET_EFLAGS(s3, X_ALL); + OR(xFlags, xFlags, s3); + } + + OR(s1, s1, s2); + if (!rex.w) ZEROUP(s1); + + IFX(X_PEND) { + SDxw(s1, xEmu, offsetof(x64emu_t, res)); + } + + if(la64_lbt) return; + // SF: sign-extend in place for the test, then zero-extend again so s1 keeps a clean 32-bit result + IFX(X_SF) { + if (!rex.w) SEXT_W(s1, s1); + BGE(s1, xZR, 8); + ORI(xFlags, xFlags, 1 << F_SF); + if (!rex.w) ZEROUP(s1); // SEXT_W may have set bits 63..32; clear them again + } + IFX(X_ZF) { + BNEZ(s1, 8); + ORI(xFlags, xFlags, 1 << F_ZF); + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} + // emit OR32 instruction, from s1, c, store result in s1 using s3 and s4 as scratch void emit_or32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4) { diff --git a/src/dynarec/la64/dynarec_la64_emit_shift.c b/src/dynarec/la64/dynarec_la64_emit_shift.c index 24d7f102..ae1712d9 100644 --- a/src/dynarec/la64/dynarec_la64_emit_shift.c +++ b/src/dynarec/la64/dynarec_la64_emit_shift.c @@ -21,6 +21,72 @@ #include "dynarec_la64_functions.h" #include "dynarec_la64_helper.h" +// emit SHL32 instruction, from s1 , shift s2, store result in s1 using s3, s4 and s5 as scratch +void emit_shl32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5) +{ + // s2 is not 0 here and is 1..1f/3f + CLEAR_FLAGS(s3); + IFX(X_PEND) { + SDxw(s1, xEmu, offsetof(x64emu_t, op1)); + SDxw(s2, xEmu, offsetof(x64emu_t, op2)); + SET_DF(s4, rex.w?d_shl64:d_shl32); + } else IFX(X_ALL) { + SET_DFNONE(); + } + + if (la64_lbt) { + IFX(X_ALL) { + if (rex.w) X64_SLL_D(s1, s2); else X64_SLL_W(s1, s2); + X64_GET_EFLAGS(s3, X_ALL); + OR(xFlags, xFlags, s3); + } + SLL_D(s1, s1, s2); + if (!rex.w) ZEROUP(s1); // 64-bit shift of a 32-bit operand: drop the bits shifted past bit 31 + IFX(X_PEND) { + SDxw(s1, xEmu, offsetof(x64emu_t, res)); + } + return; + } + IFX(X_CF | X_OF) { + ADDI_D(s5, s2, rex.w?-64:-32); + SUB_D(s5, xZR, s5); + if (rex.w) SRL_D(s3, s1, s5); else SRL_W(s3, s1, s5); // shift width must match the operand size + ANDI(s5, s3, 1); // LSB == F_CF + IFX(X_CF) { + OR(xFlags, 
xFlags, s5); + } + } + + SLL_D(s1, s1, s2); + IFX(X_SF) { + if (!rex.w) SEXT_W(s1, s1); // 32-bit operand: move bit 31 into the sign position before testing + BGE(s1, xZR, 8); + ORI(xFlags, xFlags, 1 << F_SF); + } + if (!rex.w) { + ZEROUP(s1); + } + IFX(X_PEND) { + SDxw(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_ZF) { + BNEZ(s1, 8); + ORI(xFlags, xFlags, 1 << F_ZF); + } + IFX(X_OF) { + // OF flag is affected only on 1-bit shifts + ADDI_D(s3, s2, -1); + BNEZ(s3, 4 + 4 * 4); + SRLIxw(s3, s1, rex.w?63:31); + XOR(s3, s3, s5); + SLLI_D(s3, s3, F_OF); + OR(xFlags, xFlags, s3); + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} + // emit SHR32 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch void emit_shr32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4) { diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h index b8b607eb..7a174edd 100644 --- a/src/dynarec/la64/dynarec_la64_helper.h +++ b/src/dynarec/la64/dynarec_la64_helper.h @@ -451,10 +451,12 @@ void* la64_next(x64emu_t* emu, uintptr_t addr); #define emit_sub32c STEPNAME(emit_sub32c) #define emit_sub8 STEPNAME(emit_sub8) #define emit_sub8c STEPNAME(emit_sub8c) +#define emit_or32 STEPNAME(emit_or32) #define emit_or32c STEPNAME(emit_or32c) #define emit_xor32 STEPNAME(emit_xor32) #define emit_and8c STEPNAME(emit_and8c) -#define emit_and32c STEPNAME(emit_and32c) +#define emit_and32c STEPNAME(emit_and32c) +#define emit_shl32 STEPNAME(emit_shl32) #define emit_shr32c STEPNAME(emit_shr32c) #define emit_sar32c STEPNAME(emit_sar32c) @@ -501,10 +503,12 @@ void emit_sub32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s void emit_sub32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s2, int s3, int s4, int s5); void emit_sub8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5); void emit_sub8c(dynarec_la64_t* dyn, int ninst, int s1, int32_t c, int s2, int s3, int s4, int s5); +void emit_or32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int 
s3, int s4); void emit_or32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4); void emit_xor32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); void emit_and8c(dynarec_la64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); void emit_and32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4); +void emit_shl32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5); void emit_shr32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4); void emit_sar32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4); diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h index 58488586..d80f02ad 100644 --- a/src/dynarec/la64/la64_emitter.h +++ b/src/dynarec/la64/la64_emitter.h @@ -227,13 +227,22 @@ f24-f31 fs0-fs7 Static registers Callee // tmp = SRL(GR[rj][31:0], GR[rk][4:0]) // GR[rd] = SignExtend(tmp[31:0], GRLEN) #define SRL_W(rd, rj, rk) EMIT(type_3R(0b00000000000101111, rk, rj, rd)) -// tmp = SLA(GR[rj][31:0], GR[rk][4:0]) +// tmp = SRA(GR[rj][31:0], GR[rk][4:0]) // GR[rd] = SignExtend(tmp[31:0], GRLEN) -#define SLA_W(rd, rj, rk) EMIT(type_3R(0b00000000000110000, rk, rj, rd)) +#define SRA_W(rd, rj, rk) EMIT(type_3R(0b00000000000110000, rk, rj, rd)) // tmp = ROTR(GR[rj][31:0], GR[rk][4:0]) // GR[rd] = SignExtend(tmp[31:0], GRLEN) #define ROTR_W(rd, rj, rk) EMIT(type_3R(0b00000000000110110, rk, rj, rd)) +// GR[rd] = SLL(GR[rj][63:0], GR[rk][5:0]) +#define SLL_D(rd, rj, rk) EMIT(type_3R(0b00000000000110001, rk, rj, rd)) +// GR[rd] = SRL(GR[rj][63:0], GR[rk][5:0]) +#define SRL_D(rd, rj, rk) EMIT(type_3R(0b00000000000110010, rk, rj, rd)) +// GR[rd] = SRA(GR[rj][63:0], GR[rk][5:0]) +#define SRA_D(rd, rj, rk) EMIT(type_3R(0b00000000000110011, rk, rj, rd)) +// GR[rd] = ROTR(GR[rj][63:0], GR[rk][5:0]) +#define ROTR_D(rd, rj, rk) EMIT(type_3R(0b00000000000110111, rk, rj, rd)) + // GR[rd] = 
SLL(GR[rj][63:0], imm6) (Shift Left Logical) #define SLLI_D(rd, rj, imm6) EMIT(type_2RI6(0b0000000001000001, imm6, rj, rd)) // GR[rd] = SRL(GR[rj][63:0], imm6) (Shift Right Logical) diff --git a/src/dynarec/la64/la64_printer.c b/src/dynarec/la64/la64_printer.c index 385be055..3a16aa9e 100644 --- a/src/dynarec/la64/la64_printer.c +++ b/src/dynarec/la64/la64_printer.c @@ -205,9 +205,9 @@ const char* la64_print(uint32_t opcode, uintptr_t addr) snprintf(buff, sizeof(buff), "%-15s %s, %s, %s", "SRL.W", Xt[Rd], Xt[Rj], Xt[Rk]); return buff; } - // SLA.W + // SRA.W if(isMask(opcode, "00000000000110000kkkkkjjjjjddddd", &a)) { - snprintf(buff, sizeof(buff), "%-15s %s, %s, %s", "SLA.W", Xt[Rd], Xt[Rj], Xt[Rk]); + snprintf(buff, sizeof(buff), "%-15s %s, %s, %s", "SRA.W", Xt[Rd], Xt[Rj], Xt[Rk]); return buff; } // ROTR.W @@ -215,6 +215,26 @@ const char* la64_print(uint32_t opcode, uintptr_t addr) snprintf(buff, sizeof(buff), "%-15s %s, %s, %s", "ROTR.W", Xt[Rd], Xt[Rj], Xt[Rk]); return buff; } + // SLL.D + if(isMask(opcode, "00000000000110001kkkkkjjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "%-15s %s, %s, %s", "SLL.D", Xt[Rd], Xt[Rj], Xt[Rk]); + return buff; + } + // SRL.D + if(isMask(opcode, "00000000000110010kkkkkjjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "%-15s %s, %s, %s", "SRL.D", Xt[Rd], Xt[Rj], Xt[Rk]); + return buff; + } + // SRA.D + if(isMask(opcode, "00000000000110011kkkkkjjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "%-15s %s, %s, %s", "SRA.D", Xt[Rd], Xt[Rj], Xt[Rk]); + return buff; + } + // ROTR.D + if(isMask(opcode, "00000000000110111kkkkkjjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "%-15s %s, %s, %s", "ROTR.D", Xt[Rd], Xt[Rj], Xt[Rk]); + return buff; + } // SLLI.D if(isMask(opcode, "0000000001000001iiiiiijjjjjddddd", &a)) { snprintf(buff, sizeof(buff), "%-15s %s, %s, %u", "SLLI.D", Xt[Rd], Xt[Rj], imm); diff --git a/src/dynarec/rv64/dynarec_rv64_00_3.c b/src/dynarec/rv64/dynarec_rv64_00_3.c index cdb53bf2..1158eadf 100644 --- 
a/src/dynarec/rv64/dynarec_rv64_00_3.c +++ b/src/dynarec/rv64/dynarec_rv64_00_3.c @@ -699,8 +699,6 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int UFLAG_RES(ed); UFLAG_DF(x3, rex.w?d_sar64:d_sar32); break; - default: - DEFAULT; } break; -- cgit 1.4.1