From 828f088b6c693a38fc762d5b4cf1d7fd86664645 Mon Sep 17 00:00:00 2001 From: Yang Liu Date: Thu, 13 Apr 2023 19:06:23 +0800 Subject: [RV64_DYNAREC] Added more opcodes for SV and some fixes (#694) * [RV64_DYNAREC] Added D3 C7 opcode * [RV64_DYNAREC] Added 0F B3 BTR opcode * [RV64_DYNAREC] Added D1 /4,6 SHL opcode * [RV64_DYNAREC] Added 66 0F F3 PSLLQ opcode * [RV64_DYNAREC] Fixed ROL opcode --- src/dynarec/rv64/dynarec_rv64_00.c | 25 +++++++++++--- src/dynarec/rv64/dynarec_rv64_0f.c | 38 +++++++++++++++++++++- src/dynarec/rv64/dynarec_rv64_660f.c | 20 ++++++++++++ src/dynarec/rv64/dynarec_rv64_emit_shift.c | 52 +++++++++++++++++++++++++++--- src/dynarec/rv64/dynarec_rv64_helper.h | 9 ++++++ src/dynarec/rv64/rv64_emitter.h | 2 ++ 6 files changed, 136 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/dynarec/rv64/dynarec_rv64_00.c b/src/dynarec/rv64/dynarec_rv64_00.c index 2bb6d9ea..99c32971 100644 --- a/src/dynarec/rv64/dynarec_rv64_00.c +++ b/src/dynarec/rv64/dynarec_rv64_00.c @@ -1219,6 +1219,7 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni u8 = (F8)&(rex.w?0x3f:0x1f); emit_rol32c(dyn, ninst, rex, ed, u8, x3, x4); if(u8) { WBACK; } + if(!wback && !rex.w) ZEROUP(ed); break; case 4: case 6: @@ -1449,17 +1450,25 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0xD1: nextop = F8; switch((nextop>>3)&7) { + case 4: + case 6: + INST_NAME("SHL Ed, 1"); + SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + GETED(0); + emit_shl32c(dyn, ninst, rex, ed, 1, x3, x4, x5); + WBACK; + break; case 5: - INST_NAME("SHR Ed, Ib"); + INST_NAME("SHR Ed, 1"); SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined - GETED(1); + GETED(0); emit_shr32c(dyn, ninst, rex, ed, 1, x3, x4); WBACK; break; case 7: - INST_NAME("SAR Ed, Ib"); + INST_NAME("SAR Ed, 1"); SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined - GETED(1); + GETED(0); emit_sar32c(dyn, ninst, rex, 
ed, 1, x3, x4); WBACK; break; @@ -1471,6 +1480,14 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0xD3: nextop = F8; switch((nextop>>3)&7) { + case 0: + INST_NAME("ROL Ed, CL"); + SETFLAGS(X_OF|X_CF, SF_SUBSET); + GETED(0); + emit_rol32(dyn, ninst, rex, ed, xRCX, x3, x4); + WBACK; + if(!wback && !rex.w) ZEROUP(ed); + break; case 4: case 6: INST_NAME("SHL Ed, CL"); diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c index 2e8aae41..72c61e4a 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f.c +++ b/src/dynarec/rv64/dynarec_rv64_0f.c @@ -591,7 +591,43 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SRLI(gd, gd, 32); } break; - + case 0xB3: + INST_NAME("BTR Ed, Gd"); + SETFLAGS(X_CF, SF_SUBSET); + SET_DFNONE(); + nextop = F8; + GETGD; + if(MODREG) { + ed = xRAX+(nextop&7)+(rex.b<<3); + wback = 0; + } else { + SMREAD(); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); + SRAI(x1, gd, 5+rex.w); + SLLI(x1, x1, 2+rex.w); + ADD(x3, wback, x1); + LDxw(x1, x3, fixedaddress); + ed = x1; + wback = x3; + } + if (rex.w) { + ANDI(x2, gd, 0x3f); + } else { + ANDI(x2, gd, 0x1f); + } + SRL(x4, ed, x2); + ANDI(x4, x4, 1); // F_CF is 1 + ANDI(xFlags, xFlags, ~1); + OR(xFlags, xFlags, x4); + ADDI(x3, xZR, 1); + SLL(x3, x3, x2); + NOT(x3, x3); + AND(ed, ed, x3); + if(wback) { + SDxw(ed, wback, fixedaddress); + SMWRITE(); + } + break; case 0xB6: INST_NAME("MOVZX Gd, Eb"); nextop = F8; diff --git a/src/dynarec/rv64/dynarec_rv64_660f.c b/src/dynarec/rv64/dynarec_rv64_660f.c index 4ae1eaa7..198c1a85 100644 --- a/src/dynarec/rv64/dynarec_rv64_660f.c +++ b/src/dynarec/rv64/dynarec_rv64_660f.c @@ -865,6 +865,26 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4)); } break; + case 0xF3: + INST_NAME("PSLLQ Gx,Ex"); + nextop = F8; + GETGX(x1); + GETEX(x2, 0); + ADDI(x4, xZR, 64); + LD(x3, 
wback, fixedaddress+0); + BLTU_MARK(x3, x4); + // just zero dest + SD(xZR, gback, 0); + SD(xZR, gback, 8); + B_NEXT_nocond; + MARK; + LD(x4, gback, 0); + LD(x5, gback, 8); + SLL(x4, x4, x3); + SLL(x5, x5, x3); + SD(x4, gback, 0); + SD(x5, gback, 8); + break; case 0xFA: INST_NAME("PSUBD Gx,Ex"); nextop = F8; diff --git a/src/dynarec/rv64/dynarec_rv64_emit_shift.c b/src/dynarec/rv64/dynarec_rv64_emit_shift.c index baf3f7b3..04d73f10 100644 --- a/src/dynarec/rv64/dynarec_rv64_emit_shift.c +++ b/src/dynarec/rv64/dynarec_rv64_emit_shift.c @@ -307,6 +307,51 @@ void emit_sar32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, } } + +// emit ROL32 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch +void emit_rol32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4) +{ + int64_t j64; + IFX(X_CF) { + ANDI(xFlags, xFlags, ~(1UL<< [... text lost in extraction: the rest of the emit_rol32 hunk, any further dynarec_rv64_emit_shift.c changes, and the start of the dynarec_rv64_helper.h diff ...] reg1>=reg2 (use j64) #define BGE_MARK(reg1, reg2) Bxx_gen(GE, MARK, reg1, reg2) // Branch to MARK2 if reg1==reg2 (use j64) @@ -486,6 +488,11 @@ // Branch to MARKLOCK if reg1!=0 (use j64) #define BNEZ_MARKLOCK(reg) BNE_MARKLOCK(reg, xZR) +// Branch to NEXT if reg1==reg2 (use j64) +#define BEQ_NEXT(reg1, reg2) \ + j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0; \ + BEQ(reg1, reg2, j64) + // Branch to NEXT if reg1==0 (use j64) #define CBZ_NEXT(reg1) \ j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0; \ @@ -822,6 +829,7 @@ void* rv64_next(x64emu_t* emu, uintptr_t addr); #define emit_shr32 STEPNAME(emit_shr32) #define emit_shr32c STEPNAME(emit_shr32c) #define emit_sar32c STEPNAME(emit_sar32c) +#define emit_rol32 STEPNAME(emit_rol32) #define emit_rol32c STEPNAME(emit_rol32c) #define emit_ror32c STEPNAME(emit_ror32c) #define emit_shrd32c STEPNAME(emit_shrd32c) @@ -954,6 +962,7 @@ void emit_shl32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, void emit_shr32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); void 
emit_shr32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4); void emit_sar32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4); +void emit_rol32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); void emit_rol32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4); //void emit_ror32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4); void emit_shrd32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4); diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h index af4cd430..a36b3bf3 100644 --- a/src/dynarec/rv64/rv64_emitter.h +++ b/src/dynarec/rv64/rv64_emitter.h @@ -297,6 +297,8 @@ f28–31 ft8–11 FP temporaries Caller // rd = rs1>>rs2 arithmetic #define SRAW(rd, rs1, rs2) EMIT(R_type(0b0100000, rs2, rs1, 0b101, rd, 0b0111011)) +#define SLLxw(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b001, rd, rex.w?0b0110011:0b0111011)) +#define SRLxw(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b101, rd, rex.w?0b0110011:0b0111011)) #define SRAxw(rd, rs1, rs2) if(rex.w) {SRA(rd, rs1, rs2);} else {SRAW(rd, rs1, rs2); ZEROUP(rd);} // Shift Left Immediate, 32-bit, sign-extended -- cgit 1.4.1