diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_00.c | 25 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_0f.c | 38 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_660f.c | 20 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_emit_shift.c | 58 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.h | 9 | ||||
| -rw-r--r-- | src/dynarec/rv64/rv64_emitter.h | 3 |
6 files changed, 143 insertions, 10 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_00.c b/src/dynarec/rv64/dynarec_rv64_00.c
index 2bb6d9ea..99c32971 100644
--- a/src/dynarec/rv64/dynarec_rv64_00.c
+++ b/src/dynarec/rv64/dynarec_rv64_00.c
@@ -1219,6 +1219,7 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     u8 = (F8)&(rex.w?0x3f:0x1f);
                     emit_rol32c(dyn, ninst, rex, ed, u8, x3, x4);
                     if(u8) { WBACK; }
+                    if(!wback && !rex.w) ZEROUP(ed);
                     break;
                 case 4:
                 case 6:
@@ -1449,17 +1450,25 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
         case 0xD1:
             nextop = F8;
             switch((nextop>>3)&7) {
+                case 4:
+                case 6:
+                    INST_NAME("SHL Ed, 1");
+                    SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined
+                    GETED(0);
+                    emit_shl32c(dyn, ninst, rex, ed, 1, x3, x4, x5);
+                    WBACK;
+                    break;
                 case 5:
-                    INST_NAME("SHR Ed, Ib");
+                    INST_NAME("SHR Ed, 1");
                     SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined
-                    GETED(1);
+                    GETED(0);
                     emit_shr32c(dyn, ninst, rex, ed, 1, x3, x4);
                     WBACK;
                     break;
                 case 7:
-                    INST_NAME("SAR Ed, Ib");
+                    INST_NAME("SAR Ed, 1");
                     SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined
-                    GETED(1);
+                    GETED(0);
                     emit_sar32c(dyn, ninst, rex, ed, 1, x3, x4);
                     WBACK;
                     break;
@@ -1471,6 +1480,14 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
         case 0xD3:
             nextop = F8;
             switch((nextop>>3)&7) {
+                case 0:
+                    INST_NAME("ROL Ed, CL");
+                    SETFLAGS(X_OF|X_CF, SF_SUBSET);
+                    GETED(0);
+                    emit_rol32(dyn, ninst, rex, ed, xRCX, x3, x4);
+                    WBACK;
+                    if(!wback && !rex.w) ZEROUP(ed);
+                    break;
                 case 4:
                 case 6:
                     INST_NAME("SHL Ed, CL");
diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c
index 2e8aae41..72c61e4a 100644
--- a/src/dynarec/rv64/dynarec_rv64_0f.c
+++ b/src/dynarec/rv64/dynarec_rv64_0f.c
@@ -591,7 +591,43 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 SRLI(gd, gd, 32);
             }
             break;
-
+        case 0xB3:
+            INST_NAME("BTR Ed, Gd");
+            SETFLAGS(X_CF, SF_SUBSET);
+            SET_DFNONE();
+            nextop = F8;
+            GETGD;
+            if(MODREG) {
+                ed = xRAX+(nextop&7)+(rex.b<<3);
+                wback = 0;
+            } else {
+                SMREAD();
+                addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0);
+                SRAI(x1, gd, 5+rex.w);
+                SLLI(x1, x1, 2+rex.w);
+                ADD(x3, wback, x1);
+                LDxw(x1, x3, fixedaddress);
+                ed = x1;
+                wback = x3;
+            }
+            if (rex.w) {
+                ANDI(x2, gd, 0x3f);
+            } else {
+                ANDI(x2, gd, 0x1f);
+            }
+            SRL(x4, ed, x2);
+            ANDI(x4, x4, 1); // F_CF is 1
+            ANDI(xFlags, xFlags, ~1);
+            OR(xFlags, xFlags, x4);
+            ADDI(x4, xZR, 1); // build the mask in x4: x3 may still hold the write-back address
+            SLL(x4, x4, x2);
+            NOT(x4, x4);
+            AND(ed, ed, x4);
+            if(wback) {
+                SDxw(ed, wback, fixedaddress);
+                SMWRITE();
+            }
+            break;
         case 0xB6:
             INST_NAME("MOVZX Gd, Eb");
             nextop = F8;
diff --git a/src/dynarec/rv64/dynarec_rv64_660f.c b/src/dynarec/rv64/dynarec_rv64_660f.c
index 4ae1eaa7..198c1a85 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f.c
@@ -865,6 +865,26 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4));
             }
             break;
+        case 0xF3:
+            INST_NAME("PSLLQ Gx,Ex");
+            nextop = F8;
+            GETGX(x1);
+            GETEX(x2, 0);
+            ADDI(x4, xZR, 64);
+            LD(x3, wback, fixedaddress+0);
+            BLTU_MARK(x3, x4);
+            // just zero dest
+            SD(xZR, gback, 0);
+            SD(xZR, gback, 8);
+            B_NEXT_nocond;
+            MARK;
+            LD(x4, gback, 0);
+            LD(x5, gback, 8);
+            SLL(x4, x4, x3);
+            SLL(x5, x5, x3);
+            SD(x4, gback, 0);
+            SD(x5, gback, 8);
+            break;
         case 0xFA:
             INST_NAME("PSUBD Gx,Ex");
             nextop = F8;
diff --git a/src/dynarec/rv64/dynarec_rv64_emit_shift.c b/src/dynarec/rv64/dynarec_rv64_emit_shift.c
index baf3f7b3..04d73f10 100644
--- a/src/dynarec/rv64/dynarec_rv64_emit_shift.c
+++ b/src/dynarec/rv64/dynarec_rv64_emit_shift.c
@@ -307,6 +307,57 @@ void emit_sar32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
     }
 }
 
+
+// emit ROL32 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch
+// s2 is the unmasked rotate count register; it is masked to 6 (rex.w) or 5 bits here
+// with !rex.w the W-form shifts are used, callers are expected to ZEROUP a register destination
+void emit_rol32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4)
+{
+    IFX(X_CF) {
+        ANDI(xFlags, xFlags, ~(1UL<<F_CF));
+    }
+
+    IFX(X_PEND) {
+        SDxw(s2, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s4, rex.w?d_rol64:d_rol32);
+    } else IFX(X_ALL) {
+        SET_DFNONE();
+    }
+
+    if(rex.w) {
+        ANDI(s4, s2, 0x3f);
+    } else {
+        ANDI(s4, s2, 0x1f);
+    }
+    SLLxw(s3, s1, s4);
+    NEG(s4, s4);
+    ADDI(s4, s4, rex.w?64:32);
+    SRLxw(s1, s1, s4);
+    OR(s1, s3, s1);
+    IFX(X_PEND) {
+        SDxw(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_CF) {
+        // F_CF is bit 0: CF = lowest bit of the rotated result
+        ANDI(s4, s1, 1);
+        OR(xFlags, xFlags, s4);
+    }
+    IFX(X_OF) {
+        // OF is only defined for a 1-bit rotate (undefined otherwise), so compute it
+        // unconditionally as MSB(result) ^ CF instead of branching to NEXT:
+        // a branch to the epilog here would also skip the caller's WBACK/ZEROUP
+        ADDI(s4, xZR, 1);
+        SLLI(s4, s4, F_OF2);
+        NOT(s4, s4);
+        AND(xFlags, xFlags, s4); // drop the stale OF before merging the new one
+        SRLIxw(s3, s1, rex.w?63:31);
+        XOR(s3, s3, s1);
+        ANDI(s3, s3, 1);
+        SLLI(s3, s3, F_OF2);
+        OR(xFlags, xFlags, s3);
+    }
+}
+
 // emit ROL32 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch
 void emit_rol32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4)
 {
@@ -327,11 +378,8 @@ void emit_rol32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
         }
         return;
     }
-    SLLI(s3, s1, c);
-    if (!rex.w) {
-        AND(s3, xMASK, s3);
-    }
-    SRLI(s1, s1, (rex.w?64:32)-c);
+    SLLIxw(s3, s1, c);
+    SRLIxw(s1, s1, (rex.w?64:32)-c);
     OR(s1, s3, s1);
     IFX(X_PEND) {
         SDxw(s1, xEmu, offsetof(x64emu_t, res));
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h
index a9e1a9a1..924612a6 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.h
+++ b/src/dynarec/rv64/dynarec_rv64_helper.h
@@ -463,6 +463,8 @@
 #define B_MARK_nocond Bxx_gen(__, MARK, 0, 0)
 // Branch to MARK if reg1<reg2 (use j64)
 #define BLT_MARK(reg1, reg2) Bxx_gen(LT, MARK, reg1, reg2)
+// Branch to MARK if reg1<reg2, unsigned compare (use j64)
+#define BLTU_MARK(reg1, reg2) Bxx_gen(LTU, MARK, reg1, reg2)
 // Branch to MARK if reg1>=reg2 (use j64)
 #define BGE_MARK(reg1, reg2) Bxx_gen(GE, MARK, reg1, reg2)
 // Branch to MARK2 if reg1==reg2 (use j64)
@@ -486,6 +488,11 @@
 // Branch to MARKLOCK if reg1!=0 (use j64)
 #define BNEZ_MARKLOCK(reg) BNE_MARKLOCK(reg, xZR)
 
+// Branch to NEXT if reg1==reg2 (use j64)
+#define BEQ_NEXT(reg1, reg2) \
+    j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0; \
+    BEQ(reg1, reg2, j64)
+
 // Branch to NEXT if reg1==0 (use j64)
 #define CBZ_NEXT(reg1) \
     j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0; \
@@ -822,6 +829,7 @@ void* rv64_next(x64emu_t* emu, uintptr_t addr);
 #define emit_shr32 STEPNAME(emit_shr32)
 #define emit_shr32c STEPNAME(emit_shr32c)
 #define emit_sar32c STEPNAME(emit_sar32c)
+#define emit_rol32 STEPNAME(emit_rol32)
 #define emit_rol32c STEPNAME(emit_rol32c)
 #define emit_ror32c STEPNAME(emit_ror32c)
 #define emit_shrd32c STEPNAME(emit_shrd32c)
@@ -954,6 +962,7 @@ void emit_shl32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
 void emit_shr32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
 void emit_shr32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
 void emit_sar32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
+void emit_rol32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
 void emit_rol32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
 //void emit_ror32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
 void emit_shrd32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4);
diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h
index af4cd430..a36b3bf3 100644
--- a/src/dynarec/rv64/rv64_emitter.h
+++ b/src/dynarec/rv64/rv64_emitter.h
@@ -297,6 +297,9 @@ f28–31 ft8–11 FP temporaries Caller
 // rd = rs1>>rs2 arithmetic
 #define SRAW(rd, rs1, rs2) EMIT(R_type(0b0100000, rs2, rs1, 0b101, rd, 0b0111011))
 
+// rd = rs1<<rs2 / rs1>>rs2 logical: 64-bit op with rex.w, W-form (opcode 0b0111011) otherwise; unlike SRAxw no ZEROUP is emitted
+#define SLLxw(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b001, rd, rex.w?0b0110011:0b0111011))
+#define SRLxw(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b101, rd, rex.w?0b0110011:0b0111011))
 #define SRAxw(rd, rs1, rs2) if(rex.w) {SRA(rd, rs1, rs2);} else {SRAW(rd, rs1, rs2); ZEROUP(rd);}
 
 // Shift Left Immediate, 32-bit, sign-extended