diff options
| author | xctan <xctan@cirno.icu> | 2023-04-19 16:11:22 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-04-19 10:11:22 +0200 |
| commit | 6f29d2a5d8208d2f0a8c1e1f992de19ca36b74cc (patch) | |
| tree | 3b80823cf3b800353193491950d4f44e905985fb | |
| parent | 008ef41261b9723c05d223b5732d4574879118b2 (diff) | |
| download | box64-6f29d2a5d8208d2f0a8c1e1f992de19ca36b74cc.tar.gz box64-6f29d2a5d8208d2f0a8c1e1f992de19ca36b74cc.zip | |
[RV64_DYNAREC] Added more opcodes (#712)
* [RV64_DYNAREC] Added 64 33 XOR opcode
* [RV64_DYNAREC] Added 0F C8-CF BSWAP opcode
* [RV64_DYNAREC] Added 66 0F 3A 0B ROUNDSD opcode
* [RV64_DYNAREC] Added F3 0F BC TZCNT opcode
* [RV64_DYNAREC] Added F3 0F E6 CVTDQ2PD opcode
* [RV64_DYNAREC] Added F3 0F 5B CVTTPS2DQ opcode
* [RV64_DYNAREC] Fixed CVTTPS2DQ, CVTDQ2PD and printer
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_0f.c | 62 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_64.c | 12 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_660f.c | 28 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_f30f.c | 69 | ||||
| -rw-r--r-- | src/dynarec/rv64/rv64_emitter.h | 3 | ||||
| -rw-r--r-- | src/dynarec/rv64/rv64_printer.c | 2 |
6 files changed, 173 insertions, 3 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c
index 86d1eab7..6f356ff4 100644
--- a/src/dynarec/rv64/dynarec_rv64_0f.c
+++ b/src/dynarec/rv64/dynarec_rv64_0f.c
@@ -1003,6 +1003,68 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             SW(x5, gback, 2*4);
             SW(x6, gback, 3*4);
             break;
+
+        case 0xC8:
+        case 0xC9:
+        case 0xCA:
+        case 0xCB:
+        case 0xCC:
+        case 0xCD:
+        case 0xCE:
+        case 0xCF: /* BSWAP reg */
+            INST_NAME("BSWAP Reg");
+            gd = xRAX+(opcode&7)+(rex.b<<3);
+            MOV_U12(x1, 0xff);  // mask 0xff; x2 accumulates the swapped value, x3 is a per-byte temp
+            SLLI(x4, x1, 8);    // mask 0xff00
+            if (rex.w) {
+                SLLI(x5, x1, 16);   // mask 0xff0000
+                SLLI(x6, x1, 24);   // mask 0xff000000
+
+                SRLI(x2, gd, 56);   // byte 7 -> byte 0
+
+                SRLI(x3, gd, 40);   // byte 6 -> byte 1
+                AND(x3, x3, x4);
+                OR(x2, x2, x3);
+
+                SRLI(x3, gd, 24);   // byte 5 -> byte 2
+                AND(x3, x3, x5);
+                OR(x2, x2, x3);
+
+                SRLI(x3, gd, 8);    // byte 4 -> byte 3
+                AND(x3, x3, x6);
+                OR(x2, x2, x3);
+
+                AND(x3, gd, x6);    // byte 3 -> byte 4
+                SLLI(x3, x3, 8);
+                OR(x2, x2, x3);
+
+                AND(x3, gd, x5);    // byte 2 -> byte 5
+                SLLI(x3, x3, 24);
+                OR(x2, x2, x3);
+
+                AND(x3, gd, x4);    // byte 1 -> byte 6
+                SLLI(x3, x3, 40);
+                OR(x2, x2, x3);
+
+                SLLI(x3, gd, 56);   // byte 0 -> byte 7: shift gd itself, not the stale x3 temp (which would shift out to zero)
+                OR(gd, x2, x3);
+            } else {
+                SRLIW(x2, gd, 24);  // byte 3 -> byte 0
+
+                SRLIW(x3, gd, 8);   // byte 2 -> byte 1
+                AND(x3, x3, x4);
+                OR(x2, x2, x3);
+
+                AND(x3, gd, x4);    // byte 1 -> byte 2
+                SLLI(x3, x3, 8);
+                OR(x2, x2, x3);
+
+                AND(x3, gd, x1);    // byte 0 -> byte 3
+                SLLI(x3, x3, 24);
+                OR(gd, x2, x3);
+            }
+            break;
+
         default:
             DEFAULT;
     }
diff --git a/src/dynarec/rv64/dynarec_rv64_64.c b/src/dynarec/rv64/dynarec_rv64_64.c
index b1fcc589..0f5c9087 100644
--- a/src/dynarec/rv64/dynarec_rv64_64.c
+++ b/src/dynarec/rv64/dynarec_rv64_64.c
@@ -73,6 +73,17 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             GETEDO(x4, 0, x5);
             emit_sub32(dyn, ninst, rex, gd, ed, x3, x4, x5);
             break;
+
+        // case 0x33:
+        //     INST_NAME("XOR Gd, Seg:Ed");
+        //     SETFLAGS(X_ALL, SF_SET_PENDING);
+        //     grab_segdata(dyn, addr, ninst, x4, seg);
+        //     nextop = F8;
+        //     GETGD;
+        //     GETEDO(x4, 0, x5);
+        //     emit_xor32(dyn, ninst, rex, gd, ed, x3, x4);
+        //     break;
+
         case 0x88:
             INST_NAME("MOV Seg:Eb, Gb");
             grab_segdata(dyn, addr, ninst, x4, seg);
@@ -130,6 +141,7 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 SMWRITE2();
             }
             break;
+
         case 0x8B:
             INST_NAME("MOV Gd, Seg:Ed");
             grab_segdata(dyn, addr, ninst, x4, seg);
diff --git a/src/dynarec/rv64/dynarec_rv64_660f.c b/src/dynarec/rv64/dynarec_rv64_660f.c
index 98836ecf..16e84d9a 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f.c
@@ -47,6 +47,8 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
     MAYUSE(eb1);
     MAYUSE(eb2);
     MAYUSE(j64);
+
+    static const int8_t round_round[] = { RD_RNE, RD_RDN, RD_RUP, RD_RTZ };
 
     switch(opcode) {
         case 0x10:
@@ -242,6 +244,32 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     DEFAULT;
             }
             break;
+        case 0x3A:  // these are some more SSSE3+ opcodes
+            opcode = F8;
+            switch(opcode) {
+                case 0x0B:
+                    INST_NAME("ROUNDSD Gx, Ex, Ib");
+                    nextop = F8;
+                    GETGX(x1);
+                    GETEXSD(d0, 0);
+                    u8 = F8;
+                    v1 = fpu_get_scratch(dyn);
+                    if(u8&4) {
+                        u8 = sse_setround(dyn, ninst, x4, x2);
+                        FCVTLD(x5, d0, RD_DYN);
+                        FCVTDL(v1, x5, RD_DYN);
+                        x87_restoreround(dyn, ninst, u8);
+                    } else {
+                        FCVTLD(x5, d0, round_round[u8&3]);
+                        FCVTDL(v1, x5, round_round[u8&3]);
+                    }
+                    FSD(v1, gback, 0);
+                    break;
+                default:
+                    DEFAULT;
+            }
+            break;
+
         case 0x54:
             INST_NAME("ANDPD Gx, Ex");
             nextop = F8;
diff --git a/src/dynarec/rv64/dynarec_rv64_f30f.c b/src/dynarec/rv64/dynarec_rv64_f30f.c
index d9700507..945a62d6 100644
--- a/src/dynarec/rv64/dynarec_rv64_f30f.c
+++ b/src/dynarec/rv64/dynarec_rv64_f30f.c
@@ -224,7 +224,58 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             SSE_LOOP_MV_Q2(x3);
             if(!MODREG) SMWRITE2();
             break;
-            
+
+        case 0x5B:
+            INST_NAME("CVTTPS2DQ Gx, Ex");
+            nextop = F8;
+            GETEX(x5, 0) ;
+            GETGX(x6);
+            v0 = fpu_get_scratch(dyn);
+            v1 = fpu_get_scratch(dyn);
+            q0 = fpu_get_scratch(dyn);
+            q1 = fpu_get_scratch(dyn);
+            FLW(v0, x5, 0);
+            FLW(v1, x5, 4);
+            FLW(q0, x5, 8);
+            FLW(q1, x5, 12);
+            FCVTWS(x1, v0, RD_RTZ);
+            FCVTWS(x2, v1, RD_RTZ);
+            FCVTWS(x3, q0, RD_RTZ);
+            FCVTWS(x4, q1, RD_RTZ);
+            SW(x1, x6, 0);
+            SW(x2, x6, 4);
+            SW(x3, x6, 8);
+            SW(x4, x6, 12);
+            break;
+        case 0xBC:
+            INST_NAME("TZCNT Gd, Ed");
+            SETFLAGS(X_ZF, SF_SUBSET);
+            SET_DFNONE();
+            nextop = F8;
+            GETED(0);
+            GETGD;
+            if(!rex.w && MODREG) {
+                AND(x4, ed, xMASK);
+                ed = x4;
+            }
+            BNE_MARK(ed, xZR);  // non-zero source: compute the count at MARK
+            ANDI(xFlags, xFlags, ~((1<<F_ZF) | (1<<F_CF)));
+            ORI(xFlags, xFlags, 1<<F_CF);
+            MOV32w(gd, rex.w?64:32);  // zero source: result is the operand width
+            B_NEXT_nocond;
+            MARK;
+            NEG(x2, ed);
+            AND(x2, x2, ed);  // x2 = ed & -ed
+            TABLE64(x3, 0x03f79d71b4ca8b09ULL);
+            MUL(x2, x2, x3);
+            SRLI(x2, x2, 64-6);  // top 6 bits of the product index deBruijn64tab
+            TABLE64(x1, (uintptr_t)&deBruijn64tab);
+            ADD(x1, x1, x2);
+            LBU(gd, x1, 0);
+            ANDI(xFlags, xFlags, ~((1<<F_ZF) | (1<<F_CF)));
+            BNE(gd, xZR, 4+4);  // presumably skips the ORI below when the count is non-zero
+            ORI(xFlags, xFlags, 1<<F_ZF);
+            break;
         case 0xBD:
             INST_NAME("LZCNT Gd, Ed");
             SETFLAGS(X_ZF|X_CF, SF_SUBSET);
@@ -326,6 +377,22 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             NEG(x2, x2);
             FMVWX(d0, x2);
             break;
+
+        case 0xE6:
+            INST_NAME("CVTDQ2PD Gx, Ex");
+            nextop = F8;
+            GETEX(x1, 0);
+            GETGX(x2);
+            q0 = fpu_get_scratch(dyn);
+            q1 = fpu_get_scratch(dyn);
+            LW(x3, x1, 0);
+            LW(x4, x1, 4);
+            FCVTDW(q0, x3, RD_DYN);
+            FCVTDW(q1, x4, RD_DYN);
+            FSD(q0, x2, 0);
+            FSD(q1, x2, 8);
+            break;
+
         default:
             DEFAULT;
     }
diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h
index 5ffcc875..29336895 100644
--- a/src/dynarec/rv64/rv64_emitter.h
+++ b/src/dynarec/rv64/rv64_emitter.h
@@ -398,6 +398,7 @@ f28–31 ft8–11 FP temporaries Caller
 #define RD_RMM 0b100
 // In instruction’s rm field, selects dynamic rounding mode;
 #define RD_RM 0b111
+#define RD_DYN RD_RM
 
 // load single precision from rs1+imm12 to frd
 #define FLW(frd, rs1, imm12) EMIT(I_type(imm12, rs1, 0b010, frd, 0b0000111))
@@ -422,7 +423,7 @@ f28–31 ft8–11 FP temporaries Caller
 // Convert from signed 32bits to Single
 #define FCVTSW(frd, rs1, rm) EMIT(R_type(0b1101000, 0b00000, rs1, rm, frd, 0b1010011))
 // Convert from Single to signed 32bits (trucated)
-#define FCVTWS(rd, frs1, tm) EMIT(R_type(0b1100000, 0b00000, frs1, rm, rd, 0b1010011))
+#define FCVTWS(rd, frs1, rm) EMIT(R_type(0b1100000, 0b00000, frs1, rm, rd, 0b1010011))
 
 #define FADDS(frd, frs1, frs2) EMIT(R_type(0b0000000, frs2, frs1, 0b000, frd, 0b1010011))
 #define FSUBS(frd, frs1, frs2) EMIT(R_type(0b0000100, frs2, frs1, 0b000, frd, 0b1010011))
diff --git a/src/dynarec/rv64/rv64_printer.c b/src/dynarec/rv64/rv64_printer.c
index ed6167ba..bdc424c1 100644
--- a/src/dynarec/rv64/rv64_printer.c
+++ b/src/dynarec/rv64/rv64_printer.c
@@ -1315,7 +1315,7 @@ const char* rv64_print(uint32_t data, uintptr_t addr)
                 insn.name = "fcvt.d.lu";
                 break;
             }
-            PRINT_xd_fs1();
+            PRINT_fd_xs1();
         }
         case 0x70: {
             assert(RS2(data) == 0);