diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2023-04-18 21:01:17 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-04-18 15:01:17 +0200 |
| commit | 008ef41261b9723c05d223b5732d4574879118b2 (patch) | |
| tree | d493f7d4459a823be3c23954ecec2b26317489f5 /src | |
| parent | c61d341844d82ef4d1be17a1e1f019cbcdb2686f (diff) | |
| download | box64-008ef41261b9723c05d223b5732d4574879118b2.tar.gz box64-008ef41261b9723c05d223b5732d4574879118b2.zip | |
[RV64_DYNAREC] Added more opcode and some fixes (#710)
* Added 64 88/89/C7 opcodes for test11
* Added more DF opcodes
* Fixed various bugs in x87/SSE/mmx infrastructure
* Added F2 0F 2D CVTSD2SI opcode and removed a TODO in CVTTSD2SI
* Fixed Invalid Operation handling in DF opcodes (interpreter also)
* Added 32bits -> 16bits overflow test on DF opcodes
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_64.c | 74 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_df.c | 48 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_f20f.c | 36 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.c | 32 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.h | 4 | ||||
| -rw-r--r-- | src/dynarec/rv64/rv64_emitter.h | 4 | ||||
| -rw-r--r-- | src/emu/x64rundf.c | 5 |
7 files changed, 181 insertions, 22 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_64.c b/src/dynarec/rv64/dynarec_rv64_64.c index 9d1f8823..b1fcc589 100644 --- a/src/dynarec/rv64/dynarec_rv64_64.c +++ b/src/dynarec/rv64/dynarec_rv64_64.c @@ -73,6 +73,63 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETEDO(x4, 0, x5); emit_sub32(dyn, ninst, rex, gd, ed, x3, x4, x5); break; + case 0x88: + INST_NAME("MOV Seg:Eb, Gb"); + grab_segdata(dyn, addr, ninst, x4, seg); + nextop=F8; + gd = ((nextop&0x38)>>3)+(rex.r<<3); + if(rex.rex) { + gb2 = 0; + gb1 = xRAX + gd; + } else { + gb2 = ((gd&4)>>2); + gb1 = xRAX+(gd&3); + } + gd = x5; + if(gb2) { + SRLI(x5, gb1, 8); + gb1 = x5; + } + if(MODREG) { + ed = (nextop&7) + (rex.b<<3); + if(rex.rex) { + eb1 = xRAX+ed; + eb2 = 0; + } else { + eb1 = xRAX+(ed&3); // Ax, Cx, Dx or Bx + eb2 = ((ed&4)>>2); // L or H + } + ANDI(gd, gb1, 0xff); + if(eb2) { + MOV64x(x1, 0xffffffffffff00ffLL); + ANDI(x1, eb1, x1); + SLLI(gd, gd, 8); + OR(eb1, x1, gd); + } else { + ANDI(x1, eb1, ~0xff); + OR(eb1, x1, gd); + } + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 0, 0); + ADD(x4, ed, x4); + SB(gb1, x4, 0); + SMWRITE2(); + } + break; + case 0x89: + INST_NAME("MOV Seg:Ed, Gd"); + grab_segdata(dyn, addr, ninst, x4, seg); + nextop=F8; + GETGD; + if(MODREG) { // reg <= reg + MVxw(xRAX+(nextop&7)+(rex.b<<3), gd); + } else { // mem <= reg + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); + ADD(x4, ed, x4); + SDxw(gd, x4, 0); + SMWRITE2(); + } + break; case 0x8B: INST_NAME("MOV Gd, Seg:Ed"); grab_segdata(dyn, addr, ninst, x4, seg); @@ -87,6 +144,23 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni LDxw(gd, x4, 0); } break; + case 0xC7: + INST_NAME("MOV Seg:Ed, Id"); + grab_segdata(dyn, addr, ninst, x4, seg); + nextop=F8; + if(MODREG) { // reg <= i32 + i64 = F32S; + ed = xRAX+(nextop&7)+(rex.b<<3); + MOV64xw(ed, i64); + } else { // mem <= i32 + addr = 
geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 0, 4); + i64 = F32S; + MOV64xw(x3, i64); + ADD(x4, ed, x4); + SDxw(x3, x4, 0); + SMWRITE2(); + } + break; default: DEFAULT; } diff --git a/src/dynarec/rv64/dynarec_rv64_df.c b/src/dynarec/rv64/dynarec_rv64_df.c index b1e24231..a96a45f1 100644 --- a/src/dynarec/rv64/dynarec_rv64_df.c +++ b/src/dynarec/rv64/dynarec_rv64_df.c @@ -111,10 +111,56 @@ uintptr_t dynarec64_DF(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FCVTDL(v1, x1, RD_RNE); } break; + case 1: + INST_NAME("FISTTP Ew, ST0"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_F); + addr = geted(dyn, addr, ninst, nextop, &wback, x3, x4, &fixedaddress, rex, NULL, 0, 0); + if(!box64_dynarec_fastround) { + FSFLAGSI(xZR); // reset all bits + } + FCVTWD(x4, v1, RD_RTZ); + if(!box64_dynarec_fastround) { + FRFLAGS(x5); // get back FPSR to check the IOC bit + ANDI(x5, x5, 1<<FR_NV); + BNEZ_MARK(x5); + SLLIW(x5, x4, 16); + SRAIW(x5, x5, 16); + BEQ_MARK2(x5, x4); + MARK; + MOV32w(x4, 0x8000); + } + MARK2; + SH(x4, wback, fixedaddress); + x87_do_pop(dyn, ninst, x3); + break; + case 3: + INST_NAME("FISTP Ew, ST0"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_F); + addr = geted(dyn, addr, ninst, nextop, &wback, x3, x4, &fixedaddress, rex, NULL, 0, 0); + u8 = sse_setround(dyn, ninst, x2, x3); + if(!box64_dynarec_fastround) { + FSFLAGSI(xZR); // reset all bits + } + FCVTWD(x4, v1, RD_RM); + x87_restoreround(dyn, ninst, u8); + if(!box64_dynarec_fastround) { + FRFLAGS(x5); // get back FPSR to check the IOC bit + ANDI(x5, x5, 1<<FR_NV); + BNEZ_MARK(x5); + SLLIW(x5, x4, 16); + SRAIW(x5, x5, 16); + BEQ_MARK2(x5, x4); + MARK; + MOV32w(x4, 0x8000); + } + MARK2; + SH(x4, wback, fixedaddress); + x87_do_pop(dyn, ninst, x3); + break; default: DEFAULT; break; } } return addr; -} \ No newline at end of file +} diff --git a/src/dynarec/rv64/dynarec_rv64_f20f.c b/src/dynarec/rv64/dynarec_rv64_f20f.c index 34e471b6..fed2e5d5 100644 --- 
a/src/dynarec/rv64/dynarec_rv64_f20f.c +++ b/src/dynarec/rv64/dynarec_rv64_f20f.c @@ -104,8 +104,42 @@ uintptr_t dynarec64_F20F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETGD; GETEXSD(v0, 0); - // TODO: fastnan handling + if(!box64_dynarec_fastround) { + FSFLAGSI(xZR); // // reset all bits + } FCVTLDxw(gd, v0, RD_RTZ); + if(!box64_dynarec_fastround) { + FRFLAGS(x5); // get back FPSR to check the IOC bit + ANDI(x5, x5, (1<<FR_NV)|(1<<FR_OF)); + CBZ_NEXT(x5); + if(rex.w) { + MOV64x(gd, 0x8000000000000000LL); + } else { + MOV32w(gd, 0x80000000); + } + } + break; + case 0x2D: + INST_NAME("CVTSD2SI Gd, Ex"); + nextop = F8; + GETGD; + GETEXSD(v0, 0); + if(!box64_dynarec_fastround) { + FSFLAGSI(xZR); // // reset all bits + } + u8 = sse_setround(dyn, ninst, x2, x3); + FCVTLDxw(gd, v0, RD_RM); + x87_restoreround(dyn, ninst, u8); + if(!box64_dynarec_fastround) { + FRFLAGS(x5); // get back FPSR to check the IOC bit + ANDI(x5, x5, (1<<FR_NV)|(1<<FR_OF)); + CBZ_NEXT(x5); + if(rex.w) { + MOV64x(gd, 0x8000000000000000LL); + } else { + MOV32w(gd, 0x80000000); + } + } break; case 0x38: // these are some more SSSE4.2+ opcodes opcode = F8; diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c index a5fe8bc1..6d7e5e91 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.c +++ b/src/dynarec/rv64/dynarec_rv64_helper.c @@ -984,7 +984,7 @@ void x87_swapreg(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int a, int b) } // Set rounding according to cw flags, return reg to restore flags -int x87_setround(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3) +int x87_setround(dynarec_rv64_t* dyn, int ninst, int s1, int s2) { MAYUSE(dyn); MAYUSE(ninst); MAYUSE(s1); MAYUSE(s2); @@ -994,18 +994,19 @@ int x87_setround(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3) // MMX/x87 Round mode: 0..3: Nearest, Down, Up, Chop // RV64: 0..7: Nearest, Toward Zero (Chop), Down, Up, Nearest tie to Max, invalid, invalid, dynamic (invalid 
here) // 0->0, 1->2, 2->3, 3->1 - SLLI(s1, s1, 1); + BEQ(s1, xZR, 24); ADDI(s2, xZR, 3); - BGE(s1, s2, 4+8); - SUBI(s1, s1, 4); - XORI(s3, s1, 0b11); + BEQ(s1, s2, 12); + ADDI(s1, s1, 1); + BEQ(xZR, xZR, 8); + ADDI(s1, xZR, 1); // transform done (is there a faster way?) - FSRM(s3); // exange RM with current - return s3; + FSRM(s1, s1); // exange RM with current + return s1; } // Set rounding according to mxcsr flags, return reg to restore flags -int sse_setround(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3) +int sse_setround(dynarec_rv64_t* dyn, int ninst, int s1, int s2) { MAYUSE(dyn); MAYUSE(ninst); MAYUSE(s1); MAYUSE(s2); @@ -1015,14 +1016,15 @@ int sse_setround(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3) // MMX/x87 Round mode: 0..3: Nearest, Down, Up, Chop // RV64: 0..7: Nearest, Toward Zero (Chop), Down, Up, Nearest tie to Max, invalid, invalid, dynamic (invalid here) // 0->0, 1->2, 2->3, 3->1 - SLLI(s1, s1, 1); + BEQ(s1, xZR, 24); ADDI(s2, xZR, 3); - BGE(s1, s2, 4+8); - SUBI(s1, s1, 4); - XORI(s3, s1, 0b11); + BEQ(s1, s2, 12); + ADDI(s1, s1, 1); + BEQ(xZR, xZR, 8); + ADDI(s1, xZR, 1); // transform done (is there a faster way?) 
- FSRM(s3); // exange RM with current - return s3; + FSRM(s1, s1); // exange RM with current + return s1; } // Restore round flag, destroy s1 doing so @@ -1030,7 +1032,7 @@ void x87_restoreround(dynarec_rv64_t* dyn, int ninst, int s1) { MAYUSE(dyn); MAYUSE(ninst); MAYUSE(s1); - FSRM(s1); // put back fpscr + FSRM(s1, s1); // put back fpscr } // MMX helpers diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h index 0c2174eb..652ef437 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.h +++ b/src/dynarec/rv64/dynarec_rv64_helper.h @@ -1004,11 +1004,11 @@ void x87_reget_st(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int st); // swap 2 x87 regs void x87_swapreg(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int a, int b); // Set rounding according to cw flags, return reg to restore flags -int x87_setround(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3); +int x87_setround(dynarec_rv64_t* dyn, int ninst, int s1, int s2); // Restore round flag void x87_restoreround(dynarec_rv64_t* dyn, int ninst, int s1); // Set rounding according to mxcsr flags, return reg to restore flags -int sse_setround(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3); +int sse_setround(dynarec_rv64_t* dyn, int ninst, int s1, int s2); void CacheTransform(dynarec_rv64_t* dyn, int ninst, int cacheupd, int s1, int s2, int s3); diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h index a36b3bf3..5ffcc875 100644 --- a/src/dynarec/rv64/rv64_emitter.h +++ b/src/dynarec/rv64/rv64_emitter.h @@ -369,8 +369,8 @@ f28–31 ft8–11 FP temporaries Caller // RV32F // Read round mode #define FRRM(rd) CSRRS(rd, xZR, 0x002) -// Swap round mode with rd -#define FSRM(rd) CSRRWI(rd, 0b111, 0x002) +// Swap round mode +#define FSRM(rd, rs) CSRRW(rd, rs, 0x002) // Write FP exception flags, immediate #define FSFLAGSI(imm) CSRRWI(xZR, imm, 0x0001) // Read FP exception flags to rd diff --git a/src/emu/x64rundf.c b/src/emu/x64rundf.c index 
03244715..528a6c3f 100644 --- a/src/emu/x64rundf.c +++ b/src/emu/x64rundf.c @@ -131,7 +131,10 @@ uintptr_t RunDF(x64emu_t *emu, rex_t rex, uintptr_t addr) case 1: /* FISTTP Ew, ST0 */ GETEW(0); tmp16s = ST0.d; - EW->sword[0] = tmp16s; + if(isgreater(ST0.d, (double)(int32_t)0x7fff) || isless(ST0.d, -(double)(int32_t)0x8000) || !isfinite(ST0.d)) + EW->sword[0] = 0x8000; + else + EW->sword[0] = tmp16s; fpu_do_pop(emu); break; case 2: /* FIST Ew, ST0 */ |