From 008ef41261b9723c05d223b5732d4574879118b2 Mon Sep 17 00:00:00 2001 From: Yang Liu Date: Tue, 18 Apr 2023 21:01:17 +0800 Subject: [RV64_DYNAREC] Added more opcode and some fixes (#710) * Added 64 88/89/C7 opcodes for test11 * Added more DF opcodes * Fixed various bugs in x87/SSE/mmx infrastructure * Added F2 0F 2D CVTSD2SI opcode and remove a TODO in CVTTSD2SI * Fixed Invalid Operation handling DF opcodes (interpreter also) * Added 32bits -> 16bits overflow test on DF opcodes --- src/dynarec/rv64/dynarec_rv64_64.c | 74 ++++++++++++++++++++++++++++++++++ src/dynarec/rv64/dynarec_rv64_df.c | 48 +++++++++++++++++++++- src/dynarec/rv64/dynarec_rv64_f20f.c | 36 ++++++++++++++++- src/dynarec/rv64/dynarec_rv64_helper.c | 32 ++++++++------- src/dynarec/rv64/dynarec_rv64_helper.h | 4 +- src/dynarec/rv64/rv64_emitter.h | 4 +- src/emu/x64rundf.c | 5 ++- 7 files changed, 181 insertions(+), 22 deletions(-) (limited to 'src') diff --git a/src/dynarec/rv64/dynarec_rv64_64.c b/src/dynarec/rv64/dynarec_rv64_64.c index 9d1f8823..b1fcc589 100644 --- a/src/dynarec/rv64/dynarec_rv64_64.c +++ b/src/dynarec/rv64/dynarec_rv64_64.c @@ -73,6 +73,63 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETEDO(x4, 0, x5); emit_sub32(dyn, ninst, rex, gd, ed, x3, x4, x5); break; + case 0x88: + INST_NAME("MOV Seg:Eb, Gb"); + grab_segdata(dyn, addr, ninst, x4, seg); + nextop=F8; + gd = ((nextop&0x38)>>3)+(rex.r<<3); + if(rex.rex) { + gb2 = 0; + gb1 = xRAX + gd; + } else { + gb2 = ((gd&4)>>2); + gb1 = xRAX+(gd&3); + } + gd = x5; + if(gb2) { + SRLI(x5, gb1, 8); + gb1 = x5; + } + if(MODREG) { + ed = (nextop&7) + (rex.b<<3); + if(rex.rex) { + eb1 = xRAX+ed; + eb2 = 0; + } else { + eb1 = xRAX+(ed&3); // Ax, Cx, Dx or Bx + eb2 = ((ed&4)>>2); // L or H + } + ANDI(gd, gb1, 0xff); + if(eb2) { + MOV64x(x1, 0xffffffffffff00ffLL); + AND(x1, eb1, x1); // AND, not ANDI: the mask was loaded into register x1 just above + SLLI(gd, gd, 8); + OR(eb1, x1, gd); + } else { + ANDI(x1, eb1, ~0xff); + OR(eb1, x1, gd); + } + } else { + addr = geted(dyn, 
addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 0, 0); + ADD(x4, ed, x4); + SB(gb1, x4, 0); + SMWRITE2(); + } + break; + case 0x89: + INST_NAME("MOV Seg:Ed, Gd"); + grab_segdata(dyn, addr, ninst, x4, seg); + nextop=F8; + GETGD; + if(MODREG) { // reg <= reg + MVxw(xRAX+(nextop&7)+(rex.b<<3), gd); + } else { // mem <= reg + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); + ADD(x4, ed, x4); + SDxw(gd, x4, 0); + SMWRITE2(); + } + break; case 0x8B: INST_NAME("MOV Gd, Seg:Ed"); grab_segdata(dyn, addr, ninst, x4, seg); @@ -87,6 +144,23 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni LDxw(gd, x4, 0); } break; + case 0xC7: + INST_NAME("MOV Seg:Ed, Id"); + grab_segdata(dyn, addr, ninst, x4, seg); + nextop=F8; + if(MODREG) { // reg <= i32 + i64 = F32S; + ed = xRAX+(nextop&7)+(rex.b<<3); + MOV64xw(ed, i64); + } else { // mem <= i32 + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 0, 4); + i64 = F32S; + MOV64xw(x3, i64); + ADD(x4, ed, x4); + SDxw(x3, x4, 0); + SMWRITE2(); + } + break; default: DEFAULT; } diff --git a/src/dynarec/rv64/dynarec_rv64_df.c b/src/dynarec/rv64/dynarec_rv64_df.c index b1e24231..a96a45f1 100644 --- a/src/dynarec/rv64/dynarec_rv64_df.c +++ b/src/dynarec/rv64/dynarec_rv64_df.c @@ -111,10 +111,56 @@ uintptr_t dynarec64_DF(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FCVTDL(v1, x1, RD_RNE); } break; + case 1: + INST_NAME("FISTTP Ew, ST0"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_F); + addr = geted(dyn, addr, ninst, nextop, &wback, x3, x4, &fixedaddress, rex, NULL, 0, 0); + if(!box64_dynarec_fastround) { + FSFLAGSI(xZR); // reset all bits + } + FCVTWD(x4, v1, RD_RTZ); + if(!box64_dynarec_fastround) { + FRFLAGS(x5); // get back FPSR to check the IOC bit + ANDI(x5, x5, 1<0, 1->2, 2->3, 3->1 - SLLI(s1, s1, 1); + BEQ(s1, xZR, 24); ADDI(s2, xZR, 3); - BGE(s1, s2, 4+8); - SUBI(s1, s1, 4); - XORI(s3, s1, 0b11); + BEQ(s1, s2, 
12); + ADDI(s1, s1, 1); + BEQ(xZR, xZR, 8); + ADDI(s1, xZR, 1); // transform done (is there a faster way?) - FSRM(s3); // exange RM with current - return s3; + FSRM(s1, s1); // exange RM with current + return s1; } // Set rounding according to mxcsr flags, return reg to restore flags -int sse_setround(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3) +int sse_setround(dynarec_rv64_t* dyn, int ninst, int s1, int s2) { MAYUSE(dyn); MAYUSE(ninst); MAYUSE(s1); MAYUSE(s2); @@ -1015,14 +1016,15 @@ int sse_setround(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3) // MMX/x87 Round mode: 0..3: Nearest, Down, Up, Chop // RV64: 0..7: Nearest, Toward Zero (Chop), Down, Up, Nearest tie to Max, invalid, invalid, dynamic (invalid here) // 0->0, 1->2, 2->3, 3->1 - SLLI(s1, s1, 1); + BEQ(s1, xZR, 24); ADDI(s2, xZR, 3); - BGE(s1, s2, 4+8); - SUBI(s1, s1, 4); - XORI(s3, s1, 0b11); + BEQ(s1, s2, 12); + ADDI(s1, s1, 1); + BEQ(xZR, xZR, 8); + ADDI(s1, xZR, 1); // transform done (is there a faster way?) 
- FSRM(s3); // exange RM with current - return s3; + FSRM(s1, s1); // exange RM with current + return s1; } // Restore round flag, destroy s1 doing so @@ -1030,7 +1032,7 @@ void x87_restoreround(dynarec_rv64_t* dyn, int ninst, int s1) { MAYUSE(dyn); MAYUSE(ninst); MAYUSE(s1); - FSRM(s1); // put back fpscr + FSRM(s1, s1); // put back fpscr } // MMX helpers diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h index 0c2174eb..652ef437 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.h +++ b/src/dynarec/rv64/dynarec_rv64_helper.h @@ -1004,11 +1004,11 @@ void x87_reget_st(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int st); // swap 2 x87 regs void x87_swapreg(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int a, int b); // Set rounding according to cw flags, return reg to restore flags -int x87_setround(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3); +int x87_setround(dynarec_rv64_t* dyn, int ninst, int s1, int s2); // Restore round flag void x87_restoreround(dynarec_rv64_t* dyn, int ninst, int s1); // Set rounding according to mxcsr flags, return reg to restore flags -int sse_setround(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3); +int sse_setround(dynarec_rv64_t* dyn, int ninst, int s1, int s2); void CacheTransform(dynarec_rv64_t* dyn, int ninst, int cacheupd, int s1, int s2, int s3); diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h index a36b3bf3..5ffcc875 100644 --- a/src/dynarec/rv64/rv64_emitter.h +++ b/src/dynarec/rv64/rv64_emitter.h @@ -369,8 +369,8 @@ f28–31 ft8–11 FP temporaries Caller // RV32F // Read round mode #define FRRM(rd) CSRRS(rd, xZR, 0x002) -// Swap round mode with rd -#define FSRM(rd) CSRRWI(rd, 0b111, 0x002) +// Swap round mode +#define FSRM(rd, rs) CSRRW(rd, rs, 0x002) // Write FP exception flags, immediate #define FSFLAGSI(imm) CSRRWI(xZR, imm, 0x0001) // Read FP exception flags to rd diff --git a/src/emu/x64rundf.c b/src/emu/x64rundf.c index 
03244715..528a6c3f 100644 --- a/src/emu/x64rundf.c +++ b/src/emu/x64rundf.c @@ -131,7 +131,10 @@ uintptr_t RunDF(x64emu_t *emu, rex_t rex, uintptr_t addr) case 1: /* FISTTP Ew, ST0 */ GETEW(0); tmp16s = ST0.d; - EW->sword[0] = tmp16s; + if(isgreater(ST0.d, (double)(int32_t)0x7fff) || isless(ST0.d, -(double)(int32_t)0x8000) || !isfinite(ST0.d)) + EW->sword[0] = 0x8000; + else + EW->sword[0] = tmp16s; fpu_do_pop(emu); break; case 2: /* FIST Ew, ST0 */ -- cgit 1.4.1