From 4f3b9f19a73df1e614e310341792ef8440b3df3b Mon Sep 17 00:00:00 2001 From: Yang Liu Date: Tue, 20 Feb 2024 19:06:38 +0800 Subject: [DYNAREC_RV64] Added more opcodes and some minor optimizations (#1272) * Added DD /1 FISTTP i64 opcode * Some small optimizations * Added 0F AD SHRD opcode and some minor optimizations on the CF flag computation --- src/dynarec/rv64/dynarec_rv64_0f.c | 14 ++++ src/dynarec/rv64/dynarec_rv64_660f.c | 3 +- src/dynarec/rv64/dynarec_rv64_db.c | 7 +- src/dynarec/rv64/dynarec_rv64_dd.c | 22 ++++++ src/dynarec/rv64/dynarec_rv64_emit_shift.c | 111 +++++++++++++++++++++-------- src/dynarec/rv64/dynarec_rv64_f20f.c | 3 +- src/dynarec/rv64/dynarec_rv64_helper.h | 5 ++ 7 files changed, 126 insertions(+), 39 deletions(-) (limited to 'src') diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c index 83bfa752..3cd3e92f 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f.c +++ b/src/dynarec/rv64/dynarec_rv64_0f.c @@ -1269,6 +1269,20 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni emit_shrd32c(dyn, ninst, rex, ed, gd, u8, x3, x4); WBACK; break; + case 0xAD: + nextop = F8; + INST_NAME("SHRD Ed, Gd, CL"); + SETFLAGS(X_ALL, SF_SET_PENDING); + if (box64_dynarec_safeflags > 1) + MAYSETFLAGS(); + GETGD; + GETED(0); + if (!rex.w && !rex.is32bits && MODREG) { ZEROUP(ed); } + ANDI(x3, xRCX, rex.w ? 0x3f : 0x1f); + BEQ_NEXT(x3, xZR); + emit_shrd32(dyn, ninst, rex, ed, gd, x3, x5, x4); + WBACK; + break; case 0xAE: nextop = F8; if ((nextop & 0xF8) == 0xE8) { diff --git a/src/dynarec/rv64/dynarec_rv64_660f.c b/src/dynarec/rv64/dynarec_rv64_660f.c index dd4ee93a..adc7855e 100644 --- a/src/dynarec/rv64/dynarec_rv64_660f.c +++ b/src/dynarec/rv64/dynarec_rv64_660f.c @@ -1288,8 +1288,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int FLW(d0, wback, fixedaddress + 4 * i); FCVTLS(x3, d0, RD_DYN); SEXT_W(x5, x3); - SUB(x5, x5, x3); - BEQZ(x5, 8); + BEQ(x5, x3, 8); LUI(x3, 0x80000); // INT32_MIN SW(x3, gback, gdoffset + 4 * i); } diff --git a/src/dynarec/rv64/dynarec_rv64_db.c b/src/dynarec/rv64/dynarec_rv64_db.c index 71d77451..a647ee11 100644 --- a/src/dynarec/rv64/dynarec_rv64_db.c +++ b/src/dynarec/rv64/dynarec_rv64_db.c @@ -236,13 +236,10 @@ uintptr_t dynarec64_DB(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni if (!box64_dynarec_fastround) { FRFLAGS(x5); // get back FPSR to check the IOC bit ANDI(x5, x5, 1 << FR_NV); - BNEZ_MARK(x5); - SEXT_W(x5, x4); - BEQ_MARK2(x5, x4); - MARK; + BEQZ_MARK(x5); MOV32w(x4, 0x80000000); + MARK; } - MARK2; SW(x4, wback, fixedaddress); X87_POP_OR_FAIL(dyn, ninst, x3); break; diff --git a/src/dynarec/rv64/dynarec_rv64_dd.c b/src/dynarec/rv64/dynarec_rv64_dd.c index 35273745..d1255655 100644 --- a/src/dynarec/rv64/dynarec_rv64_dd.c +++ b/src/dynarec/rv64/dynarec_rv64_dd.c @@ -160,6 +160,28 @@ uintptr_t dynarec64_DD(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0); FLD(v1, wback, fixedaddress); break; + case 1: + INST_NAME("FISTTP i64, ST0"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_I64); + addr = geted(dyn, addr, ninst, nextop, &wback, x3, x4, &fixedaddress, rex, NULL, 1, 0); + if (ST_IS_I64(0)) { + FSD(v1, wback, fixedaddress); + } else { + if (!box64_dynarec_fastround) { + FSFLAGSI(0); // reset all bits + } + FCVTLD(x4, v1, RD_RTZ); + if (!box64_dynarec_fastround) { + FRFLAGS(x5); // get back FPSR to check the IOC bit + ANDI(x5, x5, 1 << FR_NV); + BEQZ_MARK(x5); + MOV64x(x4, 0x8000000000000000); + MARK; + } + SD(x4, wback, fixedaddress); + } + X87_POP_OR_FAIL(dyn, ninst, x3); + break; case 2: INST_NAME("FST double"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D); diff --git a/src/dynarec/rv64/dynarec_rv64_emit_shift.c b/src/dynarec/rv64/dynarec_rv64_emit_shift.c index f5d5ade1..23bf1097 100644 --- a/src/dynarec/rv64/dynarec_rv64_emit_shift.c +++ b/src/dynarec/rv64/dynarec_rv64_emit_shift.c @@ -148,9 +148,8 @@ void emit_shr32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s IFX(X_CF) { SUBI(s3, s2, 1); SRA(s3, s1, s3); - ANDI(s3, s3, 1); // LSB - BEQZ(s3, 8); - ORI(xFlags, xFlags, 1 << F_CF); + ANDI(s3, s3, 1); // LSB == F_CF + OR(xFlags, xFlags, s3); } SRL(s1, s1, s2); @@ -208,14 +207,12 @@ void emit_shr32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, IFX(X_CF) { if (c > 1) { SRAI(s3, s1, c-1); - ANDI(s3, s3, 1); // LSB - BEQZ(s3, 8); + ANDI(s3, s3, 1); // LSB == F_CF } else { // no need to shift - ANDI(s3, s1, 1); - BEQZ(s3, 8); + ANDI(s3, s1, 1); // LSB == F_CF } - ORI(xFlags, xFlags, 1 << F_CF); + OR(xFlags, xFlags, s3); } SRLIxw(s1, s1, c); @@ -273,14 +270,12 @@ void emit_sar32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, IFX(X_CF) { if (c > 1) { SRAI(s3, s1, c-1); - ANDI(s3, s3, 1); // LSB - BEQZ(s3, 8); + ANDI(s3, s3, 1); // LSB == F_CF } else { // no need to shift - ANDI(s3, s1, 1); - BEQZ(s3, 8); + ANDI(s3, s1, 1); // LSB == F_CF } - ORI(xFlags, xFlags, 1 << F_CF); + OR(xFlags, xFlags, s3); } SRAIxw(s1, s1, c); @@ -519,14 +514,11 @@ void emit_shrd32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin IFX(X_CF) { if (c > 1) { SRAI(s3, s1, c-1); - ANDI(s3, s3, 1); // LSB - BEQZ(s3, 8); + ANDI(s3, s3, 1); // LSB == F_CF } else { - // no need to shift - ANDI(s3, s1, 1); - BEQZ(s3, 8); + ANDI(s3, s1, 1); // LSB == F_CF } - ORI(xFlags, xFlags, 1 << F_CF); + OR(xFlags, xFlags, s3); } SRLIxw(s3, s1, c); @@ -586,14 +578,12 @@ void emit_shrd16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin IFX(X_CF) { if (c > 1) { SRAI(s3, s1, c-1); - ANDI(s3, s3, 1); // LSB - BEQZ(s3, 8); + ANDI(s3, s3, 1); // LSB == F_CF } else { // no need to shift - ANDI(s3, s1, 1); - BEQZ(s3, 8); + ANDI(s3, s1, 1); // LSB == F_CF } - ORI(xFlags, xFlags, 1 << F_CF); + OR(xFlags, xFlags, s3); } SRLIxw(s3, s1, c); @@ -628,7 +618,8 @@ void emit_shrd16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin } } -void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4, int s5) { +void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4, int s5) +{ c&=(rex.w?0x3f:0x1f); CLEAR_FLAGS(); IFX(X_PEND) { @@ -689,7 +680,67 @@ void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin } } -void emit_shld32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s5, int s4, int s3) { + +void emit_shrd32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s5, int s3, int s4) +{ + int64_t j64; + CLEAR_FLAGS(); + IFX(X_PEND) { + SDxw(s1, xEmu, offsetof(x64emu_t, op1)); + SDxw(s5, xEmu, offsetof(x64emu_t, op2)); + SET_DF(s4, rex.w ? d_shrd64 : d_shrd32); + } else IFX(X_ALL) { + SET_DFNONE(); + } + IFX(X_CF) { + SUB(s3, s5, 1); + SRA(s3, s1, s3); + ANDI(s3, s3, 1); // LSB == F_CF + OR(xFlags, xFlags, s3); + } + IFX(X_OF) { + SRLxw(s4, s1, rex.w ? 63 : 31); + BEQZ(s4, 8); + ORI(xFlags, xFlags, 1 << F_OF2); + } + ADDI(s4, xZR, (rex.w ? 64 : 32)); + SUB(s4, s4, s5); + SRLxw(s3, s1, s5); + SLLxw(s4, s2, s4); + OR(s1, s4, s3); + + IFX(X_PEND) { + SDxw(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_SF) { + BGE(s1, xZR, 8); + ORI(xFlags, xFlags, 1 << F_SF); + } + if (!rex.w) { + ZEROUP(s1); + } + IFX(X_ZF) { + BNEZ(s1, 8); + ORI(xFlags, xFlags, 1 << F_ZF); + } + IFX(X_OF) { + ADDI(s5, s5, -1); + BNEZ_MARK(s5); + SRLIxw(s3, s1, rex.w?63:31); + BEXTI(s4, xFlags, F_OF2); + XOR(s3, s3, s4); + ANDI(xFlags, xFlags, ~(1<