diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2024-02-20 19:06:38 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-02-20 12:06:38 +0100 |
| commit | 4f3b9f19a73df1e614e310341792ef8440b3df3b (patch) | |
| tree | ae69a14970a4fc628946e1a82c7a48a27f2b1241 /src | |
| parent | e562baf30b07e2241770dd4d69ab47d597c93ad6 (diff) | |
| download | box64-4f3b9f19a73df1e614e310341792ef8440b3df3b.tar.gz box64-4f3b9f19a73df1e614e310341792ef8440b3df3b.zip | |
[DYNAREC_RV64] Added more opcodes and some minor optimizations (#1272)
* Added DD /1 FISTTP i64 opcode
* Some small optimizations
* Added 0F AD SHRD opcode and some minor optimizations on the CF flag computation
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_0f.c | 14 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_660f.c | 3 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_db.c | 7 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_dd.c | 22 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_emit_shift.c | 111 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_f20f.c | 3 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.h | 5 |
7 files changed, 126 insertions, 39 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c index 83bfa752..3cd3e92f 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f.c +++ b/src/dynarec/rv64/dynarec_rv64_0f.c @@ -1269,6 +1269,20 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni emit_shrd32c(dyn, ninst, rex, ed, gd, u8, x3, x4); WBACK; break; + case 0xAD: + nextop = F8; + INST_NAME("SHRD Ed, Gd, CL"); + SETFLAGS(X_ALL, SF_SET_PENDING); + if (box64_dynarec_safeflags > 1) + MAYSETFLAGS(); + GETGD; + GETED(0); + if (!rex.w && !rex.is32bits && MODREG) { ZEROUP(ed); } + ANDI(x3, xRCX, rex.w ? 0x3f : 0x1f); + BEQ_NEXT(x3, xZR); + emit_shrd32(dyn, ninst, rex, ed, gd, x3, x5, x4); + WBACK; + break; case 0xAE: nextop = F8; if ((nextop & 0xF8) == 0xE8) { diff --git a/src/dynarec/rv64/dynarec_rv64_660f.c b/src/dynarec/rv64/dynarec_rv64_660f.c index dd4ee93a..adc7855e 100644 --- a/src/dynarec/rv64/dynarec_rv64_660f.c +++ b/src/dynarec/rv64/dynarec_rv64_660f.c @@ -1288,8 +1288,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int FLW(d0, wback, fixedaddress + 4 * i); FCVTLS(x3, d0, RD_DYN); SEXT_W(x5, x3); - SUB(x5, x5, x3); - BEQZ(x5, 8); + BEQ(x5, x3, 8); LUI(x3, 0x80000); // INT32_MIN SW(x3, gback, gdoffset + 4 * i); } diff --git a/src/dynarec/rv64/dynarec_rv64_db.c b/src/dynarec/rv64/dynarec_rv64_db.c index 71d77451..a647ee11 100644 --- a/src/dynarec/rv64/dynarec_rv64_db.c +++ b/src/dynarec/rv64/dynarec_rv64_db.c @@ -236,13 +236,10 @@ uintptr_t dynarec64_DB(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni if (!box64_dynarec_fastround) { FRFLAGS(x5); // get back FPSR to check the IOC bit ANDI(x5, x5, 1 << FR_NV); - BNEZ_MARK(x5); - SEXT_W(x5, x4); - BEQ_MARK2(x5, x4); - MARK; + BEQZ_MARK(x5); MOV32w(x4, 0x80000000); + MARK; } - MARK2; SW(x4, wback, fixedaddress); X87_POP_OR_FAIL(dyn, ninst, x3); break; diff --git a/src/dynarec/rv64/dynarec_rv64_dd.c b/src/dynarec/rv64/dynarec_rv64_dd.c index 35273745..d1255655 
100644 --- a/src/dynarec/rv64/dynarec_rv64_dd.c +++ b/src/dynarec/rv64/dynarec_rv64_dd.c @@ -160,6 +160,28 @@ uintptr_t dynarec64_DD(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0); FLD(v1, wback, fixedaddress); break; + case 1: + INST_NAME("FISTTP i64, ST0"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_I64); + addr = geted(dyn, addr, ninst, nextop, &wback, x3, x4, &fixedaddress, rex, NULL, 1, 0); + if (ST_IS_I64(0)) { + FSD(v1, wback, fixedaddress); + } else { + if (!box64_dynarec_fastround) { + FSFLAGSI(0); // reset all bits + } + FCVTLD(x4, v1, RD_RTZ); + if (!box64_dynarec_fastround) { + FRFLAGS(x5); // get back FPSR to check the IOC bit + ANDI(x5, x5, 1 << FR_NV); + BEQZ_MARK(x5); + MOV64x(x4, 0x8000000000000000); + MARK; + } + SD(x4, wback, fixedaddress); + } + X87_POP_OR_FAIL(dyn, ninst, x3); + break; case 2: INST_NAME("FST double"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D); diff --git a/src/dynarec/rv64/dynarec_rv64_emit_shift.c b/src/dynarec/rv64/dynarec_rv64_emit_shift.c index f5d5ade1..23bf1097 100644 --- a/src/dynarec/rv64/dynarec_rv64_emit_shift.c +++ b/src/dynarec/rv64/dynarec_rv64_emit_shift.c @@ -148,9 +148,8 @@ void emit_shr32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s IFX(X_CF) { SUBI(s3, s2, 1); SRA(s3, s1, s3); - ANDI(s3, s3, 1); // LSB - BEQZ(s3, 8); - ORI(xFlags, xFlags, 1 << F_CF); + ANDI(s3, s3, 1); // LSB == F_CF + OR(xFlags, xFlags, s3); } SRL(s1, s1, s2); @@ -208,14 +207,12 @@ void emit_shr32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, IFX(X_CF) { if (c > 1) { SRAI(s3, s1, c-1); - ANDI(s3, s3, 1); // LSB - BEQZ(s3, 8); + ANDI(s3, s3, 1); // LSB == F_CF } else { // no need to shift - ANDI(s3, s1, 1); - BEQZ(s3, 8); + ANDI(s3, s1, 1); // LSB == F_CF } - ORI(xFlags, xFlags, 1 << F_CF); + OR(xFlags, xFlags, s3); } SRLIxw(s1, s1, c); @@ -273,14 +270,12 @@ void 
emit_sar32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, IFX(X_CF) { if (c > 1) { SRAI(s3, s1, c-1); - ANDI(s3, s3, 1); // LSB - BEQZ(s3, 8); + ANDI(s3, s3, 1); // LSB == F_CF } else { // no need to shift - ANDI(s3, s1, 1); - BEQZ(s3, 8); + ANDI(s3, s1, 1); // LSB == F_CF } - ORI(xFlags, xFlags, 1 << F_CF); + OR(xFlags, xFlags, s3); } SRAIxw(s1, s1, c); @@ -519,14 +514,11 @@ void emit_shrd32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin IFX(X_CF) { if (c > 1) { SRAI(s3, s1, c-1); - ANDI(s3, s3, 1); // LSB - BEQZ(s3, 8); + ANDI(s3, s3, 1); // LSB == F_CF } else { - // no need to shift - ANDI(s3, s1, 1); - BEQZ(s3, 8); + ANDI(s3, s1, 1); // LSB == F_CF } - ORI(xFlags, xFlags, 1 << F_CF); + OR(xFlags, xFlags, s3); } SRLIxw(s3, s1, c); @@ -586,14 +578,12 @@ void emit_shrd16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin IFX(X_CF) { if (c > 1) { SRAI(s3, s1, c-1); - ANDI(s3, s3, 1); // LSB - BEQZ(s3, 8); + ANDI(s3, s3, 1); // LSB == F_CF } else { // no need to shift - ANDI(s3, s1, 1); - BEQZ(s3, 8); + ANDI(s3, s1, 1); // LSB == F_CF } - ORI(xFlags, xFlags, 1 << F_CF); + OR(xFlags, xFlags, s3); } SRLIxw(s3, s1, c); @@ -628,7 +618,8 @@ void emit_shrd16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin } } -void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4, int s5) { +void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4, int s5) +{ c&=(rex.w?0x3f:0x1f); CLEAR_FLAGS(); IFX(X_PEND) { @@ -689,7 +680,67 @@ void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin } } -void emit_shld32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s5, int s4, int s3) { + +void emit_shrd32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s5, int s3, int s4) +{ + int64_t j64; + CLEAR_FLAGS(); + IFX(X_PEND) { + SDxw(s1, xEmu, offsetof(x64emu_t, op1)); + SDxw(s5, xEmu, 
offsetof(x64emu_t, op2)); + SET_DF(s4, rex.w ? d_shrd64 : d_shrd32); + } else IFX(X_ALL) { + SET_DFNONE(); + } + IFX(X_CF) { + SUBI(s3, s5, 1); + SRA(s3, s1, s3); + ANDI(s3, s3, 1); // LSB == F_CF + OR(xFlags, xFlags, s3); + } + IFX(X_OF) { + SRLIxw(s4, s1, rex.w ? 63 : 31); + BEQZ(s4, 8); + ORI(xFlags, xFlags, 1 << F_OF2); + } + ADDI(s4, xZR, (rex.w ? 64 : 32)); + SUB(s4, s4, s5); + SRLxw(s3, s1, s5); + SLLxw(s4, s2, s4); + OR(s1, s4, s3); + + IFX(X_PEND) { + SDxw(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_SF) { + BGE(s1, xZR, 8); + ORI(xFlags, xFlags, 1 << F_SF); + } + if (!rex.w) { + ZEROUP(s1); + } + IFX(X_ZF) { + BNEZ(s1, 8); + ORI(xFlags, xFlags, 1 << F_ZF); + } + IFX(X_OF) { + ADDI(s5, s5, -1); + BNEZ_MARK(s5); + SRLIxw(s3, s1, rex.w?63:31); + BEXTI(s4, xFlags, F_OF2); + XOR(s3, s3, s4); + ANDI(xFlags, xFlags, ~(1<<F_OF2)); + BEQZ(s3, 8); + ORI(xFlags, xFlags, 1 << F_OF2); + MARK; + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} + +void emit_shld32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s5, int s3, int s4) +{ int64_t j64; CLEAR_FLAGS(); IFX(X_PEND) { @@ -703,9 +754,8 @@ void emit_shld32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int SUB(s3, s3, s5); IFX(X_CF) { SRL(s4, s1, s3); - ANDI(s4, s4, 1); - BEQZ(s4, 8); - ORI(xFlags, xFlags, 1 << F_CF); + ANDI(s4, s4, 1); // LSB == F_CF + OR(xFlags, xFlags, s4); } IFX(X_OF) { SRLxw(s4, s1, rex.w?63:31); @@ -746,7 +796,8 @@ void emit_shld32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int } } -void emit_shld16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4, int s5) { +void emit_shld16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4, int s5) +{ c&=15; CLEAR_FLAGS(); IFX(X_PEND) { diff --git a/src/dynarec/rv64/dynarec_rv64_f20f.c b/src/dynarec/rv64/dynarec_rv64_f20f.c index 04f23a70..2d063aba 100644 --- a/src/dynarec/rv64/dynarec_rv64_f20f.c +++
b/src/dynarec/rv64/dynarec_rv64_f20f.c @@ -365,8 +365,7 @@ uintptr_t dynarec64_F20F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int FLD(d0, wback, fixedaddress+8*i); FCVTLD(x3, d0, RD_DYN); SEXT_W(x5, x3); - SUB(x5, x5, x3); - BEQZ(x5, 8); + BEQ(x5, x3, 8); LUI(x3, 0x80000); // INT32_MIN SW(x3, gback, gdoffset+4*i); } diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h index 85ffd5e3..e1048260 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.h +++ b/src/dynarec/rv64/dynarec_rv64_helper.h @@ -685,6 +685,9 @@ // Branch to MARK if reg1==reg2 (use j64) #define BEQ_MARK(reg1, reg2) Bxx_gen(EQ, MARK, reg1, reg2) #define BEQ_MARKi(reg1, reg2, i) Bxx_geni(EQ, MARK, reg1, reg2, i) +// Branch to MARK if reg==0 (use j64) +#define BEQZ_MARK(reg) BEQ_MARK(reg, xZR) +#define BEQZ_MARKi(reg, i) BEQ_MARKi(reg, xZR, i) // Branch to MARK if reg1!=reg2 (use j64) #define BNE_MARK(reg1, reg2) Bxx_gen(NE, MARK, reg1, reg2) #define BNE_MARKi(reg1, reg2, i) Bxx_geni(NE, MARK, reg1, reg2, i) @@ -1161,6 +1164,7 @@ void* rv64_next(x64emu_t* emu, uintptr_t addr); #define emit_ror32c STEPNAME(emit_ror32c) #define emit_shrd32c STEPNAME(emit_shrd32c) #define emit_shld32c STEPNAME(emit_shld32c) +#define emit_shrd32 STEPNAME(emit_shrd32) #define emit_shld32 STEPNAME(emit_shld32) #define emit_shld16c STEPNAME(emit_shld16c) #define emit_shrd16c STEPNAME(emit_shrd16c) @@ -1299,6 +1303,7 @@ void emit_rol32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, void emit_ror32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4); void emit_shrd32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4); void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4, int s5); +void emit_shrd32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s5, int s3, int s4); void emit_shld32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int
s2, int s5, int s3, int s4); void emit_shrd16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4); void emit_shld16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4, int s5); |