diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2025-03-17 17:46:30 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-03-17 10:46:30 +0100 |
| commit | 18888e404e4d91abfa597c5358570c3976c704e5 (patch) | |
| tree | 576af19aad335ccf9a76e5251c6abf65caf9976f /src | |
| parent | 394513971cf97619f34de6f84a5792d8ecd8f9c7 (diff) | |
| download | box64-18888e404e4d91abfa597c5358570c3976c704e5.tar.gz box64-18888e404e4d91abfa597c5358570c3976c704e5.zip | |
[RV64_DYNAREC] Minor D8..DF opcodes refactor (#2442)
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_d8.c | 435 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_d9.c | 541 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_da.c | 367 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_db.c | 479 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_dc.c | 346 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_dd.c | 305 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_de.c | 282 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_df.c | 500 |
8 files changed, 1552 insertions, 1703 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_d8.c b/src/dynarec/rv64/dynarec_rv64_d8.c index 94503ea9..6dd32420 100644 --- a/src/dynarec/rv64/dynarec_rv64_d8.c +++ b/src/dynarec/rv64/dynarec_rv64_d8.c @@ -43,221 +43,224 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni MAYUSE(v2); MAYUSE(v1); - switch (nextop) { - case 0xC0 ... 0xC7: - INST_NAME("FADD ST0, STx"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); - v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); - if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2); - if (ST_IS_F(0)) { - FADDS(v1, v1, v2); - } else { - FADDD(v1, v1, v2); - } - if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); - break; - case 0xC8 ... 0xCF: - INST_NAME("FMUL ST0, STx"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); - v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); - if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2); - if (ST_IS_F(0)) { - FMULS(v1, v1, v2); - } else { - FMULD(v1, v1, v2); - } - if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); - break; - case 0xD0 ... 0xD7: - INST_NAME("FCOM ST0, STx"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); - v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); - if (ST_IS_F(0)) { - FCOMS(v1, v2, x1, x2, x3, x4, x5); - } else { - FCOMD(v1, v2, x1, x2, x3, x4, x5); - } - break; - case 0xD8 ... 0xDF: - INST_NAME("FCOMP ST0, STx"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); - v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); - if (ST_IS_F(0)) { - FCOMS(v1, v2, x1, x2, x3, x4, x5); - } else { - FCOMD(v1, v2, x1, x2, x3, x4, x5); - } - X87_POP_OR_FAIL(dyn, ninst, x3); - break; - case 0xE0 ... 0xE7: - INST_NAME("FSUB ST0, STx"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); - v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); - if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2); - if (ST_IS_F(0)) { - FSUBS(v1, v1, v2); - } else { - FSUBD(v1, v1, v2); - } - if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); - break; - case 0xE8 ... 0xEF: - INST_NAME("FSUBR ST0, STx"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); - v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); - if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2); - if (ST_IS_F(0)) { - FSUBS(v1, v2, v1); - } else { - FSUBD(v1, v2, v1); - } - if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); - break; - case 0xF0 ... 0xF7: - INST_NAME("FDIV ST0, STx"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); - v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); - if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2); - if (ST_IS_F(0)) { - FDIVS(v1, v1, v2); - } else { - FDIVD(v1, v1, v2); - } - if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); - break; - case 0xF8 ... 0xFF: - INST_NAME("FDIVR ST0, STx"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); - v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); - if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2); - if (ST_IS_F(0)) { - FDIVS(v1, v2, v1); - } else { - FDIVD(v1, v2, v1); - } - if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); - break; - - default: - switch ((nextop >> 3) & 7) { - case 0: - INST_NAME("FADD ST0, float[ED]"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0); - s0 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); - FLW(s0, ed, fixedaddress); - if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2); - if (ST_IS_F(0)) { - FADDS(v1, v1, s0); - } else { - FCVTDS(s0, s0); - FADDD(v1, v1, s0); - } - if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); - break; - case 1: - INST_NAME("FMUL ST0, float[ED]"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0); - s0 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); - FLW(s0, ed, fixedaddress); - if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2); - if (ST_IS_F(0)) { - FMULS(v1, v1, s0); - } else { - FCVTDS(s0, s0); - FMULD(v1, v1, s0); - } - if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); - break; - case 2: - INST_NAME("FCOM ST0, float[ED]"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0); - s0 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); - FLW(s0, ed, fixedaddress); - if (ST_IS_F(0)) { - FCOMS(v1, s0, x1, x6, x3, x4, x5); - } else { - FCVTDS(s0, s0); - FCOMD(v1, s0, x1, x6, x3, x4, x5); - } - break; - case 3: - INST_NAME("FCOMP ST0, float[ED]"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0); - s0 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); - FLW(s0, ed, fixedaddress); - if (ST_IS_F(0)) { - FCOMS(v1, s0, x1, x6, x3, x4, x5); - } else { - FCVTDS(s0, s0); - FCOMD(v1, s0, x1, x6, x3, x4, x5); - } - X87_POP_OR_FAIL(dyn, ninst, x3); - break; - case 4: - INST_NAME("FSUB ST0, float[ED]"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0); - s0 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); - FLW(s0, ed, fixedaddress); - if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2); - if (ST_IS_F(0)) { - FSUBS(v1, v1, s0); - } else { - FCVTDS(s0, s0); - FSUBD(v1, v1, s0); - } - if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); - break; - case 5: - INST_NAME("FSUBR ST0, float[ED]"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0); - s0 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); - FLW(s0, ed, fixedaddress); - if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2); - if (ST_IS_F(0)) { - FSUBS(v1, s0, v1); - } else { - FCVTDS(s0, s0); - FSUBD(v1, s0, v1); - } - if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); - break; - case 6: - INST_NAME("FDIV ST0, float[ED]"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0); - s0 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); - FLW(s0, ed, fixedaddress); - if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2); - if (ST_IS_F(0)) { - FDIVS(v1, v1, s0); - } else { - FCVTDS(s0, s0); - FDIVD(v1, v1, s0); - } - if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); - break; - case 7: - INST_NAME("FDIVR ST0, float[ED]"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0); - s0 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); - FLW(s0, ed, fixedaddress); - if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2); - if (ST_IS_F(0)) { - FDIVS(v1, s0, v1); - } else { - FCVTDS(s0, s0); - FDIVD(v1, s0, v1); - } - if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); - break; - } - } + if (MODREG) + switch (nextop) { + case 0xC0 ... 0xC7: + INST_NAME("FADD ST0, STx"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); + v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); + if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2); + if (ST_IS_F(0)) { + FADDS(v1, v1, v2); + } else { + FADDD(v1, v1, v2); + } + if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); + break; + case 0xC8 ... 0xCF: + INST_NAME("FMUL ST0, STx"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); + v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); + if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2); + if (ST_IS_F(0)) { + FMULS(v1, v1, v2); + } else { + FMULD(v1, v1, v2); + } + if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); + break; + case 0xD0 ... 0xD7: + INST_NAME("FCOM ST0, STx"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); + v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); + if (ST_IS_F(0)) { + FCOMS(v1, v2, x1, x2, x3, x4, x5); + } else { + FCOMD(v1, v2, x1, x2, x3, x4, x5); + } + break; + case 0xD8 ... 0xDF: + INST_NAME("FCOMP ST0, STx"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); + v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); + if (ST_IS_F(0)) { + FCOMS(v1, v2, x1, x2, x3, x4, x5); + } else { + FCOMD(v1, v2, x1, x2, x3, x4, x5); + } + X87_POP_OR_FAIL(dyn, ninst, x3); + break; + case 0xE0 ... 0xE7: + INST_NAME("FSUB ST0, STx"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); + v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); + if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2); + if (ST_IS_F(0)) { + FSUBS(v1, v1, v2); + } else { + FSUBD(v1, v1, v2); + } + if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); + break; + case 0xE8 ... 0xEF: + INST_NAME("FSUBR ST0, STx"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); + v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); + if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2); + if (ST_IS_F(0)) { + FSUBS(v1, v2, v1); + } else { + FSUBD(v1, v2, v1); + } + if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); + break; + case 0xF0 ... 0xF7: + INST_NAME("FDIV ST0, STx"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); + v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); + if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2); + if (ST_IS_F(0)) { + FDIVS(v1, v1, v2); + } else { + FDIVD(v1, v1, v2); + } + if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); + break; + case 0xF8 ... 0xFF: + INST_NAME("FDIVR ST0, STx"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); + v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); + if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2); + if (ST_IS_F(0)) { + FDIVS(v1, v2, v1); + } else { + FDIVD(v1, v2, v1); + } + if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); + break; + default: + DEFAULT; + break; + } + else + switch ((nextop >> 3) & 7) { + case 0: + INST_NAME("FADD ST0, float[ED]"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0); + s0 = fpu_get_scratch(dyn); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); + FLW(s0, ed, fixedaddress); + if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2); + if (ST_IS_F(0)) { + FADDS(v1, v1, s0); + } else { + FCVTDS(s0, s0); + FADDD(v1, v1, s0); + } + if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); + break; + case 1: + INST_NAME("FMUL ST0, float[ED]"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0); + s0 = fpu_get_scratch(dyn); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); + FLW(s0, ed, fixedaddress); + if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2); + if (ST_IS_F(0)) { + FMULS(v1, v1, s0); + } else { + FCVTDS(s0, s0); + FMULD(v1, v1, s0); + } + if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); + break; + case 2: + INST_NAME("FCOM ST0, float[ED]"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0); + s0 = fpu_get_scratch(dyn); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); + FLW(s0, ed, fixedaddress); + if (ST_IS_F(0)) { + FCOMS(v1, s0, x1, x6, x3, x4, x5); + } else { + FCVTDS(s0, s0); + FCOMD(v1, s0, x1, x6, x3, x4, x5); + } + break; + case 3: + INST_NAME("FCOMP ST0, float[ED]"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0); + s0 = fpu_get_scratch(dyn); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); + FLW(s0, ed, fixedaddress); + if (ST_IS_F(0)) { + FCOMS(v1, s0, x1, x6, x3, x4, x5); + } else { + FCVTDS(s0, s0); + FCOMD(v1, s0, x1, x6, x3, x4, x5); + } + X87_POP_OR_FAIL(dyn, ninst, x3); + break; + case 4: + INST_NAME("FSUB ST0, float[ED]"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0); + s0 = fpu_get_scratch(dyn); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); + FLW(s0, ed, fixedaddress); + if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2); + if (ST_IS_F(0)) { + FSUBS(v1, v1, s0); + } else { + FCVTDS(s0, s0); + FSUBD(v1, v1, s0); + } + if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); + break; + case 5: + INST_NAME("FSUBR ST0, float[ED]"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0); + s0 = fpu_get_scratch(dyn); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); + FLW(s0, ed, fixedaddress); + if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2); + if (ST_IS_F(0)) { + FSUBS(v1, s0, v1); + } else { + FCVTDS(s0, s0); + FSUBD(v1, s0, v1); + } + if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); + break; + case 6: + INST_NAME("FDIV ST0, float[ED]"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0); + s0 = fpu_get_scratch(dyn); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); + FLW(s0, ed, fixedaddress); + if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2); + if (ST_IS_F(0)) { + FDIVS(v1, v1, s0); + } else { + FCVTDS(s0, s0); + FDIVD(v1, v1, s0); + } + if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); + break; + case 7: + INST_NAME("FDIVR ST0, float[ED]"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0); + s0 = fpu_get_scratch(dyn); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); + FLW(s0, ed, fixedaddress); + if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2); + if (ST_IS_F(0)) { + FDIVS(v1, s0, v1); + } else { + FCVTDS(s0, s0); + FDIVD(v1, s0, v1); + } + if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); + break; + } return addr; } diff --git a/src/dynarec/rv64/dynarec_rv64_d9.c b/src/dynarec/rv64/dynarec_rv64_d9.c index 52c8ec10..f337a020 100644 --- a/src/dynarec/rv64/dynarec_rv64_d9.c +++ b/src/dynarec/rv64/dynarec_rv64_d9.c @@ -46,220 +46,201 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni MAYUSE(v2); MAYUSE(j64); - switch (nextop) { - case 0xC0: - case 0xC1: - case 0xC2: - case 0xC3: - case 0xC4: - case 0xC5: - case 0xC6: - case 0xC7: - INST_NAME("FLD STx"); - X87_PUSH_OR_FAIL(v2, dyn, ninst, x1, X87_ST(nextop & 7)); - v1 = x87_get_st(dyn, ninst, x1, x2, (nextop & 7) + 1, X87_COMBINE(0, (nextop & 7) + 1)); - if (ST_IS_F(0)) { - FMVS(v2, v1); - } else { - FMVD(v2, v1); - } - break; + if (MODREG) + switch (nextop) { + case 0xC0 ... 0xC7: + INST_NAME("FLD STx"); + X87_PUSH_OR_FAIL(v2, dyn, ninst, x1, X87_ST(nextop & 7)); + v1 = x87_get_st(dyn, ninst, x1, x2, (nextop & 7) + 1, X87_COMBINE(0, (nextop & 7) + 1)); + if (ST_IS_F(0)) { + FMVS(v2, v1); + } else { + FMVD(v2, v1); + } + break; - case 0xC8: - INST_NAME("FXCH ST0"); - break; - case 0xC9: - case 0xCA: - case 0xCB: - case 0xCC: - case 0xCD: - case 0xCE: - case 0xCF: - INST_NAME("FXCH STx"); - // swap the cache value, not the double value itself :p - x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_ST(nextop & 7)); - x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0); - x87_swapreg(dyn, ninst, x1, x2, 0, nextop & 7); - // should set C1 to 0 - break; + case 0xC8: + INST_NAME("FXCH ST0"); + break; + case 0xC9 ... 0xCF: + INST_NAME("FXCH STx"); + // swap the cache value, not the double value itself :p + x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_ST(nextop & 7)); + x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0); + x87_swapreg(dyn, ninst, x1, x2, 0, nextop & 7); + // should set C1 to 0 + break; - case 0xD0: - INST_NAME("FNOP"); - break; + case 0xD0: + INST_NAME("FNOP"); + break; - case 0xD8: - INST_NAME("FSTPNCE ST0, ST0"); - X87_POP_OR_FAIL(dyn, ninst, x3); - break; - case 0xD9: - case 0xDA: - case 0xDB: - case 0xDC: - case 0xDD: - case 0xDE: - case 0xDF: - INST_NAME("FSTPNCE ST0, STx"); - // copy the cache value for st0 to stx - x87_get_st_empty(dyn, ninst, x1, x2, nextop & 7, X87_ST(nextop & 7)); - x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0); - x87_swapreg(dyn, ninst, x1, x2, 0, nextop & 7); - X87_POP_OR_FAIL(dyn, ninst, x3); - break; - case 0xE0: - INST_NAME("FCHS"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0); - if (ST_IS_F(0)) { - FNEGS(v1, v1); - } else { - FNEGD(v1, v1); - } - break; - case 0xE1: - INST_NAME("FABS"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0); - if (ST_IS_F(0)) { - FABSS(v1, v1); - } else { - FABSD(v1, v1); - } - break; + case 0xD8: + INST_NAME("FSTPNCE ST0, ST0"); + X87_POP_OR_FAIL(dyn, ninst, x3); + break; + case 0xD9 ... 0xDF: + INST_NAME("FSTPNCE ST0, STx"); + // copy the cache value for st0 to stx + x87_get_st_empty(dyn, ninst, x1, x2, nextop & 7, X87_ST(nextop & 7)); + x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0); + x87_swapreg(dyn, ninst, x1, x2, 0, nextop & 7); + X87_POP_OR_FAIL(dyn, ninst, x3); + break; + case 0xE0: + INST_NAME("FCHS"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0); + if (ST_IS_F(0)) { + FNEGS(v1, v1); + } else { + FNEGD(v1, v1); + } + break; + case 0xE1: + INST_NAME("FABS"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0); + if (ST_IS_F(0)) { + FABSS(v1, v1); + } else { + FABSD(v1, v1); + } + break; - case 0xE4: - INST_NAME("FTST"); - DEFAULT - break; - case 0xE5: - INST_NAME("FXAM"); + case 0xE4: + INST_NAME("FTST"); + DEFAULT; + break; + case 0xE5: + INST_NAME("FXAM"); #if 1 - i1 = x87_get_current_cache(dyn, ninst, 0, EXT_CACHE_ST_D); - // value put in x14 - if (i1 == -1) { - if (fpu_is_st_freed(dyn, ninst, 0)) { - MOV32w(x4, 0b100000100000000); - B_MARK3_nocond; - } else { - // not in cache, so check Empty status and load it - i2 = -dyn->e.x87stack; - LWU(x3, xEmu, offsetof(x64emu_t, fpu_stack)); - if (i2) { - ADDI(x3, x3, i2); - } - MOV32w(x4, 0b100000100000000); // empty: C3,C2,C0 = 101 - BGE_MARK3(xZR, x3); - // x5 will be the actual top - LWU(x5, xEmu, offsetof(x64emu_t, top)); - if (i2) { - ADDI(x5, x5, i2); - ANDI(x5, x5, 7); // (emu->top + i)&7 - } - // load x2 with ST0 anyway, for sign extraction - if (rv64_zba) - SH3ADD(x1, x5, xEmu); - else { - SLLI(x5, x5, 3); - ADD(x1, xEmu, x5); - } - LD(x2, x1, offsetof(x64emu_t, x87)); - // load tag - if (i2 >= 0) { - LHU(x3, xEmu, offsetof(x64emu_t, fpu_tags)); - if (i2 > 0) { - LUI(x5, 0xffff0); - OR(x3, x3, x5); - SRLI(x3, x3, i2 * 2); + i1 = x87_get_current_cache(dyn, ninst, 0, EXT_CACHE_ST_D); + // value put in x14 + if (i1 == -1) { + if (fpu_is_st_freed(dyn, ninst, 0)) { + MOV32w(x4, 0b100000100000000); + B_MARK3_nocond; + } else { + // not in cache, so check Empty status and load it + i2 = -dyn->e.x87stack; + LWU(x3, xEmu, offsetof(x64emu_t, fpu_stack)); + if (i2) { + ADDI(x3, x3, i2); + } + MOV32w(x4, 0b100000100000000); // empty: C3,C2,C0 = 101 + BGE_MARK3(xZR, x3); + // x5 will be the actual top + LWU(x5, xEmu, offsetof(x64emu_t, top)); + if (i2) { + ADDI(x5, x5, i2); + ANDI(x5, x5, 7); // (emu->top + i)&7 + } + // load x2 with ST0 anyway, for sign extraction + if (rv64_zba) + SH3ADD(x1, x5, xEmu); + else { + SLLI(x5, x5, 3); + ADD(x1, xEmu, x5); + } + LD(x2, x1, offsetof(x64emu_t, x87)); + // load tag + if (i2 >= 0) { + LHU(x3, xEmu, offsetof(x64emu_t, fpu_tags)); + if (i2 > 0) { + LUI(x5, 0xffff0); + OR(x3, x3, x5); + SRLI(x3, x3, i2 * 2); + } + ANDI(x3, x3, 0b11); + BNEZ_MARK3(x3); // empty: C3,C2,C0 = 101 } - ANDI(x3, x3, 0b11); - BNEZ_MARK3(x3); // empty: C3,C2,C0 = 101 } + } else { + // simply move from cache reg to x2 + v1 = dyn->e.x87reg[i1]; + FMVXD(x2, v1); } - } else { - // simply move from cache reg to x2 - v1 = dyn->e.x87reg[i1]; - FMVXD(x2, v1); - } - // get exponant in x1 - SRLI(x1, x2, 20 + 32); - ANDI(x1, x1, 0x7ff); // 0x7ff - BNEZ_MARK(x1); // not zero or denormal - MOV64x(x3, 0x7fffffffffffffff); - AND(x1, x2, x3); - MOV32w(x4, 0b100000000000000); // Zero: C3,C2,C0 = 100 - BEQZ_MARK3(x1); - MOV32w(x4, 0b100010000000000); // Denormal: C3,C2,C0 = 110 - B_MARK3_nocond; - MARK; - ADDI(x3, xZR, 0x7ff); // infinite/NaN? - MOV32w(x4, 0b000010000000000); // normal: C3,C2,C0 = 010 - BNE_MARK3(x1, x3); - SLLI(x3, x2, 12); - SRLI(x3, x3, 12); // and 0x000fffffffffffff - MOV32w(x4, 0b000010100000000); // infinity: C3,C2,C0 = 011 - BEQZ_MARK3(x3); - MOV32w(x4, 0b000000100000000); // NaN: C3,C2,C0 = 001 - MARK3; - // Extract signa & Update SW - SRLI(x1, x2, 63); - SLLI(x1, x1, 9); - OR(x4, x4, x1); // C1 - LHU(x1, xEmu, offsetof(x64emu_t, sw)); - MOV32w(x2, ~0b0100011100000000); - AND(x1, x1, x2); - OR(x4, x4, x1); - SH(x4, xEmu, offsetof(x64emu_t, sw)); + // get exponant in x1 + SRLI(x1, x2, 20 + 32); + ANDI(x1, x1, 0x7ff); // 0x7ff + BNEZ_MARK(x1); // not zero or denormal + MOV64x(x3, 0x7fffffffffffffff); + AND(x1, x2, x3); + MOV32w(x4, 0b100000000000000); // Zero: C3,C2,C0 = 100 + BEQZ_MARK3(x1); + MOV32w(x4, 0b100010000000000); // Denormal: C3,C2,C0 = 110 + B_MARK3_nocond; + MARK; + ADDI(x3, xZR, 0x7ff); // infinite/NaN? + MOV32w(x4, 0b000010000000000); // normal: C3,C2,C0 = 010 + BNE_MARK3(x1, x3); + SLLI(x3, x2, 12); + SRLI(x3, x3, 12); // and 0x000fffffffffffff + MOV32w(x4, 0b000010100000000); // infinity: C3,C2,C0 = 011 + BEQZ_MARK3(x3); + MOV32w(x4, 0b000000100000000); // NaN: C3,C2,C0 = 001 + MARK3; + // Extract signa & Update SW + SRLI(x1, x2, 63); + SLLI(x1, x1, 9); + OR(x4, x4, x1); // C1 + LHU(x1, xEmu, offsetof(x64emu_t, sw)); + MOV32w(x2, ~0b0100011100000000); + AND(x1, x1, x2); + OR(x4, x4, x1); + SH(x4, xEmu, offsetof(x64emu_t, sw)); #else - MESSAGE(LOG_DUMP, "Need Optimization\n"); - x87_refresh(dyn, ninst, x1, x2, 0); - s0 = x87_stackcount(dyn, ninst, x1); - CALL(fpu_fxam, -1, 0, 0); // should be possible inline, but is it worth it? - x87_unstackcount(dyn, ninst, x1, s0); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + x87_refresh(dyn, ninst, x1, x2, 0); + s0 = x87_stackcount(dyn, ninst, x1); + CALL(fpu_fxam, -1, 0, 0); // should be possible inline, but is it worth it? + x87_unstackcount(dyn, ninst, x1, s0); #endif - break; + break; - case 0xE8: - INST_NAME("FLD1"); - X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_F); - if (ST_IS_F(0)) { - MOV32w(x1, 0x3f800000); - FMVWX(v1, x1); - } else { - MOV64x(x1, 0x3FF0000000000000); - FMVDX(v1, x1); - } - break; - case 0xE9: - INST_NAME("FLDL2T"); - X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_D); - FTABLE64(v1, L2T); - break; - case 0xEA: - INST_NAME("FLDL2E"); - X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_D); - FTABLE64(v1, L2E); - break; - case 0xEB: - INST_NAME("FLDPI"); - X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_D); - FTABLE64(v1, PI); - break; - case 0xEC: - INST_NAME("FLDLG2"); - X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_D); - FTABLE64(v1, LG2); - break; - case 0xED: - INST_NAME("FLDLN2"); - X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_D); - FTABLE64(v1, LN2); - break; - case 0xEE: - INST_NAME("FLDZ"); - X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_F); - if (ST_IS_F(0)) { - FMVWX(v1, xZR); - } else { - FMVDX(v1, xZR); - } - break; + case 0xE8: + INST_NAME("FLD1"); + X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_F); + if (ST_IS_F(0)) { + MOV32w(x1, 0x3f800000); + FMVWX(v1, x1); + } else { + MOV64x(x1, 0x3FF0000000000000); + FMVDX(v1, x1); + } + break; + case 0xE9: + INST_NAME("FLDL2T"); + X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_D); + FTABLE64(v1, L2T); + break; + case 0xEA: + INST_NAME("FLDL2E"); + X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_D); + FTABLE64(v1, L2E); + break; + case 0xEB: + INST_NAME("FLDPI"); + X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_D); + FTABLE64(v1, PI); + break; + case 0xEC: + INST_NAME("FLDLG2"); + X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_D); + FTABLE64(v1, LG2); + break; + case 0xED: + INST_NAME("FLDLN2"); + X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_D); + FTABLE64(v1, LN2); + break; + case 0xEE: + INST_NAME("FLDZ"); + X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_F); + if (ST_IS_F(0)) + FMVWX(v1, xZR); + else + FMVDX(v1, xZR); + break; case 0xF0: INST_NAME("F2XM1"); @@ -455,90 +436,78 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); x87_unstackcount(dyn, ninst, x3, s0); break; - - - case 0xD1: - case 0xD4: - case 0xD5: - case 0xD6: - case 0xD7: - case 0xE2: - case 0xE3: - case 0xE6: - case 0xE7: - case 0xEF: + default: DEFAULT; break; - - default: - switch ((nextop >> 3) & 7) { - case 0: - INST_NAME("FLD ST0, float[ED]"); - X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, BOX64ENV(dynarec_x87double) ? EXT_CACHE_ST_D : EXT_CACHE_ST_F); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); - FLW(v1, ed, fixedaddress); - if (!ST_IS_F(0)) { - FCVTDS(v1, v1); - } - break; - case 2: - INST_NAME("FST float[ED], ST0"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_F); - if (ST_IS_F(0)) - s0 = v1; - else { - s0 = fpu_get_scratch(dyn); - if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2); - FCVTSD(s0, v1); - if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); - } - addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); - FSW(s0, ed, fixedaddress); - break; - case 3: - INST_NAME("FSTP float[ED], ST0"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_F); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); - if (!ST_IS_F(0)) { - if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2); - FCVTSD(v1, v1); - if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); - } - FSW(v1, ed, fixedaddress); - X87_POP_OR_FAIL(dyn, ninst, x3); - break; - case 4: - INST_NAME("FLDENV Ed"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - fpu_purgecache(dyn, ninst, 0, x1, x2, x3); // maybe only x87, not SSE? - addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0); - MOV32w(x2, 0); - CALL(fpu_loadenv, -1, ed, x2); - break; - case 5: - INST_NAME("FLDCW Ew"); - GETEW(x1, 0); - SH(x1, xEmu, offsetof(x64emu_t, cw)); // hopefully cw is not too far for an imm8 - break; - case 6: - INST_NAME("FNSTENV Ed"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - fpu_purgecache(dyn, ninst, 0, x1, x2, x3); // maybe only x87, not SSE? - addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0); - MOV32w(x2, 0); - CALL(fpu_savenv, -1, ed, x2); - break; - case 7: - INST_NAME("FNSTCW Ew"); - addr = geted(dyn, addr, ninst, nextop, &wback, x3, x1, &fixedaddress, rex, NULL, 0, 0); - ed = x1; - wb1 = 1; - LH(x1, xEmu, offsetof(x64emu_t, cw)); - EWBACK; - break; - default: - DEFAULT; - } - } + } + else + switch ((nextop >> 3) & 7) { + case 0: + INST_NAME("FLD ST0, float[ED]"); + X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, BOX64ENV(dynarec_x87double) ? EXT_CACHE_ST_D : EXT_CACHE_ST_F); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); + FLW(v1, ed, fixedaddress); + if (!ST_IS_F(0)) { + FCVTDS(v1, v1); + } + break; + case 2: + INST_NAME("FST float[ED], ST0"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_F); + if (ST_IS_F(0)) + s0 = v1; + else { + s0 = fpu_get_scratch(dyn); + if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2); + FCVTSD(s0, v1); + if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); + } + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); + FSW(s0, ed, fixedaddress); + break; + case 3: + INST_NAME("FSTP float[ED], ST0"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_F); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); + if (!ST_IS_F(0)) { + if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2); + FCVTSD(v1, v1); + if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); + } + FSW(v1, ed, fixedaddress); + X87_POP_OR_FAIL(dyn, ninst, x3); + break; + case 4: + INST_NAME("FLDENV Ed"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + fpu_purgecache(dyn, ninst, 0, x1, x2, x3); // maybe only x87, not SSE? + addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0); + MOV32w(x2, 0); + CALL(fpu_loadenv, -1, ed, x2); + break; + case 5: + INST_NAME("FLDCW Ew"); + GETEW(x1, 0); + SH(x1, xEmu, offsetof(x64emu_t, cw)); // hopefully cw is not too far for an imm8 + break; + case 6: + INST_NAME("FNSTENV Ed"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + fpu_purgecache(dyn, ninst, 0, x1, x2, x3); // maybe only x87, not SSE? + addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0); + MOV32w(x2, 0); + CALL(fpu_savenv, -1, ed, x2); + break; + case 7: + INST_NAME("FNSTCW Ew"); + addr = geted(dyn, addr, ninst, nextop, &wback, x3, x1, &fixedaddress, rex, NULL, 0, 0); + ed = x1; + wb1 = 1; + LH(x1, xEmu, offsetof(x64emu_t, cw)); + EWBACK; + break; + default: + DEFAULT; + } return addr; } diff --git a/src/dynarec/rv64/dynarec_rv64_da.c b/src/dynarec/rv64/dynarec_rv64_da.c index 7609d877..165ae3c8 100644 --- a/src/dynarec/rv64/dynarec_rv64_da.c +++ b/src/dynarec/rv64/dynarec_rv64_da.c @@ -43,196 +43,187 @@ uintptr_t dynarec64_DA(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni MAYUSE(ed); MAYUSE(j64); - switch (nextop) { - case 0xC0: - case 0xC1: - case 0xC2: - case 0xC3: - case 0xC4: - case 0xC5: - case 0xC6: - case 0xC7: - INST_NAME("FCMOVB ST0, STx"); - READFLAGS(X_CF); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); - v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); - ANDI(x1, xFlags, 1 << F_CF); - CBZ_NEXT(x1); - if (ST_IS_F(0)) - FMVS(v1, v2); - else - FMVD(v1, v2); - break; - case 0xC8: - case 0xC9: - case 0xCA: - case 0xCB: - case 0xCC: - case 0xCD: - case 0xCE: - case 0xCF: - INST_NAME("FCMOVE ST0, STx"); - READFLAGS(X_ZF); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); - v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); - ANDI(x1, xFlags, 1 << F_ZF); - CBZ_NEXT(x1); - if (ST_IS_F(0)) - FMVS(v1, v2); - else - FMVD(v1, v2); - break; - case 0xD0: - case 0xD1: - case 0xD2: - case 0xD3: - case 0xD4: - case 0xD5: - case 0xD6: - case 0xD7: - INST_NAME("FCMOVBE ST0, STx"); - READFLAGS(X_CF | X_ZF); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); - v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); - ANDI(x1, xFlags, (1 << F_CF) | (1 << F_ZF)); - CBZ_NEXT(x1); - if (ST_IS_F(0)) - FMVS(v1, v2); - else - FMVD(v1, v2); - break; - case 0xD8: - case 0xD9: - case 0xDA: - case 0xDB: - case 0xDC: - case 0xDD: - case 0xDE: - case 0xDF: - INST_NAME("FCMOVU ST0, STx"); - READFLAGS(X_PF); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); - v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); - ANDI(x1, xFlags, (1 << F_PF)); - CBZ_NEXT(x1); - if (ST_IS_F(0)) - FMVS(v1, v2); - else - FMVD(v1, v2); - break; - case 0xE9: - INST_NAME("FUCOMPP ST0, ST1"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); - v2 = x87_get_st(dyn, ninst, x1, x2, 1, X87_COMBINE(0, nextop & 7)); - if (ST_IS_F(0)) { - FCOMS(v1, v2, x1, x2, x3, x4, x5); - } else { - FCOMD(v1, v2, x1, x2, x3, x4, x5); - } - X87_POP_OR_FAIL(dyn, ninst, x3); - X87_POP_OR_FAIL(dyn, ninst, x3); - break; - case 0xE4: - case 0xF0: - case 0xF1: - case 0xF4: - case 0xF5: - case 0xF6: - case 0xF7: - case 0xF8: - case 0xF9: - case 0xFD: + if (MODREG) + switch (nextop) { + case 0xC0: + case 0xC1: + case 0xC2: + case 0xC3: + case 0xC4: + case 0xC5: + case 0xC6: + case 0xC7: + INST_NAME("FCMOVB ST0, STx"); + READFLAGS(X_CF); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); + v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); + ANDI(x1, xFlags, 1 << F_CF); + CBZ_NEXT(x1); + if (ST_IS_F(0)) + FMVS(v1, v2); + else + FMVD(v1, v2); + break; + case 0xC8: + case 0xC9: + case 0xCA: + case 0xCB: + case 0xCC: + case 0xCD: + case 0xCE: + case 0xCF: + INST_NAME("FCMOVE ST0, STx"); + READFLAGS(X_ZF); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); + v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); + ANDI(x1, xFlags, 1 << F_ZF); + CBZ_NEXT(x1); + if (ST_IS_F(0)) + FMVS(v1, v2); + else + FMVD(v1, v2); + break; + case 0xD0: + case 0xD1: + case 0xD2: + case 0xD3: + case 0xD4: + case 0xD5: + case 0xD6: + case 0xD7: + INST_NAME("FCMOVBE ST0, STx"); + READFLAGS(X_CF | X_ZF); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); + v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); + ANDI(x1, xFlags, (1 << F_CF) | (1 << F_ZF)); + CBZ_NEXT(x1); + if (ST_IS_F(0)) + FMVS(v1, v2); + else + FMVD(v1, v2); + break; + case 0xD8: + case 0xD9: + case 0xDA: + case 0xDB: + case 0xDC: + case 0xDD: + case 0xDE: + case 0xDF: + INST_NAME("FCMOVU ST0, STx"); + READFLAGS(X_PF); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); + v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); + ANDI(x1, xFlags, (1 << F_PF)); + CBZ_NEXT(x1); + if (ST_IS_F(0)) + FMVS(v1, v2); + else + FMVD(v1, v2); + break; + case 0xE9: + INST_NAME("FUCOMPP ST0, ST1"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); + v2 = x87_get_st(dyn, ninst, x1, x2, 1, X87_COMBINE(0, nextop & 7)); + if (ST_IS_F(0)) { + FCOMS(v1, v2, x1, x2, x3, x4, x5); + } else { + FCOMD(v1, v2, x1, x2, x3, x4, x5); + } + X87_POP_OR_FAIL(dyn, ninst, x3); + X87_POP_OR_FAIL(dyn, ninst, x3); + break; + default: DEFAULT; break; - - default: - switch ((nextop >> 3) & 7) { - case 0: - INST_NAME("FIADD ST0, Ed"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D); - v2 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); - LW(x1, ed, fixedaddress); - FCVTDW(v2, x1, RD_RNE); // i32 -> double - if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); - FADDD(v1, v1, v2); - if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); - break; - case 1: - INST_NAME("FIMUL ST0, Ed"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D); - v2 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); - LW(x1, ed, fixedaddress); - FCVTDW(v2, x1, RD_RNE); // i32 -> double - if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); - FMULD(v1, v1, v2); - if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); - break; - case 2: - INST_NAME("FICOM ST0, Ed"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D); - v2 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); - LW(x1, ed, fixedaddress); - FCVTDW(v2, x1, RD_RNE); // i32 -> double - FCOMD(v1, v2, x1, x2, x3, x4, x5); - break; - case 3: - INST_NAME("FICOMP ST0, Ed"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D); - v2 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); - LW(x1, ed, fixedaddress); - FCVTDW(v2, x1, RD_RNE); // i32 -> double - FCOMD(v1, v2, x1, x2, x3, x4, x5); - X87_POP_OR_FAIL(dyn, ninst, x3); - break; - case 4: - INST_NAME("FISUB ST0, Ed"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D); - v2 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); - LW(x1, ed, fixedaddress); - FCVTDW(v2, x1, RD_RNE); // i32 -> double - if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); - FSUBD(v1, v1, v2); - if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); - break; - case 5: - INST_NAME("FISUBR ST0, Ed"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D); - v2 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); - LW(x1, ed, fixedaddress); - FCVTDW(v2, x1, RD_RNE); // i32 -> double - if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); - FSUBD(v1, v2, v1); - if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); - break; - case 6: - INST_NAME("FIDIV ST0, Ed"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D); - v2 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); - LW(x1, ed, fixedaddress); - FCVTDW(v2, x1, RD_RNE); // i32 -> double - if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); - FDIVD(v1, v1, v2); - if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); - break; - case 7: - INST_NAME("FIDIVR ST0, Ed"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D); - v2 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); - LW(x1, ed, fixedaddress); - FCVTDW(v2, x1, RD_RNE); // i32 -> double - if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); - FDIVD(v1, v2, v1); - if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); - break; - } } + else + switch ((nextop >> 3) & 7) { + case 0: + INST_NAME("FIADD ST0, Ed"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D); + v2 = fpu_get_scratch(dyn); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); + LW(x1, ed, fixedaddress); + FCVTDW(v2, x1, RD_RNE); // i32 -> double + if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); + FADDD(v1, v1, v2); + if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); + break; + case 1: + INST_NAME("FIMUL ST0, Ed"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D); + v2 = fpu_get_scratch(dyn); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); + LW(x1, ed, fixedaddress); + FCVTDW(v2, x1, RD_RNE); // i32 -> double + if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); + FMULD(v1, v1, v2); + if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); + break; + case 2: + INST_NAME("FICOM ST0, Ed"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D); + v2 = fpu_get_scratch(dyn); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); + LW(x1, ed, fixedaddress); + FCVTDW(v2, x1, RD_RNE); // i32 -> double + FCOMD(v1, v2, x1, x2, x3, x4, x5); + break; + case 3: + INST_NAME("FICOMP ST0, Ed"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D); + v2 = fpu_get_scratch(dyn); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); + LW(x1, ed, fixedaddress); + FCVTDW(v2, x1, RD_RNE); // i32 -> double + FCOMD(v1, v2, x1, x2, x3, x4, x5); + X87_POP_OR_FAIL(dyn, ninst, x3); + break; + case 4: + INST_NAME("FISUB ST0, Ed"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D); + v2 = fpu_get_scratch(dyn); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); + LW(x1, ed, fixedaddress); + FCVTDW(v2, x1, RD_RNE); // i32 -> double + if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); + FSUBD(v1, v1, v2); + if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); + break; + case 5: + INST_NAME("FISUBR ST0, Ed"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D); + v2 = fpu_get_scratch(dyn); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); + LW(x1, ed, fixedaddress); + FCVTDW(v2, x1, RD_RNE); // i32 -> double + if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); + FSUBD(v1, v2, v1); + if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); + break; + case 6: + INST_NAME("FIDIV ST0, Ed"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D); + v2 = fpu_get_scratch(dyn); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); + LW(x1, ed, fixedaddress); + FCVTDW(v2, x1, RD_RNE); // i32 -> double + if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); + FDIVD(v1, v1, v2); + if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); + break; + case 7: + INST_NAME("FIDIVR ST0, Ed"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D); + v2 = fpu_get_scratch(dyn); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); + LW(x1, ed, fixedaddress); + FCVTDW(v2, x1, RD_RNE); // i32 -> double + if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); + FDIVD(v1, v2, v1); + if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); + break; + } return addr; } diff --git a/src/dynarec/rv64/dynarec_rv64_db.c b/src/dynarec/rv64/dynarec_rv64_db.c index 37aafc65..45fcd1e0 100644 --- a/src/dynarec/rv64/dynarec_rv64_db.c +++ b/src/dynarec/rv64/dynarec_rv64_db.c @@ -44,254 +44,249 @@ uintptr_t dynarec64_DB(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni MAYUSE(v1); MAYUSE(j64); - switch (nextop) { - case 0xC0: - case 0xC1: - case 0xC2: - case 0xC3: - case 0xC4: - case 0xC5: - case 0xC6: - case 0xC7: - INST_NAME("FCMOVNB ST0, STx"); - READFLAGS(X_CF); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); - v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); - ANDI(x1, xFlags, 1 << F_CF); - CBNZ_NEXT(x1); - if (ST_IS_F(0)) { - FMVS(v1, v2); - } else { - FMVD(v1, v2); // F_CF==0 - } - break; - case 0xC8: - case 0xC9: - case 0xCA: - case 0xCB: - case 0xCC: - case 0xCD: - case 0xCE: - case 0xCF: - INST_NAME("FCMOVNE ST0, STx"); - READFLAGS(X_ZF); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); - v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); - ANDI(x1, xFlags, 1 << F_ZF); - CBNZ_NEXT(x1); - if (ST_IS_F(0)) { - FMVS(v1, v2); - } else { - FMVD(v1, v2); // F_ZF==0 - } - break; - case 0xD0: - case 0xD1: - case 0xD2: - case 0xD3: - case 0xD4: - case 0xD5: - case 0xD6: - case 0xD7: - INST_NAME("FCMOVNBE ST0, STx"); - READFLAGS(X_CF | X_ZF); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); - v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); - ANDI(x1, xFlags, (1 << F_CF) | (1 << F_ZF)); - CBNZ_NEXT(x1); - if (ST_IS_F(0)) { - FMVS(v1, v2); - } else { - FMVD(v1, v2); // F_CF==0 & F_ZF==0 - } - break; - case 0xD8: - case 0xD9: - case 0xDA: - case 0xDB: - case 0xDC: - case 0xDD: - case 0xDE: - case 0xDF: - INST_NAME("FCMOVNU ST0, STx"); - READFLAGS(X_PF); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); - v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); - ANDI(x1, xFlags, 1 << F_PF); - CBNZ_NEXT(x1); - if (ST_IS_F(0)) { - FMVS(v1, v2); - } else { - FMVD(v1, v2); // F_PF==0 - } - break; - case 0xE1: - INST_NAME("FDISI8087_NOP"); // so.. NOP? - break; - case 0xE2: - INST_NAME("FNCLEX"); - LH(x2, xEmu, offsetof(x64emu_t, sw)); - ANDI(x2, x2, ~(0xff)); // IE .. PE, SF, ES - MOV32w(x1, ~(1 << 15)); // B - AND(x2, x2, x1); - SH(x2, xEmu, offsetof(x64emu_t, sw)); - break; - case 0xE3: - INST_NAME("FNINIT"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - x87_purgecache(dyn, ninst, 0, x1, x2, x3); - CALL(reset_fpu, -1, 0, 0); - break; - case 0xE8: - case 0xE9: - case 0xEA: - case 0xEB: - case 0xEC: - case 0xED: - case 0xEE: - case 0xEF: - INST_NAME("FUCOMI ST0, STx"); - SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); - v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); - if (ST_IS_F(0)) { - FCOMIS(v1, v2, x1, x2, x3, x4, x5); - } else { - FCOMID(v1, v2, x1, x2, x3, x4, x5); - } + if (MODREG) + switch (nextop) { + case 0xC0: + case 0xC1: + case 0xC2: + case 0xC3: + case 0xC4: + case 0xC5: + case 0xC6: + case 0xC7: + INST_NAME("FCMOVNB ST0, STx"); + READFLAGS(X_CF); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); + v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); + ANDI(x1, xFlags, 1 << F_CF); + CBNZ_NEXT(x1); + if (ST_IS_F(0)) { + FMVS(v1, v2); + } else { + FMVD(v1, v2); // F_CF==0 + } + break; + case 0xC8: + case 0xC9: + case 0xCA: + case 0xCB: + case 0xCC: + case 0xCD: + case 0xCE: + case 0xCF: + INST_NAME("FCMOVNE ST0, STx"); + READFLAGS(X_ZF); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); + v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); + ANDI(x1, xFlags, 1 << F_ZF); + CBNZ_NEXT(x1); + if (ST_IS_F(0)) { + FMVS(v1, v2); + } else { + FMVD(v1, v2); // F_ZF==0 + } + break; + case 0xD0: + case 0xD1: + case 0xD2: + case 0xD3: + case 0xD4: + case 0xD5: + case 0xD6: + case 0xD7: + INST_NAME("FCMOVNBE ST0, STx"); + READFLAGS(X_CF | X_ZF); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); + v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); + ANDI(x1, xFlags, (1 << F_CF) | (1 << F_ZF)); + CBNZ_NEXT(x1); + if (ST_IS_F(0)) { + FMVS(v1, v2); + } else { + FMVD(v1, v2); // F_CF==0 & F_ZF==0 + } + break; + case 0xD8: + case 0xD9: + case 0xDA: + case 0xDB: + case 0xDC: + case 0xDD: + case 0xDE: + case 0xDF: + INST_NAME("FCMOVNU ST0, STx"); + READFLAGS(X_PF); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); + v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); + ANDI(x1, xFlags, 1 << F_PF); + CBNZ_NEXT(x1); + if (ST_IS_F(0)) { + FMVS(v1, v2); + } else { + FMVD(v1, v2); // F_PF==0 + } + break; + case 0xE1: + INST_NAME("FDISI8087_NOP"); // so.. NOP? + break; + case 0xE2: + INST_NAME("FNCLEX"); + LH(x2, xEmu, offsetof(x64emu_t, sw)); + ANDI(x2, x2, ~(0xff)); // IE .. PE, SF, ES + MOV32w(x1, ~(1 << 15)); // B + AND(x2, x2, x1); + SH(x2, xEmu, offsetof(x64emu_t, sw)); + break; + case 0xE3: + INST_NAME("FNINIT"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + x87_purgecache(dyn, ninst, 0, x1, x2, x3); + CALL(reset_fpu, -1, 0, 0); + break; + case 0xE8: + case 0xE9: + case 0xEA: + case 0xEB: + case 0xEC: + case 0xED: + case 0xEE: + case 0xEF: + INST_NAME("FUCOMI ST0, STx"); + SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); + v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); + if (ST_IS_F(0)) { + FCOMIS(v1, v2, x1, x2, x3, x4, x5); + } else { + FCOMID(v1, v2, x1, x2, x3, x4, x5); + } - break; - case 0xF0: - case 0xF1: - case 0xF2: - case 0xF3: - case 0xF4: - case 0xF5: - case 0xF6: - case 0xF7: - INST_NAME("FCOMI ST0, STx"); - SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); - v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); - if (ST_IS_F(0)) { - FCOMIS(v1, v2, x1, x2, x3, x4, x5); - } else { - FCOMID(v1, v2, x1, x2, x3, x4, x5); - } - break; - - case 0xE0: - case 0xE4: - case 0xE5: - case 0xE6: - case 0xE7: - DEFAULT; - break; - - default: - switch ((nextop >> 3) & 7) { - case 0: - INST_NAME("FILD ST0, Ed"); - X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_D); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); - LW(x1, ed, fixedaddress); - FCVTDW(v1, x1, RD_RNE); // i32 -> double - break; - case 1: - INST_NAME("FISTTP Ed, ST0"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D); - addr = geted(dyn, addr, ninst, nextop, &wback, x3, x4, &fixedaddress, rex, NULL, 1, 0); - if (!BOX64ENV(dynarec_fastround)) { - FSFLAGSI(0); // reset all bits - } - FCVTWD(x4, v1, RD_RTZ); - if (!BOX64ENV(dynarec_fastround)) { - FRFLAGS(x5); // get back FPSR to check the IOC bit - ANDI(x5, x5, 1 << FR_NV); - BEQZ_MARK(x5); - MOV32w(x4, 0x80000000); - MARK; - } - SW(x4, wback, fixedaddress); - X87_POP_OR_FAIL(dyn, ninst, x3); - break; - case 2: - INST_NAME("FIST Ed, ST0"); - DEFAULT; - break; - case 3: - INST_NAME("FISTP Ed, ST0"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D); - u8 = x87_setround(dyn, ninst, x1, x2); - addr = geted(dyn, addr, ninst, nextop, &wback, x2, x3, &fixedaddress, rex, NULL, 1, 0); - v2 = fpu_get_scratch(dyn); - if (!BOX64ENV(dynarec_fastround)) { - FSFLAGSI(0); // reset all bits - } - FCVTWD(x4, v1, RD_DYN); - x87_restoreround(dyn, ninst, u8); - if (!BOX64ENV(dynarec_fastround)) { - FRFLAGS(x5); // get back FPSR to check the IOC bit - ANDI(x5, x5, 1 << FR_NV); - BEQ_MARK2(x5, xZR); - MOV32w(x4, 0x80000000); - } - MARK2; - SW(x4, wback, fixedaddress); - X87_POP_OR_FAIL(dyn, ninst, x3); - break; - case 5: - INST_NAME("FLD tbyte"); + break; + case 0xF0: + case 0xF1: + case 0xF2: + case 0xF3: + case 0xF4: + case 0xF5: + case 0xF6: + case 0xF7: + INST_NAME("FCOMI ST0, STx"); + SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); + v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); + if (ST_IS_F(0)) { + FCOMIS(v1, v2, x1, x2, x3, x4, x5); + } else { + FCOMID(v1, v2, x1, x2, x3, x4, x5); + } + break; + default: + DEFAULT; + break; + } + else + switch ((nextop >> 3) & 7) { + case 0: + INST_NAME("FILD ST0, Ed"); + X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_D); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); + LW(x1, ed, fixedaddress); + FCVTDW(v1, x1, RD_RNE); // i32 -> double + break; + case 1: + INST_NAME("FISTTP Ed, ST0"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D); + addr = geted(dyn, addr, ninst, nextop, &wback, x3, x4, &fixedaddress, rex, NULL, 1, 0); + if (!BOX64ENV(dynarec_fastround)) { + FSFLAGSI(0); // reset all bits + } + FCVTWD(x4, v1, RD_RTZ); + if (!BOX64ENV(dynarec_fastround)) { + FRFLAGS(x5); // get back FPSR to check the IOC bit + ANDI(x5, x5, 1 << FR_NV); + BEQZ_MARK(x5); + MOV32w(x4, 0x80000000); + MARK; + } + SW(x4, wback, fixedaddress); + X87_POP_OR_FAIL(dyn, ninst, x3); + break; + case 2: + INST_NAME("FIST Ed, ST0"); + DEFAULT; + break; + case 3: + INST_NAME("FISTP Ed, ST0"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D); + u8 = x87_setround(dyn, ninst, x1, x2); + addr = geted(dyn, addr, ninst, nextop, &wback, x2, x3, &fixedaddress, rex, NULL, 1, 0); + v2 = fpu_get_scratch(dyn); + if (!BOX64ENV(dynarec_fastround)) { + FSFLAGSI(0); // reset all bits + } + FCVTWD(x4, v1, RD_DYN); + x87_restoreround(dyn, ninst, u8); + if (!BOX64ENV(dynarec_fastround)) { + FRFLAGS(x5); // get back FPSR to check the IOC bit + ANDI(x5, x5, 1 << FR_NV); + BEQ_MARK2(x5, xZR); + MOV32w(x4, 0x80000000); + } + MARK2; + SW(x4, wback, fixedaddress); + X87_POP_OR_FAIL(dyn, ninst, x3); + break; + case 5: + INST_NAME("FLD tbyte"); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 8, 0); + if ((PK(0) == 0xDB && ((PK(1) >> 3) & 7) == 7) || (!rex.is32bits && PK(0) >= 0x40 && PK(0) <= 0x4f && PK(1) == 0xDB && ((PK(2) >> 3) & 7) == 7)) { + // the FLD is immediatly followed by an FSTP + LD(x5, ed, fixedaddress + 0); + LH(x6, ed, fixedaddress + 8); + // no persistant scratch register, so unrool both instruction here... + MESSAGE(LOG_DUMP, "\tHack: FSTP tbyte\n"); + nextop = F8; // 0xDB or rex + if (!rex.is32bits && nextop >= 0x40 && nextop <= 0x4f) { + rex.rex = nextop; + nextop = F8; // 0xDB + } else + rex.rex = 0; + nextop = F8; // modrm addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 8, 0); - if ((PK(0) == 0xDB && ((PK(1) >> 3) & 7) == 7) || (!rex.is32bits && PK(0) >= 0x40 && PK(0) <= 0x4f && PK(1) == 0xDB && ((PK(2) >> 3) & 7) == 7)) { - // the FLD is immediatly followed by an FSTP - LD(x5, ed, fixedaddress + 0); - LH(x6, ed, fixedaddress + 8); - // no persistant scratch register, so unrool both instruction here... - MESSAGE(LOG_DUMP, "\tHack: FSTP tbyte\n"); - nextop = F8; // 0xDB or rex - if (!rex.is32bits && nextop >= 0x40 && nextop <= 0x4f) { - rex.rex = nextop; - nextop = F8; // 0xDB - } else - rex.rex = 0; - nextop = F8; // modrm - addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 8, 0); - SD(x5, ed, fixedaddress + 0); - SH(x6, ed, fixedaddress + 8); - } else { - if (BOX64ENV(x87_no80bits)) { - X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_D); - FLD(v1, ed, fixedaddress); - } else { - ADDI(x1, ed, fixedaddress); - X87_PUSH_EMPTY_OR_FAIL(dyn, ninst, x3); - // sync top - s0 = x87_stackcount(dyn, ninst, x3); - CALL(native_fld, -1, x1, 0); - // go back with the top & stack counter - x87_unstackcount(dyn, ninst, x3, s0); - } - } - break; - case 7: - INST_NAME("FSTP tbyte"); + SD(x5, ed, fixedaddress + 0); + SH(x6, ed, fixedaddress + 8); + } else { if (BOX64ENV(x87_no80bits)) { - v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D); - addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0); - FSD(v1, wback, fixedaddress); + X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_D); + FLD(v1, ed, fixedaddress); } else { - x87_forget(dyn, ninst, x1, x3, 0); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0); + ADDI(x1, ed, fixedaddress); + X87_PUSH_EMPTY_OR_FAIL(dyn, ninst, x3); + // sync top s0 = x87_stackcount(dyn, ninst, x3); - CALL(native_fstp, -1, ed, 0); + CALL(native_fld, -1, x1, 0); + // go back with the top & stack counter x87_unstackcount(dyn, ninst, x3, s0); } - X87_POP_OR_FAIL(dyn, ninst, x3); - break; - default: - DEFAULT; - } - } + } + break; + case 7: + INST_NAME("FSTP tbyte"); + if (BOX64ENV(x87_no80bits)) { + v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D); + addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0); + FSD(v1, wback, fixedaddress); + } else { + x87_forget(dyn, ninst, x1, x3, 0); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0); + s0 = x87_stackcount(dyn, ninst, x3); + CALL(native_fstp, -1, ed, 0); + x87_unstackcount(dyn, ninst, x3, s0); + } + X87_POP_OR_FAIL(dyn, ninst, x3); + break; + default: + DEFAULT; + } return addr; } diff --git a/src/dynarec/rv64/dynarec_rv64_dc.c b/src/dynarec/rv64/dynarec_rv64_dc.c index 309abde8..3346e6de 100644 --- a/src/dynarec/rv64/dynarec_rv64_dc.c +++ b/src/dynarec/rv64/dynarec_rv64_dc.c @@ -39,180 +39,184 @@ uintptr_t dynarec64_DC(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni MAYUSE(v2); MAYUSE(v1); - switch (nextop) { - case 0xC0 ... 0xC7: - INST_NAME("FADD STx, ST0"); - v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); - v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); - if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); - if (ST_IS_F(0)) { - FADDS(v1, v1, v2); - } else { - FADDD(v1, v1, v2); - } - if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); - break; - case 0xC8 ... 0xCF: - INST_NAME("FMUL STx, ST0"); - v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); - v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); - if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); - if (ST_IS_F(0)) { - FMULS(v1, v1, v2); - } else { - FMULD(v1, v1, v2); - } - if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); - break; - case 0xD0 ... 0xD7: - INST_NAME("FCOM ST0, STx"); // yep - v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); - v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); - if (ST_IS_F(0)) { - FCOMS(v1, v2, x1, x2, x3, x4, x5); - } else { - FCOMD(v1, v2, x1, x2, x3, x4, x5); - } - break; - case 0xD8 ... 0xDF: - INST_NAME("FCOMP ST0, STx"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); - v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); - if (ST_IS_F(0)) { - FCOMS(v1, v2, x1, x2, x3, x4, x5); - } else { - FCOMD(v1, v2, x1, x2, x3, x4, x5); - } - X87_POP_OR_FAIL(dyn, ninst, x3); - break; - case 0xE0 ... 0xE7: - INST_NAME("FSUBR STx, ST0"); - v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); - v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); - if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); - if (ST_IS_F(0)) { - FSUBS(v1, v2, v1); - } else { - FSUBD(v1, v2, v1); - } - if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); - break; - case 0xE8 ... 0xEF: - INST_NAME("FSUB STx, ST0"); - v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); - v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); - if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); - if (ST_IS_F(0)) { - FSUBS(v1, v1, v2); - } else { - FSUBD(v1, v1, v2); - } - if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); - break; - case 0xF0 ... 0xF7: - INST_NAME("FDIVR STx, ST0"); - v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); - v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); - if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); - if (ST_IS_F(0)) { - FDIVS(v1, v2, v1); - } else { - FDIVD(v1, v2, v1); - } - if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); - break; - case 0xF8 ... 0xFF: - INST_NAME("FDIV STx, ST0"); - v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); - v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); - if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); - if (ST_IS_F(0)) { - FDIVS(v1, v1, v2); - } else { - FDIVD(v1, v1, v2); - } - if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); - break; - default: - switch ((nextop >> 3) & 7) { - case 0: - INST_NAME("FADD ST0, double[ED]"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D); - v2 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0); - FLD(v2, wback, fixedaddress); - if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); + if (MODREG) + switch (nextop) { + case 0xC0 ... 0xC7: + INST_NAME("FADD STx, ST0"); + v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); + v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); + if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); + if (ST_IS_F(0)) { + FADDS(v1, v1, v2); + } else { FADDD(v1, v1, v2); - if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); - break; - case 1: - INST_NAME("FMUL ST0, double[ED]"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D); - v2 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0); - FLD(v2, wback, fixedaddress); - if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); + } + if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); + break; + case 0xC8 ... 0xCF: + INST_NAME("FMUL STx, ST0"); + v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); + v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); + if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); + if (ST_IS_F(0)) { + FMULS(v1, v1, v2); + } else { FMULD(v1, v1, v2); - if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); - break; - case 2: - INST_NAME("FCOM ST0, double[ED]"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D); - v2 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0); - FLD(v2, wback, fixedaddress); - FCOMD(v1, v2, x1, x6, x3, x4, x5); - break; - case 3: - INST_NAME("FCOMP ST0, double[ED]"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D); - v2 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0); - FLD(v2, wback, fixedaddress); - FCOMD(v1, v2, x1, x6, x3, x4, x5); - X87_POP_OR_FAIL(dyn, ninst, x3); - break; - case 4: - INST_NAME("FSUB ST0, double[ED]"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D); - v2 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0); - FLD(v2, wback, fixedaddress); - if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); - FSUBD(v1, v1, v2); - if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); - break; - case 5: - INST_NAME("FSUBR ST0, double[ED]"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D); - v2 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0); - FLD(v2, wback, fixedaddress); - if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); + } + if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); + break; + case 0xD0 ... 0xD7: + INST_NAME("FCOM ST0, STx"); // yep + v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); + v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); + if (ST_IS_F(0)) { + FCOMS(v1, v2, x1, x2, x3, x4, x5); + } else { + FCOMD(v1, v2, x1, x2, x3, x4, x5); + } + break; + case 0xD8 ... 0xDF: + INST_NAME("FCOMP ST0, STx"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); + v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); + if (ST_IS_F(0)) { + FCOMS(v1, v2, x1, x2, x3, x4, x5); + } else { + FCOMD(v1, v2, x1, x2, x3, x4, x5); + } + X87_POP_OR_FAIL(dyn, ninst, x3); + break; + case 0xE0 ... 0xE7: + INST_NAME("FSUBR STx, ST0"); + v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); + v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); + if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); + if (ST_IS_F(0)) { + FSUBS(v1, v2, v1); + } else { FSUBD(v1, v2, v1); - if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); - break; - case 6: - INST_NAME("FDIV ST0, double[ED]"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D); - v2 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0); - FLD(v2, wback, fixedaddress); - if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); - FDIVD(v1, v1, v2); - if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); - break; - case 7: - INST_NAME("FDIVR ST0, double[ED]"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D); - v2 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0); - FLD(v2, wback, fixedaddress); - if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); + } + if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); + break; + case 0xE8 ... 0xEF: + INST_NAME("FSUB STx, ST0"); + v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); + v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); + if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); + if (ST_IS_F(0)) { + FSUBS(v1, v1, v2); + } else { + FSUBD(v1, v1, v2); + } + if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); + break; + case 0xF0 ... 0xF7: + INST_NAME("FDIVR STx, ST0"); + v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); + v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); + if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); + if (ST_IS_F(0)) { + FDIVS(v1, v2, v1); + } else { FDIVD(v1, v2, v1); - if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); - break; - } - } + } + if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); + break; + case 0xF8 ... 0xFF: + INST_NAME("FDIV STx, ST0"); + v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); + v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); + if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); + if (ST_IS_F(0)) { + FDIVS(v1, v1, v2); + } else { + FDIVD(v1, v1, v2); + } + if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); + break; + default: + DEFAULT; + break; + } + else + switch ((nextop >> 3) & 7) { + case 0: + INST_NAME("FADD ST0, double[ED]"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D); + v2 = fpu_get_scratch(dyn); + addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0); + FLD(v2, wback, fixedaddress); + if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); + FADDD(v1, v1, v2); + if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); + break; + case 1: + INST_NAME("FMUL ST0, double[ED]"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D); + v2 = fpu_get_scratch(dyn); + addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0); + FLD(v2, wback, fixedaddress); + if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); + FMULD(v1, v1, v2); + if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); + break; + case 2: + INST_NAME("FCOM ST0, double[ED]"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D); + v2 = fpu_get_scratch(dyn); + addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0); + FLD(v2, wback, fixedaddress); + FCOMD(v1, v2, x1, x6, x3, x4, x5); + break; + case 3: + INST_NAME("FCOMP ST0, double[ED]"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D); + v2 = fpu_get_scratch(dyn); + addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0); + FLD(v2, wback, fixedaddress); + FCOMD(v1, v2, x1, x6, x3, x4, x5); + X87_POP_OR_FAIL(dyn, ninst, x3); + break; + case 4: + INST_NAME("FSUB ST0, double[ED]"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D); + v2 = fpu_get_scratch(dyn); + addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0); + FLD(v2, wback, fixedaddress); + if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); + FSUBD(v1, v1, v2); + if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); + break; + case 5: + INST_NAME("FSUBR ST0, double[ED]"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D); + v2 = fpu_get_scratch(dyn); + addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0); + FLD(v2, wback, fixedaddress); + if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); + FSUBD(v1, v2, v1); + if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); + break; + case 6: + INST_NAME("FDIV ST0, double[ED]"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D); + v2 = fpu_get_scratch(dyn); + addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0); + FLD(v2, wback, fixedaddress); + if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); + FDIVD(v1, v1, v2); + if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); + break; + case 7: + INST_NAME("FDIVR ST0, double[ED]"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D); + v2 = fpu_get_scratch(dyn); + addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0); + FLD(v2, wback, fixedaddress); + if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); + FDIVD(v1, v2, v1); + if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); + break; + } return addr; } diff --git a/src/dynarec/rv64/dynarec_rv64_dd.c b/src/dynarec/rv64/dynarec_rv64_dd.c index 06e2ea98..4a1e527d 100644 --- a/src/dynarec/rv64/dynarec_rv64_dd.c +++ b/src/dynarec/rv64/dynarec_rv64_dd.c @@ -42,188 +42,131 @@ uintptr_t dynarec64_DD(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni MAYUSE(v1); MAYUSE(j64); - switch (nextop) { - case 0xC0: - case 0xC1: - case 0xC2: - case 0xC3: - case 0xC4: - case 0xC5: - case 0xC6: - case 0xC7: - INST_NAME("FFREE STx"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - x87_purgecache(dyn, ninst, 0, x1, x2, x3); - MOV32w(x1, nextop & 7); - CALL(fpu_do_free, -1, x1, 0); - break; - case 0xD0: - case 0xD1: - case 0xD2: - case 0xD3: - case 0xD4: - case 0xD5: - case 0xD6: - case 0xD7: - INST_NAME("FST ST0, STx"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); - v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); - if (ST_IS_F(0)) { - FMVS(v2, v1); - } else { - FMVD(v2, v1); - } - break; - case 0xD8: - INST_NAME("FSTP ST0, ST0"); - X87_POP_OR_FAIL(dyn, ninst, x3); - break; - case 0xD9: - case 0xDA: - case 0xDB: - case 0xDC: - case 0xDD: - case 0xDE: - case 0xDF: - INST_NAME("FSTP ST0, STx"); - // copy the cache value for st0 to stx - x87_get_st_empty(dyn, ninst, x1, x2, nextop & 7, X87_ST(nextop & 7)); - x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0); - x87_swapreg(dyn, ninst, x1, x2, 0, nextop & 7); - X87_POP_OR_FAIL(dyn, ninst, x3); - break; - case 0xE0: - case 0xE1: - case 0xE2: - case 0xE3: - case 0xE4: - case 0xE5: - case 0xE6: - case 0xE7: - INST_NAME("FUCOM ST0, STx"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); - v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); - if (ST_IS_F(0)) { - FCOMS(v1, v2, x1, x2, x3, x4, x5); - } else { - FCOMD(v1, v2, x1, x2, x3, x4, x5); - } - break; - case 0xE8: - case 0xE9: - case 0xEA: - case 0xEB: - case 0xEC: - case 0xED: - case 0xEE: - case 0xEF: - INST_NAME("FUCOMP ST0, STx"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); - v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); - if (ST_IS_F(0)) { - FCOMS(v1, v2, x1, x2, x3, x4, x5); - } else { - FCOMD(v1, v2, x1, x2, x3, x4, x5); - } - X87_POP_OR_FAIL(dyn, ninst, x3); - break; - case 0xC8: - case 0xC9: - case 0xCA: - case 0xCB: - case 0xCC: - case 0xCD: - case 0xCE: - case 0xCF: - case 0xF0: - case 0xF1: - case 0xF2: - case 0xF3: - case 0xF4: - case 0xF5: - case 0xF6: - case 0xF7: - case 0xF8: - case 0xF9: - case 0xFA: - case 0xFB: - case 0xFC: - case 0xFD: - case 0xFE: - case 0xFF: - DEFAULT; - break; - - default: - switch ((nextop >> 3) & 7) { - case 0: - INST_NAME("FLD double"); - X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_D); - addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0); - FLD(v1, wback, fixedaddress); - break; - case 1: - INST_NAME("FISTTP i64, ST0"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_I64); - addr = geted(dyn, addr, ninst, nextop, &wback, x3, x4, &fixedaddress, rex, NULL, 1, 0); - if (ST_IS_I64(0)) { - FSD(v1, wback, fixedaddress); - } else { - if (!BOX64ENV(dynarec_fastround)) { - FSFLAGSI(0); // reset all bits - } - FCVTLD(x4, v1, RD_RTZ); - if (!BOX64ENV(dynarec_fastround)) { - FRFLAGS(x5); // get back FPSR to check the IOC bit - ANDI(x5, x5, 1 << FR_NV); - BEQZ_MARK(x5); - MOV64x(x4, 0x8000000000000000); - MARK; - } - SD(x4, wback, fixedaddress); - } - X87_POP_OR_FAIL(dyn, ninst, x3); - break; - case 2: - INST_NAME("FST double"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D); - addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0); + if (MODREG) + switch (nextop) { + case 0xC0 ... 0xC7: + INST_NAME("FFREE STx"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + x87_purgecache(dyn, ninst, 0, x1, x2, x3); + MOV32w(x1, nextop & 7); + CALL(fpu_do_free, -1, x1, 0); + break; + case 0xD0 ... 0xD7: + INST_NAME("FST ST0, STx"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); + v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); + if (ST_IS_F(0)) { + FMVS(v2, v1); + } else { + FMVD(v2, v1); + } + break; + case 0xD8: + INST_NAME("FSTP ST0, ST0"); + X87_POP_OR_FAIL(dyn, ninst, x3); + break; + case 0xD9 ... 0xDF: + INST_NAME("FSTP ST0, STx"); + // copy the cache value for st0 to stx + x87_get_st_empty(dyn, ninst, x1, x2, nextop & 7, X87_ST(nextop & 7)); + x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0); + x87_swapreg(dyn, ninst, x1, x2, 0, nextop & 7); + X87_POP_OR_FAIL(dyn, ninst, x3); + break; + case 0xE0 ... 0xE7: + INST_NAME("FUCOM ST0, STx"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); + v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); + if (ST_IS_F(0)) { + FCOMS(v1, v2, x1, x2, x3, x4, x5); + } else { + FCOMD(v1, v2, x1, x2, x3, x4, x5); + } + break; + case 0xE8 ... 0xEF: + INST_NAME("FUCOMP ST0, STx"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); + v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); + if (ST_IS_F(0)) { + FCOMS(v1, v2, x1, x2, x3, x4, x5); + } else { + FCOMD(v1, v2, x1, x2, x3, x4, x5); + } + X87_POP_OR_FAIL(dyn, ninst, x3); + break; + default: + DEFAULT; + break; + } + else + switch ((nextop >> 3) & 7) { + case 0: + INST_NAME("FLD double"); + X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_D); + addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0); + FLD(v1, wback, fixedaddress); + break; + case 1: + INST_NAME("FISTTP i64, ST0"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_I64); + addr = geted(dyn, addr, ninst, nextop, &wback, x3, x4, &fixedaddress, rex, NULL, 1, 0); + if (ST_IS_I64(0)) { FSD(v1, wback, fixedaddress); - break; - case 3: - INST_NAME("FSTP double"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D); - addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0); - FSD(v1, wback, fixedaddress); - X87_POP_OR_FAIL(dyn, ninst, x3); - break; - case 6: - INST_NAME("FSAVE m108byte"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - fpu_purgecache(dyn, ninst, 0, x1, x2, x3); - addr = geted(dyn, addr, ninst, nextop, &ed, x4, x6, &fixedaddress, rex, NULL, 0, 0); - CALL(native_fsave, -1, ed, 0); - break; - case 7: - INST_NAME("FNSTSW m2byte"); - fpu_purgecache(dyn, ninst, 0, x1, x2, x3); - addr = geted(dyn, addr, ninst, nextop, &ed, x4, x6, &fixedaddress, rex, NULL, 0, 0); - LWU(x2, xEmu, offsetof(x64emu_t, top)); - LHU(x3, xEmu, offsetof(x64emu_t, sw)); - if (dyn->e.x87stack) { - // update top - ADDI(x2, x2, -dyn->e.x87stack); - ANDI(x2, x2, 7); + } else { + if (!BOX64ENV(dynarec_fastround)) { + FSFLAGSI(0); // reset all bits + } + FCVTLD(x4, v1, RD_RTZ); + if (!BOX64ENV(dynarec_fastround)) { + FRFLAGS(x5); // get back FPSR to check the IOC bit + ANDI(x5, x5, 1 << FR_NV); + BEQZ_MARK(x5); + MOV64x(x4, 0x8000000000000000); + MARK; } - MOV32w(x5, ~0x3800); - AND(x3, x3, x5); // mask out TOP - SLLI(x2, x2, 11); // shift TOP to bit 11 - OR(x3, x3, x2); // inject TOP - SH(x3, ed, fixedaddress); // store whole sw flags - break; - default: - DEFAULT; - } - } + SD(x4, wback, fixedaddress); + } + X87_POP_OR_FAIL(dyn, ninst, x3); + break; + case 2: + INST_NAME("FST double"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D); + addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0); + FSD(v1, wback, fixedaddress); + break; + case 3: + INST_NAME("FSTP double"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D); + addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0); + FSD(v1, wback, fixedaddress); + X87_POP_OR_FAIL(dyn, ninst, x3); + break; + case 6: + INST_NAME("FSAVE m108byte"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + fpu_purgecache(dyn, ninst, 0, x1, x2, x3); + addr = geted(dyn, addr, ninst, nextop, &ed, x4, x6, &fixedaddress, rex, NULL, 0, 0); + CALL(native_fsave, -1, ed, 0); + break; + case 7: + INST_NAME("FNSTSW m2byte"); + fpu_purgecache(dyn, ninst, 0, x1, x2, x3); + addr = geted(dyn, addr, ninst, nextop, &ed, x4, x6, &fixedaddress, rex, NULL, 0, 0); + LWU(x2, xEmu, offsetof(x64emu_t, top)); + LHU(x3, xEmu, offsetof(x64emu_t, sw)); + if (dyn->e.x87stack) { + // update top + ADDI(x2, x2, -dyn->e.x87stack); + ANDI(x2, x2, 7); + } + MOV32w(x5, ~0x3800); + AND(x3, x3, x5); // mask out TOP + SLLI(x2, x2, 11); // shift TOP to bit 11 + OR(x3, x3, x2); // inject TOP + SH(x3, ed, fixedaddress); // store whole sw flags + break; + default: + DEFAULT; + } return addr; } diff --git a/src/dynarec/rv64/dynarec_rv64_de.c b/src/dynarec/rv64/dynarec_rv64_de.c index 91a7b0c5..d0952a29 100644 --- a/src/dynarec/rv64/dynarec_rv64_de.c +++ b/src/dynarec/rv64/dynarec_rv64_de.c @@ -38,173 +38,119 @@ uintptr_t dynarec64_DE(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni MAYUSE(v2); MAYUSE(v1); - switch (nextop) { - case 0xC0: - case 0xC1: - case 0xC2: - case 0xC3: - case 0xC4: - case 0xC5: - case 0xC6: - case 0xC7: - INST_NAME("FADDP STx, ST0"); - v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); - v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); - if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); - if (ST_IS_F(0)) { - FADDS(v1, v1, v2); - } else { - FADDD(v1, v1, v2); - } - if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); - X87_POP_OR_FAIL(dyn, ninst, x3); - break; - case 0xC8: - case 0xC9: - case 0xCA: - case 0xCB: - case 0xCC: - case 0xCD: - case 0xCE: - case 0xCF: - INST_NAME("FMULP STx, ST0"); - v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); - v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); - if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); - if (ST_IS_F(0)) { - FMULS(v1, v1, v2); - } else { - FMULD(v1, v1, v2); - } - if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); - X87_POP_OR_FAIL(dyn, ninst, x3); - break; - case 0xD0: - case 0xD1: - case 0xD2: - case 0xD3: - case 0xD4: - case 0xD5: - case 0xD6: - case 0xD7: - INST_NAME("FCOMP ST0, STx"); // yep - v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); - v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); - if (ST_IS_F(0)) { - FCOMS(v1, v2, x1, x2, x3, x4, x5); - } else { - FCOMD(v1, v2, x1, x2, x3, x4, x5); - } - if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); - X87_POP_OR_FAIL(dyn, ninst, x3); - break; - case 0xD9: - INST_NAME("FCOMPP ST0, STx"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); - v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); - if (ST_IS_F(0)) { - FCOMS(v1, v2, x1, x2, x3, x4, x5); - } else { - FCOMD(v1, v2, x1, x2, x3, x4, x5); - } - if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); - X87_POP_OR_FAIL(dyn, ninst, x3); - X87_POP_OR_FAIL(dyn, ninst, x3); - break; - case 0xE0: - case 0xE1: - case 0xE2: - case 0xE3: - case 0xE4: - case 0xE5: - case 0xE6: - case 0xE7: - INST_NAME("FSUBRP STx, ST0"); - v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); - v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); - if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); - if (ST_IS_F(0)) { - FSUBS(v1, v2, v1); - } else { - FSUBD(v1, v2, v1); - } - if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); - X87_POP_OR_FAIL(dyn, ninst, x3); - break; - case 0xE8: - case 0xE9: - case 0xEA: - case 0xEB: - case 0xEC: - case 0xED: - case 0xEE: - case 0xEF: - INST_NAME("FSUBP STx, ST0"); - v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); - v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); - if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); - if (ST_IS_F(0)) { - FSUBS(v1, v1, v2); - } else { - FSUBD(v1, v1, v2); - } - if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); - X87_POP_OR_FAIL(dyn, ninst, x3); - break; - case 0xF0: - case 0xF1: - case 0xF2: - case 0xF3: - case 0xF4: - case 0xF5: - case 0xF6: - case 0xF7: - INST_NAME("FDIVRP STx, ST0"); - v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); - v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); - if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); - if (ST_IS_F(0)) { - FDIVS(v1, v2, v1); - } else { - FDIVD(v1, v2, v1); - } - if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); - X87_POP_OR_FAIL(dyn, ninst, x3); - break; - case 0xF8: - case 0xF9: - case 0xFA: - case 0xFB: - case 0xFC: - case 0xFD: - case 0xFE: - case 0xFF: - INST_NAME("FDIVP STx, ST0"); - v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); - v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); - if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); - if (ST_IS_F(0)) { - FDIVS(v1, v1, v2); - } else { - FDIVD(v1, v1, v2); - } - if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); - X87_POP_OR_FAIL(dyn, ninst, x3); - break; - case 0xD8: - case 0xDA: - case 0xDB: - case 0xDC: - case 0xDD: - case 0xDE: - case 0xDF: - DEFAULT; - break; - default: - switch ((nextop >> 3) & 7) { - default: - DEFAULT; - } - } + if (MODREG) + switch (nextop) { + case 0xC0 ... 0xC7: + INST_NAME("FADDP STx, ST0"); + v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); + v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); + if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); + if (ST_IS_F(0)) { + FADDS(v1, v1, v2); + } else { + FADDD(v1, v1, v2); + } + if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); + X87_POP_OR_FAIL(dyn, ninst, x3); + break; + case 0xC8 ... 0xCF: + INST_NAME("FMULP STx, ST0"); + v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); + v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); + if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); + if (ST_IS_F(0)) { + FMULS(v1, v1, v2); + } else { + FMULD(v1, v1, v2); + } + if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); + X87_POP_OR_FAIL(dyn, ninst, x3); + break; + case 0xD0 ... 0xD7: + INST_NAME("FCOMP ST0, STx"); // yep + v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); + v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); + if (ST_IS_F(0)) { + FCOMS(v1, v2, x1, x2, x3, x4, x5); + } else { + FCOMD(v1, v2, x1, x2, x3, x4, x5); + } + if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); + X87_POP_OR_FAIL(dyn, ninst, x3); + break; + case 0xD9: + INST_NAME("FCOMPP ST0, STx"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); + v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); + if (ST_IS_F(0)) { + FCOMS(v1, v2, x1, x2, x3, x4, x5); + } else { + FCOMD(v1, v2, x1, x2, x3, x4, x5); + } + if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); + X87_POP_OR_FAIL(dyn, ninst, x3); + X87_POP_OR_FAIL(dyn, ninst, x3); + break; + case 0xE0 ... 0xE7: + INST_NAME("FSUBRP STx, ST0"); + v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); + v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); + if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); + if (ST_IS_F(0)) { + FSUBS(v1, v2, v1); + } else { + FSUBD(v1, v2, v1); + } + if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); + X87_POP_OR_FAIL(dyn, ninst, x3); + break; + case 0xE8 ... 0xEF: + INST_NAME("FSUBP STx, ST0"); + v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); + v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); + if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); + if (ST_IS_F(0)) { + FSUBS(v1, v1, v2); + } else { + FSUBD(v1, v1, v2); + } + if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); + X87_POP_OR_FAIL(dyn, ninst, x3); + break; + case 0xF0 ... 0xF7: + INST_NAME("FDIVRP STx, ST0"); + v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); + v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); + if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); + if (ST_IS_F(0)) { + FDIVS(v1, v2, v1); + } else { + FDIVD(v1, v2, v1); + } + if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); + X87_POP_OR_FAIL(dyn, ninst, x3); + break; + case 0xF8 ... 0xFF: + INST_NAME("FDIVP STx, ST0"); + v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); + v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); + if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); + if (ST_IS_F(0)) { + FDIVS(v1, v1, v2); + } else { + FDIVD(v1, v1, v2); + } + if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); + X87_POP_OR_FAIL(dyn, ninst, x3); + break; + default: + DEFAULT; + break; + } + else + switch ((nextop >> 3) & 7) { + default: + DEFAULT; + } return addr; } diff --git a/src/dynarec/rv64/dynarec_rv64_df.c b/src/dynarec/rv64/dynarec_rv64_df.c index cbb75923..7d689d9b 100644 --- a/src/dynarec/rv64/dynarec_rv64_df.c +++ b/src/dynarec/rv64/dynarec_rv64_df.c @@ -40,275 +40,273 @@ uintptr_t dynarec64_DF(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni MAYUSE(v1); MAYUSE(j64); - switch (nextop) { - case 0xC0 ... 0xC7: - INST_NAME("FFREEP STx"); - // not handling Tag... - X87_POP_OR_FAIL(dyn, ninst, x3); - break; + if (MODREG) + switch (nextop) { + case 0xC0 ... 0xC7: + INST_NAME("FFREEP STx"); + // not handling Tag... + X87_POP_OR_FAIL(dyn, ninst, x3); + break; - case 0xE0: - INST_NAME("FNSTSW AX"); - LWU(x2, xEmu, offsetof(x64emu_t, top)); - if (dyn->e.x87stack) { - ADDI(x2, x2, -dyn->e.x87stack); - ANDI(x2, x2, 0x7); - } - LHU(x1, xEmu, offsetof(x64emu_t, sw)); - MOV32w(x3, 0b1100011111111111); // mask - AND(x1, x1, x3); - SLLI(x2, x2, 11); - OR(x1, x1, x2); // inject top - SH(x1, xEmu, offsetof(x64emu_t, sw)); - SRLI(xRAX, xRAX, 16); - SLLI(xRAX, xRAX, 16); - OR(xRAX, xRAX, x1); - break; - case 0xE8 ... 0xF7: - if (nextop < 0xF0) { - INST_NAME("FUCOMIP ST0, STx"); - } else { - INST_NAME("FCOMIP ST0, STx"); - } - SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION); - SET_DFNONE(); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); - v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); - CLEAR_FLAGS(); - IFX (X_ZF | X_PF | X_CF) { - if (ST_IS_F(0)) { - FEQS(x5, v1, v1); - FEQS(x4, v2, v2); - AND(x5, x5, x4); - BEQZ(x5, 24); // undefined/NaN - FEQS(x5, v1, v2); - BNEZ(x5, 24); // equal - FLTS(x3, v1, v2); // x3 = (v1<v2)?1:0 - OR(xFlags, xFlags, x3); // CF is the least significant bit - J(16); // end - // NaN - ORI(xFlags, xFlags, (1 << F_ZF) | (1 << F_PF) | (1 << F_CF)); - J(8); // end - // equal - ORI(xFlags, xFlags, 1 << F_ZF); - // end + case 0xE0: + INST_NAME("FNSTSW AX"); + LWU(x2, xEmu, offsetof(x64emu_t, top)); + if (dyn->e.x87stack) { + ADDI(x2, x2, -dyn->e.x87stack); + ANDI(x2, x2, 0x7); + } + LHU(x1, xEmu, offsetof(x64emu_t, sw)); + MOV32w(x3, 0b1100011111111111); // mask + AND(x1, x1, x3); + SLLI(x2, x2, 11); + OR(x1, x1, x2); // inject top + SH(x1, xEmu, offsetof(x64emu_t, sw)); + SRLI(xRAX, xRAX, 16); + SLLI(xRAX, xRAX, 16); + OR(xRAX, xRAX, x1); + break; + case 0xE8 ... 0xF7: + if (nextop < 0xF0) { + INST_NAME("FUCOMIP ST0, STx"); } else { - FEQD(x5, v1, v1); - FEQD(x4, v2, v2); - AND(x5, x5, x4); - BEQZ(x5, 24); // undefined/NaN - FEQD(x5, v1, v2); - BNEZ(x5, 24); // equal - FLTD(x3, v1, v2); // x3 = (v1<v2)?1:0 - OR(xFlags, xFlags, x3); // CF is the least significant bit - J(16); // end - // NaN - ORI(xFlags, xFlags, (1 << F_ZF) | (1 << F_PF) | (1 << F_CF)); - J(8); // end - // equal - ORI(xFlags, xFlags, 1 << F_ZF); - // end + INST_NAME("FCOMIP ST0, STx"); } - } - X87_POP_OR_FAIL(dyn, ninst, x3); - break; - case 0xC8 ... 0xDF: - case 0xE1 ... 0xE7: - case 0xF8 ... 0xFF: - DEFAULT; - break; - - default: - switch ((nextop >> 3) & 7) { - case 0: - INST_NAME("FILD ST0, Ew"); - X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_F); - addr = geted(dyn, addr, ninst, nextop, &wback, x3, x4, &fixedaddress, rex, NULL, 1, 0); - LH(x1, wback, fixedaddress); + SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION); + SET_DFNONE(); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); + v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); + CLEAR_FLAGS(); + IFX (X_ZF | X_PF | X_CF) { if (ST_IS_F(0)) { - FCVTSL(v1, x1, RD_RNE); + FEQS(x5, v1, v1); + FEQS(x4, v2, v2); + AND(x5, x5, x4); + BEQZ(x5, 24); // undefined/NaN + FEQS(x5, v1, v2); + BNEZ(x5, 24); // equal + FLTS(x3, v1, v2); // x3 = (v1<v2)?1:0 + OR(xFlags, xFlags, x3); // CF is the least significant bit + J(16); // end + // NaN + ORI(xFlags, xFlags, (1 << F_ZF) | (1 << F_PF) | (1 << F_CF)); + J(8); // end + // equal + ORI(xFlags, xFlags, 1 << F_ZF); + // end } else { - FCVTDL(v1, x1, RD_RNE); - } - break; - case 1: - INST_NAME("FISTTP Ew, ST0"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_F); - addr = geted(dyn, addr, ninst, nextop, &wback, x3, x4, &fixedaddress, rex, NULL, 1, 0); - if (!BOX64ENV(dynarec_fastround)) { - FSFLAGSI(0); // reset all bits + FEQD(x5, v1, v1); + FEQD(x4, v2, v2); + AND(x5, x5, x4); + BEQZ(x5, 24); // undefined/NaN + FEQD(x5, v1, v2); + BNEZ(x5, 24); // equal + FLTD(x3, v1, v2); // x3 = (v1<v2)?1:0 + OR(xFlags, xFlags, x3); // CF is the least significant bit + J(16); // end + // NaN + ORI(xFlags, xFlags, (1 << F_ZF) | (1 << F_PF) | (1 << F_CF)); + J(8); // end + // equal + ORI(xFlags, xFlags, 1 << F_ZF); + // end } - if (ST_IS_F(0)) { - FCVTWS(x4, v1, RD_RTZ); - } else { - FCVTWD(x4, v1, RD_RTZ); + } + X87_POP_OR_FAIL(dyn, ninst, x3); + break; + default: + DEFAULT; + break; + } + else + switch ((nextop >> 3) & 7) { + case 0: + INST_NAME("FILD ST0, Ew"); + X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_F); + addr = geted(dyn, addr, ninst, nextop, &wback, x3, x4, &fixedaddress, rex, NULL, 1, 0); + LH(x1, wback, fixedaddress); + if (ST_IS_F(0)) { + FCVTSL(v1, x1, RD_RNE); + } else { + FCVTDL(v1, x1, RD_RNE); + } + break; + case 1: + INST_NAME("FISTTP Ew, ST0"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_F); + addr = geted(dyn, addr, ninst, nextop, &wback, x3, x4, &fixedaddress, rex, NULL, 1, 0); + if (!BOX64ENV(dynarec_fastround)) { + FSFLAGSI(0); // reset all bits + } + if (ST_IS_F(0)) { + FCVTWS(x4, v1, RD_RTZ); + } else { + FCVTWD(x4, v1, RD_RTZ); + } + if (!BOX64ENV(dynarec_fastround)) { + FRFLAGS(x5); // get back FPSR to check the IOC bit + ANDI(x5, x5, 1 << FR_NV); + BNEZ_MARK(x5); + SLLIW(x5, x4, 16); + SRAIW(x5, x5, 16); + BEQ_MARK2(x5, x4); + MARK; + MOV32w(x4, 0x8000); + } + MARK2; + SH(x4, wback, fixedaddress); + X87_POP_OR_FAIL(dyn, ninst, x3); + break; + case 2: + INST_NAME("FIST Ew, ST0"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_F); + u8 = x87_setround(dyn, ninst, x1, x2); + addr = geted(dyn, addr, ninst, nextop, &wback, x2, x3, &fixedaddress, rex, NULL, 1, 0); + if (!BOX64ENV(dynarec_fastround)) { + FSFLAGSI(0); // reset all bits + } + if (ST_IS_F(0)) { + FCVTWS(x4, v1, RD_DYN); + } else { + FCVTWD(x4, v1, RD_DYN); + } + x87_restoreround(dyn, ninst, u8); + if (!BOX64ENV(dynarec_fastround)) { + FRFLAGS(x5); // get back FPSR to check the IOC bit + ANDI(x5, x5, 1 << FR_NV); + BNEZ_MARK(x5); + SLLIW(x5, x4, 16); + SRAIW(x5, x5, 16); + BEQ_MARK2(x5, x4); + MARK; + MOV32w(x4, 0x8000); + } + MARK2; + SH(x4, wback, fixedaddress); + break; + case 3: + INST_NAME("FISTP Ew, ST0"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_F); + u8 = x87_setround(dyn, ninst, x1, x2); + addr = geted(dyn, addr, ninst, nextop, &wback, x2, x3, &fixedaddress, rex, NULL, 1, 0); + if (!BOX64ENV(dynarec_fastround)) { + FSFLAGSI(0); // reset all bits + } + if (ST_IS_F(0)) { + FCVTWS(x4, v1, RD_DYN); + } else { + FCVTWD(x4, v1, RD_DYN); + } + x87_restoreround(dyn, ninst, u8); + if (!BOX64ENV(dynarec_fastround)) { + FRFLAGS(x5); // get back FPSR to check the IOC bit + ANDI(x5, x5, 1 << FR_NV); + BNEZ_MARK(x5); + SLLIW(x5, x4, 16); + SRAIW(x5, x5, 16); + BEQ_MARK2(x5, x4); + MARK; + MOV32w(x4, 0x8000); + } + MARK2; + SH(x4, wback, fixedaddress); + X87_POP_OR_FAIL(dyn, ninst, x3); + break; + case 4: + INST_NAME("FBLD ST0, tbytes"); + X87_PUSH_EMPTY_OR_FAIL(dyn, ninst, x1); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0); + s0 = x87_stackcount(dyn, ninst, x3); + CALL(fpu_fbld, -1, ed, 0); + x87_unstackcount(dyn, ninst, x3, s0); + break; + case 5: + INST_NAME("FILD ST0, i64"); + X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_I64); + addr = geted(dyn, addr, ninst, nextop, &wback, x2, x3, &fixedaddress, rex, NULL, 1, 0); + + if (ST_IS_I64(0)) { + FLD(v1, wback, fixedaddress); + } else { + LD(x1, wback, fixedaddress); + if (rex.is32bits) { + // need to also feed the STll stuff... + ADDI(x4, xEmu, offsetof(x64emu_t, fpu_ll)); + LWU(x5, xEmu, offsetof(x64emu_t, top)); + int a = 0 - dyn->e.x87stack; + if (a) { + ADDIW(x5, x5, a); + ANDI(x5, x5, 0x7); + } + SLLI(x5, x5, 4); // fpu_ll is 2 i64 + ADD(x5, x5, x4); + SD(x1, x5, 8); // ll } - if (!BOX64ENV(dynarec_fastround)) { - FRFLAGS(x5); // get back FPSR to check the IOC bit - ANDI(x5, x5, 1 << FR_NV); - BNEZ_MARK(x5); - SLLIW(x5, x4, 16); - SRAIW(x5, x5, 16); - BEQ_MARK2(x5, x4); - MARK; - MOV32w(x4, 0x8000); + FCVTDL(v1, x1, RD_DYN); + if (rex.is32bits) { + FSD(v1, x5, 0); // ref } - MARK2; - SH(x4, wback, fixedaddress); - X87_POP_OR_FAIL(dyn, ninst, x3); - break; - case 2: - INST_NAME("FIST Ew, ST0"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_F); + } + break; + case 6: + INST_NAME("FBSTP tbytes, ST0"); + x87_forget(dyn, ninst, x1, x2, 0); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0); + s0 = x87_stackcount(dyn, ninst, x3); + CALL(fpu_fbst, -1, ed, 0); + x87_unstackcount(dyn, ninst, x3, s0); + X87_POP_OR_FAIL(dyn, ninst, x3); + break; + case 7: + INST_NAME("FISTP i64, ST0"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_I64); + if (!ST_IS_I64(0)) { u8 = x87_setround(dyn, ninst, x1, x2); - addr = geted(dyn, addr, ninst, nextop, &wback, x2, x3, &fixedaddress, rex, NULL, 1, 0); - if (!BOX64ENV(dynarec_fastround)) { - FSFLAGSI(0); // reset all bits - } - if (ST_IS_F(0)) { - FCVTWS(x4, v1, RD_DYN); - } else { - FCVTWD(x4, v1, RD_DYN); - } - x87_restoreround(dyn, ninst, u8); - if (!BOX64ENV(dynarec_fastround)) { - FRFLAGS(x5); // get back FPSR to check the IOC bit - ANDI(x5, x5, 1 << FR_NV); - BNEZ_MARK(x5); - SLLIW(x5, x4, 16); - SRAIW(x5, x5, 16); - BEQ_MARK2(x5, x4); + } + addr = geted(dyn, addr, ninst, nextop, &wback, x2, x3, &fixedaddress, rex, NULL, 1, 0); + + if (ST_IS_I64(0)) { + FSD(v1, wback, fixedaddress); + } else { + if (rex.is32bits) { + // need to check STll first... + ADDI(x4, xEmu, offsetof(x64emu_t, fpu_ll)); + LWU(x5, xEmu, offsetof(x64emu_t, top)); + int a = 0 - dyn->e.x87stack; + if (a) { + ADDIW(x5, x5, a); + ANDI(x5, x5, 0x7); + } + SLLI(x5, x5, 4); // fpu_ll is 2 i64 + ADD(x5, x5, x4); + FMVXD(x3, v1); + LD(x6, x5, 0); // ref + BNE_MARK(x6, x3); + LD(x6, x5, 8); // ll + SD(x6, wback, fixedaddress); + B_MARK3_nocond; MARK; - MOV32w(x4, 0x8000); } - MARK2; - SH(x4, wback, fixedaddress); - break; - case 3: - INST_NAME("FISTP Ew, ST0"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_F); - u8 = x87_setround(dyn, ninst, x1, x2); - addr = geted(dyn, addr, ninst, nextop, &wback, x2, x3, &fixedaddress, rex, NULL, 1, 0); + if (!BOX64ENV(dynarec_fastround)) { FSFLAGSI(0); // reset all bits } - if (ST_IS_F(0)) { - FCVTWS(x4, v1, RD_DYN); - } else { - FCVTWD(x4, v1, RD_DYN); - } - x87_restoreround(dyn, ninst, u8); + FCVTLD(x4, v1, RD_DYN); if (!BOX64ENV(dynarec_fastround)) { FRFLAGS(x5); // get back FPSR to check the IOC bit ANDI(x5, x5, 1 << FR_NV); - BNEZ_MARK(x5); - SLLIW(x5, x4, 16); - SRAIW(x5, x5, 16); - BEQ_MARK2(x5, x4); - MARK; - MOV32w(x4, 0x8000); + BEQ_MARK2(x5, xZR); + MOV64x(x4, 0x8000000000000000LL); } MARK2; - SH(x4, wback, fixedaddress); - X87_POP_OR_FAIL(dyn, ninst, x3); - break; - case 4: - INST_NAME("FBLD ST0, tbytes"); - X87_PUSH_EMPTY_OR_FAIL(dyn, ninst, x1); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0); - s0 = x87_stackcount(dyn, ninst, x3); - CALL(fpu_fbld, -1, ed, 0); - x87_unstackcount(dyn, ninst, x3, s0); - break; - case 5: - INST_NAME("FILD ST0, i64"); - X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_I64); - addr = geted(dyn, addr, ninst, nextop, &wback, x2, x3, &fixedaddress, rex, NULL, 1, 0); - - if (ST_IS_I64(0)) { - FLD(v1, wback, fixedaddress); - } else { - LD(x1, wback, fixedaddress); - if (rex.is32bits) { - // need to also feed the STll stuff... - ADDI(x4, xEmu, offsetof(x64emu_t, fpu_ll)); - LWU(x5, xEmu, offsetof(x64emu_t, top)); - int a = 0 - dyn->e.x87stack; - if (a) { - ADDIW(x5, x5, a); - ANDI(x5, x5, 0x7); - } - SLLI(x5, x5, 4); // fpu_ll is 2 i64 - ADD(x5, x5, x4); - SD(x1, x5, 8); // ll - } - FCVTDL(v1, x1, RD_DYN); - if (rex.is32bits) { - FSD(v1, x5, 0); // ref - } - } - break; - case 6: - INST_NAME("FBSTP tbytes, ST0"); - x87_forget(dyn, ninst, x1, x2, 0); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0); - s0 = x87_stackcount(dyn, ninst, x3); - CALL(fpu_fbst, -1, ed, 0); - x87_unstackcount(dyn, ninst, x3, s0); - X87_POP_OR_FAIL(dyn, ninst, x3); - break; - case 7: - INST_NAME("FISTP i64, ST0"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_I64); - if (!ST_IS_I64(0)) { - u8 = x87_setround(dyn, ninst, x1, x2); - } - addr = geted(dyn, addr, ninst, nextop, &wback, x2, x3, &fixedaddress, rex, NULL, 1, 0); - - if (ST_IS_I64(0)) { - FSD(v1, wback, fixedaddress); - } else { - if (rex.is32bits) { - // need to check STll first... - ADDI(x4, xEmu, offsetof(x64emu_t, fpu_ll)); - LWU(x5, xEmu, offsetof(x64emu_t, top)); - int a = 0 - dyn->e.x87stack; - if (a) { - ADDIW(x5, x5, a); - ANDI(x5, x5, 0x7); - } - SLLI(x5, x5, 4); // fpu_ll is 2 i64 - ADD(x5, x5, x4); - FMVXD(x3, v1); - LD(x6, x5, 0); // ref - BNE_MARK(x6, x3); - LD(x6, x5, 8); // ll - SD(x6, wback, fixedaddress); - B_MARK3_nocond; - MARK; - } - - if (!BOX64ENV(dynarec_fastround)) { - FSFLAGSI(0); // reset all bits - } - FCVTLD(x4, v1, RD_DYN); - if (!BOX64ENV(dynarec_fastround)) { - FRFLAGS(x5); // get back FPSR to check the IOC bit - ANDI(x5, x5, 1 << FR_NV); - BEQ_MARK2(x5, xZR); - MOV64x(x4, 0x8000000000000000LL); - } - MARK2; - SD(x4, wback, fixedaddress); - MARK3; - x87_restoreround(dyn, ninst, u8); - } - X87_POP_OR_FAIL(dyn, ninst, x3); - break; - default: - DEFAULT; - break; - } - } + SD(x4, wback, fixedaddress); + MARK3; + x87_restoreround(dyn, ninst, u8); + } + X87_POP_OR_FAIL(dyn, ninst, x3); + break; + default: + DEFAULT; + break; + } return addr; } |