| author | Yang Liu <liuyang22@iscas.ac.cn> | 2024-11-28 22:14:45 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-11-28 15:14:45 +0100 |
| commit | 843fe30e4622a361923a9a748a4262c91f8af630 (patch) | |
| tree | 03f40257f704243255336e72267bfbed6b855d10 /src | |
| parent | 0da561cd2034a2df700a050a288f92dfa2c79e40 (diff) | |
[RV64] Added nan propagation emulation for interpreter and DynaRec (#2091)
* [RV64] Added nan propagation emulation for interpreter and DynaRec
* oops
Diffstat (limited to 'src')
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_f20f.c | 24 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_f20f_vector.c | 108 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_f30f.c | 39 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_f30f_vector.c | 30 |
| -rw-r--r-- | src/emu/modrm.h | 12 |
| -rw-r--r-- | src/emu/x64run64.c | 2 |
| -rw-r--r-- | src/emu/x64runf20f.c | 37 |
| -rw-r--r-- | src/emu/x64runf30f.c | 7 |
8 files changed, 120 insertions, 139 deletions
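The interpreter side of the change is driven by the new NAN_PROPAGATION macro in src/emu/modrm.h (see the patch below): a NaN already present in the destination operand is kept, otherwise a NaN source is copied into the destination and the arithmetic is skipped, which mirrors the x86 scalar SSE rule. A minimal standalone sketch of that rule follows; the `addsd_like()` name and the `main()` harness are illustrative only, not part of the patch:

```c
/* Sketch of the NaN-propagation rule the patch emulates for scalar SSE ops
 * (ADDSD/MULSD/SUBSD/DIVSD and their single-precision variants): a NaN in
 * the destination wins, otherwise a NaN source is propagated and the
 * arithmetic is skipped.  Function name and harness are illustrative only. */
#include <math.h>
#include <stdio.h>

static double addsd_like(double dest, double src)
{
    if (isnan(dest))
        return dest;      /* destination NaN is kept unchanged */
    if (isnan(src))
        return src;       /* source NaN is propagated into the destination */
    return dest + src;    /* normal arithmetic path */
}

int main(void)
{
    printf("%g\n", addsd_like(1.0, 2.0)); /* 3 */
    printf("%g\n", addsd_like(NAN, 2.0)); /* nan: dest kept */
    printf("%g\n", addsd_like(1.0, NAN)); /* nan: src propagated */
    return 0;
}
```

RISC-V floating-point arithmetic returns the canonical NaN instead of propagating an input NaN, which is what the extra FEQD/FEQS checks and FMVD/FMVS moves in the DynaRec hunks below compensate for when box64_dynarec_fastnan is disabled.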
diff --git a/src/dynarec/rv64/dynarec_rv64_f20f.c b/src/dynarec/rv64/dynarec_rv64_f20f.c
index b2cae02d..6031d97d 100644
--- a/src/dynarec/rv64/dynarec_rv64_f20f.c
+++ b/src/dynarec/rv64/dynarec_rv64_f20f.c
@@ -211,7 +211,11 @@ uintptr_t dynarec64_F20F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             FADDD(v0, v0, v1);
             if(!box64_dynarec_fastnan) {
                 AND(x3, x3, x4);
-                CBZ_NEXT(x3);
+                BNEZ_MARK(x3);
+                CBNZ_NEXT(x4);
+                FMVD(v0, v1);
+                B_NEXT_nocond;
+                MARK;
                 FEQD(x3, v0, v0);
                 CBNZ_NEXT(x3);
                 FNEGD(v0, v0);
@@ -229,7 +233,11 @@ uintptr_t dynarec64_F20F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             FMULD(v0, v0, v1);
             if(!box64_dynarec_fastnan) {
                 AND(x3, x3, x4);
-                CBZ_NEXT(x3);
+                BNEZ_MARK(x3);
+                CBNZ_NEXT(x4);
+                FMVD(v0, v1);
+                B_NEXT_nocond;
+                MARK;
                 FEQD(x3, v0, v0);
                 CBNZ_NEXT(x3);
                 FNEGD(v0, v0);
@@ -260,7 +268,11 @@ uintptr_t dynarec64_F20F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             FSUBD(v0, v0, v1);
             if(!box64_dynarec_fastnan) {
                 AND(x3, x3, x4);
-                CBZ_NEXT(x3);
+                BNEZ_MARK(x3);
+                CBNZ_NEXT(x4);
+                FMVD(v0, v1);
+                B_NEXT_nocond;
+                MARK;
                 FEQD(x3, v0, v0);
                 CBNZ_NEXT(x3);
                 FNEGD(v0, v0);
@@ -293,7 +305,11 @@ uintptr_t dynarec64_F20F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             FDIVD(v0, v0, v1);
             if(!box64_dynarec_fastnan) {
                 AND(x3, x3, x4);
-                CBZ_NEXT(x3);
+                BNEZ_MARK(x3);
+                CBNZ_NEXT(x4);
+                FMVD(v0, v1);
+                B_NEXT_nocond;
+                MARK;
                 FEQD(x3, v0, v0);
                 CBNZ_NEXT(x3);
                 FNEGD(v0, v0);
diff --git a/src/dynarec/rv64/dynarec_rv64_f20f_vector.c b/src/dynarec/rv64/dynarec_rv64_f20f_vector.c
index 6c1678f6..34646907 100644
--- a/src/dynarec/rv64/dynarec_rv64_f20f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_f20f_vector.c
@@ -243,6 +243,7 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             }
             break;
         case 0x58:
+            if (!box64_dynarec_fastnan) return 0;
             INST_NAME("ADDSD Gx, Ex");
             nextop = F8;
             SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
@@ -257,32 +258,11 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
                 VMV_S_X(v1, x4);
                 GETGX_vector(v0, 1, VECTOR_SEW64);
             }
-            if (box64_dynarec_fastnan) {
-                VECTOR_LOAD_VMASK(0b01, x4, 1);
-                VFADD_VV(v0, v0, v1, VECTOR_MASKED);
-            } else {
-                VFMV_F_S(v0, v0);
-                VFMV_F_S(v1, v1);
-                FEQD(x3, v0, v0);
-                FEQD(x4, v1, v1);
-                FADDD(v0, v0, v1);
-                AND(x3, x3, x4);
-                BEQZ_MARK(x3);
-                FEQD(x3, v0, v0);
-                BNEZ_MARK(x3);
-                FNEGD(v0, v0);
-                MARK;
-                if (rv64_xtheadvector) {
-                    d0 = fpu_get_scratch(dyn);
-                    VFMV_S_F(d0, v0);
-                    VECTOR_LOAD_VMASK(0b01, x4, 1);
-                    VMERGE_VVM(v0, v0, d0); // implies VMASK
-                } else {
-                    VFMV_S_F(v0, v0);
-                }
-            }
+            VECTOR_LOAD_VMASK(0b01, x4, 1);
+            VFADD_VV(v0, v0, v1, VECTOR_MASKED);
             break;
         case 0x59:
+            if (!box64_dynarec_fastnan) return 0;
             INST_NAME("MULSD Gx, Ex");
             nextop = F8;
             SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
@@ -297,30 +277,8 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
                 VMV_S_X(v1, x4);
                 GETGX_vector(v0, 1, VECTOR_SEW64);
             }
-            if (box64_dynarec_fastnan) {
-                VECTOR_LOAD_VMASK(0b01, x4, 1);
-                VFMUL_VV(v0, v0, v1, VECTOR_MASKED);
-            } else {
-                VFMV_F_S(v0, v0);
-                VFMV_F_S(v1, v1);
-                FEQD(x3, v0, v0);
-                FEQD(x4, v1, v1);
-                FMULD(v0, v0, v1);
-                AND(x3, x3, x4);
-                BEQZ_MARK(x3);
-                FEQD(x3, v0, v0);
-                BNEZ_MARK(x3);
-                FNEGD(v0, v0);
-                MARK;
-                if (rv64_xtheadvector) {
-                    d0 = fpu_get_scratch(dyn);
-                    VFMV_S_F(d0, v0);
-                    VECTOR_LOAD_VMASK(0b01, x4, 1);
-                    VMERGE_VVM(v0, v0, d0); // implies VMASK
-                } else {
-                    VFMV_S_F(v0, v0);
-                }
-            }
+            VECTOR_LOAD_VMASK(0b01, x4, 1);
+            VFMUL_VV(v0, v0, v1, VECTOR_MASKED);
             break;
         case 0x5A:
             INST_NAME("CVTSD2SS Gx, Ex");
@@ -364,6 +322,7 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             }
             break;
         case 0x5C:
+            if (!box64_dynarec_fastnan) return 0;
             INST_NAME("SUBSD Gx, Ex");
             nextop = F8;
             SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
@@ -378,30 +337,8 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
                 VMV_S_X(v1, x4);
                 GETGX_vector(v0, 1, VECTOR_SEW64);
             }
-            if (box64_dynarec_fastnan) {
-                VECTOR_LOAD_VMASK(0b01, x4, 1);
-                VFSUB_VV(v0, v0, v1, VECTOR_MASKED);
-            } else {
-                VFMV_F_S(v0, v0);
-                VFMV_F_S(v1, v1);
-                FEQD(x3, v0, v0);
-                FEQD(x4, v1, v1);
-                FSUBD(v0, v0, v1);
-                AND(x3, x3, x4);
-                BEQZ_MARK(x3);
-                FEQD(x3, v0, v0);
-                BNEZ_MARK(x3);
-                FNEGD(v0, v0);
-                MARK;
-                if (rv64_xtheadvector) {
-                    d0 = fpu_get_scratch(dyn);
-                    VFMV_S_F(d0, v0);
-                    VECTOR_LOAD_VMASK(0b01, x4, 1);
-                    VMERGE_VVM(v0, v0, d0); // implies VMASK
-                } else {
-                    VFMV_S_F(v0, v0);
-                }
-            }
+            VECTOR_LOAD_VMASK(0b01, x4, 1);
+            VFSUB_VV(v0, v0, v1, VECTOR_MASKED);
             break;
         case 0x5D:
             INST_NAME("MINSD Gx, Ex");
@@ -445,6 +382,7 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             }
             break;
         case 0x5E:
+            if (!box64_dynarec_fastnan) return 0;
             INST_NAME("DIVSD Gx, Ex");
             nextop = F8;
             SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
@@ -459,30 +397,8 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
                 VMV_S_X(v1, x4);
                 GETGX_vector(v0, 1, VECTOR_SEW64);
             }
-            if (!box64_dynarec_fastnan) {
-                VFMV_F_S(v0, v0);
-                VFMV_F_S(v1, v1);
-                FEQD(x3, v0, v0);
-                FEQD(x4, v1, v1);
-                FDIVD(v0, v0, v1);
-                AND(x3, x3, x4);
-                BEQZ_MARK(x3);
-                FEQD(x3, v0, v0);
-                BNEZ_MARK(x3);
-                FNEGD(v0, v0);
-                MARK;
-                if (rv64_xtheadvector) {
-                    d0 = fpu_get_scratch(dyn);
-                    VFMV_S_F(d0, v0);
-                    VECTOR_LOAD_VMASK(0b01, x4, 1);
-                    VMERGE_VVM(v0, v0, d0); // implies VMASK
-                } else {
-                    VFMV_S_F(v0, v0);
-                }
-            } else {
-                VECTOR_LOAD_VMASK(0b01, x4, 1);
-                VFDIV_VV(v0, v0, v1, VECTOR_MASKED);
-            }
+            VECTOR_LOAD_VMASK(0b01, x4, 1);
+            VFDIV_VV(v0, v0, v1, VECTOR_MASKED);
             break;
         case 0x5F:
             INST_NAME("MAXSD Gx, Ex");
diff --git a/src/dynarec/rv64/dynarec_rv64_f30f.c b/src/dynarec/rv64/dynarec_rv64_f30f.c
index 3cf3e630..de0e21e5 100644
--- a/src/dynarec/rv64/dynarec_rv64_f30f.c
+++ b/src/dynarec/rv64/dynarec_rv64_f30f.c
@@ -209,7 +209,18 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             nextop = F8;
             GETGXSS(v0);
             GETEXSS(d0, 0);
+            if (!box64_dynarec_fastnan) {
+                FEQS(x3, v0, v0);
+                FEQS(x4, d0, d0);
+            }
             FADDS(v0, v0, d0);
+            if (!box64_dynarec_fastnan) {
+                AND(x3, x3, x4);
+                BNEZ_MARK(x3);
+                CBNZ_NEXT(x4);
+                FMVS(v0, d0);
+                MARK;
+            }
             break;
         case 0x59:
             INST_NAME("MULSS Gx, Ex");
@@ -223,7 +234,11 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             FMULS(v0, v0, d0);
             if (!box64_dynarec_fastnan) {
                 AND(x3, x3, x4);
-                CBZ_NEXT(x3);
+                BNEZ_MARK(x3);
+                CBNZ_NEXT(x4);
+                FMVS(v0, d0);
+                B_NEXT_nocond;
+                MARK;
                 FEQS(x3, v0, v0);
                 CBNZ_NEXT(x3);
                 FNEGS(v0, v0);
@@ -268,7 +283,18 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             nextop = F8;
             GETGXSS(v0);
             GETEXSS(d0, 0);
+            if (!box64_dynarec_fastnan) {
+                FEQS(x3, v0, v0);
+                FEQS(x4, d0, d0);
+            }
             FSUBS(v0, v0, d0);
+            if (!box64_dynarec_fastnan) {
+                AND(x3, x3, x4);
+                BNEZ_MARK(x3);
+                CBNZ_NEXT(x4);
+                FMVS(v0, d0);
+                MARK;
+            }
             break;
         case 0x5D:
             INST_NAME("MINSS Gx, Ex");
@@ -290,7 +316,18 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             nextop = F8;
             GETGXSS(v0);
             GETEXSS(d0, 0);
+            if (!box64_dynarec_fastnan) {
+                FEQS(x3, v0, v0);
+                FEQS(x4, d0, d0);
+            }
             FDIVS(v0, v0, d0);
+            if (!box64_dynarec_fastnan) {
+                AND(x3, x3, x4);
+                BNEZ_MARK(x3);
+                CBNZ_NEXT(x4);
+                FMVS(v0, d0);
+                MARK;
+            }
             break;
         case 0x5F:
             INST_NAME("MAXSS Gx, Ex");
diff --git a/src/dynarec/rv64/dynarec_rv64_f30f_vector.c b/src/dynarec/rv64/dynarec_rv64_f30f_vector.c
index 3288938d..7cd82dfa 100644
--- a/src/dynarec/rv64/dynarec_rv64_f30f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_f30f_vector.c
@@ -277,6 +277,7 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             VFRDIV_VF(v0, v1, v1, VECTOR_MASKED);
             break;
         case 0x58:
+            if (!box64_dynarec_fastnan) return 0;
             INST_NAME("ADDSS Gx, Ex");
             nextop = F8;
             SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
@@ -295,6 +296,7 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             VFADD_VV(v0, v0, v1, VECTOR_MASKED);
             break;
         case 0x59:
+            if (!box64_dynarec_fastnan) return 0;
             INST_NAME("MULSS Gx, Ex");
             nextop = F8;
             SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
@@ -309,30 +311,8 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
                 VMV_S_X(v1, x4);
                 GETGX_vector(v0, 1, VECTOR_SEW32);
             }
-            if (box64_dynarec_fastnan) {
-                VECTOR_LOAD_VMASK(0b0001, x4, 1);
-                VFMUL_VV(v0, v0, v1, VECTOR_MASKED);
-            } else {
-                VFMV_F_S(v0, v0);
-                VFMV_F_S(v1, v1);
-                FEQS(x3, v0, v0);
-                FEQS(x4, v1, v1);
-                FMULS(v0, v0, v1);
-                AND(x3, x3, x4);
-                BEQZ_MARK(x3);
-                FEQS(x3, v0, v0);
-                BNEZ_MARK(x3);
-                FNEGS(v0, v0);
-                MARK;
-                if (rv64_xtheadvector) {
-                    d0 = fpu_get_scratch(dyn);
-                    VFMV_S_F(d0, v0);
-                    VECTOR_LOAD_VMASK(0b0001, x4, 1);
-                    VMERGE_VVM(v0, v0, d0); // implies VMASK
-                } else {
-                    VFMV_S_F(v0, v0);
-                }
-            }
+            VECTOR_LOAD_VMASK(0b0001, x4, 1);
+            VFMUL_VV(v0, v0, v1, VECTOR_MASKED);
             break;
         case 0x5A:
             INST_NAME("CVTSS2SD Gx, Ex");
@@ -377,6 +357,7 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             }
             break;
         case 0x5C:
+            if (!box64_dynarec_fastnan) return 0;
             INST_NAME("SUBSS Gx, Ex");
             nextop = F8;
             SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
@@ -436,6 +417,7 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             }
             break;
         case 0x5E:
+            if (!box64_dynarec_fastnan) return 0;
             INST_NAME("DIVSS Gx, Ex");
             nextop = F8;
             SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
diff --git a/src/emu/modrm.h b/src/emu/modrm.h
index d62d13c6..edb391d5 100644
--- a/src/emu/modrm.h
+++ b/src/emu/modrm.h
@@ -119,6 +119,18 @@

 #define MODREG ((nextop&0xC0)==0xC0)

+#if defined(__riscv)
+#define NAN_PROPAGATION(dest, src, break_or_continue) \
+    if (isnan(dest)) {                                \
+        break_or_continue;                            \
+    } else if (isnan(src)) {                          \
+        (dest) = (src);                               \
+        break_or_continue;                            \
+    }
+#else
+#define NAN_PROPAGATION(dest, src, break_or_continue)
+#endif
+
 #define GOCOND(BASE, PREFIX, COND, NOTCOND, POST)\
     case BASE+0x0:                              \
         PREFIX                                  \
diff --git a/src/emu/x64run64.c b/src/emu/x64run64.c
index 00278e3b..9523fbb8 100644
--- a/src/emu/x64run64.c
+++ b/src/emu/x64run64.c
@@ -210,6 +210,7 @@ uintptr_t Run64(x64emu_t *emu, rex_t rex, int seg, uintptr_t addr)
             nextop = F8;
             GETEX_OFFS(0, tlsdata);
             GETGX;
+            NAN_PROPAGATION(GX->f[0], EX->f[0], break);
             GX->f[0] += EX->f[0];
             break;

@@ -223,6 +224,7 @@ uintptr_t Run64(x64emu_t *emu, rex_t rex, int seg, uintptr_t addr)
             nextop = F8;
             GETEX_OFFS(0, tlsdata);
             GETGX;
+            NAN_PROPAGATION(GX->f[0], EX->f[0], break);
             GX->f[0] *= EX->f[0];
             break;

diff --git a/src/emu/x64runf20f.c b/src/emu/x64runf20f.c
index 097d0e32..bcc3a9e5 100644
--- a/src/emu/x64runf20f.c
+++ b/src/emu/x64runf20f.c
@@ -215,20 +215,26 @@ uintptr_t RunF20F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step)
             if((isinf(GX->d[0]) && isinf(EX->d[0]) && (EX->q[0]&0x8000000000000000LL)!=(GX->q[0]&0x8000000000000000LL)))
                 GX->d[0] = -NAN;
             else
-            #endif
-            GX->d[0] += EX->d[0];
+#endif
+            {
+                NAN_PROPAGATION(GX->d[0], EX->d[0], break);
+                GX->d[0] += EX->d[0];
+            }
             break;
         case 0x59: /* MULSD Gx, Ex */
             nextop = F8;
             _GETEX(0);
             GETGX;
 #ifndef NOALIGN
-            // mul generate a -NAN only if doing (+/-)inf * (+/-)0
-            if((isinf(GX->d[0]) && EX->d[0]==0.0) || (isinf(EX->d[0]) && GX->d[0]==0.0))
-                GX->d[0] = -NAN;
-            else
-            #endif
-            GX->d[0] *= EX->d[0];
+            // mul generate a -NAN only if doing (+/-)inf * (+/-)0
+            if ((isinf(GX->d[0]) && EX->d[0] == 0.0) || (isinf(EX->d[0]) && GX->d[0] == 0.0))
+                GX->d[0] = -NAN;
+            else
+#endif
+            {
+                NAN_PROPAGATION(GX->d[0], EX->d[0], break);
+                GX->d[0] *= EX->d[0];
+            }
             break;
         case 0x5A: /* CVTSD2SS Gx, Ex */
             nextop = F8;
@@ -242,12 +248,15 @@ uintptr_t RunF20F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step)
             _GETEX(0);
             GETGX;
 #ifndef NOALIGN
-            // sub generate a -NAN only if doing inf - inf
-            if((isinf(GX->d[0]) && isinf(EX->d[0]) && (EX->q[0]&0x8000000000000000LL)==(GX->q[0]&0x8000000000000000LL)))
-                GX->d[0] = -NAN;
-            else
-            #endif
-            GX->d[0] -= EX->d[0];
+            // sub generate a -NAN only if doing inf - inf
+            if ((isinf(GX->d[0]) && isinf(EX->d[0]) && (EX->q[0] & 0x8000000000000000LL) == (GX->q[0] & 0x8000000000000000LL)))
+                GX->d[0] = -NAN;
+            else
+#endif
+            {
+                NAN_PROPAGATION(GX->d[0], EX->d[0], break);
+                GX->d[0] -= EX->d[0];
+            }
             break;
         case 0x5D: /* MINSD Gx, Ex */
             nextop = F8;
diff --git a/src/emu/x64runf30f.c b/src/emu/x64runf30f.c
index 69f8fee7..eb260b4d 100644
--- a/src/emu/x64runf30f.c
+++ b/src/emu/x64runf30f.c
@@ -211,18 +211,21 @@ uintptr_t RunF30F(x64emu_t *emu, rex_t rex, uintptr_t addr)
             nextop = F8;
             GETEX(0);
             GETGX;
+            NAN_PROPAGATION(GX->f[0], EX->f[0], break);
             GX->f[0] = sqrtf(EX->f[0]);
             break;
         case 0x52: /* RSQRTSS Gx, Ex */
             nextop = F8;
             GETEX(0);
             GETGX;
+            NAN_PROPAGATION(GX->f[0], EX->f[0], break);
             GX->f[0] = 1.0f/sqrtf(EX->f[0]);
             break;
         case 0x53: /* RCPSS Gx, Ex */
             nextop = F8;
             GETEX(0);
             GETGX;
+            NAN_PROPAGATION(GX->f[0], EX->f[0], break);
             GX->f[0] = 1.0f/EX->f[0];
             break;

@@ -230,12 +233,14 @@ uintptr_t RunF30F(x64emu_t *emu, rex_t rex, uintptr_t addr)
             nextop = F8;
             GETEX(0);
             GETGX;
+            NAN_PROPAGATION(GX->f[0], EX->f[0], break);
             GX->f[0] += EX->f[0];
             break;
         case 0x59: /* MULSS Gx, Ex */
             nextop = F8;
             GETEX(0);
             GETGX;
+            NAN_PROPAGATION(GX->f[0], EX->f[0], break);
             GX->f[0] *= EX->f[0];
             break;
         case 0x5A: /* CVTSS2SD Gx, Ex */
@@ -264,6 +269,7 @@ uintptr_t RunF30F(x64emu_t *emu, rex_t rex, uintptr_t addr)
             nextop = F8;
             GETEX(0);
             GETGX;
+            NAN_PROPAGATION(GX->f[0], EX->f[0], break);
             GX->f[0] -= EX->f[0];
             break;
         case 0x5D: /* MINSS Gx, Ex */
@@ -277,6 +283,7 @@ uintptr_t RunF30F(x64emu_t *emu, rex_t rex, uintptr_t addr)
             nextop = F8;
             GETEX(0);
             GETGX;
+            NAN_PROPAGATION(GX->f[0], EX->f[0], break);
             GX->f[0] /= EX->f[0];
             break;
         case 0x5F: /* MAXSS Gx, Ex */