diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2022-03-31 10:28:18 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2022-03-31 10:28:18 +0200 |
| commit | 716eb97af90b21ed1085c9c6e1eb8d132d9f3f18 (patch) | |
| tree | 5414a91c1be56719b37c54c1ffec9ace90be1a5d | |
| parent | 3623cb9785a1c7b593ebc65c42c23a4db981ada4 (diff) | |
| download | box64-716eb97af90b21ed1085c9c6e1eb8d132d9f3f18.tar.gz box64-716eb97af90b21ed1085c9c6e1eb8d132d9f3f18.zip | |
More test17 improvements, fixed NAN for mulpd ([DYNAREC] too, introducing BOX64_DYNAREC_FASTNAN env. var. to keep old faster behaviour selectable)
| -rwxr-xr-x | docs/USAGE.md | 5 | ||||
| -rwxr-xr-x | src/dynarec/arm64/dynarec_arm64_660f.c | 17 | ||||
| -rw-r--r-- | src/emu/x64run660f.c | 15 | ||||
| -rwxr-xr-x | src/include/debug.h | 1 | ||||
| -rwxr-xr-x | src/main.c | 10 | ||||
| -rw-r--r-- | tests/ref17.txt | 36 | ||||
| -rwxr-xr-x | tests/test17 | bin | 101568 -> 117240 bytes | |||
| -rw-r--r-- | tests/test17.c | 19 |
8 files changed, 97 insertions, 6 deletions
diff --git a/docs/USAGE.md b/docs/USAGE.md index d9de9d93..052239b9 100755 --- a/docs/USAGE.md +++ b/docs/USAGE.md @@ -131,6 +131,11 @@ Enable/Disable simulation of Strong Memory model * 1 : Enable some Memory Barrier when reading from memory (on some MOV opcode) to simulate Strong Memory Model while trying to limit performance impact (Default when libmonobdwgc-2.0.so is loaded) * 2 : Enable some Memory Barrier when reading from memory (on some MOV opcode) to simulate Strong Memory Model +#### BOX64_DYNAREC_FASTNAN +Enable/Disable generation of -NAN +* 0 : Generate -NAN like on x86 (Default.) +* 1 : Don't do anything special with NAN, to go as fast as possible (was default before this option existed) + #### BOX64_LIBGL * libXXXX set the name for libGL (defaults to libGL.so.1). * /PATH/TO/libGLXXX : Sets the name and path for libGL diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c index c739ee15..abd12c04 100755 --- a/src/dynarec/arm64/dynarec_arm64_660f.c +++ b/src/dynarec/arm64/dynarec_arm64_660f.c @@ -703,8 +703,21 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n INST_NAME("MULPD Gx, Ex"); nextop = F8; GETEX(q0, 0); - GETGX(v0); - VFMULQD(v0, v0, q0); + GETGX(q1); + if(!box64_dynarec_fastnan) { + v0 = fpu_get_scratch(dyn); + v1 = fpu_get_scratch(dyn); + // check if any input value was NAN + VFMAXQD(v0, q0, q1); // propagate NAN + FCMEQQD(v0, v0, v0); // 0 if NAN, 1 if not NAN + } + VFMULQD(q1, q1, q0); + if(!box64_dynarec_fastnan) { + FCMEQQD(v1, q1, q1); // 0 => out is NAN + VBICQ(v1, v0, v1); // forget it in any input was a NAN already + VSHLQ_64(v1, v1, 63); // only keep the sign bit + VORRQ(q1, q1, v1); // NAN -> -NAN + } break; case 0x5A: INST_NAME("CVTPD2PS Gx, Ex"); diff --git a/src/emu/x64run660f.c b/src/emu/x64run660f.c index 09354192..5db9994a 100644 --- a/src/emu/x64run660f.c +++ b/src/emu/x64run660f.c @@ -796,8 +796,8 @@ int Run660F(x64emu_t *emu, rex_t rex) GETGX; for (int 
i=0; i<2; ++i) { #ifndef NOALIGN - if(EX->d[i]<0.0) // on x86, default nan are negative - GX->d[i] = -NAN; + if(EX->d[i]<0.0) // on x86, default nan are negative + GX->d[i] = -NAN; // but input NAN are not touched (so sqrt(+nan) -> +nan) else #endif GX->d[i] = sqrt(EX->d[i]); @@ -843,8 +843,15 @@ int Run660F(x64emu_t *emu, rex_t rex) nextop = F8; GETEX(0); GETGX; - GX->d[0] *= EX->d[0]; - GX->d[1] *= EX->d[1]; + for(int i=0; i<2; ++i) { + #ifndef NOALIGN + // mul generate a -NAN only if doing (+/-)inf * (+/-)0 + if((isinf(GX->d[i]) && EX->d[i]==0.0) || (isinf(EX->d[i]) && GX->d[i]==0.0)) + GX->d[i] = -NAN; + else + #endif + GX->d[i] *= EX->d[i]; + } break; case 0x5A: /* CVTPD2PS Gx, Ex */ nextop = F8; diff --git a/src/include/debug.h b/src/include/debug.h index b5f08599..b8ce4995 100755 --- a/src/include/debug.h +++ b/src/include/debug.h @@ -15,6 +15,7 @@ extern int box64_dynarec_forced; extern uintptr_t box64_nodynarec_start, box64_nodynarec_end; extern int box64_dynarec_bigblock; extern int box64_dynarec_strongmem; +extern int box64_dynarec_fastnan; #ifdef ARM64 extern int arm64_asimd; extern int arm64_aes; diff --git a/src/main.c b/src/main.c index 208c251d..92d23936 100755 --- a/src/main.c +++ b/src/main.c @@ -45,6 +45,7 @@ int box64_dynarec_dump = 0; int box64_dynarec_forced = 0; int box64_dynarec_bigblock = 1; int box64_dynarec_strongmem = 0; +int box64_dynarec_fastnan = 0; uintptr_t box64_nodynarec_start = 0; uintptr_t box64_nodynarec_end = 0; #ifdef ARM64 @@ -405,6 +406,15 @@ void LoadLogEnv() if(box64_dynarec_strongmem) printf_log(LOG_INFO, "Dynarec will try to emulate a strong memory model%s\n", (box64_dynarec_strongmem==1)?" 
with limited performance loss":""); } + p = getenv("BOX64_DYNAREC_FASTNAN"); + if(p) { + if(strlen(p)==1) { + if(p[0]>='0' && p[0]<='1') + box64_dynarec_fastnan = p[0]-'0'; + } + if(box64_dynarec_fastnan) + printf_log(LOG_INFO, "Dynarec will not try to normalize generated NAN\n"); + } p = getenv("BOX64_NODYNAREC"); if(p) { if (strchr(p,'-')) { diff --git a/tests/ref17.txt b/tests/ref17.txt index 4ed0bbc0..674f824d 100644 --- a/tests/ref17.txt +++ b/tests/ref17.txt @@ -198,3 +198,39 @@ psqrtpd(1 2 ) = 1 1.41421 psqrtpd(0 -2 ) = 0 0xfff8000000000000 psqrtpd(inf -inf ) = inf 0xfff8000000000000 psqrtpd(0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -0 +andpd(1 2 , 0 -2 ) = 0 2 +andpd(0 -2 , inf -inf ) = 0 -2 +andpd(1 2 , 0x7ff8000000000000 -0 ) = 1 0 +andpd(0 -2 , 0x7ff8000000000000 -0 ) = 0 -0 +andpd(inf -inf , 0x7ff8000000000000 -0 ) = inf -0 +andpd(0x7ff8000000000000 -0 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -0 +andnpd(1 2 , 0 -2 ) = 0 -0 +andnpd(0 -2 , inf -inf ) = inf 1 +andnpd(1 2 , 0x7ff8000000000000 -0 ) = 3 -0 +andnpd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 0 +andnpd(inf -inf , 0x7ff8000000000000 -0 ) = 1.11254e-308 0 +andnpd(0x7ff8000000000000 -0 , 0x7ff8000000000000 -0 ) = 0 0 +orpd(1 2 , 0 -2 ) = 1 -2 +orpd(0 -2 , inf -inf ) = inf -inf +orpd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -2 +orpd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -2 +orpd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -inf +orpd(0x7ff8000000000000 -0 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -0 +xorpd(1 2 , 0 -2 ) = 1 -0 +xorpd(0 -2 , inf -inf ) = inf 1 +xorpd(1 2 , 0x7ff8000000000000 -0 ) = 3 -2 +xorpd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 2 +xorpd(inf -inf , 0x7ff8000000000000 -0 ) = 1.11254e-308 inf +xorpd(0x7ff8000000000000 -0 , 0x7ff8000000000000 -0 ) = 0 0 +addpd(1 2 , 0 -2 ) = 1 0 +addpd(0 -2 , inf -inf ) = inf -inf +addpd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 2 +addpd(0 -2 , 0x7ff8000000000000 -0 ) = 
0x7ff8000000000000 -2 +addpd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -inf +addpd(0x7ff8000000000000 -0 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -0 +mulpd(1 2 , 0 -2 ) = 0 -4 +mulpd(0 -2 , inf -inf ) = 0xfff8000000000000 inf +mulpd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -0 +mulpd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 0 +mulpd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 0xfff8000000000000 +mulpd(0x7ff8000000000000 -0 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 0 diff --git a/tests/test17 b/tests/test17 index 264d8d40..9c23e0a3 100755 --- a/tests/test17 +++ b/tests/test17 Binary files differdiff --git a/tests/test17.c b/tests/test17.c index 2f55b431..e885892d 100644 --- a/tests/test17.c +++ b/tests/test17.c @@ -281,6 +281,11 @@ printf(N " %g, %g => %g\n", b, a, *(float*)&r); a128.md = _mm_##A##_pd(A1.md); \ printf("%s(", #C); print_pd(A1); \ printf(") = "); print_pd(a128); printf("\n"); + #define GO2pd(A, C, A1, A2) \ + a128.md = _mm_##A##_pd(A1.md, A2.md); \ + printf("%s(", #C); print_pd(A1); \ + printf(", "); print_pd(A2); \ + printf(") = "); print_pd(a128); printf("\n"); GO2(shuffle, 8, pshufb, a128_8, b128_8) @@ -329,6 +334,20 @@ printf(N " %g, %g => %g\n", b, a, *(float*)&r); GO1pd(sqrt, psqrtpd, b128_pd) GO1pd(sqrt, psqrtpd, c128_pd) GO1pd(sqrt, psqrtpd, d128_pd) + #define MULITGO2pd(A, B) \ + GO2pd(A, B, a128_pd, b128_pd) \ + GO2pd(A, B, b128_pd, c128_pd) \ + GO2pd(A, B, a128_pd, d128_pd) \ + GO2pd(A, B, b128_pd, d128_pd) \ + GO2pd(A, B, c128_pd, d128_pd) \ + GO2pd(A, B, d128_pd, d128_pd) + MULITGO2pd(and, andpd) + MULITGO2pd(andnot, andnpd) + MULITGO2pd(or, orpd) + MULITGO2pd(xor, xorpd) + MULITGO2pd(add, addpd) + MULITGO2pd(mul, mulpd) return 0; } + |