diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2022-03-31 09:11:54 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2022-03-31 09:11:54 +0200 |
| commit | 3623cb9785a1c7b593ebc65c42c23a4db981ada4 (patch) | |
| tree | 1f2a376d8eb416f2e0a9577ffc126241dad8e592 | |
| parent | 3f641c2b67ae210caa6ebe7a6e4253038dada6a5 (diff) | |
| download | box64-3623cb9785a1c7b593ebc65c42c23a4db981ada4.tar.gz box64-3623cb9785a1c7b593ebc65c42c23a4db981ada4.zip | |
Improved test17 (SSE) and fixed the NaN border case for the PSQRTPD opcode
| -rw-r--r-- | src/emu/x64run660f.c | 10 | ||||
| -rw-r--r-- | tests/ref17.txt | 17 | ||||
| -rwxr-xr-x | tests/test17 | bin | 95272 -> 101568 bytes | |||
| -rw-r--r-- | tests/test17.c | 61 |
4 files changed, 83 insertions, 5 deletions
diff --git a/src/emu/x64run660f.c b/src/emu/x64run660f.c index 148c99eb..09354192 100644 --- a/src/emu/x64run660f.c +++ b/src/emu/x64run660f.c @@ -794,8 +794,14 @@ int Run660F(x64emu_t *emu, rex_t rex) nextop = F8; GETEX(0); GETGX; - GX->d[0] = sqrt(EX->d[0]); - GX->d[1] = sqrt(EX->d[1]); + for (int i=0; i<2; ++i) { + #ifndef NOALIGN + if(EX->d[i]<0.0) // on x86, default nan are negative + GX->d[i] = -NAN; + else + #endif + GX->d[i] = sqrt(EX->d[i]); + } break; case 0x54: /* ANDPD Gx, Ex */ diff --git a/tests/ref17.txt b/tests/ref17.txt index 43882799..4ed0bbc0 100644 --- a/tests/ref17.txt +++ b/tests/ref17.txt @@ -181,3 +181,20 @@ pmovzxbq(0xffffffffffffffff 0x8000000000000000 ) = 0xff 0xff pmovzxwd(0xffffffff 0x80000000 0x7fffffff 0x0 ) = 0xffff 0xffff 0x0 0x8000 pmovzxwq(0xffffffffffffffff 0x8000000000000000 ) = 0xffff 0xffff pmovzxdq(0xffffffffffffffff 0x8000000000000000 ) = 0xffffffff 0xffffffff +pminsd(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0xffffffff 0x80000000 0x5 0xfffffffe +pmaxsd(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0x1 0x80000000 0x7fffffff 0x0 +pblendw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 0) = 0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 +pblendw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 255) = 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 +pblendw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 170) = 0xffff 0x7fff 0x7fff 0xffff 0x1 0x9000 0x3 0x8001 +pblendw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 2) = 0xffff 0x7fff 0x7fff 0x0 0x1 0x2 0x3 0x8001 +palignr(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 0) = 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 
0x81 0x6 0xa 0xf 0x10 0x1 +palignr(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 2) = 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 0xff 0x80 +palignr(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 7) = 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 0xff 0x80 0x7f 0x0 0x1 0x2 0x3 +palignr(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 15) = 0x1 0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 +palignr(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 16) = 0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 +palignr(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 255) = 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 +movmskpd(0xffffffffffffffff 0x8000000000000000 ) = 0x3 +psqrtpd(1 2 ) = 1 1.41421 +psqrtpd(0 -2 ) = 0 0xfff8000000000000 +psqrtpd(inf -inf ) = inf 0xfff8000000000000 +psqrtpd(0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -0 diff --git a/tests/test17 b/tests/test17 index b6be07fc..264d8d40 100755 --- a/tests/test17 +++ b/tests/test17 Binary files differdiff --git a/tests/test17.c b/tests/test17.c index d3992ea2..2f55b431 100644 --- a/tests/test17.c +++ b/tests/test17.c @@ -1,3 +1,5 @@ +// build with gcc -march=corei7 -O2 -g -msse -msse2 test17.c -o test17 +// and -m32 for 32bits version #include <string.h> #include <stdio.h> #include <stddef.h> @@ -103,10 +105,18 @@ const v128 b128_16 = {.u16 = { const v128 b128_32 = {.u32 = { 0x00000001, 0x80000000, 0x00000005, 0xfffffffe }}; +const v128 b128_64 = 
{.u64 = { + 0x0000000000000001LL, 0x8000000000000000LL +}}; const v128 c128_32 = {.u32 = { 0x00000001, 0x80000000, 0x80000005, 0x0000fffe }}; +const v128 a128_pd = {.d64 = { 1.0, 2.0}}; +const v128 b128_pd = {.d64 = { 0.0, -2.0}}; +const v128 c128_pd = {.d64 = { INFINITY, -INFINITY}}; +const v128 d128_pd = {.d64 = { NAN, -0.0}}; + void print_8(v128 v) { for(int i=0; i<16; ++i) printf("0x%x ", v.u8[i]); @@ -123,6 +133,20 @@ void print_64(v128 v) { for(int i=0; i<2; ++i) printf("0x%llx ", v.u64[i]); } +void print_ps(v128 v) { + for(int i=0; i<4; ++i) + if(isnanf(v.f32[i])) + printf("nan "); + else + printf("%g ", v.f32[i]); +} +void print_pd(v128 v) { + for(int i=0; i<2; ++i) + if(isnan(v.d64[i])) + printf("0x%llx ", v.u64[i]); + else + printf("%g ", v.d64[i]); +} int main(int argc, const char** argv) { @@ -226,13 +250,18 @@ printf(N " %g, %g => %g\n", b, a, *(float*)&r); #define GO1(A, N, C) \ a128.mm = _mm_##A##_epi##N(a128_##N.mm); \ - printf("%s(", #C); print_##N(a128_##N); \ + printf("%s(", #C); print_##N(a128_##N); \ printf(") = "); print_##N(a128); printf("\n"); #define GO2(A, N, C, A1, A2) \ a128.mm = _mm_##A##_epi##N(A1.mm, A2.mm); \ - printf("%s(", #C); print_##N(A1); \ + printf("%s(", #C); print_##N(A1); \ printf(", "); print_##N(A2); \ printf(") = "); print_##N(a128); printf("\n"); + #define GO2C(A, N, C, A1, A2, I) \ + a128.mm = _mm_##A##_epi##N(A1.mm, A2.mm, I); \ + printf("%s(", #C); print_##N(A1); \ + printf(", "); print_##N(A2); \ + printf("%d) = ", I); print_##N(a128); printf("\n"); #define GO2i(A, A1, A2) \ i = _mm_##A##_si128(A1.mm, A2.mm); \ printf("p%s(", #A); print_64(A1); \ @@ -244,7 +273,15 @@ printf(N " %g, %g => %g\n", b, a, *(float*)&r); printf(", "); print_##N(A2); \ printf(", "); print_##N(A3); \ printf(") = "); print_##N(a128); printf("\n"); - + #define GO1ipd(A, C, A1) \ + i = _mm_##A##_pd(A1.md); \ + printf("%s(", #C); print_64(A1); \ + printf(") = 0x%x\n", i); + #define GO1pd(A, C, A1) \ + a128.md = _mm_##A##_pd(A1.md); \ + 
printf("%s(", #C); print_pd(A1); \ + printf(") = "); print_pd(a128); printf("\n"); + GO2(shuffle, 8, pshufb, a128_8, b128_8) GO2(hadd, 16, phaddw, a128_16, b128_16) @@ -275,5 +312,23 @@ printf(N " %g, %g => %g\n", b, a, *(float*)&r); GO1(cvtepu16, 32, pmovzxwd); GO1(cvtepu16, 64, pmovzxwq); GO1(cvtepu32, 64, pmovzxdq); + GO2(min, 32, pminsd, a128_32, b128_32) + GO2(max, 32, pmaxsd, a128_32, b128_32) + GO2C(blend, 16, pblendw, a128_16, b128_16, 0) + GO2C(blend, 16, pblendw, a128_16, b128_16, 0xff) + GO2C(blend, 16, pblendw, a128_16, b128_16, 0xaa) + GO2C(blend, 16, pblendw, a128_16, b128_16, 2) + GO2C(alignr, 8, palignr, a128_8, b128_8, 0) + GO2C(alignr, 8, palignr, a128_8, b128_8, 2) + GO2C(alignr, 8, palignr, a128_8, b128_8, 7) + GO2C(alignr, 8, palignr, a128_8, b128_8, 15) + GO2C(alignr, 8, palignr, a128_8, b128_8, 16) + GO2C(alignr, 8, palignr, a128_8, b128_8, 0xff) + GO1ipd(movemask, movmskpd, a128_64) + GO1pd(sqrt, psqrtpd, a128_pd) + GO1pd(sqrt, psqrtpd, b128_pd) + GO1pd(sqrt, psqrtpd, c128_pd) + GO1pd(sqrt, psqrtpd, d128_pd) + return 0; } |