diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2022-04-01 19:27:27 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2022-04-01 19:27:27 +0200 |
| commit | f2012fc6365c338b977a6e6a230e1d9d7c750d51 (patch) | |
| tree | 584792a0b26327fdde1da550b1b06e93587a2332 /tests | |
| parent | 16f82ba6b3a447fca0d9d1c56098cc1aace10d2c (diff) | |
| download | box64-f2012fc6365c338b977a6e6a230e1d9d7c750d51.tar.gz box64-f2012fc6365c338b977a6e6a230e1d9d7c750d51.zip | |
Added more sse2 opcodes to test17, and added NaN handling to SQRTSD and MULSD ([DYNAREC] too)
Diffstat (limited to 'tests')
| -rw-r--r-- | tests/ref17.txt | 117 | ||||
| -rwxr-xr-x | tests/test17 | bin | 168888 -> 221720 bytes | |||
| -rw-r--r-- | tests/test17.c | 135 |
3 files changed, 245 insertions, 7 deletions
diff --git a/tests/ref17.txt b/tests/ref17.txt index 3d0e67a2..eb67ae5e 100644 --- a/tests/ref17.txt +++ b/tests/ref17.txt @@ -357,3 +357,120 @@ psubq(0xffffffffffffffff 0x8000000000000000 , 0x1 0x8000000000000000 ) = 0xfffff paddb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0xff 0x81 0x84 0x15 0x21 0x82 0x2 0x81 0x6 0x8b 0xf3 0x58 0xaf 0xf 0xd0 0x33 paddw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x7fff 0xffff 0x7ffe 0xffff 0x51 0x9002 0x1 0x2 paddd(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0x0 0x0 0x80000004 0xfffffffe +pmovhlps(1 2 3 -4 , 0 -2 -10 0.5 ) = -10 0.5 3 -4 +unpcklps(1 2 3 -4 , 0 -2 -10 0.5 ) = 1 0 2 -2 +unpckhps(1 2 3 -4 , 0 -2 -10 0.5 ) = 3 -10 -4 0.5 +pmovhps(1 2 3 -4 , 0 -2 -10 0.5 ) = 1 2 0 -2 +psqrtps(1 2 3 -4 ) = 1 1.41421 1.73205 nan +psqrtps(0 -2 -10 0.5 ) = 0 nan nan 0.707107 +psqrtps(inf -inf -inf 1 ) = inf nan nan 1 +psqrtps(nan -0 nan inf ) = nan -0 nan inf +prcpps(nan -0 nan inf ) = nan -inf nan 0 +andps(1 2 3 -4 , 0 -2 -10 0.5 ) = 0 2 2 0 +andps(0 -2 -10 0.5 , inf -inf -inf 1 ) = 0 -2 -8 0.5 +andps(1 2 3 -4 , nan -0 nan inf ) = 1 0 3 4 +andps(0 -2 -10 0.5 , nan -0 nan inf ) = 0 -0 -8 0.5 +andps(inf -inf -inf 1 , nan -0 nan inf ) = inf -0 -inf 1 +andps(nan -0 nan inf , nan -0 nan inf ) = nan -0 nan inf +andnps(1 2 3 -4 , 0 -2 -10 0.5 ) = 0 -0 -2.93874e-38 0.5 +andnps(0 -2 -10 0.5 , inf -inf -inf 1 ) = inf 1 0.25 1.17549e-38 +andnps(1 2 3 -4 , nan -0 nan inf ) = 3 -0 -1 0.5 +andnps(0 -2 -10 0.5 , nan -0 nan inf ) = nan 0 0.375 4 +andnps(inf -inf -inf 1 , nan -0 nan inf ) = 5.87747e-39 0 5.87747e-39 2 +andnps(nan -0 nan inf , nan -0 nan inf ) = 0 0 0 0 +orps(1 2 3 -4 , 0 -2 -10 0.5 ) = 1 -2 -14 -inf +orps(0 -2 -10 0.5 , inf -inf -inf 1 ) = inf -inf nan 1 +orps(1 2 3 -4 , nan -0 nan inf ) = nan -2 nan -inf +orps(0 -2 -10 0.5 , nan -0 nan inf ) 
= nan -2 nan inf +orps(inf -inf -inf 1 , nan -0 nan inf ) = nan -inf nan inf +orps(nan -0 nan inf , nan -0 nan inf ) = nan -0 nan inf +xorps(1 2 3 -4 , 0 -2 -10 0.5 ) = 1 -0 -4.11423e-38 -inf +xorps(0 -2 -10 0.5 , inf -inf -inf 1 ) = inf 1 0.3125 1.17549e-38 +xorps(1 2 3 -4 , nan -0 nan inf ) = 3 -2 -1 -0.5 +xorps(0 -2 -10 0.5 , nan -0 nan inf ) = nan 2 0.4375 4 +xorps(inf -inf -inf 1 , nan -0 nan inf ) = 5.87747e-39 inf 5.87747e-39 2 +xorps(nan -0 nan inf , nan -0 nan inf ) = 0 0 0 0 +addps(1 2 3 -4 , 0 -2 -10 0.5 ) = 1 0 -7 -3.5 +addps(0 -2 -10 0.5 , inf -inf -inf 1 ) = inf -inf -inf 1.5 +addps(1 2 3 -4 , nan -0 nan inf ) = nan 2 nan inf +addps(0 -2 -10 0.5 , nan -0 nan inf ) = nan -2 nan inf +addps(inf -inf -inf 1 , nan -0 nan inf ) = nan -inf nan inf +addps(nan -0 nan inf , nan -0 nan inf ) = nan -0 nan inf +mulps(1 2 3 -4 , 0 -2 -10 0.5 ) = 0 -4 -30 -2 +mulps(0 -2 -10 0.5 , inf -inf -inf 1 ) = nan inf inf 0.5 +mulps(1 2 3 -4 , nan -0 nan inf ) = nan -0 nan -inf +mulps(0 -2 -10 0.5 , nan -0 nan inf ) = nan 0 nan inf +mulps(inf -inf -inf 1 , nan -0 nan inf ) = nan nan nan inf +mulps(nan -0 nan inf , nan -0 nan inf ) = nan 0 nan inf +subps(1 2 3 -4 , 0 -2 -10 0.5 ) = 1 4 13 -4.5 +subps(0 -2 -10 0.5 , inf -inf -inf 1 ) = -inf inf inf -0.5 +subps(1 2 3 -4 , nan -0 nan inf ) = nan 2 nan -inf +subps(0 -2 -10 0.5 , nan -0 nan inf ) = nan -2 nan -inf +subps(inf -inf -inf 1 , nan -0 nan inf ) = nan -inf nan -inf +subps(nan -0 nan inf , nan -0 nan inf ) = nan 0 nan nan +minps(1 2 3 -4 , 0 -2 -10 0.5 ) = 0 -2 -10 -4 +minps(0 -2 -10 0.5 , inf -inf -inf 1 ) = 0 -inf -inf 0.5 +minps(1 2 3 -4 , nan -0 nan inf ) = nan -0 nan -4 +minps(0 -2 -10 0.5 , nan -0 nan inf ) = nan -2 nan 0.5 +minps(inf -inf -inf 1 , nan -0 nan inf ) = nan -inf nan 1 +minps(nan -0 nan inf , nan -0 nan inf ) = nan -0 nan inf +divps(1 2 3 -4 , 0 -2 -10 0.5 ) = inf -1 -0.3 -8 +divps(0 -2 -10 0.5 , inf -inf -inf 1 ) = 0 0 0 0.5 +divps(1 2 3 -4 , nan -0 nan inf ) = nan -inf nan -0 +divps(0 -2 -10 0.5 , nan 
-0 nan inf ) = nan inf nan 0 +divps(inf -inf -inf 1 , nan -0 nan inf ) = nan inf nan 0 +divps(nan -0 nan inf , nan -0 nan inf ) = nan nan nan nan +maxps(1 2 3 -4 , 0 -2 -10 0.5 ) = 1 2 3 0.5 +maxps(0 -2 -10 0.5 , inf -inf -inf 1 ) = inf -2 -10 1 +maxps(1 2 3 -4 , nan -0 nan inf ) = nan 2 nan inf +maxps(0 -2 -10 0.5 , nan -0 nan inf ) = nan -0 nan inf +maxps(inf -inf -inf 1 , nan -0 nan inf ) = nan -0 nan inf +maxps(nan -0 nan inf , nan -0 nan inf ) = nan -0 nan inf +shufps(1 2 3 -4 , 0 -2 -10 0.5 , 0) = 1 1 0 0 +shufps(0 -2 -10 0.5 , inf -inf -inf 1 , 0) = 0 0 inf inf +shufps(1 2 3 -4 , nan -0 nan inf , 0) = 1 1 nan nan +shufps(0 -2 -10 0.5 , nan -0 nan inf , 0) = 0 0 nan nan +shufps(inf -inf -inf 1 , nan -0 nan inf , 0) = inf inf nan nan +shufps(nan -0 nan inf , nan -0 nan inf , 0) = nan nan nan nan +shufps(1 2 3 -4 , 0 -2 -10 0.5 , 21) = 2 2 -2 0 +shufps(0 -2 -10 0.5 , inf -inf -inf 1 , 21) = -2 -2 -inf inf +shufps(1 2 3 -4 , nan -0 nan inf , 21) = 2 2 -0 nan +shufps(0 -2 -10 0.5 , nan -0 nan inf , 21) = -2 -2 -0 nan +shufps(inf -inf -inf 1 , nan -0 nan inf , 21) = -inf -inf -0 nan +shufps(nan -0 nan inf , nan -0 nan inf , 21) = -0 -0 -0 nan +shufps(1 2 3 -4 , 0 -2 -10 0.5 , 255) = -4 -4 0.5 0.5 +shufps(0 -2 -10 0.5 , inf -inf -inf 1 , 255) = 0.5 0.5 1 1 +shufps(1 2 3 -4 , nan -0 nan inf , 255) = -4 -4 inf inf +shufps(0 -2 -10 0.5 , nan -0 nan inf , 255) = 0.5 0.5 inf inf +shufps(inf -inf -inf 1 , nan -0 nan inf , 255) = 1 1 inf inf +shufps(nan -0 nan inf , nan -0 nan inf , 255) = inf inf inf inf +shufps(1 2 3 -4 , 0 -2 -10 0.5 , 2) = 3 1 0 0 +shufps(0 -2 -10 0.5 , inf -inf -inf 1 , 2) = -10 0 inf inf +shufps(1 2 3 -4 , nan -0 nan inf , 2) = 3 1 nan nan +shufps(0 -2 -10 0.5 , nan -0 nan inf , 2) = -10 0 nan nan +shufps(inf -inf -inf 1 , nan -0 nan inf , 2) = -inf inf nan nan +shufps(nan -0 nan inf , nan -0 nan inf , 2) = nan nan nan nan +sqrtsd(1 2 , 1 2 ) = 1 2 +sqrtsd(1 2 , 0 -2 ) = 0 2 +sqrtsd(1 2 , inf -inf ) = inf 2 +sqrtsd(1 2 , 0x7ff8000000000000 -0 ) = 
0x7ff8000000000000 2 +sqrtsd(1 2 , 2 1 ) = 1.41421 2 +sqrtsd(1 2 , -2 0 ) = 0xfff8000000000000 2 +sqrtsd(1 2 , -inf inf ) = 0xfff8000000000000 2 +sqrtsd(1 2 , -0 0x7ff8000000000000 ) = -0 2 +addsd(1 2 , 1 2 ) = 2 2 +addsd(1 2 , 0 -2 ) = 1 2 +addsd(1 2 , inf -inf ) = inf 2 +addsd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 2 +addsd(1 2 , 2 1 ) = 3 2 +addsd(1 2 , -2 0 ) = -1 2 +addsd(1 2 , -inf inf ) = -inf 2 +addsd(1 2 , -0 0x7ff8000000000000 ) = 1 2 +mulsd(1 2 , 1 2 ) = 1 2 +mulsd(1 2 , 0 -2 ) = 0 2 +mulsd(1 2 , inf -inf ) = inf 2 +mulsd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 2 +mulsd(1 2 , 2 1 ) = 2 2 +mulsd(1 2 , -2 0 ) = -2 2 +mulsd(1 2 , -inf inf ) = -inf 2 +mulsd(1 2 , -0 0x7ff8000000000000 ) = -0 2 diff --git a/tests/test17 b/tests/test17 index f5cffaeb..f2632c6b 100755 --- a/tests/test17 +++ b/tests/test17 Binary files differdiff --git a/tests/test17.c b/tests/test17.c index 454d5543..0627d8c5 100644 --- a/tests/test17.c +++ b/tests/test17.c @@ -116,6 +116,16 @@ const v128 a128_pd = {.d64 = { 1.0, 2.0}}; const v128 b128_pd = {.d64 = { 0.0, -2.0}}; const v128 c128_pd = {.d64 = { INFINITY, -INFINITY}}; const v128 d128_pd = {.d64 = { NAN, -0.0}}; +const v128 a128_ps = {.f32 = { 1.0, 2.0, 3.0, -4.0}}; +const v128 b128_ps = {.f32 = { 0.0, -2.0, -10.0, 0.5}}; +const v128 c128_ps = {.f32 = { INFINITY, -INFINITY, -INFINITY, 1.0}}; +const v128 d128_ps = {.f32 = { NAN, -0.0, -NAN, INFINITY}}; + +v128 reverse_pd(v128 a) { + v128 ret; + ret.md = _mm_shuffle_pd(a.md, a.md, 1); + return ret; +} void print_8(v128 v) { for(int i=0; i<16; ++i) @@ -148,6 +158,7 @@ void print_pd(v128 v) { else printf("%g ", v.d64[i]); } +#define print_sd print_pd int main(int argc, const char** argv) { @@ -301,7 +312,90 @@ printf(N " %g, %g => %g\n", b, a, *(float*)&r); printf("%s(", #C); print_pd(A1); \ printf(", "); print_pd(A2); \ printf(") = "); print_pd(a128); printf("\n"); + #define GO2Cpd(A, C, A1, A2, I) \ + a128.md = _mm_##A##_pd(A1.md, A2.md, I); \ + printf("%s(", 
#C); print_pd(A1); \ + printf(", "); print_pd(A2); \ + printf(", %d) = ", I); print_pd(a128); printf("\n"); + #define GO1isd(A, C, A1) \ + i = _mm_##A##_sd(A1.md); \ + printf("%s(", #C); print_64(A1); \ + printf(") = 0x%x\n", i); + #define GO1sd(A, C, A1) \ + a128.md = _mm_##A##_sd(A1.md); \ + printf("%s(", #C); print_sd(A1); \ + printf(") = "); print_sd(a128); printf("\n"); + #define GO2sd(A, C, A1, A2) \ + a128.md = _mm_##A##_sd(A1.md, A2.md); \ + printf("%s(", #C); print_sd(A1); \ + printf(", "); print_sd(A2); \ + printf(") = "); print_sd(a128); printf("\n"); + #define GO2Csd(A, C, A1, A2, I) \ + a128.md = _mm_##A##_sd(A1.md, A2.md, I); \ + printf("%s(", #C); print_sd(A1); \ + printf(", "); print_sd(A2); \ + printf(", %d) = ", I); print_sd(a128); printf("\n"); + #define GO1ips(A, C, A1) \ + i = _mm_##A##_ps(A1.mf); \ + printf("%s(", #C); print_32(A1); \ + printf(") = 0x%x\n", i); + #define GO1ps(A, C, A1) \ + a128.mf = _mm_##A##_ps(A1.mf); \ + printf("%s(", #C); print_ps(A1); \ + printf(") = "); print_ps(a128); printf("\n"); + #define GO2ps(A, C, A1, A2) \ + a128.mf = _mm_##A##_ps(A1.mf, A2.mf); \ + printf("%s(", #C); print_ps(A1); \ + printf(", "); print_ps(A2); \ + printf(") = "); print_ps(a128); printf("\n"); + #define GO2Cps(A, C, A1, A2, I) \ + a128.mf = _mm_##A##_ps(A1.mf, A2.mf, I); \ + printf("%s(", #C); print_ps(A1); \ + printf(", "); print_ps(A2); \ + printf(", %d) = ", I); print_ps(a128); printf("\n"); + #define MULITGO2pd(A, B) \ + GO2pd(A, B, a128_pd, b128_pd) \ + GO2pd(A, B, b128_pd, c128_pd) \ + GO2pd(A, B, a128_pd, d128_pd) \ + GO2pd(A, B, b128_pd, d128_pd) \ + GO2pd(A, B, c128_pd, d128_pd) \ + GO2pd(A, B, d128_pd, d128_pd) + + #define MULITGO2Cpd(A, B, I) \ + GO2Cpd(A, B, a128_pd, b128_pd, I) \ + GO2Cpd(A, B, b128_pd, c128_pd, I) \ + GO2Cpd(A, B, a128_pd, d128_pd, I) \ + GO2Cpd(A, B, b128_pd, d128_pd, I) \ + GO2Cpd(A, B, c128_pd, d128_pd, I) \ + GO2Cpd(A, B, d128_pd, d128_pd, I) + + #define MULITGO2ps(A, B) \ + GO2ps(A, B, a128_ps, b128_ps) \ + 
GO2ps(A, B, b128_ps, c128_ps) \ + GO2ps(A, B, a128_ps, d128_ps) \ + GO2ps(A, B, b128_ps, d128_ps) \ + GO2ps(A, B, c128_ps, d128_ps) \ + GO2ps(A, B, d128_ps, d128_ps) + + #define MULITGO2Cps(A, B, I) \ + GO2Cps(A, B, a128_ps, b128_ps, I) \ + GO2Cps(A, B, b128_ps, c128_ps, I) \ + GO2Cps(A, B, a128_ps, d128_ps, I) \ + GO2Cps(A, B, b128_ps, d128_ps, I) \ + GO2Cps(A, B, c128_ps, d128_ps, I) \ + GO2Cps(A, B, d128_ps, d128_ps, I) + + #define MULTIGO2sd(A, B) \ + GO2sd(A, B, a128_pd, a128_pd) \ + GO2sd(A, B, a128_pd, b128_pd) \ + GO2sd(A, B, a128_pd, c128_pd) \ + GO2sd(A, B, a128_pd, d128_pd) \ + GO2sd(A, B, a128_pd, reverse_pd(a128_pd)) \ + GO2sd(A, B, a128_pd, reverse_pd(b128_pd)) \ + GO2sd(A, B, a128_pd, reverse_pd(c128_pd)) \ + GO2sd(A, B, a128_pd, reverse_pd(d128_pd)) + GO2(shuffle, 8, pshufb, a128_8, b128_8) GO2(hadd, 16, phaddw, a128_16, b128_16) @@ -349,13 +443,6 @@ printf(N " %g, %g => %g\n", b, a, *(float*)&r); GO1pd(sqrt, psqrtpd, b128_pd) GO1pd(sqrt, psqrtpd, c128_pd) GO1pd(sqrt, psqrtpd, d128_pd) - #define MULITGO2pd(A, B) \ - GO2pd(A, B, a128_pd, b128_pd) \ - GO2pd(A, B, b128_pd, c128_pd) \ - GO2pd(A, B, a128_pd, d128_pd) \ - GO2pd(A, B, b128_pd, d128_pd) \ - GO2pd(A, B, c128_pd, d128_pd) \ - GO2pd(A, B, d128_pd, d128_pd) MULITGO2pd(and, andpd) MULITGO2pd(andnot, andnpd) MULITGO2pd(or, orpd) @@ -460,6 +547,40 @@ printf(N " %g, %g => %g\n", b, a, *(float*)&r); GO2(add, 8, paddb, a128_8, b128_8) GO2(add, 16, paddw, a128_16, b128_16) GO2(add, 32, paddd, a128_32, b128_32) + GO2ps(movehl, pmovhlps, a128_ps, b128_ps) + GO2ps(unpacklo, unpcklps, a128_ps, b128_ps) + GO2ps(unpackhi, unpckhps, a128_ps, b128_ps) + GO2ps(movelh, pmovhps, a128_ps, b128_ps) + GO1ps(sqrt, psqrtps, a128_ps) + GO1ps(sqrt, psqrtps, b128_ps) + GO1ps(sqrt, psqrtps, c128_ps) + GO1ps(sqrt, psqrtps, d128_ps) + //GO1ps(rsqrt, prsqrtps, a128_ps) // difference in precision + //GO1ps(rsqrt, prsqrtps, b128_ps) // same + //GO1ps(rsqrt, prsqrtps, c128_ps) // same + //GO1ps(rsqrt, prsqrtps, d128_ps) // 
difference in the handling of NAN, (-)0, and INF in Dynarec + //GO1ps(rcp, prcpps, a128_ps) // difference in precision + //GO1ps(rcp, prcpps, b128_ps) // difference in precision + //GO1ps(rcp, prcpps, c128_ps) // difference in precision + GO1ps(rcp, prcpps, d128_ps) + MULITGO2ps(and, andps) + MULITGO2ps(andnot, andnps) + MULITGO2ps(or, orps) + MULITGO2ps(xor, xorps) + MULITGO2ps(add, addps) + MULITGO2ps(mul, mulps) + MULITGO2ps(sub, subps) + MULITGO2ps(min, minps) + MULITGO2ps(div, divps) + MULITGO2ps(max, maxps) +// MULITGO2Cps(cmp, cmpps, 0) // use avx for some reason + MULITGO2Cps(shuffle, shufps, 0) + MULITGO2Cps(shuffle, shufps, 0x15) + MULITGO2Cps(shuffle, shufps, 0xff) + MULITGO2Cps(shuffle, shufps, 0x02) + MULTIGO2sd(sqrt, sqrtsd) + MULTIGO2sd(add, addsd) + MULTIGO2sd(mul, mulsd) return 0; } |