diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2022-04-01 11:27:27 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2022-04-01 11:27:27 +0200 |
| commit | 16f82ba6b3a447fca0d9d1c56098cc1aace10d2c (patch) | |
| tree | 23d251759ec65a10351d7b569a38ca442dde689a | |
| parent | 5cd8176478bfba8b321a25e2ac849af7f2f93c82 (diff) | |
| download | box64-16f82ba6b3a447fca0d9d1c56098cc1aace10d2c.tar.gz box64-16f82ba6b3a447fca0d9d1c56098cc1aace10d2c.zip | |

Even more test17 coverage


| mode | file | changes |
|---|---|---|
| -rw-r--r-- | tests/ref17.txt | 40 |
| -rwxr-xr-x | tests/test17 | bin, 147912 -> 168888 bytes |
| -rw-r--r-- | tests/test17.c | 53 |

3 files changed, 91 insertions, 2 deletions
diff --git a/tests/ref17.txt b/tests/ref17.txt index 533dff62..3d0e67a2 100644 --- a/tests/ref17.txt +++ b/tests/ref17.txt @@ -317,3 +317,43 @@ haddpd(1 2 , 0x7ff8000000000000 -0 ) = 3 0x7ff8000000000000 haddpd(0 -2 , 0x7ff8000000000000 -0 ) = -2 0x7ff8000000000000 haddpd(inf -inf , 0x7ff8000000000000 -0 ) = 0xfff8000000000000 0x7ff8000000000000 haddpd(0x7ff8000000000000 -0 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 0x7ff8000000000000 +psrlw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 +psrld(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0x0 0x0 0x0 0x0 +psrlq(0xffffffffffffffff 0x8000000000000000 , 0x1 0x8000000000000000 ) = 0x7fffffffffffffff 0x4000000000000000 +paddq(0xffffffffffffffff 0x8000000000000000 , 0x1 0x8000000000000000 ) = 0x0 0x0 +pmullw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x8000 0x8000 0x8001 0x0 0x50 0x2000 0xfffa 0x1 +psubusb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0xff 0x7f 0x7a 0x0 0x0 0x0 0x0 0x81 0xf6 0x7d 0x0 0x4c 0x9b 0x0 0xb0 0x31 +psubusw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x7fff 0x1 0x0 0x0 0x0 0x0 0x0 0x0 +pminub(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0x0 0x1 0x5 0x0 0x1 0x2 0x3 0x0 0x8 0x7 0x72 0x6 0xa 0x0 0x10 0x1 +pand(0x81030201007f80ff 0x32c000a5527284fe , 0xff802015050100 0x1100f0a06810708 ) = 0x3000000050000 0x2000408 +paddusb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0xff 0x81 0x84 0x15 0x21 0x82 0xff 0x81 0xff 0x8b 0xf3 0x58 0xaf 0xf 0xd0 0x33 
+paddusw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0xffff 0xffff 0xffff 0xffff 0x51 0x9002 0xffff 0xffff +pmaxub(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0xff 0x80 0x7f 0x15 0x20 0x80 0xff 0x81 0xfe 0x84 0x81 0x52 0xa5 0xf 0xc0 0x32 +pandn(0x81030201007f80ff 0x32c000a5527284fe , 0xff802015050100 0x1100f0a06810708 ) = 0xfc802015000100 0x1100f0a04810300 +pavgb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0x80 0x41 0x42 0xb 0x11 0x41 0x81 0x41 0x83 0x46 0x7a 0x2c 0x58 0x8 0x68 0x1a +psraw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0xffff 0xffff 0x0 0x0 0x0 0x0 0x0 0xffff +psrad(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0xffffffff 0xffffffff 0x0 0x0 +pavgb(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0xc000 0x8000 0xbfff 0x8000 0x29 0x4801 0x8001 0x8001 +pmulhuw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x7fff 0x3fff 0x7ffe 0x0 0x0 0x1 0x2 0x4001 +pmulhw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x0 0xc000 0xffff 0x0 0x0 0xffff 0xffff 0x3fff +psubsb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0xff 0x80 0x7a 0xeb 0xe1 0x7f 0x4 0x81 0xf6 0x80 0x7f 0x4c 0x9b 0xf1 0xb0 0x31 +psubsw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x7fff 0x8000 0x7fff 0x1 0xffb1 0x7002 0x5 0x0 +pminsw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 
0xfffe 0x8001 ) = 0x8000 0x8000 0xffff 0xffff 0x1 0x9000 0xfffe 0x8001 +por(0x81030201007f80ff 0x32c000a5527284fe , 0xff802015050100 0x1100f0a06810708 ) = 0x81ff8221157f81ff 0x33d00faf56f387fe +paddusb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0xff 0x81 0x7f 0x15 0x21 0x82 0x2 0x81 0x6 0x8b 0xf3 0x58 0xaf 0xf 0xd0 0x33 +paddusw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x8000 0xffff 0x7ffe 0xffff 0x51 0x9002 0x1 0x8000 +pmaxsw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0xffff 0x7fff 0x7fff 0x0 0x50 0x2 0x3 0x8001 +pxor(0x81030201007f80ff 0x32c000a5527284fe , 0xff802015050100 0x1100f0a06810708 ) = 0x81fc8221157a81ff 0x33d00faf54f383f6 +psllw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 +pslld(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0x0 0x0 0x0 0x0 +psllq(0xffffffffffffffff 0x8000000000000000 , 0x1 0x8000000000000000 ) = 0xfffffffffffffffe 0x0 +pmuludq(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0xffffffff 0x0 0x7ffffffb 0x2 +pmaddwd(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x0 0xc001 0x8001 0xffff 0x2050 0xffff 0xfffb 0x3ffe +psadbw(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0x27 0x4 0x0 0x0 0x0 0x0 0x0 0x0 0x59 0x3 0x0 0x0 0x0 0x0 0x0 0x0 +psubb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0xff 0x7f 0x7a 0xeb 0xe1 0x82 0x4 0x81 0xf6 0x7d 0xf1 0x4c 0x9b 0xf1 0xb0 0x31 +psubw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 
0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x7fff 0x1 0x8000 0x1 0xffb1 0x7002 0x5 0x0 +psubd(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0xfffffffe 0x0 0x7ffffffa 0x2 +psubq(0xffffffffffffffff 0x8000000000000000 , 0x1 0x8000000000000000 ) = 0xfffffffffffffffe 0x0 +paddb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0xff 0x81 0x84 0x15 0x21 0x82 0x2 0x81 0x6 0x8b 0xf3 0x58 0xaf 0xf 0xd0 0x33 +paddw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x7fff 0xffff 0x7ffe 0xffff 0x51 0x9002 0x1 0x2 +paddd(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0x0 0x0 0x80000004 0xfffffffe diff --git a/tests/test17 b/tests/test17 index aa5cd04f..f5cffaeb 100755 --- a/tests/test17 +++ b/tests/test17 Binary files differdiff --git a/tests/test17.c b/tests/test17.c index aa0d80a5..454d5543 100644 --- a/tests/test17.c +++ b/tests/test17.c @@ -133,6 +133,7 @@ void print_64(v128 v) { for(int i=0; i<2; ++i) printf("0x%llx ", v.u64[i]); } +#define print_128 print_64 void print_ps(v128 v) { for(int i=0; i<4; ++i) if(isnanf(v.f32[i])) @@ -261,6 +262,16 @@ printf(N " %g, %g => %g\n", b, a, *(float*)&r); printf("%s(", #C); print_##N(A1); \ printf(", "); print_##N(A2); \ printf(") = "); print_##N(a128); printf("\n"); + #define GO2u(A, N, C, A1, A2) \ + a128.mm = _mm_##A##_epu##N(A1.mm, A2.mm); \ + printf("%s(", #C); print_##N(A1); \ + printf(", "); print_##N(A2); \ + printf(") = "); print_##N(a128); printf("\n"); + #define GO2f(A, C, A1, A2) \ + a128.mm = _mm_##A##_si128(A1.mm, A2.mm); \ + printf("%s(", #C); print_128(A1); \ + printf(", "); print_128(A2); \ + printf(") = "); print_128(a128); printf("\n"); #define GO2C(A, N, C, A1, A2, I) \ a128.mm = _mm_##A##_epi##N(A1.mm, A2.mm, I); \ printf("%s(", #C); print_##N(A1); \ @@ -409,8 +420,46 @@ printf(N " %g, 
%g => %g\n", b, a, *(float*)&r); GO2(cmpeq, 16, pcmpeqw, a128_16, b128_16) GO2(cmpeq, 32, pcmpeqd, a128_32, b128_32) MULITGO2pd(hadd, haddpd) + GO2(srl, 16, psrlw, a128_16, b128_16) + GO2(srl, 32, psrld, a128_32, b128_32) + GO2(srl, 64, psrlq, a128_64, b128_64) + GO2(add, 64, paddq, a128_64, b128_64) + GO2(mullo, 16, pmullw, a128_16, b128_16) + GO2u(subs, 8, psubusb, a128_8, b128_8) + GO2u(subs, 16, psubusw, a128_16, b128_16) + GO2u(min, 8, pminub, a128_8, b128_8) + GO2f(and, pand, a128_8, b128_8) + GO2u(adds, 8, paddusb, a128_8, b128_8) + GO2u(adds, 16, paddusw, a128_16, b128_16) + GO2u(max, 8, pmaxub, a128_8, b128_8) + GO2f(andnot, pandn, a128_8, b128_8) + GO2u(avg, 8, pavgb, a128_8, b128_8) + GO2(sra, 16, psraw, a128_16, b128_16) + GO2(sra, 32, psrad, a128_32, b128_32) + GO2u(avg, 16, pavgb, a128_16, b128_16) + GO2u(mulhi, 16, pmulhuw, a128_16, b128_16) + GO2(mulhi, 16, pmulhw, a128_16, b128_16) + GO2(subs, 8, psubsb, a128_8, b128_8) + GO2(subs, 16, psubsw, a128_16, b128_16) + GO2(min, 16, pminsw, a128_16, b128_16) + GO2f(or, por, a128_8, b128_8) + GO2(adds, 8, paddusb, a128_8, b128_8) + GO2(adds, 16, paddusw, a128_16, b128_16) + GO2(max, 16, pmaxsw, a128_16, b128_16) + GO2f(xor, pxor, a128_8, b128_8) + GO2(sll, 16, psllw, a128_16, b128_16) + GO2(sll, 32, pslld, a128_32, b128_32) + GO2(sll, 64, psllq, a128_64, b128_64) + GO2u(mul, 32, pmuludq, a128_32, b128_32) + GO2(madd, 16, pmaddwd, a128_16, b128_16) + GO2u(sad, 8, psadbw, a128_8, b128_8) + GO2(sub, 8, psubb, a128_8, b128_8) + GO2(sub, 16, psubw, a128_16, b128_16) + GO2(sub, 32, psubd, a128_32, b128_32) + GO2(sub, 64, psubq, a128_64, b128_64) + GO2(add, 8, paddb, a128_8, b128_8) + GO2(add, 16, paddw, a128_16, b128_16) + GO2(add, 32, paddd, a128_32, b128_32) return 0; } - - |