diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2024-06-05 19:44:25 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2024-06-05 19:44:25 +0200 |
| commit | c0ebe095213b5048b54ff41d0d5550750af2cbdb (patch) | |
| tree | 716d642f786937f7e1bf0dad525e477b6b996abc /tests | |
| parent | b568cc529e9b10b6b42b2139351b4b3cb0858a28 (diff) | |
| download | box64-c0ebe095213b5048b54ff41d0d5550750af2cbdb.tar.gz box64-c0ebe095213b5048b54ff41d0d5550750af2cbdb.zip | |
Added an AVX test (not on Android, needs to be built there)
Diffstat (limited to 'tests')
| -rw-r--r-- | tests/ref30.txt | 756 | ||||
| -rwxr-xr-x | tests/test30 | bin | 0 -> 192688 bytes | |||
| -rw-r--r-- | tests/test30.c | 641 |
3 files changed, 1397 insertions, 0 deletions
diff --git a/tests/ref30.txt b/tests/ref30.txt new file mode 100644 index 00000000..f2689e47 --- /dev/null +++ b/tests/ref30.txt @@ -0,0 +1,756 @@ +ucomiss 1.000000, 2.000000 => 0x203 +ucomiss 2.000000, 1.000000 => 0x202 +ucomiss 1.000000, inf => 0x203 +ucomiss inf, 1.000000 => 0x202 +ucomiss 1.000000, -inf => 0x202 +ucomiss -inf, 1.000000 => 0x203 +ucomiss 1.000000, nan => 0x247 +ucomiss nan, 1.000000 => 0x247 +ucomiss 1.000000, 1.000000 => 0x242 +ucomiss 1.000000, 1.000000 => 0x242 +ucomiss inf, inf => 0x242 +ucomiss -inf, inf => 0x203 +ucomiss inf, -inf => 0x202 +ucomiss nan, nan => 0x247 +minss 1, 2 => 1 +minss 2, 1 => 1 +minss -inf, 2 => -inf +minss 2, -inf => -inf +minss inf, 2 => 2 +minss 2, inf => 2 +minss nan, 2 => 2 +minss 2, nan => nan +minss nan, 3.40282e+38 => 3.40282e+38 +minss 3.40282e+38, nan => nan +minss -inf, 3.40282e+38 => -inf +minss 3.40282e+38, -inf => -inf +minss inf, 3.40282e+38 => 3.40282e+38 +minss 3.40282e+38, inf => 3.40282e+38 +maxss 1, 2 => 2 +maxss 2, 1 => 2 +maxss -inf, 2 => 2 +maxss 2, -inf => 2 +maxss inf, 2 => inf +maxss 2, inf => inf +maxss nan, 2 => 2 +maxss 2, nan => nan +maxss nan, 3.40282e+38 => 3.40282e+38 +maxss 3.40282e+38, nan => nan +maxss -inf, 3.40282e+38 => 3.40282e+38 +maxss 3.40282e+38, -inf => 3.40282e+38 +maxss inf, 3.40282e+38 => inf +maxss 3.40282e+38, inf => inf +cmpss 0 1.000000, 2.000000 => 0x0 +cmpss 0 2.000000, 1.000000 => 0x0 +cmpss 0 1.000000, inf => 0x0 +cmpss 0 inf, 1.000000 => 0x0 +cmpss 0 1.000000, -inf => 0x0 +cmpss 0 -inf, 1.000000 => 0x0 +cmpss 0 1.000000, nan => 0x0 +cmpss 0 nan, 1.000000 => 0x0 +cmpss 0 1.000000, 1.000000 => 0xffffffff +cmpss 0 1.000000, 1.000000 => 0xffffffff +cmpss 0 inf, inf => 0xffffffff +cmpss 0 -inf, inf => 0x0 +cmpss 0 inf, -inf => 0x0 +cmpss 0 nan, nan => 0x0 +cmpss 1 1.000000, 2.000000 => 0xffffffff +cmpss 1 2.000000, 1.000000 => 0x0 +cmpss 1 1.000000, inf => 0xffffffff +cmpss 1 inf, 1.000000 => 0x0 +cmpss 1 1.000000, -inf => 0x0 +cmpss 1 -inf, 1.000000 => 0xffffffff 
+cmpss 1 1.000000, nan => 0x0 +cmpss 1 nan, 1.000000 => 0x0 +cmpss 1 1.000000, 1.000000 => 0x0 +cmpss 1 1.000000, 1.000000 => 0x0 +cmpss 1 inf, inf => 0x0 +cmpss 1 -inf, inf => 0xffffffff +cmpss 1 inf, -inf => 0x0 +cmpss 1 nan, nan => 0x0 +cmpss 2 1.000000, 2.000000 => 0xffffffff +cmpss 2 2.000000, 1.000000 => 0x0 +cmpss 2 1.000000, inf => 0xffffffff +cmpss 2 inf, 1.000000 => 0x0 +cmpss 2 1.000000, -inf => 0x0 +cmpss 2 -inf, 1.000000 => 0xffffffff +cmpss 2 1.000000, nan => 0x0 +cmpss 2 nan, 1.000000 => 0x0 +cmpss 2 1.000000, 1.000000 => 0xffffffff +cmpss 2 1.000000, 1.000000 => 0xffffffff +cmpss 2 inf, inf => 0xffffffff +cmpss 2 -inf, inf => 0xffffffff +cmpss 2 inf, -inf => 0x0 +cmpss 2 nan, nan => 0x0 +cmpss 3 1.000000, 2.000000 => 0x0 +cmpss 3 2.000000, 1.000000 => 0x0 +cmpss 3 1.000000, inf => 0x0 +cmpss 3 inf, 1.000000 => 0x0 +cmpss 3 1.000000, -inf => 0x0 +cmpss 3 -inf, 1.000000 => 0x0 +cmpss 3 1.000000, nan => 0xffffffff +cmpss 3 nan, 1.000000 => 0xffffffff +cmpss 3 1.000000, 1.000000 => 0x0 +cmpss 3 1.000000, 1.000000 => 0x0 +cmpss 3 inf, inf => 0x0 +cmpss 3 -inf, inf => 0x0 +cmpss 3 inf, -inf => 0x0 +cmpss 3 nan, nan => 0xffffffff +cmpss 4 1.000000, 2.000000 => 0xffffffff +cmpss 4 2.000000, 1.000000 => 0xffffffff +cmpss 4 1.000000, inf => 0xffffffff +cmpss 4 inf, 1.000000 => 0xffffffff +cmpss 4 1.000000, -inf => 0xffffffff +cmpss 4 -inf, 1.000000 => 0xffffffff +cmpss 4 1.000000, nan => 0xffffffff +cmpss 4 nan, 1.000000 => 0xffffffff +cmpss 4 1.000000, 1.000000 => 0x0 +cmpss 4 1.000000, 1.000000 => 0x0 +cmpss 4 inf, inf => 0x0 +cmpss 4 -inf, inf => 0xffffffff +cmpss 4 inf, -inf => 0xffffffff +cmpss 4 nan, nan => 0xffffffff +cmpss 5 1.000000, 2.000000 => 0x0 +cmpss 5 2.000000, 1.000000 => 0xffffffff +cmpss 5 1.000000, inf => 0x0 +cmpss 5 inf, 1.000000 => 0xffffffff +cmpss 5 1.000000, -inf => 0xffffffff +cmpss 5 -inf, 1.000000 => 0x0 +cmpss 5 1.000000, nan => 0xffffffff +cmpss 5 nan, 1.000000 => 0xffffffff +cmpss 5 1.000000, 1.000000 => 0xffffffff +cmpss 5 
1.000000, 1.000000 => 0xffffffff +cmpss 5 inf, inf => 0xffffffff +cmpss 5 -inf, inf => 0x0 +cmpss 5 inf, -inf => 0xffffffff +cmpss 5 nan, nan => 0xffffffff +cmpss 6 1.000000, 2.000000 => 0x0 +cmpss 6 2.000000, 1.000000 => 0xffffffff +cmpss 6 1.000000, inf => 0x0 +cmpss 6 inf, 1.000000 => 0xffffffff +cmpss 6 1.000000, -inf => 0xffffffff +cmpss 6 -inf, 1.000000 => 0x0 +cmpss 6 1.000000, nan => 0xffffffff +cmpss 6 nan, 1.000000 => 0xffffffff +cmpss 6 1.000000, 1.000000 => 0x0 +cmpss 6 1.000000, 1.000000 => 0x0 +cmpss 6 inf, inf => 0x0 +cmpss 6 -inf, inf => 0x0 +cmpss 6 inf, -inf => 0xffffffff +cmpss 6 nan, nan => 0xffffffff +cmpss 7 1.000000, 2.000000 => 0xffffffff +cmpss 7 2.000000, 1.000000 => 0xffffffff +cmpss 7 1.000000, inf => 0xffffffff +cmpss 7 inf, 1.000000 => 0xffffffff +cmpss 7 1.000000, -inf => 0xffffffff +cmpss 7 -inf, 1.000000 => 0xffffffff +cmpss 7 1.000000, nan => 0x0 +cmpss 7 nan, 1.000000 => 0x0 +cmpss 7 1.000000, 1.000000 => 0xffffffff +cmpss 7 1.000000, 1.000000 => 0xffffffff +cmpss 7 inf, inf => 0xffffffff +cmpss 7 -inf, inf => 0xffffffff +cmpss 7 inf, -inf => 0xffffffff +cmpss 7 nan, nan => 0x0 +pshufb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0xff 0x80 0x2 0x2 0xff 0x0 0x0 0xff 0xfe 0x81 0x0 0x3 0x72 0x32 0xff 0x80 +phaddw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x7fff 0x7fff 0x3 0x8004 0xffff 0xfffe 0x9050 0x7fff +phaddd(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0x7fffffff 0x7fffffff 0x80000001 0x3 +phaddsw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x8000 0x7fff 0x3 0x8004 0xffff 0xfffe 0x9050 0x8000 +pmaddubsw(0x80ff 0x7f 0x201 0x8103 0x84fe 0x5272 0xa5 0x32c0 , 0x100 0x1505 0x8020 0xff 0x708 0x681 0xf0a 0x110 ) = 0x80 0x27b 0xff20 0xfffd 0xb8c 0xc95e 0x672 0xc32 +phsubw(0xffff 
0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x7fff 0x7fff 0xffff 0x8002 0x1 0x0 0x7050 0x7ffd +psignb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0x0 0x80 0x7f 0x0 0x1 0xfe 0xfd 0x0 0xfe 0x84 0x8e 0x52 0xa5 0x0 0xc0 0x32 +psignw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x1 0x8000 0x8001 0x0 0x1 0xfffe 0xfffd 0x7fff +psignd(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0xffffffff 0x80000000 0x7fffffff 0x0 +pmulhrsw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x1 0x8001 0xffff 0x0 0x0 0xfffe 0x0 0x7ffe +pblendvps(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe , 0x1 0x80000000 0x80000005 0xfffe ) = 0xffffffff 0x80000000 0x5 0x0 +ptestz(0x80000000ffffffff 0x7fffffff , 0x8000000000000001 0xfffffffe00000005 ) = 0 +ptestc(0x80000000ffffffff 0x7fffffff , 0x8000000000000001 0xfffffffe00000005 ) = 0 +ptestnzc(0x80000000ffffffff 0x7fffffff , 0x8000000000000001 0xfffffffe00000005 ) = 1 +pabsb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 ) = 0x1 0x80 0x7f 0x0 0x1 0x2 0x3 0x7f 0x2 0x7c 0x72 0x52 0x5b 0x0 0x40 0x32 +pabsw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 ) = 0x1 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x7fff +pabsd(0xffffffff 0x80000000 0x7fffffff 0x0 ) = 0x1 0x80000000 0x7fffffff 0x0 +pmovsxbw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 ) = 0xffff 0xffff 0x0 0xff80 0xffff 0x7f 0x0 0x0 +pmovsxbd(0xffffffff 0x80000000 0x7fffffff 0x0 ) = 0xffffffff 0xffffffff 0xffffffff 0xffffffff +pmovsxbq(0xffffffffffffffff 0x8000000000000000 ) = 0xffffffffffffffff 0xffffffffffffffff +pmovsxwd(0xffffffff 0x80000000 0x7fffffff 0x0 ) = 0xffffffff 0xffffffff 0x0 0xffff8000 +pmovsxwq(0xffffffffffffffff 0x8000000000000000 ) = 0xffffffffffffffff 
0xffffffffffffffff +pmovsxdq(0xffffffffffffffff 0x8000000000000000 ) = 0xffffffffffffffff 0xffffffffffffffff +pmovzxbw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 ) = 0xff 0xff 0x0 0x80 0xff 0x7f 0x0 0x0 +pmovzxbd(0xffffffff 0x80000000 0x7fffffff 0x0 ) = 0xff 0xff 0xff 0xff +pmovzxbq(0xffffffffffffffff 0x8000000000000000 ) = 0xff 0xff +pmovzxwd(0xffffffff 0x80000000 0x7fffffff 0x0 ) = 0xffff 0xffff 0x0 0x8000 +pmovzxwq(0xffffffffffffffff 0x8000000000000000 ) = 0xffff 0xffff +pmovzxdq(0xffffffffffffffff 0x8000000000000000 ) = 0xffffffff 0xffffffff +pminsd(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0xffffffff 0x80000000 0x5 0xfffffffe +pmaxsd(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0x1 0x80000000 0x7fffffff 0x0 +pblendw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 0) = 0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 +pblendw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 255) = 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 +pblendw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 170) = 0xffff 0x7fff 0x7fff 0xffff 0x1 0x9000 0x3 0x8001 +pblendw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 2) = 0xffff 0x7fff 0x7fff 0x0 0x1 0x2 0x3 0x8001 +palignr(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 0) = 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 +palignr(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 2) = 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 0xff 0x80 +palignr(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 
0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 7) = 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 0xff 0x80 0x7f 0x0 0x1 0x2 0x3 +palignr(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 15) = 0x1 0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 +palignr(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 16) = 0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 +palignr(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 255) = 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 +movmskpd(0xffffffffffffffff 0x8000000000000000 ) = 0x3 +psqrtpd(1 2 ) = 1 1.41421 +psqrtpd(0 -2 ) = 0 0xfff8000000000000 +psqrtpd(inf -inf ) = inf 0xfff8000000000000 +psqrtpd(0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -0 +andpd(1 2 , 0 -2 ) = 0 2 +andpd(0 -2 , inf -inf ) = 0 -2 +andpd(1 2 , 0x7ff8000000000000 -0 ) = 1 0 +andpd(0 -2 , 0x7ff8000000000000 -0 ) = 0 -0 +andpd(inf -inf , 0x7ff8000000000000 -0 ) = inf -0 +andpd(0x7ff8000000000000 -0 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -0 +andnpd(1 2 , 0 -2 ) = 0 -0 +andnpd(0 -2 , inf -inf ) = inf 1 +andnpd(1 2 , 0x7ff8000000000000 -0 ) = 3 -0 +andnpd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 0 +andnpd(inf -inf , 0x7ff8000000000000 -0 ) = 1.11254e-308 0 +andnpd(0x7ff8000000000000 -0 , 0x7ff8000000000000 -0 ) = 0 0 +orpd(1 2 , 0 -2 ) = 1 -2 +orpd(0 -2 , inf -inf ) = inf -inf +orpd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -2 +orpd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -2 +orpd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -inf +orpd(0x7ff8000000000000 -0 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -0 +xorpd(1 2 , 0 -2 ) = 1 -0 +xorpd(0 -2 , inf -inf 
) = inf 1 +xorpd(1 2 , 0x7ff8000000000000 -0 ) = 3 -2 +xorpd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 2 +xorpd(inf -inf , 0x7ff8000000000000 -0 ) = 1.11254e-308 inf +xorpd(0x7ff8000000000000 -0 , 0x7ff8000000000000 -0 ) = 0 0 +addpd(1 2 , 0 -2 ) = 1 0 +addpd(0 -2 , inf -inf ) = inf -inf +addpd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 2 +addpd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -2 +addpd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -inf +addpd(0x7ff8000000000000 -0 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -0 +mulpd(1 2 , 0 -2 ) = 0 -4 +mulpd(0 -2 , inf -inf ) = 0xfff8000000000000 inf +mulpd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -0 +mulpd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 0 +mulpd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 0xfff8000000000000 +mulpd(0x7ff8000000000000 -0 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 0 +subpd(1 2 , 0 -2 ) = 1 4 +subpd(0 -2 , inf -inf ) = -inf inf +subpd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 2 +subpd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -2 +subpd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -inf +subpd(0x7ff8000000000000 -0 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 0 +minpd(1 2 , 0 -2 ) = 0 -2 +minpd(0 -2 , inf -inf ) = 0 -inf +minpd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -0 +minpd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -2 +minpd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -inf +minpd(0x7ff8000000000000 -0 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -0 +divpd(1 2 , 0 -2 ) = inf -1 +divpd(0 -2 , inf -inf ) = 0 0 +divpd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -inf +divpd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 inf +divpd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 inf +divpd(0x7ff8000000000000 -0 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 0xfff8000000000000 +maxpd(1 2 , 0 -2 ) = 1 2 +maxpd(0 -2 , inf -inf ) = inf -2 
+maxpd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 2 +maxpd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -0 +maxpd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -0 +maxpd(0x7ff8000000000000 -0 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -0 +punpcklbw(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0xff 0x0 0x80 0x1 0x7f 0x5 0x0 0x15 0x1 0x20 0x2 0x80 0x3 0xff 0x81 0x0 +punpcklwd(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0xffff 0x8000 0x8000 0x7fff 0x7fff 0xffff 0x0 0xffff +punpckldq(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0xffffffff 0x1 0x80000000 0x80000000 +ppacksswb(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x80ff 0x7f 0x201 0x8003 0x7f80 0xffff 0x8050 0x80fe +pcmpgtb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0x0 0x0 0xff 0x0 0x0 0xff 0xff 0x0 0x0 0x0 0xff 0xff 0x0 0x0 0x0 0xff +pcmpgtw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0xffff 0x0 0xffff 0xffff 0x0 0xffff 0xffff 0x0 +pcmpgtd(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0x0 0x0 0xffffffff 0xffffffff +packuswb(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x0 0xff 0x201 0x3 0xff00 0x0 0x50 0x0 +punpckhbw(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0xfe 0x8 0x84 0x7 0x72 0x81 0x52 0x6 0xa5 0xa 0x0 0xf 0xc0 0x10 0x32 0x1 +punpckhwd(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x1 0x50 0x2 0x9000 0x3 0xfffe 
0x8001 0x8001 +punpckhdq(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0x7fffffff 0x5 0x0 0xfffffffe +ppackssdw(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0x8000ffff 0x7fff 0x80000001 0xfffe0005 +punpcklqdq(0xffffffffffffffff 0x8000000000000000 , 0x1 0x8000000000000000 ) = 0xffffffffffffffff 0x1 +punpckhqdq(0xffffffffffffffff 0x8000000000000000 , 0x1 0x8000000000000000 ) = 0x8000000000000000 0x8000000000000000 +pshufd(0xffffffff 0x80000000 0x7fffffff 0x0 0) = 0xffffffff 0xffffffff 0xffffffff 0xffffffff +pshufd(0xffffffff 0x80000000 0x7fffffff 0x0 255) = 0x0 0x0 0x0 0x0 +pshufd(0xffffffff 0x80000000 0x7fffffff 0x0 170) = 0x7fffffff 0x7fffffff 0x7fffffff 0x7fffffff +pshufd(0xffffffff 0x80000000 0x7fffffff 0x0 2) = 0x7fffffff 0xffffffff 0xffffffff 0xffffffff +psrlw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 0) = 0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 +psrlw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 255) = 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 +psrlw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 170) = 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 +psrlw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 2) = 0x3fff 0x2000 0x1fff 0x0 0x0 0x0 0x0 0x2000 +psrld(0xffffffff 0x80000000 0x7fffffff 0x0 0) = 0xffffffff 0x80000000 0x7fffffff 0x0 +psrld(0xffffffff 0x80000000 0x7fffffff 0x0 255) = 0x0 0x0 0x0 0x0 +psrld(0xffffffff 0x80000000 0x7fffffff 0x0 170) = 0x0 0x0 0x0 0x0 +psrld(0xffffffff 0x80000000 0x7fffffff 0x0 2) = 0x3fffffff 0x20000000 0x1fffffff 0x0 +psrlq(0xffffffffffffffff 0x8000000000000000 0) = 0xffffffffffffffff 0x8000000000000000 +psrlq(0xffffffffffffffff 0x8000000000000000 255) = 0x0 0x0 +psrlq(0xffffffffffffffff 0x8000000000000000 170) = 0x0 0x0 +psrlq(0xffffffffffffffff 0x8000000000000000 2) = 0x3fffffffffffffff 0x2000000000000000 +psraw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 0) = 0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 +psraw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 255) = 0xffff 0xffff 0x0 0x0 0x0 0x0 0x0 
0xffff +psraw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 170) = 0xffff 0xffff 0x0 0x0 0x0 0x0 0x0 0xffff +psraw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 2) = 0xffff 0xe000 0x1fff 0x0 0x0 0x0 0x0 0xe000 +psrad(0xffffffff 0x80000000 0x7fffffff 0x0 0) = 0xffffffff 0x80000000 0x7fffffff 0x0 +psrad(0xffffffff 0x80000000 0x7fffffff 0x0 255) = 0xffffffff 0xffffffff 0x0 0x0 +psrad(0xffffffff 0x80000000 0x7fffffff 0x0 170) = 0xffffffff 0xffffffff 0x0 0x0 +psrad(0xffffffff 0x80000000 0x7fffffff 0x0 2) = 0xffffffff 0xe0000000 0x1fffffff 0x0 +psllw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 0) = 0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 +psllw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 255) = 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 +psllw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 170) = 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 +psllw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 2) = 0xfffc 0x0 0xfffc 0x0 0x4 0x8 0xc 0x4 +pslld(0xffffffff 0x80000000 0x7fffffff 0x0 0) = 0xffffffff 0x80000000 0x7fffffff 0x0 +pslld(0xffffffff 0x80000000 0x7fffffff 0x0 255) = 0x0 0x0 0x0 0x0 +pslld(0xffffffff 0x80000000 0x7fffffff 0x0 170) = 0x0 0x0 0x0 0x0 +pslld(0xffffffff 0x80000000 0x7fffffff 0x0 2) = 0xfffffffc 0x0 0xfffffffc 0x0 +psllq(0xffffffffffffffff 0x8000000000000000 0) = 0xffffffffffffffff 0x8000000000000000 +psllq(0xffffffffffffffff 0x8000000000000000 255) = 0x0 0x0 +psllq(0xffffffffffffffff 0x8000000000000000 170) = 0x0 0x0 +psllq(0xffffffffffffffff 0x8000000000000000 2) = 0xfffffffffffffffc 0x0 +pcmpeqb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 +pcmpeqw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0xffff +pcmpeqd(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0x0 0xffffffff 0x0 0x0 +haddpd(1 2 , 0 -2 ) = 3 -2 
+haddpd(0 -2 , inf -inf ) = -2 0xfff8000000000000 +haddpd(1 2 , 0x7ff8000000000000 -0 ) = 3 0x7ff8000000000000 +haddpd(0 -2 , 0x7ff8000000000000 -0 ) = -2 0x7ff8000000000000 +haddpd(inf -inf , 0x7ff8000000000000 -0 ) = 0xfff8000000000000 0x7ff8000000000000 +haddpd(0x7ff8000000000000 -0 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 0x7ff8000000000000 +psrlw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 +psrld(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0x0 0x0 0x0 0x0 +psrlq(0xffffffffffffffff 0x8000000000000000 , 0x1 0x8000000000000000 ) = 0x7fffffffffffffff 0x4000000000000000 +paddq(0xffffffffffffffff 0x8000000000000000 , 0x1 0x8000000000000000 ) = 0x0 0x0 +pmullw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x8000 0x8000 0x8001 0x0 0x50 0x2000 0xfffa 0x1 +psubusb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0xff 0x7f 0x7a 0x0 0x0 0x0 0x0 0x81 0xf6 0x7d 0x0 0x4c 0x9b 0x0 0xb0 0x31 +psubusw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x7fff 0x1 0x0 0x0 0x0 0x0 0x0 0x0 +pminub(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0x0 0x1 0x5 0x0 0x1 0x2 0x3 0x0 0x8 0x7 0x72 0x6 0xa 0x0 0x10 0x1 +pand(0x81030201007f80ff 0x32c000a5527284fe , 0xff802015050100 0x1100f0a06810708 ) = 0x3000000050000 0x2000408 +paddusb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0xff 0x81 0x84 0x15 0x21 0x82 0xff 0x81 0xff 0x8b 0xf3 0x58 0xaf 0xf 0xd0 0x33 +paddusw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 
0xfffe 0x8001 ) = 0xffff 0xffff 0xffff 0xffff 0x51 0x9002 0xffff 0xffff +pmaxub(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0xff 0x80 0x7f 0x15 0x20 0x80 0xff 0x81 0xfe 0x84 0x81 0x52 0xa5 0xf 0xc0 0x32 +pandn(0x81030201007f80ff 0x32c000a5527284fe , 0xff802015050100 0x1100f0a06810708 ) = 0xfc802015000100 0x1100f0a04810300 +pavgb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0x80 0x41 0x42 0xb 0x11 0x41 0x81 0x41 0x83 0x46 0x7a 0x2c 0x58 0x8 0x68 0x1a +psraw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0xffff 0xffff 0x0 0x0 0x0 0x0 0x0 0xffff +psrad(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0xffffffff 0xffffffff 0x0 0x0 +pavgb(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0xc000 0x8000 0xbfff 0x8000 0x29 0x4801 0x8001 0x8001 +pmulhuw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x7fff 0x3fff 0x7ffe 0x0 0x0 0x1 0x2 0x4001 +pmulhw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x0 0xc000 0xffff 0x0 0x0 0xffff 0xffff 0x3fff +psubsb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0xff 0x80 0x7a 0xeb 0xe1 0x7f 0x4 0x81 0xf6 0x80 0x7f 0x4c 0x9b 0xf1 0xb0 0x31 +psubsw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x7fff 0x8000 0x7fff 0x1 0xffb1 0x7002 0x5 0x0 +pminsw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x8000 0x8000 0xffff 0xffff 0x1 0x9000 0xfffe 0x8001 +por(0x81030201007f80ff 
0x32c000a5527284fe , 0xff802015050100 0x1100f0a06810708 ) = 0x81ff8221157f81ff 0x33d00faf56f387fe +paddusb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0xff 0x81 0x7f 0x15 0x21 0x82 0x2 0x81 0x6 0x8b 0xf3 0x58 0xaf 0xf 0xd0 0x33 +paddusw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x8000 0xffff 0x7ffe 0xffff 0x51 0x9002 0x1 0x8000 +pmaxsw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0xffff 0x7fff 0x7fff 0x0 0x50 0x2 0x3 0x8001 +pxor(0x81030201007f80ff 0x32c000a5527284fe , 0xff802015050100 0x1100f0a06810708 ) = 0x81fc8221157a81ff 0x33d00faf54f383f6 +psllw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 +pslld(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0x0 0x0 0x0 0x0 +psllq(0xffffffffffffffff 0x8000000000000000 , 0x1 0x8000000000000000 ) = 0xfffffffffffffffe 0x0 +pmuludq(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0xffffffff 0x0 0x7ffffffb 0x2 +pmaddwd(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x0 0xc001 0x8001 0xffff 0x2050 0xffff 0xfffb 0x3ffe +psadbw(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0x27 0x4 0x0 0x0 0x0 0x0 0x0 0x0 0x59 0x3 0x0 0x0 0x0 0x0 0x0 0x0 +psubb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0xff 0x7f 0x7a 0xeb 0xe1 0x82 0x4 0x81 0xf6 0x7d 0xf1 0x4c 0x9b 0xf1 0xb0 0x31 +psubw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x7fff 0x1 0x8000 0x1 0xffb1 
0x7002 0x5 0x0 +psubd(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0xfffffffe 0x0 0x7ffffffa 0x2 +psubq(0xffffffffffffffff 0x8000000000000000 , 0x1 0x8000000000000000 ) = 0xfffffffffffffffe 0x0 +paddb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0xff 0x81 0x84 0x15 0x21 0x82 0x2 0x81 0x6 0x8b 0xf3 0x58 0xaf 0xf 0xd0 0x33 +paddw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x7fff 0xffff 0x7ffe 0xffff 0x51 0x9002 0x1 0x2 +paddd(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0x0 0x0 0x80000004 0xfffffffe +pmovhlps(1 2 3 -4 , 0 -2 -10 0.5 ) = -10 0.5 3 -4 +unpcklps(1 2 3 -4 , 0 -2 -10 0.5 ) = 1 0 2 -2 +unpckhps(1 2 3 -4 , 0 -2 -10 0.5 ) = 3 -10 -4 0.5 +pmovhps(1 2 3 -4 , 0 -2 -10 0.5 ) = 1 2 0 -2 +psqrtps(1 2 3 -4 ) = 1 1.41421 1.73205 nan +psqrtps(0 -2 -10 0.5 ) = 0 nan nan 0.707107 +psqrtps(inf -inf -inf 1 ) = inf nan nan 1 +psqrtps(nan -0 nan inf ) = nan -0 nan inf +prcpps(nan -0 nan inf ) = nan -inf nan 0 +andps(1 2 3 -4 , 0 -2 -10 0.5 ) = 0 2 2 0 +andps(0 -2 -10 0.5 , inf -inf -inf 1 ) = 0 -2 -8 0.5 +andps(1 2 3 -4 , nan -0 nan inf ) = 1 0 3 4 +andps(0 -2 -10 0.5 , nan -0 nan inf ) = 0 -0 -8 0.5 +andps(inf -inf -inf 1 , nan -0 nan inf ) = inf -0 -inf 1 +andps(nan -0 nan inf , nan -0 nan inf ) = nan -0 nan inf +andnps(1 2 3 -4 , 0 -2 -10 0.5 ) = 0 -0 -2.93874e-38 0.5 +andnps(0 -2 -10 0.5 , inf -inf -inf 1 ) = inf 1 0.25 1.17549e-38 +andnps(1 2 3 -4 , nan -0 nan inf ) = 3 -0 -1 0.5 +andnps(0 -2 -10 0.5 , nan -0 nan inf ) = nan 0 0.375 4 +andnps(inf -inf -inf 1 , nan -0 nan inf ) = 5.87747e-39 0 5.87747e-39 2 +andnps(nan -0 nan inf , nan -0 nan inf ) = 0 0 0 0 +orps(1 2 3 -4 , 0 -2 -10 0.5 ) = 1 -2 -14 -inf +orps(0 -2 -10 0.5 , inf -inf -inf 1 ) = inf -inf nan 1 +orps(1 2 3 -4 , nan -0 nan inf ) = nan -2 nan -inf +orps(0 -2 -10 0.5 , nan -0 nan inf ) = 
nan -2 nan inf +orps(inf -inf -inf 1 , nan -0 nan inf ) = nan -inf nan inf +orps(nan -0 nan inf , nan -0 nan inf ) = nan -0 nan inf +xorps(1 2 3 -4 , 0 -2 -10 0.5 ) = 1 -0 -4.11423e-38 -inf +xorps(0 -2 -10 0.5 , inf -inf -inf 1 ) = inf 1 0.3125 1.17549e-38 +xorps(1 2 3 -4 , nan -0 nan inf ) = 3 -2 -1 -0.5 +xorps(0 -2 -10 0.5 , nan -0 nan inf ) = nan 2 0.4375 4 +xorps(inf -inf -inf 1 , nan -0 nan inf ) = 5.87747e-39 inf 5.87747e-39 2 +xorps(nan -0 nan inf , nan -0 nan inf ) = 0 0 0 0 +addps(1 2 3 -4 , 0 -2 -10 0.5 ) = 1 0 -7 -3.5 +addps(0 -2 -10 0.5 , inf -inf -inf 1 ) = inf -inf -inf 1.5 +addps(1 2 3 -4 , nan -0 nan inf ) = nan 2 nan inf +addps(0 -2 -10 0.5 , nan -0 nan inf ) = nan -2 nan inf +addps(inf -inf -inf 1 , nan -0 nan inf ) = nan -inf nan inf +addps(nan -0 nan inf , nan -0 nan inf ) = nan -0 nan inf +mulps(1 2 3 -4 , 0 -2 -10 0.5 ) = 0 -4 -30 -2 +mulps(0 -2 -10 0.5 , inf -inf -inf 1 ) = nan inf inf 0.5 +mulps(1 2 3 -4 , nan -0 nan inf ) = nan -0 nan -inf +mulps(0 -2 -10 0.5 , nan -0 nan inf ) = nan 0 nan inf +mulps(inf -inf -inf 1 , nan -0 nan inf ) = nan nan nan inf +mulps(nan -0 nan inf , nan -0 nan inf ) = nan 0 nan inf +subps(1 2 3 -4 , 0 -2 -10 0.5 ) = 1 4 13 -4.5 +subps(0 -2 -10 0.5 , inf -inf -inf 1 ) = -inf inf inf -0.5 +subps(1 2 3 -4 , nan -0 nan inf ) = nan 2 nan -inf +subps(0 -2 -10 0.5 , nan -0 nan inf ) = nan -2 nan -inf +subps(inf -inf -inf 1 , nan -0 nan inf ) = nan -inf nan -inf +subps(nan -0 nan inf , nan -0 nan inf ) = nan 0 nan nan +minps(1 2 3 -4 , 0 -2 -10 0.5 ) = 0 -2 -10 -4 +minps(0 -2 -10 0.5 , inf -inf -inf 1 ) = 0 -inf -inf 0.5 +minps(1 2 3 -4 , nan -0 nan inf ) = nan -0 nan -4 +minps(0 -2 -10 0.5 , nan -0 nan inf ) = nan -2 nan 0.5 +minps(inf -inf -inf 1 , nan -0 nan inf ) = nan -inf nan 1 +minps(nan -0 nan inf , nan -0 nan inf ) = nan -0 nan inf +divps(1 2 3 -4 , 0 -2 -10 0.5 ) = inf -1 -0.3 -8 +divps(0 -2 -10 0.5 , inf -inf -inf 1 ) = 0 0 0 0.5 +divps(1 2 3 -4 , nan -0 nan inf ) = nan -inf nan -0 +divps(0 -2 -10 0.5 , nan -0 
nan inf ) = nan inf nan 0 +divps(inf -inf -inf 1 , nan -0 nan inf ) = nan inf nan 0 +divps(nan -0 nan inf , nan -0 nan inf ) = nan nan nan nan +maxps(1 2 3 -4 , 0 -2 -10 0.5 ) = 1 2 3 0.5 +maxps(0 -2 -10 0.5 , inf -inf -inf 1 ) = inf -2 -10 1 +maxps(1 2 3 -4 , nan -0 nan inf ) = nan 2 nan inf +maxps(0 -2 -10 0.5 , nan -0 nan inf ) = nan -0 nan inf +maxps(inf -inf -inf 1 , nan -0 nan inf ) = nan -0 nan inf +maxps(nan -0 nan inf , nan -0 nan inf ) = nan -0 nan inf +cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 0) = 0 0 0 0 +cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 0) = 0 0 0 0 +cmpps(1 2 3 -4 , nan -0 nan inf , 0) = 0 0 0 0 +cmpps(0 -2 -10 0.5 , nan -0 nan inf , 0) = 0 0 0 0 +cmpps(inf -inf -inf 1 , nan -0 nan inf , 0) = 0 0 0 0 +cmpps(nan -0 nan inf , nan -0 nan inf , 0) = 0 nan 0 nan +cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 1) = 0 0 0 nan +cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 1) = nan 0 0 nan +cmpps(1 2 3 -4 , nan -0 nan inf , 1) = 0 0 0 nan +cmpps(0 -2 -10 0.5 , nan -0 nan inf , 1) = 0 nan 0 nan +cmpps(inf -inf -inf 1 , nan -0 nan inf , 1) = 0 nan 0 nan +cmpps(nan -0 nan inf , nan -0 nan inf , 1) = 0 0 0 0 +cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 2) = 0 0 0 nan +cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 2) = nan 0 0 nan +cmpps(1 2 3 -4 , nan -0 nan inf , 2) = 0 0 0 nan +cmpps(0 -2 -10 0.5 , nan -0 nan inf , 2) = 0 nan 0 nan +cmpps(inf -inf -inf 1 , nan -0 nan inf , 2) = 0 nan 0 nan +cmpps(nan -0 nan inf , nan -0 nan inf , 2) = 0 nan 0 nan +cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 3) = 0 0 0 0 +cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 3) = 0 0 0 0 +cmpps(1 2 3 -4 , nan -0 nan inf , 3) = nan 0 nan 0 +cmpps(0 -2 -10 0.5 , nan -0 nan inf , 3) = nan 0 nan 0 +cmpps(inf -inf -inf 1 , nan -0 nan inf , 3) = nan 0 nan 0 +cmpps(nan -0 nan inf , nan -0 nan inf , 3) = nan 0 nan 0 +cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 4) = nan nan nan nan +cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 4) = nan nan nan nan +cmpps(1 2 3 -4 , nan -0 nan inf , 4) = nan nan nan nan +cmpps(0 -2 -10 0.5 , nan -0 nan inf , 4) = nan nan nan nan +cmpps(inf 
-inf -inf 1 , nan -0 nan inf , 4) = nan nan nan nan +cmpps(nan -0 nan inf , nan -0 nan inf , 4) = nan 0 nan 0 +cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 5) = nan nan nan 0 +cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 5) = 0 nan nan 0 +cmpps(1 2 3 -4 , nan -0 nan inf , 5) = nan nan nan 0 +cmpps(0 -2 -10 0.5 , nan -0 nan inf , 5) = nan 0 nan 0 +cmpps(inf -inf -inf 1 , nan -0 nan inf , 5) = nan 0 nan 0 +cmpps(nan -0 nan inf , nan -0 nan inf , 5) = nan nan nan nan +cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 6) = nan nan nan 0 +cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 6) = 0 nan nan 0 +cmpps(1 2 3 -4 , nan -0 nan inf , 6) = nan nan nan 0 +cmpps(0 -2 -10 0.5 , nan -0 nan inf , 6) = nan 0 nan 0 +cmpps(inf -inf -inf 1 , nan -0 nan inf , 6) = nan 0 nan 0 +cmpps(nan -0 nan inf , nan -0 nan inf , 6) = nan 0 nan 0 +cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 7) = nan nan nan nan +cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 7) = nan nan nan nan +cmpps(1 2 3 -4 , nan -0 nan inf , 7) = 0 nan 0 nan +cmpps(0 -2 -10 0.5 , nan -0 nan inf , 7) = 0 nan 0 nan +cmpps(inf -inf -inf 1 , nan -0 nan inf , 7) = 0 nan 0 nan +cmpps(nan -0 nan inf , nan -0 nan inf , 7) = 0 nan 0 nan +cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 8) = 0 0 0 0 +cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 8) = 0 0 0 0 +cmpps(1 2 3 -4 , nan -0 nan inf , 8) = nan 0 nan 0 +cmpps(0 -2 -10 0.5 , nan -0 nan inf , 8) = nan 0 nan 0 +cmpps(inf -inf -inf 1 , nan -0 nan inf , 8) = nan 0 nan 0 +cmpps(nan -0 nan inf , nan -0 nan inf , 8) = nan nan nan nan +cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 9) = 0 0 0 nan +cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 9) = nan 0 0 nan +cmpps(1 2 3 -4 , nan -0 nan inf , 9) = nan 0 nan nan +cmpps(0 -2 -10 0.5 , nan -0 nan inf , 9) = nan nan nan nan +cmpps(inf -inf -inf 1 , nan -0 nan inf , 9) = nan nan nan nan +cmpps(nan -0 nan inf , nan -0 nan inf , 9) = nan 0 nan 0 +cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 10) = 0 0 0 nan +cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 10) = nan 0 0 nan +cmpps(1 2 3 -4 , nan -0 nan inf , 10) = nan 0 nan nan +cmpps(0 -2 -10 0.5 , nan -0 nan inf , 10) 
= nan nan nan nan +cmpps(inf -inf -inf 1 , nan -0 nan inf , 10) = nan nan nan nan +cmpps(nan -0 nan inf , nan -0 nan inf , 10) = nan nan nan nan +cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 11) = 0 0 0 0 +cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 11) = 0 0 0 0 +cmpps(1 2 3 -4 , nan -0 nan inf , 11) = 0 0 0 0 +cmpps(0 -2 -10 0.5 , nan -0 nan inf , 11) = 0 0 0 0 +cmpps(inf -inf -inf 1 , nan -0 nan inf , 11) = 0 0 0 0 +cmpps(nan -0 nan inf , nan -0 nan inf , 11) = 0 0 0 0 +cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 12) = nan nan nan nan +cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 12) = nan nan nan nan +cmpps(1 2 3 -4 , nan -0 nan inf , 12) = 0 nan 0 nan +cmpps(0 -2 -10 0.5 , nan -0 nan inf , 12) = 0 nan 0 nan +cmpps(inf -inf -inf 1 , nan -0 nan inf , 12) = 0 nan 0 nan +cmpps(nan -0 nan inf , nan -0 nan inf , 12) = 0 0 0 0 +cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 13) = nan nan nan 0 +cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 13) = 0 nan nan 0 +cmpps(1 2 3 -4 , nan -0 nan inf , 13) = 0 nan 0 0 +cmpps(0 -2 -10 0.5 , nan -0 nan inf , 13) = 0 0 0 0 +cmpps(inf -inf -inf 1 , nan -0 nan inf , 13) = 0 0 0 0 +cmpps(nan -0 nan inf , nan -0 nan inf , 13) = 0 nan 0 nan +cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 14) = nan nan nan 0 +cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 14) = 0 nan nan 0 +cmpps(1 2 3 -4 , nan -0 nan inf , 14) = 0 nan 0 0 +cmpps(0 -2 -10 0.5 , nan -0 nan inf , 14) = 0 0 0 0 +cmpps(inf -inf -inf 1 , nan -0 nan inf , 14) = 0 0 0 0 +cmpps(nan -0 nan inf , nan -0 nan inf , 14) = 0 0 0 0 +cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 15) = nan nan nan nan +cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 15) = nan nan nan nan +cmpps(1 2 3 -4 , nan -0 nan inf , 15) = nan nan nan nan +cmpps(0 -2 -10 0.5 , nan -0 nan inf , 15) = nan nan nan nan +cmpps(inf -inf -inf 1 , nan -0 nan inf , 15) = nan nan nan nan +cmpps(nan -0 nan inf , nan -0 nan inf , 15) = nan nan nan nan +cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 16) = 0 0 0 0 +cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 16) = 0 0 0 0 +cmpps(1 2 3 -4 , nan -0 nan inf , 16) = 0 0 0 0 +cmpps(0 -2 -10 0.5 , nan -0 
nan inf , 16) = 0 0 0 0 +cmpps(inf -inf -inf 1 , nan -0 nan inf , 16) = 0 0 0 0 +cmpps(nan -0 nan inf , nan -0 nan inf , 16) = 0 nan 0 nan +cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 17) = 0 0 0 nan +cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 17) = nan 0 0 nan +cmpps(1 2 3 -4 , nan -0 nan inf , 17) = 0 0 0 nan +cmpps(0 -2 -10 0.5 , nan -0 nan inf , 17) = 0 nan 0 nan +cmpps(inf -inf -inf 1 , nan -0 nan inf , 17) = 0 nan 0 nan +cmpps(nan -0 nan inf , nan -0 nan inf , 17) = 0 0 0 0 +cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 18) = 0 0 0 nan +cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 18) = nan 0 0 nan +cmpps(1 2 3 -4 , nan -0 nan inf , 18) = 0 0 0 nan +cmpps(0 -2 -10 0.5 , nan -0 nan inf , 18) = 0 nan 0 nan +cmpps(inf -inf -inf 1 , nan -0 nan inf , 18) = 0 nan 0 nan +cmpps(nan -0 nan inf , nan -0 nan inf , 18) = 0 nan 0 nan +cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 19) = 0 0 0 0 +cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 19) = 0 0 0 0 +cmpps(1 2 3 -4 , nan -0 nan inf , 19) = nan 0 nan 0 +cmpps(0 -2 -10 0.5 , nan -0 nan inf , 19) = nan 0 nan 0 +cmpps(inf -inf -inf 1 , nan -0 nan inf , 19) = nan 0 nan 0 +cmpps(nan -0 nan inf , nan -0 nan inf , 19) = nan 0 nan 0 +cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 20) = nan nan nan nan +cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 20) = nan nan nan nan +cmpps(1 2 3 -4 , nan -0 nan inf , 20) = nan nan nan nan +cmpps(0 -2 -10 0.5 , nan -0 nan inf , 20) = nan nan nan nan +cmpps(inf -inf -inf 1 , nan -0 nan inf , 20) = nan nan nan nan +cmpps(nan -0 nan inf , nan -0 nan inf , 20) = nan 0 nan 0 +cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 21) = nan nan nan 0 +cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 21) = 0 nan nan 0 +cmpps(1 2 3 -4 , nan -0 nan inf , 21) = nan nan nan 0 +cmpps(0 -2 -10 0.5 , nan -0 nan inf , 21) = nan 0 nan 0 +cmpps(inf -inf -inf 1 , nan -0 nan inf , 21) = nan 0 nan 0 +cmpps(nan -0 nan inf , nan -0 nan inf , 21) = nan nan nan nan +cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 22) = nan nan nan 0 +cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 22) = 0 nan nan 0 +cmpps(1 2 3 -4 , nan -0 nan inf , 22) = nan nan 
nan 0 +cmpps(0 -2 -10 0.5 , nan -0 nan inf , 22) = nan 0 nan 0 +cmpps(inf -inf -inf 1 , nan -0 nan inf , 22) = nan 0 nan 0 +cmpps(nan -0 nan inf , nan -0 nan inf , 22) = nan 0 nan 0 +cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 23) = nan nan nan nan +cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 23) = nan nan nan nan +cmpps(1 2 3 -4 , nan -0 nan inf , 23) = 0 nan 0 nan +cmpps(0 -2 -10 0.5 , nan -0 nan inf , 23) = 0 nan 0 nan +cmpps(inf -inf -inf 1 , nan -0 nan inf , 23) = 0 nan 0 nan +cmpps(nan -0 nan inf , nan -0 nan inf , 23) = 0 nan 0 nan +cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 24) = 0 0 0 0 +cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 24) = 0 0 0 0 +cmpps(1 2 3 -4 , nan -0 nan inf , 24) = nan 0 nan 0 +cmpps(0 -2 -10 0.5 , nan -0 nan inf , 24) = nan 0 nan 0 +cmpps(inf -inf -inf 1 , nan -0 nan inf , 24) = nan 0 nan 0 +cmpps(nan -0 nan inf , nan -0 nan inf , 24) = nan nan nan nan +cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 25) = 0 0 0 nan +cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 25) = nan 0 0 nan +cmpps(1 2 3 -4 , nan -0 nan inf , 25) = nan 0 nan nan +cmpps(0 -2 -10 0.5 , nan -0 nan inf , 25) = nan nan nan nan +cmpps(inf -inf -inf 1 , nan -0 nan inf , 25) = nan nan nan nan +cmpps(nan -0 nan inf , nan -0 nan inf , 25) = nan 0 nan 0 +cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 26) = 0 0 0 nan +cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 26) = nan 0 0 nan +cmpps(1 2 3 -4 , nan -0 nan inf , 26) = nan 0 nan nan +cmpps(0 -2 -10 0.5 , nan -0 nan inf , 26) = nan nan nan nan +cmpps(inf -inf -inf 1 , nan -0 nan inf , 26) = nan nan nan nan +cmpps(nan -0 nan inf , nan -0 nan inf , 26) = nan nan nan nan +cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 27) = 0 0 0 0 +cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 27) = 0 0 0 0 +cmpps(1 2 3 -4 , nan -0 nan inf , 27) = 0 0 0 0 +cmpps(0 -2 -10 0.5 , nan -0 nan inf , 27) = 0 0 0 0 +cmpps(inf -inf -inf 1 , nan -0 nan inf , 27) = 0 0 0 0 +cmpps(nan -0 nan inf , nan -0 nan inf , 27) = 0 0 0 0 +cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 28) = nan nan nan nan +cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 28) = nan nan nan nan +cmpps(1 2 
3 -4 , nan -0 nan inf , 28) = 0 nan 0 nan +cmpps(0 -2 -10 0.5 , nan -0 nan inf , 28) = 0 nan 0 nan +cmpps(inf -inf -inf 1 , nan -0 nan inf , 28) = 0 nan 0 nan +cmpps(nan -0 nan inf , nan -0 nan inf , 28) = 0 0 0 0 +cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 29) = nan nan nan 0 +cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 29) = 0 nan nan 0 +cmpps(1 2 3 -4 , nan -0 nan inf , 29) = 0 nan 0 0 +cmpps(0 -2 -10 0.5 , nan -0 nan inf , 29) = 0 0 0 0 +cmpps(inf -inf -inf 1 , nan -0 nan inf , 29) = 0 0 0 0 +cmpps(nan -0 nan inf , nan -0 nan inf , 29) = 0 nan 0 nan +cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 30) = nan nan nan 0 +cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 30) = 0 nan nan 0 +cmpps(1 2 3 -4 , nan -0 nan inf , 30) = 0 nan 0 0 +cmpps(0 -2 -10 0.5 , nan -0 nan inf , 30) = 0 0 0 0 +cmpps(inf -inf -inf 1 , nan -0 nan inf , 30) = 0 0 0 0 +cmpps(nan -0 nan inf , nan -0 nan inf , 30) = 0 0 0 0 +cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 31) = nan nan nan nan +cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 31) = nan nan nan nan +cmpps(1 2 3 -4 , nan -0 nan inf , 31) = nan nan nan nan +cmpps(0 -2 -10 0.5 , nan -0 nan inf , 31) = nan nan nan nan +cmpps(inf -inf -inf 1 , nan -0 nan inf , 31) = nan nan nan nan +cmpps(nan -0 nan inf , nan -0 nan inf , 31) = nan nan nan nan +shufps(1 2 3 -4 , 0 -2 -10 0.5 , 0) = 1 1 0 0 +shufps(0 -2 -10 0.5 , inf -inf -inf 1 , 0) = 0 0 inf inf +shufps(1 2 3 -4 , nan -0 nan inf , 0) = 1 1 nan nan +shufps(0 -2 -10 0.5 , nan -0 nan inf , 0) = 0 0 nan nan +shufps(inf -inf -inf 1 , nan -0 nan inf , 0) = inf inf nan nan +shufps(nan -0 nan inf , nan -0 nan inf , 0) = nan nan nan nan +shufps(1 2 3 -4 , 0 -2 -10 0.5 , 21) = 2 2 -2 0 +shufps(0 -2 -10 0.5 , inf -inf -inf 1 , 21) = -2 -2 -inf inf +shufps(1 2 3 -4 , nan -0 nan inf , 21) = 2 2 -0 nan +shufps(0 -2 -10 0.5 , nan -0 nan inf , 21) = -2 -2 -0 nan +shufps(inf -inf -inf 1 , nan -0 nan inf , 21) = -inf -inf -0 nan +shufps(nan -0 nan inf , nan -0 nan inf , 21) = -0 -0 -0 nan +shufps(1 2 3 -4 , 0 -2 -10 0.5 , 255) = -4 -4 0.5 0.5 +shufps(0 -2 -10 0.5 
, inf -inf -inf 1 , 255) = 0.5 0.5 1 1 +shufps(1 2 3 -4 , nan -0 nan inf , 255) = -4 -4 inf inf +shufps(0 -2 -10 0.5 , nan -0 nan inf , 255) = 0.5 0.5 inf inf +shufps(inf -inf -inf 1 , nan -0 nan inf , 255) = 1 1 inf inf +shufps(nan -0 nan inf , nan -0 nan inf , 255) = inf inf inf inf +shufps(1 2 3 -4 , 0 -2 -10 0.5 , 2) = 3 1 0 0 +shufps(0 -2 -10 0.5 , inf -inf -inf 1 , 2) = -10 0 inf inf +shufps(1 2 3 -4 , nan -0 nan inf , 2) = 3 1 nan nan +shufps(0 -2 -10 0.5 , nan -0 nan inf , 2) = -10 0 nan nan +shufps(inf -inf -inf 1 , nan -0 nan inf , 2) = -inf inf nan nan +shufps(nan -0 nan inf , nan -0 nan inf , 2) = nan nan nan nan +sqrtsd(1 2 , 1 2 ) = 1 2 +sqrtsd(1 2 , 0 -2 ) = 0 2 +sqrtsd(1 2 , inf -inf ) = inf 2 +sqrtsd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 2 +sqrtsd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -2 +sqrtsd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -inf +sqrtsd(1 2 , 2 1 ) = 1.41421 2 +sqrtsd(1 2 , -2 0 ) = 0xfff8000000000000 2 +sqrtsd(1 2 , -inf inf ) = 0xfff8000000000000 2 +sqrtsd(1 2 , -0 0x7ff8000000000000 ) = -0 2 +sqrtsd(0 -2 , -0 0x7ff8000000000000 ) = -0 -2 +sqrtsd(0 -2 , -0 0x7ff8000000000000 ) = -0 -2 +addsd(1 2 , 1 2 ) = 2 2 +addsd(1 2 , 0 -2 ) = 1 2 +addsd(1 2 , inf -inf ) = inf 2 +addsd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 2 +addsd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -2 +addsd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -inf +addsd(1 2 , 2 1 ) = 3 2 +addsd(1 2 , -2 0 ) = -1 2 +addsd(1 2 , -inf inf ) = -inf 2 +addsd(1 2 , -0 0x7ff8000000000000 ) = 1 2 +addsd(0 -2 , -0 0x7ff8000000000000 ) = 0 -2 +addsd(0 -2 , -0 0x7ff8000000000000 ) = 0 -2 +mulsd(1 2 , 1 2 ) = 1 2 +mulsd(1 2 , 0 -2 ) = 0 2 +mulsd(1 2 , inf -inf ) = inf 2 +mulsd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 2 +mulsd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -2 +mulsd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -inf +mulsd(1 2 , 2 1 ) = 2 2 +mulsd(1 2 , -2 0 ) = -2 2 +mulsd(1 
2 , -inf inf ) = -inf 2 +mulsd(1 2 , -0 0x7ff8000000000000 ) = -0 2 +mulsd(0 -2 , -0 0x7ff8000000000000 ) = -0 -2 +mulsd(0 -2 , -0 0x7ff8000000000000 ) = -0 -2 +subsd(1 2 , 1 2 ) = 0 2 +subsd(1 2 , 0 -2 ) = 1 2 +subsd(1 2 , inf -inf ) = -inf 2 +subsd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 2 +subsd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -2 +subsd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -inf +subsd(1 2 , 2 1 ) = -1 2 +subsd(1 2 , -2 0 ) = 3 2 +subsd(1 2 , -inf inf ) = inf 2 +subsd(1 2 , -0 0x7ff8000000000000 ) = 1 2 +subsd(0 -2 , -0 0x7ff8000000000000 ) = 0 -2 +subsd(0 -2 , -0 0x7ff8000000000000 ) = 0 -2 +minsd(1 2 , 1 2 ) = 1 2 +minsd(1 2 , 0 -2 ) = 0 2 +minsd(1 2 , inf -inf ) = 1 2 +minsd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 2 +minsd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -2 +minsd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -inf +minsd(1 2 , 2 1 ) = 1 2 +minsd(1 2 , -2 0 ) = -2 2 +minsd(1 2 , -inf inf ) = -inf 2 +minsd(1 2 , -0 0x7ff8000000000000 ) = -0 2 +minsd(0 -2 , -0 0x7ff8000000000000 ) = -0 -2 +minsd(0 -2 , -0 0x7ff8000000000000 ) = -0 -2 +divsd(1 2 , 1 2 ) = 1 2 +divsd(1 2 , 0 -2 ) = inf 2 +divsd(1 2 , inf -inf ) = 0 2 +divsd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 2 +divsd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -2 +divsd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -inf +divsd(1 2 , 2 1 ) = 0.5 2 +divsd(1 2 , -2 0 ) = -0.5 2 +divsd(1 2 , -inf inf ) = -0 2 +divsd(1 2 , -0 0x7ff8000000000000 ) = -inf 2 +divsd(0 -2 , -0 0x7ff8000000000000 ) = 0xfff8000000000000 -2 +divsd(0 -2 , -0 0x7ff8000000000000 ) = 0xfff8000000000000 -2 +maxsd(1 2 , 1 2 ) = 1 2 +maxsd(1 2 , 0 -2 ) = 1 2 +maxsd(1 2 , inf -inf ) = inf 2 +maxsd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 2 +maxsd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -2 +maxsd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -inf +maxsd(1 2 , 2 1 ) = 2 2 +maxsd(1 2 , -2 0 ) = 1 2 
+maxsd(1 2 , -inf inf ) = 1 2 +maxsd(1 2 , -0 0x7ff8000000000000 ) = 1 2 +maxsd(0 -2 , -0 0x7ff8000000000000 ) = -0 -2 +maxsd(0 -2 , -0 0x7ff8000000000000 ) = -0 -2 +cvtps2dq(1 2 3 -4 ) = 0x1 0x2 0x3 0xfffffffc +cvtps2dq(0 -2 -10 0.5 ) = 0x0 0xfffffffe 0xfffffff6 0x0 +cvtps2dq(inf -inf -inf 1 ) = 0x80000000 0x80000000 0x80000000 0x1 +cvtps2dq(nan -0 nan inf ) = 0x80000000 0x0 0x80000000 0x80000000 +dpps(1 2 3 -4 , 0 -2 -10 0.5 , 255) = -36 -36 -36 -36 +dpps(0 -2 -10 0.5 , inf -inf -inf 1 , 255) = nan nan nan nan +dpps(1 2 3 -4 , nan -0 nan inf , 255) = nan nan nan nan +dpps(0 -2 -10 0.5 , nan -0 nan inf , 255) = nan nan nan nan +dpps(inf -inf -inf 1 , nan -0 nan inf , 255) = nan nan nan nan +dpps(nan -0 nan inf , nan -0 nan inf , 255) = nan nan nan nan +dpps(1 2 3 -4 , 0 -2 -10 0.5 , 63) = -4 -4 -4 -4 +dpps(0 -2 -10 0.5 , inf -inf -inf 1 , 63) = nan nan nan nan +dpps(1 2 3 -4 , nan -0 nan inf , 63) = nan nan nan nan +dpps(0 -2 -10 0.5 , nan -0 nan inf , 63) = nan nan nan nan +dpps(inf -inf -inf 1 , nan -0 nan inf , 63) = nan nan nan nan +dpps(nan -0 nan inf , nan -0 nan inf , 63) = nan nan nan nan +dpps(1 2 3 -4 , 0 -2 -10 0.5 , 243) = -36 -36 0 0 +dpps(0 -2 -10 0.5 , inf -inf -inf 1 , 243) = nan nan 0 0 +dpps(1 2 3 -4 , nan -0 nan inf , 243) = nan nan 0 0 +dpps(0 -2 -10 0.5 , nan -0 nan inf , 243) = nan nan 0 0 +dpps(inf -inf -inf 1 , nan -0 nan inf , 243) = nan nan 0 0 +dpps(nan -0 nan inf , nan -0 nan inf , 243) = nan nan 0 0 +dpps(1 2 3 -4 , 0 -2 -10 0.5 , 83) = -30 -30 0 0 +dpps(0 -2 -10 0.5 , inf -inf -inf 1 , 83) = nan nan 0 0 +dpps(1 2 3 -4 , nan -0 nan inf , 83) = nan nan 0 0 +dpps(0 -2 -10 0.5 , nan -0 nan inf , 83) = nan nan 0 0 +dpps(inf -inf -inf 1 , nan -0 nan inf , 83) = nan nan 0 0 +dpps(nan -0 nan inf , nan -0 nan inf , 83) = nan nan 0 0 diff --git a/tests/test30 b/tests/test30 new file mode 100755 index 00000000..ac95ac6f --- /dev/null +++ b/tests/test30 Binary files differdiff --git a/tests/test30.c b/tests/test30.c new file mode 100644 index 
00000000..26288423 --- /dev/null +++ b/tests/test30.c @@ -0,0 +1,641 @@ +// build with gcc -O0 -g -msse -msse2 -mssse3 -msse4.1 -mavx test30.c -o test30 +#include <inttypes.h> +#include <string.h> +#include <stdio.h> +#include <stddef.h> +#include <stdlib.h> +#include <stdint.h> +#include <math.h> +#include <pmmintrin.h> +#include <immintrin.h> + +typedef unsigned char u8x16 __attribute__ ((vector_size (16))); +typedef unsigned short u16x8 __attribute__ ((vector_size (16))); +typedef unsigned int u32x4 __attribute__ ((vector_size (16))); +typedef unsigned long int u64x2 __attribute__ ((vector_size (16))); +typedef float f32x4 __attribute__ ((vector_size (16))); +typedef double d64x2 __attribute__ ((vector_size (16))); + +typedef union { + __m128i mm; + __m128 mf; + __m128d md; + u8x16 u8; + u16x8 u16; + u32x4 u32; + u64x2 u64; + f32x4 f32; + d64x2 d64; +} v128; + +uint64_t _ucomiss_(float a, float b) +{ + uint64_t ret = 0x202; + v128 va, vb; + va.f32[0] = a; + vb.f32[0] = b; + if(_mm_ucomigt_ss(va.mf, vb.mf)) + ret |= 0x000; + else if(_mm_ucomilt_ss(va.mf, vb.mf)) + ret |= 0x001; + else if(_mm_ucomieq_ss(va.mf, vb.mf)) + ret |= 0x040; + else + ret |= 0x045; + return ret; +} + +uint64_t _minss_(float a, float b) +{ + v128 va, vb, ret; + va.f32[0] = a; + vb.f32[0] = b; + ret.mf = _mm_min_ss(va.mf, vb.mf); + return ret.u64[0]; +} +uint64_t _maxss_(float a, float b) +{ + v128 va, vb, ret; + va.f32[0] = a; + vb.f32[0] = b; + ret.mf = _mm_max_ss(va.mf, vb.mf); + return ret.u64[0]; +} + +#define CMPSS(A, B) \ +uint64_t _cmpss_##A(float a, float b) \ +{ \ + v128 va, vb, ret; \ + va.f32[0] = a; \ + vb.f32[0] = b; \ + ret.mf = _mm_cmp##B##_ss(va.mf, vb.mf); \ + return ret.u64[0]; \ +} +CMPSS(0, eq) +CMPSS(1, lt) +CMPSS(2, le) +CMPSS(3, unord) +CMPSS(4, neq) +CMPSS(5, nlt) +CMPSS(6, nle) +CMPSS(7, ord) +#undef CMPSS + +const v128 a128_8 = {.u8 = { + 0xff, 0x80, 0x7f, 0x00, 0x01, 0x02, 0x03, 0x81, + 0xfe, 0x84, 0x72, 0x52, 0xa5, 0x00, 0xc0, 0x32 +}}; +const v128 a128_16 = 
{.u16 = { + 0xffff, 0x8000, 0x7fff, 0x0000, 0x0001, 0x0002, 0x0003, 0x8001 +}}; +const v128 a128_32 = {.u32 = { + 0xffffffff, 0x80000000, 0x7fffffff, 0x00000000 +}}; +const v128 a128_64 = {.u64 = { + 0xffffffffffffffffLL, 0x8000000000000000LL +}}; + +const v128 b128_8 = {.u8 = { + 0x00, 0x01, 0x05, 0x15, 0x20, 0x80, 0xff, 0x00, + 0x08, 0x07, 0x81, 0x06, 0x0a, 0x0f, 0x10, 0x01 +}}; +const v128 b128_16 = {.u16 = { + 0x8000, 0x7fff, 0xffff, 0xffff, 0x0050, 0x9000, 0xfffe, 0x8001 +}}; +const v128 b128_32 = {.u32 = { + 0x00000001, 0x80000000, 0x00000005, 0xfffffffe +}}; +const v128 b128_64 = {.u64 = { + 0x0000000000000001LL, 0x8000000000000000LL +}}; +const v128 c128_32 = {.u32 = { + 0x00000001, 0x80000000, 0x80000005, 0x0000fffe +}}; + +const v128 a128_pd = {.d64 = { 1.0, 2.0}}; +const v128 b128_pd = {.d64 = { 0.0, -2.0}}; +const v128 c128_pd = {.d64 = { INFINITY, -INFINITY}}; +const v128 d128_pd = {.d64 = { NAN, -0.0}}; +const v128 a128_ps = {.f32 = { 1.0, 2.0, 3.0, -4.0}}; +const v128 b128_ps = {.f32 = { 0.0, -2.0, -10.0, 0.5}}; +const v128 c128_ps = {.f32 = { INFINITY, -INFINITY, -INFINITY, 1.0}}; +const v128 d128_ps = {.f32 = { NAN, -0.0, -NAN, INFINITY}}; + +v128 reverse_pd(v128 a) { + v128 ret; + ret.md = _mm_shuffle_pd(a.md, a.md, 1); + return ret; +} + +void print_8(v128 v) { + for(int i=0; i<16; ++i) + printf("0x%x ", v.u8[i]); +} +void print_16(v128 v) { + for(int i=0; i<8; ++i) + printf("0x%x ", v.u16[i]); +} +void print_32(v128 v) { + for(int i=0; i<4; ++i) + printf("0x%x ", v.u32[i]); +} +void print_64(v128 v) { + for(int i=0; i<2; ++i) + printf("0x%"PRIx64" ", v.u64[i]); +} +#define print_128 print_64 +void print_ps(v128 v) { + for(int i=0; i<4; ++i) + if(isnanf(v.f32[i])) + printf("nan "); + else + printf("%g ", v.f32[i]); +} +void print_pd(v128 v) { + for(int i=0; i<2; ++i) + if(isnan(v.d64[i])) + printf("0x%"PRIx64" ", v.u64[i]); + else + printf("%g ", v.d64[i]); +} +#define print_sd print_pd + +int main(int argc, const char** argv) +{ + float a, b; + 
uint32_t flags; + uint32_t maxf = 0x7f7fffff; + uint32_t minf = 0xff7fffff; + uint32_t r; + +#define GO1(A, N) \ +a = 1.0f; b = 2.0f; \ +flags = A(a, b); \ +printf(N " %f, %f => 0x%"PRIx32"\n", a, b, flags); \ +flags = A(b, a); \ +printf(N " %f, %f => 0x%"PRIx32"\n", b, a, flags); \ +b = INFINITY; \ +flags = A(a, b); \ +printf(N " %f, %f => 0x%"PRIx32"\n", a, b, flags); \ +flags = A(b, a); \ +printf(N " %f, %f => 0x%"PRIx32"\n", b, a, flags); \ +b = -INFINITY; \ +flags = A(a, b); \ +printf(N " %f, %f => 0x%"PRIx32"\n", a, b, flags); \ +flags = A(b, a); \ +printf(N " %f, %f => 0x%"PRIx32"\n", b, a, flags); \ +b = NAN; \ +flags = A(a, b); \ +printf(N " %f, %f => 0x%"PRIx32"\n", a, b, flags); \ +flags = A(b, a); \ +printf(N " %f, %f => 0x%"PRIx32"\n", b, a, flags); \ +b = a; \ +flags = A(a, b); \ +printf(N " %f, %f => 0x%"PRIx32"\n", a, b, flags); \ +flags = A(b, a); \ +printf(N " %f, %f => 0x%"PRIx32"\n", b, a, flags); \ +a = b = INFINITY; \ +flags = A(a, b); \ +printf(N " %f, %f => 0x%"PRIx32"\n", a, b, flags); \ +a = -INFINITY; \ +flags = A(a, b); \ +printf(N " %f, %f => 0x%"PRIx32"\n", a, b, flags); \ +flags = A(b, a); \ +printf(N " %f, %f => 0x%"PRIx32"\n", b, a, flags); \ +a = b = NAN; \ +flags = A(a, b); \ +printf(N " %f, %f => 0x%"PRIx32"\n", a, b, flags); + +#define GO2(A, N) \ +a = 1.0f; b = 2.0f; \ +r = A(a, b); \ +printf(N " %g, %g => %g\n", a, b, *(float*)&r); \ +r = A(b, a); \ +printf(N " %g, %g => %g\n", b, a, *(float*)&r); \ +a = -INFINITY; \ +r = A(a, b); \ +printf(N " %g, %g => %g\n", a, b, *(float*)&r); \ +r = A(b, a); \ +printf(N " %g, %g => %g\n", b, a, *(float*)&r); \ +a = +INFINITY; \ +r = A(a, b); \ +printf(N " %g, %g => %g\n", a, b, *(float*)&r); \ +r = A(b, a); \ +printf(N " %g, %g => %g\n", b, a, *(float*)&r); \ +a = NAN; \ +r = A(a, b); \ +printf(N " %g, %g => %g\n", a, b, *(float*)&r); \ +r = A(b, a); \ +printf(N " %g, %g => %g\n", b, a, *(float*)&r); \ +b = *(float*)&maxf; \ +r = A(a, b); \ +printf(N " %g, %g => %g\n", a, b, *(float*)&r); 
\ +r = A(b, a); \ +printf(N " %g, %g => %g\n", b, a, *(float*)&r); \ +a = -INFINITY; \ +r = A(a, b); \ +printf(N " %g, %g => %g\n", a, b, *(float*)&r); \ +r = A(b, a); \ +printf(N " %g, %g => %g\n", b, a, *(float*)&r); \ +a = +INFINITY; \ +r = A(a, b); \ +printf(N " %g, %g => %g\n", a, b, *(float*)&r); \ +r = A(b, a); \ +printf(N " %g, %g => %g\n", b, a, *(float*)&r); + + GO1(_ucomiss_, "ucomiss") + GO2(_minss_, "minss") + GO2(_maxss_, "maxss") + GO1(_cmpss_0, "cmpss 0") + GO1(_cmpss_1, "cmpss 1") + GO1(_cmpss_2, "cmpss 2") + GO1(_cmpss_3, "cmpss 3") + GO1(_cmpss_4, "cmpss 4") + GO1(_cmpss_5, "cmpss 5") + GO1(_cmpss_6, "cmpss 6") + GO1(_cmpss_7, "cmpss 7") + + #undef GO1 + #undef GO2 + v128 a128; + int i; + + #define GO1(A, N, C) \ + a128.mm = _mm_##A##_epi##N(a128_##N.mm); \ + printf("%s(", #C); print_##N(a128_##N); \ + printf(") = "); print_##N(a128); printf("\n"); + #define GO1C(A, N, C, A1, I) \ + a128.mm = _mm_##A##_epi##N(A1.mm, I); \ + printf("%s(", #C); print_##N(A1); \ + printf("%d) = ", I); print_##N(a128); printf("\n"); + #define GO2(A, N, C, A1, A2) \ + a128.mm = _mm_##A##_epi##N(A1.mm, A2.mm); \ + printf("%s(", #C); print_##N(A1); \ + printf(", "); print_##N(A2); \ + printf(") = "); print_##N(a128); printf("\n"); + #define GO2u(A, N, C, A1, A2) \ + a128.mm = _mm_##A##_epu##N(A1.mm, A2.mm); \ + printf("%s(", #C); print_##N(A1); \ + printf(", "); print_##N(A2); \ + printf(") = "); print_##N(a128); printf("\n"); + #define GO2f(A, C, A1, A2) \ + a128.mm = _mm_##A##_si128(A1.mm, A2.mm); \ + printf("%s(", #C); print_128(A1); \ + printf(", "); print_128(A2); \ + printf(") = "); print_128(a128); printf("\n"); + #define GO2C(A, N, C, A1, A2, I) \ + a128.mm = _mm_##A##_epi##N(A1.mm, A2.mm, I); \ + printf("%s(", #C); print_##N(A1); \ + printf(", "); print_##N(A2); \ + printf("%d) = ", I); print_##N(a128); printf("\n"); + #define GO2i(A, A1, A2) \ + i = _mm_##A##_si128(A1.mm, A2.mm); \ + printf("p%s(", #A); print_64(A1); \ + printf(", "); print_64(A2); \ + 
printf(") = %d\n", i); + #define GO3PS(A, N, A1, A2, A3) \ + a128.mf = _mm_##A##_ps(A1.mf, A2.mf, A3.mf); \ + printf("p%s%s(", #A, "ps"); print_##N(A1); \ + printf(", "); print_##N(A2); \ + printf(", "); print_##N(A3); \ + printf(") = "); print_##N(a128); printf("\n"); + #define GO1ipd(A, C, A1) \ + i = _mm_##A##_pd(A1.md); \ + printf("%s(", #C); print_64(A1); \ + printf(") = 0x%x\n", i); + #define GO1pd(A, C, A1) \ + a128.md = _mm_##A##_pd(A1.md); \ + printf("%s(", #C); print_pd(A1); \ + printf(") = "); print_pd(a128); printf("\n"); + #define GO2pd(A, C, A1, A2) \ + a128.md = _mm_##A##_pd(A1.md, A2.md); \ + printf("%s(", #C); print_pd(A1); \ + printf(", "); print_pd(A2); \ + printf(") = "); print_pd(a128); printf("\n"); + #define GO2Cpd(A, C, A1, A2, I) \ + a128.md = _mm_##A##_pd(A1.md, A2.md, I); \ + printf("%s(", #C); print_pd(A1); \ + printf(", "); print_pd(A2); \ + printf(", %d) = ", I); print_pd(a128); printf("\n"); + #define GO1isd(A, C, A1) \ + i = _mm_##A##_sd(A1.md); \ + printf("%s(", #C); print_64(A1); \ + printf(") = 0x%x\n", i); + #define GO1sd(A, C, A1) \ + a128.md = _mm_##A##_sd(A1.md); \ + printf("%s(", #C); print_sd(A1); \ + printf(") = "); print_sd(a128); printf("\n"); + #define GO2sd(A, C, A1, A2) \ + a128.md = _mm_##A##_sd(A1.md, A2.md); \ + printf("%s(", #C); print_sd(A1); \ + printf(", "); print_sd(A2); \ + printf(") = "); print_sd(a128); printf("\n"); + #define GO2Csd(A, C, A1, A2, I) \ + a128.md = _mm_##A##_sd(A1.md, A2.md, I); \ + printf("%s(", #C); print_sd(A1); \ + printf(", "); print_sd(A2); \ + printf(", %d) = ", I); print_sd(a128); printf("\n"); + #define GO1ips(A, C, A1) \ + i = _mm_##A##_ps(A1.mf); \ + printf("%s(", #C); print_32(A1); \ + printf(") = 0x%x\n", i); + #define GO1ps(A, C, A1) \ + a128.mf = _mm_##A##_ps(A1.mf); \ + printf("%s(", #C); print_ps(A1); \ + printf(") = "); print_ps(a128); printf("\n"); + #define GO2ps(A, C, A1, A2) \ + a128.mf = _mm_##A##_ps(A1.mf, A2.mf); \ + printf("%s(", #C); print_ps(A1); \ + printf(", "); 
print_ps(A2); \ + printf(") = "); print_ps(a128); printf("\n"); + #define GO2Cps(A, C, A1, A2, I) \ + a128.mf = _mm_##A##_ps(A1.mf, A2.mf, I); \ + printf("%s(", #C); print_ps(A1); \ + printf(", "); print_ps(A2); \ + printf(", %d) = ", I); print_ps(a128); printf("\n"); + #define GO1ps2dq(A, C, A1) \ + a128.mm = _mm_##A##_epi32(A1.mf); \ + printf("%s(", #C); print_ps(A1); \ + printf(") = "); print_32(a128); printf("\n"); + + #define MULITGO2pd(A, B) \ + GO2pd(A, B, a128_pd, b128_pd) \ + GO2pd(A, B, b128_pd, c128_pd) \ + GO2pd(A, B, a128_pd, d128_pd) \ + GO2pd(A, B, b128_pd, d128_pd) \ + GO2pd(A, B, c128_pd, d128_pd) \ + GO2pd(A, B, d128_pd, d128_pd) + + #define MULITGO2Cpd(A, B, I) \ + GO2Cpd(A, B, a128_pd, b128_pd, I) \ + GO2Cpd(A, B, b128_pd, c128_pd, I) \ + GO2Cpd(A, B, a128_pd, d128_pd, I) \ + GO2Cpd(A, B, b128_pd, d128_pd, I) \ + GO2Cpd(A, B, c128_pd, d128_pd, I) \ + GO2Cpd(A, B, d128_pd, d128_pd, I) + + #define MULITGO2ps(A, B) \ + GO2ps(A, B, a128_ps, b128_ps) \ + GO2ps(A, B, b128_ps, c128_ps) \ + GO2ps(A, B, a128_ps, d128_ps) \ + GO2ps(A, B, b128_ps, d128_ps) \ + GO2ps(A, B, c128_ps, d128_ps) \ + GO2ps(A, B, d128_ps, d128_ps) + + #define MULTIGO1ps2dq(A, B) \ + GO1ps2dq(A, B, a128_ps) \ + GO1ps2dq(A, B, b128_ps) \ + GO1ps2dq(A, B, c128_ps) \ + GO1ps2dq(A, B, d128_ps) + + #define MULITGO2Cps(A, B, I) \ + GO2Cps(A, B, a128_ps, b128_ps, I) \ + GO2Cps(A, B, b128_ps, c128_ps, I) \ + GO2Cps(A, B, a128_ps, d128_ps, I) \ + GO2Cps(A, B, b128_ps, d128_ps, I) \ + GO2Cps(A, B, c128_ps, d128_ps, I) \ + GO2Cps(A, B, d128_ps, d128_ps, I) + + #define MULTIGO2sd(A, B) \ + GO2sd(A, B, a128_pd, a128_pd) \ + GO2sd(A, B, a128_pd, b128_pd) \ + GO2sd(A, B, a128_pd, c128_pd) \ + GO2sd(A, B, a128_pd, d128_pd) \ + GO2sd(A, B, b128_pd, d128_pd) \ + GO2sd(A, B, c128_pd, d128_pd) \ + GO2sd(A, B, a128_pd, reverse_pd(a128_pd)) \ + GO2sd(A, B, a128_pd, reverse_pd(b128_pd)) \ + GO2sd(A, B, a128_pd, reverse_pd(c128_pd)) \ + GO2sd(A, B, a128_pd, reverse_pd(d128_pd)) \ + GO2sd(A, B, b128_pd, 
reverse_pd(d128_pd)) \ + GO2sd(A, B, b128_pd, reverse_pd(d128_pd)) + + + GO2(shuffle, 8, pshufb, a128_8, b128_8) + GO2(hadd, 16, phaddw, a128_16, b128_16) + GO2(hadd, 32, phaddd, a128_32, b128_32) + GO2(hadds, 16, phaddsw, a128_16, b128_16) + GO2(maddubs, 16, pmaddubsw, a128_8, b128_8) + GO2(hsub, 16, phsubw, a128_16, b128_16) + GO2(sign, 8, psignb, a128_8, b128_8) + GO2(sign, 16, psignw, a128_16, b128_16) + GO2(sign, 32, psignd, a128_32, b128_32) + GO2(mulhrs, 16, pmulhrsw, a128_16, b128_16) + GO3PS(blendv, 32, a128_32, b128_32, c128_32) + GO2i(testz, a128_32, b128_32) + GO2i(testc, a128_32, b128_32) + GO2i(testnzc, a128_32, b128_32) + GO1(abs, 8, pabsb) + GO1(abs, 16, pabsw) + GO1(abs, 32, pabsd) + GO1(cvtepi8, 16, pmovsxbw); + GO1(cvtepi8, 32, pmovsxbd); + GO1(cvtepi8, 64, pmovsxbq); + GO1(cvtepi16, 32, pmovsxwd); + GO1(cvtepi16, 64, pmovsxwq); + GO1(cvtepi32, 64, pmovsxdq); + GO1(cvtepu8, 16, pmovzxbw); + GO1(cvtepu8, 32, pmovzxbd); + GO1(cvtepu8, 64, pmovzxbq); + GO1(cvtepu16, 32, pmovzxwd); + GO1(cvtepu16, 64, pmovzxwq); + GO1(cvtepu32, 64, pmovzxdq); + GO2(min, 32, pminsd, a128_32, b128_32) + GO2(max, 32, pmaxsd, a128_32, b128_32) + GO2C(blend, 16, pblendw, a128_16, b128_16, 0) + GO2C(blend, 16, pblendw, a128_16, b128_16, 0xff) + GO2C(blend, 16, pblendw, a128_16, b128_16, 0xaa) + GO2C(blend, 16, pblendw, a128_16, b128_16, 2) + GO2C(alignr, 8, palignr, a128_8, b128_8, 0) + GO2C(alignr, 8, palignr, a128_8, b128_8, 2) + GO2C(alignr, 8, palignr, a128_8, b128_8, 7) + GO2C(alignr, 8, palignr, a128_8, b128_8, 15) + GO2C(alignr, 8, palignr, a128_8, b128_8, 16) + GO2C(alignr, 8, palignr, a128_8, b128_8, 0xff) + GO1ipd(movemask, movmskpd, a128_64) + GO1pd(sqrt, psqrtpd, a128_pd) + GO1pd(sqrt, psqrtpd, b128_pd) + GO1pd(sqrt, psqrtpd, c128_pd) + GO1pd(sqrt, psqrtpd, d128_pd) + MULITGO2pd(and, andpd) + MULITGO2pd(andnot, andnpd) + MULITGO2pd(or, orpd) + MULITGO2pd(xor, xorpd) + MULITGO2pd(add, addpd) + MULITGO2pd(mul, mulpd) + MULITGO2pd(sub, subpd) + MULITGO2pd(min, 
minpd) + MULITGO2pd(div, divpd) + MULITGO2pd(max, maxpd) + GO2(unpacklo, 8, punpcklbw, a128_8, b128_8) + GO2(unpacklo, 16, punpcklwd, a128_16, b128_16) + GO2(unpacklo, 32, punpckldq, a128_32, b128_32) + GO2(packs, 16, ppacksswb, a128_16, b128_16) + GO2(cmpgt, 8, pcmpgtb, a128_8, b128_8) + GO2(cmpgt, 16, pcmpgtw, a128_16, b128_16) + GO2(cmpgt, 32, pcmpgtd, a128_32, b128_32) + GO2(packus, 16, packuswb, a128_16, b128_16) + GO2(unpackhi, 8, punpckhbw, a128_8, b128_8) + GO2(unpackhi, 16, punpckhwd, a128_16, b128_16) + GO2(unpackhi, 32, punpckhdq, a128_32, b128_32) + GO2(packs, 32, ppackssdw, a128_32, b128_32) + GO2(unpacklo, 64, punpcklqdq, a128_64, b128_64) + GO2(unpackhi, 64, punpckhqdq, a128_64, b128_64) + GO1C(shuffle, 32, pshufd, a128_32, 0) + GO1C(shuffle, 32, pshufd, a128_32, 0xff) + GO1C(shuffle, 32, pshufd, a128_32, 0xaa) + GO1C(shuffle, 32, pshufd, a128_32, 2) + GO1C(srli, 16, psrlw, a128_16, 0) + GO1C(srli, 16, psrlw, a128_16, 0xff) + GO1C(srli, 16, psrlw, a128_16, 0xaa) + GO1C(srli, 16, psrlw, a128_16, 2) + GO1C(srli, 32, psrld, a128_32, 0) + GO1C(srli, 32, psrld, a128_32, 0xff) + GO1C(srli, 32, psrld, a128_32, 0xaa) + GO1C(srli, 32, psrld, a128_32, 2) + GO1C(srli, 64, psrlq, a128_64, 0) + GO1C(srli, 64, psrlq, a128_64, 0xff) + GO1C(srli, 64, psrlq, a128_64, 0xaa) + GO1C(srli, 64, psrlq, a128_64, 2) + GO1C(srai, 16, psraw, a128_16, 0) + GO1C(srai, 16, psraw, a128_16, 0xff) + GO1C(srai, 16, psraw, a128_16, 0xaa) + GO1C(srai, 16, psraw, a128_16, 2) + GO1C(srai, 32, psrad, a128_32, 0) + GO1C(srai, 32, psrad, a128_32, 0xff) + GO1C(srai, 32, psrad, a128_32, 0xaa) + GO1C(srai, 32, psrad, a128_32, 2) + GO1C(slli, 16, psllw, a128_16, 0) + GO1C(slli, 16, psllw, a128_16, 0xff) + GO1C(slli, 16, psllw, a128_16, 0xaa) + GO1C(slli, 16, psllw, a128_16, 2) + GO1C(slli, 32, pslld, a128_32, 0) + GO1C(slli, 32, pslld, a128_32, 0xff) + GO1C(slli, 32, pslld, a128_32, 0xaa) + GO1C(slli, 32, pslld, a128_32, 2) + GO1C(slli, 64, psllq, a128_64, 0) + GO1C(slli, 64, psllq, a128_64, 
0xff) + GO1C(slli, 64, psllq, a128_64, 0xaa) + GO1C(slli, 64, psllq, a128_64, 2) + GO2(cmpeq, 8, pcmpeqb, a128_8, b128_8) + GO2(cmpeq, 16, pcmpeqw, a128_16, b128_16) + GO2(cmpeq, 32, pcmpeqd, a128_32, b128_32) + MULITGO2pd(hadd, haddpd) + GO2(srl, 16, psrlw, a128_16, b128_16) + GO2(srl, 32, psrld, a128_32, b128_32) + GO2(srl, 64, psrlq, a128_64, b128_64) + GO2(add, 64, paddq, a128_64, b128_64) + GO2(mullo, 16, pmullw, a128_16, b128_16) + GO2u(subs, 8, psubusb, a128_8, b128_8) + GO2u(subs, 16, psubusw, a128_16, b128_16) + GO2u(min, 8, pminub, a128_8, b128_8) + GO2f(and, pand, a128_8, b128_8) + GO2u(adds, 8, paddusb, a128_8, b128_8) + GO2u(adds, 16, paddusw, a128_16, b128_16) + GO2u(max, 8, pmaxub, a128_8, b128_8) + GO2f(andnot, pandn, a128_8, b128_8) + GO2u(avg, 8, pavgb, a128_8, b128_8) + GO2(sra, 16, psraw, a128_16, b128_16) + GO2(sra, 32, psrad, a128_32, b128_32) + GO2u(avg, 16, pavgb, a128_16, b128_16) + GO2u(mulhi, 16, pmulhuw, a128_16, b128_16) + GO2(mulhi, 16, pmulhw, a128_16, b128_16) + GO2(subs, 8, psubsb, a128_8, b128_8) + GO2(subs, 16, psubsw, a128_16, b128_16) + GO2(min, 16, pminsw, a128_16, b128_16) + GO2f(or, por, a128_8, b128_8) + GO2(adds, 8, paddusb, a128_8, b128_8) + GO2(adds, 16, paddusw, a128_16, b128_16) + GO2(max, 16, pmaxsw, a128_16, b128_16) + GO2f(xor, pxor, a128_8, b128_8) + GO2(sll, 16, psllw, a128_16, b128_16) + GO2(sll, 32, pslld, a128_32, b128_32) + GO2(sll, 64, psllq, a128_64, b128_64) + GO2u(mul, 32, pmuludq, a128_32, b128_32) + GO2(madd, 16, pmaddwd, a128_16, b128_16) + GO2u(sad, 8, psadbw, a128_8, b128_8) + GO2(sub, 8, psubb, a128_8, b128_8) + GO2(sub, 16, psubw, a128_16, b128_16) + GO2(sub, 32, psubd, a128_32, b128_32) + GO2(sub, 64, psubq, a128_64, b128_64) + GO2(add, 8, paddb, a128_8, b128_8) + GO2(add, 16, paddw, a128_16, b128_16) + GO2(add, 32, paddd, a128_32, b128_32) + GO2ps(movehl, pmovhlps, a128_ps, b128_ps) + GO2ps(unpacklo, unpcklps, a128_ps, b128_ps) + GO2ps(unpackhi, unpckhps, a128_ps, b128_ps) + GO2ps(movelh, pmovhps, 
a128_ps, b128_ps) + GO1ps(sqrt, psqrtps, a128_ps) + GO1ps(sqrt, psqrtps, b128_ps) + GO1ps(sqrt, psqrtps, c128_ps) + GO1ps(sqrt, psqrtps, d128_ps) + //GO1ps(rsqrt, prsqrtps, a128_ps) // difference in precision + //GO1ps(rsqrt, prsqrtps, b128_ps) // same + //GO1ps(rsqrt, prsqrtps, c128_ps) // same + //GO1ps(rsqrt, prsqrtps, d128_ps) // difference in the handling of NAN, (-)0, and INF in Dynarec + //GO1ps(rcp, prcpps, a128_ps) // difference in precision + //GO1ps(rcp, prcpps, b128_ps) // difference in precision + //GO1ps(rcp, prcpps, c128_ps) // difference in precision + GO1ps(rcp, prcpps, d128_ps) + MULITGO2ps(and, andps) + MULITGO2ps(andnot, andnps) + MULITGO2ps(or, orps) + MULITGO2ps(xor, xorps) + MULITGO2ps(add, addps) + MULITGO2ps(mul, mulps) + MULITGO2ps(sub, subps) + MULITGO2ps(min, minps) + MULITGO2ps(div, divps) + MULITGO2ps(max, maxps) + MULITGO2Cps(cmp, cmpps, 0) + MULITGO2Cps(cmp, cmpps, 1) + MULITGO2Cps(cmp, cmpps, 2) + MULITGO2Cps(cmp, cmpps, 3) + MULITGO2Cps(cmp, cmpps, 4) + MULITGO2Cps(cmp, cmpps, 5) + MULITGO2Cps(cmp, cmpps, 6) + MULITGO2Cps(cmp, cmpps, 7) + MULITGO2Cps(cmp, cmpps, 8) + MULITGO2Cps(cmp, cmpps, 9) + MULITGO2Cps(cmp, cmpps, 10) + MULITGO2Cps(cmp, cmpps, 11) + MULITGO2Cps(cmp, cmpps, 12) + MULITGO2Cps(cmp, cmpps, 13) + MULITGO2Cps(cmp, cmpps, 14) + MULITGO2Cps(cmp, cmpps, 15) + MULITGO2Cps(cmp, cmpps, 16) + MULITGO2Cps(cmp, cmpps, 17) + MULITGO2Cps(cmp, cmpps, 18) + MULITGO2Cps(cmp, cmpps, 19) + MULITGO2Cps(cmp, cmpps, 20) + MULITGO2Cps(cmp, cmpps, 21) + MULITGO2Cps(cmp, cmpps, 22) + MULITGO2Cps(cmp, cmpps, 23) + MULITGO2Cps(cmp, cmpps, 24) + MULITGO2Cps(cmp, cmpps, 25) + MULITGO2Cps(cmp, cmpps, 26) + MULITGO2Cps(cmp, cmpps, 27) + MULITGO2Cps(cmp, cmpps, 28) + MULITGO2Cps(cmp, cmpps, 29) + MULITGO2Cps(cmp, cmpps, 30) + MULITGO2Cps(cmp, cmpps, 31) + MULITGO2Cps(shuffle, shufps, 0) + MULITGO2Cps(shuffle, shufps, 0x15) + MULITGO2Cps(shuffle, shufps, 0xff) + MULITGO2Cps(shuffle, shufps, 0x02) + MULTIGO2sd(sqrt, sqrtsd) + MULTIGO2sd(add, addsd) 
+ MULTIGO2sd(mul, mulsd) + MULTIGO2sd(sub, subsd) + MULTIGO2sd(min, minsd) + MULTIGO2sd(div, divsd) + MULTIGO2sd(max, maxsd) + MULTIGO1ps2dq(cvtps, cvtps2dq) + MULITGO2Cps(dp, dpps, 0xff) + MULITGO2Cps(dp, dpps, 0x3f) + MULITGO2Cps(dp, dpps, 0xf3) + MULITGO2Cps(dp, dpps, 0x53) + + return 0; +} + |