about summary refs log tree commit diff stats
path: root/tests
diff options
context:
space:
mode:
authorptitSeb <sebastien.chev@gmail.com>2024-06-05 19:44:25 +0200
committerptitSeb <sebastien.chev@gmail.com>2024-06-05 19:44:25 +0200
commitc0ebe095213b5048b54ff41d0d5550750af2cbdb (patch)
tree716d642f786937f7e1bf0dad525e477b6b996abc /tests
parentb568cc529e9b10b6b42b2139351b4b3cb0858a28 (diff)
downloadbox64-c0ebe095213b5048b54ff41d0d5550750af2cbdb.tar.gz
box64-c0ebe095213b5048b54ff41d0d5550750af2cbdb.zip
Added an AVX test (not on Android, needs to be build there)
Diffstat (limited to 'tests')
-rw-r--r--tests/ref30.txt756
-rwxr-xr-xtests/test30bin0 -> 192688 bytes
-rw-r--r--tests/test30.c641
3 files changed, 1397 insertions, 0 deletions
diff --git a/tests/ref30.txt b/tests/ref30.txt
new file mode 100644
index 00000000..f2689e47
--- /dev/null
+++ b/tests/ref30.txt
@@ -0,0 +1,756 @@
+ucomiss 1.000000, 2.000000 => 0x203
+ucomiss 2.000000, 1.000000 => 0x202
+ucomiss 1.000000, inf => 0x203
+ucomiss inf, 1.000000 => 0x202
+ucomiss 1.000000, -inf => 0x202
+ucomiss -inf, 1.000000 => 0x203
+ucomiss 1.000000, nan => 0x247
+ucomiss nan, 1.000000 => 0x247
+ucomiss 1.000000, 1.000000 => 0x242
+ucomiss 1.000000, 1.000000 => 0x242
+ucomiss inf, inf => 0x242
+ucomiss -inf, inf => 0x203
+ucomiss inf, -inf => 0x202
+ucomiss nan, nan => 0x247
+minss 1, 2 => 1
+minss 2, 1 => 1
+minss -inf, 2 => -inf
+minss 2, -inf => -inf
+minss inf, 2 => 2
+minss 2, inf => 2
+minss nan, 2 => 2
+minss 2, nan => nan
+minss nan, 3.40282e+38 => 3.40282e+38
+minss 3.40282e+38, nan => nan
+minss -inf, 3.40282e+38 => -inf
+minss 3.40282e+38, -inf => -inf
+minss inf, 3.40282e+38 => 3.40282e+38
+minss 3.40282e+38, inf => 3.40282e+38
+maxss 1, 2 => 2
+maxss 2, 1 => 2
+maxss -inf, 2 => 2
+maxss 2, -inf => 2
+maxss inf, 2 => inf
+maxss 2, inf => inf
+maxss nan, 2 => 2
+maxss 2, nan => nan
+maxss nan, 3.40282e+38 => 3.40282e+38
+maxss 3.40282e+38, nan => nan
+maxss -inf, 3.40282e+38 => 3.40282e+38
+maxss 3.40282e+38, -inf => 3.40282e+38
+maxss inf, 3.40282e+38 => inf
+maxss 3.40282e+38, inf => inf
+cmpss 0 1.000000, 2.000000 => 0x0
+cmpss 0 2.000000, 1.000000 => 0x0
+cmpss 0 1.000000, inf => 0x0
+cmpss 0 inf, 1.000000 => 0x0
+cmpss 0 1.000000, -inf => 0x0
+cmpss 0 -inf, 1.000000 => 0x0
+cmpss 0 1.000000, nan => 0x0
+cmpss 0 nan, 1.000000 => 0x0
+cmpss 0 1.000000, 1.000000 => 0xffffffff
+cmpss 0 1.000000, 1.000000 => 0xffffffff
+cmpss 0 inf, inf => 0xffffffff
+cmpss 0 -inf, inf => 0x0
+cmpss 0 inf, -inf => 0x0
+cmpss 0 nan, nan => 0x0
+cmpss 1 1.000000, 2.000000 => 0xffffffff
+cmpss 1 2.000000, 1.000000 => 0x0
+cmpss 1 1.000000, inf => 0xffffffff
+cmpss 1 inf, 1.000000 => 0x0
+cmpss 1 1.000000, -inf => 0x0
+cmpss 1 -inf, 1.000000 => 0xffffffff
+cmpss 1 1.000000, nan => 0x0
+cmpss 1 nan, 1.000000 => 0x0
+cmpss 1 1.000000, 1.000000 => 0x0
+cmpss 1 1.000000, 1.000000 => 0x0
+cmpss 1 inf, inf => 0x0
+cmpss 1 -inf, inf => 0xffffffff
+cmpss 1 inf, -inf => 0x0
+cmpss 1 nan, nan => 0x0
+cmpss 2 1.000000, 2.000000 => 0xffffffff
+cmpss 2 2.000000, 1.000000 => 0x0
+cmpss 2 1.000000, inf => 0xffffffff
+cmpss 2 inf, 1.000000 => 0x0
+cmpss 2 1.000000, -inf => 0x0
+cmpss 2 -inf, 1.000000 => 0xffffffff
+cmpss 2 1.000000, nan => 0x0
+cmpss 2 nan, 1.000000 => 0x0
+cmpss 2 1.000000, 1.000000 => 0xffffffff
+cmpss 2 1.000000, 1.000000 => 0xffffffff
+cmpss 2 inf, inf => 0xffffffff
+cmpss 2 -inf, inf => 0xffffffff
+cmpss 2 inf, -inf => 0x0
+cmpss 2 nan, nan => 0x0
+cmpss 3 1.000000, 2.000000 => 0x0
+cmpss 3 2.000000, 1.000000 => 0x0
+cmpss 3 1.000000, inf => 0x0
+cmpss 3 inf, 1.000000 => 0x0
+cmpss 3 1.000000, -inf => 0x0
+cmpss 3 -inf, 1.000000 => 0x0
+cmpss 3 1.000000, nan => 0xffffffff
+cmpss 3 nan, 1.000000 => 0xffffffff
+cmpss 3 1.000000, 1.000000 => 0x0
+cmpss 3 1.000000, 1.000000 => 0x0
+cmpss 3 inf, inf => 0x0
+cmpss 3 -inf, inf => 0x0
+cmpss 3 inf, -inf => 0x0
+cmpss 3 nan, nan => 0xffffffff
+cmpss 4 1.000000, 2.000000 => 0xffffffff
+cmpss 4 2.000000, 1.000000 => 0xffffffff
+cmpss 4 1.000000, inf => 0xffffffff
+cmpss 4 inf, 1.000000 => 0xffffffff
+cmpss 4 1.000000, -inf => 0xffffffff
+cmpss 4 -inf, 1.000000 => 0xffffffff
+cmpss 4 1.000000, nan => 0xffffffff
+cmpss 4 nan, 1.000000 => 0xffffffff
+cmpss 4 1.000000, 1.000000 => 0x0
+cmpss 4 1.000000, 1.000000 => 0x0
+cmpss 4 inf, inf => 0x0
+cmpss 4 -inf, inf => 0xffffffff
+cmpss 4 inf, -inf => 0xffffffff
+cmpss 4 nan, nan => 0xffffffff
+cmpss 5 1.000000, 2.000000 => 0x0
+cmpss 5 2.000000, 1.000000 => 0xffffffff
+cmpss 5 1.000000, inf => 0x0
+cmpss 5 inf, 1.000000 => 0xffffffff
+cmpss 5 1.000000, -inf => 0xffffffff
+cmpss 5 -inf, 1.000000 => 0x0
+cmpss 5 1.000000, nan => 0xffffffff
+cmpss 5 nan, 1.000000 => 0xffffffff
+cmpss 5 1.000000, 1.000000 => 0xffffffff
+cmpss 5 1.000000, 1.000000 => 0xffffffff
+cmpss 5 inf, inf => 0xffffffff
+cmpss 5 -inf, inf => 0x0
+cmpss 5 inf, -inf => 0xffffffff
+cmpss 5 nan, nan => 0xffffffff
+cmpss 6 1.000000, 2.000000 => 0x0
+cmpss 6 2.000000, 1.000000 => 0xffffffff
+cmpss 6 1.000000, inf => 0x0
+cmpss 6 inf, 1.000000 => 0xffffffff
+cmpss 6 1.000000, -inf => 0xffffffff
+cmpss 6 -inf, 1.000000 => 0x0
+cmpss 6 1.000000, nan => 0xffffffff
+cmpss 6 nan, 1.000000 => 0xffffffff
+cmpss 6 1.000000, 1.000000 => 0x0
+cmpss 6 1.000000, 1.000000 => 0x0
+cmpss 6 inf, inf => 0x0
+cmpss 6 -inf, inf => 0x0
+cmpss 6 inf, -inf => 0xffffffff
+cmpss 6 nan, nan => 0xffffffff
+cmpss 7 1.000000, 2.000000 => 0xffffffff
+cmpss 7 2.000000, 1.000000 => 0xffffffff
+cmpss 7 1.000000, inf => 0xffffffff
+cmpss 7 inf, 1.000000 => 0xffffffff
+cmpss 7 1.000000, -inf => 0xffffffff
+cmpss 7 -inf, 1.000000 => 0xffffffff
+cmpss 7 1.000000, nan => 0x0
+cmpss 7 nan, 1.000000 => 0x0
+cmpss 7 1.000000, 1.000000 => 0xffffffff
+cmpss 7 1.000000, 1.000000 => 0xffffffff
+cmpss 7 inf, inf => 0xffffffff
+cmpss 7 -inf, inf => 0xffffffff
+cmpss 7 inf, -inf => 0xffffffff
+cmpss 7 nan, nan => 0x0
+pshufb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0xff 0x80 0x2 0x2 0xff 0x0 0x0 0xff 0xfe 0x81 0x0 0x3 0x72 0x32 0xff 0x80 
+phaddw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x7fff 0x7fff 0x3 0x8004 0xffff 0xfffe 0x9050 0x7fff 
+phaddd(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0x7fffffff 0x7fffffff 0x80000001 0x3 
+phaddsw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x8000 0x7fff 0x3 0x8004 0xffff 0xfffe 0x9050 0x8000 
+pmaddubsw(0x80ff 0x7f 0x201 0x8103 0x84fe 0x5272 0xa5 0x32c0 , 0x100 0x1505 0x8020 0xff 0x708 0x681 0xf0a 0x110 ) = 0x80 0x27b 0xff20 0xfffd 0xb8c 0xc95e 0x672 0xc32 
+phsubw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x7fff 0x7fff 0xffff 0x8002 0x1 0x0 0x7050 0x7ffd 
+psignb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0x0 0x80 0x7f 0x0 0x1 0xfe 0xfd 0x0 0xfe 0x84 0x8e 0x52 0xa5 0x0 0xc0 0x32 
+psignw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x1 0x8000 0x8001 0x0 0x1 0xfffe 0xfffd 0x7fff 
+psignd(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0xffffffff 0x80000000 0x7fffffff 0x0 
+pmulhrsw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x1 0x8001 0xffff 0x0 0x0 0xfffe 0x0 0x7ffe 
+pblendvps(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe , 0x1 0x80000000 0x80000005 0xfffe ) = 0xffffffff 0x80000000 0x5 0x0 
+ptestz(0x80000000ffffffff 0x7fffffff , 0x8000000000000001 0xfffffffe00000005 ) = 0
+ptestc(0x80000000ffffffff 0x7fffffff , 0x8000000000000001 0xfffffffe00000005 ) = 0
+ptestnzc(0x80000000ffffffff 0x7fffffff , 0x8000000000000001 0xfffffffe00000005 ) = 1
+pabsb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 ) = 0x1 0x80 0x7f 0x0 0x1 0x2 0x3 0x7f 0x2 0x7c 0x72 0x52 0x5b 0x0 0x40 0x32 
+pabsw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 ) = 0x1 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x7fff 
+pabsd(0xffffffff 0x80000000 0x7fffffff 0x0 ) = 0x1 0x80000000 0x7fffffff 0x0 
+pmovsxbw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 ) = 0xffff 0xffff 0x0 0xff80 0xffff 0x7f 0x0 0x0 
+pmovsxbd(0xffffffff 0x80000000 0x7fffffff 0x0 ) = 0xffffffff 0xffffffff 0xffffffff 0xffffffff 
+pmovsxbq(0xffffffffffffffff 0x8000000000000000 ) = 0xffffffffffffffff 0xffffffffffffffff 
+pmovsxwd(0xffffffff 0x80000000 0x7fffffff 0x0 ) = 0xffffffff 0xffffffff 0x0 0xffff8000 
+pmovsxwq(0xffffffffffffffff 0x8000000000000000 ) = 0xffffffffffffffff 0xffffffffffffffff 
+pmovsxdq(0xffffffffffffffff 0x8000000000000000 ) = 0xffffffffffffffff 0xffffffffffffffff 
+pmovzxbw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 ) = 0xff 0xff 0x0 0x80 0xff 0x7f 0x0 0x0 
+pmovzxbd(0xffffffff 0x80000000 0x7fffffff 0x0 ) = 0xff 0xff 0xff 0xff 
+pmovzxbq(0xffffffffffffffff 0x8000000000000000 ) = 0xff 0xff 
+pmovzxwd(0xffffffff 0x80000000 0x7fffffff 0x0 ) = 0xffff 0xffff 0x0 0x8000 
+pmovzxwq(0xffffffffffffffff 0x8000000000000000 ) = 0xffff 0xffff 
+pmovzxdq(0xffffffffffffffff 0x8000000000000000 ) = 0xffffffff 0xffffffff 
+pminsd(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0xffffffff 0x80000000 0x5 0xfffffffe 
+pmaxsd(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0x1 0x80000000 0x7fffffff 0x0 
+pblendw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 0) = 0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 
+pblendw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 255) = 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 
+pblendw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 170) = 0xffff 0x7fff 0x7fff 0xffff 0x1 0x9000 0x3 0x8001 
+pblendw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 2) = 0xffff 0x7fff 0x7fff 0x0 0x1 0x2 0x3 0x8001 
+palignr(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 0) = 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 
+palignr(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 2) = 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 0xff 0x80 
+palignr(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 7) = 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 0xff 0x80 0x7f 0x0 0x1 0x2 0x3 
+palignr(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 15) = 0x1 0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 
+palignr(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 16) = 0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 
+palignr(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 255) = 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 
+movmskpd(0xffffffffffffffff 0x8000000000000000 ) = 0x3
+psqrtpd(1 2 ) = 1 1.41421 
+psqrtpd(0 -2 ) = 0 0xfff8000000000000 
+psqrtpd(inf -inf ) = inf 0xfff8000000000000 
+psqrtpd(0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -0 
+andpd(1 2 , 0 -2 ) = 0 2 
+andpd(0 -2 , inf -inf ) = 0 -2 
+andpd(1 2 , 0x7ff8000000000000 -0 ) = 1 0 
+andpd(0 -2 , 0x7ff8000000000000 -0 ) = 0 -0 
+andpd(inf -inf , 0x7ff8000000000000 -0 ) = inf -0 
+andpd(0x7ff8000000000000 -0 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -0 
+andnpd(1 2 , 0 -2 ) = 0 -0 
+andnpd(0 -2 , inf -inf ) = inf 1 
+andnpd(1 2 , 0x7ff8000000000000 -0 ) = 3 -0 
+andnpd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 0 
+andnpd(inf -inf , 0x7ff8000000000000 -0 ) = 1.11254e-308 0 
+andnpd(0x7ff8000000000000 -0 , 0x7ff8000000000000 -0 ) = 0 0 
+orpd(1 2 , 0 -2 ) = 1 -2 
+orpd(0 -2 , inf -inf ) = inf -inf 
+orpd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -2 
+orpd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -2 
+orpd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -inf 
+orpd(0x7ff8000000000000 -0 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -0 
+xorpd(1 2 , 0 -2 ) = 1 -0 
+xorpd(0 -2 , inf -inf ) = inf 1 
+xorpd(1 2 , 0x7ff8000000000000 -0 ) = 3 -2 
+xorpd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 2 
+xorpd(inf -inf , 0x7ff8000000000000 -0 ) = 1.11254e-308 inf 
+xorpd(0x7ff8000000000000 -0 , 0x7ff8000000000000 -0 ) = 0 0 
+addpd(1 2 , 0 -2 ) = 1 0 
+addpd(0 -2 , inf -inf ) = inf -inf 
+addpd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 2 
+addpd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -2 
+addpd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -inf 
+addpd(0x7ff8000000000000 -0 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -0 
+mulpd(1 2 , 0 -2 ) = 0 -4 
+mulpd(0 -2 , inf -inf ) = 0xfff8000000000000 inf 
+mulpd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -0 
+mulpd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 0 
+mulpd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 0xfff8000000000000 
+mulpd(0x7ff8000000000000 -0 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 0 
+subpd(1 2 , 0 -2 ) = 1 4 
+subpd(0 -2 , inf -inf ) = -inf inf 
+subpd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 2 
+subpd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -2 
+subpd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -inf 
+subpd(0x7ff8000000000000 -0 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 0 
+minpd(1 2 , 0 -2 ) = 0 -2 
+minpd(0 -2 , inf -inf ) = 0 -inf 
+minpd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -0 
+minpd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -2 
+minpd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -inf 
+minpd(0x7ff8000000000000 -0 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -0 
+divpd(1 2 , 0 -2 ) = inf -1 
+divpd(0 -2 , inf -inf ) = 0 0 
+divpd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -inf 
+divpd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 inf 
+divpd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 inf 
+divpd(0x7ff8000000000000 -0 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 0xfff8000000000000 
+maxpd(1 2 , 0 -2 ) = 1 2 
+maxpd(0 -2 , inf -inf ) = inf -2 
+maxpd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 2 
+maxpd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -0 
+maxpd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -0 
+maxpd(0x7ff8000000000000 -0 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -0 
+punpcklbw(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0xff 0x0 0x80 0x1 0x7f 0x5 0x0 0x15 0x1 0x20 0x2 0x80 0x3 0xff 0x81 0x0 
+punpcklwd(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0xffff 0x8000 0x8000 0x7fff 0x7fff 0xffff 0x0 0xffff 
+punpckldq(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0xffffffff 0x1 0x80000000 0x80000000 
+ppacksswb(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x80ff 0x7f 0x201 0x8003 0x7f80 0xffff 0x8050 0x80fe 
+pcmpgtb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0x0 0x0 0xff 0x0 0x0 0xff 0xff 0x0 0x0 0x0 0xff 0xff 0x0 0x0 0x0 0xff 
+pcmpgtw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0xffff 0x0 0xffff 0xffff 0x0 0xffff 0xffff 0x0 
+pcmpgtd(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0x0 0x0 0xffffffff 0xffffffff 
+packuswb(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x0 0xff 0x201 0x3 0xff00 0x0 0x50 0x0 
+punpckhbw(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0xfe 0x8 0x84 0x7 0x72 0x81 0x52 0x6 0xa5 0xa 0x0 0xf 0xc0 0x10 0x32 0x1 
+punpckhwd(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x1 0x50 0x2 0x9000 0x3 0xfffe 0x8001 0x8001 
+punpckhdq(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0x7fffffff 0x5 0x0 0xfffffffe 
+ppackssdw(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0x8000ffff 0x7fff 0x80000001 0xfffe0005 
+punpcklqdq(0xffffffffffffffff 0x8000000000000000 , 0x1 0x8000000000000000 ) = 0xffffffffffffffff 0x1 
+punpckhqdq(0xffffffffffffffff 0x8000000000000000 , 0x1 0x8000000000000000 ) = 0x8000000000000000 0x8000000000000000 
+pshufd(0xffffffff 0x80000000 0x7fffffff 0x0 0) = 0xffffffff 0xffffffff 0xffffffff 0xffffffff 
+pshufd(0xffffffff 0x80000000 0x7fffffff 0x0 255) = 0x0 0x0 0x0 0x0 
+pshufd(0xffffffff 0x80000000 0x7fffffff 0x0 170) = 0x7fffffff 0x7fffffff 0x7fffffff 0x7fffffff 
+pshufd(0xffffffff 0x80000000 0x7fffffff 0x0 2) = 0x7fffffff 0xffffffff 0xffffffff 0xffffffff 
+psrlw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 0) = 0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 
+psrlw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 255) = 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 
+psrlw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 170) = 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 
+psrlw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 2) = 0x3fff 0x2000 0x1fff 0x0 0x0 0x0 0x0 0x2000 
+psrld(0xffffffff 0x80000000 0x7fffffff 0x0 0) = 0xffffffff 0x80000000 0x7fffffff 0x0 
+psrld(0xffffffff 0x80000000 0x7fffffff 0x0 255) = 0x0 0x0 0x0 0x0 
+psrld(0xffffffff 0x80000000 0x7fffffff 0x0 170) = 0x0 0x0 0x0 0x0 
+psrld(0xffffffff 0x80000000 0x7fffffff 0x0 2) = 0x3fffffff 0x20000000 0x1fffffff 0x0 
+psrlq(0xffffffffffffffff 0x8000000000000000 0) = 0xffffffffffffffff 0x8000000000000000 
+psrlq(0xffffffffffffffff 0x8000000000000000 255) = 0x0 0x0 
+psrlq(0xffffffffffffffff 0x8000000000000000 170) = 0x0 0x0 
+psrlq(0xffffffffffffffff 0x8000000000000000 2) = 0x3fffffffffffffff 0x2000000000000000 
+psraw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 0) = 0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 
+psraw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 255) = 0xffff 0xffff 0x0 0x0 0x0 0x0 0x0 0xffff 
+psraw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 170) = 0xffff 0xffff 0x0 0x0 0x0 0x0 0x0 0xffff 
+psraw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 2) = 0xffff 0xe000 0x1fff 0x0 0x0 0x0 0x0 0xe000 
+psrad(0xffffffff 0x80000000 0x7fffffff 0x0 0) = 0xffffffff 0x80000000 0x7fffffff 0x0 
+psrad(0xffffffff 0x80000000 0x7fffffff 0x0 255) = 0xffffffff 0xffffffff 0x0 0x0 
+psrad(0xffffffff 0x80000000 0x7fffffff 0x0 170) = 0xffffffff 0xffffffff 0x0 0x0 
+psrad(0xffffffff 0x80000000 0x7fffffff 0x0 2) = 0xffffffff 0xe0000000 0x1fffffff 0x0 
+psllw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 0) = 0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 
+psllw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 255) = 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 
+psllw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 170) = 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 
+psllw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 2) = 0xfffc 0x0 0xfffc 0x0 0x4 0x8 0xc 0x4 
+pslld(0xffffffff 0x80000000 0x7fffffff 0x0 0) = 0xffffffff 0x80000000 0x7fffffff 0x0 
+pslld(0xffffffff 0x80000000 0x7fffffff 0x0 255) = 0x0 0x0 0x0 0x0 
+pslld(0xffffffff 0x80000000 0x7fffffff 0x0 170) = 0x0 0x0 0x0 0x0 
+pslld(0xffffffff 0x80000000 0x7fffffff 0x0 2) = 0xfffffffc 0x0 0xfffffffc 0x0 
+psllq(0xffffffffffffffff 0x8000000000000000 0) = 0xffffffffffffffff 0x8000000000000000 
+psllq(0xffffffffffffffff 0x8000000000000000 255) = 0x0 0x0 
+psllq(0xffffffffffffffff 0x8000000000000000 170) = 0x0 0x0 
+psllq(0xffffffffffffffff 0x8000000000000000 2) = 0xfffffffffffffffc 0x0 
+pcmpeqb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 
+pcmpeqw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0xffff 
+pcmpeqd(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0x0 0xffffffff 0x0 0x0 
+haddpd(1 2 , 0 -2 ) = 3 -2 
+haddpd(0 -2 , inf -inf ) = -2 0xfff8000000000000 
+haddpd(1 2 , 0x7ff8000000000000 -0 ) = 3 0x7ff8000000000000 
+haddpd(0 -2 , 0x7ff8000000000000 -0 ) = -2 0x7ff8000000000000 
+haddpd(inf -inf , 0x7ff8000000000000 -0 ) = 0xfff8000000000000 0x7ff8000000000000 
+haddpd(0x7ff8000000000000 -0 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 0x7ff8000000000000 
+psrlw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 
+psrld(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0x0 0x0 0x0 0x0 
+psrlq(0xffffffffffffffff 0x8000000000000000 , 0x1 0x8000000000000000 ) = 0x7fffffffffffffff 0x4000000000000000 
+paddq(0xffffffffffffffff 0x8000000000000000 , 0x1 0x8000000000000000 ) = 0x0 0x0 
+pmullw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x8000 0x8000 0x8001 0x0 0x50 0x2000 0xfffa 0x1 
+psubusb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0xff 0x7f 0x7a 0x0 0x0 0x0 0x0 0x81 0xf6 0x7d 0x0 0x4c 0x9b 0x0 0xb0 0x31 
+psubusw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x7fff 0x1 0x0 0x0 0x0 0x0 0x0 0x0 
+pminub(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0x0 0x1 0x5 0x0 0x1 0x2 0x3 0x0 0x8 0x7 0x72 0x6 0xa 0x0 0x10 0x1 
+pand(0x81030201007f80ff 0x32c000a5527284fe , 0xff802015050100 0x1100f0a06810708 ) = 0x3000000050000 0x2000408 
+paddusb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0xff 0x81 0x84 0x15 0x21 0x82 0xff 0x81 0xff 0x8b 0xf3 0x58 0xaf 0xf 0xd0 0x33 
+paddusw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0xffff 0xffff 0xffff 0xffff 0x51 0x9002 0xffff 0xffff 
+pmaxub(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0xff 0x80 0x7f 0x15 0x20 0x80 0xff 0x81 0xfe 0x84 0x81 0x52 0xa5 0xf 0xc0 0x32 
+pandn(0x81030201007f80ff 0x32c000a5527284fe , 0xff802015050100 0x1100f0a06810708 ) = 0xfc802015000100 0x1100f0a04810300 
+pavgb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0x80 0x41 0x42 0xb 0x11 0x41 0x81 0x41 0x83 0x46 0x7a 0x2c 0x58 0x8 0x68 0x1a 
+psraw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0xffff 0xffff 0x0 0x0 0x0 0x0 0x0 0xffff 
+psrad(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0xffffffff 0xffffffff 0x0 0x0 
+pavgb(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0xc000 0x8000 0xbfff 0x8000 0x29 0x4801 0x8001 0x8001 
+pmulhuw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x7fff 0x3fff 0x7ffe 0x0 0x0 0x1 0x2 0x4001 
+pmulhw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x0 0xc000 0xffff 0x0 0x0 0xffff 0xffff 0x3fff 
+psubsb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0xff 0x80 0x7a 0xeb 0xe1 0x7f 0x4 0x81 0xf6 0x80 0x7f 0x4c 0x9b 0xf1 0xb0 0x31 
+psubsw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x7fff 0x8000 0x7fff 0x1 0xffb1 0x7002 0x5 0x0 
+pminsw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x8000 0x8000 0xffff 0xffff 0x1 0x9000 0xfffe 0x8001 
+por(0x81030201007f80ff 0x32c000a5527284fe , 0xff802015050100 0x1100f0a06810708 ) = 0x81ff8221157f81ff 0x33d00faf56f387fe 
+paddusb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0xff 0x81 0x7f 0x15 0x21 0x82 0x2 0x81 0x6 0x8b 0xf3 0x58 0xaf 0xf 0xd0 0x33 
+paddusw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x8000 0xffff 0x7ffe 0xffff 0x51 0x9002 0x1 0x8000 
+pmaxsw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0xffff 0x7fff 0x7fff 0x0 0x50 0x2 0x3 0x8001 
+pxor(0x81030201007f80ff 0x32c000a5527284fe , 0xff802015050100 0x1100f0a06810708 ) = 0x81fc8221157a81ff 0x33d00faf54f383f6 
+psllw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 
+pslld(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0x0 0x0 0x0 0x0 
+psllq(0xffffffffffffffff 0x8000000000000000 , 0x1 0x8000000000000000 ) = 0xfffffffffffffffe 0x0 
+pmuludq(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0xffffffff 0x0 0x7ffffffb 0x2 
+pmaddwd(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x0 0xc001 0x8001 0xffff 0x2050 0xffff 0xfffb 0x3ffe 
+psadbw(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0x27 0x4 0x0 0x0 0x0 0x0 0x0 0x0 0x59 0x3 0x0 0x0 0x0 0x0 0x0 0x0 
+psubb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0xff 0x7f 0x7a 0xeb 0xe1 0x82 0x4 0x81 0xf6 0x7d 0xf1 0x4c 0x9b 0xf1 0xb0 0x31 
+psubw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x7fff 0x1 0x8000 0x1 0xffb1 0x7002 0x5 0x0 
+psubd(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0xfffffffe 0x0 0x7ffffffa 0x2 
+psubq(0xffffffffffffffff 0x8000000000000000 , 0x1 0x8000000000000000 ) = 0xfffffffffffffffe 0x0 
+paddb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0xff 0x81 0x84 0x15 0x21 0x82 0x2 0x81 0x6 0x8b 0xf3 0x58 0xaf 0xf 0xd0 0x33 
+paddw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x7fff 0xffff 0x7ffe 0xffff 0x51 0x9002 0x1 0x2 
+paddd(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0x0 0x0 0x80000004 0xfffffffe 
+pmovhlps(1 2 3 -4 , 0 -2 -10 0.5 ) = -10 0.5 3 -4 
+unpcklps(1 2 3 -4 , 0 -2 -10 0.5 ) = 1 0 2 -2 
+unpckhps(1 2 3 -4 , 0 -2 -10 0.5 ) = 3 -10 -4 0.5 
+pmovhps(1 2 3 -4 , 0 -2 -10 0.5 ) = 1 2 0 -2 
+psqrtps(1 2 3 -4 ) = 1 1.41421 1.73205 nan 
+psqrtps(0 -2 -10 0.5 ) = 0 nan nan 0.707107 
+psqrtps(inf -inf -inf 1 ) = inf nan nan 1 
+psqrtps(nan -0 nan inf ) = nan -0 nan inf 
+prcpps(nan -0 nan inf ) = nan -inf nan 0 
+andps(1 2 3 -4 , 0 -2 -10 0.5 ) = 0 2 2 0 
+andps(0 -2 -10 0.5 , inf -inf -inf 1 ) = 0 -2 -8 0.5 
+andps(1 2 3 -4 , nan -0 nan inf ) = 1 0 3 4 
+andps(0 -2 -10 0.5 , nan -0 nan inf ) = 0 -0 -8 0.5 
+andps(inf -inf -inf 1 , nan -0 nan inf ) = inf -0 -inf 1 
+andps(nan -0 nan inf , nan -0 nan inf ) = nan -0 nan inf 
+andnps(1 2 3 -4 , 0 -2 -10 0.5 ) = 0 -0 -2.93874e-38 0.5 
+andnps(0 -2 -10 0.5 , inf -inf -inf 1 ) = inf 1 0.25 1.17549e-38 
+andnps(1 2 3 -4 , nan -0 nan inf ) = 3 -0 -1 0.5 
+andnps(0 -2 -10 0.5 , nan -0 nan inf ) = nan 0 0.375 4 
+andnps(inf -inf -inf 1 , nan -0 nan inf ) = 5.87747e-39 0 5.87747e-39 2 
+andnps(nan -0 nan inf , nan -0 nan inf ) = 0 0 0 0 
+orps(1 2 3 -4 , 0 -2 -10 0.5 ) = 1 -2 -14 -inf 
+orps(0 -2 -10 0.5 , inf -inf -inf 1 ) = inf -inf nan 1 
+orps(1 2 3 -4 , nan -0 nan inf ) = nan -2 nan -inf 
+orps(0 -2 -10 0.5 , nan -0 nan inf ) = nan -2 nan inf 
+orps(inf -inf -inf 1 , nan -0 nan inf ) = nan -inf nan inf 
+orps(nan -0 nan inf , nan -0 nan inf ) = nan -0 nan inf 
+xorps(1 2 3 -4 , 0 -2 -10 0.5 ) = 1 -0 -4.11423e-38 -inf 
+xorps(0 -2 -10 0.5 , inf -inf -inf 1 ) = inf 1 0.3125 1.17549e-38 
+xorps(1 2 3 -4 , nan -0 nan inf ) = 3 -2 -1 -0.5 
+xorps(0 -2 -10 0.5 , nan -0 nan inf ) = nan 2 0.4375 4 
+xorps(inf -inf -inf 1 , nan -0 nan inf ) = 5.87747e-39 inf 5.87747e-39 2 
+xorps(nan -0 nan inf , nan -0 nan inf ) = 0 0 0 0 
+addps(1 2 3 -4 , 0 -2 -10 0.5 ) = 1 0 -7 -3.5 
+addps(0 -2 -10 0.5 , inf -inf -inf 1 ) = inf -inf -inf 1.5 
+addps(1 2 3 -4 , nan -0 nan inf ) = nan 2 nan inf 
+addps(0 -2 -10 0.5 , nan -0 nan inf ) = nan -2 nan inf 
+addps(inf -inf -inf 1 , nan -0 nan inf ) = nan -inf nan inf 
+addps(nan -0 nan inf , nan -0 nan inf ) = nan -0 nan inf 
+mulps(1 2 3 -4 , 0 -2 -10 0.5 ) = 0 -4 -30 -2 
+mulps(0 -2 -10 0.5 , inf -inf -inf 1 ) = nan inf inf 0.5 
+mulps(1 2 3 -4 , nan -0 nan inf ) = nan -0 nan -inf 
+mulps(0 -2 -10 0.5 , nan -0 nan inf ) = nan 0 nan inf 
+mulps(inf -inf -inf 1 , nan -0 nan inf ) = nan nan nan inf 
+mulps(nan -0 nan inf , nan -0 nan inf ) = nan 0 nan inf 
+subps(1 2 3 -4 , 0 -2 -10 0.5 ) = 1 4 13 -4.5 
+subps(0 -2 -10 0.5 , inf -inf -inf 1 ) = -inf inf inf -0.5 
+subps(1 2 3 -4 , nan -0 nan inf ) = nan 2 nan -inf 
+subps(0 -2 -10 0.5 , nan -0 nan inf ) = nan -2 nan -inf 
+subps(inf -inf -inf 1 , nan -0 nan inf ) = nan -inf nan -inf 
+subps(nan -0 nan inf , nan -0 nan inf ) = nan 0 nan nan 
+minps(1 2 3 -4 , 0 -2 -10 0.5 ) = 0 -2 -10 -4 
+minps(0 -2 -10 0.5 , inf -inf -inf 1 ) = 0 -inf -inf 0.5 
+minps(1 2 3 -4 , nan -0 nan inf ) = nan -0 nan -4 
+minps(0 -2 -10 0.5 , nan -0 nan inf ) = nan -2 nan 0.5 
+minps(inf -inf -inf 1 , nan -0 nan inf ) = nan -inf nan 1 
+minps(nan -0 nan inf , nan -0 nan inf ) = nan -0 nan inf 
+divps(1 2 3 -4 , 0 -2 -10 0.5 ) = inf -1 -0.3 -8 
+divps(0 -2 -10 0.5 , inf -inf -inf 1 ) = 0 0 0 0.5 
+divps(1 2 3 -4 , nan -0 nan inf ) = nan -inf nan -0 
+divps(0 -2 -10 0.5 , nan -0 nan inf ) = nan inf nan 0 
+divps(inf -inf -inf 1 , nan -0 nan inf ) = nan inf nan 0 
+divps(nan -0 nan inf , nan -0 nan inf ) = nan nan nan nan 
+maxps(1 2 3 -4 , 0 -2 -10 0.5 ) = 1 2 3 0.5 
+maxps(0 -2 -10 0.5 , inf -inf -inf 1 ) = inf -2 -10 1 
+maxps(1 2 3 -4 , nan -0 nan inf ) = nan 2 nan inf 
+maxps(0 -2 -10 0.5 , nan -0 nan inf ) = nan -0 nan inf 
+maxps(inf -inf -inf 1 , nan -0 nan inf ) = nan -0 nan inf 
+maxps(nan -0 nan inf , nan -0 nan inf ) = nan -0 nan inf 
+cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 0) = 0 0 0 0 
+cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 0) = 0 0 0 0 
+cmpps(1 2 3 -4 , nan -0 nan inf , 0) = 0 0 0 0 
+cmpps(0 -2 -10 0.5 , nan -0 nan inf , 0) = 0 0 0 0 
+cmpps(inf -inf -inf 1 , nan -0 nan inf , 0) = 0 0 0 0 
+cmpps(nan -0 nan inf , nan -0 nan inf , 0) = 0 nan 0 nan 
+cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 1) = 0 0 0 nan 
+cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 1) = nan 0 0 nan 
+cmpps(1 2 3 -4 , nan -0 nan inf , 1) = 0 0 0 nan 
+cmpps(0 -2 -10 0.5 , nan -0 nan inf , 1) = 0 nan 0 nan 
+cmpps(inf -inf -inf 1 , nan -0 nan inf , 1) = 0 nan 0 nan 
+cmpps(nan -0 nan inf , nan -0 nan inf , 1) = 0 0 0 0 
+cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 2) = 0 0 0 nan 
+cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 2) = nan 0 0 nan 
+cmpps(1 2 3 -4 , nan -0 nan inf , 2) = 0 0 0 nan 
+cmpps(0 -2 -10 0.5 , nan -0 nan inf , 2) = 0 nan 0 nan 
+cmpps(inf -inf -inf 1 , nan -0 nan inf , 2) = 0 nan 0 nan 
+cmpps(nan -0 nan inf , nan -0 nan inf , 2) = 0 nan 0 nan 
+cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 3) = 0 0 0 0 
+cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 3) = 0 0 0 0 
+cmpps(1 2 3 -4 , nan -0 nan inf , 3) = nan 0 nan 0 
+cmpps(0 -2 -10 0.5 , nan -0 nan inf , 3) = nan 0 nan 0 
+cmpps(inf -inf -inf 1 , nan -0 nan inf , 3) = nan 0 nan 0 
+cmpps(nan -0 nan inf , nan -0 nan inf , 3) = nan 0 nan 0 
+cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 4) = nan nan nan nan 
+cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 4) = nan nan nan nan 
+cmpps(1 2 3 -4 , nan -0 nan inf , 4) = nan nan nan nan 
+cmpps(0 -2 -10 0.5 , nan -0 nan inf , 4) = nan nan nan nan 
+cmpps(inf -inf -inf 1 , nan -0 nan inf , 4) = nan nan nan nan 
+cmpps(nan -0 nan inf , nan -0 nan inf , 4) = nan 0 nan 0 
+cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 5) = nan nan nan 0 
+cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 5) = 0 nan nan 0 
+cmpps(1 2 3 -4 , nan -0 nan inf , 5) = nan nan nan 0 
+cmpps(0 -2 -10 0.5 , nan -0 nan inf , 5) = nan 0 nan 0 
+cmpps(inf -inf -inf 1 , nan -0 nan inf , 5) = nan 0 nan 0 
+cmpps(nan -0 nan inf , nan -0 nan inf , 5) = nan nan nan nan 
+cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 6) = nan nan nan 0 
+cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 6) = 0 nan nan 0 
+cmpps(1 2 3 -4 , nan -0 nan inf , 6) = nan nan nan 0 
+cmpps(0 -2 -10 0.5 , nan -0 nan inf , 6) = nan 0 nan 0 
+cmpps(inf -inf -inf 1 , nan -0 nan inf , 6) = nan 0 nan 0 
+cmpps(nan -0 nan inf , nan -0 nan inf , 6) = nan 0 nan 0 
+cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 7) = nan nan nan nan 
+cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 7) = nan nan nan nan 
+cmpps(1 2 3 -4 , nan -0 nan inf , 7) = 0 nan 0 nan 
+cmpps(0 -2 -10 0.5 , nan -0 nan inf , 7) = 0 nan 0 nan 
+cmpps(inf -inf -inf 1 , nan -0 nan inf , 7) = 0 nan 0 nan 
+cmpps(nan -0 nan inf , nan -0 nan inf , 7) = 0 nan 0 nan 
+cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 8) = 0 0 0 0 
+cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 8) = 0 0 0 0 
+cmpps(1 2 3 -4 , nan -0 nan inf , 8) = nan 0 nan 0 
+cmpps(0 -2 -10 0.5 , nan -0 nan inf , 8) = nan 0 nan 0 
+cmpps(inf -inf -inf 1 , nan -0 nan inf , 8) = nan 0 nan 0 
+cmpps(nan -0 nan inf , nan -0 nan inf , 8) = nan nan nan nan 
+cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 9) = 0 0 0 nan 
+cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 9) = nan 0 0 nan 
+cmpps(1 2 3 -4 , nan -0 nan inf , 9) = nan 0 nan nan 
+cmpps(0 -2 -10 0.5 , nan -0 nan inf , 9) = nan nan nan nan 
+cmpps(inf -inf -inf 1 , nan -0 nan inf , 9) = nan nan nan nan 
+cmpps(nan -0 nan inf , nan -0 nan inf , 9) = nan 0 nan 0 
+cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 10) = 0 0 0 nan 
+cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 10) = nan 0 0 nan 
+cmpps(1 2 3 -4 , nan -0 nan inf , 10) = nan 0 nan nan 
+cmpps(0 -2 -10 0.5 , nan -0 nan inf , 10) = nan nan nan nan 
+cmpps(inf -inf -inf 1 , nan -0 nan inf , 10) = nan nan nan nan 
+cmpps(nan -0 nan inf , nan -0 nan inf , 10) = nan nan nan nan 
+cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 11) = 0 0 0 0 
+cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 11) = 0 0 0 0 
+cmpps(1 2 3 -4 , nan -0 nan inf , 11) = 0 0 0 0 
+cmpps(0 -2 -10 0.5 , nan -0 nan inf , 11) = 0 0 0 0 
+cmpps(inf -inf -inf 1 , nan -0 nan inf , 11) = 0 0 0 0 
+cmpps(nan -0 nan inf , nan -0 nan inf , 11) = 0 0 0 0 
+cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 12) = nan nan nan nan 
+cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 12) = nan nan nan nan 
+cmpps(1 2 3 -4 , nan -0 nan inf , 12) = 0 nan 0 nan 
+cmpps(0 -2 -10 0.5 , nan -0 nan inf , 12) = 0 nan 0 nan 
+cmpps(inf -inf -inf 1 , nan -0 nan inf , 12) = 0 nan 0 nan 
+cmpps(nan -0 nan inf , nan -0 nan inf , 12) = 0 0 0 0 
+cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 13) = nan nan nan 0 
+cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 13) = 0 nan nan 0 
+cmpps(1 2 3 -4 , nan -0 nan inf , 13) = 0 nan 0 0 
+cmpps(0 -2 -10 0.5 , nan -0 nan inf , 13) = 0 0 0 0 
+cmpps(inf -inf -inf 1 , nan -0 nan inf , 13) = 0 0 0 0 
+cmpps(nan -0 nan inf , nan -0 nan inf , 13) = 0 nan 0 nan 
+cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 14) = nan nan nan 0 
+cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 14) = 0 nan nan 0 
+cmpps(1 2 3 -4 , nan -0 nan inf , 14) = 0 nan 0 0 
+cmpps(0 -2 -10 0.5 , nan -0 nan inf , 14) = 0 0 0 0 
+cmpps(inf -inf -inf 1 , nan -0 nan inf , 14) = 0 0 0 0 
+cmpps(nan -0 nan inf , nan -0 nan inf , 14) = 0 0 0 0 
+cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 15) = nan nan nan nan 
+cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 15) = nan nan nan nan 
+cmpps(1 2 3 -4 , nan -0 nan inf , 15) = nan nan nan nan 
+cmpps(0 -2 -10 0.5 , nan -0 nan inf , 15) = nan nan nan nan 
+cmpps(inf -inf -inf 1 , nan -0 nan inf , 15) = nan nan nan nan 
+cmpps(nan -0 nan inf , nan -0 nan inf , 15) = nan nan nan nan 
+cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 16) = 0 0 0 0 
+cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 16) = 0 0 0 0 
+cmpps(1 2 3 -4 , nan -0 nan inf , 16) = 0 0 0 0 
+cmpps(0 -2 -10 0.5 , nan -0 nan inf , 16) = 0 0 0 0 
+cmpps(inf -inf -inf 1 , nan -0 nan inf , 16) = 0 0 0 0 
+cmpps(nan -0 nan inf , nan -0 nan inf , 16) = 0 nan 0 nan 
+cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 17) = 0 0 0 nan 
+cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 17) = nan 0 0 nan 
+cmpps(1 2 3 -4 , nan -0 nan inf , 17) = 0 0 0 nan 
+cmpps(0 -2 -10 0.5 , nan -0 nan inf , 17) = 0 nan 0 nan 
+cmpps(inf -inf -inf 1 , nan -0 nan inf , 17) = 0 nan 0 nan 
+cmpps(nan -0 nan inf , nan -0 nan inf , 17) = 0 0 0 0 
+cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 18) = 0 0 0 nan 
+cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 18) = nan 0 0 nan 
+cmpps(1 2 3 -4 , nan -0 nan inf , 18) = 0 0 0 nan 
+cmpps(0 -2 -10 0.5 , nan -0 nan inf , 18) = 0 nan 0 nan 
+cmpps(inf -inf -inf 1 , nan -0 nan inf , 18) = 0 nan 0 nan 
+cmpps(nan -0 nan inf , nan -0 nan inf , 18) = 0 nan 0 nan 
+cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 19) = 0 0 0 0 
+cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 19) = 0 0 0 0 
+cmpps(1 2 3 -4 , nan -0 nan inf , 19) = nan 0 nan 0 
+cmpps(0 -2 -10 0.5 , nan -0 nan inf , 19) = nan 0 nan 0 
+cmpps(inf -inf -inf 1 , nan -0 nan inf , 19) = nan 0 nan 0 
+cmpps(nan -0 nan inf , nan -0 nan inf , 19) = nan 0 nan 0 
+cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 20) = nan nan nan nan 
+cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 20) = nan nan nan nan 
+cmpps(1 2 3 -4 , nan -0 nan inf , 20) = nan nan nan nan 
+cmpps(0 -2 -10 0.5 , nan -0 nan inf , 20) = nan nan nan nan 
+cmpps(inf -inf -inf 1 , nan -0 nan inf , 20) = nan nan nan nan 
+cmpps(nan -0 nan inf , nan -0 nan inf , 20) = nan 0 nan 0 
+cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 21) = nan nan nan 0 
+cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 21) = 0 nan nan 0 
+cmpps(1 2 3 -4 , nan -0 nan inf , 21) = nan nan nan 0 
+cmpps(0 -2 -10 0.5 , nan -0 nan inf , 21) = nan 0 nan 0 
+cmpps(inf -inf -inf 1 , nan -0 nan inf , 21) = nan 0 nan 0 
+cmpps(nan -0 nan inf , nan -0 nan inf , 21) = nan nan nan nan 
+cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 22) = nan nan nan 0 
+cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 22) = 0 nan nan 0 
+cmpps(1 2 3 -4 , nan -0 nan inf , 22) = nan nan nan 0 
+cmpps(0 -2 -10 0.5 , nan -0 nan inf , 22) = nan 0 nan 0 
+cmpps(inf -inf -inf 1 , nan -0 nan inf , 22) = nan 0 nan 0 
+cmpps(nan -0 nan inf , nan -0 nan inf , 22) = nan 0 nan 0 
+cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 23) = nan nan nan nan 
+cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 23) = nan nan nan nan 
+cmpps(1 2 3 -4 , nan -0 nan inf , 23) = 0 nan 0 nan 
+cmpps(0 -2 -10 0.5 , nan -0 nan inf , 23) = 0 nan 0 nan 
+cmpps(inf -inf -inf 1 , nan -0 nan inf , 23) = 0 nan 0 nan 
+cmpps(nan -0 nan inf , nan -0 nan inf , 23) = 0 nan 0 nan 
+cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 24) = 0 0 0 0 
+cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 24) = 0 0 0 0 
+cmpps(1 2 3 -4 , nan -0 nan inf , 24) = nan 0 nan 0 
+cmpps(0 -2 -10 0.5 , nan -0 nan inf , 24) = nan 0 nan 0 
+cmpps(inf -inf -inf 1 , nan -0 nan inf , 24) = nan 0 nan 0 
+cmpps(nan -0 nan inf , nan -0 nan inf , 24) = nan nan nan nan 
+cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 25) = 0 0 0 nan 
+cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 25) = nan 0 0 nan 
+cmpps(1 2 3 -4 , nan -0 nan inf , 25) = nan 0 nan nan 
+cmpps(0 -2 -10 0.5 , nan -0 nan inf , 25) = nan nan nan nan 
+cmpps(inf -inf -inf 1 , nan -0 nan inf , 25) = nan nan nan nan 
+cmpps(nan -0 nan inf , nan -0 nan inf , 25) = nan 0 nan 0 
+cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 26) = 0 0 0 nan 
+cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 26) = nan 0 0 nan 
+cmpps(1 2 3 -4 , nan -0 nan inf , 26) = nan 0 nan nan 
+cmpps(0 -2 -10 0.5 , nan -0 nan inf , 26) = nan nan nan nan 
+cmpps(inf -inf -inf 1 , nan -0 nan inf , 26) = nan nan nan nan 
+cmpps(nan -0 nan inf , nan -0 nan inf , 26) = nan nan nan nan 
+cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 27) = 0 0 0 0 
+cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 27) = 0 0 0 0 
+cmpps(1 2 3 -4 , nan -0 nan inf , 27) = 0 0 0 0 
+cmpps(0 -2 -10 0.5 , nan -0 nan inf , 27) = 0 0 0 0 
+cmpps(inf -inf -inf 1 , nan -0 nan inf , 27) = 0 0 0 0 
+cmpps(nan -0 nan inf , nan -0 nan inf , 27) = 0 0 0 0 
+cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 28) = nan nan nan nan 
+cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 28) = nan nan nan nan 
+cmpps(1 2 3 -4 , nan -0 nan inf , 28) = 0 nan 0 nan 
+cmpps(0 -2 -10 0.5 , nan -0 nan inf , 28) = 0 nan 0 nan 
+cmpps(inf -inf -inf 1 , nan -0 nan inf , 28) = 0 nan 0 nan 
+cmpps(nan -0 nan inf , nan -0 nan inf , 28) = 0 0 0 0 
+cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 29) = nan nan nan 0 
+cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 29) = 0 nan nan 0 
+cmpps(1 2 3 -4 , nan -0 nan inf , 29) = 0 nan 0 0 
+cmpps(0 -2 -10 0.5 , nan -0 nan inf , 29) = 0 0 0 0 
+cmpps(inf -inf -inf 1 , nan -0 nan inf , 29) = 0 0 0 0 
+cmpps(nan -0 nan inf , nan -0 nan inf , 29) = 0 nan 0 nan 
+cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 30) = nan nan nan 0 
+cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 30) = 0 nan nan 0 
+cmpps(1 2 3 -4 , nan -0 nan inf , 30) = 0 nan 0 0 
+cmpps(0 -2 -10 0.5 , nan -0 nan inf , 30) = 0 0 0 0 
+cmpps(inf -inf -inf 1 , nan -0 nan inf , 30) = 0 0 0 0 
+cmpps(nan -0 nan inf , nan -0 nan inf , 30) = 0 0 0 0 
+cmpps(1 2 3 -4 , 0 -2 -10 0.5 , 31) = nan nan nan nan 
+cmpps(0 -2 -10 0.5 , inf -inf -inf 1 , 31) = nan nan nan nan 
+cmpps(1 2 3 -4 , nan -0 nan inf , 31) = nan nan nan nan 
+cmpps(0 -2 -10 0.5 , nan -0 nan inf , 31) = nan nan nan nan 
+cmpps(inf -inf -inf 1 , nan -0 nan inf , 31) = nan nan nan nan 
+cmpps(nan -0 nan inf , nan -0 nan inf , 31) = nan nan nan nan 
+shufps(1 2 3 -4 , 0 -2 -10 0.5 , 0) = 1 1 0 0 
+shufps(0 -2 -10 0.5 , inf -inf -inf 1 , 0) = 0 0 inf inf 
+shufps(1 2 3 -4 , nan -0 nan inf , 0) = 1 1 nan nan 
+shufps(0 -2 -10 0.5 , nan -0 nan inf , 0) = 0 0 nan nan 
+shufps(inf -inf -inf 1 , nan -0 nan inf , 0) = inf inf nan nan 
+shufps(nan -0 nan inf , nan -0 nan inf , 0) = nan nan nan nan 
+shufps(1 2 3 -4 , 0 -2 -10 0.5 , 21) = 2 2 -2 0 
+shufps(0 -2 -10 0.5 , inf -inf -inf 1 , 21) = -2 -2 -inf inf 
+shufps(1 2 3 -4 , nan -0 nan inf , 21) = 2 2 -0 nan 
+shufps(0 -2 -10 0.5 , nan -0 nan inf , 21) = -2 -2 -0 nan 
+shufps(inf -inf -inf 1 , nan -0 nan inf , 21) = -inf -inf -0 nan 
+shufps(nan -0 nan inf , nan -0 nan inf , 21) = -0 -0 -0 nan 
+shufps(1 2 3 -4 , 0 -2 -10 0.5 , 255) = -4 -4 0.5 0.5 
+shufps(0 -2 -10 0.5 , inf -inf -inf 1 , 255) = 0.5 0.5 1 1 
+shufps(1 2 3 -4 , nan -0 nan inf , 255) = -4 -4 inf inf 
+shufps(0 -2 -10 0.5 , nan -0 nan inf , 255) = 0.5 0.5 inf inf 
+shufps(inf -inf -inf 1 , nan -0 nan inf , 255) = 1 1 inf inf 
+shufps(nan -0 nan inf , nan -0 nan inf , 255) = inf inf inf inf 
+shufps(1 2 3 -4 , 0 -2 -10 0.5 , 2) = 3 1 0 0 
+shufps(0 -2 -10 0.5 , inf -inf -inf 1 , 2) = -10 0 inf inf 
+shufps(1 2 3 -4 , nan -0 nan inf , 2) = 3 1 nan nan 
+shufps(0 -2 -10 0.5 , nan -0 nan inf , 2) = -10 0 nan nan 
+shufps(inf -inf -inf 1 , nan -0 nan inf , 2) = -inf inf nan nan 
+shufps(nan -0 nan inf , nan -0 nan inf , 2) = nan nan nan nan 
+sqrtsd(1 2 , 1 2 ) = 1 2 
+sqrtsd(1 2 , 0 -2 ) = 0 2 
+sqrtsd(1 2 , inf -inf ) = inf 2 
+sqrtsd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 2 
+sqrtsd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -2 
+sqrtsd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -inf 
+sqrtsd(1 2 , 2 1 ) = 1.41421 2 
+sqrtsd(1 2 , -2 0 ) = 0xfff8000000000000 2 
+sqrtsd(1 2 , -inf inf ) = 0xfff8000000000000 2 
+sqrtsd(1 2 , -0 0x7ff8000000000000 ) = -0 2 
+sqrtsd(0 -2 , -0 0x7ff8000000000000 ) = -0 -2 
+sqrtsd(0 -2 , -0 0x7ff8000000000000 ) = -0 -2 
+addsd(1 2 , 1 2 ) = 2 2 
+addsd(1 2 , 0 -2 ) = 1 2 
+addsd(1 2 , inf -inf ) = inf 2 
+addsd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 2 
+addsd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -2 
+addsd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -inf 
+addsd(1 2 , 2 1 ) = 3 2 
+addsd(1 2 , -2 0 ) = -1 2 
+addsd(1 2 , -inf inf ) = -inf 2 
+addsd(1 2 , -0 0x7ff8000000000000 ) = 1 2 
+addsd(0 -2 , -0 0x7ff8000000000000 ) = 0 -2 
+addsd(0 -2 , -0 0x7ff8000000000000 ) = 0 -2 
+mulsd(1 2 , 1 2 ) = 1 2 
+mulsd(1 2 , 0 -2 ) = 0 2 
+mulsd(1 2 , inf -inf ) = inf 2 
+mulsd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 2 
+mulsd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -2 
+mulsd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -inf 
+mulsd(1 2 , 2 1 ) = 2 2 
+mulsd(1 2 , -2 0 ) = -2 2 
+mulsd(1 2 , -inf inf ) = -inf 2 
+mulsd(1 2 , -0 0x7ff8000000000000 ) = -0 2 
+mulsd(0 -2 , -0 0x7ff8000000000000 ) = -0 -2 
+mulsd(0 -2 , -0 0x7ff8000000000000 ) = -0 -2 
+subsd(1 2 , 1 2 ) = 0 2 
+subsd(1 2 , 0 -2 ) = 1 2 
+subsd(1 2 , inf -inf ) = -inf 2 
+subsd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 2 
+subsd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -2 
+subsd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -inf 
+subsd(1 2 , 2 1 ) = -1 2 
+subsd(1 2 , -2 0 ) = 3 2 
+subsd(1 2 , -inf inf ) = inf 2 
+subsd(1 2 , -0 0x7ff8000000000000 ) = 1 2 
+subsd(0 -2 , -0 0x7ff8000000000000 ) = 0 -2 
+subsd(0 -2 , -0 0x7ff8000000000000 ) = 0 -2 
+minsd(1 2 , 1 2 ) = 1 2 
+minsd(1 2 , 0 -2 ) = 0 2 
+minsd(1 2 , inf -inf ) = 1 2 
+minsd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 2 
+minsd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -2 
+minsd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -inf 
+minsd(1 2 , 2 1 ) = 1 2 
+minsd(1 2 , -2 0 ) = -2 2 
+minsd(1 2 , -inf inf ) = -inf 2 
+minsd(1 2 , -0 0x7ff8000000000000 ) = -0 2 
+minsd(0 -2 , -0 0x7ff8000000000000 ) = -0 -2 
+minsd(0 -2 , -0 0x7ff8000000000000 ) = -0 -2 
+divsd(1 2 , 1 2 ) = 1 2 
+divsd(1 2 , 0 -2 ) = inf 2 
+divsd(1 2 , inf -inf ) = 0 2 
+divsd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 2 
+divsd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -2 
+divsd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -inf 
+divsd(1 2 , 2 1 ) = 0.5 2 
+divsd(1 2 , -2 0 ) = -0.5 2 
+divsd(1 2 , -inf inf ) = -0 2 
+divsd(1 2 , -0 0x7ff8000000000000 ) = -inf 2 
+divsd(0 -2 , -0 0x7ff8000000000000 ) = 0xfff8000000000000 -2 
+divsd(0 -2 , -0 0x7ff8000000000000 ) = 0xfff8000000000000 -2 
+maxsd(1 2 , 1 2 ) = 1 2 
+maxsd(1 2 , 0 -2 ) = 1 2 
+maxsd(1 2 , inf -inf ) = inf 2 
+maxsd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 2 
+maxsd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -2 
+maxsd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -inf 
+maxsd(1 2 , 2 1 ) = 2 2 
+maxsd(1 2 , -2 0 ) = 1 2 
+maxsd(1 2 , -inf inf ) = 1 2 
+maxsd(1 2 , -0 0x7ff8000000000000 ) = 1 2 
+maxsd(0 -2 , -0 0x7ff8000000000000 ) = -0 -2 
+maxsd(0 -2 , -0 0x7ff8000000000000 ) = -0 -2 
+cvtps2dq(1 2 3 -4 ) = 0x1 0x2 0x3 0xfffffffc 
+cvtps2dq(0 -2 -10 0.5 ) = 0x0 0xfffffffe 0xfffffff6 0x0 
+cvtps2dq(inf -inf -inf 1 ) = 0x80000000 0x80000000 0x80000000 0x1 
+cvtps2dq(nan -0 nan inf ) = 0x80000000 0x0 0x80000000 0x80000000 
+dpps(1 2 3 -4 , 0 -2 -10 0.5 , 255) = -36 -36 -36 -36 
+dpps(0 -2 -10 0.5 , inf -inf -inf 1 , 255) = nan nan nan nan 
+dpps(1 2 3 -4 , nan -0 nan inf , 255) = nan nan nan nan 
+dpps(0 -2 -10 0.5 , nan -0 nan inf , 255) = nan nan nan nan 
+dpps(inf -inf -inf 1 , nan -0 nan inf , 255) = nan nan nan nan 
+dpps(nan -0 nan inf , nan -0 nan inf , 255) = nan nan nan nan 
+dpps(1 2 3 -4 , 0 -2 -10 0.5 , 63) = -4 -4 -4 -4 
+dpps(0 -2 -10 0.5 , inf -inf -inf 1 , 63) = nan nan nan nan 
+dpps(1 2 3 -4 , nan -0 nan inf , 63) = nan nan nan nan 
+dpps(0 -2 -10 0.5 , nan -0 nan inf , 63) = nan nan nan nan 
+dpps(inf -inf -inf 1 , nan -0 nan inf , 63) = nan nan nan nan 
+dpps(nan -0 nan inf , nan -0 nan inf , 63) = nan nan nan nan 
+dpps(1 2 3 -4 , 0 -2 -10 0.5 , 243) = -36 -36 0 0 
+dpps(0 -2 -10 0.5 , inf -inf -inf 1 , 243) = nan nan 0 0 
+dpps(1 2 3 -4 , nan -0 nan inf , 243) = nan nan 0 0 
+dpps(0 -2 -10 0.5 , nan -0 nan inf , 243) = nan nan 0 0 
+dpps(inf -inf -inf 1 , nan -0 nan inf , 243) = nan nan 0 0 
+dpps(nan -0 nan inf , nan -0 nan inf , 243) = nan nan 0 0 
+dpps(1 2 3 -4 , 0 -2 -10 0.5 , 83) = -30 -30 0 0 
+dpps(0 -2 -10 0.5 , inf -inf -inf 1 , 83) = nan nan 0 0 
+dpps(1 2 3 -4 , nan -0 nan inf , 83) = nan nan 0 0 
+dpps(0 -2 -10 0.5 , nan -0 nan inf , 83) = nan nan 0 0 
+dpps(inf -inf -inf 1 , nan -0 nan inf , 83) = nan nan 0 0 
+dpps(nan -0 nan inf , nan -0 nan inf , 83) = nan nan 0 0 
diff --git a/tests/test30 b/tests/test30
new file mode 100755
index 00000000..ac95ac6f
--- /dev/null
+++ b/tests/test30
Binary files differ
diff --git a/tests/test30.c b/tests/test30.c
new file mode 100644
index 00000000..26288423
--- /dev/null
+++ b/tests/test30.c
@@ -0,0 +1,641 @@
+// build with  gcc -O0 -g -msse -msse2 -mssse3 -msse4.1 -mavx test30.c -o test30
+#include <inttypes.h>
+#include <string.h>
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <math.h>
+#include <pmmintrin.h>
+#include <immintrin.h> 
+
+typedef unsigned char u8x16 __attribute__ ((vector_size (16)));    /* 16 unsigned 8-bit lanes */
+typedef unsigned short u16x8 __attribute__ ((vector_size (16)));   /*  8 unsigned 16-bit lanes */
+typedef unsigned int  u32x4 __attribute__ ((vector_size (16)));    /*  4 unsigned 32-bit lanes */
+typedef uint64_t  u64x2 __attribute__ ((vector_size (16)));        /*  2 unsigned 64-bit lanes; fixed-width type because 'unsigned long' is only 32 bits on LLP64/x32 */
+typedef float  f32x4 __attribute__ ((vector_size (16)));           /*  4 float lanes */
+typedef double d64x2 __attribute__ ((vector_size (16)));           /*  2 double lanes */
+
+typedef union {           /* one 128-bit value, viewable as any intrinsic type or lane layout */
+        __m128i mm;       /* integer view handed to the _epi*/_si128 intrinsics */
+        __m128  mf;       /* packed-float view handed to the _ps/_ss intrinsics */
+        __m128d md;       /* packed-double view handed to the _pd/_sd intrinsics */
+        u8x16   u8;       /* lane access: 16 bytes */
+        u16x8   u16;      /* lane access: 8 x 16-bit */
+        u32x4   u32;      /* lane access: 4 x 32-bit */
+        u64x2   u64;      /* lane access: 2 x 64-bit */
+        f32x4   f32;      /* lane access: 4 floats */
+        d64x2   d64;      /* lane access: 2 doubles */
+} v128;
+
+/* Build a flags-style word for "ucomiss a, b" from the _mm_ucomi*_ss predicates: base 0x202, plus
+ * 0x001 if a < b, 0x040 if a == b, 0x045 otherwise (in the x86 EFLAGS layout: CF=0x001, PF=0x004, ZF=0x040). */
+uint64_t _ucomiss_(float a, float b)
+{
+    v128 lhs, rhs;
+    lhs.f32[0] = a;
+    rhs.f32[0] = b;
+    /* Probe greater, then less, then equal; falling through all three (e.g. a NaN operand) gives 0x045. */
+    if(_mm_ucomigt_ss(lhs.mf, rhs.mf))
+        return 0x202;
+    if(_mm_ucomilt_ss(lhs.mf, rhs.mf))
+        return 0x202 | 0x001;
+    if(_mm_ucomieq_ss(lhs.mf, rhs.mf))
+        return 0x202 | 0x040;
+    return 0x202 | 0x045;
+}
+
+/* Lane-0 _mm_min_ss(a, b); returns the float result's bit pattern, zero-extended to 64 bits. */
+uint64_t _minss_(float a, float b)
+{
+    v128 va = {.f32 = {a, 0.0f, 0.0f, 0.0f}}, ret;  /* unused lanes are zeroed, not left indeterminate */
+    v128 vb = {.f32 = {b, 0.0f, 0.0f, 0.0f}};
+    ret.mf = _mm_min_ss(va.mf, vb.mf);
+    return ret.u32[0];  /* lane 0 only; u64[0] would also carry lane 1 in bits 32-63 */
+}
+/* Lane-0 _mm_max_ss(a, b); returns the float result's bit pattern, zero-extended to 64 bits. */
+uint64_t _maxss_(float a, float b)
+{
+    v128 va = {.f32 = {a, 0.0f, 0.0f, 0.0f}}, ret;  /* unused lanes are zeroed, not left indeterminate */
+    v128 vb = {.f32 = {b, 0.0f, 0.0f, 0.0f}};
+    ret.mf = _mm_max_ss(va.mf, vb.mf);
+    return ret.u32[0];  /* lane 0 only; u64[0] would also carry lane 1 in bits 32-63 */
+}
+
+/* CMPSS(A, B): emit _cmpss_<A>(a, b), returning the lane-0 result bits of _mm_cmp<B>_ss(a, b), zero-extended. */
+#define CMPSS(A, B)                                                      \
+uint64_t _cmpss_##A(float a, float b)                                    \
+{                                                                        \
+    v128 va = {.f32 = {a, 0.0f, 0.0f, 0.0f}}, ret;                       \
+    v128 vb = {.f32 = {b, 0.0f, 0.0f, 0.0f}};                            \
+    ret.mf = _mm_cmp##B##_ss(va.mf, vb.mf);                              \
+    return ret.u32[0]; /* lane 0 only: no bits from the unused lanes */  \
+}
+CMPSS(0, eq)
+CMPSS(1, lt)
+CMPSS(2, le)
+CMPSS(3, unord)
+CMPSS(4, neq)
+CMPSS(5, nlt)
+CMPSS(6, nle)
+CMPSS(7, ord)
+#undef CMPSS
+
+const v128 a128_8 = {.u8 = {      /* first-operand test vector, byte lanes */
+    0xff, 0x80, 0x7f, 0x00, 0x01, 0x02, 0x03, 0x81,
+    0xfe, 0x84, 0x72, 0x52, 0xa5, 0x00, 0xc0, 0x32
+}};
+const v128 a128_16 = {.u16 = {    /* first-operand test vector, 16-bit lanes */
+    0xffff, 0x8000, 0x7fff, 0x0000, 0x0001, 0x0002, 0x0003, 0x8001
+}};
+const v128 a128_32 = {.u32 = {    /* first-operand test vector, 32-bit lanes: all-ones, sign bit only, largest positive, zero */
+    0xffffffff, 0x80000000, 0x7fffffff, 0x00000000
+}};
+const v128 a128_64 = {.u64 = {    /* first-operand test vector, 64-bit lanes: all-ones, sign bit only */
+    0xffffffffffffffffLL, 0x8000000000000000LL
+}};
+
+const v128 b128_8 = {.u8 = {      /* second-operand test vector, byte lanes */
+    0x00, 0x01, 0x05, 0x15, 0x20, 0x80, 0xff, 0x00,
+    0x08, 0x07, 0x81, 0x06, 0x0a, 0x0f, 0x10, 0x01
+}};
+const v128 b128_16 = {.u16 = {    /* second-operand test vector, 16-bit lanes */
+    0x8000, 0x7fff, 0xffff, 0xffff, 0x0050, 0x9000, 0xfffe, 0x8001
+}};
+const v128 b128_32 = {.u32 = {    /* second-operand test vector, 32-bit lanes */
+    0x00000001, 0x80000000, 0x00000005, 0xfffffffe
+}};
+const v128 b128_64 = {.u64 = {    /* second-operand test vector, 64-bit lanes */
+    0x0000000000000001LL, 0x8000000000000000LL
+}};
+const v128 c128_32 = {.u32 = {    /* third operand of the blendv test; lanes 1 and 2 have their top bit set */
+    0x00000001, 0x80000000, 0x80000005, 0x0000fffe
+}};
+
+const v128 a128_pd = {.d64 = { 1.0, 2.0}};                              /* double inputs: ordinary positive values */
+const v128 b128_pd = {.d64 = { 0.0, -2.0}};                             /* double inputs: zero and a negative value */
+const v128 c128_pd = {.d64 = { INFINITY, -INFINITY}};                   /* double inputs: both infinities */
+const v128 d128_pd = {.d64 = { NAN, -0.0}};                             /* double inputs: NaN and negative zero */
+const v128 a128_ps = {.f32 = { 1.0, 2.0, 3.0, -4.0}};                   /* float inputs: ordinary finite values */
+const v128 b128_ps = {.f32 = { 0.0, -2.0, -10.0, 0.5}};                 /* float inputs: zero, negatives and a fraction */
+const v128 c128_ps = {.f32 = { INFINITY, -INFINITY, -INFINITY, 1.0}};   /* float inputs: infinities plus one finite lane */
+const v128 d128_ps = {.f32 = { NAN, -0.0, -NAN, INFINITY}};             /* float inputs: NaN, negated NaN, negative zero, +inf */
+
+/* Return a copy of a with its two double lanes exchanged (new lane 0 = old lane 1, new lane 1 = old lane 0). */
+v128 reverse_pd(v128 a) {
+    const v128 swapped = {.md = _mm_shuffle_pd(a.md, a.md, _MM_SHUFFLE2(0, 1))};
+    return swapped;
+}
+
+void print_8(v128 v) {  /* print the 16 byte lanes of v, index 0 first, each as "0x<hex> " */
+    for(int lane = 0; lane != 16; lane++)
+        printf("0x%x ", v.u8[lane]);
+}
+void print_16(v128 v) {  /* print the 8 16-bit lanes of v, index 0 first, each as "0x<hex> " */
+    for(int lane = 0; lane != 8; lane++)
+        printf("0x%x ", v.u16[lane]);
+}
+void print_32(v128 v) {  /* print the 4 32-bit lanes of v, index 0 first, each as "0x<hex> " */
+    for(int lane = 0; lane != 4; lane++)
+        printf("0x%x ", v.u32[lane]);
+}
+void print_64(v128 v) {  /* print the 2 64-bit lanes of v, index 0 first, each as "0x<hex> " */
+    for(int lane = 0; lane != 2; lane++)
+        printf("0x%"PRIx64" ", v.u64[lane]);
+}
+#define print_128 print_64  /* a full 128-bit value is printed as its two 64-bit lanes */
+/* Print the four float lanes of v with %g; every NaN prints as plain "nan", whatever its sign or payload. */
+void print_ps(v128 v) {
+    for(int i=0; i<4; ++i)
+        /* isnan() is the standard type-generic macro (print_pd uses it too); isnanf() is a non-standard extension. */
+        if(isnan(v.f32[i])) printf("nan ");
+        else printf("%g ", v.f32[i]);
+}
+/* Print both double lanes of v: a NaN as its raw 64-bit pattern in hex, anything else with %g. */
+void print_pd(v128 v) {
+    for(int lane = 0; lane < 2; lane++) {
+        if(!isnan(v.d64[lane])) printf("%g ", v.d64[lane]);
+        else printf("0x%"PRIx64" ", v.u64[lane]);
+    }
+}
+#define print_sd print_pd
+
+int main(int argc, const char** argv)
+{
+ float a, b;
+ uint32_t flags;
+ uint32_t maxf = 0x7f7fffff;
+ uint32_t minf = 0xff7fffff;
+ uint32_t r;
+
+#define GO1(A, N)                                   \
+a = 1.0f; b = 2.0f;                                 \
+flags = A(a, b);                                    \
+printf(N " %f, %f => 0x%"PRIx32"\n", a, b, flags);  \
+flags = A(b, a);                                    \
+printf(N " %f, %f => 0x%"PRIx32"\n", b, a, flags);  \
+b = INFINITY;                                       \
+flags = A(a, b);                                    \
+printf(N " %f, %f => 0x%"PRIx32"\n", a, b, flags);  \
+flags = A(b, a);                                    \
+printf(N " %f, %f => 0x%"PRIx32"\n", b, a, flags);  \
+b = -INFINITY;                                      \
+flags = A(a, b);                                    \
+printf(N " %f, %f => 0x%"PRIx32"\n", a, b, flags);  \
+flags = A(b, a);                                    \
+printf(N " %f, %f => 0x%"PRIx32"\n", b, a, flags);  \
+b = NAN;                                            \
+flags = A(a, b);                                    \
+printf(N " %f, %f => 0x%"PRIx32"\n", a, b, flags);  \
+flags = A(b, a);                                    \
+printf(N " %f, %f => 0x%"PRIx32"\n", b, a, flags);  \
+b = a;                                              \
+flags = A(a, b);                                    \
+printf(N " %f, %f => 0x%"PRIx32"\n", a, b, flags);  \
+flags = A(b, a);                                    \
+printf(N " %f, %f => 0x%"PRIx32"\n", b, a, flags);  \
+a = b = INFINITY;                                   \
+flags = A(a, b);                                    \
+printf(N " %f, %f => 0x%"PRIx32"\n", a, b, flags);  \
+a = -INFINITY;                                      \
+flags = A(a, b);                                    \
+printf(N " %f, %f => 0x%"PRIx32"\n", a, b, flags);  \
+flags = A(b, a);                                    \
+printf(N " %f, %f => 0x%"PRIx32"\n", b, a, flags);  \
+a = b = NAN;                                        \
+flags = A(a, b);                                    \
+printf(N " %f, %f => 0x%"PRIx32"\n", a, b, flags);
+
+#define GO2(A, N)                               \
+a = 1.0f; b = 2.0f;                             \
+r = A(a, b);                                    \
+printf(N " %g, %g => %g\n", a, b, *(float*)&r); \
+r = A(b, a);                                    \
+printf(N " %g, %g => %g\n", b, a, *(float*)&r); \
+a = -INFINITY;                                  \
+r = A(a, b);                                    \
+printf(N " %g, %g => %g\n", a, b, *(float*)&r); \
+r = A(b, a);                                    \
+printf(N " %g, %g => %g\n", b, a, *(float*)&r); \
+a = +INFINITY;                                  \
+r = A(a, b);                                    \
+printf(N " %g, %g => %g\n", a, b, *(float*)&r); \
+r = A(b, a);                                    \
+printf(N " %g, %g => %g\n", b, a, *(float*)&r); \
+a = NAN;                                        \
+r = A(a, b);                                    \
+printf(N " %g, %g => %g\n", a, b, *(float*)&r); \
+r = A(b, a);                                    \
+printf(N " %g, %g => %g\n", b, a, *(float*)&r); \
+b = *(float*)&maxf;                             \
+r = A(a, b);                                    \
+printf(N " %g, %g => %g\n", a, b, *(float*)&r); \
+r = A(b, a);                                    \
+printf(N " %g, %g => %g\n", b, a, *(float*)&r); \
+a = -INFINITY;                                  \
+r = A(a, b);                                    \
+printf(N " %g, %g => %g\n", a, b, *(float*)&r); \
+r = A(b, a);                                    \
+printf(N " %g, %g => %g\n", b, a, *(float*)&r); \
+a = +INFINITY;                                  \
+r = A(a, b);                                    \
+printf(N " %g, %g => %g\n", a, b, *(float*)&r); \
+r = A(b, a);                                    \
+printf(N " %g, %g => %g\n", b, a, *(float*)&r);
+
+ GO1(_ucomiss_, "ucomiss")
+ GO2(_minss_, "minss")
+ GO2(_maxss_, "maxss")
+ GO1(_cmpss_0, "cmpss 0")
+ GO1(_cmpss_1, "cmpss 1")
+ GO1(_cmpss_2, "cmpss 2")
+ GO1(_cmpss_3, "cmpss 3")
+ GO1(_cmpss_4, "cmpss 4")
+ GO1(_cmpss_5, "cmpss 5")
+ GO1(_cmpss_6, "cmpss 6")
+ GO1(_cmpss_7, "cmpss 7")
+
+ #undef GO1
+ #undef GO2
+ v128 a128;
+ int i;
+
+ #define GO1(A, N, C)                               \
+ a128.mm = _mm_##A##_epi##N(a128_##N.mm);           \
+ printf("%s(", #C); print_##N(a128_##N);            \
+ printf(") = "); print_##N(a128); printf("\n");
+ #define GO1C(A, N, C, A1, I)                       \
+ a128.mm = _mm_##A##_epi##N(A1.mm, I);              \
+ printf("%s(", #C); print_##N(A1);                  \
+ printf("%d) = ", I); print_##N(a128); printf("\n");
+ #define GO2(A, N, C, A1, A2)                       \
+ a128.mm = _mm_##A##_epi##N(A1.mm, A2.mm);          \
+ printf("%s(", #C); print_##N(A1);                  \
+ printf(", "); print_##N(A2);                       \
+ printf(") = "); print_##N(a128); printf("\n");
+ #define GO2u(A, N, C, A1, A2)                      \
+ a128.mm = _mm_##A##_epu##N(A1.mm, A2.mm);          \
+ printf("%s(", #C); print_##N(A1);                  \
+ printf(", "); print_##N(A2);                       \
+ printf(") = "); print_##N(a128); printf("\n");
+ #define GO2f(A, C, A1, A2)                         \
+ a128.mm = _mm_##A##_si128(A1.mm, A2.mm);           \
+ printf("%s(", #C); print_128(A1);                  \
+ printf(", "); print_128(A2);                       \
+ printf(") = "); print_128(a128); printf("\n");
+ #define GO2C(A, N, C, A1, A2, I)                   \
+ a128.mm = _mm_##A##_epi##N(A1.mm, A2.mm, I);       \
+ printf("%s(", #C); print_##N(A1);                  \
+ printf(", "); print_##N(A2);                       \
+ printf("%d) = ", I); print_##N(a128); printf("\n");
+ #define GO2i(A, A1, A2)                            \
+ i = _mm_##A##_si128(A1.mm, A2.mm);                 \
+ printf("p%s(", #A); print_64(A1);                  \
+ printf(", "); print_64(A2);                        \
+ printf(") = %d\n", i);
+ #define GO3PS(A, N, A1, A2, A3)                    \
+ a128.mf = _mm_##A##_ps(A1.mf, A2.mf, A3.mf);       \
+ printf("p%s%s(", #A, "ps"); print_##N(A1);         \
+ printf(", "); print_##N(A2);                       \
+ printf(", "); print_##N(A3);                       \
+ printf(") = "); print_##N(a128); printf("\n");
+ #define GO1ipd(A, C, A1)                           \
+ i = _mm_##A##_pd(A1.md);                           \
+ printf("%s(", #C); print_64(A1);                   \
+ printf(") = 0x%x\n", i);
+ #define GO1pd(A, C, A1)                            \
+ a128.md = _mm_##A##_pd(A1.md);                     \
+ printf("%s(", #C); print_pd(A1);                   \
+ printf(") = "); print_pd(a128); printf("\n");
+ #define GO2pd(A, C, A1, A2)                        \
+ a128.md = _mm_##A##_pd(A1.md, A2.md);              \
+ printf("%s(", #C); print_pd(A1);                   \
+ printf(", "); print_pd(A2);                        \
+ printf(") = "); print_pd(a128); printf("\n");
+ #define GO2Cpd(A, C, A1, A2, I)                    \
+ a128.md = _mm_##A##_pd(A1.md, A2.md, I);           \
+ printf("%s(", #C); print_pd(A1);                   \
+ printf(", "); print_pd(A2);                        \
+ printf(", %d) = ", I); print_pd(a128); printf("\n");
+ #define GO1isd(A, C, A1)                           \
+ i = _mm_##A##_sd(A1.md);                           \
+ printf("%s(", #C); print_64(A1);                   \
+ printf(") = 0x%x\n", i);
+ #define GO1sd(A, C, A1)                            \
+ a128.md = _mm_##A##_sd(A1.md);                     \
+ printf("%s(", #C); print_sd(A1);                   \
+ printf(") = "); print_sd(a128); printf("\n");
+ #define GO2sd(A, C, A1, A2)                        \
+ a128.md = _mm_##A##_sd(A1.md, A2.md);              \
+ printf("%s(", #C); print_sd(A1);                   \
+ printf(", "); print_sd(A2);                        \
+ printf(") = "); print_sd(a128); printf("\n");
+ #define GO2Csd(A, C, A1, A2, I)                    \
+ a128.md = _mm_##A##_sd(A1.md, A2.md, I);           \
+ printf("%s(", #C); print_sd(A1);                   \
+ printf(", "); print_sd(A2);                        \
+ printf(", %d) = ", I); print_sd(a128); printf("\n");
+ #define GO1ips(A, C, A1)                           \
+ i = _mm_##A##_ps(A1.mf);                           \
+ printf("%s(", #C); print_32(A1);                   \
+ printf(") = 0x%x\n", i);
+ #define GO1ps(A, C, A1)                            \
+ a128.mf = _mm_##A##_ps(A1.mf);                     \
+ printf("%s(", #C); print_ps(A1);                   \
+ printf(") = "); print_ps(a128); printf("\n");
+ #define GO2ps(A, C, A1, A2)                        \
+ a128.mf = _mm_##A##_ps(A1.mf, A2.mf);              \
+ printf("%s(", #C); print_ps(A1);                   \
+ printf(", "); print_ps(A2);                        \
+ printf(") = "); print_ps(a128); printf("\n");
+ #define GO2Cps(A, C, A1, A2, I)                    \
+ a128.mf = _mm_##A##_ps(A1.mf, A2.mf, I);           \
+ printf("%s(", #C); print_ps(A1);                   \
+ printf(", "); print_ps(A2);                        \
+ printf(", %d) = ", I); print_ps(a128); printf("\n");
+ #define GO1ps2dq(A, C, A1)                         \
+ a128.mm = _mm_##A##_epi32(A1.mf);                  \
+ printf("%s(", #C); print_ps(A1);                   \
+ printf(") = "); print_32(a128); printf("\n");
+ 
+ #define MULITGO2pd(A, B)       \
+ GO2pd(A, B, a128_pd, b128_pd)  \
+ GO2pd(A, B, b128_pd, c128_pd)  \
+ GO2pd(A, B, a128_pd, d128_pd)  \
+ GO2pd(A, B, b128_pd, d128_pd)  \
+ GO2pd(A, B, c128_pd, d128_pd)  \
+ GO2pd(A, B, d128_pd, d128_pd)
+
+ #define MULITGO2Cpd(A, B, I)       \
+ GO2Cpd(A, B, a128_pd, b128_pd, I)  \
+ GO2Cpd(A, B, b128_pd, c128_pd, I)  \
+ GO2Cpd(A, B, a128_pd, d128_pd, I)  \
+ GO2Cpd(A, B, b128_pd, d128_pd, I)  \
+ GO2Cpd(A, B, c128_pd, d128_pd, I)  \
+ GO2Cpd(A, B, d128_pd, d128_pd, I)
+
+ #define MULITGO2ps(A, B)       \
+ GO2ps(A, B, a128_ps, b128_ps)  \
+ GO2ps(A, B, b128_ps, c128_ps)  \
+ GO2ps(A, B, a128_ps, d128_ps)  \
+ GO2ps(A, B, b128_ps, d128_ps)  \
+ GO2ps(A, B, c128_ps, d128_ps)  \
+ GO2ps(A, B, d128_ps, d128_ps)
+
+ /* MULTIGO1ps2dq(OP, NAME): expands GO1ps2dq(OP, NAME, x) once for each of
+  * the four *128_ps inputs, in the order a, b, c, d. */
+ #define MULTIGO1ps2dq(OP, NAME)       \
+ GO1ps2dq(OP, NAME, a128_ps)           \
+ GO1ps2dq(OP, NAME, b128_ps)           \
+ GO1ps2dq(OP, NAME, c128_ps)           \
+ GO1ps2dq(OP, NAME, d128_ps)
+
+ /* MULITGO2Cps(OP, NAME, IMM): expands GO2Cps(OP, NAME, x, y, IMM) for six
+  * operand pairs of the *128_ps inputs: (a,b) (b,c) (a,d) (b,d) (c,d) (d,d). */
+ #define MULITGO2Cps(OP, NAME, IMM)            \
+ GO2Cps(OP, NAME, a128_ps, b128_ps, IMM)       \
+ GO2Cps(OP, NAME, b128_ps, c128_ps, IMM)       \
+ GO2Cps(OP, NAME, a128_ps, d128_ps, IMM)       \
+ GO2Cps(OP, NAME, b128_ps, d128_ps, IMM)       \
+ GO2Cps(OP, NAME, c128_ps, d128_ps, IMM)       \
+ GO2Cps(OP, NAME, d128_ps, d128_ps, IMM)
+
+ /* MULTIGO2sd(OP, NAME): expands GO2sd(OP, NAME, x, y) twelve times over the
+  * *128_pd inputs: six plain pairs, then six pairs whose second operand is
+  * wrapped in reverse_pd().
+  * NOTE(review): the last two rows are identical (b128_pd, reverse_pd(d128_pd)).
+  * The plain half ends with (c128_pd, d128_pd), so the final row was presumably
+  * meant to use c128_pd. Left as-is because the recorded reference output
+  * (ref30.txt) presumably reflects the current sequence - confirm and
+  * regenerate the reference before changing it. */
+ #define MULTIGO2sd(OP, NAME)                      \
+ GO2sd(OP, NAME, a128_pd, a128_pd)                 \
+ GO2sd(OP, NAME, a128_pd, b128_pd)                 \
+ GO2sd(OP, NAME, a128_pd, c128_pd)                 \
+ GO2sd(OP, NAME, a128_pd, d128_pd)                 \
+ GO2sd(OP, NAME, b128_pd, d128_pd)                 \
+ GO2sd(OP, NAME, c128_pd, d128_pd)                 \
+ GO2sd(OP, NAME, a128_pd, reverse_pd(a128_pd))     \
+ GO2sd(OP, NAME, a128_pd, reverse_pd(b128_pd))     \
+ GO2sd(OP, NAME, a128_pd, reverse_pd(c128_pd))     \
+ GO2sd(OP, NAME, a128_pd, reverse_pd(d128_pd))     \
+ GO2sd(OP, NAME, b128_pd, reverse_pd(d128_pd))     \
+ GO2sd(OP, NAME, b128_pd, reverse_pd(d128_pd))
+
+
+ GO2(shuffle, 8, pshufb, a128_8, b128_8)
+ GO2(hadd, 16, phaddw, a128_16, b128_16)
+ GO2(hadd, 32, phaddd, a128_32, b128_32)
+ GO2(hadds, 16, phaddsw, a128_16, b128_16)
+ GO2(maddubs, 16, pmaddubsw, a128_8, b128_8)
+ GO2(hsub, 16, phsubw, a128_16, b128_16)
+ GO2(sign, 8, psignb, a128_8, b128_8)
+ GO2(sign, 16, psignw, a128_16, b128_16)
+ GO2(sign, 32, psignd, a128_32, b128_32)
+ GO2(mulhrs, 16, pmulhrsw, a128_16, b128_16)
+ GO3PS(blendv, 32, a128_32, b128_32, c128_32)
+ GO2i(testz, a128_32, b128_32)
+ GO2i(testc, a128_32, b128_32)
+ GO2i(testnzc, a128_32, b128_32)
+ GO1(abs, 8, pabsb)
+ GO1(abs, 16, pabsw)
+ GO1(abs, 32, pabsd)
+ GO1(cvtepi8, 16, pmovsxbw);
+ GO1(cvtepi8, 32, pmovsxbd);
+ GO1(cvtepi8, 64, pmovsxbq);
+ GO1(cvtepi16, 32, pmovsxwd);
+ GO1(cvtepi16, 64, pmovsxwq);
+ GO1(cvtepi32, 64, pmovsxdq);
+ GO1(cvtepu8, 16, pmovzxbw);
+ GO1(cvtepu8, 32, pmovzxbd);
+ GO1(cvtepu8, 64, pmovzxbq);
+ GO1(cvtepu16, 32, pmovzxwd);
+ GO1(cvtepu16, 64, pmovzxwq);
+ GO1(cvtepu32, 64, pmovzxdq);
+ GO2(min, 32, pminsd, a128_32, b128_32)
+ GO2(max, 32, pmaxsd, a128_32, b128_32)
+ GO2C(blend, 16, pblendw, a128_16, b128_16, 0)
+ GO2C(blend, 16, pblendw, a128_16, b128_16, 0xff)
+ GO2C(blend, 16, pblendw, a128_16, b128_16, 0xaa)
+ GO2C(blend, 16, pblendw, a128_16, b128_16, 2)
+ GO2C(alignr, 8, palignr, a128_8, b128_8, 0)
+ GO2C(alignr, 8, palignr, a128_8, b128_8, 2)
+ GO2C(alignr, 8, palignr, a128_8, b128_8, 7)
+ GO2C(alignr, 8, palignr, a128_8, b128_8, 15)
+ GO2C(alignr, 8, palignr, a128_8, b128_8, 16)
+ GO2C(alignr, 8, palignr, a128_8, b128_8, 0xff)
+ GO1ipd(movemask, movmskpd, a128_64)
+ GO1pd(sqrt, psqrtpd, a128_pd)
+ GO1pd(sqrt, psqrtpd, b128_pd)
+ GO1pd(sqrt, psqrtpd, c128_pd)
+ GO1pd(sqrt, psqrtpd, d128_pd)
+ MULITGO2pd(and, andpd)
+ MULITGO2pd(andnot, andnpd)
+ MULITGO2pd(or, orpd)
+ MULITGO2pd(xor, xorpd)
+ MULITGO2pd(add, addpd)
+ MULITGO2pd(mul, mulpd)
+ MULITGO2pd(sub, subpd)
+ MULITGO2pd(min, minpd)
+ MULITGO2pd(div, divpd)
+ MULITGO2pd(max, maxpd)
+ GO2(unpacklo, 8, punpcklbw, a128_8, b128_8)
+ GO2(unpacklo, 16, punpcklwd, a128_16, b128_16)
+ GO2(unpacklo, 32, punpckldq, a128_32, b128_32)
+ GO2(packs, 16, ppacksswb, a128_16, b128_16)
+ GO2(cmpgt, 8, pcmpgtb, a128_8, b128_8)
+ GO2(cmpgt, 16, pcmpgtw, a128_16, b128_16)
+ GO2(cmpgt, 32, pcmpgtd, a128_32, b128_32)
+ GO2(packus, 16, packuswb, a128_16, b128_16)
+ GO2(unpackhi, 8, punpckhbw, a128_8, b128_8)
+ GO2(unpackhi, 16, punpckhwd, a128_16, b128_16)
+ GO2(unpackhi, 32, punpckhdq, a128_32, b128_32)
+ GO2(packs, 32, ppackssdw, a128_32, b128_32)
+ GO2(unpacklo, 64, punpcklqdq, a128_64, b128_64)
+ GO2(unpackhi, 64, punpckhqdq, a128_64, b128_64)
+ GO1C(shuffle, 32, pshufd, a128_32, 0)
+ GO1C(shuffle, 32, pshufd, a128_32, 0xff)
+ GO1C(shuffle, 32, pshufd, a128_32, 0xaa)
+ GO1C(shuffle, 32, pshufd, a128_32, 2)
+ GO1C(srli, 16, psrlw, a128_16, 0)
+ GO1C(srli, 16, psrlw, a128_16, 0xff)
+ GO1C(srli, 16, psrlw, a128_16, 0xaa)
+ GO1C(srli, 16, psrlw, a128_16, 2)
+ GO1C(srli, 32, psrld, a128_32, 0)
+ GO1C(srli, 32, psrld, a128_32, 0xff)
+ GO1C(srli, 32, psrld, a128_32, 0xaa)
+ GO1C(srli, 32, psrld, a128_32, 2)
+ GO1C(srli, 64, psrlq, a128_64, 0)
+ GO1C(srli, 64, psrlq, a128_64, 0xff)
+ GO1C(srli, 64, psrlq, a128_64, 0xaa)
+ GO1C(srli, 64, psrlq, a128_64, 2)
+ GO1C(srai, 16, psraw, a128_16, 0)
+ GO1C(srai, 16, psraw, a128_16, 0xff)
+ GO1C(srai, 16, psraw, a128_16, 0xaa)
+ GO1C(srai, 16, psraw, a128_16, 2)
+ GO1C(srai, 32, psrad, a128_32, 0)
+ GO1C(srai, 32, psrad, a128_32, 0xff)
+ GO1C(srai, 32, psrad, a128_32, 0xaa)
+ GO1C(srai, 32, psrad, a128_32, 2)
+ GO1C(slli, 16, psllw, a128_16, 0)
+ GO1C(slli, 16, psllw, a128_16, 0xff)
+ GO1C(slli, 16, psllw, a128_16, 0xaa)
+ GO1C(slli, 16, psllw, a128_16, 2)
+ GO1C(slli, 32, pslld, a128_32, 0)
+ GO1C(slli, 32, pslld, a128_32, 0xff)
+ GO1C(slli, 32, pslld, a128_32, 0xaa)
+ GO1C(slli, 32, pslld, a128_32, 2)
+ GO1C(slli, 64, psllq, a128_64, 0)
+ GO1C(slli, 64, psllq, a128_64, 0xff)
+ GO1C(slli, 64, psllq, a128_64, 0xaa)
+ GO1C(slli, 64, psllq, a128_64, 2)
+ GO2(cmpeq, 8, pcmpeqb, a128_8, b128_8)
+ GO2(cmpeq, 16, pcmpeqw, a128_16, b128_16)
+ GO2(cmpeq, 32, pcmpeqd, a128_32, b128_32)
+ MULITGO2pd(hadd, haddpd)
+ GO2(srl, 16, psrlw, a128_16, b128_16)
+ GO2(srl, 32, psrld, a128_32, b128_32)
+ GO2(srl, 64, psrlq, a128_64, b128_64)
+ GO2(add, 64, paddq, a128_64, b128_64)
+ GO2(mullo, 16, pmullw, a128_16, b128_16)
+ GO2u(subs, 8, psubusb, a128_8, b128_8)
+ GO2u(subs, 16, psubusw, a128_16, b128_16)
+ GO2u(min, 8, pminub, a128_8, b128_8)
+ GO2f(and, pand, a128_8, b128_8)
+ GO2u(adds, 8, paddusb, a128_8, b128_8)
+ GO2u(adds, 16, paddusw, a128_16, b128_16)
+ GO2u(max, 8, pmaxub, a128_8, b128_8)
+ GO2f(andnot, pandn, a128_8, b128_8)
+ GO2u(avg, 8, pavgb, a128_8, b128_8)
+ GO2(sra, 16, psraw, a128_16, b128_16)
+ GO2(sra, 32, psrad, a128_32, b128_32)
+ GO2u(avg, 16, pavgb, a128_16, b128_16) // NOTE(review): 16-bit average reuses the 8-bit label "pavgb" (x86 mnemonic is pavgw); label is presumably printed, so confirm against ref30.txt before renaming
+ GO2u(mulhi, 16, pmulhuw, a128_16, b128_16)
+ GO2(mulhi, 16, pmulhw, a128_16, b128_16)
+ GO2(subs, 8, psubsb, a128_8, b128_8)
+ GO2(subs, 16, psubsw, a128_16, b128_16)
+ GO2(min, 16, pminsw, a128_16, b128_16)
+ GO2f(or, por, a128_8, b128_8)
+ GO2(adds, 8, paddusb, a128_8, b128_8) // NOTE(review): same label as the earlier GO2u(adds, 8, paddusb, ...) row; if GO2 is the signed variant this is presumably paddsb - confirm
+ GO2(adds, 16, paddusw, a128_16, b128_16) // NOTE(review): same label as the earlier GO2u(adds, 16, paddusw, ...) row; presumably paddsw for the signed variant - confirm
+ GO2(max, 16, pmaxsw, a128_16, b128_16)
+ GO2f(xor, pxor, a128_8, b128_8)
+ GO2(sll, 16, psllw, a128_16, b128_16)
+ GO2(sll, 32, pslld, a128_32, b128_32)
+ GO2(sll, 64, psllq, a128_64, b128_64)
+ GO2u(mul, 32, pmuludq, a128_32, b128_32)
+ GO2(madd, 16, pmaddwd, a128_16, b128_16)
+ GO2u(sad, 8, psadbw, a128_8, b128_8)
+ GO2(sub, 8, psubb, a128_8, b128_8)
+ GO2(sub, 16, psubw, a128_16, b128_16)
+ GO2(sub, 32, psubd, a128_32, b128_32)
+ GO2(sub, 64, psubq, a128_64, b128_64)
+ GO2(add, 8, paddb, a128_8, b128_8)
+ GO2(add, 16, paddw, a128_16, b128_16)
+ GO2(add, 32, paddd, a128_32, b128_32)
+ GO2ps(movehl, pmovhlps, a128_ps, b128_ps)
+ GO2ps(unpacklo, unpcklps, a128_ps, b128_ps)
+ GO2ps(unpackhi, unpckhps, a128_ps, b128_ps)
+ GO2ps(movelh, pmovhps, a128_ps, b128_ps)
+ GO1ps(sqrt, psqrtps, a128_ps)
+ GO1ps(sqrt, psqrtps, b128_ps)
+ GO1ps(sqrt, psqrtps, c128_ps)
+ GO1ps(sqrt, psqrtps, d128_ps)
+ //GO1ps(rsqrt, prsqrtps, a128_ps)  // difference in precision
+ //GO1ps(rsqrt, prsqrtps, b128_ps)  // same
+ //GO1ps(rsqrt, prsqrtps, c128_ps)  // same
+ //GO1ps(rsqrt, prsqrtps, d128_ps)  // difference in the handling of NAN, (-)0, and INF in Dynarec
+ //GO1ps(rcp, prcpps, a128_ps)      // difference in precision
+ //GO1ps(rcp, prcpps, b128_ps)      // difference in precision
+ //GO1ps(rcp, prcpps, c128_ps)      // difference in precision
+ GO1ps(rcp, prcpps, d128_ps)
+ MULITGO2ps(and, andps)
+ MULITGO2ps(andnot, andnps)
+ MULITGO2ps(or, orps)
+ MULITGO2ps(xor, xorps)
+ MULITGO2ps(add, addps)
+ MULITGO2ps(mul, mulps)
+ MULITGO2ps(sub, subps)
+ MULITGO2ps(min, minps)
+ MULITGO2ps(div, divps)
+ MULITGO2ps(max, maxps)
+ MULITGO2Cps(cmp, cmpps, 0)
+ MULITGO2Cps(cmp, cmpps, 1)
+ MULITGO2Cps(cmp, cmpps, 2)
+ MULITGO2Cps(cmp, cmpps, 3)
+ MULITGO2Cps(cmp, cmpps, 4)
+ MULITGO2Cps(cmp, cmpps, 5)
+ MULITGO2Cps(cmp, cmpps, 6)
+ MULITGO2Cps(cmp, cmpps, 7)
+ MULITGO2Cps(cmp, cmpps, 8)
+ MULITGO2Cps(cmp, cmpps, 9)
+ MULITGO2Cps(cmp, cmpps, 10)
+ MULITGO2Cps(cmp, cmpps, 11)
+ MULITGO2Cps(cmp, cmpps, 12)
+ MULITGO2Cps(cmp, cmpps, 13)
+ MULITGO2Cps(cmp, cmpps, 14)
+ MULITGO2Cps(cmp, cmpps, 15)
+ MULITGO2Cps(cmp, cmpps, 16)
+ MULITGO2Cps(cmp, cmpps, 17)
+ MULITGO2Cps(cmp, cmpps, 18)
+ MULITGO2Cps(cmp, cmpps, 19)
+ MULITGO2Cps(cmp, cmpps, 20)
+ MULITGO2Cps(cmp, cmpps, 21)
+ MULITGO2Cps(cmp, cmpps, 22)
+ MULITGO2Cps(cmp, cmpps, 23)
+ MULITGO2Cps(cmp, cmpps, 24)
+ MULITGO2Cps(cmp, cmpps, 25)
+ MULITGO2Cps(cmp, cmpps, 26)
+ MULITGO2Cps(cmp, cmpps, 27)
+ MULITGO2Cps(cmp, cmpps, 28)
+ MULITGO2Cps(cmp, cmpps, 29)
+ MULITGO2Cps(cmp, cmpps, 30)
+ MULITGO2Cps(cmp, cmpps, 31)
+ MULITGO2Cps(shuffle, shufps, 0)
+ MULITGO2Cps(shuffle, shufps, 0x15)
+ MULITGO2Cps(shuffle, shufps, 0xff)
+ MULITGO2Cps(shuffle, shufps, 0x02)
+ MULTIGO2sd(sqrt, sqrtsd)
+ MULTIGO2sd(add, addsd)
+ MULTIGO2sd(mul, mulsd)
+ MULTIGO2sd(sub, subsd)
+ MULTIGO2sd(min, minsd)
+ MULTIGO2sd(div, divsd)
+ MULTIGO2sd(max, maxsd)
+ MULTIGO1ps2dq(cvtps, cvtps2dq)
+ MULITGO2Cps(dp, dpps, 0xff)
+ MULITGO2Cps(dp, dpps, 0x3f)
+ MULITGO2Cps(dp, dpps, 0xf3)
+ MULITGO2Cps(dp, dpps, 0x53)
+
+ return 0;
+}
+