diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2021-11-07 15:06:55 +0100 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2021-11-07 15:06:55 +0100 |
| commit | f5c9439f9110fcb4fde3c7db9f68be7c107b17f6 (patch) | |
| tree | f0094dbbd242c7bc4526d61c2a1d48717f00324e | |
| parent | f73e4193f032713529e26c4537e5fac44147c040 (diff) | |
| download | box64-f5c9439f9110fcb4fde3c7db9f68be7c107b17f6.tar.gz box64-f5c9439f9110fcb4fde3c7db9f68be7c107b17f6.zip | |
Improved CMPSS/CMPSD opcodes, improved test17 ([DYNAREC] too, and improved MINSS/MAXSS/MINSD/MAXSD too)
| -rwxr-xr-x | src/dynarec/dynarec_arm64_f20f.c | 30 | ||||
| -rwxr-xr-x | src/dynarec/dynarec_arm64_f30f.c | 28 | ||||
| -rw-r--r-- | src/emu/x64runf20f.c | 6 | ||||
| -rw-r--r-- | src/emu/x64runf30f.c | 6 | ||||
| -rw-r--r-- | tests/ref17.txt | 166 | ||||
| -rwxr-xr-x | tests/test17 | bin | 19976 -> 29264 bytes | |||
| -rw-r--r-- | tests/test17.c | 204 |
7 files changed, 374 insertions, 66 deletions
diff --git a/src/dynarec/dynarec_arm64_f20f.c b/src/dynarec/dynarec_arm64_f20f.c index e9a75a7d..e179b2ac 100755 --- a/src/dynarec/dynarec_arm64_f20f.c +++ b/src/dynarec/dynarec_arm64_f20f.c @@ -49,7 +49,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n uint8_t gd, ed; uint8_t wback; uint8_t u8; - uint64_t u64; + uint64_t u64, j64; int v0, v1; int q0; int d0, d1; @@ -214,9 +214,15 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n v0 = sse_get_reg(dyn, ninst, x1, gd); GETEX(v1, 0); // MINSD: if any input is NaN, or Ex[0]<Gx[0], copy Ex[0] -> Gx[0] + #if 0 d0 = fpu_get_scratch(dyn); FMINNMD(d0, v0, v1); // NaN handling may be slightly different, is that a problem? VMOVeD(v0, 0, d0, 0); // to not erase uper part + #else + FCMPD(v0, v1); + B_NEXT(cLS); //Less than or equal + VMOVeD(v0, 0, v1, 0); // to not erase uper part + #endif break; case 0x5E: INST_NAME("DIVSD Gx, Ex"); @@ -234,9 +240,15 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n v0 = sse_get_reg(dyn, ninst, x1, gd); GETEX(v1, 0); // MAXSD: if any input is NaN, or Ex[0]>Gx[0], copy Ex[0] -> Gx[0] + #if 0 d0 = fpu_get_scratch(dyn); FMAXNMD(d0, v0, v1); // NaN handling may be slightly different, is that a problem? 
VMOVeD(v0, 0, d0, 0); // to not erase uper part + #else + FCMPD(v0, v1); + B_NEXT(cGE); //Greater than or equal + VMOVeD(v0, 0, v1, 0); // to not erase uper part + #endif break; case 0x70: @@ -282,19 +294,15 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n GETGX(v0); GETEX(v1, 1); u8 = F8; - if((u8&7)==6){ - FCMPD(v1, v0); - } else { - FCMPD(v0, v1); - } + FCMPD(v0, v1); switch(u8&7) { - case 0: CSETMx(x2, cEQ); CSELx(x2, xZR, x2, cVS); break; // Equal - case 1: CSETMx(x2, cMI); CSELx(x2, xZR, x2, cVS); break; // Less than - case 2: CSETMx(x2, cLE); CSELx(x2, xZR, x2, cVS); break; // Less or equal + case 0: CSETMx(x2, cEQ); break; // Equal + case 1: CSETMx(x2, cCC); break; // Less than + case 2: CSETMx(x2, cLS); break; // Less or equal case 3: CSETMx(x2, cVS); break; // NaN - case 4: CSETMx(x2, cNE); break; // Not Equal + case 4: CSETMx(x2, cNE); break; // Not Equal or unordered case 5: CSETMx(x2, cCS); break; // Greater or equal or unordered - case 6: CSETMx(x2, cLT); break; // Greater or unordered, test inverted, N!=V so unordered or less than (inverted) + case 6: CSETMx(x2, cHI); break; // Greater or unordered, test inverted, N!=V so unordered or less than (inverted) case 7: CSETMx(x2, cVC); break; // not NaN } VMOVQDfrom(v0, 0, x2); diff --git a/src/dynarec/dynarec_arm64_f30f.c b/src/dynarec/dynarec_arm64_f30f.c index 8c87b790..d47a99c5 100755 --- a/src/dynarec/dynarec_arm64_f30f.c +++ b/src/dynarec/dynarec_arm64_f30f.c @@ -259,9 +259,15 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n GETGX(v0); GETEX(v1, 0); // MINSS: if any input is NaN, or Ex[0]<Gx[0], copy Ex[0] -> Gx[0] + #if 0 d0 = fpu_get_scratch(dyn); FMINNMS(d0, v0, v1); // NaN handling may be slightly different, is that a problem? 
VMOVeS(v0, 0, d0, 0); // to not erase uper part + #else + FCMPS(v0, v1); + B_NEXT(cLS); //Less than or equal + VMOVeS(v0, 0, v1, 0); // to not erase uper part + #endif break; case 0x5E: INST_NAME("DIVSS Gx, Ex"); @@ -278,9 +284,15 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n GETGX(v0); GETEX(v1, 0); // MAXSS: if any input is NaN, or Ex[0]>Gx[0], copy Ex[0] -> Gx[0] + #if 0 d0 = fpu_get_scratch(dyn); FMAXNMS(d0, v0, v1); // NaN handling may be slightly different, is that a problem? VMOVeS(v0, 0, d0, 0); // to not erase uper part + #else + FCMPS(v0, v1); + B_NEXT(cGE); //Greater than or equal + VMOVeS(v0, 0, v1, 0); // to not erase uper part + #endif break; case 0x6F: @@ -384,19 +396,15 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n GETGX(v0); GETEX(v1, 1); u8 = F8; - if((u8&7)==6){ - FCMPS(v1, v0); - } else { - FCMPS(v0, v1); - } + FCMPS(v0, v1); switch(u8&7) { - case 0: CSETMw(x2, cEQ); CSELw(x2, xZR, x2, cVS); break; // Equal - case 1: CSETMw(x2, cMI); CSELw(x2, xZR, x2, cVS); break; // Less than - case 2: CSETMw(x2, cLE); CSELw(x2, xZR, x2, cVS); break; // Less or equal + case 0: CSETMw(x2, cEQ); break; // Equal + case 1: CSETMw(x2, cCC); break; // Less than + case 2: CSETMw(x2, cLS); break; // Less or equal case 3: CSETMw(x2, cVS); break; // NaN - case 4: CSETMw(x2, cNE); break; // Not Equal + case 4: CSETMw(x2, cNE); break; // Not Equal or unordered case 5: CSETMw(x2, cCS); break; // Greater or equal or unordered - case 6: CSETMw(x2, cLT); break; // Greater or unordered, test inverted, N!=V so unordered or less than (inverted) + case 6: CSETMw(x2, cHI); break; // Greater or unordered, test inverted, N!=V so unordered or less than (inverted) case 7: CSETMw(x2, cVC); break; // not NaN } VMOVQSfrom(v0, 0, x2); diff --git a/src/emu/x64runf20f.c b/src/emu/x64runf20f.c index 7bf3c99e..eff4ca5f 100644 --- a/src/emu/x64runf20f.c +++ b/src/emu/x64runf20f.c @@ -218,10 +218,10 @@ int 
RunF20F(x64emu_t *emu, rex_t rex) tmp8s = 0; switch(tmp8u&7) { case 0: tmp8s=(GX->d[0] == EX->d[0]); break; - case 1: tmp8s=isless(GX->d[0], EX->d[0]); break; - case 2: tmp8s=islessequal(GX->d[0], EX->d[0]); break; + case 1: tmp8s=isless(GX->d[0], EX->d[0]) && !(isnan(GX->d[0]) || isnan(EX->d[0])); break; + case 2: tmp8s=islessequal(GX->d[0], EX->d[0]) && !(isnan(GX->d[0]) || isnan(EX->d[0])); break; case 3: tmp8s=isnan(GX->d[0]) || isnan(EX->d[0]); break; - case 4: tmp8s=(GX->d[0] != EX->d[0]); break; + case 4: tmp8s=isnan(GX->d[0]) || isnan(EX->d[0]) || (GX->d[0] != EX->d[0]); break; case 5: tmp8s=isnan(GX->d[0]) || isnan(EX->d[0]) || isgreaterequal(GX->d[0], EX->d[0]); break; case 6: tmp8s=isnan(GX->d[0]) || isnan(EX->d[0]) || isgreater(GX->d[0], EX->d[0]); break; case 7: tmp8s=!isnan(GX->d[0]) && !isnan(EX->d[0]); break; diff --git a/src/emu/x64runf30f.c b/src/emu/x64runf30f.c index 1960238e..7cc7add5 100644 --- a/src/emu/x64runf30f.c +++ b/src/emu/x64runf30f.c @@ -318,10 +318,10 @@ int RunF30F(x64emu_t *emu, rex_t rex) tmp8s = 0; switch(tmp8u&7) { case 0: tmp8s=(GX->f[0] == EX->f[0]); break; - case 1: tmp8s=isless(GX->f[0], EX->f[0]); break; - case 2: tmp8s=islessequal(GX->f[0], EX->f[0]); break; + case 1: tmp8s=isless(GX->f[0], EX->f[0]) && !(isnan(GX->f[0]) || isnan(EX->f[0])); break; + case 2: tmp8s=islessequal(GX->f[0], EX->f[0]) && !(isnan(GX->f[0]) || isnan(EX->f[0])); break; case 3: tmp8s=isnan(GX->f[0]) || isnan(EX->f[0]); break; - case 4: tmp8s=(GX->f[0] != EX->f[0]); break; + case 4: tmp8s=isnan(GX->f[0]) || isnan(EX->f[0]) || (GX->f[0] != EX->f[0]); break; case 5: tmp8s=isnan(GX->f[0]) || isnan(EX->f[0]) || isgreaterequal(GX->f[0], EX->f[0]); break; case 6: tmp8s=isnan(GX->f[0]) || isnan(EX->f[0]) || isgreater(GX->f[0], EX->f[0]); break; case 7: tmp8s=!isnan(GX->f[0]) && !isnan(EX->f[0]); break; diff --git a/tests/ref17.txt b/tests/ref17.txt index 349d50d0..9715adf0 100644 --- a/tests/ref17.txt +++ b/tests/ref17.txt @@ -1,12 +1,154 @@ -div 1, 1 => 1 
/ 0 -div 10, 5 => 2 / 0 -div 10, 3 => 3 / 1 -div 1, 18446744073709551615 => 0 / 1 -div 10, 18446744073709551613 => 0 / 10 -div 18446744073709551606, 18446744073709551613 => 0 / 18446744073709551606 -idiv 1, 1 => 1 / 0 -idiv 10, 5 => 2 / 0 -idiv 10, 3 => 3 / 1 -idiv 1, -1 => -1 / 0 -idiv 10, -3 => -3 / 1 -idiv -10, -3 => 3 / -1 +ucomiss 1.000000, 2.000000 => 0x202 +ucomiss 2.000000, 1.000000 => 0x203 +ucomiss 1.000000, inf => 0x202 +ucomiss inf, 1.000000 => 0x203 +ucomiss 1.000000, -inf => 0x203 +ucomiss -inf, 1.000000 => 0x202 +ucomiss 1.000000, nan => 0x247 +ucomiss nan, 1.000000 => 0x247 +ucomiss 1.000000, 1.000000 => 0x242 +ucomiss 1.000000, 1.000000 => 0x242 +ucomiss inf, inf => 0x242 +ucomiss -inf, inf => 0x202 +ucomiss inf, -inf => 0x203 +ucomiss nan, nan => 0x247 +minss 1, 2 => 1 +minss 2, 1 => 1 +minss -inf, 2 => -inf +minss 2, -inf => -inf +minss inf, 2 => 2 +minss 2, inf => 2 +minss nan, 2 => 2 +minss 2, nan => nan +minss nan, 3.40282e+38 => 3.40282e+38 +minss 3.40282e+38, nan => nan +minss -inf, 3.40282e+38 => -inf +minss 3.40282e+38, -inf => -inf +minss inf, 3.40282e+38 => 3.40282e+38 +minss 3.40282e+38, inf => 3.40282e+38 +maxss 1, 2 => 2 +maxss 2, 1 => 2 +maxss -inf, 2 => 2 +maxss 2, -inf => 2 +maxss inf, 2 => inf +maxss 2, inf => inf +maxss nan, 2 => 2 +maxss 2, nan => nan +maxss nan, 3.40282e+38 => 3.40282e+38 +maxss 3.40282e+38, nan => nan +maxss -inf, 3.40282e+38 => 3.40282e+38 +maxss 3.40282e+38, -inf => 3.40282e+38 +maxss inf, 3.40282e+38 => inf +maxss 3.40282e+38, inf => inf +cmpss 0 1.000000, 2.000000 => 0x0 +cmpss 0 2.000000, 1.000000 => 0x0 +cmpss 0 1.000000, inf => 0x0 +cmpss 0 inf, 1.000000 => 0x0 +cmpss 0 1.000000, -inf => 0x0 +cmpss 0 -inf, 1.000000 => 0x0 +cmpss 0 1.000000, nan => 0x0 +cmpss 0 nan, 1.000000 => 0x0 +cmpss 0 1.000000, 1.000000 => 0xffffffff +cmpss 0 1.000000, 1.000000 => 0xffffffff +cmpss 0 inf, inf => 0xffffffff +cmpss 0 -inf, inf => 0x0 +cmpss 0 inf, -inf => 0x0 +cmpss 0 nan, nan => 0x0 +cmpss 1 1.000000, 2.000000 => 
0xffffffff +cmpss 1 2.000000, 1.000000 => 0x0 +cmpss 1 1.000000, inf => 0xffffffff +cmpss 1 inf, 1.000000 => 0x0 +cmpss 1 1.000000, -inf => 0x0 +cmpss 1 -inf, 1.000000 => 0xffffffff +cmpss 1 1.000000, nan => 0x0 +cmpss 1 nan, 1.000000 => 0x0 +cmpss 1 1.000000, 1.000000 => 0x0 +cmpss 1 1.000000, 1.000000 => 0x0 +cmpss 1 inf, inf => 0x0 +cmpss 1 -inf, inf => 0xffffffff +cmpss 1 inf, -inf => 0x0 +cmpss 1 nan, nan => 0x0 +cmpss 2 1.000000, 2.000000 => 0xffffffff +cmpss 2 2.000000, 1.000000 => 0x0 +cmpss 2 1.000000, inf => 0xffffffff +cmpss 2 inf, 1.000000 => 0x0 +cmpss 2 1.000000, -inf => 0x0 +cmpss 2 -inf, 1.000000 => 0xffffffff +cmpss 2 1.000000, nan => 0x0 +cmpss 2 nan, 1.000000 => 0x0 +cmpss 2 1.000000, 1.000000 => 0xffffffff +cmpss 2 1.000000, 1.000000 => 0xffffffff +cmpss 2 inf, inf => 0xffffffff +cmpss 2 -inf, inf => 0xffffffff +cmpss 2 inf, -inf => 0x0 +cmpss 2 nan, nan => 0x0 +cmpss 3 1.000000, 2.000000 => 0x0 +cmpss 3 2.000000, 1.000000 => 0x0 +cmpss 3 1.000000, inf => 0x0 +cmpss 3 inf, 1.000000 => 0x0 +cmpss 3 1.000000, -inf => 0x0 +cmpss 3 -inf, 1.000000 => 0x0 +cmpss 3 1.000000, nan => 0xffffffff +cmpss 3 nan, 1.000000 => 0xffffffff +cmpss 3 1.000000, 1.000000 => 0x0 +cmpss 3 1.000000, 1.000000 => 0x0 +cmpss 3 inf, inf => 0x0 +cmpss 3 -inf, inf => 0x0 +cmpss 3 inf, -inf => 0x0 +cmpss 3 nan, nan => 0xffffffff +cmpss 4 1.000000, 2.000000 => 0xffffffff +cmpss 4 2.000000, 1.000000 => 0xffffffff +cmpss 4 1.000000, inf => 0xffffffff +cmpss 4 inf, 1.000000 => 0xffffffff +cmpss 4 1.000000, -inf => 0xffffffff +cmpss 4 -inf, 1.000000 => 0xffffffff +cmpss 4 1.000000, nan => 0xffffffff +cmpss 4 nan, 1.000000 => 0xffffffff +cmpss 4 1.000000, 1.000000 => 0x0 +cmpss 4 1.000000, 1.000000 => 0x0 +cmpss 4 inf, inf => 0x0 +cmpss 4 -inf, inf => 0xffffffff +cmpss 4 inf, -inf => 0xffffffff +cmpss 4 nan, nan => 0xffffffff +cmpss 5 1.000000, 2.000000 => 0x0 +cmpss 5 2.000000, 1.000000 => 0xffffffff +cmpss 5 1.000000, inf => 0x0 +cmpss 5 inf, 1.000000 => 0xffffffff +cmpss 5 
1.000000, -inf => 0xffffffff +cmpss 5 -inf, 1.000000 => 0x0 +cmpss 5 1.000000, nan => 0xffffffff +cmpss 5 nan, 1.000000 => 0xffffffff +cmpss 5 1.000000, 1.000000 => 0xffffffff +cmpss 5 1.000000, 1.000000 => 0xffffffff +cmpss 5 inf, inf => 0xffffffff +cmpss 5 -inf, inf => 0x0 +cmpss 5 inf, -inf => 0xffffffff +cmpss 5 nan, nan => 0xffffffff +cmpss 6 1.000000, 2.000000 => 0x0 +cmpss 6 2.000000, 1.000000 => 0xffffffff +cmpss 6 1.000000, inf => 0x0 +cmpss 6 inf, 1.000000 => 0xffffffff +cmpss 6 1.000000, -inf => 0xffffffff +cmpss 6 -inf, 1.000000 => 0x0 +cmpss 6 1.000000, nan => 0xffffffff +cmpss 6 nan, 1.000000 => 0xffffffff +cmpss 6 1.000000, 1.000000 => 0x0 +cmpss 6 1.000000, 1.000000 => 0x0 +cmpss 6 inf, inf => 0x0 +cmpss 6 -inf, inf => 0x0 +cmpss 6 inf, -inf => 0xffffffff +cmpss 6 nan, nan => 0xffffffff +cmpss 7 1.000000, 2.000000 => 0xffffffff +cmpss 7 2.000000, 1.000000 => 0xffffffff +cmpss 7 1.000000, inf => 0xffffffff +cmpss 7 inf, 1.000000 => 0xffffffff +cmpss 7 1.000000, -inf => 0xffffffff +cmpss 7 -inf, 1.000000 => 0xffffffff +cmpss 7 1.000000, nan => 0x0 +cmpss 7 nan, 1.000000 => 0x0 +cmpss 7 1.000000, 1.000000 => 0xffffffff +cmpss 7 1.000000, 1.000000 => 0xffffffff +cmpss 7 inf, inf => 0xffffffff +cmpss 7 -inf, inf => 0xffffffff +cmpss 7 inf, -inf => 0xffffffff +cmpss 7 nan, nan => 0x0 diff --git a/tests/test17 b/tests/test17 index bcfd74f8..e56e6a46 100755 --- a/tests/test17 +++ b/tests/test17 Binary files differ diff --git a/tests/test17.c b/tests/test17.c index 799c1368..735e861d 100644 --- a/tests/test17.c +++ b/tests/test17.c @@ -6,45 +6,195 @@ #include <math.h> #if defined(__x86_64__) -uint64_t _div_(uint64_t a, uint64_t b, uint64_t *r) +uint64_t _ucomiss_(float a, float b) { - uint64_t ret, rem; + uint64_t ret; asm volatile ( - "xor %%rdx, %%rdx\n" - "div %%rcx\n" - "mov %%rdx, %%rbx\n" - :"=a" (ret), "=b" (rem):"a" (a), "c" (b):"rdx","cc"); - *r = rem; + "ucomiss %%xmm0, %%xmm1\n" + "pushf\n" + "pop %%rax" + :"=a" (ret)::"xmm0","xmm1","cc"); return 
ret; } -uint64_t _idiv_(uint64_t a, uint64_t b, uint64_t *r) +uint64_t _minss_(float a, float b) { - uint64_t ret, rem; + uint64_t ret; asm volatile ( - "cqo\n" - "idiv %%rcx\n" - "mov %%rdx, %%rbx\n" - :"=a" (ret), "=b" (rem):"a" (a), "c" (b):"rdx","cc"); - *r = rem; + "minss %%xmm1, %%xmm0\n" + "movd %%xmm0, %%eax" + :"=a" (ret)::"xmm0","xmm1","cc"); return ret; } +uint64_t _maxss_(float a, float b) +{ + uint64_t ret; + asm volatile ( + "maxss %%xmm1, %%xmm0\n" + "movd %%xmm0, %%eax" + :"=a" (ret)::"xmm0","xmm1","cc"); + return ret; +} +#define CMPSS(A) \ +uint64_t _cmpss_##A(float a, float b) \ +{ \ + uint64_t ret; \ + asm volatile ( \ + "cmpss $" #A ", %%xmm1, %%xmm0\n" \ + "movd %%xmm0, %%eax" \ + :"=a" (ret)::"xmm0","xmm1","cc"); \ + return ret; \ +} #else +uint64_t _ucomiss_(float a, float b) +{ + uint32_t ret; + asm volatile ( + "movss %1, %%xmm0\n" + "movss %2, %%xmm1\n" + "ucomiss %%xmm0, %%xmm1\n" + "pushf\n" + "pop %%eax" + :"=a" (ret):"m"(a), "m"(b):"xmm0", "xmm1", "cc"); + return ret; +} +uint64_t _minss_(float a, float b) +{ + uint32_t ret; + asm volatile ( + "movss %1, %%xmm0\n" + "movss %2, %%xmm1\n" + "minss %%xmm1, %%xmm0\n" + "movd %%xmm0, %%eax" + :"=a" (ret):"m"(a), "m"(b):"xmm0", "xmm1", "cc"); + return ret; +} +uint64_t _maxss_(float a, float b) +{ + uint32_t ret; + asm volatile ( + "movss %1, %%xmm0\n" + "movss %2, %%xmm1\n" + "maxss %%xmm1, %%xmm0\n" + "movd %%xmm0, %%eax" + :"=a" (ret):"m"(a), "m"(b):"xmm0", "xmm1", "cc"); + return ret; +} +#define CMPSS(A) \ +uint64_t _cmpss_##A(float a, float b) \ +{ \ + uint32_t ret; \ + asm volatile ( \ + "movss %1, %%xmm0\n" \ + "movss %2, %%xmm1\n" \ + "cmpss $" #A ", %%xmm1, %%xmm0\n" \ + "movd %%xmm0, %%eax" \ + :"=a" (ret):"m"(a), "m"(b):"xmm0", "xmm1", "cc"); \ + return ret; \ +} #endif +CMPSS(0) +CMPSS(1) +CMPSS(2) +CMPSS(3) +CMPSS(4) +CMPSS(5) +CMPSS(6) +CMPSS(7) int main(int argc, const char** argv) { - uint64_t datas[][2] = {{1,1},{10,5},{10,3},{1, (uint64_t)-1}, {10, (uint64_t)-3}, 
{(uint64_t)-10, (uint64_t)-3}}; + float a, b; + uint64_t flags; + uint32_t maxf = 0x7f7fffff; + uint32_t minf = 0xff7fffff; + uint32_t r; + +#define GO1(A, N) \ +a = 1.0f; b = 2.0f; \ +flags = A(a, b); \ +printf(N " %f, %f => 0x%lx\n", a, b, flags); \ +flags = A(b, a); \ +printf(N " %f, %f => 0x%lx\n", b, a, flags); \ +b = INFINITY; \ +flags = A(a, b); \ +printf(N " %f, %f => 0x%lx\n", a, b, flags); \ +flags = A(b, a); \ +printf(N " %f, %f => 0x%lx\n", b, a, flags); \ +b = -INFINITY; \ +flags = A(a, b); \ +printf(N " %f, %f => 0x%lx\n", a, b, flags); \ +flags = A(b, a); \ +printf(N " %f, %f => 0x%lx\n", b, a, flags); \ +b = NAN; \ +flags = A(a, b); \ +printf(N " %f, %f => 0x%lx\n", a, b, flags); \ +flags = A(b, a); \ +printf(N " %f, %f => 0x%lx\n", b, a, flags); \ +b = a; \ +flags = A(a, b); \ +printf(N " %f, %f => 0x%lx\n", a, b, flags); \ +flags = A(b, a); \ +printf(N " %f, %f => 0x%lx\n", b, a, flags); \ +a = b = INFINITY; \ +flags = A(a, b); \ +printf(N " %f, %f => 0x%lx\n", a, b, flags); \ +a = -INFINITY; \ +flags = A(a, b); \ +printf(N " %f, %f => 0x%lx\n", a, b, flags); \ +flags = A(b, a); \ +printf(N " %f, %f => 0x%lx\n", b, a, flags); \ +a = b = NAN; \ +flags = A(a, b); \ +printf(N " %f, %f => 0x%lx\n", a, b, flags); + +#define GO2(A, N) \ +a = 1.0f; b = 2.0f; \ +r = A(a, b); \ +printf(N " %g, %g => %g\n", a, b, *(float*)&r); \ +r = A(b, a); \ +printf(N " %g, %g => %g\n", b, a, *(float*)&r); \ +a = -INFINITY; \ +r = A(a, b); \ +printf(N " %g, %g => %g\n", a, b, *(float*)&r); \ +r = A(b, a); \ +printf(N " %g, %g => %g\n", b, a, *(float*)&r); \ +a = +INFINITY; \ +r = A(a, b); \ +printf(N " %g, %g => %g\n", a, b, *(float*)&r); \ +r = A(b, a); \ +printf(N " %g, %g => %g\n", b, a, *(float*)&r); \ +a = NAN; \ +r = A(a, b); \ +printf(N " %g, %g => %g\n", a, b, *(float*)&r); \ +r = A(b, a); \ +printf(N " %g, %g => %g\n", b, a, *(float*)&r); \ +b = *(float*)&maxf; \ +r = A(a, b); \ +printf(N " %g, %g => %g\n", a, b, *(float*)&r); \ +r = A(b, a); \ +printf(N " %g, 
%g => %g\n", b, a, *(float*)&r); \ +a = -INFINITY; \ +r = A(a, b); \ +printf(N " %g, %g => %g\n", a, b, *(float*)&r); \ +r = A(b, a); \ +printf(N " %g, %g => %g\n", b, a, *(float*)&r); \ +a = +INFINITY; \ +r = A(a, b); \ +printf(N " %g, %g => %g\n", a, b, *(float*)&r); \ +r = A(b, a); \ +printf(N " %g, %g => %g\n", b, a, *(float*)&r); - int sz = sizeof(datas)/sizeof(datas[0]); - for(int i=0; i<sz; ++i) { - uint64_t rem = 0; - uint64_t d = _div_(datas[i][0], datas[i][1], &rem); - printf("div %llu, %llu => %llu / %llu\n", datas[i][0], datas[i][1], d, rem); - } - for(int i=0; i<sz; ++i) { - uint64_t rem = 0; - uint64_t d = _idiv_(datas[i][0], datas[i][1], &rem); - printf("idiv %lld, %lld => %lld / %lld\n", datas[i][0], datas[i][1], d, rem); - } - return 0; + GO1(_ucomiss_, "ucomiss") + GO2(_minss_, "minss") + GO2(_maxss_, "maxss") + GO1(_cmpss_0, "cmpss 0") + GO1(_cmpss_1, "cmpss 1") + GO1(_cmpss_2, "cmpss 2") + GO1(_cmpss_3, "cmpss 3") + GO1(_cmpss_4, "cmpss 4") + GO1(_cmpss_5, "cmpss 5") + GO1(_cmpss_6, "cmpss 6") + GO1(_cmpss_7, "cmpss 7") + + return 0; } |