diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2022-03-30 10:53:51 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2022-03-30 10:53:51 +0200 |
| commit | a9a82e581a2cff17368fc369f1d6517e4845a2e6 (patch) | |
| tree | 26e554a528edebbadfac1bb828495a5579d25ba9 | |
| parent | 0a8bbe93a52ce4d454e0eb8ee6c71384e0b5fb94 (diff) | |
| download | box64-a9a82e581a2cff17368fc369f1d6517e4845a2e6.tar.gz box64-a9a82e581a2cff17368fc369f1d6517e4845a2e6.zip | |
Improved and enhanced test17 (SSE testing)
| -rw-r--r-- | src/emu/x64run660f.c | 6 | ||||
| -rw-r--r-- | tests/ref17.txt | 28 | ||||
| -rwxr-xr-x | tests/test17 | bin | 29264 -> 81488 bytes | |||
| -rw-r--r-- | tests/test17.c | 205 |
4 files changed, 144 insertions, 95 deletions
diff --git a/src/emu/x64run660f.c b/src/emu/x64run660f.c
index 0f7f252b..5d4463bd 100644
--- a/src/emu/x64run660f.c
+++ b/src/emu/x64run660f.c
@@ -367,7 +367,7 @@ int Run660F(x64emu_t *emu, rex_t rex)
             GETEX(0);
             GETGX;
             for (int i=0; i<16; ++i) {
-                GX->sb[i] = abs(EX->sb[i]);
+                GX->ub[i] = abs(EX->sb[i]);
             }
             break;
         case 0x1D: /* PABSW Gx, Ex */
@@ -375,7 +375,7 @@ int Run660F(x64emu_t *emu, rex_t rex)
             GETEX(0);
             GETGX;
             for (int i=0; i<8; ++i) {
-                GX->sw[i] = abs(EX->sw[i]);
+                GX->uw[i] = abs(EX->sw[i]);
             }
             break;
         case 0x1E: /* PABSD Gx, Ex */
@@ -383,7 +383,7 @@ int Run660F(x64emu_t *emu, rex_t rex)
             GETEX(0);
             GETGX;
             for (int i=0; i<4; ++i) {
-                GX->sd[i] = abs(EX->sd[i]);
+                GX->ud[i] = abs(EX->sd[i]);
             }
             break;
 
diff --git a/tests/ref17.txt b/tests/ref17.txt
index 9715adf0..2bae8c46 100644
--- a/tests/ref17.txt
+++ b/tests/ref17.txt
@@ -1,17 +1,17 @@
-ucomiss 1.000000, 2.000000 => 0x202
-ucomiss 2.000000, 1.000000 => 0x203
-ucomiss 1.000000, inf => 0x202
-ucomiss inf, 1.000000 => 0x203
-ucomiss 1.000000, -inf => 0x203
-ucomiss -inf, 1.000000 => 0x202
-ucomiss 1.000000, nan => 0x247
-ucomiss nan, 1.000000 => 0x247
+ucomiss 1.000000, 2.000000 => 0x203
+ucomiss 2.000000, 1.000000 => 0x202
+ucomiss 1.000000, inf => 0x203
+ucomiss inf, 1.000000 => 0x202
+ucomiss 1.000000, -inf => 0x202
+ucomiss -inf, 1.000000 => 0x203
+ucomiss 1.000000, nan => 0x203
+ucomiss nan, 1.000000 => 0x203
 ucomiss 1.000000, 1.000000 => 0x242
 ucomiss 1.000000, 1.000000 => 0x242
 ucomiss inf, inf => 0x242
-ucomiss -inf, inf => 0x202
-ucomiss inf, -inf => 0x203
-ucomiss nan, nan => 0x247
+ucomiss -inf, inf => 0x203
+ucomiss inf, -inf => 0x202
+ucomiss nan, nan => 0x203
 minss 1, 2 => 1
 minss 2, 1 => 1
 minss -inf, 2 => -inf
@@ -152,3 +152,9 @@ cmpss 7 inf, inf => 0xffffffff
 cmpss 7 -inf, inf => 0xffffffff
 cmpss 7 inf, -inf => 0xffffffff
 cmpss 7 nan, nan => 0x0
+pabsb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 ) = 0x1 0x80 0x7f 0x0 0x1 0x2 0x3 0x7f 0x2 0x7c 0x72 0x52 0x5b 0x0 0x40 0x32
+pabsw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 ) = 0x1 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x7fff
+pabsd(0xffffffff 0x80000000 0x7fffffff 0x0 ) = 0x1 0x80000000 0x7fffffff 0x0
+pshuffleb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0xff 0x80 0x2 0x2 0xff 0x0 0x0 0xff 0xfe 0x81 0x0 0x3 0x72 0x32 0xff 0x80
+phaddw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x7fff 0x7fff 0x3 0x8004 0xffff 0xfffe 0x9050 0x7fff
+phaddd(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0x7fffffff 0x7fffffff 0x80000001 0x3
diff --git a/tests/test17 b/tests/test17
index e56e6a46..0d0b89e4 100755
--- a/tests/test17
+++ b/tests/test17
Binary files differ
diff --git a/tests/test17.c b/tests/test17.c
index 735e861d..e0c07685 100644
--- a/tests/test17.c
+++ b/tests/test17.c
@@ -4,102 +4,122 @@
 #include <stdlib.h>
 #include <stdint.h>
 #include <math.h>
+#include <pmmintrin.h>
+#include <immintrin.h>
+
+typedef unsigned char u8x16 __attribute__ ((vector_size (16)));
+typedef unsigned short u16x8 __attribute__ ((vector_size (16)));
+typedef unsigned int u32x4 __attribute__ ((vector_size (16)));
+typedef unsigned long int u64x2 __attribute__ ((vector_size (16)));
+typedef float f32x4 __attribute__ ((vector_size (16)));
+typedef double d64x2 __attribute__ ((vector_size (16)));
+
+typedef union {
+    __m128i mm;
+    __m128 mf;
+    __m128d md;
+    u8x16 u8;
+    u16x8 u16;
+    u32x4 u32;
+    u64x2 u64;
+    f32x4 f32;
+    d64x2 d64;
+} v128;
 
-#if defined(__x86_64__)
 uint64_t _ucomiss_(float a, float b)
 {
-    uint64_t ret;
-    asm volatile (
-    "ucomiss %%xmm0, %%xmm1\n"
-    "pushf\n"
-    "pop %%rax"
-    :"=a" (ret)::"xmm0","xmm1","cc");
+    uint64_t ret = 0x202;
+    v128 va, vb;
+    va.f32[0] = a;
+    vb.f32[0] = b;
+    if(_mm_ucomigt_ss(va.mf, vb.mf))
+        ret |= 0x000;
+    else if(_mm_ucomilt_ss(va.mf, vb.mf))
+        ret |= 0x001;
+    else if(_mm_ucomieq_ss(va.mf, vb.mf))
+        ret |= 0x040;
+    else
+        ret |= 0x045;
     return ret;
 }
+
 uint64_t _minss_(float a, float b)
 {
-    uint64_t ret;
-    asm volatile (
-    "minss %%xmm1, %%xmm0\n"
-    "movd %%xmm0, %%eax"
-    :"=a" (ret)::"xmm0","xmm1","cc");
-    return ret;
+    v128 va, vb, ret;
+    va.f32[0] = a;
+    vb.f32[0] = b;
+    ret.mf = _mm_min_ss(va.mf, vb.mf);
+    return ret.u64[0];
 }
 uint64_t _maxss_(float a, float b)
 {
-    uint64_t ret;
-    asm volatile (
-    "maxss %%xmm1, %%xmm0\n"
-    "movd %%xmm0, %%eax"
-    :"=a" (ret)::"xmm0","xmm1","cc");
-    return ret;
+    v128 va, vb, ret;
+    va.f32[0] = a;
+    vb.f32[0] = b;
+    ret.mf = _mm_max_ss(va.mf, vb.mf);
+    return ret.u64[0];
 }
-#define CMPSS(A) \
-uint64_t _cmpss_##A(float a, float b) \
-{ \
-    uint64_t ret; \
-    asm volatile ( \
-    "cmpss $" #A ", %%xmm1, %%xmm0\n" \
-    "movd %%xmm0, %%eax" \
-    :"=a" (ret)::"xmm0","xmm1","cc"); \
-    return ret; \
+
+#define CMPSS(A, B) \
+uint64_t _cmpss_##A(float a, float b) \
+{ \
+    v128 va, vb, ret; \
+    va.f32[0] = a; \
+    vb.f32[0] = b; \
+    ret.mf = _mm_cmp##B##_ss(va.mf, vb.mf); \
+    return ret.u64[0]; \
 }
-#else
-uint64_t _ucomiss_(float a, float b)
-{
-    uint32_t ret;
-    asm volatile (
-    "movss %1, %%xmm0\n"
-    "movss %2, %%xmm1\n"
-    "ucomiss %%xmm0, %%xmm1\n"
-    "pushf\n"
-    "pop %%eax"
-    :"=a" (ret):"m"(a), "m"(b):"xmm0", "xmm1", "cc");
-    return ret;
+CMPSS(0, eq)
+CMPSS(1, lt)
+CMPSS(2, le)
+CMPSS(3, unord)
+CMPSS(4, neq)
+CMPSS(5, nlt)
+CMPSS(6, nle)
+CMPSS(7, ord)
+#undef CMPSS
+
+const v128 a128_8 = {.u8 = {
+    0xff, 0x80, 0x7f, 0x00, 0x01, 0x02, 0x03, 0x81,
+    0xfe, 0x84, 0x72, 0x52, 0xa5, 0x00, 0xc0, 0x32
+}};
+const v128 a128_16 = {.u16 = {
+    0xffff, 0x8000, 0x7fff, 0x0000, 0x0001, 0x0002, 0x0003, 0x8001
+}};
+const v128 a128_32 = {.u32 = {
+    0xffffffff, 0x80000000, 0x7fffffff, 0x00000000
+}};
+const v128 a128_64 = {.u64 = {
+    0xffffffffffffffffLL, 0x8000000000000000LL
+}};
+
+const v128 b128_8 = {.u8 = {
+    0x00, 0x01, 0x05, 0x15, 0x20, 0x80, 0xff, 0x00,
+    0x08, 0x07, 0x81, 0x06, 0x0a, 0x0f, 0x10, 0x01
+}};
+const v128 b128_16 = {.u16 = {
+    0x8000, 0x7fff, 0xffff, 0xffff, 0x0050, 0x9000, 0xfffe, 0x8001
+}};
+const v128 b128_32 = {.u32 = {
+    0x00000001, 0x80000000, 0x00000005, 0xfffffffe
+}};
+
+void print_8(v128 v) {
+    for(int i=0; i<16; ++i)
+        printf("0x%x ", v.u8[i]);
 }
-uint64_t _minss_(float a, float b)
-{
-    uint32_t ret;
-    asm volatile (
-    "movss %1, %%xmm0\n"
-    "movss %2, %%xmm1\n"
-    "minss %%xmm1, %%xmm0\n"
-    "movd %%xmm0, %%eax"
-    :"=a" (ret):"m"(a), "m"(b):"xmm0", "xmm1", "cc");
-    return ret;
+void print_16(v128 v) {
+    for(int i=0; i<8; ++i)
+        printf("0x%x ", v.u16[i]);
 }
-uint64_t _maxss_(float a, float b)
-{
-    uint32_t ret;
-    asm volatile (
-    "movss %1, %%xmm0\n"
-    "movss %2, %%xmm1\n"
-    "maxss %%xmm1, %%xmm0\n"
-    "movd %%xmm0, %%eax"
-    :"=a" (ret):"m"(a), "m"(b):"xmm0", "xmm1", "cc");
-    return ret;
+void print_32(v128 v) {
+    for(int i=0; i<4; ++i)
+        printf("0x%x ", v.u32[i]);
 }
-#define CMPSS(A) \
-uint64_t _cmpss_##A(float a, float b) \
-{ \
-    uint32_t ret; \
-    asm volatile ( \
-    "movss %1, %%xmm0\n" \
-    "movss %2, %%xmm1\n" \
-    "cmpss $" #A ", %%xmm1, %%xmm0\n" \
-    "movd %%xmm0, %%eax" \
-    :"=a" (ret):"m"(a), "m"(b):"xmm0", "xmm1", "cc"); \
-    return ret; \
+void print_64(v128 v) {
+    for(int i=0; i<2; ++i)
+        printf("0x%llx ", v.u64[i]);
 }
-#endif
-CMPSS(0)
-CMPSS(1)
-CMPSS(2)
-CMPSS(3)
-CMPSS(4)
-CMPSS(5)
-CMPSS(6)
-CMPSS(7)
 
 int main(int argc, const char** argv)
 {
@@ -195,6 +215,29 @@ printf(N " %g, %g => %g\n", b, a, *(float*)&r);
     GO1(_cmpss_5, "cmpss 5")
     GO1(_cmpss_6, "cmpss 6")
     GO1(_cmpss_7, "cmpss 7")
-    
+
+    #undef GO1
+    #undef GO2
+    v128 a128;
+
+    #define GO1(A, N, C) \
+    a128.mm = _mm_##A##_epi##N(a128_##N.mm); \
+    printf("p%s%s(", #A, #C); print_##N(a128_##N); \
+    printf(") = "); print_##N(a128); printf("\n");
+    #define GO2(A, N, C, A1, A2) \
+    a128.mm = _mm_##A##_epi##N(A1.mm, A2.mm); \
+    printf("p%s%s(", #A, #C); print_##N(A1); \
+    printf(", "); print_##N(A2); \
+    printf(") = "); print_##N(a128); printf("\n");
+
+
+    GO1(abs, 8, b)
+    GO1(abs, 16, w)
+    GO1(abs, 32, d)
+    GO2(shuffle, 8, b, a128_8, b128_8)
+    GO2(hadd, 16, w, a128_16, b128_16)
+    GO2(hadd, 32, d, a128_32, b128_32)
+
+
     return 0;
 }
|