diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2022-03-30 12:17:52 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2022-03-30 12:17:52 +0200 |
| commit | 3f641c2b67ae210caa6ebe7a6e4253038dada6a5 (patch) | |
| tree | aa55e322d079edbb35de9c7a2c8d9627b475dd31 | |
| parent | 94c3cec69f7a1b497e0de8c019fab684d2769394 (diff) | |
| download | box64-3f641c2b67ae210caa6ebe7a6e4253038dada6a5.tar.gz box64-3f641c2b67ae210caa6ebe7a6e4253038dada6a5.zip | |
Improved test17 ([DYNAREC] Added 66 0F 3A 16 opcode, fixed 66 0F 38 24/34 opcodes)
| -rwxr-xr-x | src/dynarec/arm64/dynarec_arm64_660f.c | 19 | ||||
| -rw-r--r-- | tests/ref17.txt | 29 | ||||
| -rwxr-xr-x | tests/test17 | bin | 81488 -> 95272 bytes | |||
| -rw-r--r-- | tests/test17.c | 56 |
4 files changed, 88 insertions, 16 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c index aa8a724a..c739ee15 100755 --- a/src/dynarec/arm64/dynarec_arm64_660f.c +++ b/src/dynarec/arm64/dynarec_arm64_660f.c @@ -385,7 +385,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n GETEX(q1, 0); GETGX_empty(q0); SXTL_16(q0, q1); // 16bits->32bits - SXTL_32(q0, q1); // 32bits->64bits + SXTL_32(q0, q0); // 32bits->64bits break; case 0x25: INST_NAME("PMOVSXDQ Gx, Ex"); // SSE4 opcode! @@ -432,7 +432,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n GETEX(q1, 0); GETGX_empty(q0); UXTL_16(q0, q1); // 16bits->32bits - UXTL_32(q0, q1); // 32bits->64bits + UXTL_32(q0, q0); // 32bits->64bits break; case 0x35: INST_NAME("PMOVZXDQ Gx, Ex"); // SSE4 opcode! @@ -451,7 +451,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n break; case 0x3D: - INST_NAME("PMINSD Gx, Ex"); // SSE4 opcode! + INST_NAME("PMAXSD Gx, Ex"); // SSE4 opcode! 
nextop = F8; GETEX(q1, 0); GETGX(q0); @@ -603,6 +603,19 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } break; + case 0x16: + if(rex.w) {INST_NAME("PEXTRQ Ed, Gx, Ib");} else {INST_NAME("PEXTRD Ed, Gx, Ib");} + nextop = F8; + GETGX(q0); + GETED(1); + u8 = F8; + if(rex.w) { + VMOVQDto(ed, q0, (u8&1)); + } else { + VMOVSto(ed, q0, (u8&3)); + } + break; + case 0x22: INST_NAME("PINSRD Gx, ED, Ib"); nextop = F8; diff --git a/tests/ref17.txt b/tests/ref17.txt index 2bae8c46..43882799 100644 --- a/tests/ref17.txt +++ b/tests/ref17.txt @@ -152,9 +152,32 @@ cmpss 7 inf, inf => 0xffffffff cmpss 7 -inf, inf => 0xffffffff cmpss 7 inf, -inf => 0xffffffff cmpss 7 nan, nan => 0x0 +pshufb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0xff 0x80 0x2 0x2 0xff 0x0 0x0 0xff 0xfe 0x81 0x0 0x3 0x72 0x32 0xff 0x80 +phaddw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x7fff 0x7fff 0x3 0x8004 0xffff 0xfffe 0x9050 0x7fff +phaddd(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0x7fffffff 0x7fffffff 0x80000001 0x3 +phaddsw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x8000 0x7fff 0x3 0x8004 0xffff 0xfffe 0x9050 0x8000 +pmaddubsw(0x80ff 0x7f 0x201 0x8103 0x84fe 0x5272 0xa5 0x32c0 , 0x100 0x1505 0x8020 0xff 0x708 0x681 0xf0a 0x110 ) = 0x80 0x27b 0xff20 0xfffd 0xb8c 0xc95e 0x672 0xc32 +phsubw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x7fff 0x7fff 0xffff 0x8002 0x1 0x0 0x7050 0x7ffd +psignb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0x0 0x80 0x7f 0x0 0x1 0xfe 0xfd 0x0 0xfe 0x84 0x8e 0x52 0xa5 0x0 0xc0 0x32 +psignw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 
0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x1 0x8000 0x8001 0x0 0x1 0xfffe 0xfffd 0x7fff +psignd(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0xffffffff 0x80000000 0x7fffffff 0x0 +pmulhrsw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x1 0x8001 0xffff 0x0 0x0 0xfffe 0x0 0x7ffe +pblendvps(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe , 0x1 0x80000000 0x80000005 0xfffe ) = 0xffffffff 0x80000000 0x5 0x0 +ptestz(0x80000000ffffffff 0x7fffffff , 0x8000000000000001 0xfffffffe00000005 ) = 0 +ptestc(0x80000000ffffffff 0x7fffffff , 0x8000000000000001 0xfffffffe00000005 ) = 0 +ptestnzc(0x80000000ffffffff 0x7fffffff , 0x8000000000000001 0xfffffffe00000005 ) = 1 pabsb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 ) = 0x1 0x80 0x7f 0x0 0x1 0x2 0x3 0x7f 0x2 0x7c 0x72 0x52 0x5b 0x0 0x40 0x32 pabsw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 ) = 0x1 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x7fff pabsd(0xffffffff 0x80000000 0x7fffffff 0x0 ) = 0x1 0x80000000 0x7fffffff 0x0 -pshuffleb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0xff 0x80 0x2 0x2 0xff 0x0 0x0 0xff 0xfe 0x81 0x0 0x3 0x72 0x32 0xff 0x80 -phaddw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x7fff 0x7fff 0x3 0x8004 0xffff 0xfffe 0x9050 0x7fff -phaddd(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0x7fffffff 0x7fffffff 0x80000001 0x3 +pmovsxbw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 ) = 0xffff 0xffff 0x0 0xff80 0xffff 0x7f 0x0 0x0 +pmovsxbd(0xffffffff 0x80000000 0x7fffffff 0x0 ) = 0xffffffff 0xffffffff 0xffffffff 0xffffffff +pmovsxbq(0xffffffffffffffff 0x8000000000000000 ) = 0xffffffffffffffff 0xffffffffffffffff +pmovsxwd(0xffffffff 0x80000000 0x7fffffff 0x0 ) = 0xffffffff 
0xffffffff 0x0 0xffff8000 +pmovsxwq(0xffffffffffffffff 0x8000000000000000 ) = 0xffffffffffffffff 0xffffffffffffffff +pmovsxdq(0xffffffffffffffff 0x8000000000000000 ) = 0xffffffffffffffff 0xffffffffffffffff +pmovzxbw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 ) = 0xff 0xff 0x0 0x80 0xff 0x7f 0x0 0x0 +pmovzxbd(0xffffffff 0x80000000 0x7fffffff 0x0 ) = 0xff 0xff 0xff 0xff +pmovzxbq(0xffffffffffffffff 0x8000000000000000 ) = 0xff 0xff +pmovzxwd(0xffffffff 0x80000000 0x7fffffff 0x0 ) = 0xffff 0xffff 0x0 0x8000 +pmovzxwq(0xffffffffffffffff 0x8000000000000000 ) = 0xffff 0xffff +pmovzxdq(0xffffffffffffffff 0x8000000000000000 ) = 0xffffffff 0xffffffff diff --git a/tests/test17 b/tests/test17 index 0d0b89e4..b6be07fc 100755 --- a/tests/test17 +++ b/tests/test17 Binary files differdiff --git a/tests/test17.c b/tests/test17.c index e0c07685..d3992ea2 100644 --- a/tests/test17.c +++ b/tests/test17.c @@ -103,6 +103,9 @@ const v128 b128_16 = {.u16 = { const v128 b128_32 = {.u32 = { 0x00000001, 0x80000000, 0x00000005, 0xfffffffe }}; +const v128 c128_32 = {.u32 = { + 0x00000001, 0x80000000, 0x80000005, 0x0000fffe +}}; void print_8(v128 v) { for(int i=0; i<16; ++i) @@ -219,25 +222,58 @@ printf(N " %g, %g => %g\n", b, a, *(float*)&r); #undef GO1 #undef GO2 v128 a128; + int i; #define GO1(A, N, C) \ a128.mm = _mm_##A##_epi##N(a128_##N.mm); \ - printf("p%s%s(", #A, #C); print_##N(a128_##N); \ + printf("%s(", #C); print_##N(a128_##N); \ printf(") = "); print_##N(a128); printf("\n"); #define GO2(A, N, C, A1, A2) \ a128.mm = _mm_##A##_epi##N(A1.mm, A2.mm); \ - printf("p%s%s(", #A, #C); print_##N(A1); \ + printf("%s(", #C); print_##N(A1); \ printf(", "); print_##N(A2); \ printf(") = "); print_##N(a128); printf("\n"); + #define GO2i(A, A1, A2) \ + i = _mm_##A##_si128(A1.mm, A2.mm); \ + printf("p%s(", #A); print_64(A1); \ + printf(", "); print_64(A2); \ + printf(") = %d\n", i); + #define GO3PS(A, N, A1, A2, A3) \ + a128.mf = _mm_##A##_ps(A1.mf, A2.mf, A3.mf); \ + printf("p%s%s(", #A, "ps"); 
print_##N(A1); \ + printf(", "); print_##N(A2); \ + printf(", "); print_##N(A3); \ + printf(") = "); print_##N(a128); printf("\n"); - GO1(abs, 8, b) - GO1(abs, 16, w) - GO1(abs, 32, d) - GO2(shuffle, 8, b, a128_8, b128_8) - GO2(hadd, 16, w, a128_16, b128_16) - GO2(hadd, 32, d, a128_32, b128_32) - - + GO2(shuffle, 8, pshufb, a128_8, b128_8) + GO2(hadd, 16, phaddw, a128_16, b128_16) + GO2(hadd, 32, phaddd, a128_32, b128_32) + GO2(hadds, 16, phaddsw, a128_16, b128_16) + GO2(maddubs, 16, pmaddubsw, a128_8, b128_8) + GO2(hsub, 16, phsubw, a128_16, b128_16) + GO2(sign, 8, psignb, a128_8, b128_8) + GO2(sign, 16, psignw, a128_16, b128_16) + GO2(sign, 32, psignd, a128_32, b128_32) + GO2(mulhrs, 16, pmulhrsw, a128_16, b128_16) + GO3PS(blendv, 32, a128_32, b128_32, c128_32) + GO2i(testz, a128_32, b128_32) + GO2i(testc, a128_32, b128_32) + GO2i(testnzc, a128_32, b128_32) + GO1(abs, 8, pabsb) + GO1(abs, 16, pabsw) + GO1(abs, 32, pabsd) + GO1(cvtepi8, 16, pmovsxbw); + GO1(cvtepi8, 32, pmovsxbd); + GO1(cvtepi8, 64, pmovsxbq); + GO1(cvtepi16, 32, pmovsxwd); + GO1(cvtepi16, 64, pmovsxwq); + GO1(cvtepi32, 64, pmovsxdq); + GO1(cvtepu8, 16, pmovzxbw); + GO1(cvtepu8, 32, pmovzxbd); + GO1(cvtepu8, 64, pmovzxbq); + GO1(cvtepu16, 32, pmovzxwd); + GO1(cvtepu16, 64, pmovzxwq); + GO1(cvtepu32, 64, pmovzxdq); return 0; } |