diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2025-04-25 20:03:58 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2025-04-25 20:03:58 +0200 |
| commit | e4da025dc00257b2b6ad1f5d97df7960e80bcf0e (patch) | |
| tree | db2e6883e2dbfad5a667de8aecfbb7ca53e0ebe2 | |
| parent | b6b069cf8d854467e8fa3d2ebf7f60e975988e6a (diff) | |
| download | box64-e4da025dc00257b2b6ad1f5d97df7960e80bcf0e.tar.gz box64-e4da025dc00257b2b6ad1f5d97df7960e80bcf0e.zip | |
[ARM64_DYNAREC] Fixed some (rarely used) edge cases for the (V)PMULHRSW opcode (and improved tests)
| -rw-r--r-- | src/dynarec/arm64/arm64_emitter.h | 4 | ||||
| -rw-r--r-- | src/dynarec/arm64/arm64_printer.c | 42 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_0f.c | 5 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_660f.c | 9 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c | 11 | ||||
| -rw-r--r-- | tests/ref17.txt | 8 | ||||
| -rw-r--r-- | tests/ref30.txt | 5 | ||||
| -rwxr-xr-x | tests/test17 | bin | 137104 -> 155888 bytes | |||
| -rw-r--r-- | tests/test17.c | 2 | ||||
| -rwxr-xr-x | tests/test30 | bin | 535880 -> 536416 bytes | |||
| -rw-r--r-- | tests/test30.c | 5 |
11 files changed, 84 insertions, 7 deletions
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h index 91e4fba6..7cc424fb 100644 --- a/src/dynarec/arm64/arm64_emitter.h +++ b/src/dynarec/arm64/arm64_emitter.h @@ -2166,6 +2166,10 @@ int convert_bitmask(uint64_t bitmask); #define URHADDQ_16(Vd, Vn, Vm) EMIT(RHADD_vector(1, 1, 0b01, Vm, Vn, Vd)) #define URHADDQ_32(Vd, Vn, Vm) EMIT(RHADD_vector(1, 1, 0b10, Vm, Vn, Vd)) +//SRSHR/URSHR +#define RSHR(Q, U, immh, immb, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b011110<<23 | (immh)<<19 | (immb)<<16 | 1<<13 | 0<<12 | 1<<10 | (Rn)<<5 | (Rd)) +#define SRSHRQ_32(Vd, Vn, shift) EMIT(RSHR(1, 0, 0b0100 | (((32-(shift))>>3)&0b11), (32-(shift))&0b111, Vn, Vd)) + // QRDMULH Signed saturating (Rounding) Doubling Multiply returning High half #define QDMULH_vector(Q, U, size, Rm, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 0b10110<<11 | 1<<10 | (Rn)<<5 | (Rd)) #define SQRDMULH_8(Vd, Vn, Vm) EMIT(QDMULH_vector(0, 1, 0b00, Vm, Vn, Vd)) diff --git a/src/dynarec/arm64/arm64_printer.c b/src/dynarec/arm64/arm64_printer.c index 7d730bc9..3d369fb3 100644 --- a/src/dynarec/arm64/arm64_printer.c +++ b/src/dynarec/arm64/arm64_printer.c @@ -1587,6 +1587,28 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr) snprintf(buff, sizeof(buff), "%cRHADD V%d.%s, V%d.%s, V%d.%s", a.U?'U':'S', Rd, Vd, Rn, Vd, Rm, Vd); return buff; } + //S/URSHR + if(isMask(opcode, "0QU011110iiiiiii001001nnnnnddddd", &a)) { + int shft = 0; + int sz = 0; + const char* Y[] = {"8B", "16B", "4H", "8H", "2S", "4S", "??", "2D"}; + if(imm&0b1000000) { + sz = 3; + shft = imm&0b111111; + } else if(imm&0b100000) { + sz = 2; + shft = imm&0b1111; + } else if(imm&0b10000) { + sz = 1; + shft = imm&0b111; + } else if(imm&0b1000) { + sz = 0; + shft = imm&0b111; + } + const char* Vd = Y[(sz<<1) | a.Q]; + snprintf(buff, sizeof(buff), "%cRSHR V%d.%s, V%d.%s, #%d", a.U?'U':'S', Rd, Vd, Rn, Vd, shft); + return buff; + } //SQ(R)DMULH if(isMask(opcode, "0QU01110ff1mmmmm101101nnnnnddddd", 
&a)) { const char* Y[] = {"8B", "16B", "4H", "8H", "2S", "4S", "??", "???"}; @@ -1762,6 +1784,26 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr) snprintf(buff, sizeof(buff), "PMULL%s V%d.%s, V%d.%s, V%d.%s", a.Q?"2":"", Rd, Vd, Rn, Vn, Rm, Vn); return buff; } + // [S/U]MULL + if(isMask(opcode, "0QU01110ff1mmmmm110000nnnnnddddd", &a)) { + const char* Y[] = {"8B", "16B", "4H", "8H", "2S", "4S", "??", "??"}; + const char* Z[] = {"8H", "4S", "2D", "??"}; + int sz = sf; + const char* Vn = Y[(sz<<1)|a.Q]; + const char* Vd = Z[sz]; + snprintf(buff, sizeof(buff), "%cMULL%s V%d.%s, V%d.%s, V%d.%s", a.U?'U':'S', a.Q?"2":"", Rd, Vd, Rn, Vn, Rm, Vn); + return buff; + } + //XTN(2) + if(isMask(opcode, "0Q001110ff100001001010nnnnnddddd", &a)) { + const char* Y[] = {"8B", "16B", "4H", "8H", "2S", "4S", "??", "??"}; + const char* Z[] = {"8H", "4S", "2D", "??"}; + int sz = sf; + const char* Vd = Y[(sz<<1)|a.Q]; + const char* Vn = Z[sz]; + snprintf(buff, sizeof(buff), "XTN%s V%d.%s, V%d.%s", a.Q?"2":"", Rd, Vd, Rn, Vn); + return buff; + } // DMB if(isMask(opcode, "11010101000000110011nnnn10111111", &a)) { diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c index 9e62417c..b4778849 100644 --- a/src/dynarec/arm64/dynarec_arm64_0f.c +++ b/src/dynarec/arm64/dynarec_arm64_0f.c @@ -677,7 +677,10 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin nextop = F8; GETGM(q0); GETEM(q1, 0); - SQRDMULH_16(q0, q0, q1); + v0 = fpu_get_scratch(dyn, ninst); + VSMULL_16(v0, q0, q1); + SRSHRQ_32(v0, v0, 15); + XTN_16(q0, v0); break; case 0x1C: INST_NAME("PABSB Gm,Em"); diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c index 50100d12..fddd347a 100644 --- a/src/dynarec/arm64/dynarec_arm64_660f.c +++ b/src/dynarec/arm64/dynarec_arm64_660f.c @@ -454,7 +454,14 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n nextop = F8; GETGX(q0, 1); GETEX(q1, 0, 0); 
- SQRDMULHQ_16(q0, q0, q1); + v0 = fpu_get_scratch(dyn, ninst); + v1 = fpu_get_scratch(dyn, ninst); + VSMULL_16(v0, q0, q1); + VSMULL2_16(v1, q0, q1); + SRSHRQ_32(v0, v0, 15); + SRSHRQ_32(v1, v1, 15); + XTN_16(q0, v0); + XTN2_16(q0, v1); break; case 0x10: diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c index 5338c53a..91eefba3 100644 --- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c +++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c @@ -219,10 +219,17 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip case 0x0B: INST_NAME("VPMULHRSW Gx,Vx, Ex"); nextop = F8; + q0 = fpu_get_scratch(dyn, ninst); + q1 = fpu_get_scratch(dyn, ninst); for(int l=0; l<1+vex.l; ++l) { if(!l) { GETGX_empty_VXEX(v0, v2, v1, 0); } else { GETGY_empty_VYEY(v0, v2, v1); } - SQRDMULHQ_16(v0, v2, v1); - } + VSMULL_16(q0, v1, v2); + VSMULL2_16(q1, v1, v2); + SRSHRQ_32(q0, q0, 15); + SRSHRQ_32(q1, q1, 15); + XTN_16(v0, q0); + XTN2_16(v0, q1); + } if(!vex.l) YMM0(gd); break; case 0x0C: diff --git a/tests/ref17.txt b/tests/ref17.txt index 35f59cd7..76bf6770 100644 --- a/tests/ref17.txt +++ b/tests/ref17.txt @@ -4,14 +4,14 @@ ucomiss 1.000000, inf => 0x203 ucomiss inf, 1.000000 => 0x202 ucomiss 1.000000, -inf => 0x202 ucomiss -inf, 1.000000 => 0x203 -ucomiss 1.000000, nan => 0x203 -ucomiss nan, 1.000000 => 0x203 +ucomiss 1.000000, nan => 0x247 +ucomiss nan, 1.000000 => 0x247 ucomiss 1.000000, 1.000000 => 0x242 ucomiss 1.000000, 1.000000 => 0x242 ucomiss inf, inf => 0x242 ucomiss -inf, inf => 0x203 ucomiss inf, -inf => 0x202 -ucomiss nan, nan => 0x203 +ucomiss nan, nan => 0x247 minss 1, 2 => 1 minss 2, 1 => 1 minss -inf, 2 => -inf @@ -161,7 +161,9 @@ phsubw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff psignb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0x0 0x80 0x7f 0x0 0x1 0xfe 
0xfd 0x0 0xfe 0x84 0x8e 0x52 0xa5 0x0 0xc0 0x32 psignw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x1 0x8000 0x8001 0x0 0x1 0xfffe 0xfffd 0x7fff psignd(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0xffffffff 0x80000000 0x7fffffff 0x0 +pmulhrsw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 ) = 0x0 0x8000 0x7ffe 0x0 0x0 0x0 0x0 0x7ffe pmulhrsw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x1 0x8001 0xffff 0x0 0x0 0xfffe 0x0 0x7ffe +pmulhrsw(0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x8000 0x7ffe 0x0 0x0 0x0 0x6200 0x0 0x7ffe pblendvps(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe , 0x1 0x80000000 0x80000005 0xfffe ) = 0xffffffff 0x80000000 0x5 0x0 ptestz(0x80000000ffffffff 0x7fffffff , 0x8000000000000001 0xfffffffe00000005 ) = 0 ptestc(0x80000000ffffffff 0x7fffffff , 0x8000000000000001 0xfffffffe00000005 ) = 0 diff --git a/tests/ref30.txt b/tests/ref30.txt index af7e952c..7fe476b4 100644 --- a/tests/ref30.txt +++ b/tests/ref30.txt @@ -161,7 +161,12 @@ phsubw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff psignb(0xff 0x80 0x7f 0x0 0x1 0x2 0x3 0x81 0xfe 0x84 0x72 0x52 0xa5 0x0 0xc0 0x32 , 0x0 0x1 0x5 0x15 0x20 0x80 0xff 0x0 0x8 0x7 0x81 0x6 0xa 0xf 0x10 0x1 ) = 0x0 0x80 0x7f 0x0 0x1 0xfe 0xfd 0x0 0xfe 0x84 0x8e 0x52 0xa5 0x0 0xc0 0x32 psignw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x1 0x8000 0x8001 0x0 0x1 0xfffe 0xfffd 0x7fff psignd(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe ) = 0xffffffff 0x80000000 0x7fffffff 0x0 +pmulhrsw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 ) = 0x0 0x8000 0x7ffe 0x0 0x0 0x0 0x0 0x7ffe 
pmulhrsw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x1 0x8001 0xffff 0x0 0x0 0xfffe 0x0 0x7ffe +pmulhrsw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x7ffe 0x20 0x7f00 0x1 0x8000 0xa050 0xfff1 0x8008 ) = 0xffff 0xffe0 0x7eff 0x0 0xffff 0xffff 0x0 0x7ff7 +pmulhrsw(0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 , 0x8000 0x7fff 0xffff 0xffff 0x50 0x9000 0xfffe 0x8001 ) = 0x8000 0x7ffe 0x0 0x0 0x0 0x6200 0x0 0x7ffe +pmulhrsw(0xffff 0x8000 0x7fff 0x0 0x1 0x2 0x3 0x8001 , 0x7ffe 0x20 0x7f00 0x1 0x8000 0xa050 0xfff1 0x8008 ) = 0xffff 0xffe0 0x7eff 0x0 0xffff 0xffff 0x0 0x7ff7 +pmulhrsw(0x7ffe 0x20 0x7f00 0x1 0x8000 0xa050 0xfff1 0x8008 , 0x7ffe 0x20 0x7f00 0x1 0x8000 0xa050 0xfff1 0x8008 ) = 0x7ffc 0x0 0x7e02 0x0 0x8000 0x4788 0x0 0x7ff0 pblendvps(0xffffffff 0x80000000 0x7fffffff 0x0 , 0x1 0x80000000 0x5 0xfffffffe , 0x1 0x80000000 0x80000005 0xfffe ) = 0xffffffff 0x80000000 0x5 0x0 ptestz(0x80000000ffffffff 0x7fffffff , 0x8000000000000001 0xfffffffe00000005 ) = 0 ptestc(0x80000000ffffffff 0x7fffffff , 0x8000000000000001 0xfffffffe00000005 ) = 0 diff --git a/tests/test17 b/tests/test17 index 5b28c1ad..38692d76 100755 --- a/tests/test17 +++ b/tests/test17 Binary files differdiff --git a/tests/test17.c b/tests/test17.c index 8b0cfc40..63c51e4d 100644 --- a/tests/test17.c +++ b/tests/test17.c @@ -421,7 +421,9 @@ printf(N " %g, %g => %g\n", b, a, *(float*)&r); GO2(sign, 8, psignb, a128_8, b128_8) GO2(sign, 16, psignw, a128_16, b128_16) GO2(sign, 32, psignd, a128_32, b128_32) + GO2(mulhrs, 16, pmulhrsw, a128_16, a128_16) GO2(mulhrs, 16, pmulhrsw, a128_16, b128_16) + GO2(mulhrs, 16, pmulhrsw, b128_16, b128_16) GO3PS(blendv, 32, a128_32, b128_32, c128_32) GO2i(testz, a128_32, b128_32) GO2i(testc, a128_32, b128_32) diff --git a/tests/test30 b/tests/test30 index 9f3eb68f..4f895773 100755 --- a/tests/test30 +++ b/tests/test30 Binary files differdiff --git a/tests/test30.c b/tests/test30.c index 
c6a71177..03b97319 100644 --- a/tests/test30.c +++ b/tests/test30.c @@ -518,7 +518,12 @@ printf(N " %g, %g => %g\n", b, a, *(float*)&r); GO2(sign, 8, psignb, a128_8, b128_8) GO2(sign, 16, psignw, a128_16, b128_16) GO2(sign, 32, psignd, a128_32, b128_32) + GO2(mulhrs, 16, pmulhrsw, a128_16, a128_16) GO2(mulhrs, 16, pmulhrsw, a128_16, b128_16) + GO2(mulhrs, 16, pmulhrsw, a128_16, c128_16) + GO2(mulhrs, 16, pmulhrsw, b128_16, b128_16) + GO2(mulhrs, 16, pmulhrsw, a128_16, c128_16) + GO2(mulhrs, 16, pmulhrsw, c128_16, c128_16) GO3PS(blendv, 32, a128_32, b128_32, c128_32) GO2i(testz, a128_32, b128_32) GO2i(testc, a128_32, b128_32) |