diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2025-04-25 20:03:58 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2025-04-25 20:03:58 +0200 |
| commit | e4da025dc00257b2b6ad1f5d97df7960e80bcf0e (patch) | |
| tree | db2e6883e2dbfad5a667de8aecfbb7ca53e0ebe2 /src | |
| parent | b6b069cf8d854467e8fa3d2ebf7f60e975988e6a (diff) | |
| download | box64-e4da025dc00257b2b6ad1f5d97df7960e80bcf0e.tar.gz box64-e4da025dc00257b2b6ad1f5d97df7960e80bcf0e.zip | |
[ARM64_DYNAREC] Fixed some (rarely used) edge cases for the (V)PMULHRSW opcode (and improved tests)
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/arm64/arm64_emitter.h | 4 | ||||
| -rw-r--r-- | src/dynarec/arm64/arm64_printer.c | 42 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_0f.c | 5 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_660f.c | 9 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c | 11 |
5 files changed, 67 insertions, 4 deletions
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h index 91e4fba6..7cc424fb 100644 --- a/src/dynarec/arm64/arm64_emitter.h +++ b/src/dynarec/arm64/arm64_emitter.h @@ -2166,6 +2166,10 @@ int convert_bitmask(uint64_t bitmask); #define URHADDQ_16(Vd, Vn, Vm) EMIT(RHADD_vector(1, 1, 0b01, Vm, Vn, Vd)) #define URHADDQ_32(Vd, Vn, Vm) EMIT(RHADD_vector(1, 1, 0b10, Vm, Vn, Vd)) +//SRSHR/URSHR +#define RSHR(Q, U, immh, immb, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b011110<<23 | (immh)<<19 | (immb)<<16 | 1<<13 | 0<<12 | 1<<10 | (Rn)<<5 | (Rd)) +#define SRSHRQ_32(Vd, Vn, shift) EMIT(RSHR(1, 0, 0b0100 | (((32-(shift))>>3)&0b11), (32-(shift))&0b111, Vn, Vd)) + // QRDMULH Signed saturating (Rounding) Doubling Multiply returning High half #define QDMULH_vector(Q, U, size, Rm, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 0b10110<<11 | 1<<10 | (Rn)<<5 | (Rd)) #define SQRDMULH_8(Vd, Vn, Vm) EMIT(QDMULH_vector(0, 1, 0b00, Vm, Vn, Vd)) diff --git a/src/dynarec/arm64/arm64_printer.c b/src/dynarec/arm64/arm64_printer.c index 7d730bc9..3d369fb3 100644 --- a/src/dynarec/arm64/arm64_printer.c +++ b/src/dynarec/arm64/arm64_printer.c @@ -1587,6 +1587,28 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr) snprintf(buff, sizeof(buff), "%cRHADD V%d.%s, V%d.%s, V%d.%s", a.U?'U':'S', Rd, Vd, Rn, Vd, Rm, Vd); return buff; } + //S/URSHR + if(isMask(opcode, "0QU011110iiiiiii001001nnnnnddddd", &a)) { + int shft = 0; + int sz = 0; + const char* Y[] = {"8B", "16B", "4H", "8H", "2S", "4S", "??", "2D"}; + if(imm&0b1000000) { + sz = 3; + shft = imm&0b111111; + } else if(imm&0b100000) { + sz = 2; + shft = imm&0b1111; + } else if(imm&0b10000) { + sz = 1; + shft = imm&0b111; + } else if(imm&0b1000) { + sz = 0; + shft = imm&0b111; + } + const char* Vd = Y[(sz<<1) | a.Q]; + snprintf(buff, sizeof(buff), "%cRSHR V%d.%s, V%d.%s, #%d", a.U?'U':'S', Rd, Vd, Rn, Vd, shft); + return buff; + } //SQ(R)DMULH if(isMask(opcode, "0QU01110ff1mmmmm101101nnnnnddddd", 
&a)) { const char* Y[] = {"8B", "16B", "4H", "8H", "2S", "4S", "??", "???"}; @@ -1762,6 +1784,26 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr) snprintf(buff, sizeof(buff), "PMULL%s V%d.%s, V%d.%s, V%d.%s", a.Q?"2":"", Rd, Vd, Rn, Vn, Rm, Vn); return buff; } + // [S/U]MULL + if(isMask(opcode, "0QU01110ff1mmmmm110000nnnnnddddd", &a)) { + const char* Y[] = {"8B", "16B", "4H", "8H", "2S", "4S", "??", "??"}; + const char* Z[] = {"8H", "4S", "2D", "??"}; + int sz = sf; + const char* Vn = Y[(sz<<1)|a.Q]; + const char* Vd = Z[sz]; + snprintf(buff, sizeof(buff), "%cMULL%s V%d.%s, V%d.%s, V%d.%s", a.U?'U':'S', a.Q?"2":"", Rd, Vd, Rn, Vn, Rm, Vn); + return buff; + } + //XTN(2) + if(isMask(opcode, "0Q001110ff100001001010nnnnnddddd", &a)) { + const char* Y[] = {"8B", "16B", "4H", "8H", "2S", "4S", "??", "??"}; + const char* Z[] = {"8H", "4S", "2D", "??"}; + int sz = sf; + const char* Vd = Y[(sz<<1)|a.Q]; + const char* Vn = Z[sz]; + snprintf(buff, sizeof(buff), "XTN%s V%d.%s, V%d.%s", a.Q?"2":"", Rd, Vd, Rn, Vn); + return buff; + } // DMB if(isMask(opcode, "11010101000000110011nnnn10111111", &a)) { diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c index 9e62417c..b4778849 100644 --- a/src/dynarec/arm64/dynarec_arm64_0f.c +++ b/src/dynarec/arm64/dynarec_arm64_0f.c @@ -677,7 +677,10 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin nextop = F8; GETGM(q0); GETEM(q1, 0); - SQRDMULH_16(q0, q0, q1); + v0 = fpu_get_scratch(dyn, ninst); + VSMULL_16(v0, q0, q1); + SRSHRQ_32(v0, v0, 15); + XTN_16(q0, v0); break; case 0x1C: INST_NAME("PABSB Gm,Em"); diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c index 50100d12..fddd347a 100644 --- a/src/dynarec/arm64/dynarec_arm64_660f.c +++ b/src/dynarec/arm64/dynarec_arm64_660f.c @@ -454,7 +454,14 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n nextop = F8; GETGX(q0, 1); GETEX(q1, 0, 0); 
- SQRDMULHQ_16(q0, q0, q1); + v0 = fpu_get_scratch(dyn, ninst); + v1 = fpu_get_scratch(dyn, ninst); + VSMULL_16(v0, q0, q1); + VSMULL2_16(v1, q0, q1); + SRSHRQ_32(v0, v0, 15); + SRSHRQ_32(v1, v1, 15); + XTN_16(q0, v0); + XTN2_16(q0, v1); break; case 0x10: diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c index 5338c53a..91eefba3 100644 --- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c +++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c @@ -219,10 +219,17 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip case 0x0B: INST_NAME("VPMULHRSW Gx,Vx, Ex"); nextop = F8; + q0 = fpu_get_scratch(dyn, ninst); + q1 = fpu_get_scratch(dyn, ninst); for(int l=0; l<1+vex.l; ++l) { if(!l) { GETGX_empty_VXEX(v0, v2, v1, 0); } else { GETGY_empty_VYEY(v0, v2, v1); } - SQRDMULHQ_16(v0, v2, v1); - } + VSMULL_16(q0, v1, v2); + VSMULL2_16(q1, v1, v2); + SRSHRQ_32(q0, q0, 15); + SRSHRQ_32(q1, q1, 15); + XTN_16(v0, q0); + XTN2_16(v0, q1); + } if(!vex.l) YMM0(gd); break; case 0x0C: |