| author | Yang Liu <liuyang22@iscas.ac.cn> | 2024-03-08 15:39:05 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-03-08 08:39:05 +0100 |
| commit | 55c2c9ee59261540714c3861875d38d66e22ac37 (patch) | |
| tree | 1e94d454792bd34661e56d506521f6ba26e96db8 /src | |
| parent | 8fef5f2c69d0873d1a6371796587b5e892226cde (diff) | |
| download | box64-55c2c9ee59261540714c3861875d38d66e22ac37.tar.gz box64-55c2c9ee59261540714c3861875d38d66e22ac37.zip | |
[ARM64_DYNAREC] Fixed a faulty optimization on PSHUFHW (#1344)
* [ARM64_DYNAREC] Reverted a faulty optimization on PSHUFHW
* Fix it instead of a lazy revert
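For context: PSHUFHW shuffles only the upper four 16-bit lanes of the destination, selecting each from the upper half of the source, while the lower 64 bits pass through unchanged. The optimization being fixed covers the case where all four 2-bit selectors in the immediate are equal, so the whole high half becomes a broadcast of a single lane; the old code duplicated lane `u8 & 3` from the *low* half of the source, while the correct source lane is `(u8 & 3) + 4`. A minimal reference sketch of the instruction's semantics (plain C written for this note, not box64 code; `pshufhw_ref` is a name invented here):

```c
#include <stdint.h>
#include <string.h>

/* Reference semantics of PSHUFHW (illustration only, not dynarec code):
 * the low 64 bits are copied unchanged; each of the four high 16-bit
 * lanes is selected from the HIGH half of the source (lanes 4..7) by a
 * 2-bit field of imm8 -- hence the "+ 4" bias in the fixed fast path. */
static void pshufhw_ref(uint16_t dst[8], const uint16_t src[8], uint8_t imm8)
{
    memcpy(dst, src, 4 * sizeof(uint16_t));            // lanes 0..3 pass through
    for (int i = 0; i < 4; ++i)
        dst[4 + i] = src[4 + ((imm8 >> (2 * i)) & 3)]; // high lanes pick from src[4..7]
}
```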
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_f30f.c | 12 |
1 file changed, 6 insertions, 6 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_f30f.c b/src/dynarec/arm64/dynarec_arm64_f30f.c
index 8f47646c..c4ccbff4 100644
--- a/src/dynarec/arm64/dynarec_arm64_f30f.c
+++ b/src/dynarec/arm64/dynarec_arm64_f30f.c
@@ -312,7 +312,7 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             VMOVeS(v0, 0, v1, 0);    // to not erase uper part
             #endif
             break;
-
+
         case 0x6F:
             INST_NAME("MOVDQU Gx,Ex");// no alignment constraint on NEON here, so same as MOVDQA
             nextop = F8;
@@ -334,14 +334,14 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             GETGX(v0, 1);
             u8 = F8;
             d0 = fpu_get_scratch(dyn);
-            if(u8==0b00000000 || u8==0b01010101 || u8==0b10101010 || u8==0b11111111) {
-                VDUP_16(d0, v1, u8&3);
+            if (u8 == 0b00000000 || u8 == 0b01010101 || u8 == 0b10101010 || u8 == 0b11111111) {
+                VDUPQ_16(d0, v1, (u8 & 3) + 4);
             } else {
                 // only high part need to be suffled. VTBL only handle 8bits value, so the 16bits suffles need to be changed in 8bits
                 u64 = 0;
-                for (int i=0; i<4; ++i) {
-                    u64 |= ((uint64_t)((u8>>(i*2))&3)*2+8)<<(i*16+0);
-                    u64 |= ((uint64_t)((u8>>(i*2))&3)*2+9)<<(i*16+8);
+                for (int i = 0; i < 4; ++i) {
+                    u64 |= ((uint64_t)((u8 >> (i * 2)) & 3) * 2 + 8) << (i * 16 + 0);
+                    u64 |= ((uint64_t)((u8 >> (i * 2)) & 3) * 2 + 9) << (i * 16 + 8);
                 }
                 MOV64x(x2, u64);
                 VMOVQDfrom(d0, 0, x2);
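In the general case the 16-bit shuffle is lowered to a byte-wise table lookup (VTBL handles 8-bit indices only), so the loop above expands each 2-bit lane selector `s` into the pair of byte indices `s*2+8` and `s*2+9`, the two bytes of source lane `4+s`. A small stand-alone sketch (hypothetical host-side C, not part of the repository) that reproduces the constant for one immediate:

```c
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint8_t  u8  = 0b10101010;  /* example imm8: every 2-bit selector is 2 */
    uint64_t u64 = 0;
    for (int i = 0; i < 4; ++i) {
        /* selector for destination 16-bit lane i of the high half */
        uint64_t s = (u8 >> (i * 2)) & 3;
        u64 |= (s * 2 + 8) << (i * 16 + 0);  /* low byte of source lane 4+s  */
        u64 |= (s * 2 + 9) << (i * 16 + 8);  /* high byte of source lane 4+s */
    }
    printf("%016llx\n", (unsigned long long)u64);  /* prints 0d0c0d0c0d0c0d0c */
    return 0;
}
```

When all four selectors are equal, every 16-bit entry of this table is identical, so the lookup degenerates to the single-lane broadcast that the `VDUPQ_16` fast path now performs directly from lane `(u8 & 3) + 4`.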