| | | |
|---|---|---|
| author | ptitSeb <sebastien.chev@gmail.com> | 2024-03-02 14:09:31 +0100 |
| committer | ptitSeb <sebastien.chev@gmail.com> | 2024-03-02 14:09:31 +0100 |
| commit | 463e31251f6f49a69736113044c18de5e5d581cc (patch) | |
| tree | cbccd063dbad09a25c9e48727a99e701a4332838 | |
| parent | 9032f24a569c0817c7421a45a0956aff5bace7f6 (diff) | |
| download | box64-463e31251f6f49a69736113044c18de5e5d581cc.tar.gz box64-463e31251f6f49a69736113044c18de5e5d581cc.zip | |
[ARM64_DYNAREC] Small optim on some cases of F2 0F 70 opcode
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_f20f.c | 32 |

1 file changed, 21 insertions, 11 deletions
```diff
diff --git a/src/dynarec/arm64/dynarec_arm64_f20f.c b/src/dynarec/arm64/dynarec_arm64_f20f.c
index 7e84e970..a8d55cf3 100644
--- a/src/dynarec/arm64/dynarec_arm64_f20f.c
+++ b/src/dynarec/arm64/dynarec_arm64_f20f.c
@@ -358,17 +358,27 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             GETGX(v0, 1);
             u8 = F8;
-            // only low part need to be suffled. VTBL only handle 8bits value, so the 16bits suffles need to be changed in 8bits
-            u64 = 0;
-            for (int i=0; i<4; ++i) {
-                u64 |= ((uint64_t)((u8>>(i*2))&3)*2+0)<<(i*16+0);
-                u64 |= ((uint64_t)((u8>>(i*2))&3)*2+1)<<(i*16+8);
-            }
-            MOV64x(x2, u64);
-            d0 = fpu_get_scratch(dyn);
-            VMOVQDfrom(d0, 0, x2);
-            VTBL1_8(d0, v1, d0);
-            VMOVeD(v0, 0, d0, 0);
+            if(u8==0b00000000 || u8==0b01010101 || u8==0b10101010 || u8==0b11111111) {
+                if(v0==v1) {
+                    d0 = fpu_get_scratch(dyn);
+                    VMOVQ(d0, v1);
+                }
+                VDUP_16(v0, v1, u8&3);
+                if(v0==v1)
+                    v1 = d0;
+            } else {
+                // only low part need to be suffled. VTBL only handle 8bits value, so the 16bits suffles need to be changed in 8bits
+                u64 = 0;
+                for (int i=0; i<4; ++i) {
+                    u64 |= ((uint64_t)((u8>>(i*2))&3)*2+0)<<(i*16+0);
+                    u64 |= ((uint64_t)((u8>>(i*2))&3)*2+1)<<(i*16+8);
+                }
+                MOV64x(x2, u64);
+                d0 = fpu_get_scratch(dyn);
+                VMOVQDfrom(d0, 0, x2);
+                VTBL1_8(d0, v1, d0);
+                VMOVeD(v0, 0, d0, 0);
+            }
             if(v0!=v1) {
                 VMOVeD(v0, 1, v1, 1);
             }
```
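For context, F2 0F 70 is the SSE2 instruction PSHUFLW: the imm8 byte holds four 2-bit selectors, each picking one of the four low 16-bit words of the source, while the upper 64 bits pass through unchanged. When all four selectors are equal (imm8 is 0x00, 0x55, 0xAA or 0xFF), the shuffle degenerates into a broadcast of a single word, which the new fast path maps to a single NEON DUP element (VDUP_16) instead of building a byte-index table for VTBL. Below is a minimal C sketch of those semantics, separate from box64's actual emitter code; the `xmm_t` type and function names are illustrative only.

```c
#include <stdint.h>
#include <stdio.h>

typedef struct { uint16_t w[8]; } xmm_t;  /* 8 x 16-bit lanes of an XMM register (illustrative) */

/* PSHUFLW: each of the 4 low words of the result is selected from the
 * 4 low words of src by a 2-bit field of imm8; the high 64 bits pass through. */
static xmm_t pshuflw(xmm_t src, uint8_t imm8) {
    xmm_t dst;
    for (int i = 0; i < 4; ++i)
        dst.w[i] = src.w[(imm8 >> (2 * i)) & 3];
    for (int i = 4; i < 8; ++i)
        dst.w[i] = src.w[i];              /* upper half untouched */
    return dst;
}

/* Fast path tested by the commit: all four selectors equal means the
 * low half is a broadcast of one lane, so a single NEON DUP suffices. */
static int is_broadcast(uint8_t imm8) {
    return imm8 == 0x00 || imm8 == 0x55 || imm8 == 0xAA || imm8 == 0xFF;
}

/* Generic path: the same u64 the diff builds. VTBL shuffles bytes, so each
 * 16-bit selector sel becomes the byte-index pair (2*sel, 2*sel+1). */
static uint64_t vtbl_index_table(uint8_t imm8) {
    uint64_t u64 = 0;
    for (int i = 0; i < 4; ++i) {
        uint64_t sel = (imm8 >> (2 * i)) & 3;
        u64 |= (sel * 2 + 0) << (i * 16 + 0);
        u64 |= (sel * 2 + 1) << (i * 16 + 8);
    }
    return u64;
}

int main(void) {
    xmm_t v = {{ 10, 11, 12, 13, 14, 15, 16, 17 }};
    xmm_t r = pshuflw(v, 0x55);           /* imm8 = 0b01010101: every selector is 1 */
    printf("broadcast=%d low=[%u %u %u %u]\n", is_broadcast(0x55),
           (unsigned)r.w[0], (unsigned)r.w[1], (unsigned)r.w[2], (unsigned)r.w[3]);
    /* -> broadcast=1 low=[11 11 11 11] */
    printf("identity table: 0x%016llx\n",
           (unsigned long long)vtbl_index_table(0xE4));
    /* -> 0x0706050403020100 (imm8 0xE4 is the identity shuffle) */
    return 0;
}
```

The `vtbl_index_table` helper mirrors the u64 loop kept in the else branch: since VTBL indexes bytes rather than 16-bit lanes, each word selector expands to two consecutive byte indices, e.g. the identity shuffle imm8 = 0xE4 yields the table 0x0706050403020100.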