about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
authorptitSeb <sebastien.chev@gmail.com>2024-03-02 14:09:31 +0100
committerptitSeb <sebastien.chev@gmail.com>2024-03-02 14:09:31 +0100
commit463e31251f6f49a69736113044c18de5e5d581cc (patch)
treecbccd063dbad09a25c9e48727a99e701a4332838 /src
parent9032f24a569c0817c7421a45a0956aff5bace7f6 (diff)
downloadbox64-463e31251f6f49a69736113044c18de5e5d581cc.tar.gz
box64-463e31251f6f49a69736113044c18de5e5d581cc.zip
[ARM64_DYNAREC] Small optim on some cases of F2 0F 70 opcode
Diffstat (limited to 'src')
-rw-r--r--src/dynarec/arm64/dynarec_arm64_f20f.c32
1 file changed, 21 insertions, 11 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_f20f.c b/src/dynarec/arm64/dynarec_arm64_f20f.c
index 7e84e970..a8d55cf3 100644
--- a/src/dynarec/arm64/dynarec_arm64_f20f.c
+++ b/src/dynarec/arm64/dynarec_arm64_f20f.c
@@ -358,17 +358,27 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             GETGX(v0, 1);

 

             u8 = F8;

-            // only low part need to be suffled. VTBL only handle 8bits value, so the 16bits suffles need to be changed in 8bits

-            u64 = 0;

-            for (int i=0; i<4; ++i) {

-                u64 |= ((uint64_t)((u8>>(i*2))&3)*2+0)<<(i*16+0);

-                u64 |= ((uint64_t)((u8>>(i*2))&3)*2+1)<<(i*16+8);

-            }

-            MOV64x(x2, u64);

-            d0 = fpu_get_scratch(dyn);

-            VMOVQDfrom(d0, 0, x2);

-            VTBL1_8(d0, v1, d0);

-            VMOVeD(v0, 0, d0, 0);

+            if(u8==0b00000000 || u8==0b01010101 || u8==0b10101010 || u8==0b11111111) {

+                if(v0==v1) {

+                    d0 = fpu_get_scratch(dyn);

+                    VMOVQ(d0, v1);

+                }

+                VDUP_16(v0, v1, u8&3);

+                if(v0==v1)

+                    v1 = d0;

+            } else {

+                // only the low part needs to be shuffled. VTBL only handles 8-bit values, so the 16-bit shuffles need to be converted to 8-bit

+                u64 = 0;

+                for (int i=0; i<4; ++i) {

+                    u64 |= ((uint64_t)((u8>>(i*2))&3)*2+0)<<(i*16+0);

+                    u64 |= ((uint64_t)((u8>>(i*2))&3)*2+1)<<(i*16+8);

+                }

+                MOV64x(x2, u64);

+                d0 = fpu_get_scratch(dyn);

+                VMOVQDfrom(d0, 0, x2);

+                VTBL1_8(d0, v1, d0);

+                VMOVeD(v0, 0, d0, 0);

+                }

             if(v0!=v1) {

                 VMOVeD(v0, 1, v1, 1);

             }