about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author: ptitSeb <sebastien.chev@gmail.com> 2025-01-05 14:54:24 +0100
committer: ptitSeb <sebastien.chev@gmail.com> 2025-01-05 14:55:28 +0100
commit: f160fe3300fd59bc3c0ace153ec817b0d3b68ae2 (patch)
tree: 8394a39868ff67d54a61ebc3290b650ae4e064e1
parent: 5d5dd3a3bc8824b81fb262c1c4b698d74a0d44c7 (diff)
download: box64-f160fe3300fd59bc3c0ace153ec817b0d3b68ae2.tar.gz
box64-f160fe3300fd59bc3c0ace153ec817b0d3b68ae2.zip
[ARM64_DYNAREC] Small improvements on (V)PMOVMSKB opcodes
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_660f.c 21
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_avx_66_0f.c 8
2 files changed, 11 insertions, 18 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c
index 0f2dd1fe..0f336876 100644
--- a/src/dynarec/arm64/dynarec_arm64_660f.c
+++ b/src/dynarec/arm64/dynarec_arm64_660f.c
@@ -2965,20 +2965,17 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             q1 = fpu_get_scratch(dyn, ninst);

             GETEX(q0, 0, 0);

             GETGD;

-            TABLE64(x1, (uintptr_t)&mask_shift8);

-            VLDR64_U12(v0, x1, 0);     // load shift

-            MOVI_8(v1, 0x80);   // load mask

-            VAND(q1, v1, q0);

-            USHL_8(q1, q1, v0); // shift

-            UADDLV_8(q1, q1);   // accumalte

-            VMOVBto(gd, q1, 0);

+            TABLE64(x2, 0x0706050403020100LL);

+            VDUPQD(v0, x2);

+            VSHRQ_8(q1, q0, 7);

+            USHLQ_8(q1, q1, v0); // shift

+            UADDLV_8(v1, q1);   // accumalte

+            VMOVBto(gd, v1, 0);

             // and now the high part

-            VMOVeD(q1, 0, q0, 1);

-            VAND(q1, v1, q1);  // keep highest bit

-            USHL_8(q1, q1, v0); // shift

+            VMOVeD(q1, 0, q1, 1);

             UADDLV_8(q1, q1);   // accumalte

-            VMOVBto(x1, q1, 0);

-            BFIx(gd, x1, 8, 8);

+            VMOVBto(x2, q1, 0);

+            BFIw(gd, x2, 8, 8);

             break;

         case 0xD8:

             INST_NAME("PSUBUSB Gx, Ex");

diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c
index 5ef0c6a0..5a404dd3 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c
@@ -56,9 +56,6 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip,
     MAYUSE(s0);
     MAYUSE(j64);
     MAYUSE(cacheupd);
-    #if STEP > 1
-    static const int8_t mask_shift8[] = { 0, 1, 2, 3, 4, 5, 6, 7 };
-    #endif
 
     /* Remember to not create a new fpu_scratch after some GY/VY/EY is created, because Y can be in the scratch area and might overlap (and scratch will win) */
 
@@ -1396,9 +1393,8 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip,
             q1 = fpu_get_scratch(dyn, ninst);
             GETEX_Y(q0, 0, 0);
             GETGD;
-            TABLE64(x2, (uintptr_t)&mask_shift8);
-            VLDR64_U12(v0, x2, 0);     // load shift
-            VDUPQ_64(v0, v0, 0);
+            TABLE64(x2, 0x0706050403020100LL);
+            VDUPQD(v0, x2);
             VSHRQ_8(q1, q0, 7);
             USHLQ_8(q1, q1, v0); // shift
             UADDLV_8(v1, q1);   // accumalte