about summary refs log tree commit diff stats
path: root/src/dynarec
diff options
context:
space:
mode:
author: ptitSeb <sebastien.chev@gmail.com> 2024-03-04 17:20:56 +0100
committer: ptitSeb <sebastien.chev@gmail.com> 2024-03-04 17:20:56 +0100
commit: d44f3d9baee8a0f7ce16bb3027f5a666a262aa07 (patch)
tree: 0f7c7e89ee5dd6bf73b658d11b4dfb33110e15f1 /src/dynarec
parent: d71e5f968abf66bb267d831bcc99fbd66f2d2d23 (diff)
download: box64-d44f3d9baee8a0f7ce16bb3027f5a666a262aa07.tar.gz
          box64-d44f3d9baee8a0f7ce16bb3027f5a666a262aa07.zip
[ARM64_DYNAREC] Small optims for SSE/SSE2 & strongmem>1
Diffstat (limited to 'src/dynarec')
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_660f.c    1
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_f20f.c    1
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_f30f.c   22
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_helper.h  8
4 files changed, 19 insertions, 13 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c
index 2e0d6e5a..627e12f2 100644
--- a/src/dynarec/arm64/dynarec_arm64_660f.c
+++ b/src/dynarec/arm64/dynarec_arm64_660f.c
@@ -2663,6 +2663,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 v1 = sse_get_reg_empty(dyn, ninst, x1, (nextop&7) + (rex.b<<3));

                 FMOVD(v1, v0);

             } else {

+                WILLWRITE2();

                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0);

                 VST64(v0, ed, fixedaddress);

                 SMWRITE2();

diff --git a/src/dynarec/arm64/dynarec_arm64_f20f.c b/src/dynarec/arm64/dynarec_arm64_f20f.c
index a8d55cf3..65607b52 100644
--- a/src/dynarec/arm64/dynarec_arm64_f20f.c
+++ b/src/dynarec/arm64/dynarec_arm64_f20f.c
@@ -71,6 +71,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 d0 = sse_get_reg(dyn, ninst, x1, ed, 1);

                 VMOVeD(d0, 0, v0, 0);

             } else {

+                WILLWRITE2();

                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0);

                 VST64(v0, ed, fixedaddress);

                 SMWRITE2();

diff --git a/src/dynarec/arm64/dynarec_arm64_f30f.c b/src/dynarec/arm64/dynarec_arm64_f30f.c
index d57d3c8b..8f47646c 100644
--- a/src/dynarec/arm64/dynarec_arm64_f30f.c
+++ b/src/dynarec/arm64/dynarec_arm64_f30f.c
@@ -333,16 +333,20 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             GETEX(v1, 0, 1) ;

             GETGX(v0, 1);

             u8 = F8;

-            // only high part need to be suffled. VTBL only handle 8bits value, so the 16bits suffles need to be changed in 8bits

-            u64 = 0;

-            for (int i=0; i<4; ++i) {

-                u64 |= ((uint64_t)((u8>>(i*2))&3)*2+8)<<(i*16+0);

-                u64 |= ((uint64_t)((u8>>(i*2))&3)*2+9)<<(i*16+8);

-            }

-            MOV64x(x2, u64);

             d0 = fpu_get_scratch(dyn);

-            VMOVQDfrom(d0, 0, x2);

-            VTBL1_8(d0, v1, d0);

+            if(u8==0b00000000 || u8==0b01010101 || u8==0b10101010 || u8==0b11111111) {

+                VDUP_16(d0, v1, u8&3);

+            } else {

+                // only high part need to be suffled. VTBL only handle 8bits value, so the 16bits suffles need to be changed in 8bits

+                u64 = 0;

+                for (int i=0; i<4; ++i) {

+                    u64 |= ((uint64_t)((u8>>(i*2))&3)*2+8)<<(i*16+0);

+                    u64 |= ((uint64_t)((u8>>(i*2))&3)*2+9)<<(i*16+8);

+                }

+                MOV64x(x2, u64);

+                VMOVQDfrom(d0, 0, x2);

+                VTBL1_8(d0, v1, d0);

+            }

             VMOVeD(v0, 1, d0, 0);

             if(v0!=v1) {

                 VMOVeD(v0, 0, v1, 0);

diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h
index 0fe4d252..563327e8 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.h
+++ b/src/dynarec/arm64/dynarec_arm64_helper.h
@@ -426,7 +426,7 @@
     if(MODREG) {                                                                                        \
         a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), w);                                      \
     } else {                                                                                            \
-        SMREAD();                                                                                       \
+        if(w) {WILLWRITE2();} else {SMREAD();}                                                          \
         addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<4, 15, rex, NULL, 0, D);  \
         a = fpu_get_scratch(dyn);                                                                       \
         VLD128(a, ed, fixedaddress);                                                                    \
@@ -445,7 +445,7 @@
     if(MODREG) {                                                                                        \
         a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), w);                                      \
     } else {                                                                                            \
-        SMREAD();                                                                                       \
+        if(w) {WILLWRITE2();} else {SMREAD();}                                                          \
         a = fpu_get_scratch(dyn);                                                                       \
         addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, D);   \
         VLD64(a, ed, fixedaddress);                                                                     \
@@ -459,7 +459,7 @@
     if(MODREG) {                                                                                        \
         a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), w);                                      \
     } else {                                                                                            \
-        SMREAD();                                                                                       \
+        if(w) {WILLWRITE2();} else {SMREAD();}                                                          \
         a = fpu_get_scratch(dyn);                                                                       \
         addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, D);   \
         VLD32(a, ed, fixedaddress);                                                                     \
@@ -473,7 +473,7 @@
     if(MODREG) {                                                                                        \
         a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), w);                                      \
     } else {                                                                                            \
-        SMREAD();                                                                                       \
+        if(w) {WILLWRITE2();} else {SMREAD();}                                                          \
         a = fpu_get_scratch(dyn);                                                                       \
         addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<1, 1, rex, NULL, 0, D);   \
         VLD16(a, ed, fixedaddress);                                                                     \