about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2025-02-12 19:24:27 +0100
committer ptitSeb <sebastien.chev@gmail.com> 2025-02-12 19:24:27 +0100
commit af0b0c6c9d95e8bb6359de0f42d7623233d1a7f9 (patch)
tree 545b0c1da19ba96e15345cf27f42eba2372d8879 /src
parent 0e588e444f2ba5e461a8fc106864b87cb2633320 (diff)
download box64-af0b0c6c9d95e8bb6359de0f42d7623233d1a7f9.tar.gz
box64-af0b0c6c9d95e8bb6359de0f42d7623233d1a7f9.zip
[ARM64_DYNAREC] Improved some 66 F0 opcodes, especially the unaligned path
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/arm64/arm64_emitter.h 1
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_66f0.c 46
2 files changed, 30 insertions, 17 deletions
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h
index 21060124..f56a68dc 100644
--- a/src/dynarec/arm64/arm64_emitter.h
+++ b/src/dynarec/arm64/arm64_emitter.h
@@ -262,6 +262,7 @@ int convert_bitmask(uint64_t bitmask);
 
 #define SUB_ext(sf, op, S, Rm, option, imm3, Rn, Rd)    ((sf)<<31 | (op)<<30 | (S)<<29 | 0b01011<<24 | 1<<21 | (Rm)<<16 | (option)<<13 | (imm3)<<10 | (Rn)<<5 | (Rd))
 #define SUBxw_UXTB(Rd, Rn, Rm)      EMIT(SUB_ext(rex.w, 1, 0, Rm, 0b000, 0, Rn, Rd))
+#define SUBw_UXTB(Rd, Rn, Rm)       EMIT(SUB_ext(0, 1, 0, Rm, 0b000, 0, Rn, Rd))
 
 // CCMP compare if cond is true, set nzcv if false
 #define CCMP_reg(sf, Rm, cond, Rn, nzcv)    ((sf)<<31 | 1<<30 | 1<<29 | 0b11010010<<21 | (Rm)<<16 | (cond)<<12 | (Rn)<<5 | (nzcv))
diff --git a/src/dynarec/arm64/dynarec_arm64_66f0.c b/src/dynarec/arm64/dynarec_arm64_66f0.c
index 7ee6e9a3..2ee22297 100644
--- a/src/dynarec/arm64/dynarec_arm64_66f0.c
+++ b/src/dynarec/arm64/dynarec_arm64_66f0.c
@@ -158,6 +158,8 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                             MARK3;
                             LDRH_U12(x1, wback, 0);
                             LDAXRB(x3, wback); // dummy read, to arm the write...
+                            SUBw_UXTB(x3, x3, x1);
+                            CBNZw_MARK3(x3);
                             CMPSw_REG(x6, x1);
                             B_MARK(cNE);
                             // EAX == Ed
@@ -166,8 +168,8 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                             STRH_U12(gd, wback, 0);
                         }
                     }
-                    SMDMB();
                     MARK;
+                    SMDMB();
                     // Common part (and fallback for EAX != Ed)
                     UFLAG_IF {emit_cmp16(dyn, ninst, x6, x1, x3, x4, x5);}
                     BFIx(xRAX, x1, 0, 16);
@@ -295,30 +297,36 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                             }
                         }
                         if(arm64_atomics) {
-                            LDADDALH(x5, x1, wback);
                             UFLAG_IF {
-                                emit_add16(dyn, ninst, x1, x5, x3, x4);    
+                                LDADDALH(x5, x1, wback);
+                            } else {
+                                STADDLH(x5, wback);
                             }
                         } else {
                             MARKLOCK;
                             LDAXRH(x1, wback);
-                            emit_add16(dyn, ninst, x1, x5, x3, x4);
-                            STLXRH(x3, x1, wback);
+                            ADDw_REG(x4, x1, x5);
+                            STLXRH(x3, x4, wback);
                             CBNZx_MARKLOCK(x3);
                         }
                         SMDMB();
                         if(!ALIGNED_ATOMICH) {
-                            B_NEXT_nocond;
+                            B_MARK2_nocond;
                             MARK;   // unaligned! also, not enough
                             LDRH_U12(x1, wback, 0);
                             LDAXRB(x4, wback);
-                            BFIw(x1, x4, 0, 8); // re-inject
-                            emit_add16(dyn, ninst, x1, x5, x3, x4);
-                            STLXRB(x3, x1, wback);
+                            SUBw_UXTB(x4, x4, x1);
+                            CBNZw_MARK(x4);
+                            ADDw_REG(x4, x1, x5);
+                            STLXRB(x3, x4, wback);
                             CBNZx_MARK(x3);
-                            STRH_U12(x1, wback, 0);    // put the whole value
+                            STRH_U12(x4, wback, 0);    // put the whole value
                             SMDMB();
                         }
+                        MARK2;
+                        UFLAG_IF {
+                            emit_add16(dyn, ninst, x1, x5, x3, x4);
+                        }
                     }
                     break;
                 case 1: //OR
@@ -472,30 +480,34 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                             NEGw_REG(x4, x5);
                             UFLAG_IF {
                                 LDADDALH(x4, x1, wback);
-                                emit_sub16(dyn, ninst, x1, x5, x3, x4);
                             } else {
                                 STADDLH(x4, wback);
                             }
                         } else {
                             MARKLOCK;
                             LDAXRH(x1, wback);
-                            emit_sub16(dyn, ninst, x1, x5, x3, x4);
+                            SUBw_REG(x4, x1, x5);
                             STLXRH(x3, x1, wback);
                             CBNZx_MARKLOCK(x3);
                         }
                         SMDMB();
                         if(!ALIGNED_ATOMICH) {
-                            B_NEXT_nocond;
+                            B_MARK2_nocond;
                             MARK;   // unaligned! also, not enough
                             LDRH_U12(x1, wback, 0);
                             LDAXRB(x4, wback);
-                            BFIw(x1, x4, 0, 8); // re-inject
-                            emit_sub16(dyn, ninst, x1, x5, x3, x4);
-                            STLXRB(x3, x1, wback);
+                            SUBw_UXTB(x4, x4, x1);
+                            CBNZw_MARK(x4);
+                            SUBw_REG(x4, x1, x5);
+                            STLXRB(x3, x4, wback);
                             CBNZx_MARK(x3);
-                            STRH_U12(x1, wback, 0);    // put the whole value
+                            STRH_U12(x4, wback, 0);    // put the whole value
                             SMDMB();
                         }
+                        MARK2;
+                        UFLAG_IF {
+                            emit_sub16(dyn, ninst, x1, x5, x3, x4);
+                        }
                     }
                     break;
                 case 6: //XOR