about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author	ptitSeb <sebastien.chev@gmail.com>	2025-08-01 11:12:43 +0200
committer	ptitSeb <sebastien.chev@gmail.com>	2025-08-01 11:12:43 +0200
commit	a17811f4a615f417a5742dde62ab683cf80020b0 (patch)
tree	f117a109605e5563f734ad075402bcb286b02110 /src
parent	d2c77ddb01a369efafec053de967b5174705d165 (diff)
download	box64-a17811f4a615f417a5742dde62ab683cf80020b0.tar.gz
	box64-a17811f4a615f417a5742dde62ab683cf80020b0.zip
[INTERP] Try to improve aligned LOCK CMPXCHG8B opcode
Diffstat (limited to 'src')
-rw-r--r--src/dynarec/arm64/arm64_lock.S25
-rw-r--r--src/dynarec/arm64/arm64_lock.h6
-rw-r--r--src/dynarec/la64/la64_lock.S14
-rw-r--r--src/dynarec/la64/la64_lock.h3
-rw-r--r--src/dynarec/native_lock.h3
-rw-r--r--src/dynarec/rv64/rv64_lock.S13
-rw-r--r--src/dynarec/rv64/rv64_lock.h3
-rw-r--r--src/emu/x64runf0.c22
8 files changed, 77 insertions, 12 deletions
diff --git a/src/dynarec/arm64/arm64_lock.S b/src/dynarec/arm64/arm64_lock.S
index 2d85fe5e..168898cf 100644
--- a/src/dynarec/arm64/arm64_lock.S
+++ b/src/dynarec/arm64/arm64_lock.S
@@ -23,6 +23,7 @@
 .global arm64_lock_storeifnull
 .global arm64_lock_storeifnull_d
 .global arm64_lock_storeifref
+.global arm64_lock_storeifref2
 .global arm64_lock_storeifref_d
 .global arm64_lock_storeifref2_d
 .global arm64_lock_decifnot0b
@@ -321,6 +322,30 @@ arm64_atomic_storeifref2_d:
     mov     w0, w2
     ret
 
+arm64_lock_storeifref2:
+    adrp    x3, cpuext
+    add     x3, x3, #:lo12:cpuext
+    ldr     w3, [x3]
+    tbnz    w3, #0, arm64_atomic_storeifref2
+    dmb     ish
+1:
+    // address is x0, value is x1, x1 store to x0 only if [x0] is x2. return old [x0] value
+    ldaxr   x3, [x0]
+    cmp     x2, x3
+    bne     2f
+    stlxr   w4, x1, [x0]
+    cbnz    w4, 1b
+2:
+    mov     x0, x3
+    ret
+
+arm64_atomic_storeifref2:
+    dmb     ish
+    // address is x0, value is x1, x1 store to x0 only if [x0] is x2. return old [x0] value
+    casal   x2, x1, [x0]
+    mov     x0, x2
+    ret
+
 arm64_lock_decifnot0b:
     dmb     ish
 1:
diff --git a/src/dynarec/arm64/arm64_lock.h b/src/dynarec/arm64/arm64_lock.h
index cca8d677..f488ddd5 100644
--- a/src/dynarec/arm64/arm64_lock.h
+++ b/src/dynarec/arm64/arm64_lock.h
@@ -55,11 +55,15 @@ extern void* arm64_atomic_storeifnull(void*p, void* val);
 extern void* arm64_lock_storeifref(void*p, void* val, void* ref);
 extern void* arm64_atomic_storeifref(void*p, void* val, void* ref);
 
+// Atomically store value to [p] only if [p] is ref. Return old [p] value (so ref or old)
+extern void* arm64_lock_storeifref2(void*p, void* val, void* ref);
+extern void* arm64_atomic_storeifref2(void*p, void* val, void* ref);
+
 // Atomically store value to [p] only if [p] is ref. Return new [p] value (so val or old)
 extern uint32_t arm64_lock_storeifref_d(void*p, uint32_t val, uint32_t ref);
 extern uint32_t arm64_atomic_storeifref_d(void*p, uint32_t val, uint32_t ref);
 
-// Atomically store value to [p] only if [p] is ref. Return new [p] value (so val or old)
+// Atomically store value to [p] only if [p] is ref. Return old [p] value (so ref or old)
 extern uint32_t arm64_lock_storeifref2_d(void*p, uint32_t val, uint32_t ref);
 extern uint32_t arm64_atomic_storeifref2_d(void*p, uint32_t val, uint32_t ref);
 
diff --git a/src/dynarec/la64/la64_lock.S b/src/dynarec/la64/la64_lock.S
index 18736afd..df7cfd83 100644
--- a/src/dynarec/la64/la64_lock.S
+++ b/src/dynarec/la64/la64_lock.S
@@ -11,6 +11,7 @@
 .global la64_lock_storeifnull
 .global la64_lock_storeifnull_d
 .global la64_lock_storeifref
+.global la64_lock_storeifref2
 .global la64_lock_storeifref_d
 .global la64_lock_storeifref2_d
 .global la64_lock_storeb
@@ -113,6 +114,19 @@ la64_lock_storeifref2_d:
     move $a0, $a3
     ret
 
+la64_lock_storeifref2:
+    // address is a0, value is a1, a1 store to a0 only if [a0] is a2. return old [a0] value
+    dbar 0
+1:
+    move $a4, $a1
+    ll.d $a3, $a0, 0
+    bne  $a2, $a3, 2f
+    sc.d $a4, $a0, 0
+    beqz $a4, 1b
+2:
+    move $a0, $a3
+    ret
+
 la64_lock_storeb:
     st.b $a1, $a0, 0
     dbar 0
diff --git a/src/dynarec/la64/la64_lock.h b/src/dynarec/la64/la64_lock.h
index e51267fe..531f06b3 100644
--- a/src/dynarec/la64/la64_lock.h
+++ b/src/dynarec/la64/la64_lock.h
@@ -27,6 +27,9 @@ extern void* la64_lock_storeifnull(void* p, void* val);
 // Atomically store value to [p] only if [p] is ref. Return new [p] value (so val or old)
 extern void* la64_lock_storeifref(void* p, void* val, void* ref);
 
+// Atomically store value to [p] only if [p] is ref. Return old [p] value (so ref or old)
+extern void* la64_lock_storeifref2(void* p, void* val, void* ref);
+
 // Atomically store value to [p] only if [p] is ref. Return new [p] value (so val or old)
 extern uint32_t la64_lock_storeifref_d(void* p, uint32_t val, uint32_t ref);
 
diff --git a/src/dynarec/native_lock.h b/src/dynarec/native_lock.h
index 5d4514ec..ba4cb131 100644
--- a/src/dynarec/native_lock.h
+++ b/src/dynarec/native_lock.h
@@ -19,6 +19,7 @@
 #define native_lock_xchg_h(A, B)            arm64_lock_xchg_h(A, B)

 #define native_lock_xchg_b(A, B)            arm64_lock_xchg_b(A, B)

 #define native_lock_storeifref(A, B, C)     arm64_lock_storeifref(A, B, C)

+#define native_lock_storeifref2(A, B, C)    arm64_lock_storeifref2(A, B, C)

 #define native_lock_storeifref_d(A, B, C)   arm64_lock_storeifref_d(A, B, C)

 #define native_lock_storeifref2_d(A, B, C)  arm64_lock_storeifref2_d(A, B, C)

 #define native_lock_storeifnull(A, B)       arm64_lock_storeifnull(A, B)

@@ -45,6 +46,7 @@
 #define native_lock_xchg_dd(A, B)           rv64_lock_xchg_dd(A, B)

 #define native_lock_xchg_d(A, B)            rv64_lock_xchg_d(A, B)

 #define native_lock_storeifref(A, B, C)     rv64_lock_storeifref(A, B, C)

+#define native_lock_storeifref2(A, B, C)    rv64_lock_storeifref2(A, B, C)

 #define native_lock_storeifref_d(A, B, C)   rv64_lock_storeifref_d(A, B, C)

 #define native_lock_storeifref2_d(A, B, C)  rv64_lock_storeifref2_d(A, B, C)

 #define native_lock_storeifnull(A, B)       rv64_lock_storeifnull(A, B)

@@ -83,6 +85,7 @@
 #define native_lock_xchg_dd(A, B)           la64_lock_xchg_dd(A, B)

 #define native_lock_xchg_d(A, B)            la64_lock_xchg_d(A, B)

 #define native_lock_storeifref(A, B, C)     la64_lock_storeifref(A, B, C)

+#define native_lock_storeifref2(A, B, C)    la64_lock_storeifref2(A, B, C)

 #define native_lock_storeifref_d(A, B, C)   la64_lock_storeifref_d(A, B, C)

 #define native_lock_storeifref2_d(A, B, C)  la64_lock_storeifref2_d(A, B, C)

 #define native_lock_storeifnull(A, B)       la64_lock_storeifnull(A, B)

diff --git a/src/dynarec/rv64/rv64_lock.S b/src/dynarec/rv64/rv64_lock.S
index 36f3c67f..999561f7 100644
--- a/src/dynarec/rv64/rv64_lock.S
+++ b/src/dynarec/rv64/rv64_lock.S
@@ -10,6 +10,7 @@
 .global rv64_lock_storeifnull
 .global rv64_lock_storeifnull_d
 .global rv64_lock_storeifref
+.global rv64_lock_storeifref2
 .global rv64_lock_storeifref_d
 .global rv64_lock_storeifref2_d
 .global rv64_lock_decifnot0b
@@ -100,6 +101,18 @@ rv64_lock_storeifref2_d:
     mv      a0, a3
     ret
 
+rv64_lock_storeifref2:
+    // address is a0, value is a1, a1 store to a0 only if [a0] is a2. return old [a0] value
+    fence   rw, rw
+1:    
+    lr.d    a3, (a0)
+    bne     a2, a3, 2f
+    sc.d    a4, a1, (a0)
+    bnez    a4, 1b
+2:
+    mv      a0, a3
+    ret
+
 rv64_lock_decifnot0b:
     fence   rw, rw
     andi    a3, a0, 3
diff --git a/src/dynarec/rv64/rv64_lock.h b/src/dynarec/rv64/rv64_lock.h
index 9c55dffc..c6857b92 100644
--- a/src/dynarec/rv64/rv64_lock.h
+++ b/src/dynarec/rv64/rv64_lock.h
@@ -23,6 +23,9 @@ extern void* rv64_lock_storeifnull(void* p, void* val);
 // Atomically store value to [p] only if [p] is ref. Return new [p] value (so val or old)
 extern void* rv64_lock_storeifref(void* p, void* val, void* ref);
 
+// Atomically store value to [p] only if [p] is ref. Return old [p] value (so ref or old)
+extern void* rv64_lock_storeifref2(void* p, void* val, void* ref);
+
 // Atomically store value to [p] only if [p] is ref. Return new [p] value (so val or old)
 extern uint32_t rv64_lock_storeifref_d(void* p, uint32_t val, uint32_t ref);
 
diff --git a/src/emu/x64runf0.c b/src/emu/x64runf0.c
index b4af10c1..43b6b1d7 100644
--- a/src/emu/x64runf0.c
+++ b/src/emu/x64runf0.c
@@ -942,34 +942,34 @@ uintptr_t RunF0(x64emu_t *emu, rex_t rex, uintptr_t addr)
 #endif

                             } else

                                 if(((uintptr_t)ED)&0x7) {

+                                    tmp64u = R_EAX | (((uint64_t)R_EDX)<<32);

                                     do {

                                         native_lock_get_b(ED);

-                                        tmp64u = ED->q[0];

-                                        if((R_EAX == (tmp64u&0xffffffff)) && (R_EDX == ((tmp64u>>32)&0xffffffff))) {

+                                        tmp64u2 = ED->q[0];

+                                        if(tmp64u == tmp64u2) {

                                             SET_FLAG(F_ZF);

                                             tmp32s = native_lock_write_b(ED, emu->regs[_BX].byte[0]);

                                             if(!tmp32s)

                                                 ED->q[0] = R_EBX|(((uint64_t)R_ECX)<<32);

                                         } else {

                                             CLEAR_FLAG(F_ZF);

-                                            R_RAX = tmp64u&0xffffffff;

-                                            R_RDX = (tmp64u>>32)&0xffffffff;

+                                            R_RAX = tmp64u2&0xffffffff;

+                                            R_RDX = (tmp64u2>>32)&0xffffffff;

                                             tmp32s = 0;

                                         }

                                     } while(tmp32s);

-                                } else

-                                do {

-                                    tmp64u = native_lock_read_dd(ED);

-                                    if((R_EAX == (tmp64u&0xffffffff)) && (R_EDX == ((tmp64u>>32)&0xffffffff))) {

+                                } else {

+                                    tmp64u = R_EAX | (((uint64_t)R_EDX)<<32);

+                                    tmp64u2 = R_EBX | (((uint64_t)R_ECX)<<32);

+                                    tmp64u2 = (uint64_t)native_lock_storeifref2(ED, (void*)tmp64u2, (void*)tmp64u);

+                                    if(tmp64u2==tmp64u) {

                                         SET_FLAG(F_ZF);

-                                        tmp32s = native_lock_write_dd(ED, R_EBX|(((uint64_t)R_ECX)<<32));

                                     } else {

                                         CLEAR_FLAG(F_ZF);

                                         R_RAX = tmp64u&0xffffffff;

                                         R_RDX = (tmp64u>>32)&0xffffffff;

-                                        tmp32s = 0;

                                     }

-                                } while(tmp32s);

+                                }

 #else

                             pthread_mutex_lock(&my_context->mutex_lock);

                             if(rex.w) {