| author | ptitSeb <sebastien.chev@gmail.com> | 2025-08-01 11:12:43 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2025-08-01 11:12:43 +0200 |
| commit | a17811f4a615f417a5742dde62ab683cf80020b0 (patch) | |
| tree | f117a109605e5563f734ad075402bcb286b02110 /src | |
| parent | d2c77ddb01a369efafec053de967b5174705d165 (diff) | |
| download | box64-a17811f4a615f417a5742dde62ab683cf80020b0.tar.gz box64-a17811f4a615f417a5742dde62ab683cf80020b0.zip | |
[INTERP] Try to improve aligned LOCK CMPXCHG8B opcode
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/arm64/arm64_lock.S | 25 |
| -rw-r--r-- | src/dynarec/arm64/arm64_lock.h | 6 |
| -rw-r--r-- | src/dynarec/la64/la64_lock.S | 14 |
| -rw-r--r-- | src/dynarec/la64/la64_lock.h | 3 |
| -rw-r--r-- | src/dynarec/native_lock.h | 3 |
| -rw-r--r-- | src/dynarec/rv64/rv64_lock.S | 13 |
| -rw-r--r-- | src/dynarec/rv64/rv64_lock.h | 3 |
| -rw-r--r-- | src/emu/x64runf0.c | 22 |
8 files changed, 77 insertions, 12 deletions
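
For reference while reading the diff below: `LOCK CMPXCHG8B m64` compares EDX:EAX with the 64-bit memory operand; if they match it stores ECX:EBX there and sets ZF, otherwise it loads the memory operand into EDX:EAX and clears ZF. A minimal C sketch of the aligned case follows, using the GCC/Clang `__atomic_compare_exchange_n` builtin as a stand-in for the per-architecture helpers this commit adds (the function name and signature here are illustrative, not box64 code):

```c
#include <stdbool.h>
#include <stdint.h>

/* Sketch of aligned LOCK CMPXCHG8B semantics: one 64-bit CAS performs both
   the compare and the conditional store, so no retry loop is needed. */
static bool cmpxchg8b_aligned(uint64_t *mem, uint32_t *eax, uint32_t *edx,
                              uint32_t ebx, uint32_t ecx)
{
    uint64_t expected = (uint64_t)*eax | ((uint64_t)*edx << 32);
    uint64_t desired  = (uint64_t)ebx  | ((uint64_t)ecx << 32);
    if (__atomic_compare_exchange_n(mem, &expected, desired, false,
                                    __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
        return true;                  /* ZF = 1: ECX:EBX was stored        */
    *eax = (uint32_t)expected;        /* ZF = 0: publish the value found   */
    *edx = (uint32_t)(expected >> 32);
    return false;
}
```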
diff --git a/src/dynarec/arm64/arm64_lock.S b/src/dynarec/arm64/arm64_lock.S
index 2d85fe5e..168898cf 100644
--- a/src/dynarec/arm64/arm64_lock.S
+++ b/src/dynarec/arm64/arm64_lock.S
@@ -23,6 +23,7 @@
 .global arm64_lock_storeifnull
 .global arm64_lock_storeifnull_d
 .global arm64_lock_storeifref
+.global arm64_lock_storeifref2
 .global arm64_lock_storeifref_d
 .global arm64_lock_storeifref2_d
 .global arm64_lock_decifnot0b
@@ -321,6 +322,30 @@ arm64_atomic_storeifref2_d:
     mov     w0, w2
     ret
 
+arm64_lock_storeifref2:
+    adrp    x3, cpuext
+    add     x3, x3, #:lo12:cpuext
+    ldr     w3, [x3]
+    tbnz    w3, #0, arm64_atomic_storeifref2
+    dmb     ish
+1:
+    // address is x0, value is x1, x1 store to x0 only if [x0] is x2. return old [x0] value
+    ldaxr   x3, [x0]
+    cmp     x2, x3
+    bne     2f
+    stlxr   w4, x1, [x0]
+    cbnz    w4, 1b
+2:
+    mov     x0, x3
+    ret
+
+arm64_atomic_storeifref2:
+    dmb     ish
+    // address is x0, value is x1, x1 store to x0 only if [x0] is x2. return old [x0] value
+    casal   x2, x1, [x0]
+    mov     x0, x2
+    ret
+
 arm64_lock_decifnot0b:
     dmb     ish
 1:
diff --git a/src/dynarec/arm64/arm64_lock.h b/src/dynarec/arm64/arm64_lock.h
index cca8d677..f488ddd5 100644
--- a/src/dynarec/arm64/arm64_lock.h
+++ b/src/dynarec/arm64/arm64_lock.h
@@ -55,11 +55,15 @@ extern void* arm64_atomic_storeifnull(void*p, void* val);
 extern void* arm64_lock_storeifref(void*p, void* val, void* ref);
 extern void* arm64_atomic_storeifref(void*p, void* val, void* ref);
 
+// Atomically store value to [p] only if [p] is ref. Return old [p] value (so val or old)
+extern void* arm64_lock_storeifref2(void*p, void* val, void* ref);
+extern void* arm64_atomic_storeifref2(void*p, void* val, void* ref);
+
 // Atomically store value to [p] only if [p] is ref. Return new [p] value (so val or old)
 extern uint32_t arm64_lock_storeifref_d(void*p, uint32_t val, uint32_t ref);
 extern uint32_t arm64_atomic_storeifref_d(void*p, uint32_t val, uint32_t ref);
 
-// Atomically store value to [p] only if [p] is ref. Return new [p] value (so val or old)
+// Atomically store value to [p] only if [p] is ref. Return old [p] value (so ref or old)
 extern uint32_t arm64_lock_storeifref2_d(void*p, uint32_t val, uint32_t ref);
 extern uint32_t arm64_atomic_storeifref2_d(void*p, uint32_t val, uint32_t ref);
diff --git a/src/dynarec/la64/la64_lock.S b/src/dynarec/la64/la64_lock.S
index 18736afd..df7cfd83 100644
--- a/src/dynarec/la64/la64_lock.S
+++ b/src/dynarec/la64/la64_lock.S
@@ -11,6 +11,7 @@
 .global la64_lock_storeifnull
 .global la64_lock_storeifnull_d
 .global la64_lock_storeifref
+.global la64_lock_storeifref2
 .global la64_lock_storeifref_d
 .global la64_lock_storeifref2_d
 .global la64_lock_storeb
@@ -113,6 +114,19 @@ la64_lock_storeifref2_d:
     move    $a0, $a3
     ret
 
+la64_lock_storeifref2:
+    // address is a0, value is a1, a1 store to a0 only if [a0] is a2. return old [a0] value
+    dbar    0
+1:
+    move    $a4, $a1
+    ll.d    $a3, $a0, 0
+    bne     $a2, $a3, 2f
+    sc.d    $a4, $a0, 0
+    beqz    $a4, 1b
+2:
+    move    $a0, $a3
+    ret
+
 la64_lock_storeb:
     st.b    $a1, $a0, 0
     dbar    0
diff --git a/src/dynarec/la64/la64_lock.h b/src/dynarec/la64/la64_lock.h
index e51267fe..531f06b3 100644
--- a/src/dynarec/la64/la64_lock.h
+++ b/src/dynarec/la64/la64_lock.h
@@ -27,6 +27,9 @@ extern void* la64_lock_storeifnull(void* p, void* val);
 // Atomically store value to [p] only if [p] is ref. Return new [p] value (so val or old)
 extern void* la64_lock_storeifref(void* p, void* val, void* ref);
 
+// Atomically store value to [p] only if [p] is ref. Return old [p] value (so ref or old)
+extern void* la64_lock_storeifref2(void* p, void* val, void* ref);
+
 // Atomically store value to [p] only if [p] is ref. Return new [p] value (so val or old)
 extern uint32_t la64_lock_storeifref_d(void* p, uint32_t val, uint32_t ref);
diff --git a/src/dynarec/native_lock.h b/src/dynarec/native_lock.h
index 5d4514ec..ba4cb131 100644
--- a/src/dynarec/native_lock.h
+++ b/src/dynarec/native_lock.h
@@ -19,6 +19,7 @@
 #define native_lock_xchg_h(A, B) arm64_lock_xchg_h(A, B)
 #define native_lock_xchg_b(A, B) arm64_lock_xchg_b(A, B)
 #define native_lock_storeifref(A, B, C) arm64_lock_storeifref(A, B, C)
+#define native_lock_storeifref2(A, B, C) arm64_lock_storeifref2(A, B, C)
 #define native_lock_storeifref_d(A, B, C) arm64_lock_storeifref_d(A, B, C)
 #define native_lock_storeifref2_d(A, B, C) arm64_lock_storeifref2_d(A, B, C)
 #define native_lock_storeifnull(A, B) arm64_lock_storeifnull(A, B)
@@ -45,6 +46,7 @@
 #define native_lock_xchg_dd(A, B) rv64_lock_xchg_dd(A, B)
 #define native_lock_xchg_d(A, B) rv64_lock_xchg_d(A, B)
 #define native_lock_storeifref(A, B, C) rv64_lock_storeifref(A, B, C)
+#define native_lock_storeifref2(A, B, C) rv64_lock_storeifref2(A, B, C)
 #define native_lock_storeifref_d(A, B, C) rv64_lock_storeifref_d(A, B, C)
 #define native_lock_storeifref2_d(A, B, C) rv64_lock_storeifref2_d(A, B, C)
 #define native_lock_storeifnull(A, B) rv64_lock_storeifnull(A, B)
@@ -83,6 +85,7 @@
 #define native_lock_xchg_dd(A, B) la64_lock_xchg_dd(A, B)
 #define native_lock_xchg_d(A, B) la64_lock_xchg_d(A, B)
 #define native_lock_storeifref(A, B, C) la64_lock_storeifref(A, B, C)
+#define native_lock_storeifref2(A, B, C) la64_lock_storeifref2(A, B, C)
 #define native_lock_storeifref_d(A, B, C) la64_lock_storeifref_d(A, B, C)
 #define native_lock_storeifref2_d(A, B, C) la64_lock_storeifref2_d(A, B, C)
 #define native_lock_storeifnull(A, B) la64_lock_storeifnull(A, B)
diff --git a/src/dynarec/rv64/rv64_lock.S b/src/dynarec/rv64/rv64_lock.S
index 36f3c67f..999561f7 100644
--- a/src/dynarec/rv64/rv64_lock.S
+++ b/src/dynarec/rv64/rv64_lock.S
@@ -10,6 +10,7 @@
 .global rv64_lock_storeifnull
 .global rv64_lock_storeifnull_d
 .global rv64_lock_storeifref
+.global rv64_lock_storeifref2
 .global rv64_lock_storeifref_d
 .global rv64_lock_storeifref2_d
 .global rv64_lock_decifnot0b
@@ -100,6 +101,18 @@ rv64_lock_storeifref2_d:
     mv      a0, a3
     ret
 
+rv64_lock_storeifref2:
+    // address is a0, value is a1, a1 store to a0 only if [a0] is a2. return old [a0] value
+    fence   rw, rw
+1:
+    lr.d    a3, (a0)
+    bne     a2, a3, 2f
+    sc.d    a4, a1, (a0)
+    bnez    a4, 1b
+2:
+    mv      a0, a3
+    ret
+
 rv64_lock_decifnot0b:
     fence   rw, rw
     andi    a3, a0, 3
diff --git a/src/dynarec/rv64/rv64_lock.h b/src/dynarec/rv64/rv64_lock.h
index 9c55dffc..c6857b92 100644
--- a/src/dynarec/rv64/rv64_lock.h
+++ b/src/dynarec/rv64/rv64_lock.h
@@ -23,6 +23,9 @@ extern void* rv64_lock_storeifnull(void* p, void* val);
 // Atomically store value to [p] only if [p] is ref. Return new [p] value (so val or old)
 extern void* rv64_lock_storeifref(void* p, void* val, void* ref);
 
+// Atomically store value to [p] only if [p] is ref. Return old [p] value (so ref or old)
+extern void* rv64_lock_storeifref2(void* p, void* val, void* ref);
+
 // Atomically store value to [p] only if [p] is ref. Return new [p] value (so val or old)
 extern uint32_t rv64_lock_storeifref_d(void* p, uint32_t val, uint32_t ref);
diff --git a/src/emu/x64runf0.c b/src/emu/x64runf0.c
index b4af10c1..43b6b1d7 100644
--- a/src/emu/x64runf0.c
+++ b/src/emu/x64runf0.c
@@ -942,34 +942,34 @@ uintptr_t RunF0(x64emu_t *emu, rex_t rex, uintptr_t addr)
                 #endif
             } else if(((uintptr_t)ED)&0x7) {
+                tmp64u = R_EAX | (((uint64_t)R_EDX)<<32);
                 do {
                     native_lock_get_b(ED);
-                    tmp64u = ED->q[0];
-                    if((R_EAX == (tmp64u&0xffffffff)) && (R_EDX == ((tmp64u>>32)&0xffffffff))) {
+                    tmp64u2 = ED->q[0];
+                    if(tmp64u == tmp64u2) {
                         SET_FLAG(F_ZF);
                         tmp32s = native_lock_write_b(ED, emu->regs[_BX].byte[0]);
                         if(!tmp32s)
                             ED->q[0] = R_EBX|(((uint64_t)R_ECX)<<32);
                     } else {
                         CLEAR_FLAG(F_ZF);
-                        R_RAX = tmp64u&0xffffffff;
-                        R_RDX = (tmp64u>>32)&0xffffffff;
+                        R_RAX = tmp64u2&0xffffffff;
+                        R_RDX = (tmp64u2>>32)&0xffffffff;
                         tmp32s = 0;
                     }
                 } while(tmp32s);
-            } else
-                do {
-                    tmp64u = native_lock_read_dd(ED);
-                    if((R_EAX == (tmp64u&0xffffffff)) && (R_EDX == ((tmp64u>>32)&0xffffffff))) {
+            } else {
+                tmp64u = R_EAX | (((uint64_t)R_EDX)<<32);
+                tmp64u2 = R_EBX | (((uint64_t)R_ECX)<<32);
+                tmp64u2 = (uint64_t)native_lock_storeifref2(ED, (void*)tmp64u2, (void*)tmp64u);
+                if(tmp64u2==tmp64u) {
                     SET_FLAG(F_ZF);
-                        tmp32s = native_lock_write_dd(ED, R_EBX|(((uint64_t)R_ECX)<<32));
                 } else {
                     CLEAR_FLAG(F_ZF);
                     R_RAX = tmp64u&0xffffffff;
                     R_RDX = (tmp64u>>32)&0xffffffff;
-                    tmp32s = 0;
                 }
-                } while(tmp32s);
+            }
             #else
             pthread_mutex_lock(&my_context->mutex_lock);
             if(rex.w) {
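
Unlike the existing `storeifref` helpers, which per their header comments return the new `[p]` value, the `storeifref2` variants added here return the old `[p]` value, so a single comparison of the return value against EDX:EAX is enough to decide ZF in the aligned path. A rough C equivalent of that contract is sketched below; it assumes a sequentially consistent CAS is acceptable and is not the actual per-architecture implementation:

```c
#include <stdint.h>

/* Sketch of the storeifref2 contract: store val to *p only if *p equals ref,
   and return whatever was in *p before the operation (ref on success,
   the differing value on failure). */
static uint64_t storeifref2_sketch(uint64_t *p, uint64_t val, uint64_t ref)
{
    uint64_t old = ref;
    /* On failure, the builtin writes the value it observed back into 'old'. */
    __atomic_compare_exchange_n(p, &old, val, false,
                                __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
    return old;
}
```

In the aligned path of `RunF0` above, a return value equal to EDX:EAX means the exchange happened and ZF is set; any other value clears ZF.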