author     Yang Liu <numbksco@gmail.com>  2024-05-04 19:11:45 +0800
committer  GitHub <noreply@github.com>  2024-05-04 13:11:45 +0200
commit     9494801d0b79366077062ee5f3b2b375070e8f9a (patch)
tree       0a79983c14eb9658fc0a03cc55754ebb4e7fcc20 /src
parent     1501592fa87cde05f22ea7ec3b2587ffe51134f9 (diff)
[DYNAREC] Small improvements to atomic functions (#1492)
* [DYNAREC] Small improvements to atomic functions

* Support older versions of gcc
Diffstat (limited to 'src')
-rw-r--r--  src/dynarec/la64/dynarec_la64_functions.c   33
-rw-r--r--  src/dynarec/la64/la64_lock.S                113
-rw-r--r--  src/dynarec/la64/la64_lock.h                62
-rw-r--r--  src/dynarec/native_lock.h                   51
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_functions.c   16
-rw-r--r--  src/dynarec/rv64/rv64_lock.h                5
6 files changed, 155 insertions, 125 deletions
diff --git a/src/dynarec/la64/dynarec_la64_functions.c b/src/dynarec/la64/dynarec_la64_functions.c
index 7568b453..700cae5a 100644
--- a/src/dynarec/la64/dynarec_la64_functions.c
+++ b/src/dynarec/la64/dynarec_la64_functions.c
@@ -308,31 +308,36 @@ void inst_name_pass3(dynarec_native_t* dyn, int ninst, const char* name, rex_t r
     }
 }
 
-// CAS
-uint8_t extract_byte(uint32_t val, void* address){
+// will go badly if address is unaligned
+static uint8_t extract_byte(uint32_t val, void* address)
+{
     int idx = (((uintptr_t)address)&3)*8;
     return (val>>idx)&0xff;
 }
-uint32_t insert_byte(uint32_t val, uint8_t b, void* address){
+
+static uint32_t insert_byte(uint32_t val, uint8_t b, void* address)
+{
     int idx = (((uintptr_t)address)&3)*8;
     val&=~(0xff<<idx);
     val|=(((uint32_t)b)<<idx);
     return val;
 }
 
-// will go badly if address is unaligned
-uint16_t extract_half(uint32_t val, void* address){
+static uint16_t extract_half(uint32_t val, void* address)
+{
     int idx = (((uintptr_t)address)&3)*8;
     return (val>>idx)&0xffff;
 }
-uint32_t insert_half(uint32_t val, uint16_t h, void* address){
+
+static uint32_t insert_half(uint32_t val, uint16_t h, void* address)
+{
     int idx = (((uintptr_t)address)&3)*8;
     val&=~(0xffff<<idx);
     val|=(((uint32_t)h)<<idx);
     return val;
 }
 
-uint8_t la64_lock_xchg_b(void* addr, uint8_t val)
+uint8_t la64_lock_xchg_b_slow(void* addr, uint8_t val)
 {
     uint32_t ret;
     uint32_t* aligned = (uint32_t*)(((uintptr_t)addr)&~3);
@@ -342,24 +347,14 @@ uint8_t la64_lock_xchg_b(void* addr, uint8_t val)
     return extract_byte(ret, addr);
 }
 
-uint16_t la64_lock_xchg_h(void* addr, uint16_t val)
-{
-    uint32_t ret;
-    uint32_t* aligned = (uint32_t*)(((uintptr_t)addr)&~3);
-    do {
-        ret = *aligned;
-    } while(la64_lock_cas_d(aligned, ret, insert_half(ret, val, addr)));
-    return extract_half(ret, addr);
-}
-
-int la64_lock_cas_b(void* addr, uint8_t ref, uint8_t val)
+int la64_lock_cas_b_slow(void* addr, uint8_t ref, uint8_t val)
 {
     uint32_t* aligned = (uint32_t*)(((uintptr_t)addr)&~3);
     uint32_t tmp = *aligned;
     return la64_lock_cas_d(aligned, ref, insert_byte(tmp, val, addr));
 }
 
-int la64_lock_cas_h(void* addr, uint16_t ref, uint16_t val)
+int la64_lock_cas_h_slow(void* addr, uint16_t ref, uint16_t val)
 {
     uint32_t* aligned = (uint32_t*)(((uintptr_t)addr)&~3);
     uint32_t tmp = *aligned;
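The helpers above emulate byte- and half-word-wide atomics by splicing the small value into its aligned 32-bit container and retrying a word-wide CAS. A minimal standalone sketch of the same idea (not box64 code: it assumes a little-endian host like the shift math in extract_byte/insert_byte, GCC/Clang __atomic builtins stand in for the assembly la64_lock_cas_d, and the helper names are made up):

/*
 * Standalone sketch of the word-splicing emulation: a byte-wide CAS
 * built from an aligned 32-bit CAS.
 */
#include <stdint.h>
#include <stdio.h>

// 0 on success, 1 on failure, mirroring la64_lock_cas_d's contract
static int cas_u32(uint32_t* p, uint32_t ref, uint32_t val)
{
    return __atomic_compare_exchange_n(p, &ref, val, 0,
                                       __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) ? 0 : 1;
}

static int cas_b_slow(void* addr, uint8_t ref, uint8_t val)
{
    uint32_t* aligned = (uint32_t*)(((uintptr_t)addr) & ~(uintptr_t)3);
    int idx = (((uintptr_t)addr) & 3) * 8;   // bit position of the byte
    uint32_t old = *aligned;                 // snapshot the containing word
    if (((old >> idx) & 0xff) != ref)        // target byte already differs
        return 1;
    uint32_t repl = (old & ~(0xffu << idx)) | ((uint32_t)val << idx);
    return cas_u32(aligned, old, repl);      // also fails if neighbors moved
}

int main(void)
{
    uint32_t word = 0x11223344;
    uint8_t* bytes = (uint8_t*)&word;        // bytes[1] == 0x33 on LE
    int fail = cas_b_slow(&bytes[1], 0x33, 0xAA);
    printf("fail=%d word=%08x\n", fail, word); // fail=0 word=1122aa44
    return 0;
}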
diff --git a/src/dynarec/la64/la64_lock.S b/src/dynarec/la64/la64_lock.S
index 40934c9c..48f21595 100644
--- a/src/dynarec/la64/la64_lock.S
+++ b/src/dynarec/la64/la64_lock.S
@@ -1,10 +1,11 @@
-// RV64 lock helper
+// LA64 lock helper
 // there is 2 part: read and write
 // write return 0 on success, 1 on fail (value has been changed)
 
 .text
 .align 4
 
+.global la64_lock_xchg_b
 .global la64_lock_xchg_dd
 .global la64_lock_xchg_d
 .global la64_lock_storeifnull
@@ -12,18 +13,24 @@
 .global la64_lock_storeifref
 .global la64_lock_storeifref_d
 .global la64_lock_storeifref2_d
-.global la64_lock_decifnot0b
 .global la64_lock_storeb
 .global la64_lock_incif0
 .global la64_lock_decifnot0
 .global la64_lock_store
 .global la64_lock_store_dd
-.global la64_lock_get_b
-.global la64_lock_get_d
-.global la64_lock_get_dd
+.global la64_lock_cas_b
+.global la64_lock_cas_h
 .global la64_lock_cas_d
 .global la64_lock_cas_dd
 .global la64_lock_cas_dq
+.global la64_lock_get_b
+.global la64_lock_get_d
+.global la64_lock_get_dd
+
+la64_lock_xchg_b:
+    .word 0x385e1486 // amswap_db.b $a2, $a1, $a0
+    move        $a0, $a2
+    ret
 
 la64_lock_xchg_dd:
     // address is a0, value is a1, return old value in a0
@@ -40,11 +47,13 @@ la64_lock_xchg_d:
 la64_lock_storeifnull:
     // address is a0, value is a1, a1 store to a0 only if [a0] is 0. return old [a0] value
     dbar 0
+1:
     move $a3, $a1
     ll.d $a2, $a0, 0
-    bnez $a2, 12
+    bnez $a2, 2f
     sc.d $a3, $a0, 0
-    beqz $a3, -16
+    beqz $a3, 1b
+2:
     move $a0, $a2
     ret
 
@@ -52,59 +61,58 @@ la64_lock_storeifnull_d:
     // address is a0, value is a1, a1 store to a0 only if [a0] is 0. return old [a0] value
     dbar 0
+1:
     move $a3, $a1
     ll.w $a2, $a0, 0
-    bnez $a2, 12
+    bnez $a2, 2f
     sc.w $a3, $a0, 0
-    beqz $a3, -16
+    beqz $a3, 1b
+2:
     move $a0, $a2
     ret
 
 la64_lock_storeifref:
     // address is a0, value is a1, a1 store to a0 only if [a0] is a2. return new [a0] value (so a1 or old value)
     dbar 0
+1:
     move $a4, $a1
     ll.d $a3, $a0, 0
-    bne  $a2, $a3, 24
+    bne  $a2, $a3, 2f
     sc.d $a4, $a0, 0
-    beqz $a4, -16
-    dbar 0
+    beqz $a4, 1b
     move $a0, $a1
     ret
-    dbar 0
+2:
     move $a0, $a3
     ret
 
 la64_lock_storeifref_d:
     // address is a0, value is a1, a1 store to a0 only if [a0] is a2. return new [a0] value (so a1 or old value)
     dbar 0
-    move $a4, $a1  
+1:
+    move $a4, $a1
     ll.w $a3, $a0, 0
-    bne  $a2, $a3, 24
+    bne  $a2, $a3, 2f
     sc.w $a4, $a0, 0
-    beqz $a4, -16
-    dbar 0
+    beqz $a4, 1b
     move $a0, $a1
     ret
-    dbar 0
+2:
     move $a0, $a3
     ret
 
 la64_lock_storeifref2_d:
     // address is a0, value is a1, a1 store to a0 only if [a0] is a2. return old [a0] value
     dbar 0
+1:
     move $a4, $a1
     ll.w $a3, $a0, 0
-    bne  $a2, $a3, 12
+    bne  $a2, $a3, 2f
     sc.w $a4, $a0, 0
-    beqz $a4, -16
+    beqz $a4, 1b
+2:
     move $a0, $a3
     ret
 
-la64_lock_decifnot0b:
-    dbar       0
-    // TODO
-    ret
-
 la64_lock_storeb:
     st.b $a1, $a0, 0
     dbar 0
@@ -112,23 +120,27 @@ la64_lock_storeb:
 
 la64_lock_decifnot0:
     dbar   0
+1:
     ll.w   $a1, $a0, 0
-    beqz   $a1, 20
+    beqz   $a1, 2f
     addi.d $a1, $a1, -1
     move   $a2, $a1
     sc.w   $a2, $a0, 0
-    beqz   $a2, -20
+    beqz   $a2, 1b
+2:
     move   $a0, $a1
     ret
 
 la64_lock_incif0:
     dbar   0
+1:
     ll.w   $a1, $a0, 0
-    bnez   $a1, 20
+    bnez   $a1, 2f
     addi.d $a1, $a1, 1
     move   $a2, $a1
     sc.w   $a2, $a0, 0
-    beqz   $a2, -20
+    beqz   $a2, 1b
+2:
     move   $a0, $a1
     ret
 
@@ -142,27 +154,27 @@ la64_lock_store_dd:
     dbar 0
     ret
 
-la64_lock_get_b:
-    dbar 0
-    ld.b $a0, $a0, 0
-    ret
-
-la64_lock_get_d:
-    dbar 0
-    ld.w $a0, $a0, 0
+la64_lock_cas_b:
+    ext.w.b     $a3, $a1
+    .word 0x385a1885 // amcas_db.b  $a1, $a2, $a0
+    xor         $a0, $a1, $a3
+    sltu        $a0, $zero, $a0
     ret
 
-la64_lock_get_dd:
-    dbar 0
-    ld.d $a0, $a0, 0
+la64_lock_cas_h:
+    ext.w.h     $a3, $a1
+    .word 0x385a9885 // amcas_db.h  $a1, $a2, $a0
+    xor         $a0, $a1, $a3
+    sltu        $a0, $zero, $a0
     ret
 
 la64_lock_cas_d:
     ll.w $a3, $a0, 0
-    bne  $a3, $a1, 16
+    bne  $a3, $a1, 1f
     sc.w $a2, $a0, 0
-    xori $a0, $a2, 1
+    xori $a0, $a2, 1 // sc returns success in $a2, overwriting it! 0 = fail, 1 = success
     ret
+1:
     li.d $a0, 1
     ret
 
@@ -170,7 +182,7 @@ la64_lock_cas_dd:
     ll.d $a3, $a0, 0
     bne  $a3, $a1, 1f
     sc.d $a2, $a0, 0
-    xori $a0, $a2, 1    // sc return success in $a2, overwriting it! 0 = fail, 1 = success
+    xori $a0, $a2, 1
     ret
 1:
     li.d $a0, 1
@@ -187,3 +199,18 @@ la64_lock_cas_dq:
 1:
     li.d $a0, 1
     ret
+
+la64_lock_get_b:
+    dbar 0
+    ld.b $a0, $a0, 0
+    ret
+
+la64_lock_get_d:
+    dbar 0
+    ld.w $a0, $a0, 0
+    ret
+
+la64_lock_get_dd:
+    dbar 0
+    ld.d $a0, $a0, 0
+    ret
\ No newline at end of file
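Two notes on the assembly changes above. First, the hard-coded byte offsets in branches (bnez $a2, 12, beqz $a3, -16) are replaced with GNU as local labels (1b/2f), which keep working when instructions are inserted. Second, the .word lines encode the amswap_db.b/amcas_db instructions directly, so assemblers predating those mnemonics still build the file (the "Support older versions of gcc" note in the commit message). For the semantics of one primitive, here is a rough portable C model of la64_lock_storeifnull's ll.d/sc.d retry loop (an illustrative assumption, not project code):

#include <stdint.h>

/* C model of la64_lock_storeifnull's contract: store val at *p only if
 * *p is currently 0, and always return the old value. A strong CAS
 * already encodes the whole ll.d / bnez / sc.d / beqz retry loop. */
static uintptr_t storeifnull_model(uintptr_t* p, uintptr_t val)
{
    uintptr_t old = 0; // expected: NULL
    __atomic_compare_exchange_n(p, &old, val, 0 /* strong */,
                                __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
    return old;        // 0 if we stored val, else what was already there
}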
diff --git a/src/dynarec/la64/la64_lock.h b/src/dynarec/la64/la64_lock.h
index c757e08a..80cd80b2 100644
--- a/src/dynarec/la64/la64_lock.h
+++ b/src/dynarec/la64/la64_lock.h
@@ -2,6 +2,16 @@
 #define __LA64_LOCK__H__
 #include <stdint.h>
 
+extern int la64_lamcas;
+extern int la64_lam_bh;
+
+// Atomically store val at [p] if old [p] is ref. Return 0 if OK, 1 if not. p needs to be aligned
+extern int la64_lock_cas_d(void* p, int32_t ref, int32_t val);
+
+// Atomically store val at [p] if old [p] is ref. Return 0 if OK, 1 if not. p needs to be aligned
+extern int la64_lock_cas_dd(void* p, int64_t ref, int64_t val);
+
 // Atomically exchange value at [p] with val, return old p
 extern uintptr_t la64_lock_xchg_dd(void* p, uintptr_t val);
 
@@ -9,37 +19,40 @@ extern uintptr_t la64_lock_xchg_dd(void* p, uintptr_t val);
 extern uint32_t la64_lock_xchg_d(void* p, uint32_t val);
 
 // Atomically store value to [p] only if [p] is NULL. Return old [p] value
-extern uint32_t la64_lock_storeifnull_d(void*p, uint32_t val);
+extern uint32_t la64_lock_storeifnull_d(void* p, uint32_t val);
 
 // Atomically store value to [p] only if [p] is NULL. Return old [p] value
-extern void* la64_lock_storeifnull(void*p, void* val);
+extern void* la64_lock_storeifnull(void* p, void* val);
 
 // Atomically store value to [p] only if [p] is ref. Return new [p] value (so val or old)
-extern void* la64_lock_storeifref(void*p, void* val, void* ref);
+extern void* la64_lock_storeifref(void* p, void* val, void* ref);
 
 // Atomically store value to [p] only if [p] is ref. Return new [p] value (so val or old)
-extern uint32_t la64_lock_storeifref_d(void*p, uint32_t val, uint32_t ref);
+extern uint32_t la64_lock_storeifref_d(void* p, uint32_t val, uint32_t ref);
 
 // Atomically store value to [p] only if [p] is ref. Return new [p] value (so val or old)
-extern uint32_t la64_lock_storeifref2_d(void*p, uint32_t val, uint32_t ref);
+extern uint32_t la64_lock_storeifref2_d(void* p, uint32_t val, uint32_t ref);
 
 // decrement atomically the byte at [p] (but only if p not 0)
-extern void la64_lock_decifnot0b(void*p);
+extern void la64_lock_decifnot0b(void* p);
 
 // atomic store (with memory barrier)
-extern void la64_lock_storeb(void*p, uint8_t b);
+extern void la64_lock_storeb(void* p, uint8_t b);
 
 // increment atomically the int at [p] only if it was 0. Return the old value of [p]
-extern int la64_lock_incif0(void*p);
+extern int la64_lock_incif0(void* p);
 
 // decrement atomically the int at [p] (but only if p not 0)
-extern int la64_lock_decifnot0(void*p);
+extern int la64_lock_decifnot0(void* p);
 
 // atomic store (with memory barrier)
-extern void la64_lock_store(void*p, uint32_t v);
+extern void la64_lock_store(void* p, uint32_t v);
 
 // atomic store (with memory barrier)
-extern void la64_lock_store_dd(void*p, uint64_t v);
+extern void la64_lock_store_dd(void* p, uint64_t v);
+
+// (mostly) Atomically store val1 and val2 at [p] if old [p] is ref. Return 0 if OK, 1 if not. p needs to be aligned
+extern int la64_lock_cas_dq(void* p, uint64_t ref, uint64_t val1, uint64_t val2);
 
 // atomic get (with memory barrier)
 extern uint32_t la64_lock_get_b(void* p);
@@ -50,24 +63,13 @@ extern uint32_t la64_lock_get_d(void* p);
 // atomic get (with memory barrier)
 extern void* la64_lock_get_dd(void* p);
 
-// Atomically store val at [p] if old [p] is ref. Return 0 if OK, 1 is not. p needs to be aligned
-extern int la64_lock_cas_d(void* p, int32_t ref, int32_t val);
-
-// Atomically store val at [p] if old [p] is ref. Return 0 if OK, 1 is not. p needs to be aligned
-extern int la64_lock_cas_dd(void* p, int64_t ref, int64_t val);
-
-// (mostly) Atomically store val1 and val2 at [p] if old [p] is ref. Return 0 if OK, 1 is not. p needs to be aligned
-extern int la64_lock_cas_dq(void* p, uint64_t ref, uint64_t val1, uint64_t val2);
-
-// Not defined in assembler but in dynarec_rv64_functions
-uint8_t extract_byte(uint32_t val, void* address);
-uint32_t insert_byte(uint32_t val, uint8_t b, void* address);
-uint16_t extract_half(uint32_t val, void* address);
-uint32_t insert_half(uint32_t val, uint16_t h, void* address);
-
 uint8_t la64_lock_xchg_b(void* addr, uint8_t v);
-uint16_t la64_lock_xchg_h(void* addr, uint16_t v);
-int la64_lock_cas_b(void* p, uint8_t ref, uint8_t val);
-int la64_lock_cas_h(void* p, uint16_t ref, uint16_t val);
+extern int la64_lock_cas_b(void* p, uint8_t ref, uint8_t val);
+extern int la64_lock_cas_h(void* p, uint16_t ref, uint16_t val);
+
+// Not defined in assembler but in dynarec_la64_functions
+uint8_t la64_lock_xchg_b_slow(void* addr, uint8_t v);
+extern int la64_lock_cas_b_slow(void* p, uint8_t ref, uint8_t val);
+extern int la64_lock_cas_h_slow(void* p, uint16_t ref, uint16_t val);
 
-#endif  //__LA64_LOCK__H__
+#endif //__LA64_LOCK__H__
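Note the polarity of the cas_* contract declared in this header: 0 means the store happened. A hedged usage sketch (it only links in an LA64 build of box64; the take_flag name is invented):

#include <stdint.h>
#include "la64/la64_lock.h"

// spin until we own the flag: cas returns 0 exactly when our store landed
static void take_flag(uint32_t* flag)
{
    while (la64_lock_cas_d(flag, 0, 1) != 0) {
        // flag was not 0 (someone holds it), or it changed under us: retry
    }
}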
diff --git a/src/dynarec/native_lock.h b/src/dynarec/native_lock.h
index b34700fe..33626a63 100644
--- a/src/dynarec/native_lock.h
+++ b/src/dynarec/native_lock.h
@@ -23,7 +23,7 @@
 #define native_lock_storeifref2_d(A, B, C)  arm64_lock_storeifref2_d(A, B, C)
 #define native_lock_storeifnull(A, B)       arm64_lock_storeifnull(A, B)
 #define native_lock_storeifnull_d(A, B)     arm64_lock_storeifnull_d(A, B)
-#define native_lock_decifnot0b(A)           arm64_lock_decifnot0b(A)
+// #define native_lock_decifnot0b(A)           arm64_lock_decifnot0b(A)
 #define native_lock_storeb(A, B)            arm64_lock_storeb(A, B)
 #define native_lock_incif0(A)               arm64_lock_incif0(A)
 #define native_lock_decifnot0(A)            arm64_lock_decifnot0(A)
@@ -37,7 +37,7 @@
 #include "rv64/rv64_lock.h"

 #define USE_CAS
-// RV64 is quite strict (or at least strongly recommand) on what you can do between an LD.A and an SD.A
+// RV64 is quite strict (or at least strongly recommends) on what you can do between an LR and an SC
 // That basicaly forbid to call a function, so there cannot be READ / WRITE separated
 // And so need to use a Compare and Swap mecanism instead

@@ -49,7 +49,7 @@
 #define native_lock_storeifref2_d(A, B, C)  rv64_lock_storeifref2_d(A, B, C)
 #define native_lock_storeifnull(A, B)       rv64_lock_storeifnull(A, B)
 #define native_lock_storeifnull_d(A, B)     rv64_lock_storeifnull_d(A, B)
-#define native_lock_decifnot0b(A)           rv64_lock_decifnot0b(A)
+// #define native_lock_decifnot0b(A)           rv64_lock_decifnot0b(A)
 #define native_lock_storeb(A, B)            rv64_lock_storeb(A, B)
 #define native_lock_incif0(A)               rv64_lock_incif0(A)
 #define native_lock_decifnot0(A)            rv64_lock_decifnot0(A)
@@ -57,11 +57,7 @@
 #define native_lock_store_dd(A, B)          rv64_lock_store_dd(A, B)
 #define native_lock_cas_d(A, B, C)          rv64_lock_cas_d(A, B, C)
 #define native_lock_cas_dd(A, B, C)         rv64_lock_cas_dd(A, B, C)
-
 #define native_lock_xchg_b(A, B)            rv64_lock_xchg_b(A, B)
-#define native_lock_cas_b(A, B, C)          rv64_lock_cas_b(A, B, C)
-#define native_lock_cas_h(A, B, C)          rv64_lock_cas_h(A, B, C)
-
 #define native_lock_read_b(A)               tmpcas=*(uint8_t*)(A)
 #define native_lock_write_b(A, B)           rv64_lock_cas_b(A, tmpcas, B)
 #define native_lock_read_h(A)               tmpcas=*(uint16_t*)(A)
@@ -71,7 +67,11 @@
 #define native_lock_read_dd(A)              tmpcas=*(uint64_t*)(A)
 #define native_lock_write_dd(A, B)          rv64_lock_cas_dd(A, tmpcas, B)
 // there is no atomic move on 16bytes, so faking it
-#define native_lock_read_dq(A, B, C)        *A=tmpcas=((uint64_t*)(C))[0]; *B=((uint64_t*)(C))[1];
+#define native_lock_read_dq(A, B, C)       \
+    do {                                   \
+        *A = tmpcas = ((uint64_t*)(C))[0]; \
+        *B = ((uint64_t*)(C))[1];          \
+    } while (0)
 #define native_lock_write_dq(A, B, C)       rv64_lock_cas_dq(C, A, tmpcas, B);
 #define native_lock_get_b(A)                rv64_lock_get_b(A)
 #define native_lock_get_d(A)                rv64_lock_get_d(A)
@@ -81,39 +81,46 @@
 #include "la64/la64_lock.h"

 #define USE_CAS
+// LA64 is quite strict (or at least strongly recommends) on what you can do between an LL and an SC
+// That basically forbids calling a function, so READ / WRITE cannot be separated
+// And so a Compare and Swap mechanism is needed instead

-// no byte or 2-bytes atomic access on LA64
 #define native_lock_xchg_dd(A, B)           la64_lock_xchg_dd(A, B)
 #define native_lock_xchg_d(A, B)            la64_lock_xchg_d(A, B)
-#define native_lock_xchg_h(A, B)            la64_lock_xchg_h(A, B)
-#define native_lock_xchg_b(A, B)            la64_lock_xchg_b(A, B)
 #define native_lock_storeifref(A, B, C)     la64_lock_storeifref(A, B, C)
 #define native_lock_storeifref_d(A, B, C)   la64_lock_storeifref_d(A, B, C)
 #define native_lock_storeifref2_d(A, B, C)  la64_lock_storeifref2_d(A, B, C)
 #define native_lock_storeifnull(A, B)       la64_lock_storeifnull(A, B)
 #define native_lock_storeifnull_d(A, B)     la64_lock_storeifnull_d(A, B)
-#define native_lock_decifnot0b(A)           la64_lock_decifnot0b(A)
+// #define native_lock_decifnot0b(A)           la64_lock_decifnot0b(A)
 #define native_lock_storeb(A, B)            la64_lock_storeb(A, B)
 #define native_lock_incif0(A)               la64_lock_incif0(A)
 #define native_lock_decifnot0(A)            la64_lock_decifnot0(A)
 #define native_lock_store(A, B)             la64_lock_store(A, B)
 #define native_lock_store_dd(A, B)          la64_lock_store_dd(A, B)
-
-// there is no atomic move on 8bytes, so faking it
+#define native_lock_cas_d(A, B, C)          la64_lock_cas_d(A, B, C)
+#define native_lock_cas_dd(A, B, C)         la64_lock_cas_dd(A, B, C)
+#define native_lock_xchg_b(A, B) \
+    la64_lam_bh ? la64_lock_xchg_b(A, B) : la64_lock_xchg_b_slow(A, B)
 #define native_lock_read_b(A)               tmpcas=*(uint8_t*)(A)
-#define native_lock_write_b(A, B)           la64_lock_cas_b(A, tmpcas, B)
-// there is no atomic move on 16bytes, so faking it
+#define native_lock_write_b(A, B) \
+    la64_lamcas ? la64_lock_cas_b(A, tmpcas, B) : la64_lock_cas_b_slow(A, tmpcas, B)
 #define native_lock_read_h(A)               tmpcas=*(uint16_t*)(A)
-#define native_lock_write_h(A, B)           la64_lock_cas_h(A, tmpcas, B)
+#define native_lock_write_h(A, B) \
+    la64_lamcas ? la64_lock_cas_h(A, tmpcas, B) : la64_lock_cas_h_slow(A, tmpcas, B)
 #define native_lock_read_d(A)               tmpcas=*(uint32_t*)(A)
 #define native_lock_write_d(A, B)           la64_lock_cas_d(A, tmpcas, B)
 #define native_lock_read_dd(A)              tmpcas=*(uint64_t*)(A)
 #define native_lock_write_dd(A, B)          la64_lock_cas_dd(A, tmpcas, B)
-#define native_lock_read_dq(A, B, C)        *A=tmpcas=((uint64_t*)(C))[0]; *B=((uint64_t*)(C))[1];
-#define native_lock_write_dq(A, B, C)       la64_lock_cas_dq(C, A, tmpcas, B)
-#define native_lock_get_b(A)                la64_lock_get_b(A)
-#define native_lock_get_d(A)                la64_lock_get_d(A)
-#define native_lock_get_dd(A)               la64_lock_get_dd(A)
+#define native_lock_read_dq(A, B, C)       \
+    do {                                   \
+        *A = tmpcas = ((uint64_t*)(C))[0]; \
+        *B = ((uint64_t*)(C))[1];          \
+    } while (0)
+#define native_lock_write_dq(A, B, C) la64_lock_cas_dq(C, A, tmpcas, B);
+#define native_lock_get_b(A)          la64_lock_get_b(A)
+#define native_lock_get_d(A)          la64_lock_get_d(A)
+#define native_lock_get_dd(A)         la64_lock_get_dd(A)

 #else
 #error Unsupported architecture
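The rewritten LA64 byte/half macros now dispatch at run time: when the CPU advertises the LAMCAS/LAM_BH extensions (the la64_lamcas and la64_lam_bh flags), a single amcas/amswap instruction is used, otherwise the word-splicing fallback. A self-contained model of that dispatch shape (all names here are invented; box64 sets the real flags during CPU detection, and the real functions are the assembly/C pairs above):

#include <stdint.h>
#include <stdio.h>

static int cas_b_fast(uint8_t* p, uint8_t ref, uint8_t val)
{
    // stands in for la64_lock_cas_b (one amcas_db.b instruction)
    return __atomic_compare_exchange_n(p, &ref, val, 0,
                                       __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) ? 0 : 1;
}
static int cas_b_slow(uint8_t* p, uint8_t ref, uint8_t val)
{
    return cas_b_fast(p, ref, val); // placeholder for the word-splicing fallback
}

int la64_lamcas = 0; // probed once at startup on real hardware

// same shape as native_lock_write_b: a branch per call, no function pointer
#define model_write_b(A, R, B) \
    (la64_lamcas ? cas_b_fast(A, R, B) : cas_b_slow(A, R, B))

int main(void)
{
    uint8_t cell = 7;
    printf("%d %d\n", model_write_b(&cell, 7, 9), cell); // 0 9 (slow path stored)
    la64_lamcas = 1;
    printf("%d %d\n", model_write_b(&cell, 7, 3), cell); // 1 9 (fast path refused)
    return 0;
}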

diff --git a/src/dynarec/rv64/dynarec_rv64_functions.c b/src/dynarec/rv64/dynarec_rv64_functions.c
index 9965b438..d366de48 100644
--- a/src/dynarec/rv64/dynarec_rv64_functions.c
+++ b/src/dynarec/rv64/dynarec_rv64_functions.c
@@ -492,24 +492,28 @@ void extcacheUnwind(extcache_t* cache)
     }
 }
 
-
-uint8_t extract_byte(uint32_t val, void* address){
+// will go badly if address is unaligned
+static uint8_t extract_byte(uint32_t val, void* address)
+{
     int idx = (((uintptr_t)address)&3)*8;
     return (val>>idx)&0xff;
 }
-uint32_t insert_byte(uint32_t val, uint8_t b, void* address){
+
+static uint32_t insert_byte(uint32_t val, uint8_t b, void* address)
+{
     int idx = (((uintptr_t)address)&3)*8;
     val&=~(0xff<<idx);
     val|=(((uint32_t)b)<<idx);
     return val;
 }
 
-// will go badly if address is unaligned
-uint16_t extract_half(uint32_t val, void* address){
+static uint16_t extract_half(uint32_t val, void* address)
+{
     int idx = (((uintptr_t)address)&3)*8;
     return (val>>idx)&0xffff;
 }
-uint32_t insert_half(uint32_t val, uint16_t h, void* address){
+static uint32_t insert_half(uint32_t val, uint16_t h, void* address)
+{
     int idx = (((uintptr_t)address)&3)*8;
     val&=~(0xffff<<idx);
     val|=(((uint32_t)h)<<idx);
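A quick worked check of the splice math in these helpers (a hedged standalone example, little-endian assumed): for an address with (addr & 3) == 2, idx is 16, so inserting 0xBEEF into 0x11223344 must yield 0xBEEF3344, and extracting it back must return 0xBEEF.

#include <stdint.h>
#include <stdio.h>

// local copies of the helpers above, for a host-side check
static uint16_t extract_half(uint32_t val, void* address)
{
    int idx = (((uintptr_t)address) & 3) * 8;
    return (val >> idx) & 0xffff;
}
static uint32_t insert_half(uint32_t val, uint16_t h, void* address)
{
    int idx = (((uintptr_t)address) & 3) * 8;
    val &= ~(0xffffu << idx);          // clear the target half-word
    val |= ((uint32_t)h) << idx;       // splice in the new one
    return val;
}

int main(void)
{
    void* addr = (void*)(uintptr_t)0x1002;           // (addr & 3) == 2 -> idx 16
    uint32_t w = insert_half(0x11223344u, 0xBEEF, addr);
    printf("%08x %04x\n", w, extract_half(w, addr)); // beef3344 beef
    return 0;
}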
diff --git a/src/dynarec/rv64/rv64_lock.h b/src/dynarec/rv64/rv64_lock.h
index 8f9d95b1..235ad46f 100644
--- a/src/dynarec/rv64/rv64_lock.h
+++ b/src/dynarec/rv64/rv64_lock.h
@@ -60,11 +60,6 @@ extern uint32_t rv64_lock_get_d(void* p);
 extern void* rv64_lock_get_dd(void* p);
 
 // Not defined in assembler but in dynarec_rv64_functions
-uint8_t extract_byte(uint32_t val, void* address);
-uint32_t insert_byte(uint32_t val, uint8_t b, void* address);
-uint16_t extract_half(uint32_t val, void* address);
-uint32_t insert_half(uint32_t val, uint16_t h, void* address);
-
 uint8_t rv64_lock_xchg_b(void* addr, uint8_t v);
 extern int rv64_lock_cas_b(void* p, uint8_t ref, uint8_t val);
 extern int rv64_lock_cas_h(void* p, uint16_t ref, uint16_t val);