about summary refs log tree commit diff stats
path: root/src/dynarec
diff options
context:
space:
mode:
Diffstat (limited to 'src/dynarec')
-rw-r--r--src/dynarec/arm64/dynarec_arm64_00.c18
-rw-r--r--src/dynarec/arm64/dynarec_arm64_64.c12
-rw-r--r--src/dynarec/arm64/dynarec_arm64_67.c12
-rw-r--r--src/dynarec/arm64/dynarec_arm64_helper.h10
-rw-r--r--src/dynarec/arm64/dynarec_arm64_pass2.h2
-rw-r--r--src/dynarec/arm64/dynarec_arm64_pass3.h2
-rw-r--r--src/dynarec/arm64/dynarec_arm64_private.h3
-rw-r--r--src/dynarec/dynablock.c46
-rw-r--r--src/dynarec/dynablock_private.h7
-rw-r--r--src/dynarec/dynarec_arch.h3
-rw-r--r--src/dynarec/dynarec_native.c41
-rw-r--r--src/dynarec/dynarec_native_pass.c5
-rw-r--r--src/dynarec/dynarec_private.h1
-rw-r--r--src/dynarec/la64/dynarec_la64_private.h4
-rw-r--r--src/dynarec/rv64/dynarec_rv64_private.h4
15 files changed, 151 insertions, 19 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_00.c b/src/dynarec/arm64/dynarec_arm64_00.c
index 0b828704..0a22ac90 100644
--- a/src/dynarec/arm64/dynarec_arm64_00.c
+++ b/src/dynarec/arm64/dynarec_arm64_00.c
@@ -16,6 +16,7 @@
 #include "bridge.h"
 #include "x64trace.h"
 #include "dynarec_native.h"
+#include "../dynablock_private.h"
 #include "custommem.h"
 #include "alternate.h"
 
@@ -3386,11 +3387,17 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         // Push actual return address
                         if(addr < (dyn->start+dyn->isize)) {
                             // there is a next...
-                            j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0;
+                            if(BOX64DRENV(dynarec_callret)>1)
+                                j64 = CALLRET_GETRET();
+                            else
+                                j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0;
                             ADR_S20(x4, j64);
                             MESSAGE(LOG_NONE, "\tCALLRET set return to +%di\n", j64>>2);
                         } else {
-                            j64 = (dyn->insts)?(GETMARK-(dyn->native_size)):0;
+                            if(BOX64DRENV(dynarec_callret)>1)
+                                j64 = CALLRET_GETRET();
+                            else
+                                j64 = (dyn->insts)?(GETMARK-(dyn->native_size)):0;
                             ADR_S20(x4, j64);
                             MESSAGE(LOG_NONE, "\tCALLRET set return to +%di\n", j64>>2);
                         }
@@ -3404,6 +3411,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     else
                         j64 = addr+i32;
                     jump_to_next(dyn, j64, 0, ninst, rex.is32bits);
+                    if(BOX64DRENV(dynarec_callret)>1) CALLRET_RET();
                     if (BOX64DRENV(dynarec_callret) && addr >= (dyn->start + dyn->isize)) {
                         // jumps out of current dynablock...
                         MARK;
@@ -4046,7 +4054,10 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         // Push actual return address
                         if(addr < (dyn->start+dyn->isize)) {
                             // there is a next...
-                            j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0;
+                            if(BOX64DRENV(dynarec_callret)>1)
+                                j64 = CALLRET_GETRET();
+                            else
+                                j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0;
                             ADR_S20(x4, j64);
                             MESSAGE(LOG_NONE, "\tCALLRET set return to +%di\n", j64>>2);
                         } else {
@@ -4058,6 +4069,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     }
                     PUSH1z(xRIP);
                     jump_to_next(dyn, 0, ed, ninst, rex.is32bits);
+                    if(BOX64DRENV(dynarec_callret)>1) CALLRET_RET();
                     if (BOX64DRENV(dynarec_callret) && addr >= (dyn->start + dyn->isize)) {
                         // jumps out of current dynablock...
                         MARK;
diff --git a/src/dynarec/arm64/dynarec_arm64_64.c b/src/dynarec/arm64/dynarec_arm64_64.c
index f7265aad..1f9edc20 100644
--- a/src/dynarec/arm64/dynarec_arm64_64.c
+++ b/src/dynarec/arm64/dynarec_arm64_64.c
@@ -14,6 +14,7 @@
 #include "emu/x64run_private.h"
 #include "x64trace.h"
 #include "dynarec_native.h"
+#include "../dynablock_private.h"
 #include "custommem.h"
 
 #include "arm64_printer.h"
@@ -1602,11 +1603,17 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         // Push actual return address
                         if(addr < (dyn->start+dyn->isize)) {
                             // there is a next...
-                            j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0;
+                            if(BOX64DRENV(dynarec_callret)>1)
+                                j64 = CALLRET_GETRET();
+                            else
+                                j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0;
                             ADR_S20(x4, j64);
                             MESSAGE(LOG_NONE, "\tCALLRET set return to +%di\n", j64>>2);
                         } else {
-                            j64 = (dyn->insts)?(GETMARK-(dyn->native_size)):0;
+                            if(BOX64DRENV(dynarec_callret)>1)
+                                j64 = CALLRET_GETRET();
+                            else
+                                j64 = (dyn->insts)?(GETMARK-(dyn->native_size)):0;
                             ADR_S20(x4, j64);
                             MESSAGE(LOG_NONE, "\tCALLRET set return to +%di\n", j64>>2);
                         }
@@ -1614,6 +1621,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     }
                     PUSH1z(xRIP);
                     jump_to_next(dyn, 0, ed, ninst, rex.is32bits);
+                    if(BOX64DRENV(dynarec_callret)>1) CALLRET_RET();
                     if (BOX64DRENV(dynarec_callret) && addr >= (dyn->start + dyn->isize)) {
                         // jumps out of current dynablock...
                         MARK;
diff --git a/src/dynarec/arm64/dynarec_arm64_67.c b/src/dynarec/arm64/dynarec_arm64_67.c
index 1d95bb6b..3d9a7b35 100644
--- a/src/dynarec/arm64/dynarec_arm64_67.c
+++ b/src/dynarec/arm64/dynarec_arm64_67.c
@@ -13,6 +13,7 @@
 #include "emu/x64run_private.h"

 #include "x64trace.h"

 #include "dynarec_native.h"

+#include "../dynablock_private.h"

 #include "custommem.h"

 

 #include "arm64_printer.h"

@@ -1708,11 +1709,17 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         // Push actual return address

                         if(addr < (dyn->start+dyn->isize)) {

                             // there is a next...

-                            j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0;

+                            if(BOX64DRENV(dynarec_callret)>1)

+                                j64 = CALLRET_GETRET();

+                            else

+                                j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0;

                             ADR_S20(x4, j64);

                             MESSAGE(LOG_NONE, "\tCALLRET set return to +%di\n", j64>>2);

                         } else {

-                            j64 = (dyn->insts)?(GETMARK-(dyn->native_size)):0;

+                            if(BOX64DRENV(dynarec_callret)>1)

+                                j64 = CALLRET_GETRET();

+                            else

+                                j64 = (dyn->insts)?(GETMARK-(dyn->native_size)):0;

                             ADR_S20(x4, j64);

                             MESSAGE(LOG_NONE, "\tCALLRET set return to +%di\n", j64>>2);

                         }

@@ -1720,6 +1727,7 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     }

                     PUSH1z(xRIP);

                     jump_to_next(dyn, 0, ed, ninst, rex.is32bits);

+                    if(BOX64DRENV(dynarec_callret)>1) CALLRET_RET();

                     if(BOX64DRENV(dynarec_callret) && addr >= (dyn->start + dyn->isize)) {

                         // jumps out of current dynablock...

                         MARK;

diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h
index 771e80f5..2e152dfb 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.h
+++ b/src/dynarec/arm64/dynarec_arm64_helper.h
@@ -988,6 +988,16 @@
 #define IF_ALIGNED(A) if (!dyn->insts[ninst].unaligned)
 #endif
 
+#ifndef CALLRET_RET
+#define CALLRET_RET()   NOP
+#endif
+#ifndef CALLRET_GETRET
+#define CALLRET_GETRET()    (dyn->callrets?(dyn->callrets[dyn->callret_size].offs-dyn->native_size):0)
+#endif
+#ifndef CALLRET_LOOP
+#define CALLRET_LOOP()  NOP
+#endif
+
 #define STORE_REG(A)    STRx_U12(x##A, xEmu, offsetof(x64emu_t, regs[_##A]))
 #define STP_REGS(A, B)  STPx_S7_offset(x##A, x##B, xEmu, offsetof(x64emu_t, regs[_##A]))
 #define LDP_REGS(A, B)  LDPx_S7_offset(x##A, x##B, xEmu, offsetof(x64emu_t, regs[_##A]))
diff --git a/src/dynarec/arm64/dynarec_arm64_pass2.h b/src/dynarec/arm64/dynarec_arm64_pass2.h
index 0975908e..6fcb5b50 100644
--- a/src/dynarec/arm64/dynarec_arm64_pass2.h
+++ b/src/dynarec/arm64/dynarec_arm64_pass2.h
@@ -17,3 +17,5 @@
 #define INST_NAME(name) 
 #define TABLE64(A, V)   {Table64(dyn, (V), 2); EMIT(0);}
 #define FTABLE64(A, V)  {mmx87_regs_t v = {.d = V}; Table64(dyn, v.q, 2); EMIT(0);}
+#define CALLRET_RET()   do {dyn->callrets[dyn->callret_size].type = 0; dyn->callrets[dyn->callret_size++].offs = dyn->native_size; EMIT(ARCH_NOP); } while(0)
+#define CALLRET_LOOP()   do {dyn->callrets[dyn->callret_size].type = 1; dyn->callrets[dyn->callret_size++].offs = dyn->native_size; EMIT(ARCH_NOP); } while(0)
\ No newline at end of file
diff --git a/src/dynarec/arm64/dynarec_arm64_pass3.h b/src/dynarec/arm64/dynarec_arm64_pass3.h
index b274cabb..a0d79f30 100644
--- a/src/dynarec/arm64/dynarec_arm64_pass3.h
+++ b/src/dynarec/arm64/dynarec_arm64_pass3.h
@@ -23,3 +23,5 @@
 #define INST_NAME(name) inst_name_pass3(dyn, ninst, name, rex)
 #define TABLE64(A, V)   {int val64offset = Table64(dyn, (V), 3); MESSAGE(LOG_DUMP, "  Table64: 0x%lx\n", (V)); LDRx_literal(A, val64offset);}
 #define FTABLE64(A, V)  {mmx87_regs_t v = {.d = V}; int val64offset = Table64(dyn, v.q, 3); MESSAGE(LOG_DUMP, "  FTable64: %g\n", v.d); VLDR64_literal(A, val64offset);}
+#define CALLRET_RET()   do {dyn->callrets[dyn->callret_size].type = 0; dyn->callrets[dyn->callret_size++].offs = dyn->native_size; EMIT(ARCH_NOP); } while(0)
+#define CALLRET_LOOP()   do {dyn->callrets[dyn->callret_size].type = 1; dyn->callrets[dyn->callret_size++].offs = dyn->native_size; EMIT(ARCH_NOP); } while(0)
\ No newline at end of file
diff --git a/src/dynarec/arm64/dynarec_arm64_private.h b/src/dynarec/arm64/dynarec_arm64_private.h
index 740e7e9e..1c5f7008 100644
--- a/src/dynarec/arm64/dynarec_arm64_private.h
+++ b/src/dynarec/arm64/dynarec_arm64_private.h
@@ -47,6 +47,7 @@ typedef union sse_cache_s {
         uint8_t write:1;
     };
 } sse_cache_t;
+typedef struct callret_s callret_t;
 typedef struct neoncache_s {
     // Neon cache
     neon_cache_t        neoncache[32];
@@ -157,6 +158,8 @@ typedef struct dynarec_arm_s {
     dynablock_t*        dynablock;
     instsize_t*         instsize;
     size_t              insts_size; // size of the instruction size array (calculated)
+    int                 callret_size;   // size of the array
+    callret_t*          callrets;   // array of callret return, with NOP / UDF depending if the block is clean or dirty
     uintptr_t           forward;    // address of the last end of code while testing forward
     uintptr_t           forward_to; // address of the next jump to (to check if everything is ok)
     int32_t             forward_size;   // size at the forward point
diff --git a/src/dynarec/dynablock.c b/src/dynarec/dynablock.c
index c12210b5..f0a8bbd9 100644
--- a/src/dynarec/dynablock.c
+++ b/src/dynarec/dynablock.c
@@ -50,15 +50,24 @@ dynablock_t* InvalidDynablock(dynablock_t* db, int need_lock)
         db->done = 0;
         db->gone = 1;
         uintptr_t db_size = db->x64_size;
+        #ifdef ARCH_NOP
+        if(db->callret_size) {
+            // block is being invalidated: turn every callret slot into UDF so a return landing there traps instead of running stale code
+            for(int i=0; i<db->callret_size; ++i)
+                *(uint32_t*)(db->block+db->callrets[i].offs) = ARCH_UDF;
+            ClearCache(db->block, db->size);
+        }
+        #endif
         if(db_size && my_context) {
             uint32_t n = rb_get(my_context->db_sizes, db_size);
             if(n>1)
                 rb_set(my_context->db_sizes, db_size, db_size+1, n-1);
-            else
+            else {
                 rb_unset(my_context->db_sizes, db_size, db_size+1);
-            if(db_size == my_context->max_db_size) {
-                my_context->max_db_size = rb_get_righter(my_context->db_sizes);
-                dynarec_log(LOG_INFO, "BOX64 Dynarec: lower max_db=%d\n", my_context->max_db_size);
+                if(db_size == my_context->max_db_size) {
+                    my_context->max_db_size = rb_get_righter(my_context->db_sizes);
+                    dynarec_log(LOG_INFO, "BOX64 Dynarec: lower max_db=%d\n", my_context->max_db_size);
+                }
             }
         }
         if(need_lock)
@@ -135,7 +144,14 @@ void MarkDynablock(dynablock_t* db)
                 else
                     db->previous = old;
             }
+        } 
+        #ifdef ARCH_NOP
+        else if(db->callret_size) {
+            // mark all callrets to UDF
+            for(int i=0; i<db->callret_size; ++i)
+                *(uint32_t*)(db->block+db->callrets[i].offs) = ARCH_UDF;
         }
+        #endif
     }
 }
 
@@ -290,8 +306,17 @@ dynablock_t* DBGetBlock(x64emu_t* emu, uintptr_t addr, int create, int is32bits)
             dynarec_log(LOG_DEBUG, "Validating block %p from %p:%p (hash:%X, always_test:%d) for %p\n", db, db->x64_addr, db->x64_addr+db->x64_size-1, db->hash, db->always_test, (void*)addr);
             if(db->always_test)
                 protectDB((uintptr_t)db->x64_addr, db->x64_size);
-            else
+            else {
+                #ifdef ARCH_NOP
+                if(db->callret_size) {
+                    // block validated as clean: restore all callrets to NOP
+                    for(int i=0; i<db->callret_size; ++i)
+                        *(uint32_t*)(db->block+db->callrets[i].offs) = ARCH_NOP;
+                    ClearCache(db->block, db->size);
+                }
+                #endif
                 protectDBJumpTable((uintptr_t)db->x64_addr, db->x64_size, db->block, db->jmpnext);
+            }
         }
         if(!need_lock)
             mutex_unlock(&my_context->mutex_dyndump);
@@ -326,8 +351,17 @@ dynablock_t* DBAlternateBlock(x64emu_t* emu, uintptr_t addr, uintptr_t filladdr,
         } else {
             if(db->always_test)
                 protectDB((uintptr_t)db->x64_addr, db->x64_size);
-            else
+            else {
+                #ifdef ARCH_NOP
+                if(db->callret_size) {
+                    // block validated as clean: restore all callrets to NOP
+                    for(int i=0; i<db->callret_size; ++i)
+                        *(uint32_t*)(db->block+db->callrets[i].offs) = ARCH_NOP;
+                    ClearCache(db->block, db->size);
+                }
+                #endif
                 protectDBJumpTable((uintptr_t)db->x64_addr, db->x64_size, db->block, db->jmpnext);
+            }
         }
         if(!need_lock)
             mutex_unlock(&my_context->mutex_dyndump);
diff --git a/src/dynarec/dynablock_private.h b/src/dynarec/dynablock_private.h
index b9e5f55d..8e174a63 100644
--- a/src/dynarec/dynablock_private.h
+++ b/src/dynarec/dynablock_private.h
@@ -6,6 +6,11 @@ typedef struct instsize_s {
     unsigned char nat:4;
 } instsize_t;
 
+typedef struct callret_s {
+    uint32_t    offs:31;    // byte offset of the patchable NOP/UDF slot, relative to the start of the native block
+    uint32_t    type:1;     // 0: slot emitted by CALLRET_RET, 1: slot emitted by CALLRET_LOOP
+} callret_t;
+
 typedef struct dynablock_s {
     void*           block;  // block-sizeof(void*) == self
     void*           actual_block;   // the actual start of the block (so block-sizeof(void*))
@@ -23,6 +28,8 @@ typedef struct dynablock_s {
     instsize_t*     instsize;
     void*           arch;       // arch dependant per inst info (can be NULL)
     size_t          arch_size;  // size of of arch dependant infos
+    int             callret_size;   // size of the array
+    callret_t*      callrets;   // array of callret return, with NOP / UDF depending if the block is clean or dirty
     void*           jmpnext;    // a branch jmpnext code when block is marked
 } dynablock_t;
 
diff --git a/src/dynarec/dynarec_arch.h b/src/dynarec/dynarec_arch.h
index eaf64fd8..44d767b2 100644
--- a/src/dynarec/dynarec_arch.h
+++ b/src/dynarec/dynarec_arch.h
@@ -32,6 +32,9 @@
 #define ARCH_UNALIGNED(A, B) arch_unaligned(A, B)

 extern uint32_t arm64_crc(void* p, uint32_t len);

 #define ARCH_CRC(A, B)  if(arm64_crc32) return arm64_crc(A, B)

+

+#define ARCH_NOP    0b11010101000000110010000000011111

+#define ARCH_UDF    0xcafe

 #elif defined(LA64)

 

 #define instruction_native_t        instruction_la64_t

diff --git a/src/dynarec/dynarec_native.c b/src/dynarec/dynarec_native.c
index 58aa4493..df6ab348 100644
--- a/src/dynarec/dynarec_native.c
+++ b/src/dynarec/dynarec_native.c
@@ -504,6 +504,7 @@ static int static_jmps[MAX_INSTS+2];
 static uintptr_t static_next[MAX_INSTS+2];
 static uint64_t static_table64[(MAX_INSTS+3)/4];
 static instruction_native_t static_insts[MAX_INSTS+2] = {0};
+static callret_t static_callrets[MAX_INSTS+2] = {0};
 // TODO: ninst could be a uint16_t instead of an int, that could same some temp. memory
 
 void ClearCache(void* start, size_t len)
@@ -653,7 +654,12 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr, int alternate, int is32bit
         int i = helper.jmps[ii];
         uintptr_t j = helper.insts[i].x64.jmp;
         helper.insts[i].x64.jmp_insts = -1;
-        if(j<start || j>=end || j==helper.insts[i].x64.addr) {
+        #ifndef ARCH_NOP
+        if(j<start || j>=end || j==helper.insts[i].x64.addr)
+        #else
+        if(j<start || j>=end)
+        #endif
+        {
             helper.insts[i].x64.need_after |= X_PEND;
         } else {
             // find jump address instruction
@@ -691,8 +697,12 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr, int alternate, int is32bit
                     helper.insts[k].x64.barrier |= BARRIER_FULL;
                 // special case, loop on itself with some nop in between
                 if(k<i && !helper.insts[i].x64.has_next && is_nops(&helper, helper.insts[k].x64.addr, helper.insts[i].x64.addr-helper.insts[k].x64.addr)) {
+                    #ifndef ARCH_NOP
                     helper.always_test = 1;
                     k = -1;
+                    #else
+                    helper.insts[k].x64.self_loop = 1;
+                    #endif
                 }
                 helper.insts[i].x64.jmp_insts = k;
             }
@@ -737,7 +747,11 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr, int alternate, int is32bit
     for(int ii=0; ii<helper.jmp_sz && !helper.always_test; ++ii) {
         int i = helper.jmps[ii];
         if(helper.insts[i].x64.alive && (helper.insts[i].x64.jmp==helper.insts[i].x64.addr)) {
+            #ifndef ARCH_NOP
             helper.always_test = 1;
+            #else
+            helper.insts[i].x64.self_loop = 1;
+            #endif
         }
     }
     // no need for next anymore
@@ -753,6 +767,7 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr, int alternate, int is32bit
     }
     
     // pass 2, instruction size
+    helper.callrets = static_callrets;
     native_pass2(&helper, addr, alternate, is32bits, inst_max);
     if(helper.abort) {
         if(BOX64DRENV(dynarec_dump) || BOX64ENV(dynarec_log))dynarec_log(LOG_NONE, "Abort dynablock on pass2\n");
@@ -778,15 +793,17 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr, int alternate, int is32bit
     size_t insts_rsize = (helper.insts_size+2)*sizeof(instsize_t);
     insts_rsize = (insts_rsize+7)&~7;   // round the size...
     size_t arch_size = ARCH_SIZE(&helper);
+    size_t callret_size = helper.callret_size*4;
     // ok, now allocate mapped memory, with executable flag on
-    size_t sz = sizeof(void*) + native_size + helper.table64size*sizeof(uint64_t) + 4*sizeof(void*) + insts_rsize + arch_size;
-    //           dynablock_t*     block (arm insts)            table64               jmpnext code       instsize     arch
+    size_t sz = sizeof(void*) + native_size + helper.table64size*sizeof(uint64_t) + 4*sizeof(void*) + insts_rsize + arch_size + callret_size;
+    //           dynablock_t*     block (arm insts)            table64               jmpnext code       instsize     arch         callrets
     void* actual_p = (void*)AllocDynarecMap(sz);
     void* p = (void*)(((uintptr_t)actual_p) + sizeof(void*));
     void* tablestart = p + native_size;
     void* next = tablestart + helper.table64size*sizeof(uint64_t);
     void* instsize = next + 4*sizeof(void*);
     void* arch = instsize + insts_rsize;
+    void* callrets = arch + arch_size;
     if(actual_p==NULL) {
         dynarec_log(LOG_INFO, "AllocDynarecMap(%p, %zu) failed, canceling block\n", block, sz);
         CancelBlock64(0);
@@ -801,9 +818,13 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr, int alternate, int is32bit
     *(dynablock_t**)actual_p = block;
     helper.table64cap = helper.table64size;
     helper.table64 = (uint64_t*)helper.tablestart;
+    helper.callrets = (callret_t*)callrets;
+    if(callret_size)
+        memcpy(helper.callrets, static_callrets, helper.callret_size*sizeof(callret_t));
+    helper.callret_size = 0;
     // pass 3, emit (log emit native opcode)
     if(BOX64DRENV(dynarec_dump)) {
-        dynarec_log(LOG_NONE, "%s%04d|Emitting %zu bytes for %u %s bytes (native=%zu, table64=%zu, instsize=%zu, arch=%zu)", (BOX64DRENV(dynarec_dump)>1)?"\e[01;36m":"", GetTID(), helper.native_size, helper.isize, is32bits?"x86":"x64", native_size, helper.table64size*sizeof(uint64_t), insts_rsize, arch_size); 
+        dynarec_log(LOG_NONE, "%s%04d|Emitting %zu bytes for %u %s bytes (native=%zu, table64=%zu, instsize=%zu, arch=%zu, callrets=%zu)", (BOX64DRENV(dynarec_dump)>1)?"\e[01;36m":"", GetTID(), helper.native_size, helper.isize, is32bits?"x86":"x64", native_size, helper.table64size*sizeof(uint64_t), insts_rsize, arch_size, callret_size);
         printFunctionAddr(helper.start, " => ");
         dynarec_log(LOG_NONE, "%s\n", (BOX64DRENV(dynarec_dump)>1)?"\e[m":"");
     }
@@ -846,6 +867,8 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr, int alternate, int is32bit
         block->arch = NULL;
         block->arch_size = arch_size;
     }
+    block->callret_size = helper.callret_size;
+    block->callrets = helper.callrets;
     *(dynablock_t**)next = block;
     *(void**)(next+3*sizeof(void*)) = native_next;
     CreateJmpNext(block->jmpnext, next+3*sizeof(void*));
@@ -889,7 +912,7 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr, int alternate, int is32bit
         printf_log(LOG_NONE, "Warning, insts_size difference in block between pass2 (%zu) and pass3 (%zu), allocated: %zu\n", oldinstsize, helper.insts_size, insts_rsize/sizeof(instsize_t));
     }
     if(!isprotectedDB(addr, end-addr)) {
-        dynarec_log(LOG_DEBUG, "Warning, block unprotected while being processed %p:%ld, marking as need_test\n", block->x64_addr, block->x64_size);
+        dynarec_log(LOG_INFO, "Warning, block unprotected while being processed %p:%ld, marking as need_test\n", block->x64_addr, block->x64_size);
         block->dirty = 1;
         //protectDB(addr, end-addr);
     }
@@ -898,7 +921,13 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr, int alternate, int is32bit
         block->always_test = 1;
     }
     if(block->always_test) {
-        dynarec_log(LOG_DEBUG, "Note: block marked as always dirty %p:%ld\n", block->x64_addr, block->x64_size);
+        dynarec_log(LOG_INFO, "Note: block marked as always dirty %p:%ld\n", block->x64_addr, block->x64_size);
+        #ifdef ARCH_NOP
+        // mark callrets to trigger SIGILL to check clean state
+        if(block->callret_size)
+            for(int i=0; i<block->callret_size; ++i)
+                *(uint32_t*)(block->block+block->callrets[i].offs) = ARCH_UDF;
+        #endif
     }
     current_helper = NULL;
     //block->done = 1;
diff --git a/src/dynarec/dynarec_native_pass.c b/src/dynarec/dynarec_native_pass.c
index bf30503c..99cb2449 100644
--- a/src/dynarec/dynarec_native_pass.c
+++ b/src/dynarec/dynarec_native_pass.c
@@ -14,6 +14,7 @@
 #include "x64trace.h"
 #include "dynablock.h"
 #include "dynarec_native.h"
+#include "dynablock_private.h"
 #include "custommem.h"
 #include "elfloader.h"
 #include "x64test.h"
@@ -114,6 +115,10 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr, int alternate, int
         dyn->f.dfnone_here = 0;
         NEW_INST;
         MESSAGE(LOG_DUMP, "New Instruction %s:%p, native:%p\n", is32bits?"x86":"x64",(void*)addr, (void*)dyn->block);
+        #ifdef ARCH_NOP
+        if(dyn->insts[ninst].x64.alive && dyn->insts[ninst].x64.self_loop)
+            CALLRET_LOOP();
+        #endif
         if(!ninst) {
             GOTEST(x1, x2);
         }
diff --git a/src/dynarec/dynarec_private.h b/src/dynarec/dynarec_private.h
index 111e7c74..9e3c55be 100644
--- a/src/dynarec/dynarec_private.h
+++ b/src/dynarec/dynarec_private.h
@@ -41,6 +41,7 @@ typedef struct instruction_x64_s {
     uint8_t     has_next:1;   // does this opcode can continue to the next?
     uint8_t     has_callret:1;    // this instruction have an optimized call setup
     uint8_t     alive:1;    // this opcode gets executed (0 if dead code in that block)
+    uint8_t     self_loop:1;    // this is a landing address for a self-loop (loop on itself with no exit)
     uint8_t     barrier;    // next instruction is a jump point, so no optim allowed
     uint8_t     state_flags;// One of SF_XXX state
     uint8_t     use_flags;  // 0 or combination of X_?F
diff --git a/src/dynarec/la64/dynarec_la64_private.h b/src/dynarec/la64/dynarec_la64_private.h
index 7c40ca27..0246007e 100644
--- a/src/dynarec/la64/dynarec_la64_private.h
+++ b/src/dynarec/la64/dynarec_la64_private.h
@@ -68,6 +68,8 @@ typedef struct flagcache_s {
     uint8_t             dfnone_here;// defered flags is cleared in this opcode
 } flagcache_t;
 
+typedef struct callret_s callret_t;
+
 typedef struct instruction_la64_s {
     instruction_x64_t   x64;
     uintptr_t           address;    // (start) address of the arm emitted instruction
@@ -134,6 +136,8 @@ typedef struct dynarec_la64_s {
     dynablock_t*         dynablock;
     instsize_t*          instsize;
     size_t               insts_size; // size of the instruction size array (calculated)
+    int                  callret_size;   // size of the array
+    callret_t*           callrets;   // array of callret return, with NOP / UDF depending if the block is clean or dirty
     uintptr_t            forward;    // address of the last end of code while testing forward
     uintptr_t            forward_to; // address of the next jump to (to check if everything is ok)
     int32_t              forward_size;   // size at the forward point
diff --git a/src/dynarec/rv64/dynarec_rv64_private.h b/src/dynarec/rv64/dynarec_rv64_private.h
index 99281462..7519d99c 100644
--- a/src/dynarec/rv64/dynarec_rv64_private.h
+++ b/src/dynarec/rv64/dynarec_rv64_private.h
@@ -100,6 +100,8 @@ typedef struct flagcache_s {
     uint8_t             dfnone_here;// defered flags is cleared in this opcode
 } flagcache_t;
 
+typedef struct callret_s callret_t;
+
 typedef struct instruction_rv64_s {
     instruction_x64_t   x64;
     uintptr_t           address;    // (start) address of the riscv emitted instruction
@@ -169,6 +171,8 @@ typedef struct dynarec_rv64_s {
     dynablock_t*        dynablock;
     instsize_t*         instsize;
     size_t              insts_size; // size of the instruction size array (calculated)
+    int                 callret_size;   // size of the array
+    callret_t*          callrets;   // array of callret return, with NOP / UDF depending if the block is clean or dirty
     uint8_t             smwrite;    // for strongmem model emulation
     uintptr_t           forward;    // address of the last end of code while testing forward
     uintptr_t           forward_to; // address of the next jump to (to check if everything is ok)