From c4b57774f0b4794ad22e3e8c9fb4be760431b64e Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Thu, 2 Mar 2023 19:38:23 +0100 Subject: [DYNAREC] Finetuned the jump table memory allocation --- src/custommem.c | 229 ++++++++++++++++++------------- src/dynarec/arm64/dynarec_arm64_helper.c | 24 ++-- src/include/box64context.h | 1 - src/include/custommem.h | 13 ++ 4 files changed, 160 insertions(+), 107 deletions(-) (limited to 'src') diff --git a/src/custommem.c b/src/custommem.c index e261b62f..f0936b73 100644 --- a/src/custommem.c +++ b/src/custommem.c @@ -33,10 +33,11 @@ // init inside dynablocks.c static mmaplist_t *mmaplist = NULL; -static uintptr_t*** box64_jmptbl3[1<chunks[i].block = p; list->chunks[i].first = p; @@ -607,110 +623,112 @@ void cleanDBFromAddressRange(uintptr_t addr, size_t size, int destroy) } } -int addJumpTableIfDefault64(void* addr, void* jmp) +static uintptr_t *create_jmptbl(uintptr_t idx0, uintptr_t idx1, uintptr_t idx2, uintptr_t idx3) { - uintptr_t idx3, idx2, idx1, idx0; - idx3 = (((uintptr_t)addr)>>48)&0xffff; - idx2 = (((uintptr_t)addr)>>32)&0xffff; - idx1 = (((uintptr_t)addr)>>16)&0xffff; - idx0 = (((uintptr_t)addr) )&0xffff; if(box64_jmptbl3[idx3] == box64_jmptbldefault2) { - uintptr_t*** tbl = (uintptr_t***)box_malloc((1<>JMPTABL_START3)&JMPTABLE_MASK3; + idx2 = (((uintptr_t)addr)>>JMPTABL_START2)&JMPTABLE_MASK2; + idx1 = (((uintptr_t)addr)>>JMPTABL_START1)&JMPTABLE_MASK1; + idx0 = (((uintptr_t)addr) )&JMPTABLE_MASK0; - return (native_lock_storeifref(&box64_jmptbl3[idx3][idx2][idx1][idx0], jmp, native_next)==jmp)?1:0; + return (native_lock_storeifref(create_jmptbl(idx0, idx1, idx2, idx3), jmp, native_next)==jmp)?1:0; } void setJumpTableDefault64(void* addr) { uintptr_t idx3, idx2, idx1, idx0; - idx3 = (((uintptr_t)addr)>>48)&0xffff; + idx3 = (((uintptr_t)addr)>>JMPTABL_START3)&JMPTABLE_MASK3; if(box64_jmptbl3[idx3] == box64_jmptbldefault2) return; - idx2 = (((uintptr_t)addr)>>32)&0xffff; + idx2 = (((uintptr_t)addr)>>JMPTABL_START2)&JMPTABLE_MASK2; if(box64_jmptbl3[idx3][idx2] == box64_jmptbldefault1) return; - idx1 = (((uintptr_t)addr)>>16)&0xffff; + idx1 = (((uintptr_t)addr)>>JMPTABL_START1)&JMPTABLE_MASK1; if(box64_jmptbl3[idx3][idx2][idx1] == box64_jmptbldefault0) return; - idx0 = (((uintptr_t)addr) )&0xffff; + idx0 = (((uintptr_t)addr) )&JMPTABLE_MASK0; box64_jmptbl3[idx3][idx2][idx1][idx0] = (uintptr_t)native_next; } void setJumpTableDefaultRef64(void* addr, void* jmp) { uintptr_t idx3, idx2, idx1, idx0; - idx3 = (((uintptr_t)addr)>>48)&0xffff; + idx3 = (((uintptr_t)addr)>>JMPTABL_START3)&JMPTABLE_MASK3; if(box64_jmptbl3[idx3] == box64_jmptbldefault2) return; - idx2 = (((uintptr_t)addr)>>32)&0xffff; + idx2 = (((uintptr_t)addr)>>JMPTABL_START2)&JMPTABLE_MASK2; if(box64_jmptbl3[idx3][idx2] == box64_jmptbldefault1) return; - idx1 = (((uintptr_t)addr)>>16)&0xffff; + idx1 = (((uintptr_t)addr)>>JMPTABL_START1)&JMPTABLE_MASK1; if(box64_jmptbl3[idx3][idx2][idx1] == box64_jmptbldefault0) return; - idx0 = (((uintptr_t)addr) )&0xffff; + idx0 = (((uintptr_t)addr) )&JMPTABLE_MASK0; native_lock_storeifref(&box64_jmptbl3[idx3][idx2][idx1][idx0], native_next, jmp); } int setJumpTableIfRef64(void* addr, void* jmp, void* ref) { uintptr_t idx3, idx2, idx1, idx0; - idx3 = (((uintptr_t)addr)>>48)&0xffff; - idx2 = (((uintptr_t)addr)>>32)&0xffff; - idx1 = (((uintptr_t)addr)>>16)&0xffff; - idx0 = (((uintptr_t)addr) )&0xffff; - if(box64_jmptbl3[idx3] == box64_jmptbldefault2) { - uintptr_t*** tbl = (uintptr_t***)box_malloc((1<>JMPTABL_START3)&JMPTABLE_MASK3; + idx2 = (((uintptr_t)addr)>>JMPTABL_START2)&JMPTABLE_MASK2; + idx1 = (((uintptr_t)addr)>>JMPTABL_START1)&JMPTABLE_MASK1; + idx0 = (((uintptr_t)addr) )&JMPTABLE_MASK0; + return (native_lock_storeifref(create_jmptbl(idx0, idx1, idx2, idx3), jmp, ref)==jmp)?1:0; } int isJumpTableDefault64(void* addr) { uintptr_t idx3, idx2, idx1, idx0; - idx3 = (((uintptr_t)addr)>>48)&0xffff; + idx3 = (((uintptr_t)addr)>>JMPTABL_START3)&JMPTABLE_MASK3; if(box64_jmptbl3[idx3] == box64_jmptbldefault2) return 1; - idx2 = (((uintptr_t)addr)>>32)&0xffff; + idx2 = (((uintptr_t)addr)>>JMPTABL_START2)&JMPTABLE_MASK2; if(box64_jmptbl3[idx3][idx2] == box64_jmptbldefault1) return 1; - idx1 = (((uintptr_t)addr)>>16)&0xffff; + idx1 = (((uintptr_t)addr)>>JMPTABL_START1)&JMPTABLE_MASK1; if(box64_jmptbl3[idx3][idx2][idx1] == box64_jmptbldefault0) return 1; - idx0 = (((uintptr_t)addr) )&0xffff; + idx0 = (((uintptr_t)addr) )&JMPTABLE_MASK0; return (box64_jmptbl3[idx3][idx2][idx1][idx0]==(uintptr_t)native_next)?1:0; } uintptr_t getJumpTable64() @@ -721,42 +739,20 @@ uintptr_t getJumpTable64() uintptr_t getJumpTableAddress64(uintptr_t addr) { uintptr_t idx3, idx2, idx1, idx0; - idx3 = ((addr)>>48)&0xffff; - idx2 = ((addr)>>32)&0xffff; - idx1 = ((addr)>>16)&0xffff; - idx0 = ((addr) )&0xffff; - if(box64_jmptbl3[idx3] == box64_jmptbldefault2) { - uintptr_t*** tbl = (uintptr_t***)box_malloc((1<>JMPTABL_START3)&JMPTABLE_MASK3; + idx2 = ((addr)>>JMPTABL_START2)&JMPTABLE_MASK2; + idx1 = ((addr)>>JMPTABL_START1)&JMPTABLE_MASK1; + idx0 = ((addr) )&JMPTABLE_MASK0; + return (uintptr_t)create_jmptbl(idx0, idx1, idx2, idx3); } dynablock_t* getDB(uintptr_t addr) { uintptr_t idx3, idx2, idx1, idx0; - idx3 = ((addr)>>48)&0xffff; - idx2 = ((addr)>>32)&0xffff; - idx1 = ((addr)>>16)&0xffff; - idx0 = ((addr) )&0xffff; + idx3 = ((addr)>>JMPTABL_START3)&JMPTABLE_MASK3; + idx2 = ((addr)>>JMPTABL_START2)&JMPTABLE_MASK2; + idx1 = ((addr)>>JMPTABL_START1)&JMPTABLE_MASK1; + idx0 = ((addr) )&JMPTABLE_MASK0; /*if(box64_jmptbl3[idx3] == box64_jmptbldefault2) { return NULL; } @@ -788,6 +784,10 @@ void protectDB(uintptr_t addr, uintptr_t size) if(memprot[i].prot==memprot_default) { uint8_t* newblock = box_calloc(1<<16, sizeof(uint8_t)); memprot[i].prot = newblock; +#ifdef TRACE_MEMSTAT + memprot_allocated += (1<<16) * sizeof(uint8_t); + if (memprot_allocated > memprot_max_allocated) memprot_max_allocated = memprot_allocated; +#endif } for (uintptr_t i=idx; i<=end; ++i) { uint32_t prot = memprot[i>>16].prot[i&0xffff]; @@ -966,6 +966,10 @@ void updateProtection(uintptr_t addr, size_t size, uint32_t prot) if(memprot[i].prot==memprot_default) { uint8_t* newblock = box_calloc(1<<16, sizeof(uint8_t)); memprot[i].prot = newblock; +#ifdef TRACE_MEMSTAT + memprot_allocated += (1<<16) * sizeof(uint8_t); + if (memprot_allocated > memprot_max_allocated) memprot_max_allocated = memprot_allocated; +#endif } for (uintptr_t i=idx; i<=end; ++i) { uint32_t old_prot = memprot[i>>16].prot[i&0xffff]; @@ -994,6 +998,10 @@ void setProtection(uintptr_t addr, size_t size, uint32_t prot) if(memprot[i].prot==memprot_default && prot) { uint8_t* newblock = box_calloc(MEMPROT_SIZE, sizeof(uint8_t)); memprot[i].prot = newblock; +#ifdef TRACE_MEMSTAT + memprot_allocated += (1<<16) * sizeof(uint8_t); + if (memprot_allocated > memprot_max_allocated) memprot_max_allocated = memprot_allocated; +#endif } if(prot || memprot[i].prot!=memprot_default) { uintptr_t bstart = ((i<<16) memprot_max_allocated) memprot_max_allocated = memprot_allocated; +#endif } } native_lock_storeb(&memprot[idx].hot[base], box64_dynarec_hotpage); @@ -1157,6 +1170,9 @@ void freeProtection(uintptr_t addr, size_t size) if(start==0 && finish==MEMPROT_SIZE-1) { memprot[key].prot = memprot_default; box_free(block); +#ifdef TRACE_MEMSTAT + memprot_allocated -= (1<<16) * sizeof(uint8_t); +#endif } else { memset(block+start, 0, (finish-start+1)*sizeof(uint8_t)); // blockempty is quite slow, so disable the free of blocks for now @@ -1170,6 +1186,9 @@ void freeProtection(uintptr_t addr, size_t size) uint8_t *hot = memprot[key].hot; memprot[key].hot = NULL; box_free(hot); +#ifdef TRACE_MEMSTAT + memprot_allocated -= (1<<16) * sizeof(uint8_t); +#endif } i+=finish-start; // +1 from the "for" loop } @@ -1323,12 +1342,14 @@ void init_custommem_helper(box64context_t* ctx) init_mutexes(); #ifdef DYNAREC if(box64_dynarec) - for(int i=0; i<(1< njmps_in_lv1_max) njmps_in_lv1_max = njmps_in_cur_lv1; + } + } + } + printf_log(LOG_INFO, "Allocation:\n- dynarec: %lld kio\n- customMalloc: %lld kio\n- memprot: %lld kio (peak at %lld kio)\n- jump table: %lld kio (%lld level 3, %lld level 2, %lld level 1 table allocated, for %lld jumps, with at most %lld per level 1)\n", dynarec_allocated / 1024, customMalloc_allocated / 1024, memprot_allocated / 1024, memprot_max_allocated / 1024, jmptbl_allocated / 1024, jmptbl_allocated3, jmptbl_allocated2, jmptbl_allocated1, njmps, njmps_in_lv1_max); +#endif if(!inited) return; inited = 0; @@ -1370,11 +1411,11 @@ void fini_custommem_helper(box64context_t *ctx) } box_free(mmaplist); - for (int i3=0; i3<(1<