diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2021-10-24 19:06:23 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2021-10-24 19:06:23 +0200 |
| commit | 2bd3a3ccfae661e764ba8dded64dcf8203704728 (patch) | |
| tree | 79f48bdb9b02f07d1953979034c5320281c3593a /src/custommem.c | |
| parent | 1886c454cabad5470aa73195096e8b7b5a284186 (diff) | |
| download | box64-2bd3a3ccfae661e764ba8dded64dcf8203704728.tar.gz box64-2bd3a3ccfae661e764ba8dded64dcf8203704728.zip | |
Optimized (and small fixes) to custom allocator ([DYNAREC] Speedup long launch time)
Diffstat (limited to 'src/custommem.c')
| -rw-r--r-- | src/custommem.c | 202 |
1 files changed, 134 insertions, 68 deletions
diff --git a/src/custommem.c b/src/custommem.c index a2c3e992..92d6a4f4 100644 --- a/src/custommem.c +++ b/src/custommem.c @@ -32,8 +32,9 @@ KHASH_MAP_INIT_INT64(dynablocks, dynablock_t*) static dynablocklist_t*** dynmap123[1<<DYNAMAP_SHIFT]; // 64bits.. in 4x16bits array static pthread_mutex_t mutex_mmap; -static mmaplist_t *mmaplist; -static size_t mmapsize; +static mmaplist_t *mmaplist = NULL; +static size_t mmapsize = 0; +static size_t mmapcap = 0; static kh_dynablocks_t *dblist_oversized; // store the list of oversized dynablocks (normal sized are inside mmaplist) static uintptr_t*** box64_jmptbl3[1<<JMPTABL_SHIFT]; static uintptr_t** box64_jmptbldefault2[1<<JMPTABL_SHIFT]; @@ -62,8 +63,8 @@ static blocklist_t* p_blocks = NULL; // actual blocks for custom mallo typedef union mark_s { struct { - unsigned int fill:1; unsigned int size:31; + unsigned int fill:1; }; uint32_t x32; } mark_t; @@ -72,86 +73,123 @@ typedef struct blockmark_s { mark_t next; } blockmark_t; +#define NEXT_BLOCK(b) (blockmark_t*)((uintptr_t)(b) + (b)->next.size + sizeof(blockmark_t)) +#define PREV_BLOCK(b) (blockmark_t*)(((uintptr_t)(b) - (b)->prev.size) - sizeof(blockmark_t)) +#define LAST_BLOCK(b, s) (blockmark_t*)(((uintptr_t)(b)+(s))-sizeof(blockmark_t)) + +static void printBlock(blockmark_t* b, void* start) +{ + printf_log(LOG_INFO, "========== Block is:\n"); + do { + printf_log(LOG_INFO, "%c%p, fill=%d, size=0x%x (prev=%d/0x%x)\n", b==start?'*':' ', b, b->next.fill, b->next.size, b->prev.fill, b->prev.size); + b = NEXT_BLOCK(b); + } while(b->next.x32); + printf_log(LOG_INFO, "===================\n"); +} // get first subblock free in block. Return NULL if no block, else first subblock free (mark included), filling size -static void* getFirstBlock(void* block, size_t maxsize, size_t* size) +static void* getFirstBlock(void* block, size_t maxsize, size_t* size, void* start) { // get start of block - blockmark_t *m = (blockmark_t*)block; + blockmark_t *m = (blockmark_t*)((start)?start:block); while(m->next.x32) { // while there is a subblock - if(!m->next.fill && m->next.size>=maxsize+sizeof(blockmark_t)) { - *size = m->next.size-sizeof(blockmark_t); + if(!m->next.fill && m->next.size>=maxsize) { + *size = m->next.size; return m; } - m = (blockmark_t*)((uintptr_t)m + m->next.size); + m = NEXT_BLOCK(m); } return NULL; } -static size_t getMaxFreeBlock(void* block, size_t block_size) +static size_t getMaxFreeBlock(void* block, size_t block_size, void* start) { // get start of block - blockmark_t *m = (blockmark_t*)((uintptr_t)block+block_size-sizeof(blockmark_t)); // start with the end - int maxsize = 0; - while(m->prev.x32) { // while there is a subblock - if(!m->prev.fill && m->prev.size>maxsize) { - maxsize = m->prev.size; - if((uintptr_t)block+maxsize>(uintptr_t)m) - return (maxsize>=sizeof(blockmark_t))?(maxsize-sizeof(blockmark_t)):0; // no block large enough left... + if(start) { + blockmark_t *m = (blockmark_t*)start; + int maxsize = 0; + while(m->next.x32) { // while there is a subblock + if(!m->next.fill && m->next.size>maxsize) { + maxsize = m->next.size; + } + m = NEXT_BLOCK(m); } - m = (blockmark_t*)((uintptr_t)m - m->prev.size); + return (maxsize>=sizeof(blockmark_t))?maxsize:0; + } else { + blockmark_t *m = LAST_BLOCK(block, block_size); // start with the end + int maxsize = 0; + while(m->prev.x32) { // while there is a subblock + if(!m->prev.fill && m->prev.size>maxsize) { + maxsize = m->prev.size; + if((uintptr_t)block+maxsize>(uintptr_t)m) + return (maxsize>=sizeof(blockmark_t))?maxsize:0; // no block large enough left... + } + m = PREV_BLOCK(m); + } + return (maxsize>=sizeof(blockmark_t))?maxsize:0; } - return (maxsize>=sizeof(blockmark_t))?(maxsize-sizeof(blockmark_t)):0; } -static void* allocBlock(void* block, void *sub, size_t size) +static void* allocBlock(void* block, void *sub, size_t size, void** pstart) { (void)block; blockmark_t *s = (blockmark_t*)sub; - blockmark_t *n = (blockmark_t*)((uintptr_t)s + s->next.size); + blockmark_t *n = NEXT_BLOCK(s); s->next.fill = 1; - s->next.size = size+sizeof(blockmark_t); - blockmark_t *m = (blockmark_t*)((uintptr_t)s + s->next.size); // this is new n + // check if a new mark is worth it + if(s->next.size>size+2*sizeof(blockmark_t)) + s->next.size = size; + blockmark_t *m = NEXT_BLOCK(s); // this is new n m->prev.fill = 1; - m->prev.size = s->next.size; if(n!=m) { // new mark - m->prev.fill = 1; m->prev.size = s->next.size; m->next.fill = 0; - m->next.size = (uintptr_t)n - (uintptr_t)m; + m->next.size = ((uintptr_t)n - (uintptr_t)m) - sizeof(blockmark_t); n->prev.fill = 0; n->prev.size = m->next.size; } + if(pstart && sub==*pstart) { + // get the next free block + m = (blockmark_t*)*pstart; + while(m->next.fill) + m = NEXT_BLOCK(m); + *pstart = (void*)m; + } return (void*)((uintptr_t)sub + sizeof(blockmark_t)); } -static void freeBlock(void *block, void* sub) +static size_t freeBlock(void *block, void* sub, void** pstart) { blockmark_t *m = (blockmark_t*)block; blockmark_t *s = (blockmark_t*)sub; - blockmark_t *n = (blockmark_t*)((uintptr_t)s + s->next.size); + blockmark_t *n = NEXT_BLOCK(s); if(block!=sub) - m = (blockmark_t*)((uintptr_t)s - s->prev.size); + m = PREV_BLOCK(s); s->next.fill = 0; n->prev.fill = 0; // check if merge with previous if (s->prev.x32 && !s->prev.fill) { // remove s... - m->next.size += s->next.size; + m->next.size += s->next.size + sizeof(blockmark_t); n->prev.size = m->next.size; s = m; } // check if merge with next if(n->next.x32 && !n->next.fill) { - blockmark_t *n2 = (blockmark_t*)((uintptr_t)n + n->next.size); + blockmark_t *n2 = NEXT_BLOCK(n); //remove n - s->next.size += n->next.size; + s->next.size += n->next.size + sizeof(blockmark_t); n2->prev.size = s->next.size; } + if(pstart && (uintptr_t)*pstart>(uintptr_t)sub) { + *pstart = (void*)s; + } + // return free size at current block (might be bigger) + return s->next.size; } // return 1 if block has been expanded to new size, 0 if not static int expandBlock(void* block, void* sub, size_t newsize) @@ -160,15 +198,21 @@ static int expandBlock(void* block, void* sub, size_t newsize) newsize = (newsize+3)&~3; blockmark_t *s = (blockmark_t*)sub; - blockmark_t *n = (blockmark_t*)((uintptr_t)s + s->next.size); + blockmark_t *n = NEXT_BLOCK(s); + if(s->next.size>=newsize) + // big enough, no shrinking... + return 1; if(s->next.fill) return 0; // next block is filled // unsigned bitfield of this length gets "promoted" to *signed* int... - if((size_t)(s->next.size + n->next.size) < newsize) + if((size_t)(s->next.size + n->next.size + sizeof(blockmark_t)) < newsize) return 0; // free space too short // ok, doing the alloc! - s->next.size = newsize+sizeof(blockmark_t); - blockmark_t *m = (blockmark_t*)((uintptr_t)s + s->next.size); // this is new n + if((s->next.size+n->next.size+sizeof(blockmark_t))-newsize<2*sizeof(blockmark_t)) + s->next.size += n->next.size+sizeof(blockmark_t); + else + s->next.size = newsize+sizeof(blockmark_t); + blockmark_t *m = NEXT_BLOCK(s); // this is new n m->prev.fill = 1; m->prev.size = s->next.size; if(n!=m) { @@ -197,11 +241,11 @@ void* customMalloc(size_t size) for(int i=0; i<n_blocks; ++i) { if(p_blocks[i].maxfree>=size) { size_t rsize = 0; - sub = getFirstBlock(p_blocks[i].block, size, &rsize); + sub = getFirstBlock(p_blocks[i].block, size, &rsize, NULL); if(sub) { - void* ret = allocBlock(p_blocks[i].block, sub, size); + void* ret = allocBlock(p_blocks[i].block, sub, size, NULL); if(rsize==p_blocks[i].maxfree) - p_blocks[i].maxfree = getMaxFreeBlock(p_blocks[i].block, p_blocks[i].size); + p_blocks[i].maxfree = getMaxFreeBlock(p_blocks[i].block, p_blocks[i].size, NULL); pthread_mutex_unlock(&mutex_blocks); return ret; } @@ -225,14 +269,14 @@ void* customMalloc(size_t size) blockmark_t* m = (blockmark_t*)p; m->prev.x32 = 0; m->next.fill = 0; - m->next.size = allocsize-sizeof(blockmark_t); - m = (blockmark_t*)(p+allocsize-sizeof(blockmark_t)); - m->next.x32 = 0; - m->prev.fill = 0; - m->prev.size = allocsize-sizeof(blockmark_t); + m->next.size = allocsize-2*sizeof(blockmark_t); + blockmark_t* n = NEXT_BLOCK(m); + n->next.x32 = 0; + n->prev.fill = 0; + n->prev.size = m->next.size; // alloc 1st block - void* ret = allocBlock(p_blocks[i].block, p, size); - p_blocks[i].maxfree = getMaxFreeBlock(p_blocks[i].block, p_blocks[i].size); + void* ret = allocBlock(p_blocks[i].block, p, size, NULL); + p_blocks[i].maxfree = getMaxFreeBlock(p_blocks[i].block, p_blocks[i].size, NULL); pthread_mutex_unlock(&mutex_blocks); return ret; } @@ -254,7 +298,7 @@ void* customRealloc(void* p, size_t size) && (addr<((uintptr_t)p_blocks[i].block+p_blocks[i].size))) { void* sub = (void*)(addr-sizeof(blockmark_t)); if(expandBlock(p_blocks[i].block, sub, size)) { - p_blocks[i].maxfree = getMaxFreeBlock(p_blocks[i].block, p_blocks[i].size); + p_blocks[i].maxfree = getMaxFreeBlock(p_blocks[i].block, p_blocks[i].size, NULL); pthread_mutex_unlock(&mutex_blocks); return p; } @@ -280,8 +324,8 @@ void customFree(void* p) if ((addr>(uintptr_t)p_blocks[i].block) && (addr<((uintptr_t)p_blocks[i].block+p_blocks[i].size))) { void* sub = (void*)(addr-sizeof(blockmark_t)); - freeBlock(p_blocks[i].block, sub); - p_blocks[i].maxfree = getMaxFreeBlock(p_blocks[i].block, p_blocks[i].size); + size_t newfree = freeBlock(p_blocks[i].block, sub, NULL); + if(p_blocks[i].maxfree < newfree) p_blocks[i].maxfree = newfree; pthread_mutex_unlock(&mutex_blocks); return; } @@ -298,6 +342,8 @@ typedef struct mmaplist_s { size_t size; kh_dynablocks_t* dblist; uint8_t* helper; + void* first; // first free block, to speed up things + int locked; // don't try to add stuff on locked block } mmaplist_t; uintptr_t FindFreeDynarecMap(dynablock_t* db, size_t size) @@ -305,13 +351,15 @@ uintptr_t FindFreeDynarecMap(dynablock_t* db, size_t size) // look for free space void* sub = NULL; for(size_t i=0; i<mmapsize; ++i) { - if(mmaplist[i].maxfree>=size) { + if(mmaplist[i].maxfree>=size+sizeof(blockmark_t) && !mmaplist[i].locked) { + mmaplist[i].locked = 1; size_t rsize = 0; - sub = getFirstBlock(mmaplist[i].block, size, &rsize); + sub = getFirstBlock(mmaplist[i].block, size, &rsize, mmaplist[i].first); if(sub) { - uintptr_t ret = (uintptr_t)allocBlock(mmaplist[i].block, sub, size); - if(rsize==mmaplist[i].maxfree) - mmaplist[i].maxfree = getMaxFreeBlock(mmaplist[i].block, mmaplist[i].size); + uintptr_t ret = (uintptr_t)allocBlock(mmaplist[i].block, sub, size, &mmaplist[i].first); + if(rsize==mmaplist[i].maxfree) { + mmaplist[i].maxfree = getMaxFreeBlock(mmaplist[i].block, mmaplist[i].size, mmaplist[i].first); + } kh_dynablocks_t *blocks = mmaplist[i].dblist; if(!blocks) { blocks = mmaplist[i].dblist = kh_init(dynablocks); @@ -321,9 +369,17 @@ uintptr_t FindFreeDynarecMap(dynablock_t* db, size_t size) int r; k = kh_put(dynablocks, blocks, (uintptr_t)ret, &r); kh_value(blocks, k) = db; - for(size_t j=0; j<size; ++j) - mmaplist[i].helper[(uintptr_t)ret-(uintptr_t)mmaplist[i].block+j] = (j<256)?j:255; + int size255=(size<256)?size:255; + for(size_t j=0; j<size255; ++j) + mmaplist[i].helper[(uintptr_t)ret-(uintptr_t)mmaplist[i].block+j] = j; + if(size!=size255) + memset(&mmaplist[i].helper[(uintptr_t)ret-(uintptr_t)mmaplist[i].block+256], -1, size-255); + mmaplist[i].locked = 0; return ret; + } else { + printf_log(LOG_INFO, "BOX64: Warning, sub not found, corrupted mmaplist[%d] info?\n", i); + if(box64_log >= LOG_DEBUG) + printBlock(mmaplist[i].block, mmaplist[i].first); } } } @@ -333,8 +389,11 @@ uintptr_t FindFreeDynarecMap(dynablock_t* db, size_t size) uintptr_t AddNewDynarecMap(dynablock_t* db, size_t size) { size_t i = mmapsize++; - dynarec_log(LOG_DEBUG, "Ask for DynaRec Block Alloc #%zu\n", mmapsize); - mmaplist = (mmaplist_t*)realloc(mmaplist, mmapsize*sizeof(mmaplist_t)); + dynarec_log(LOG_DEBUG, "Ask for DynaRec Block Alloc #%zu/%zu\n", mmapsize, mmapcap); + if(mmapsize>mmapcap) { + mmapcap += 32; + mmaplist = (mmaplist_t*)realloc(mmaplist, mmapcap*sizeof(mmaplist_t)); + } #ifndef USE_MMAP void *p = NULL; if(posix_memalign(&p, box64_pagesize, MMAPSIZE)) { @@ -353,21 +412,23 @@ uintptr_t AddNewDynarecMap(dynablock_t* db, size_t size) #endif setProtection((uintptr_t)p, MMAPSIZE, PROT_READ | PROT_WRITE | PROT_EXEC); + mmaplist[i].locked = 1; mmaplist[i].block = p; mmaplist[i].size = MMAPSIZE; mmaplist[i].helper = (uint8_t*)calloc(1, MMAPSIZE); + mmaplist[i].first = p; // setup marks blockmark_t* m = (blockmark_t*)p; m->prev.x32 = 0; m->next.fill = 0; - m->next.size = MMAPSIZE-sizeof(blockmark_t); - m = (blockmark_t*)(p+MMAPSIZE-sizeof(blockmark_t)); - m->next.x32 = 0; - m->prev.fill = 0; - m->prev.size = MMAPSIZE-sizeof(blockmark_t); + m->next.size = MMAPSIZE-2*sizeof(blockmark_t); + blockmark_t* n = NEXT_BLOCK(m); + n->next.x32 = 0; + n->prev.fill = 0; + n->prev.size = m->next.size; // alloc 1st block - uintptr_t sub = (uintptr_t)allocBlock(mmaplist[i].block, p, size); - mmaplist[i].maxfree = getMaxFreeBlock(mmaplist[i].block, mmaplist[i].size); + uintptr_t sub = (uintptr_t)allocBlock(mmaplist[i].block, p, size, &mmaplist[i].first); + mmaplist[i].maxfree = getMaxFreeBlock(mmaplist[i].block, mmaplist[i].size, mmaplist[i].first); kh_dynablocks_t *blocks = mmaplist[i].dblist = kh_init(dynablocks); kh_resize(dynablocks, blocks, 64); khint_t k; @@ -376,6 +437,7 @@ uintptr_t AddNewDynarecMap(dynablock_t* db, size_t size) kh_value(blocks, k) = db; for(size_t j=0; j<size; ++j) mmaplist[i].helper[(uintptr_t)sub-(uintptr_t)mmaplist[i].block + j] = (j<256)?j:255; + mmaplist[i].locked = 0; return sub; } @@ -388,15 +450,18 @@ void ActuallyFreeDynarecMap(dynablock_t* db, uintptr_t addr, size_t size) if ((addr>(uintptr_t)mmaplist[i].block) && (addr<((uintptr_t)mmaplist[i].block+mmaplist[i].size))) { void* sub = (void*)(addr-sizeof(blockmark_t)); - freeBlock(mmaplist[i].block, sub); - mmaplist[i].maxfree = getMaxFreeBlock(mmaplist[i].block, mmaplist[i].size); + size_t newfree = freeBlock(mmaplist[i].block, sub, &mmaplist[i].first); + if(mmaplist[i].maxfree < newfree) mmaplist[i].maxfree = newfree; kh_dynablocks_t *blocks = mmaplist[i].dblist; if(blocks) { khint_t k = kh_get(dynablocks, blocks, (uintptr_t)sub); if(k!=kh_end(blocks)) kh_del(dynablocks, blocks, k); - for(size_t j=0; j<size; ++j) - mmaplist[i].helper[(uintptr_t)sub-(uintptr_t)mmaplist[i].block+j] = 0; + memset(&mmaplist[i].helper[(uintptr_t)sub-(uintptr_t)mmaplist[i].block], 0, size); + } + if(mmaplist[i].locked) { + printf_log(LOG_INFO, "BOX64: Warning, Free a chunk in a locked mmaplist[%d]\n", i); + ++mmaplist[i].locked; } return; } @@ -1146,6 +1211,7 @@ void fini_custommem_helper(box64context_t *ctx) dblist_oversized = NULL; } mmapsize = 0; + mmapcap = 0; dynarec_log(LOG_DEBUG, "Free dynamic Dynarecblocks\n"); for (uintptr_t idx3=0; idx3<=0xffff; ++idx3) if(dynmap123[idx3]) { |