diff options
| author | Chi-Kuan Chiu <105687635+devarajabc@users.noreply.github.com> | 2025-06-23 05:01:17 -0600 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-06-23 13:01:17 +0200 |
| commit | f6d9caf0d13ac94af5a1ae3ed368d78893faa7dc (patch) | |
| tree | 1d45d98061498dbea9540475df0e49a2122ee35c /src | |
| parent | c66630da497c18622205cc58cb058a1f8cba7cd1 (diff) | |
| download | box64-f6d9caf0d13ac94af5a1ae3ed368d78893faa7dc.tar.gz box64-f6d9caf0d13ac94af5a1ae3ed368d78893faa7dc.zip | |
Add `map64_customMalloc` (#2764)
Most allocations target ~56 bytes, causing internal fragmentation in `map128_customMalloc`. Introduce `map64_customMalloc`, a 64-byte–chunk variant of `map128_customMalloc`. The new allocator uses a `uint16_t` “map” array: each bit in a 16-bit entry tracks the in-use status of one 64-byte chunk (16 chunks per map entry). The bitmap region is fixed at 256 bytes. Note that “map” here is a fast-lookup structure, distinct from the bitmap itself. All chunk- size, shift, and map-size calculations have been updated accordingly. Relates to #2740
Diffstat (limited to 'src')
| -rw-r--r-- | src/custommem.c | 146 |
1 files changed, 140 insertions, 6 deletions
diff --git a/src/custommem.c b/src/custommem.c index 19ec422c..599e60e7 100644 --- a/src/custommem.c +++ b/src/custommem.c @@ -81,6 +81,7 @@ static rbtree_t* blockstree = NULL; #define BTYPE_MAP 1 #define BTYPE_LIST 0 +#define BTYPE_MAP64 2 typedef struct blocklist_s { void* block; @@ -92,6 +93,7 @@ typedef struct blocklist_s { } blocklist_t; #define MMAPSIZE (512*1024) // allocate 512kb sized blocks +#define MMAPSIZE64 (64*2048) // allocate 128kb sized blocks for 64byte map #define MMAPSIZE128 (128*1024) // allocate 128kb sized blocks for 128byte map #define DYNMMAPSZ (2*1024*1024) // allocate 2Mb block for dynarec #define DYNMMAPSZ0 (128*1024) // allocate 128kb block for 1st page, to avoid wasting too much memory on small program / libs @@ -546,7 +548,7 @@ void* map128_customMalloc(size_t size, int is32bits) size = 128; mutex_lock(&mutex_blocks); for(int i=0; i<n_blocks; ++i) { - if(p_blocks[i].block && (p_blocks[i].type==BTYPE_MAP) && p_blocks[i].maxfree && (!box64_is32bits || ((!is32bits && p_blocks[i].block>(void*)0xffffffffLL)) || (is32bits && p_blocks[i].block<(void*)0x100000000LL))) { + if(p_blocks[i].block && (p_blocks[i].type == BTYPE_MAP) && p_blocks[i].maxfree && (!box64_is32bits || ((!is32bits && p_blocks[i].block>(void*)0xffffffffLL)) || (is32bits && p_blocks[i].block<(void*)0x100000000LL))) { // look for a free block uint8_t* map = p_blocks[i].first; for(uint32_t idx=p_blocks[i].lowest; idx<(p_blocks[i].size>>7); ++idx) { @@ -643,9 +645,113 @@ void* map128_customMalloc(size_t size, int is32bits) } return ret; } +// the BTYPE_MAP64 is a simple bitmap based allocator: it will allocate slices of 64bytes only, from a large 64k mapping +// the bitmap itself is also allocated in that mapping, as a slice of 256bytes, at the end of the mapping (and so marked as allocated) +void* map64_customMalloc(size_t size, int is32bits) +{ + size = 64; + mutex_lock(&mutex_blocks); + for(int i = 0; i < n_blocks; ++i) { + if (p_blocks[i].block + && p_blocks[i].type == 
BTYPE_MAP64 + && p_blocks[i].maxfree + && (!box64_is32bits + || ((!is32bits && p_blocks[i].block > (void*)0xffffffffLL) + || (is32bits && p_blocks[i].block < (void*)0x100000000LL))) + ) { + uint16_t* map = p_blocks[i].first; + uint32_t slices = p_blocks[i].size >> 6; + for (uint32_t idx = p_blocks[i].lowest; idx < slices; ++idx) { + if (!(idx & 15) && map[idx >> 4] == 0xFFFF) + idx += 15; + else if (!(map[idx >> 4] & (1u << (idx & 15)))) { + map[idx >> 4] |= 1u << (idx & 15); + p_blocks[i].maxfree -= 64; + p_blocks[i].lowest = idx + 1; + mutex_unlock(&mutex_blocks); + return p_blocks[i].block + (idx << 6); + } + } + #ifdef TRACE_MEMSTAT + printf_log(LOG_INFO, + "Warning: MAP has maxfree=%d and lowest=%d but no free 64 B slice found\n", + p_blocks[i].maxfree, p_blocks[i].lowest); + #endif + } + } + int i = n_blocks++; + if (n_blocks > c_blocks) { + c_blocks += box64_is32bits ? 256 : 8; + p_blocks = (blocklist_t*)box_realloc(p_blocks, c_blocks * sizeof(blocklist_t)); + } + + size_t allocsize = MMAPSIZE64; + p_blocks[i].block = NULL; // guard re-entrance + p_blocks[i].first = NULL; + p_blocks[i].size = 0; + + if(is32bits) mutex_unlock(&mutex_blocks); // unlocking, because mmap might use it + void* p = is32bits + ? box_mmap(NULL, allocsize, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE | MAP_32BIT, -1, 0) + : (box64_is32bits ? 
box32_dynarec_mmap(allocsize, -1, 0) : InternalMmap(NULL, allocsize, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0)); + if(is32bits) mutex_lock(&mutex_blocks); + + #ifdef TRACE_MEMSTAT + customMalloc_allocated += allocsize; + #endif + + size_t mapsize = (allocsize / 64) / 8; + mapsize = (mapsize + 255) & ~255LL; + + p_blocks[i].type = BTYPE_MAP64; + p_blocks[i].block = p; + p_blocks[i].first = p+allocsize-mapsize; + p_blocks[i].size = allocsize; + + // mark the bitmap area itself as "used" + uint16_t* map = p_blocks[i].first; + for (size_t idx = (allocsize - mapsize) >> 6; idx < (allocsize >> 6); ++idx) { + map[idx >> 4] |= 1u << (idx & 15); + } + + if (is32bits && p > (void*)0xffffffffLL) { + p_blocks[i].maxfree = allocsize - mapsize; + mutex_unlock(&mutex_blocks); + return NULL; + } + + #ifdef TRACE_MEMSTAT + printf_log(LOG_INFO, + "Custommem: allocation %p-%p for %dbit 64 B MAP Alloc [%d]\n", + p, (uint8_t*)p + allocsize, + is32bits ? 32 : 64, i); + #endif + + void* ret = p_blocks[i].block; + map[0] |= 1u; + p_blocks[i].lowest = 1; + p_blocks[i].maxfree = allocsize - (mapsize + 64); + + mutex_unlock(&mutex_blocks); + add_blockstree((uintptr_t)p, (uintptr_t)p + allocsize, i); + + if (mapallmem) { + if (setting_prot) { + defered_prot_p = (uintptr_t)p; + defered_prot_sz = allocsize; + defered_prot_prot = PROT_READ | PROT_WRITE; + } else { + setProtection((uintptr_t)p, allocsize, PROT_READ | PROT_WRITE); + } + } + + return ret; +} + void* internal_customMalloc(size_t size, int is32bits) { + if(size<=64) + return map64_customMalloc(size, is32bits); if(size<=128) return map128_customMalloc(size, is32bits); size_t init_size = size; @@ -655,7 +761,7 @@ void* internal_customMalloc(size_t size, int is32bits) size_t fullsize = size+2*sizeof(blockmark_t); mutex_lock(&mutex_blocks); for(int i=0; i<n_blocks; ++i) { - if(p_blocks[i].block && (p_blocks[i].type==BTYPE_LIST) && p_blocks[i].maxfree>=init_size && (!box64_is32bits || ((!is32bits && p_blocks[i].block>(void*)0xffffffffLL)) || (is32bits
&& p_blocks[i].block<(void*)0x100000000LL))) { + if(p_blocks[i].block && (p_blocks[i].type == BTYPE_LIST) && p_blocks[i].maxfree>=init_size && (!box64_is32bits || ((!is32bits && p_blocks[i].block>(void*)0xffffffffLL)) || (is32bits && p_blocks[i].block<(void*)0x100000000LL))) { size_t rsize = 0; sub = getFirstBlock(p_blocks[i].block, init_size, &rsize, p_blocks[i].first); if(sub) { @@ -787,7 +893,7 @@ void* internal_customRealloc(void* p, size_t size, int is32bits) blocklist_t* l = findBlock(addr); if(l) { size_t subsize; - if(l->type==BTYPE_LIST) { + if(l->type == BTYPE_LIST) { blockmark_t* sub = (blockmark_t*)(addr-sizeof(blockmark_t)); if(expandBlock(l->block, sub, size, &l->first)) { l->maxfree = getMaxFreeBlock(l->block, l->size, l->first); @@ -795,13 +901,20 @@ void* internal_customRealloc(void* p, size_t size, int is32bits) return p; } subsize = sizeBlock(sub); - } else { + } else if(l->type == BTYPE_MAP) { //BTYPE_MAP if(size<=128) { mutex_unlock(&mutex_blocks); return p; } subsize = 128; +}else{ + // BTYPE_MAP64 + if(size<=64) { + mutex_unlock(&mutex_blocks); + return p; + } + subsize = 64; } mutex_unlock(&mutex_blocks); void* newp = internal_customMalloc(size, is32bits); @@ -843,7 +956,7 @@ void internal_customFree(void* p, int is32bits) if(l->maxfree < newfree) l->maxfree = newfree; mutex_unlock(&mutex_blocks); return; - } else { + } else if(l->type == BTYPE_MAP) { //BTYPE_MAP size_t idx = (addr-(uintptr_t)l->block)>>7; uint8_t* map = l->first; @@ -858,6 +971,21 @@ void internal_customFree(void* p, int is32bits) l->lowest = idx; mutex_unlock(&mutex_blocks); return; + }else{ + //BTYPE_MAP64 + size_t idx = (addr-(uintptr_t)l->block)>>6; + uint16_t* map = l->first; + if(map[idx>>4]&(1<<(idx&15))) { + map[idx>>4] ^= (1<<(idx&15)); + l->maxfree += 64; + } // warn if double free? 
+ #ifdef TRACE_MEMSTAT + else printf_log(LOG_INFO, "Warning, custommem free(%p) from MAP64 block %p, but not found as allocated\n", p, l); + #endif + if(l->lowest>idx) + l->lowest = idx; + mutex_unlock(&mutex_blocks); + return; + } } mutex_unlock(&mutex_blocks); @@ -898,6 +1026,8 @@ void* internal_customMemAligned(size_t align, size_t size, int is32bits) size_t init_size = (size+align_mask)&~align_mask; size = roundSize(size); if(align<8) align = 8; + if(size<=64 && align<=64) + return map64_customMalloc(size, is32bits); if(size<=128 && align<=128) return map128_customMalloc(size, is32bits); // look for free space @@ -1018,10 +1148,14 @@ size_t customGetUsableSize(void* p) mutex_lock(&mutex_blocks); blocklist_t* l = findBlock(addr); if(l) { - if(l->type==BTYPE_MAP) { + if(l->type == BTYPE_MAP) { mutex_unlock(&mutex_blocks); return 128; } + else if(l->type == BTYPE_MAP64) { + mutex_unlock(&mutex_blocks); + return 64; + } blockmark_t* sub = (void*)(addr-sizeof(blockmark_t)); size_t size = SIZE_BLOCK(sub->next); |