about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
authorptitSeb <sebastien.chev@gmail.com>2022-10-29 22:32:47 +0200
committerGitHub <noreply@github.com>2022-10-29 22:32:47 +0200
commit0e47d1269cc660d2b1451e76375b2d3545ce66f4 (patch)
tree27c958e5ef3cd147625112a82626624d51fd6fe5 /src
parent7dc5359c2b10521696e7d251627eff029d53ee28 (diff)
downloadbox64-0e47d1269cc660d2b1451e76375b2d3545ce66f4.tar.gz
box64-0e47d1269cc660d2b1451e76375b2d3545ce66f4.zip
Malloc override (#437)
* Add some malloc override mechanism, but missing C++ function overriding
* Add c++ new/delete redirection too
* Added support for libtbbmalloc_proxy
Diffstat (limited to 'src')
-rwxr-xr-xsrc/elfs/elfloader.c44
-rwxr-xr-xsrc/emu/x64int3.c5
-rwxr-xr-xsrc/emu/x64run_private.c1
-rwxr-xr-xsrc/include/callback.h2
-rwxr-xr-xsrc/include/debug.h4
-rwxr-xr-xsrc/include/elfloader.h1
-rwxr-xr-xsrc/librarian/library.c6
-rwxr-xr-xsrc/library_list.h1
-rwxr-xr-xsrc/main.c5
-rw-r--r--src/mallochook.c558
-rwxr-xr-xsrc/tools/callback.c62
-rwxr-xr-xsrc/tools/fileutils.c7
-rw-r--r--src/wrapped/generated/functions_list.txt18
-rw-r--r--src/wrapped/generated/wrappedlibctypes.h2
-rw-r--r--src/wrapped/generated/wrappedtbbmallocproxydefs.h8
-rw-r--r--src/wrapped/generated/wrappedtbbmallocproxytypes.h30
-rw-r--r--src/wrapped/generated/wrappedtbbmallocproxyundefs.h8
-rw-r--r--src/wrapped/generated/wrapper.c5
-rw-r--r--src/wrapped/generated/wrapper.h2
-rwxr-xr-xsrc/wrapped/wrappedlibc.c4
-rwxr-xr-xsrc/wrapped/wrappedlibc_private.h5
-rw-r--r--src/wrapped/wrappedtbbmallocproxy.c23
-rw-r--r--src/wrapped/wrappedtbbmallocproxy_private.h34
23 files changed, 771 insertions, 64 deletions
diff --git a/src/elfs/elfloader.c b/src/elfs/elfloader.c
index f93ce1df..35a7d3a2 100755
--- a/src/elfs/elfloader.c
+++ b/src/elfs/elfloader.c
@@ -54,7 +54,7 @@ elfheader_t* LoadAndCheckElfHeader(FILE* f, const char* name, int exec)
     if(!h)
         return NULL;
 
-    if ((h->path = realpath(name, NULL)) == NULL) {
+    if ((h->path = box_realpath(name, NULL)) == NULL) {
         h->path = (char*)box_malloc(1);
         h->path[0] = '\0';
     }
@@ -821,6 +821,7 @@ int RelocateElfRELA(lib_t *maplib, lib_t *local_maplib, int bindnow, elfheader_t
     }
     return bindnow?ret_ok:0;
 }
+void checkHookedSymbols(lib_t *maplib, elfheader_t* h); // in mallochook.c
 int RelocateElf(lib_t *maplib, lib_t *local_maplib, int bindnow, elfheader_t* head)
 {
     if(head->rel) {
@@ -837,7 +838,7 @@ int RelocateElf(lib_t *maplib, lib_t *local_maplib, int bindnow, elfheader_t* he
         if(RelocateElfRELA(maplib, local_maplib, bindnow, head, cnt, (Elf64_Rela *)(head->rela + head->delta), NULL))
             return -1;
     }
-   
+    checkHookedSymbols(maplib, head);
     return 0;
 }
 
@@ -945,22 +946,6 @@ uintptr_t GetLastByte(elfheader_t* h)
 void AddSymbols(lib_t *maplib, kh_mapsymbols_t* mapsymbols, kh_mapsymbols_t* weaksymbols, kh_mapsymbols_t* localsymbols, elfheader_t* h)
 {
     if(box64_dump && h->DynSym) DumpDynSym(h);
-    int libcef = (strstr(h->name, "libcef.so"))?1:0;
-    //libcef.so is linked with tcmalloc staticaly, but this cannot be easily supported in box64, so hacking some "unlink" here
-    const char* avoid_libcef[] = {"malloc", "realloc", "free", "calloc", "cfree",
-        "__libc_malloc", "__libc_calloc", "__libc_free", "__libc_memallign", "__libc_pvalloc",
-        "__libc_realloc", "__libc_valloc", "__posix_memalign",
-        "valloc", "pvalloc", "posix_memalign", "malloc_stats", "malloc_usable_size",
-        /*"mallopt",*/ "localtime_r",
-        //c++ symbol from libstdc++ too
-        //"_ZnwmRKSt9nothrow_t", "_ZdaPv",    // operator new(unsigned long, std::nothrow_t const&), operator delete[](void*)
-        //"_Znwm", "_ZdlPv", "_Znam",         // operator new(unsigned long), operator delete(void*), operator new[](unsigned long)
-        //"_ZnwmSt11align_val_t", "_ZnwmSt11align_val_tRKSt9nothrow_t",   // operator new(unsigned long, std::align_val_t)
-        //"_ZnamSt11align_val_t", "_ZnamSt11align_val_tRKSt9nothrow_t",   // operator new[](unsigned long, std::align_val_t)
-        //"_ZdlPvRKSt9nothrow_t", "_ZdaPvSt11align_val_tRKSt9nothrow_t",  // more delete operators
-        //"_ZdlPvmSt11align_val_t", "_ZdaPvRKSt9nothrow_t",
-        //"_ZdaPvSt11align_val_t", "_ZdlPvSt11align_val_t",
-    };
     printf_dump(LOG_NEVER, "Will look for Symbol to add in SymTable(%zu)\n", h->numSymTab);
     for (size_t i=0; i<h->numSymTab; ++i) {
         const char * symname = h->StrTab+h->SymTab[i].st_name;
@@ -994,13 +979,6 @@ void AddSymbols(lib_t *maplib, kh_mapsymbols_t* mapsymbols, kh_mapsymbols_t* wea
                     }
             } else {
                 int to_add = 1;
-                if(libcef) {
-                    if(strstr(symname, "_Zn")==symname || strstr(symname, "_Zd")==symname)
-                        to_add = 0;
-                    for(int j=0; j<sizeof(avoid_libcef)/sizeof(avoid_libcef[0]) && to_add; ++j)
-                        if(!strcmp(symname, avoid_libcef[j]))
-                            to_add = 0;
-                }
                 if(!to_add || (bind==STB_GNU_UNIQUE && FindGlobalSymbol(maplib, symname, -1, NULL)))
                     continue;
                 uintptr_t offs = (type==STT_TLS)?h->SymTab[i].st_value:(h->SymTab[i].st_value + h->delta);
@@ -1036,13 +1014,6 @@ void AddSymbols(lib_t *maplib, kh_mapsymbols_t* mapsymbols, kh_mapsymbols_t* wea
                 printf_dump(LOG_NEVER, "Adding Default Version \"%s\" for Symbol\"%s\"\n", vername, symname);
             }
             int to_add = 1;
-            if(libcef) {
-                if(strstr(symname, "_Zn")==symname || strstr(symname, "_Zd")==symname)
-                    to_add = 0;
-                for(int j=0; j<sizeof(avoid_libcef)/sizeof(avoid_libcef[0]) && to_add; ++j)
-                    if(!strcmp(symname, avoid_libcef[j]))
-                        to_add = 0;
-            }
             if(!to_add || (bind==STB_GNU_UNIQUE && FindGlobalSymbol(maplib, symname, version, vername)))
                 continue;
             printf_dump(LOG_NEVER, "Adding Versionned Symbol(bind=%s) \"%s\" (ver=%d/%s) with offset=%p sz=%zu\n", (bind==STB_LOCAL)?"LOCAL":((bind==STB_WEAK)?"WEAK":"GLOBAL"), symname, version, vername?vername:"(none)", (void*)offs, sz);
@@ -1056,7 +1027,6 @@ void AddSymbols(lib_t *maplib, kh_mapsymbols_t* mapsymbols, kh_mapsymbols_t* wea
                 }
         }
     }
-    
 }
 
 /*
@@ -1188,7 +1158,11 @@ void RefreshElfTLS(elfheader_t* h)
         }
     }
 }
-
+void MarkElfInitDone(elfheader_t* h)
+{
+    if(h)
+        h->init_done = 1;
+}
 void RunElfInit(elfheader_t* h, x64emu_t *emu)
 {
     if(!h || h->init_done)
@@ -1209,6 +1183,7 @@ void RunElfInit(elfheader_t* h, x64emu_t *emu)
         return;
     }
     printf_log(LOG_DEBUG, "Calling Init for %s @%p\n", ElfName(h), (void*)p);
+    h->init_done = 1;
     if(h->initentry)
         RunFunctionWithEmu(emu, 0, p, 3, context->argc, context->argv, context->envv);
     printf_log(LOG_DEBUG, "Done Init for %s\n", ElfName(h));
@@ -1221,7 +1196,6 @@ void RunElfInit(elfheader_t* h, x64emu_t *emu)
         }
     }
 
-    h->init_done = 1;
     h->fini_done = 0;   // can be fini'd now (in case it was re-inited)
     printf_log(LOG_DEBUG, "All Init Done for %s\n", ElfName(h));
     return;
diff --git a/src/emu/x64int3.c b/src/emu/x64int3.c
index dc2b02a2..0b3abba3 100755
--- a/src/emu/x64int3.c
+++ b/src/emu/x64int3.c
@@ -146,7 +146,10 @@ void x64Int3(x64emu_t* emu, uintptr_t* addr)
                     snprintf(buff, 255, "%04d|%p: Calling %s(%d, %p, %zu)", tid, *(void**)(R_RSP), s, R_EDI, (void*)R_RSI, R_RDX);
                     perr = 1;
                 } else if (!strcmp(s, "write")) {
-                    snprintf(buff, 255, "%04d|%p: Calling %s(%d, %p, %zu)", tid, *(void**)(R_RSP), s, R_EDI, (void*)R_RSI, R_RDX);
+                    if(R_EDI==2 || R_EDI==3)
+                        snprintf(buff, 255, "%04d|%p: Calling %s(%d, %p\"%s\", %zu)", tid, *(void**)(R_RSP), s, R_EDI, (void*)R_RSI, (char*)R_RSI, R_RDX);
+                    else
+                        snprintf(buff, 255, "%04d|%p: Calling %s(%d, %p, %zu)", tid, *(void**)(R_RSP), s, R_EDI, (void*)R_RSI, R_RDX);
                     perr = 1;
                 } else if (strstr(s, "access")==s) {
                     tmp = (char*)(R_RDI);
diff --git a/src/emu/x64run_private.c b/src/emu/x64run_private.c
index cc8ef149..c057b8a4 100755
--- a/src/emu/x64run_private.c
+++ b/src/emu/x64run_private.c
@@ -51,6 +51,7 @@ int32_t EXPORT my___libc_start_main(x64emu_t* emu, int *(main) (int, char * *, c
     } else {
         RunElfInit(my_context->elfs[0], emu);
     }
+    MarkElfInitDone(my_context->elfs[0]);
     printf_log(LOG_DEBUG, "Transfert to main(%d, %p, %p)=>%p from __libc_start_main\n", my_context->argc, my_context->argv, my_context->envv, main);
     // call main and finish
     Push64(emu, GetRBP(emu));   // set frame pointer
diff --git a/src/include/callback.h b/src/include/callback.h
index dba59ee8..dae26f1e 100755
--- a/src/include/callback.h
+++ b/src/include/callback.h
@@ -6,6 +6,8 @@
 typedef struct x64emu_s x64emu_t;
 
 uint64_t RunFunction(box64context_t *context, uintptr_t fnc, int nargs, ...);
+// like RunFunction, but saves and restores RDI, RSI, RDX, RCX, R8-R11 and RAX around the call
+uint64_t RunSafeFunction(box64context_t *context, uintptr_t fnc, int nargs, ...);
 // use emu state to run function
 uint64_t RunFunctionWithEmu(x64emu_t *emu, int QuitOnLongJumpExit, uintptr_t fnc, int nargs, ...);
 
diff --git a/src/include/debug.h b/src/include/debug.h
index c03fc0b0..7c14a6ee 100755
--- a/src/include/debug.h
+++ b/src/include/debug.h
@@ -75,8 +75,9 @@ extern FILE* ftrace;
 #define EXPORTDYN 
 #endif
 
-#ifdef ANDROID
 void init_malloc_hook();
+extern size_t(*box_malloc_usable_size)(void*);
+#ifdef ANDROID
 extern void*(*__libc_malloc)(size_t);
 extern void*(*__libc_realloc)(void*, size_t);
 extern void*(*__libc_calloc)(size_t, size_t);
@@ -95,5 +96,6 @@ extern void* __libc_memalign(size_t, size_t);
 #define box_free        __libc_free
 #define box_memalign    __libc_memalign 
 extern char* box_strdup(const char* s);
+extern char* box_realpath(const char* path, char* ret);
 
 #endif //__DEBUG_H_
diff --git a/src/include/elfloader.h b/src/include/elfloader.h
index 6846dcd4..a4f59719 100755
--- a/src/include/elfloader.h
+++ b/src/include/elfloader.h
@@ -38,6 +38,7 @@ void RefreshElfTLS(elfheader_t* h);
 void RunElfInit(elfheader_t* h, x64emu_t *emu);
 void RunElfFini(elfheader_t* h, x64emu_t *emu);
 void RunDeferedElfInit(x64emu_t *emu);
+void MarkElfInitDone(elfheader_t* h);
 void* GetBaseAddress(elfheader_t* h);
 void* GetElfDelta(elfheader_t* h);
 uint32_t GetBaseSize(elfheader_t* h);
diff --git a/src/librarian/library.c b/src/librarian/library.c
index 951407a9..7baceeca 100755
--- a/src/librarian/library.c
+++ b/src/librarian/library.c
@@ -289,7 +289,7 @@ static int loadEmulatedLib(const char* libname, library_t *lib, box64context_t*
 
         if(lib->path && strcmp(lib->path, libname)) {
             box_free(lib->path);
-            lib->path = realpath(libname, NULL);
+            lib->path = box_realpath(libname, NULL);
             if(!lib->path)
                 lib->path = box_strdup(libname);
         }
@@ -349,7 +349,7 @@ library_t *NewLibrary(const char* path, box64context_t* context)
 {
     printf_log(LOG_DEBUG, "Trying to load \"%s\"\n", path);
     library_t *lib = (library_t*)box_calloc(1, sizeof(library_t));
-    lib->path = realpath(path, NULL);
+    lib->path = box_realpath(path, NULL);
     if(!lib->path)
         lib->path = box_strdup(path);
     if(libGL && !strcmp(path, libGL))
@@ -595,7 +595,7 @@ int IsSameLib(library_t* lib, const char* path)
             ret=1;
     } else {
         char rpath[PATH_MAX];
-        realpath(path, rpath);
+        box_realpath(path, rpath);
         if(!strcmp(rpath, lib->path))
             ret=1;
     }
diff --git a/src/library_list.h b/src/library_list.h
index 2f26cd29..991c5bb7 100755
--- a/src/library_list.h
+++ b/src/library_list.h
@@ -149,6 +149,7 @@ GO("libtinfo.so.5", libtinfo)
 GO("libpanel.so.5", libpanel)
 GO("libncurses.so.6", libncurses6)
 GO("libtinfo.so.6", libtinfo6)
+GO("libtbbmalloc_proxy.so.2", tbbmallocproxy)
 GO("libtcmalloc_minimal.so.0", tcmallocminimal)
 GO("libtcmalloc_minimal.so.4", tcmallocminimal)
 GO("libmpg123.so.0", mpg123)
diff --git a/src/main.c b/src/main.c
index ac7629f9..329c4b24 100755
--- a/src/main.c
+++ b/src/main.c
@@ -1045,10 +1045,7 @@ static void free_contextargv()
 void pressure_vessel(int argc, const char** argv, int nextarg);
 extern char** environ;
 int main(int argc, const char **argv, char **env) {
-    #ifdef ANDROID
     init_malloc_hook();
-    #endif
-
     init_auxval(argc, argv, environ?environ:env);
     // trying to open and load 1st arg
     if(argc==1) {
@@ -1318,7 +1315,7 @@ int main(int argc, const char **argv, char **env) {
         FreeCollection(&ld_preload);
         return -1;
     }
-    if(!(my_context->fullpath = realpath(my_context->argv[0], NULL)))
+    if(!(my_context->fullpath = box_realpath(my_context->argv[0], NULL)))
         my_context->fullpath = box_strdup(my_context->argv[0]);
     if(getenv("BOX64_ARG0"))
         my_context->argv[0] = box_strdup(getenv("BOX64_ARG0"));
diff --git a/src/mallochook.c b/src/mallochook.c
index c4a28499..ec58b732 100644
--- a/src/mallochook.c
+++ b/src/mallochook.c
@@ -2,10 +2,121 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <errno.h>
+#include <sys/param.h>
 #include <dlfcn.h>
 
 #include "box64context.h"
 #include "debug.h"
+#include "callback.h"
+#include "librarian.h"
+#include "elfs/elfloader_private.h"
+
+/*
+    This file handles the overriding of malloc functions
+
+ Libraries like tcmalloc override all malloc/free/new/delete functions and implement a custom version.
+ The problem is that box64 is already loaded in memory, and is already using the system libc malloc for many of its own needs
+ before those libs are even loaded in memory.
+ Also, those overriding functions can be defined in the main executable, or in a lib loaded directly by the executable,
+ or even in a lib loaded later using dlopen.
+
+ The 2 different strategies to handle this can be described as "Embrace" and "Exterminate" (as the overrides cannot simply be ignored,
+ or you end up mixing free/realloc from one lib with malloc/free from libc)
+
+ In the "Embrace" strategy, the overridden malloc functions are taken into account as soon as possible, and are used for all loaded libs, including native ones.
+ In the "Exterminate" strategy, the overridden malloc functions are erased, and replaced with libc malloc as soon as they are defined.
+
+ The advantage of "Embrace" is that the system will run the functions it is supposed to be using, and potential side effects and statically linked functions
+ will run as intended.
+ The downside of "Embrace" is that it makes it impossible to load a library with dlopen that overrides malloc functions, especially
+ if it loads natively wrapped functions
+
+ The advantage of "Exterminate" is that you won't be emulating basic malloc and friends, which are used extensively in every program. Also, loading libs
+ with dlopen will not be a problem.
+ The downside of "Exterminate" is that side effects are less well controlled. Statically linked stuff and anonymous symbols might put this strategy in trouble.
+
+ This is the Exterminate strategy implementation
+*/
+
+#include "bridge.h"
+#include "tools/bridge_private.h"
+#include "wrapper.h"
+
+#define SUPER()                 \
+GO(malloc, pFL);                \
+GO(free, vFp);                  \
+GO(calloc, pFLL);               \
+GO(realloc, pFpL);              \
+GO(aligned_alloc, pFLL);        \
+GO(memalign, pFLL);             \
+GO(posix_memalign, iFpLL);      \
+GO(pvalloc, pFL);               \
+GO(valloc, pFL);                \
+GO(cfree, vFp);                 \
+GO(malloc_usable_size, LFp) ;   \
+GO2(_Znwm, pFL);                \
+GO2(_ZnwmRKSt9nothrow_t, pFLp); \
+GO2(_Znam, pFL);                \
+GO2(_ZnamRKSt9nothrow_t, pFLp); \
+GO2(_ZdaPv, vFp);               \
+GO2(_ZdaPvm, vFpL);             \
+GO2(_ZdaPvmSt11align_val_t, vFpLL);             \
+GO2(_ZdlPv, vFp);                               \
+GO2(_ZdlPvm, vFpL);                             \
+GO2(_ZnwmSt11align_val_t, pFLL);                \
+GO2(_ZnwmSt11align_val_tRKSt9nothrow_t, pFLLp); \
+GO2(_ZnamSt11align_val_t, pFLL);                \
+GO2(_ZnamSt11align_val_tRKSt9nothrow_t, pFLLp); \
+GO2(_ZdlPvRKSt9nothrow_t, vFpp);                \
+GO2(_ZdaPvSt11align_val_tRKSt9nothrow_t, vFpLp);\
+GO2(_ZdlPvmSt11align_val_t, vFpLL);             \
+GO2(_ZdaPvRKSt9nothrow_t, vFpp);                \
+GO2(_ZdaPvSt11align_val_t, vFpL);               \
+GO2(_ZdlPvSt11align_val_t, vFpL);               \
+GO2(_ZdlPvSt11align_val_tRKSt9nothrow_t, vFpLp);\
+GO2(tc_calloc, pFLL);           \
+GO2(tc_cfree, vFp);             \
+GO2(tc_delete, vFp);            \
+GO2(tc_deletearray, vFp);       \
+GO2(tc_deletearray_nothrow, vFpp);              \
+GO2(tc_delete_nothrow, vFpp);   \
+GO2(tc_free, vFp);              \
+GO2(tc_malloc, pFL);            \
+GO2(tc_malloc_size, LFp);       \
+GO2(tc_new, pFL);               \
+GO2(tc_new_nothrow, pFLp);      \
+GO2(tc_newarray, pFL);          \
+GO2(tc_newarray_nothrow, pFLp); \
+GO2(tc_pvalloc, pFL);           \
+GO2(tc_valloc, pFL);            \
+GO2(tc_memalign, pFLL);         \
+GO2(tc_malloc_skip_new_handler_weak, pFL);      \
+GO2(tc_mallocopt, iFii);        \
+GO2(tc_malloc_stats, vFv);      \
+GO2(tc_malloc_skip_new_handler, pFL);           \
+GO2(tc_mallinfo, pFv);          \
+GO2(tc_posix_memalign, iFpLL);  \
+GO2(tc_realloc, pFpL);          \
+
+//GO2(tc_set_new_mode, iFi);
+//GO2(tc_version, iFi);
+
+typedef void  (vFv_t)   (void);
+typedef int   (iFv_t)   (void);
+typedef int   (iFi_t)   (int);
+typedef void* (*pFL_t)  (size_t);
+typedef void* (*pFLp_t) (size_t, void* p);
+typedef void  (*vFp_t)  (void*);
+typedef void* (*pFp_t)  (void*);
+typedef size_t(*LFp_t)  (void*);
+typedef int   (*iFii_t) (int, int);
+typedef void  (*vFpp_t) (void*, void*);
+typedef void  (*vFpL_t) (void*, size_t);
+typedef void* (*pFLL_t) (size_t, size_t);
+typedef void* (*pFLLp_t)(size_t, size_t, void* p);
+typedef void  (*vFpLp_t)(void*, size_t, void*);
+typedef void  (*vFpLL_t)(void*, size_t, size_t);
 
 #ifdef ANDROID
 void*(*__libc_malloc)(size_t) = NULL;
@@ -13,18 +124,451 @@ void*(*__libc_realloc)(size_t, void*) = NULL;
 void*(*__libc_calloc)(size_t, size_t) = NULL;
 void (*__libc_free*)(void*) = NULL;
 void*(*__libc_memalign)(size_t, size_t) = NULL;
+#endif
+size_t(*box_malloc_usable_size)(void*) = NULL;
+
+int GetTID();
+
+char* box_strdup(const char* s) {
+    char* ret = box_calloc(1, strlen(s)+1);
+    memcpy(ret, s, strlen(s));
+    return ret;
+}
+
+char* box_realpath(const char* path, char* ret)
+{
+    if(ret)
+        return realpath(path, ret);
+#ifdef PATH_MAX
+    size_t path_max = PATH_MAX;
+#else
+    size_t path_max = pathconf(path, _PC_PATH_MAX);
+    if (path_max <= 0)
+    path_max = 4096;
+#endif
+    char tmp[path_max];
+    char* p = realpath(path, tmp);
+    if(!p)
+        return NULL;
+    return box_strdup(tmp);
+}
+
+static size_t pot(size_t l) {
+    size_t ret = 0;
+    while (l>(1<<ret))  ++ret;
+    return 1<<ret;
+}
+
+// redefining all libc memory allocation routines
+EXPORT void* malloc(size_t l)
+{
+    return box_calloc(1, l);
+}
+
+EXPORT void free(void* p)
+{
+    box_free(p);
+}
+
+EXPORT void* calloc(size_t n, size_t s)
+{
+    return box_calloc(n, s);
+}
+
+EXPORT void* realloc(void* p, size_t s)
+{
+    return box_realloc(p, s);
+}
+
+EXPORT void* aligned_alloc(size_t align, size_t size)
+{
+    return box_memalign(align, size);
+}
+
+EXPORT void* memalign(size_t align, size_t size)
+{
+    return box_memalign(align, size);
+}
+
+EXPORT int posix_memalign(void** p, size_t align, size_t size)
+{
+    if(align%sizeof(void*) || pot(align)!=align)
+        return EINVAL;
+    void* ret = box_memalign(align, size);
+    if(!ret)
+        return ENOMEM;
+    *p = ret;
+    return 0;
+}
+
+EXPORT void* valloc(size_t size)
+{
+    return box_memalign(box64_pagesize, size);
+}
+
+EXPORT void* pvalloc(size_t size)
+{
+    return box_memalign(box64_pagesize, (size+box64_pagesize-1)&~(box64_pagesize-1));
+}
+
+EXPORT void cfree(void* p)
+{
+    box_free(p);
+}
+
+EXPORT size_t malloc_usable_size(void* p)
+{
+    return box_malloc_usable_size(p);
+}
+
+EXPORT void* my__Znwm(size_t sz)   //operator new(size_t)
+{
+    return box_malloc(sz);
+}
+
+EXPORT void* my__ZnwmRKSt9nothrow_t(size_t sz, void* p)   //operator new(size_t, std::nothrow_t const&)
+{
+    return box_malloc(sz);
+}
+
+EXPORT void* my__Znam(size_t sz)   //operator new[](size_t)
+{
+    return box_malloc(sz);
+}
+
+EXPORT void* my__ZnamRKSt9nothrow_t(size_t sz, void* p)   //operator new[](size_t, std::nothrow_t const&)
+{
+    return box_malloc(sz);
+}
+
+
+EXPORT void my__ZdaPv(void* p)   //operator delete[](void*)
+{
+    box_free(p);
+}
+
+EXPORT void my__ZdaPvm(void* p, size_t sz)   //operator delete[](void*, size_t)
+{
+    box_free(p);
+}
+
+EXPORT void my__ZdaPvmSt11align_val_t(void* p, size_t sz, size_t align)   //operator delete[](void*, unsigned long, std::align_val_t)
+{
+    box_free(p);
+}
+
+EXPORT void my__ZdlPv(void* p)   //operator delete(void*)
+{
+    box_free(p);
+}
+
+EXPORT void my__ZdlPvm(void* p, size_t sz)   //operator delete(void*, size_t)
+{
+    box_free(p);
+}
+
+EXPORT void* my__ZnwmSt11align_val_t(size_t sz, size_t align)  //// operator new(unsigned long, std::align_val_t)
+{
+    return box_memalign(align, sz);
+}
+
+EXPORT void* my__ZnwmSt11align_val_tRKSt9nothrow_t(size_t sz, size_t align, void* p)  //// operator new(unsigned long, std::align_val_t, std::nothrow_t const&)
+{
+    return box_memalign(align, sz);
+}
+
+EXPORT void* my__ZnamSt11align_val_t(size_t sz, size_t align)  //// operator new[](unsigned long, std::align_val_t)
+{
+    return box_memalign(align, sz);
+}
+
+EXPORT void* my__ZnamSt11align_val_tRKSt9nothrow_t(size_t sz, size_t align, void* p)  //// operator new[](unsigned long, std::align_val_t, std::nothrow_t const&)
+{
+    return box_memalign(align, sz);
+}
+
+EXPORT void my__ZdlPvRKSt9nothrow_t(void* p, void* n)   //operator delete(void*, std::nothrow_t const&)
+{
+    box_free(p);
+}
+
+EXPORT void my__ZdaPvSt11align_val_tRKSt9nothrow_t(void* p, size_t align, void* n)   //operator delete[](void*, std::align_val_t, std::nothrow_t const&)
+{
+    box_free(p);
+}
+
+EXPORT void my__ZdlPvmSt11align_val_t(void* p, size_t sz, size_t align)   //operator delete(void*, unsigned long, std::align_val_t)
+{
+    box_free(p);
+}
+
+EXPORT void my__ZdaPvRKSt9nothrow_t(void* p, void* n)   //operator delete[](void*, std::nothrow_t const&)
+{
+    box_free(p);
+}
+
+EXPORT void my__ZdaPvSt11align_val_t(void* p, size_t align)   //operator delete[](void*, std::align_val_t)
+{
+    box_free(p);
+}
+
+EXPORT void my__ZdlPvSt11align_val_t(void* p, size_t align)   //operator delete(void*, std::align_val_t)
+{
+    box_free(p);
+}
+
+EXPORT void my__ZdlPvSt11align_val_tRKSt9nothrow_t(void* p, size_t align, void* n)   //operator delete(void*, std::align_val_t, std::nothrow_t const&)
+{
+    box_free(p);
+}
+
+EXPORT void* my_tc_calloc(size_t n, size_t s)
+{
+    return box_calloc(n, s);
+}
+
+EXPORT void my_tc_cfree(void* p)
+{
+    box_free(p);
+}
+
+EXPORT void my_tc_delete(void* p)
+{
+    box_free(p);
+}
+
+EXPORT void my_tc_deletearray(void* p)
+{
+    box_free(p);
+}
+
+EXPORT void my_tc_deletearray_nothrow(void* p, void* n)
+{
+    box_free(p);
+}
+
+EXPORT void my_tc_delete_nothrow(void* p, void* n)
+{
+    box_free(p);
+}
+
+EXPORT void my_tc_free(void* p)
+{
+    box_free(p);
+}
+
+EXPORT void* my_tc_malloc(size_t s)
+{
+    return box_calloc(1, s);
+}
+
+EXPORT size_t my_tc_malloc_size(void* p)
+{
+    return box_malloc_usable_size(p);
+}
+
+EXPORT void* my_tc_new(size_t s)
+{
+    return box_calloc(1, s);
+}
+
+EXPORT void* my_tc_new_nothrow(size_t s, void* n)
+{
+        return box_calloc(1, s);
+}
+
+EXPORT void* my_tc_newarray(size_t s)
+{
+        return box_calloc(1, s);
+}
+
+EXPORT void* my_tc_newarray_nothrow(size_t s, void* n)
+{
+        return box_calloc(1, s);
+}
+
+EXPORT void* my_tc_pvalloc(size_t size)
+{
+    return box_memalign(box64_pagesize, (size+box64_pagesize-1)&~(box64_pagesize-1));
+}
+
+EXPORT void* my_tc_valloc(size_t size)
+{
+    return box_memalign(box64_pagesize, size);
+}
+
+EXPORT void* my_tc_memalign(size_t align, size_t size)
+{
+    return box_memalign(align, size);
+}
+
+EXPORT void* my_tc_malloc_skip_new_handler_weak(size_t s)
+{
+    return box_calloc(1, s);
+}
+
+EXPORT int my_tc_mallocopt(int param, int value)
+{
+    // ignoring...
+    return 1;
+}
+
+EXPORT void my_tc_malloc_stats()
+{
+    // ignoring
+}
+/*
+EXPORT int my_tc_set_new_mode(int mode)
+{
+    // ignoring
+    static int old = 0;
+    int ret = old;
+    old = mode;
+    return ret;
+}
+*/
+EXPORT void* my_tc_malloc_skip_new_handler(size_t s)
+{
+    return box_calloc(1, s);
+}
+
+EXPORT void* my_tc_mallinfo(void)
+{
+    // ignored, returning null stuffs
+    static size_t faked[10] = {0};
+    return faked;
+}
+
+EXPORT int my_tc_posix_memalign(void** p, size_t align, size_t size)
+{
+    if(align%sizeof(void*) || pot(align)!=align)
+        return EINVAL;
+    void* ret = box_memalign(align, size);
+    if(!ret)
+        return ENOMEM;
+    *p = ret;
+    return 0;
+}
+
+EXPORT void* my_tc_realloc(void* p, size_t s)
+{
+    return box_realloc(p, s);
+}
+/*
+EXPORT int my_tc_version(int i)
+{
+    return 2;
+}
+*/
+
+
+
+
+#pragma pack(push, 1)
+typedef struct reloc_jmp_s {
+    uint8_t _ff;
+    uint8_t _25;
+    uint32_t _00;
+    void* addr;
+} reloc_jmp_t;
+typedef struct simple_jmp_s {
+    uint8_t _e9;
+    uint32_t delta;
+} simple_jmp_t;
+#pragma pack(pop)
+
+static void addRelocJmp(void* offs, void* where, size_t size, const char* name)
+{
+    reloc_jmp_t r_jmp = {0};
+    simple_jmp_t s_jmp = {0};
+    size_t sz = 0;
+    intptr_t off64 = (intptr_t)where - ((intptr_t)offs+5);
+    void* p = NULL;
+    int32_t off32 = (int32_t)off64;
+    if(off32 == off64) {
+        s_jmp._e9 = 0xe9;
+        s_jmp.delta = (uint32_t)off32;
+        p = &s_jmp;
+        sz = sizeof(s_jmp);
+    } else {
+        r_jmp._ff = 0xff;
+        r_jmp._25 = 0x25;
+        r_jmp.addr = where;
+        p = &r_jmp;
+        sz = sizeof(r_jmp);
+    }
+    if(size>=sz)
+        memcpy(offs, p, sz);
+    else {
+        printf_log(LOG_INFO, "Warning, cannot redirect %s, too small %zu vs %zu\n", name, size, sz);
+    }
+}
+
+void checkHookedSymbols(lib_t *maplib, elfheader_t* h)
+{
+    int hooked = 0;
+    for (size_t i=0; i<h->numDynSym && hooked<2; ++i) {
+        const char * symname = h->DynStr+h->DynSym[i].st_name;
+        int bind = ELF64_ST_BIND(h->DynSym[i].st_info);
+        int type = ELF64_ST_TYPE(h->DynSym[i].st_info);
+        int vis = h->DynSym[i].st_other&0x3;
+        if((type==STT_FUNC) 
+        && (vis==STV_DEFAULT || vis==STV_PROTECTED) && (h->DynSym[i].st_shndx!=0 && h->DynSym[i].st_shndx<=65521)) {
+            uintptr_t offs = h->DynSym[i].st_value + h->delta;
+            size_t sz = h->DynSym[i].st_size;
+            if(bind!=STB_LOCAL && bind!=STB_WEAK && sz>=sizeof(reloc_jmp_t)) {
+                #define GO(A, B) if(!strcmp(symname, #A)) ++hooked;
+                #define GO2(A, B)
+                SUPER()
+                #undef GO
+                #undef GO2
+            }
+        }
+    }
+    if(hooked<2)
+        return; // only redirect on lib that hooked / redefined the operators
+    printf_log(LOG_INFO, "Redirecting overriden malloc function for %s\n", ElfName(h));
+    for (size_t i=0; i<h->numDynSym; ++i) {
+        const char * symname = h->DynStr+h->DynSym[i].st_name;
+        int bind = ELF64_ST_BIND(h->DynSym[i].st_info);
+        int type = ELF64_ST_TYPE(h->DynSym[i].st_info);
+        int vis = h->DynSym[i].st_other&0x3;
+        if((type==STT_FUNC) 
+        && (vis==STV_DEFAULT || vis==STV_PROTECTED) && (h->DynSym[i].st_shndx!=0 && h->DynSym[i].st_shndx<=65521)) {
+            uintptr_t offs = h->DynSym[i].st_value + h->delta;
+            size_t sz = h->DynSym[i].st_size;
+            if(bind!=STB_LOCAL && bind!=STB_WEAK) {
+                #define GO(A, B) if(!strcmp(symname, "__libc_" #A)) {uintptr_t alt = AddCheckBridge(my_context->system, B, A, 0, #A); printf_log(LOG_DEBUG, "Redirecting %s function from %p (%s)\n", symname, (void*)offs, ElfName(h)); addRelocJmp((void*)offs, (void*)alt, sz, #A);}
+                #define GO2(A, B)
+                SUPER()
+                #undef GO
+                #undef GO2
+                #define GO(A, B) if(!strcmp(symname, #A)) {uintptr_t alt = AddCheckBridge(my_context->system, B, A, 0, #A); printf_log(LOG_DEBUG, "Redirecting %s function from %p (%s)\n", symname, (void*)offs, ElfName(h)); addRelocJmp((void*)offs, (void*)alt, sz, #A);}
+                #define GO2(A, B) if(!strcmp(symname, #A)) {uintptr_t alt = AddCheckBridge(my_context->system, B, my_##A, 0, #A); printf_log(LOG_DEBUG, "Redirecting %s function from %p (%s)\n", symname, (void*)offs, ElfName(h)); addRelocJmp((void*)offs, (void*)alt, sz, #A);}
+                SUPER()
+                #undef GO
+                #undef GO2
+            }
+        }
+    }
+}
 
 void init_malloc_hook() {
+#ifdef ANDROID
     __libc_malloc = dlsym(RTLD_NEXT, "malloc");
     __libc_realloc = dlsym(RTLD_NEXT, "realloc");
-    __libc_calloc = dlsym(RTLD_NEXT, "realloc");
+    __libc_calloc = dlsym(RTLD_NEXT, "calloc");
     __libc_free = dlsym(RTLD_NEXT, "free");
     __libc_memalign = dlsym(RTLD_NEXT, "memalign");
-}
 #endif
+    box_malloc_usable_size = dlsym(RTLD_NEXT, "malloc_usable_size");
+    #if 0
+    #define GO(A, B)
+    #define GO2(A, B)   box_##A = (B##_t)dlsym(RTLD_NEXT, #A); if(box_##A == (B##_t)A) box_##A = NULL;
+    SUPER()
+    #undef GO2
+    #undef GO
+    #endif
+}
 
-char* box_strdup(const char* s) {
-    char* ret = box_calloc(1, strlen(s)+1);
-    memcpy(ret, s, strlen(s));
-    return ret;
-}
\ No newline at end of file
+#undef SUPER
\ No newline at end of file
diff --git a/src/tools/callback.c b/src/tools/callback.c
index 0d9b0c5d..d3d5101e 100755
--- a/src/tools/callback.c
+++ b/src/tools/callback.c
@@ -56,6 +56,68 @@ uint64_t RunFunction(box64context_t *context, uintptr_t fnc, int nargs, ...)
 }
 
 EXPORTDYN
+uint64_t RunSafeFunction(box64context_t *context, uintptr_t fnc, int nargs, ...)
+{
+    (void)context;
+
+    x64emu_t *emu = thread_get_emu();
+    int align = (nargs>6)?(((nargs-6)&1)):0;
+    int stackn = align + ((nargs>6)?(nargs-6):0);
+
+    Push64(emu, R_RBP); // push rbp
+    uintptr_t old_rbp = R_RBP = R_RSP;      // mov rbp, rsp
+
+    Push64(emu, R_RDI);
+    Push64(emu, R_RSI);
+    Push64(emu, R_RDX);
+    Push64(emu, R_RCX);
+    Push64(emu, R_R8);
+    Push64(emu, R_R9);
+    Push64(emu, R_R10);
+    Push64(emu, R_R11);
+    Push64(emu, R_RAX);
+
+    R_RSP -= stackn*sizeof(void*);   // need to push in reverse order
+
+    uint64_t *p = (uint64_t*)R_RSP;
+
+    va_list va;
+    va_start (va, nargs);
+    for (int i=0; i<nargs; ++i) {
+        if(i<6) {
+            int nn[] = {_DI, _SI, _DX, _CX, _R8, _R9};
+            emu->regs[nn[i]].q[0] = va_arg(va, uint64_t);
+        } else {
+            *p = va_arg(va, uint64_t);
+            p++;
+        }
+    }
+    va_end (va);
+
+    uintptr_t oldip = R_RIP;
+    DynaCall(emu, fnc);
+
+    uint64_t ret = R_RAX;
+    R_RIP = oldip;
+
+    R_RAX = Pop64(emu);
+    R_R11 = Pop64(emu);
+    R_R10 = Pop64(emu);
+    R_R9 = Pop64(emu);
+    R_R8 = Pop64(emu);
+    R_RCX = Pop64(emu);
+    R_RDX = Pop64(emu);
+    R_RSI = Pop64(emu);
+    R_RDI = Pop64(emu);
+
+    R_RSP = old_rbp;          // mov rsp, rbp
+    R_RBP = Pop64(emu);     // pop rbp
+    
+
+    return ret;
+}
+
+EXPORTDYN
 uint64_t RunFunctionWithEmu(x64emu_t *emu, int QuitOnLongJump, uintptr_t fnc, int nargs, ...)
 {
     int align = (nargs>6)?(((nargs-6)&1)):0;
diff --git a/src/tools/fileutils.c b/src/tools/fileutils.c
index 67e8c652..04905c04 100755
--- a/src/tools/fileutils.c
+++ b/src/tools/fileutils.c
@@ -57,11 +57,8 @@ char* ResolveFile(const char* filename, path_collection_t* paths)
         } else
             strcpy(p, paths->paths[i]);
         strcat(p, filename);
-        if(FileExist(p, IS_FILE)) {
-            char p2[MAX_PATH];
-            realpath(p, p2);
-            return box_strdup(p2);
-        }
+        if(FileExist(p, IS_FILE))
+            return box_realpath(p, NULL);
     }
 
     return box_strdup(filename); //NULL;
diff --git a/src/wrapped/generated/functions_list.txt b/src/wrapped/generated/functions_list.txt
index 682b0b98..e34f0a04 100644
--- a/src/wrapped/generated/functions_list.txt
+++ b/src/wrapped/generated/functions_list.txt
@@ -9,6 +9,7 @@
 #() vFL
 #() vFp
 #() vFS
+#() vFP
 #() cFv
 #() cFi
 #() cFu
@@ -535,6 +536,7 @@
 #() pFddd
 #() pFDip
 #() pFLup
+#() pFLLp
 #() pFpii
 #() pFpiu
 #() pFpid
@@ -2885,8 +2887,6 @@ wrappedlibc:
   - syscall
 - LFL:
   - getauxval
-- pFL:
-  - malloc
 - pFp:
   - __deregister_frame_info
   - mallinfo
@@ -3974,6 +3974,20 @@ wrappedsmpeg2:
   - SMPEG_new_rwops
 wrappedsoftokn3:
 wrappedssl3:
+wrappedtbbmallocproxy:
+- vFp:
+  - _ZdlPv
+- vFP:
+  - _ZdaPv
+- pFL:
+  - _Znam
+  - _Znwm
+- vFpp:
+  - _ZdaPvRKSt9nothrow_t
+  - _ZdlPvRKSt9nothrow_t
+- pFLp:
+  - _ZnamRKSt9nothrow_t
+  - _ZnwmRKSt9nothrow_t
 wrappedtcmallocminimal:
 - iFpL:
   - munmap
diff --git a/src/wrapped/generated/wrappedlibctypes.h b/src/wrapped/generated/wrappedlibctypes.h
index 4dfede09..9352b479 100644
--- a/src/wrapped/generated/wrappedlibctypes.h
+++ b/src/wrapped/generated/wrappedlibctypes.h
@@ -19,7 +19,6 @@ typedef int64_t (*iFi_t)(int64_t);
 typedef int64_t (*iFp_t)(void*);
 typedef intptr_t (*lFv_t)(void);
 typedef uintptr_t (*LFL_t)(uintptr_t);
-typedef void* (*pFL_t)(uintptr_t);
 typedef void* (*pFp_t)(void*);
 typedef void (*vFpi_t)(void*, int64_t);
 typedef void (*vFpu_t)(void*, uint64_t);
@@ -107,7 +106,6 @@ typedef int64_t (*iFppipppp_t)(void*, void*, int64_t, void*, void*, void*, void*
 	GO(uname, iFp_t) \
 	GO(syscall, lFv_t) \
 	GO(getauxval, LFL_t) \
-	GO(malloc, pFL_t) \
 	GO(__deregister_frame_info, pFp_t) \
 	GO(mallinfo, pFp_t) \
 	GO(__longjmp_chk, vFpi_t) \
diff --git a/src/wrapped/generated/wrappedtbbmallocproxydefs.h b/src/wrapped/generated/wrappedtbbmallocproxydefs.h
new file mode 100644
index 00000000..57ef66fe
--- /dev/null
+++ b/src/wrapped/generated/wrappedtbbmallocproxydefs.h
@@ -0,0 +1,8 @@
+/*******************************************************************
+ * File automatically generated by rebuild_wrappers.py (v2.1.0.16) *
+ *******************************************************************/
+#ifndef __wrappedtbbmallocproxyDEFS_H_
+#define __wrappedtbbmallocproxyDEFS_H_
+
+
+#endif // __wrappedtbbmallocproxyDEFS_H_
diff --git a/src/wrapped/generated/wrappedtbbmallocproxytypes.h b/src/wrapped/generated/wrappedtbbmallocproxytypes.h
new file mode 100644
index 00000000..f26554a8
--- /dev/null
+++ b/src/wrapped/generated/wrappedtbbmallocproxytypes.h
@@ -0,0 +1,30 @@
+/*******************************************************************
+ * File automatically generated by rebuild_wrappers.py (v2.1.0.16) *
+ *******************************************************************/
+#ifndef __wrappedtbbmallocproxyTYPES_H_
+#define __wrappedtbbmallocproxyTYPES_H_
+
+#ifndef LIBNAME
+#error You should only #include this file inside a wrapped*.c file
+#endif
+#ifndef ADDED_FUNCTIONS
+#define ADDED_FUNCTIONS() 
+#endif
+// Function-pointer types named by the GO() entries of SUPER() below.
+typedef void (*vFp_t)(void*);
+typedef void (*vFP_t)(void*); // same C type as vFp_t
+typedef void* (*pFL_t)(uintptr_t);
+typedef void (*vFpp_t)(void*, void*);
+typedef void* (*pFLp_t)(uintptr_t, void*);
+// SUPER() expands to ADDED_FUNCTIONS() followed by one GO(symbol, type) per symbol.
+#define SUPER() ADDED_FUNCTIONS() \
+	GO(_ZdlPv, vFp_t) \
+	GO(_ZdaPv, vFP_t) \
+	GO(_Znam, pFL_t) \
+	GO(_Znwm, pFL_t) \
+	GO(_ZdaPvRKSt9nothrow_t, vFpp_t) \
+	GO(_ZdlPvRKSt9nothrow_t, vFpp_t) \
+	GO(_ZnamRKSt9nothrow_t, pFLp_t) \
+	GO(_ZnwmRKSt9nothrow_t, pFLp_t)
+
+#endif // __wrappedtbbmallocproxyTYPES_H_
diff --git a/src/wrapped/generated/wrappedtbbmallocproxyundefs.h b/src/wrapped/generated/wrappedtbbmallocproxyundefs.h
new file mode 100644
index 00000000..ed09cf65
--- /dev/null
+++ b/src/wrapped/generated/wrappedtbbmallocproxyundefs.h
@@ -0,0 +1,8 @@
+/*******************************************************************
+ * File automatically generated by rebuild_wrappers.py (v2.1.0.16) *
+ *******************************************************************/
+#ifndef __wrappedtbbmallocproxyUNDEFS_H_
+#define __wrappedtbbmallocproxyUNDEFS_H_
+
+
+#endif // __wrappedtbbmallocproxyUNDEFS_H_
diff --git a/src/wrapped/generated/wrapper.c b/src/wrapped/generated/wrapper.c
index 2301dbf9..5a65456b 100644
--- a/src/wrapped/generated/wrapper.c
+++ b/src/wrapped/generated/wrapper.c
@@ -43,6 +43,7 @@ typedef void (*vFl_t)(intptr_t);
 typedef void (*vFL_t)(uintptr_t);
 typedef void (*vFp_t)(void*);
 typedef void (*vFS_t)(void*);
+typedef void (*vFP_t)(void*);
 typedef int8_t (*cFv_t)(void);
 typedef int8_t (*cFi_t)(int64_t);
 typedef int8_t (*cFu_t)(uint64_t);
@@ -569,6 +570,7 @@ typedef void* (*pFdUU_t)(double, uint64_t, uint64_t);
 typedef void* (*pFddd_t)(double, double, double);
 typedef void* (*pFDip_t)(long double, int64_t, void*);
 typedef void* (*pFLup_t)(uintptr_t, uint64_t, void*);
+typedef void* (*pFLLp_t)(uintptr_t, uintptr_t, void*);
 typedef void* (*pFpii_t)(void*, int64_t, int64_t);
 typedef void* (*pFpiu_t)(void*, int64_t, uint64_t);
 typedef void* (*pFpid_t)(void*, int64_t, double);
@@ -2237,6 +2239,7 @@ void vFl(x64emu_t *emu, uintptr_t fcn) { vFl_t fn = (vFl_t)fcn; fn((intptr_t)R_R
 void vFL(x64emu_t *emu, uintptr_t fcn) { vFL_t fn = (vFL_t)fcn; fn((uintptr_t)R_RDI); }
 void vFp(x64emu_t *emu, uintptr_t fcn) { vFp_t fn = (vFp_t)fcn; fn((void*)R_RDI); }
 void vFS(x64emu_t *emu, uintptr_t fcn) { vFS_t fn = (vFS_t)fcn; fn(io_convert((void*)R_RDI)); }
+void vFP(x64emu_t *emu, uintptr_t fcn) { vFP_t fn = (vFP_t)fcn; fn(*(void**)(R_RSP + 8)); } // argument is loaded from the stack slot at RSP+8, not from RDI as in vFp
 void cFv(x64emu_t *emu, uintptr_t fcn) { cFv_t fn = (cFv_t)fcn; R_RAX=fn(); }
 void cFi(x64emu_t *emu, uintptr_t fcn) { cFi_t fn = (cFi_t)fcn; R_RAX=fn((int64_t)R_RDI); }
 void cFu(x64emu_t *emu, uintptr_t fcn) { cFu_t fn = (cFu_t)fcn; R_RAX=fn((uint64_t)R_RDI); }
@@ -2763,6 +2766,7 @@ void pFdUU(x64emu_t *emu, uintptr_t fcn) { pFdUU_t fn = (pFdUU_t)fcn; R_RAX=(uin
 void pFddd(x64emu_t *emu, uintptr_t fcn) { pFddd_t fn = (pFddd_t)fcn; R_RAX=(uintptr_t)fn(emu->xmm[0].d[0], emu->xmm[1].d[0], emu->xmm[2].d[0]); }
 void pFDip(x64emu_t *emu, uintptr_t fcn) { pFDip_t fn = (pFDip_t)fcn; R_RAX=(uintptr_t)fn(LD2localLD((void*)(R_RSP + 8)), (int64_t)R_RDI, (void*)R_RSI); }
 void pFLup(x64emu_t *emu, uintptr_t fcn) { pFLup_t fn = (pFLup_t)fcn; R_RAX=(uintptr_t)fn((uintptr_t)R_RDI, (uint64_t)R_RSI, (void*)R_RDX); }
+void pFLLp(x64emu_t *emu, uintptr_t fcn) { pFLLp_t fn = (pFLLp_t)fcn; R_RAX=(uintptr_t)fn((uintptr_t)R_RDI, (uintptr_t)R_RSI, (void*)R_RDX); }
 void pFpii(x64emu_t *emu, uintptr_t fcn) { pFpii_t fn = (pFpii_t)fcn; R_RAX=(uintptr_t)fn((void*)R_RDI, (int64_t)R_RSI, (int64_t)R_RDX); }
 void pFpiu(x64emu_t *emu, uintptr_t fcn) { pFpiu_t fn = (pFpiu_t)fcn; R_RAX=(uintptr_t)fn((void*)R_RDI, (int64_t)R_RSI, (uint64_t)R_RDX); }
 void pFpid(x64emu_t *emu, uintptr_t fcn) { pFpid_t fn = (pFpid_t)fcn; R_RAX=(uintptr_t)fn((void*)R_RDI, (int64_t)R_RSI, emu->xmm[0].d[0]); }
@@ -4886,6 +4890,7 @@ int isSimpleWrapper(wrapper_t fun) {
 	if (fun == &pFdUU) return 2;
 	if (fun == &pFddd) return 4;
 	if (fun == &pFLup) return 1;
+	if (fun == &pFLLp) return 1;
 	if (fun == &pFpii) return 1;
 	if (fun == &pFpiu) return 1;
 	if (fun == &pFpid) return 2;
diff --git a/src/wrapped/generated/wrapper.h b/src/wrapped/generated/wrapper.h
index 149ba730..ad572e3a 100644
--- a/src/wrapped/generated/wrapper.h
+++ b/src/wrapped/generated/wrapper.h
@@ -42,6 +42,7 @@ void vFl(x64emu_t *emu, uintptr_t fnc);
 void vFL(x64emu_t *emu, uintptr_t fnc);
 void vFp(x64emu_t *emu, uintptr_t fnc);
 void vFS(x64emu_t *emu, uintptr_t fnc);
+void vFP(x64emu_t *emu, uintptr_t fnc);
 void cFv(x64emu_t *emu, uintptr_t fnc);
 void cFi(x64emu_t *emu, uintptr_t fnc);
 void cFu(x64emu_t *emu, uintptr_t fnc);
@@ -568,6 +569,7 @@ void pFdUU(x64emu_t *emu, uintptr_t fnc);
 void pFddd(x64emu_t *emu, uintptr_t fnc);
 void pFDip(x64emu_t *emu, uintptr_t fnc);
 void pFLup(x64emu_t *emu, uintptr_t fnc);
+void pFLLp(x64emu_t *emu, uintptr_t fnc);
 void pFpii(x64emu_t *emu, uintptr_t fnc);
 void pFpiu(x64emu_t *emu, uintptr_t fnc);
 void pFpid(x64emu_t *emu, uintptr_t fnc);
diff --git a/src/wrapped/wrappedlibc.c b/src/wrapped/wrappedlibc.c
index f5d8c8b2..690fa66f 100755
--- a/src/wrapped/wrappedlibc.c
+++ b/src/wrapped/wrappedlibc.c
@@ -2949,7 +2949,7 @@ EXPORT char* my_program_invocation_short_name = NULL;
 EXPORT char my___libc_single_threaded = 0;
 
 #define PRE_INIT\
-    if(box64_tcmalloc_minimal)                                      \
+    if(1)                                                      \
         lib->w.lib = dlopen(NULL, RTLD_LAZY | RTLD_GLOBAL);    \
     else
 
@@ -2964,7 +2964,7 @@ EXPORT char my___libc_single_threaded = 0;
     my___progname = my_program_invocation_short_name =                          \
         strrchr(box64->argv[0], '/') + 1;                                       \
     getMy(lib);                                                                 \
-    setNeededLibs(lib, 3,                                              \
+    setNeededLibs(lib, 3,                                                       \
         "ld-linux-x86-64.so.2",                                                 \
         "libpthread.so.0",                                                      \
         "librt.so.1");
diff --git a/src/wrapped/wrappedlibc_private.h b/src/wrapped/wrappedlibc_private.h
index 8a3a355a..066596b4 100755
--- a/src/wrapped/wrappedlibc_private.h
+++ b/src/wrapped/wrappedlibc_private.h
@@ -110,7 +110,7 @@ GO(catopen, pFpi)
 GO(cfgetispeed, uFp)
 GO(cfgetospeed, uFp)
 GO(cfmakeraw, vFp)
-GO2(cfree, vFp, free)
+GO(cfree, vFp)
 GO(cfsetispeed, iFpu)
 GO(cfsetospeed, iFpu)
 GO(cfsetspeed, iFpu)
@@ -1116,7 +1116,7 @@ GO(__madvise, iFpLi)
 GOW(madvise, iFpLi)
 GOWM(makecontext, iFEppiV)
 GOWM(mallinfo, pFEp)
-GOM(malloc, pFL) // need to wrap to clear allocated memory?
+GO(malloc, pFL) // need to wrap to clear allocated memory?
 //GO(malloc_get_state, // Obsolete
 GOW(malloc_info, iFip)
 //GO(malloc_set_state, // Obsolete
@@ -2319,3 +2319,4 @@ DATAM(program_invocation_short_name, sizeof(void*))
 DATAM(__libc_single_threaded, 1)
 
 GO(iconvctl, iFlip)
+GO(dummy__ZnwmSt11align_val_tRKSt9nothrow_t, pFLLp) // for mallochook.c
\ No newline at end of file
diff --git a/src/wrapped/wrappedtbbmallocproxy.c b/src/wrapped/wrappedtbbmallocproxy.c
new file mode 100644
index 00000000..af95d93b
--- /dev/null
+++ b/src/wrapped/wrappedtbbmallocproxy.c
@@ -0,0 +1,23 @@
+#define _GNU_SOURCE         /* must precede every system header; see feature_test_macros(7) */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <dlfcn.h>
+
+#include "wrappedlibs.h"
+
+#include "wrapper.h"
+#include "bridge.h"
+#include "librarian/library_private.h"
+#include "x64emu.h"
+
+// Fake the lib for now, don't load it
+const char* tbbmallocproxyName = "libtbbmalloc_proxy.so.2";
+#define LIBNAME tbbmallocproxy
+
+// PRE_INIT: use the dlopen(NULL) handle; the trailing if(0) skips the statement after the macro
+#define PRE_INIT\
+    lib->w.lib = dlopen(NULL, RTLD_LAZY | RTLD_GLOBAL);    \
+    if(0)
+
+#include "wrappedlib_init.h"
diff --git a/src/wrapped/wrappedtbbmallocproxy_private.h b/src/wrapped/wrappedtbbmallocproxy_private.h
new file mode 100644
index 00000000..bf1eefa9
--- /dev/null
+++ b/src/wrapped/wrappedtbbmallocproxy_private.h
@@ -0,0 +1,34 @@
+#if !(defined(GO) && defined(GOM) && defined(GO2) && defined(DATA))
+#error Meh...
+#endif
+// Wraps C++ operator new/delete only; pointer args use 'p' (register), so _ZdaPv is vFp like _ZdlPv.
+//GO(calloc, 
+//GO(_fini, 
+//GO(free, 
+//GO(_init, 
+//GO(__libc_calloc, 
+//GO(__libc_free, 
+//GO(__libc_malloc, 
+//GO(__libc_memalign, 
+//GO(__libc_pvalloc, 
+//GO(__libc_realloc, 
+//GO(__libc_valloc, 
+//GO(mallinfo, 
+//GO(malloc, 
+//GO(malloc_usable_size, 
+//GO(mallopt, 
+//GO(memalign, 
+//GO(posix_memalign, 
+//GO(pvalloc, 
+//GO(realloc, 
+//GO(__TBB_internal_find_original_malloc, 
+//GO(__TBB_malloc_proxy, 
+//GO(valloc, 
+GOM(_ZdaPv, vFp)                //%noE
+GOM(_ZdaPvRKSt9nothrow_t, vFpp) //%noE
+GOM(_ZdlPv, vFp)                //%noE
+GOM(_ZdlPvRKSt9nothrow_t, vFpp) //%noE
+GOM(_Znam, pFL)                 //%noE
+GOM(_ZnamRKSt9nothrow_t, pFLp)  //%noE
+GOM(_Znwm, pFL)                 //%noE
+GOM(_ZnwmRKSt9nothrow_t, pFLp)  //%noE