about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
authorptitSeb <sebastien.chev@gmail.com>2021-03-02 11:45:50 +0100
committerptitSeb <sebastien.chev@gmail.com>2021-03-02 11:45:50 +0100
commit997b5c6b50b9263b06fb0a21dd6c2e727ce3e4a1 (patch)
treebc488d342c0dc3827448de0c72b761d8e16727f5 /src
parentbe92787329b59cf0f2202ca7623055372df0d90f (diff)
downloadbox64-997b5c6b50b9263b06fb0a21dd6c2e727ce3e4a1.tar.gz
box64-997b5c6b50b9263b06fb0a21dd6c2e727ce3e4a1.zip
Added some x86_64 regs and emu infrastructure
Diffstat (limited to 'src')
-rwxr-xr-xsrc/emu/x64emu.c454
-rwxr-xr-xsrc/emu/x64emu_private.h93
-rwxr-xr-xsrc/emu/x64int3.c312
-rwxr-xr-xsrc/emu/x64run_private.h236
-rwxr-xr-xsrc/emu/x87emu_private.c307
-rwxr-xr-xsrc/emu/x87emu_private.h201
-rwxr-xr-xsrc/include/box64context.h7
-rwxr-xr-xsrc/include/regs.h290
-rwxr-xr-xsrc/include/x64emu.h63
-rwxr-xr-xsrc/include/x64run.h14
-rwxr-xr-xsrc/main.c3
11 files changed, 1980 insertions, 0 deletions
diff --git a/src/emu/x64emu.c b/src/emu/x64emu.c
new file mode 100755
index 00000000..e2cffe4a
--- /dev/null
+++ b/src/emu/x64emu.c
@@ -0,0 +1,454 @@
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+
+#include "debug.h"
+#include "box64stack.h"
+#include "x64emu.h"
+#include "x64emu_private.h"
+#include "x87emu_private.h"
+#include "box64context.h"
+#include "x64run.h"
+#include "x64run_private.h"
+//#include "callback.h"
+//#include "bridge.h"
+#ifdef DYNAREC
+#include "custommem.h"
+#endif
+
+typedef struct cleanup_s {  // one registered cleanup entry, stored in my_context->cleanups
+    void*       f;      // function pointer registered by AddCleanup / AddCleanup1Arg
+    int         arg;    // 0 when registered by AddCleanup, 1 when registered by AddCleanup1Arg
+    void*       a;      // argument recorded by AddCleanup1Arg, NULL for AddCleanup
+} cleanup_t;
+
+static uint32_t x86emu_parity_tab[8] =    // 8 x 32 = 256 bits; presumably one parity bit per byte value - TODO confirm against the flag helpers
+{
+	0x96696996,
+	0x69969669,
+	0x69969669,
+	0x96696996,
+	0x69969669,
+	0x96696996,
+	0x96696996,
+	0x69969669,
+};
+
+uint32_t* GetParityTab()    // exposes the file-local parity table to other translation units
+{
+    return x86emu_parity_tab;   // pointer to the static table, never NULL
+}
+
+//void PushExit(x64emu_t* emu)
+//{
+//    uintptr_t endMarker = AddCheckBridge(my_context->system, NULL, NULL, 0);
+//    Push(emu, endMarker);
+//}
+
+//void* GetExit()
+//{
+//    return (void*)AddCheckBridge(my_context->system, NULL, NULL, 0);
+//}
+
+/*
+ * Common initialisation used by NewX64Emu() and NewX64EmuFromStack().
+ *  emu       : emulator state to fill (not cleared here)
+ *  context   : parent box64 context, stored as-is
+ *  start     : initial value of RIP
+ *  stack     : base address of the emulated stack
+ *  stacksize : size of that stack, in bytes
+ *  ownstack  : non-zero when the stack has to be freed together with the emu
+ */
+static void internalX64Setup(x64emu_t* emu, box64context_t *context, uintptr_t start, uintptr_t stack, int stacksize, int ownstack)
+{
+    emu->context = context;
+    // cpu helpers: each slot points to the register of same index, except slot 4 that points to emu->zero
+    for (int r=0; r<8; ++r) {
+        if (r==4)
+            emu->sbiidx[r] = &emu->zero;
+        else
+            emu->sbiidx[r] = &emu->regs[r];
+    }
+    emu->x86emu_parity_tab = x86emu_parity_tab;
+    emu->eflags.x32 = 0x202; // default flags?
+    // stack bookkeeping: only an owned stack is recorded for release
+    emu->init_stack = (void*)stack;
+    emu->size_stack = stacksize;
+    if (ownstack)
+        emu->stack2free = (void*)stack;
+    else
+        emu->stack2free = NULL;
+    // initial instruction pointer and stack pointer
+    R_RIP = start;
+    R_RSP = (stack + stacksize) & ~7;   // top of the stack, always aligned down to 8 bytes
+    // fake init of segments...
+    emu->segs[_SS] = 0x7b;
+    emu->segs[_ES] = 0x7b;
+    emu->segs[_DS] = 0x7b;
+    emu->segs[_CS] = 0x73;
+    emu->segs[_FS] = default_fs;
+    emu->segs[_GS] = 0x33;
+    // fpu registers are reset last
+    reset_fpu(emu);
+}
+
+/*
+ * Allocate a zero-initialised emulator state and set it up.
+ *  context   : parent box64 context
+ *  start     : initial RIP
+ *  stack     : base address of the emulated stack
+ *  stacksize : size of the stack, in bytes
+ *  ownstack  : non-zero if the stack must be freed by FreeX64Emu()
+ * Returns the new emu, to be released with FreeX64Emu(), or NULL when the
+ * allocation fails.
+ */
+EXPORTDYN
+x64emu_t *NewX64Emu(box64context_t *context, uintptr_t start, uintptr_t stack, int stacksize, int ownstack)
+{
+    printf_log(LOG_DEBUG, "Allocate a new X86_64 Emu, with EIP=%p and Stack=%p/0x%X\n", (void*)start, (void*)stack, stacksize);
+
+    x64emu_t *emu = (x64emu_t*)calloc(1, sizeof(x64emu_t));
+    if(!emu) {
+        // do not hand a NULL pointer to internalX64Setup, it writes through it
+        printf_log(LOG_NONE, "Error: cannot allocate memory for a new X86_64 Emu\n");
+        return NULL;
+    }
+
+    internalX64Setup(emu, context, start, stack, stacksize, ownstack);
+
+    return emu;
+}
+
+x64emu_t *NewX64EmuFromStack(x64emu_t* emu, box64context_t *context, uintptr_t start, uintptr_t stack, int stacksize, int ownstack)   // same as NewX64Emu, but sets up caller-provided storage instead of allocating
+{
+    printf_log(LOG_DEBUG, "New X86_64 Emu from stack, with EIP=%p and Stack=%p/0x%X\n", (void*)start, (void*)stack, stacksize);
+
+    internalX64Setup(emu, context, start, stack, stacksize, ownstack);   // NOTE(review): unlike NewX64Emu, *emu is not zeroed before setup - confirm callers clear it
+    
+    return emu;   // the pointer received, for call chaining
+}
+
+EXPORTDYN
+void SetupX86Emu(x64emu_t *emu)   // placeholder: emu is not used yet
+{
+    printf_log(LOG_DEBUG, "Setup X86_64 Emu\n");   // only effect for now is this debug log
+}
+
+void SetTraceEmu(uintptr_t start, uintptr_t end)   // records the address range used for tracing
+{
+//    if(my_context->zydis) {
+//        if (end == 0) {
+//            printf_log(LOG_INFO, "Setting trace\n");
+//        } else {
+//            if(end!=1) {  // 0-1 is basically no trace, so don't printf it...
+//                printf_log(LOG_INFO, "Setting trace only between %p and %p\n", (void*)start, (void*)end);
+//            }
+//        }
+//    }
+    trace_start = start;   // trace_start / trace_end are defined outside this file
+    trace_end = end;       // stored without validation
+}
+
+/*
+ * Append one entry to my_context->cleanups, growing the array by 4 slots when
+ * it is full. If the array cannot be grown, the existing entries are kept
+ * untouched and the new entry is dropped after logging.
+ */
+static void pushCleanup(void *f, int arg, void *a)
+{
+    if(my_context->clean_sz == my_context->clean_cap) {
+        // use a temporary so that the current array is not lost if realloc fails
+        cleanup_t* tmp = (cleanup_t*)realloc(my_context->cleanups, sizeof(cleanup_t)*(my_context->clean_cap + 4));
+        if(!tmp) {
+            printf_log(LOG_NONE, "Warning: cannot grow the cleanup list, %p is not registered\n", f);
+            return;
+        }
+        my_context->cleanups = tmp;
+        my_context->clean_cap += 4;
+    }
+    cleanup_t* slot = &my_context->cleanups[my_context->clean_sz++];
+    slot->arg = arg;
+    slot->a = a;
+    slot->f = f;
+}
+
+/*
+ * Register cleanup function p, to be called without argument.
+ * emu is currently unused.
+ */
+void AddCleanup(x64emu_t *emu, void *p)
+{
+    (void)emu;
+    pushCleanup(p, 0, NULL);
+}
+
+/*
+ * Register cleanup function p, to be called with the single argument a.
+ * emu is currently unused.
+ */
+void AddCleanup1Arg(x64emu_t *emu, void *p, void* a)
+{
+    (void)emu;
+    pushCleanup(p, 1, a);
+}
+
+//void CallCleanup(x64emu_t *emu, void* p)
+//{
+//    printf_log(LOG_DEBUG, "Calling atexit registered functions for %p mask\n", p);
+//    for(int i=my_context->clean_sz-1; i>=0; --i) {
+//        if(p==my_context->cleanups[i].f) {
+//            printf_log(LOG_DEBUG, "Call cleanup #%d\n", i);
+//            RunFunctionWithEmu(emu, 0, (uintptr_t)(my_context->cleanups[i].f), my_context->cleanups[i].arg, my_context->cleanups[i].a );
+//            // now remove the cleanup
+//            if(i!=my_context->clean_sz-1)
+//                memmove(my_context->cleanups+i, my_context->cleanups+i+1, (my_context->clean_sz-i-1)*sizeof(cleanup_t));
+//            --my_context->clean_sz;
+//        }
+//    }
+//}
+
+//void CallAllCleanup(x64emu_t *emu)
+//{
+//    printf_log(LOG_DEBUG, "Calling atexit registered functions\n");
+//    for(int i=my_context->clean_sz-1; i>=0; --i) {
+//        printf_log(LOG_DEBUG, "Call cleanup #%d\n", i);
+//        RunFunctionWithEmu(emu, 0, (uintptr_t)(my_context->cleanups[i].f), my_context->cleanups[i].arg, my_context->cleanups[i].a );
+//    }
+//    my_context->clean_sz = 0;
+//    free(my_context->cleanups);
+//    my_context->cleanups = NULL;
+//}
+
+/*
+ * Release what an emu owns besides its own storage: the stack recorded in
+ * stack2free (NULL when the stack is not owned). Accepts a NULL emu, and
+ * clears stack2free so that a second call cannot free the stack twice.
+ */
+static void internalFreeX64(x64emu_t* emu)
+{
+    if(!emu)
+        return;
+    free(emu->stack2free);
+    emu->stack2free = NULL;
+}
+
+/*
+ * Destroy an emu created by NewX64Emu(): frees its owned stack and the emu
+ * itself, then sets *emu to NULL. Does nothing when emu is NULL; a NULL *emu
+ * is only logged.
+ */
+EXPORTDYN
+void FreeX64Emu(x64emu_t **emu)
+{
+    if(!emu)
+        return;
+    printf_log(LOG_DEBUG, "%04d|Free a X86_64 Emu (%p)\n", GetTID(), (void*)*emu);
+
+    internalFreeX64(*emu);
+
+    free(*emu);
+    *emu = NULL;
+}
+
+/*
+ * Counterpart of NewX64EmuFromStack(): frees the owned stack only, the emu
+ * storage itself belongs to the caller and *emu is left unchanged.
+ */
+void FreeX64EmuFromStack(x64emu_t **emu)
+{
+    if(!emu)
+        return;
+    printf_log(LOG_DEBUG, "%04d|Free a X86_64 Emu from stack (%p)\n", GetTID(), (void*)*emu);
+
+    internalFreeX64(*emu);
+}
+
+/*
+ * Copy the cpu, fpu, mmx and sse state plus the quit/error status of emu
+ * into newemu. newemu keeps its own context and stack; RSP is shifted by the
+ * distance between the two stack bases.
+ */
+void CloneEmu(x64emu_t *newemu, const x64emu_t* emu)
+{
+    // cpu
+    memcpy(newemu->regs, emu->regs, sizeof(emu->regs));
+    memcpy(&newemu->ip, &emu->ip, sizeof(emu->ip));
+    memcpy(&newemu->eflags, &emu->eflags, sizeof(emu->eflags));
+    newemu->old_ip = emu->old_ip;
+
+    // segments: selectors are copied, serials are zeroed
+    memcpy(newemu->segs, emu->segs, sizeof(emu->segs));
+    memset(newemu->segs_serial, 0, sizeof(newemu->segs_serial));
+
+    // fpu
+    memcpy(newemu->fpu, emu->fpu, sizeof(emu->fpu));
+    memcpy(newemu->fpu_ld, emu->fpu_ld, sizeof(emu->fpu_ld));
+    memcpy(newemu->fpu_ll, emu->fpu_ll, sizeof(emu->fpu_ll));
+    memcpy(newemu->p_regs, emu->p_regs, sizeof(emu->p_regs));
+    newemu->cw = emu->cw;
+    newemu->cw_mask_all = emu->cw_mask_all;
+    memcpy(&newemu->sw, &emu->sw, sizeof(emu->sw));
+    newemu->top = emu->top;
+    newemu->fpu_stack = emu->fpu_stack;
+    memcpy(&newemu->round, &emu->round, sizeof(emu->round));
+
+    // mmx and sse
+    memcpy(newemu->mmx, emu->mmx, sizeof(emu->mmx));
+    memcpy(newemu->xmm, emu->xmm, sizeof(emu->xmm));
+    newemu->mxcsr = emu->mxcsr;
+
+    // emu control
+    newemu->quit = emu->quit;
+    newemu->error = emu->error;
+
+    // adapt R_RSP to new stack frame: an emu without init_stack uses its context stack
+    uintptr_t src_base = (uintptr_t)((emu->init_stack)?emu->init_stack:emu->context->stack);
+    uintptr_t dst_base = (uintptr_t)((newemu->init_stack)?newemu->init_stack:newemu->context->stack);
+    newemu->regs[_SP].q[0] = emu->regs[_SP].q[0] + (intptr_t)(dst_base - src_base);
+}
+
+box64context_t* GetEmuContext(x64emu_t* emu)   // accessor for code that only sees x64emu_t as an opaque type
+{
+    return emu->context;   // the context given at setup time; emu must not be NULL
+}
+
+uint32_t GetEAX(x64emu_t *emu)   // register accessors: each one reads or writes a single register through the R_xxx macros, emu must not be NULL
+{
+    return R_EAX;
+}
+uint64_t GetRAX(x64emu_t *emu)
+{
+    return R_RAX;
+}
+void SetEAX(x64emu_t *emu, uint32_t v)   // NOTE(review): 32bits setters write through R_Exx only; whether the upper half of the 64bits register is cleared depends on the macro - confirm
+{
+    R_EAX = v;
+}
+void SetEBX(x64emu_t *emu, uint32_t v)
+{
+    R_EBX = v;
+}
+void SetECX(x64emu_t *emu, uint32_t v)
+{
+    R_ECX = v;
+}
+void SetEDX(x64emu_t *emu, uint32_t v)
+{
+    R_EDX = v;
+}
+void SetESI(x64emu_t *emu, uint32_t v)
+{
+    R_ESI = v;
+}
+void SetEDI(x64emu_t *emu, uint32_t v)
+{
+    R_EDI = v;
+}
+void SetEBP(x64emu_t *emu, uint32_t v)
+{
+    R_EBP = v;
+}
+//void SetESP(x64emu_t *emu, uint32_t v)
+//{
+//    R_ESP = v;
+//}
+void SetRAX(x64emu_t *emu, uint64_t v)   // 64bits setters, full register width
+{
+    R_RAX = v;
+}
+void SetRBX(x64emu_t *emu, uint64_t v)
+{
+    R_RBX = v;
+}
+void SetRCX(x64emu_t *emu, uint64_t v)
+{
+    R_RCX = v;
+}
+void SetRDX(x64emu_t *emu, uint64_t v)
+{
+    R_RDX = v;
+}
+void SetRSI(x64emu_t *emu, uint64_t v)
+{
+    R_RSI = v;
+}
+void SetRDI(x64emu_t *emu, uint64_t v)
+{
+    R_RDI = v;
+}
+void SetRBP(x64emu_t *emu, uint64_t v)
+{
+    R_RBP = v;
+}
+void SetRSP(x64emu_t *emu, uint64_t v)   // stack pointer, stored without alignment or range check
+{
+    R_RSP = v;
+}
+void SetRIP(x64emu_t *emu, uint64_t v)   // instruction pointer, stored without validation
+{
+    R_RIP = v;
+}
+uint64_t GetRSP(x64emu_t *emu)
+{
+    return R_RSP;
+}
+void SetFS(x64emu_t *emu, uint16_t v)   // NOTE(review): only the selector is stored, segs_serial[_FS] is not reset here - confirm callers use ResetSegmentsCache when needed
+{
+    emu->segs[_FS] = v;
+}
+uint16_t GetFS(x64emu_t *emu)   // segs[] holds 32bits values, the result is truncated to 16 bits
+{
+    return emu->segs[_FS];
+}
+
+
+void ResetFlags(x64emu_t *emu)   // clears the deferred-flags state of emu
+{
+    emu->df = d_none;   // presumably "no deferred flag computation pending" (d_none is defined outside this file) - DumpCPURegs prints real flags only when df is 0
+}
+
+/*
+ * Build a printable dump of the emulated CPU state.
+ *  emu : emulator state to dump
+ *  ip  : value printed as RIP (may differ from the current R_RIP)
+ * Content, in order: MMX registers if trace_emm is set, XMM registers if
+ * trace_xmm is set, the live x87 stack entries, the 16 general purpose
+ * registers (with the flags inserted after RBX) and finally RIP.
+ * Returns a pointer to a static buffer that is overwritten by the next call.
+ * Every write is bounded: output that would not fit is truncated.
+ */
+const char* DumpCPURegs(x64emu_t* emu, uintptr_t ip)
+{
+    static char buff[4096];
+    static const char* const regname[] = {"RAX", "RCX", "RDX", "RBX", "RSP", "RBP", "RSI", "RDI",
+                                          "R8",  "R9",  "R10", "R11", "R12", "R13", "R14", "R15"};
+    size_t pos = 0;     // offset of the terminating '\0' in buff, always < sizeof(buff)
+    buff[0] = '\0';
+    // bounded append: formats at the end of buff and never writes past it
+#define DUMP_ADD(...) do { \
+        int n_ = snprintf(buff+pos, sizeof(buff)-pos, __VA_ARGS__); \
+        if(n_ > 0) { \
+            if((size_t)n_ >= sizeof(buff)-pos) \
+                pos = sizeof(buff)-1;   /* truncated: buffer is now full */ \
+            else \
+                pos += (size_t)n_; \
+        } \
+    } while(0)
+    if(trace_emm) {
+        // mmx registers, 4 per line
+        for(int i=0; i<8; ++i)
+            DUMP_ADD("mm%d:%016llx%s", i, (unsigned long long)emu->mmx[i].q, ((i&3)==3)?"\n":" ");
+    }
+    if(trace_xmm) {
+        // xmm registers, high quadword first, 4 per line
+        for(int i=0; i<8; ++i)
+            DUMP_ADD("%d:%016llx%016llx%s", i, (unsigned long long)emu->xmm[i].q[1], (unsigned long long)emu->xmm[i].q[0], ((i&3)==3)?"\n":" ");
+    }
+    // x87 stack, only the entries currently in use
+    if(emu->fpu_stack) {
+        for (int i=0; i<emu->fpu_stack; i++) {
+            size_t start = pos;
+            DUMP_ADD("ST%d=%f", i, emu->fpu[(emu->top+i)&7].d);
+            // pad each entry to 10 columns, with at least one space as separator
+            int pad = 10-(int)(pos-start);
+            if(pad<1) pad=1;
+            DUMP_ADD("%*s", pad, "");
+            if(i==3) DUMP_ADD("\n");
+        }
+        DUMP_ADD("\n");
+    }
+    for (int i=_AX; i<=_R15; ++i) {
+        DUMP_ADD("%s=%016llx ", regname[i], (unsigned long long)emu->regs[i].q[0]);
+
+        if (i==3) {
+            // flags go right after RBX; they are unknown while a deferred computation is pending
+            if(emu->df) {
+                DUMP_ADD("FLAGS=??????\n");
+            } else {
+#define FLAG_CHAR(f) ((ACCESS_FLAG(F_##f##F)) ? #f : "-")
+                DUMP_ADD("FLAGS=%s%s%s%s%s%s\n", FLAG_CHAR(O), FLAG_CHAR(C), FLAG_CHAR(P), FLAG_CHAR(A), FLAG_CHAR(Z), FLAG_CHAR(S));
+#undef FLAG_CHAR
+            }
+        }
+    }
+    DUMP_ADD("RIP=%016llx ", (unsigned long long)ip);
+#undef DUMP_ADD
+    return buff;
+}
+
+void StopEmu(x64emu_t* emu, const char* reason)   // flags the emu to quit and logs why, with a register dump
+{
+    emu->quit = 1;   // only sets the flag, the run loop is expected to test it
+    printf_log(LOG_NONE, "%s", reason);   // reason is printed verbatim, it must not be NULL
+    // dump stuff...
+    printf_log(LOG_NONE, "CPU Regs=%s\n", DumpCPURegs(emu, R_RIP));
+    // TODO: stack, memory/instruction around EIP, etc..
+}
+
+void UnimpOpcode(x64emu_t* emu)   // reports an opcode the interpreter does not handle and stops the emu with ERR_UNIMPL
+{
+    R_RIP = emu->old_ip;   // go back to the saved ip; presumably Peek() below is relative to RIP - confirm
+
+    int tid = syscall(SYS_gettid);   // thread id, only used as log prefix
+    printf_log(LOG_NONE, "%04d|%p: Unimplemented Opcode (%02X) %02X %02X %02X %02X %02X %02X %02X %02X\n", 
+        tid, (void*)emu->old_ip, Peek(emu, -1),
+        Peek(emu, 0), Peek(emu, 1), Peek(emu, 2), Peek(emu, 3),
+        Peek(emu, 4), Peek(emu, 5), Peek(emu, 6), Peek(emu, 7));
+    emu->quit=1;
+    emu->error |= ERR_UNIMPL;   // error is a bit mask, other bits are kept
+}
+
+//void EmuCall(x64emu_t* emu, uintptr_t addr)
+//{
+//    uint64_t old_rsp = R_RSP;
+//    uint64_t old_rbx = R_RBX;
+//    uint64_t old_rdi = R_RDI;
+//    uint64_t old_rsi = R_RSI;
+//    uint64_t old_rbp = R_RBP;
+//    uint64_t old_rip = R_RIP;
+//    PushExit(emu);
+//    R_RIP = addr;
+//    emu->df = d_none;
+//    Run(emu, 0);
+//    emu->quit = 0;  // reset Quit flags...
+//    emu->df = d_none;
+//    if(emu->quitonlongjmp && emu->longjmp) {
+//        emu->longjmp = 0;   // don't change anything because of the longjmp
+//    } else {
+//        R_RBX = old_rbx;
+//        R_RDI = old_rdi;
+//        R_RSI = old_rsi;
+//        R_RBP = old_rbp;
+//        R_RSP = old_rsp;
+//        R_RIP = old_rip;  // and set back instruction pointer
+//    }
+//}
+
+uint64_t ReadTSC(x64emu_t* emu)   // emulated TimeStamp Counter read, emu is unused
+{
+    //TODO: implement hardware counter read?
+    // Read the TimeStamp Counter as 64bits.
+    // this is supposed to be the number of instructions executed since last reset
+// fall back to gettime: the value returned is a time, not an instruction count
+#ifndef NOGETCLOCK
+  struct timespec ts;
+  clock_gettime(CLOCK_MONOTONIC_COARSE, &ts);   // return value is not checked
+  return (uint64_t)(ts.tv_sec) * 1000000000LL + ts.tv_nsec;   // nanoseconds
+#else
+  struct timeval tv;
+  gettimeofday(&tv, NULL);
+  return (uint64_t)(tv.tv_sec) * 1000000 + tv.tv_usec;   // microseconds: not the same unit as the branch above
+#endif
+}
+
+void ResetSegmentsCache(x64emu_t *emu)   // zeroes every segment serial of emu; a NULL emu is accepted
+{
+    if(!emu)
+        return;
+    memset(emu->segs_serial, 0, sizeof(emu->segs_serial));   // segs[] and segs_offs[] are left untouched
+}
\ No newline at end of file
diff --git a/src/emu/x64emu_private.h b/src/emu/x64emu_private.h
new file mode 100755
index 00000000..9b007014
--- /dev/null
+++ b/src/emu/x64emu_private.h
@@ -0,0 +1,93 @@
+#ifndef __X86EMU_PRIVATE_H_
+#define __X86EMU_PRIVATE_H_
+
+#include "regs.h"
+
+typedef struct box64context_s box64context_t;
+//typedef struct i386_ucontext_s i386_ucontext_t;
+
+#define ERR_UNIMPL  1
+#define ERR_DIVBY0  2
+#define ERR_ILLEGAL 4
+
+#ifdef DYNAREC
+#define CSTACK      32
+#define CSTACKMASK  31
+#endif
+
+typedef struct forkpty_s {   // presumably the saved arguments of a pending forkpty() call - confirm against the fork handling code
+    void*    amaster;
+    void*   name;
+    void*   termp;
+    void*   winp;
+    void*   f;  // forkpty function
+} forkpty_t;
+
+typedef struct x64emu_s {   // complete state of one emulated x86_64 cpu
+    // cpu
+	reg64_t     regs[16];   // general purpose registers
+	x86flags_t  eflags;
+    reg64_t     ip;
+    uintptr_t   old_ip;
+    // fpu
+	fpu_reg_t   fpu[9];
+	uint16_t    cw,cw_mask_all;
+	x87flags_t  sw;
+	uint32_t    top;        // top is part of sw, but it's faster to have it separately
+    int         fpu_stack;
+	fpu_round_t round;
+    fpu_ld_t    fpu_ld[9]; // for long double emulation / 80bits fld fst
+    fpu_ll_t    fpu_ll[9]; // for 64bits fild / fist sequence
+	fpu_p_reg_t p_regs[9];
+    // mmx
+    mmx_regs_t  mmx[8];
+    // sse
+    sse_regs_t  xmm[8];
+    uint32_t    mxcsr;
+    // deferred flags
+    defered_flags_t df;
+    uint64_t    op1;
+    uint64_t    op2;
+    uint64_t    res;
+    uint32_t    *x86emu_parity_tab; // helper
+    #ifdef HAVE_TRACE
+    uintptr_t   prev2_ip, prev_ip;
+    #endif
+    // segments
+    uint32_t    segs[6];        // only 32bits value?
+    uintptr_t   segs_offs[6];   // computed offset associated with each segment
+    uint64_t    segs_serial[6];  // are the segment offsets clean (not 0) or do they need to be re-computed (0)? For GS, serial needs to be the same as context->sel_serial
+    // emu control
+    int         quit;
+    int         error;  // bit mask of ERR_xxx values
+    int         fork;   // quit because need to fork
+    forkpty_t*  forkpty_info;
+    int         exit;
+    int         quitonlongjmp;  // quit if longjmp is called
+    int         longjmp;        // if quit because of longjmp
+    // parent context
+    box64context_t *context;
+    // cpu helpers
+    reg64_t     zero;
+    reg64_t     *sbiidx[8];
+    // scratch stack, used for alignment of double and 64bits ints on arm. 200 elements should be enough
+    uint64_t    scratch[200];
+    // local stack, to be deleted when emu is freed
+    void*       stack2free; // this is the stack to free (can be NULL)
+    void*       init_stack; // initial stack (owned or not)
+    uint32_t    size_stack; // stack size (owned or not)
+
+    //i386_ucontext_t *uc_link; // to handle setcontext
+
+    int         type;       // EMUTYPE_xxx define
+
+} x64emu_t;
+
+#define EMUTYPE_NONE    0
+#define EMUTYPE_MAIN    1
+#define EMUTYPE_SIGNAL  2
+
+//#define INTR_RAISE_DIV0(emu) {emu->error |= ERR_DIVBY0; emu->quit=1;}
+#define INTR_RAISE_DIV0(emu) {emu->error |= ERR_DIVBY0;} // should rise a SIGFPE and not quit
+
+#endif //__X86EMU_PRIVATE_H_
\ No newline at end of file
diff --git a/src/emu/x64int3.c b/src/emu/x64int3.c
new file mode 100755
index 00000000..4d693c5a
--- /dev/null
+++ b/src/emu/x64int3.c
@@ -0,0 +1,312 @@
+#define _GNU_SOURCE         /* See feature_test_macros(7) */
+#include <dlfcn.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <pthread.h>
+#include <signal.h>
+
+#include "debug.h"
+#include "box64stack.h"
+#include "x64emu.h"
+//#include "x64run.h"
+#include "x64emu_private.h"
+#include "x64run_private.h"
+#include "x87emu_private.h"
+//#include "x64primop.h"
+//#include "x64trace.h"
+//#include "wrapper.h"
+#include "box64context.h"
+//#include "librarian.h"
+
+#include <elf.h>
+#include "elfloader.h"
+#include "elfs/elfloader_private.h"
+
+typedef int32_t (*iFpppp_t)(void*, void*, void*, void*);    // function taking 4 pointers and returning int32_t; only referenced by the commented-out forkpty call in this file
+
+//x64emu_t* x64emu_fork(x64emu_t* emu, int forktype)
+//{
+//    // execute atforks prepare functions, in reverse order
+//    for (int i=my_context->atfork_sz-1; i>=0; --i)
+//        if(my_context->atforks[i].prepare)
+//            EmuCall(emu, my_context->atforks[i].prepare);
+//    int type = emu->type;
+//    int v;
+//    if(forktype==2) {
+//        iFpppp_t forkpty = (iFpppp_t)emu->forkpty_info->f;
+//        v = forkpty(emu->forkpty_info->amaster, emu->forkpty_info->name, emu->forkpty_info->termp, emu->forkpty_info->winp);
+//        emu->forkpty_info = NULL;
+//    } else
+//        v = fork();
+//    if(type == EMUTYPE_MAIN)
+//        thread_set_emu(emu);
+//    if(v==EAGAIN || v==ENOMEM) {
+//        // error...
+//    } else if(v!=0) {  
+//        // execute atforks parent functions
+//        for (int i=0; i<my_context->atfork_sz; ++i)
+//            if(my_context->atforks[i].parent)
+//                EmuCall(emu, my_context->atforks[i].parent);
+//
+//    } else if(v==0) {
+//        // execute atforks child functions
+//        for (int i=0; i<my_context->atfork_sz; ++i)
+//            if(my_context->atforks[i].child)
+//                EmuCall(emu, my_context->atforks[i].child);
+//    }
+//    R_EAX = v;
+//    return emu;
+//}
+
+// errno is provided by <errno.h> (included above); it may be a macro, so it must not be redeclared as "extern int errno"
+//void x64Int3(x64emu_t* emu)
+//{
+//    if(Peek(emu, 0)=='S' && Peek(emu, 1)=='C') // Signature for "Out of x86 door"
+//    {
+//        R_EIP += 2;
+//        #ifdef RK3399
+//        volatile    // to avoid addr to be put in an VFPU register
+//        #endif
+//        uint32_t addr = Fetch32(emu);
+//        if(addr==0) {
+//            //printf_log(LOG_INFO, "%p:Exit x86 emu (emu=%p)\n", *(void**)(R_ESP), emu);
+//            emu->quit=1; // normal quit
+//        } else {
+//            RESET_FLAGS(emu);
+//            wrapper_t w = (wrapper_t)addr;
+//            addr = Fetch32(emu);
+//            /* This party can be used to trace only 1 specific lib (but it is quite slow)
+//            elfheader_t *h = FindElfAddress(my_context, *(uintptr_t*)(R_ESP));
+//            int have_trace = 0;
+//            if(h && strstr(ElfName(h), "libMiles")) have_trace = 1;*/
+//            if(box86_log>=LOG_DEBUG /*|| have_trace*/) {
+//                pthread_mutex_lock(&emu->context->mutex_trace);
+//                int tid = GetTID();
+//                char buff[256] = "\0";
+//                char buff2[64] = "\0";
+//                char buff3[64] = "\0";
+//                char *tmp;
+//                int post = 0;
+//                int perr = 0;
+//                uint32_t *pu32 = NULL;
+//                const char *s = NULL;
+//                {
+//                    Dl_info info;
+//                    if(dladdr((void*)addr, &info))
+//                        s = info.dli_sname;
+//                }
+//                if(!s) s = GetNativeName((void*)addr);
+//                if(addr==(uintptr_t)PltResolver) {
+//                    snprintf(buff, 256, "%s", " ... ");
+//                } else
+//                if(strstr(s, "SDL_RWFromFile")==s || strstr(s, "SDL_RWFromFile")==s) {
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(%s, %s)", tid, *(void**)(R_ESP), s, *(char**)(R_ESP+4), *(char**)(R_ESP+8));
+//                } else  if(strstr(s, "glColor4f")==s) {
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(%f, %f, %f, %f)", tid, *(void**)(R_ESP), s, *(float*)(R_ESP+4), *(float*)(R_ESP+8), *(float*)(R_ESP+12), *(float*)(R_ESP+16));
+//                } else  if(strstr(s, "glTexCoord2f")==s) {
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(%f, %f)", tid, *(void**)(R_ESP), s, *(float*)(R_ESP+4), *(float*)(R_ESP+8));
+//                } else  if(strstr(s, "glVertex2f")==s) {
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(%f, %f)", tid, *(void**)(R_ESP), s, *(float*)(R_ESP+4), *(float*)(R_ESP+8));
+//                } else  if(strstr(s, "glVertex3f")==s) {
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(%f, %f, %f)", tid, *(void**)(R_ESP), s, *(float*)(R_ESP+4), *(float*)(R_ESP+8), *(float*)(R_ESP+12));
+//                } else  if(strstr(s, "__open64")==s || strcmp(s, "open64")==0) {
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(\"%s\", %d, %d)", tid, *(void**)(R_ESP), s, *(char**)(R_ESP+4), *(int*)(R_ESP+8), *(int*)(R_ESP+12));
+//                    perr = 1;
+//                } else  if(!strcmp(s, "opendir")) {
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(\"%s\")", tid, *(void**)(R_ESP), s, *(char**)(R_ESP+4));
+//                    perr = 1;
+//                } else  if(strstr(s, "__open")==s || strcmp(s, "open")==0) {
+//                    tmp = *(char**)(R_ESP+4);
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(\"%s\", %d (,%d))", tid, *(void**)(R_ESP), s, (tmp)?tmp:"(nil)", *(int*)(R_ESP+8), *(int*)(R_ESP+12));
+//                    perr = 1;
+//                } else  if(strcmp(s, "mkdir")==0) {
+//                    tmp = *(char**)(R_ESP+4);
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(\"%s\", %d)", tid, *(void**)(R_ESP), s, (tmp)?tmp:"(nil)", *(int*)(R_ESP+8));
+//                    perr = 1;
+//                } else  if(!strcmp(s, "fopen")) {
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(\"%s\", \"%s\")", tid, *(void**)(R_ESP), s, *(char**)(R_ESP+4), *(char**)(R_ESP+8));
+//                    perr = 1;
+//                } else  if(!strcmp(s, "freopen")) {
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(\"%s\", \"%s\", %p)", tid, *(void**)(R_ESP), s, *(char**)(R_ESP+4), *(char**)(R_ESP+8), *(void**)(R_ESP+12));
+//                    perr = 1;
+//                } else  if(!strcmp(s, "fopen64")) {
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(\"%s\", \"%s\")", tid, *(void**)(R_ESP), s, *(char**)(R_ESP+4), *(char**)(R_ESP+8));
+//                    perr = 2;
+//                } else  if(!strcmp(s, "chdir")) {
+//                    pu32=*(uint32_t**)(R_ESP+4);
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(\"%s\")", tid, *(void**)(R_ESP), s, pu32?((pu32==(uint32_t*)1)?"/1/":(char*)pu32):"/0/");
+//                } else  if(strstr(s, "getenv")==s) {
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(\"%s\")", tid, *(void**)(R_ESP), s, *(char**)(R_ESP+4));
+//                    post = 2;
+//                } else  if(strstr(s, "pread")==s) {
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(%d, %p, %u, %d)", tid, *(void**)(R_ESP), s, *(int32_t*)(R_ESP+4), *(void**)(R_ESP+8), *(uint32_t*)(R_ESP+12), *(int32_t*)(R_ESP+16));
+//                    perr = 1;
+//                } else  if(strstr(s, "ioctl")==s) {
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(%d, 0x%x, %p)", tid, *(void**)(R_ESP), s, *(int32_t*)(R_ESP+4), *(int32_t*)(R_ESP+8), *(void**)(R_ESP+12));
+//                    perr = 1;
+//                } else  if(strstr(s, "statvfs64")==s) {
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(%p(\"%s\"), %p)", tid, *(void**)(R_ESP), s, *(void**)(R_ESP+4), *(char**)(R_ESP+4), *(void**)(R_ESP+8));
+//                } else  if(strstr(s, "index")==s) {
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(%p(\"%s\"), %i(%c))", tid, *(void**)(R_ESP), s, *(char**)(R_ESP+4), *(char**)(R_ESP+4), *(int32_t*)(R_ESP+8), *(int32_t*)(R_ESP+8));
+//                } else  if(strstr(s, "rindex")==s) {
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(%p(\"%s\"), %i(%c))", tid, *(void**)(R_ESP), s, *(char**)(R_ESP+4), *(char**)(R_ESP+4), *(int32_t*)(R_ESP+8), *(int32_t*)(R_ESP+8));
+//                } else  if(strstr(s, "my___xstat64")==s) {
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(%d, %p(\"%s\"), %p)", tid, *(void**)(R_ESP), s, *(int32_t*)(R_ESP+4), *(char**)(R_ESP+8), *(char**)(R_ESP+8), *(void**)(R_ESP+12));
+//                    perr = 1;
+//                } else  if(strcmp(s, "my___xstat")==0) {
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(%d, %p(\"%s\"), %p)", tid, *(void**)(R_ESP), s, *(int32_t*)(R_ESP+4), *(char**)(R_ESP+8), *(char**)(R_ESP+8), *(void**)(R_ESP+12));
+//                    perr = 1;
+//                } else  if(strstr(s, "my___lxstat64")==s) {
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(%d, %p(\"%s\"), %p)", tid, *(void**)(R_ESP), s, *(int32_t*)(R_ESP+4), *(char**)(R_ESP+8), *(char**)(R_ESP+8), *(void**)(R_ESP+12));
+//                    perr = 1;
+//                } else  if(strstr(s, "sem_timedwait")==s) {
+//                    pu32 = *(uint32_t**)(R_ESP+8);
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(%p, %p[%d sec %d ns])", tid, *(void**)(R_ESP), s, *(void**)(R_ESP+4), *(void**)(R_ESP+8), pu32?pu32[0]:-1, pu32?pu32[1]:-1);
+//                    perr = 1;
+//                } else  if(strstr(s, "waitpid")==s) {
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(%d, %p, 0x%x)", tid, *(void**)(R_ESP), s, *(int32_t*)(R_ESP+4), *(void**)(R_ESP+8), *(uint32_t*)(R_ESP+12));
+//                    perr = 1;
+//                } else  if(strstr(s, "clock_gettime")==s || strstr(s, "__clock_gettime")==s) {
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(%d, %p)", tid, *(void**)(R_ESP), s, *(uint32_t*)(R_ESP+4), *(void**)(R_ESP+8));
+//                    post = 1;
+//                    pu32 = *(uint32_t**)(R_ESP+8);
+//                } else  if(strstr(s, "semop")==s) {
+//                    int16_t* p16 = *(int16_t**)(R_ESP+8);
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(%d, %p[%u/%d/%d], %d)", tid, *(void**)(R_ESP), s, *(int*)(R_ESP+4), p16, p16[0], p16[1], p16[2], *(int*)(R_ESP+12));
+//                    perr = 1;
+//                } else  if(!strcmp(s, "my_mmap64")) {
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(%p, 0x%x, %d, 0x%x, %d, %lld)", tid, *(void**)(R_ESP), s, *(void**)(R_ESP+4), *(size_t*)(R_ESP+8), *(int*)(R_ESP+12), *(int*)(R_ESP+16), *(int*)(R_ESP+20), *(int64_t*)(R_ESP+24));
+//                    perr = 1;
+//                } else  if(!strcmp(s, "my_mmap")) {
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(%p, 0x%x, %d, 0x%x, %d, %d)", tid, *(void**)(R_ESP), s, *(void**)(R_ESP+4), *(size_t*)(R_ESP+8), *(int*)(R_ESP+12), *(int*)(R_ESP+16), *(int*)(R_ESP+20), *(int*)(R_ESP+24));
+//                    perr = 1;
+//                } else  if(strstr(s, "strcasecmp")==s || strstr(s, "__strcasecmp")==s) {
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(\"%s\", \"%s\")", tid, *(void**)(R_ESP), s, *(char**)(R_ESP+4), *(char**)(R_ESP+8));
+//                } else  if(strstr(s, "gtk_signal_connect_full")) {
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(%p, \"%s\", %p, %p, %p, %p, %d, %d)", tid, *(void**)(R_ESP), "gtk_signal_connect_full", *(void**)(R_ESP+4), *(char**)(R_ESP+8), *(void**)(R_ESP+12), *(void**)(R_ESP+16), *(void**)(R_ESP+20), *(void**)(R_ESP+24), *(int32_t*)(R_ESP+28), *(int32_t*)(R_ESP+32));
+//                } else  if(strstr(s, "strcmp")==s || strstr(s, "__strcmp")==s) {
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(\"%s\", \"%s\")", tid, *(void**)(R_ESP), s, *(char**)(R_ESP+4), *(char**)(R_ESP+8));
+//                } else  if(strstr(s, "strstr")==s) {
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(\"%.127s\", \"%.127s\")", tid, *(void**)(R_ESP), s, *(char**)(R_ESP+4), *(char**)(R_ESP+8));
+//                } else  if(strstr(s, "strlen")==s) {
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(%p(\"%s\"))", tid, *(void**)(R_ESP), s, *(char**)(R_ESP+4), ((R_ESP+4))?(*(char**)(R_ESP+4)):"nil");
+//                } else  if(strstr(s, "my_vsnprintf")==s) {
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(%08X, %u, %08X...)", tid, *(void**)(R_ESP), s, *(uint32_t*)(R_ESP+4), *(uint32_t*)(R_ESP+8), *(uint32_t*)(R_ESP+12));
+//                    pu32 = *(uint32_t**)(R_ESP+4);
+//                    post = 3;
+//                } else  if(strstr(s, "my_vsprintf")==s) {
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(%08X, \"%s\"...)", tid, *(void**)(R_ESP), s, *(uint32_t*)(R_ESP+4), *(char**)(R_ESP+8));
+//                    pu32 = *(uint32_t**)(R_ESP+4);
+//                    post = 3;
+//                } else  if(strstr(s, "my_snprintf")==s) {
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(%08X, %u, %08X...)", tid, *(void**)(R_ESP), s, *(uint32_t*)(R_ESP+4), *(uint32_t*)(R_ESP+8), *(uint32_t*)(R_ESP+12));
+//                    pu32 = *(uint32_t**)(R_ESP+4);
+//                    post = 3;
+//                } else  if(strstr(s, "my_sprintf")==s) {
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(%08X, %08X...)", tid, *(void**)(R_ESP), s, *(uint32_t*)(R_ESP+4), *(uint32_t*)(R_ESP+8));
+//                    pu32 = *(uint32_t**)(R_ESP+4);
+//                    post = 3;
+//                } else  if(strstr(s, "my_printf")==s) {
+//                    pu32 = *(uint32_t**)(R_ESP+4);
+//                    if(((uintptr_t)pu32)<0x5) // probably a _chk function
+//                        pu32 = *(uint32_t**)(R_ESP+8);
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(\"%s\"...)", tid, *(void**)(R_ESP), s, pu32?((char*)(pu32)):"nil");
+//                } else  if(strstr(s, "puts")==s) {
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(\"%s\"...)", tid, *(void**)(R_ESP), s, *(char**)(R_ESP+4));
+//                } else  if(strstr(s, "fputs")==s) {
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(\"%s\", %p...)", tid, *(void**)(R_ESP), s, *(char**)(R_ESP+4), *(void**)(R_ESP+8));
+//                } else  if(strstr(s, "my_fprintf")==s) {
+//                    pu32 = *(uint32_t**)(R_ESP+8);
+//                    if(((uintptr_t)pu32)<0x5) // probably a __fprint_chk
+//                        pu32 = *(uint32_t**)(R_ESP+12);
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(%08X, \"%s\", ...)", tid, *(void**)(R_ESP), s, *(uint32_t*)(R_ESP+4), pu32?((char*)(pu32)):"nil");
+//                } else  if(strstr(s, "my_vfprintf")==s) {
+//                    pu32 = *(uint32_t**)(R_ESP+8);
+//                    if(((uintptr_t)pu32)<0x5) // probably a _chk function
+//                        pu32 = *(uint32_t**)(R_ESP+12);
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(%08X, \"%s\", ...)", tid, *(void**)(R_ESP), s, *(uint32_t*)(R_ESP+4), pu32?((char*)(pu32)):"nil");
+//                } else  if(strstr(s, "vkGetInstanceProcAddr")==s) {
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(%p, \"%s\")", tid, *(void**)(R_ESP), s, *(void**)(R_ESP+4), *(char**)(R_ESP+8));
+//                } else  if(strstr(s, "vkGetDeviceProcAddr")==s) {
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(%p, \"%s\")", tid, *(void**)(R_ESP), s, *(void**)(R_ESP+4), *(char**)(R_ESP+8));
+//                } else  if(strstr(s, "my_glXGetProcAddress")==s) {
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(\"%s\")", tid, *(void**)(R_ESP), s, *(char**)(R_ESP+4));
+//                } else  if(strstr(s, "my_sscanf")==s) {
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(\"%s\", \"%s\", ...)", tid, *(void**)(R_ESP), s, *(char**)(R_ESP+4), *(char**)(R_ESP+8));
+//                } else  if(!strcmp(s, "vsscanf")) {
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(\"%s\", \"%s\", ...)", tid, *(void**)(R_ESP), s, *(char**)(R_ESP+4), *(char**)(R_ESP+8));
+//                } else if(strstr(s, "XCreateWindow")==s) {
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(%p, %p, %d, %d, %u, %u, %u, %d, %u, %p, %u, %p)", tid, *(void**)(R_ESP), s, *(void**)(R_ESP+4), *(void**)(R_ESP+8), *(int*)(R_ESP+12), *(int*)(R_ESP+16), *(uint32_t*)(R_ESP+20), *(uint32_t*)(R_ESP+24), *(uint32_t*)(R_ESP+28), *(int32_t*)(R_ESP+32), *(uint32_t*)(R_ESP+36), *(void**)(R_ESP+40), *(uint32_t*)(R_ESP+44), *(void**)(R_ESP+48));
+//                } else if(strstr(s, "XLoadQueryFont")==s) {
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(%p, \"%s\")", tid, *(void**)(R_ESP), s, *(void**)(R_ESP+4), *(char**)(R_ESP+8));
+//                } else if(strstr(s, "pthread_mutex_lock")==s) {
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(%p)", tid, *(void**)(R_ESP), s, *(void**)(R_ESP+4));
+//                } else if(!strcmp(s, "fmodf")) {
+//                    post = 4;
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(%f, %f)", tid, *(void**)(R_ESP), s, *(float*)(R_ESP+4), *(float*)(R_ESP+8));
+//                } else if(!strcmp(s, "fmod")) {
+//                    post = 4;
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(%f, %f)", tid, *(void**)(R_ESP), s, *(double*)(R_ESP+4), *(double*)(R_ESP+12));
+//                } else if(strstr(s, "SDL_GetWindowSurface")==s) {
+//                    post = 5;
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(%p)", tid, *(void**)(R_ESP), s, *(void**)(R_ESP+4));
+//                } else if(strstr(s, "udev_monitor_new_from_netlink")==s) {
+//                    post = 5;
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(%p, \"%s\")", tid, *(void**)(R_ESP), s, *(void**)(R_ESP+4), *(char**)(R_ESP+8));
+//                } else  if(!strcmp(s, "my_syscall")) {
+//                    snprintf(buff, 255, "%04d|%p: Calling %s(%d, %p, %p, %p...)", tid, *(void**)(R_ESP), s, *(int32_t*)(R_ESP+4), *(void**)(R_ESP+8), *(void**)(R_ESP+12), *(void**)(R_ESP+16));
+//                    perr = 1;
+//                } else {
+//                    snprintf(buff, 255, "%04d|%p: Calling %s (%08X, %08X, %08X...)", tid, *(void**)(R_ESP), s, *(uint32_t*)(R_ESP+4), *(uint32_t*)(R_ESP+8), *(uint32_t*)(R_ESP+12));
+//                }
+//                printf_log(LOG_NONE, "%s =>", buff);
+//                pthread_mutex_unlock(&emu->context->mutex_trace);
+//                w(emu, addr);   // some function never come back, so unlock the mutex first!
+//                pthread_mutex_lock(&emu->context->mutex_trace);
+//                if(post)
+//                    switch(post) {
+//                    case 1: snprintf(buff2, 63, " [%d sec %d nsec]", pu32?pu32[0]:-1, pu32?pu32[1]:-1);
+//                            break;
+//                    case 2: snprintf(buff2, 63, "(%s)", R_EAX?((char*)R_EAX):"nil");
+//                            break;
+//                    case 3: snprintf(buff2, 63, "(%s)", pu32?((char*)pu32):"nil");
+//                            break;
+//                    case 4: snprintf(buff2, 63, " (%f)", ST0.d);
+//                            break;
+//                    case 5: {
+//                            uint32_t* p = (uint32_t*)R_EAX;
+//                            if(p)
+//                                snprintf(buff2, 63, " size=%dx%d, pitch=%d, pixels=%p", p[2], p[3], p[4], p+5);
+//                            else
+//                                snprintf(buff2, 63, "NULL Surface");
+//                            }
+//                            break;
+//                }
+//                if(perr==1 && ((int)R_EAX)<0)
+//                    snprintf(buff3, 63, " (errno=%d:\"%s\")", errno, strerror(errno));
+//                else if(perr==2 && R_EAX==0)
+//                    snprintf(buff3, 63, " (errno=%d:\"%s\")", errno, strerror(errno));
+//                printf_log(LOG_NONE, " return 0x%08X%s%s\n", R_EAX, buff2, buff3);
+//                pthread_mutex_unlock(&emu->context->mutex_trace);
+//            } else
+//                w(emu, addr);
+//        }
+//        return;
+//    }
+//    if(my_context->signals[SIGTRAP])
+//        raise(SIGTRAP);
+//    else
+//        printf_log(LOG_INFO, "%04d|Warning, ignoring unsupported Int 3 call @%p\n", GetTID(), (void*)R_EIP);
+//    //emu->quit = 1;
+//}
+
+// Return the kernel thread id of the calling thread, obtained with the raw gettid syscall.
+int GetTID()
+{
+    const long tid = syscall(SYS_gettid);
+    return (int)tid;
+}
\ No newline at end of file
diff --git a/src/emu/x64run_private.h b/src/emu/x64run_private.h
new file mode 100755
index 00000000..21c7f722
--- /dev/null
+++ b/src/emu/x64run_private.h
@@ -0,0 +1,236 @@
+#ifndef __X86RUN_PRIVATE_H_
+#define __X86RUN_PRIVATE_H_
+
+#include <stdint.h>
+#include "regs.h"
+#include "x64emu_private.h"
+typedef struct x64emu_s x64emu_t;
+
+// Read the unsigned byte RIP points at, then move RIP one byte forward.
+static inline uint8_t Fetch8(x64emu_t *emu)
+{
+    const uint8_t val = *(const uint8_t*)R_RIP;
+    R_RIP += 1;
+    return val;
+}
+// Read the signed byte RIP points at, then move RIP one byte forward.
+static inline int8_t Fetch8s(x64emu_t *emu)
+{
+    const int8_t val = *(const int8_t*)R_RIP;
+    R_RIP += 1;
+    return val;
+}
+// Read the unsigned 16-bit value at RIP and step RIP past it.
+static inline uint16_t Fetch16(x64emu_t *emu)
+{
+    const uint16_t imm = *(const uint16_t*)R_RIP;
+    R_RIP += sizeof imm;
+    return imm;
+}
+// Read the signed 16-bit value at RIP and step RIP past it.
+static inline int16_t Fetch16s(x64emu_t *emu)
+{
+    const int16_t imm = *(const int16_t*)R_RIP;
+    R_RIP += sizeof imm;
+    return imm;
+}
+// Read the unsigned 32-bit value at RIP and step RIP past it.
+static inline uint32_t Fetch32(x64emu_t *emu)
+{
+    const uint32_t imm = *(const uint32_t*)R_RIP;
+    R_RIP += sizeof imm;
+    return imm;
+}
+// Read the signed 32-bit value at RIP and step RIP past it.
+static inline int32_t Fetch32s(x64emu_t *emu)
+{
+    const int32_t imm = *(const int32_t*)R_RIP;
+    R_RIP += sizeof imm;
+    return imm;
+}
+// Read the unsigned 64-bit value at RIP and step RIP past it.
+static inline uint64_t Fetch64(x64emu_t *emu)
+{
+    const uint64_t imm = *(const uint64_t*)R_RIP;
+    R_RIP += sizeof imm;
+    return imm;
+}
+// Read the signed 64-bit value at RIP and step RIP past it.
+static inline int64_t Fetch64s(x64emu_t *emu)
+{
+    const int64_t imm = *(const int64_t*)R_RIP;
+    R_RIP += sizeof imm;
+    return imm;
+}
+// Look at the byte located `offset` bytes away from RIP; RIP itself is left untouched.
+static inline uint8_t Peek(x64emu_t *emu, int offset)
+{
+    const uint8_t *at = (const uint8_t*)(R_RIP + offset);
+    return *at;
+}
+
+// Pop one 64-bit value from the emulated stack: RSP is raised by 8 first,
+// then the slot it used to point at is read and returned.
+static inline uint64_t Pop(x64emu_t *emu)
+{
+    const uint64_t *slot = (const uint64_t*)R_RSP;
+    R_RSP += sizeof *slot;
+    return *slot;
+}
+
+// Push one 64-bit value on the emulated stack: RSP is lowered by 8, then v is
+// stored at the new top of stack.
+static inline void Push(x64emu_t *emu, uint64_t v)
+{
+    R_RSP -= sizeof v;
+    uint64_t *slot = (uint64_t*)R_RSP;
+    *slot = v;
+}
+
+
+// the op code definition can be found here: http://ref.x86asm.net/geek32.html
+
+//static inline reg32_t* GetECommon(x64emu_t* emu, uint32_t m)
+//{
+//    if (m<=7) {
+//        if(m==0x4) {
+//            uint8_t sib = Fetch8(emu);
+//            uintptr_t base = ((sib&0x7)==5)?Fetch32(emu):(emu->regs[(sib&0x7)].dword[0]); // base
+//            base += (emu->sbiidx[(sib>>3)&7]->sdword[0] << (sib>>6));
+//            return (reg32_t*)base;
+//        } else if (m==0x5) { //disp32
+//            return (reg32_t*)Fetch32(emu);
+//        }
+//        return (reg32_t*)(emu->regs[m].dword[0]);
+//    } else {
+//        uintptr_t base;
+//        if((m&7)==4) {
+//            uint8_t sib = Fetch8(emu);
+//            base = emu->regs[(sib&0x7)].dword[0]; // base
+//            base += (emu->sbiidx[(sib>>3)&7]->sdword[0] << (sib>>6));
+//        } else {
+//            base = emu->regs[(m&0x7)].dword[0];
+//        }
+//        base+=(m&0x80)?Fetch32s(emu):Fetch8s(emu);
+//        return (reg32_t*)base;
+//    }
+//}
+
+//static inline reg32_t* GetEb(x64emu_t *emu, uint32_t v)
+//{
+//    uint32_t m = v&0xC7;    // filter Eb
+//    if(m>=0xC0) {
+//        int lowhigh = (m&4)>>2;
+//         return (reg32_t *)(((char*)(&emu->regs[(m&0x03)]))+lowhigh);  //?
+//    } else return GetECommon(emu, m);
+//}
+
+//static inline reg32_t* GetEd(x64emu_t *emu, uint32_t v)
+//{
+//    uint32_t m = v&0xC7;    // filter Ed
+//    if(m>=0xC0) {
+//         return &emu->regs[(m&0x07)];
+//    } else return GetECommon(emu, m);
+//}
+
+#define GetEw GetEd
+
+//static inline reg32_t* GetEw16(x64emu_t *emu, uint32_t v)
+//{
+//    uint32_t m = v&0xC7;    // filter Ed
+//    if(m>=0xC0) {
+//         return &emu->regs[(m&0x07)];
+//    } else {
+//        uint32_t base = 0;
+//        switch(m&7) {
+//            case 0: base = R_BX+R_SI; break;
+//            case 1: base = R_BX+R_DI; break;
+//            case 2: base = R_BP+R_SI; break;
+//            case 3: base = R_BP+R_DI; break;
+//            case 4: base =      R_SI; break;
+//            case 5: base =      R_DI; break;
+//            case 6: base =      R_BP; break;
+//            case 7: base =      R_BX; break;
+//        }
+//        switch((m>>6)&3) {
+//            case 0: if(m==6) base = Fetch16(emu); break;
+//            case 1: base += Fetch8s(emu); break;
+//            case 2: base += Fetch16s(emu); break;
+//            // case 3 is C0..C7, already dealt with
+//        }
+//        return (reg32_t*)base;
+//    }
+//}
+
+//static inline reg32_t* GetEw16off(x64emu_t *emu, uint32_t v, uintptr_t offset)
+//{
+//    uint32_t m = v&0xC7;    // filter Ed
+//    if(m>=0xC0) {
+//         return &emu->regs[(m&0x07)];
+//    } else {
+//        uint32_t base = 0;
+//        switch(m&7) {
+//            case 0: base = R_BX+R_SI; break;
+//            case 1: base = R_BX+R_DI; break;
+//            case 2: base = R_BP+R_SI; break;
+//            case 3: base = R_BP+R_DI; break;
+//            case 4: base =      R_SI; break;
+//            case 5: base =      R_DI; break;
+//            case 6: base =      R_BP; break;
+//            case 7: base =      R_BX; break;
+//        }
+//        switch((m>>6)&3) {
+//            case 0: if(m==6) base = Fetch16(emu); break;
+//            case 1: base += Fetch8s(emu); break;
+//            case 2: base += Fetch16s(emu); break;
+//            // case 3 is C0..C7, already dealt with
+//        }
+//        return (reg32_t*)(base+offset);
+//    }
+//}
+
+//static inline mmx_regs_t* GetEm(x64emu_t *emu, uint32_t v)
+//{
+//    uint32_t m = v&0xC7;    // filter Ed
+//    if(m>=0xC0) {
+//         return &emu->mmx[m&0x07];
+//    } else return (mmx_regs_t*)GetECommon(emu, m);
+//}
+
+//static inline sse_regs_t* GetEx(x64emu_t *emu, uint32_t v)
+//{
+//    uint32_t m = v&0xC7;    // filter Ed
+//    if(m>=0xC0) {
+//         return &emu->xmm[m&0x07];
+//    } else return (sse_regs_t*)GetECommon(emu, m);
+//}
+
+
+//static inline reg32_t* GetG(x64emu_t *emu, uint32_t v)
+//{
+//    return &emu->regs[((v&0x38)>>3)];
+//}
+
+//static inline reg32_t* GetGb(x64emu_t *emu, uint32_t v)
+//{
+//    uint8_t m = (v&0x38)>>3;
+//    return (reg32_t*)&emu->regs[m&3].byte[m>>2];
+//}
+
+//static inline mmx_regs_t* GetGm(x64emu_t *emu, uint32_t v)
+//{
+//    uint8_t m = (v&0x38)>>3;
+//    return &emu->mmx[m&7];
+//}
+
+//static inline sse_regs_t* GetGx(x64emu_t *emu, uint32_t v)
+//{
+//    uint8_t m = (v&0x38)>>3;
+//    return &emu->xmm[m&7];
+//}
+
+//void UpdateFlags(x64emu_t *emu);
+
+//#define CHECK_FLAGS(emu) if(emu->df) UpdateFlags(emu)
+#define RESET_FLAGS(emu) emu->df = d_none
+
+//void Run67(x64emu_t *emu);
+//void Run0F(x64emu_t *emu);
+//void Run660F(x64emu_t *emu);
+//void Run66D9(x64emu_t *emu);    // x87
+//void Run6766(x64emu_t *emu);
+//void RunGS(x64emu_t *emu);
+//void RunFS(x64emu_t *emu);
+//void RunFS66(x64emu_t *emu, uintptr_t tlsdata);
+//void RunLock(x64emu_t *emu);
+//void RunLock66(x64emu_t *emu);
+
+//void x86Syscall(x64emu_t *emu);
+//void x86Int3(x64emu_t* emu);
+//x64emu_t* x64emu_fork(x64emu_t* e, int forktype);
+
+//uintptr_t GetSegmentBaseEmu(x64emu_t* emu, int seg);
+#define GetGSBaseEmu(emu)    GetSegmentBaseEmu(emu, _GS)
+#define GetFSBaseEmu(emu)    GetSegmentBaseEmu(emu, _FS)
+#define GetESBaseEmu(emu)    GetSegmentBaseEmu(emu, _ES)
+#define GetDSBaseEmu(emu)    GetSegmentBaseEmu(emu, _DS)
+
+//const char* GetNativeName(void* p);
+
+#ifdef HAVE_TRACE
+void PrintTrace(x64emu_t* emu, uintptr_t ip, int dynarec);
+#endif
+
+#endif //__X86RUN_PRIVATE_H_
\ No newline at end of file
diff --git a/src/emu/x87emu_private.c b/src/emu/x87emu_private.c
new file mode 100755
index 00000000..3e0c0458
--- /dev/null
+++ b/src/emu/x87emu_private.c
@@ -0,0 +1,307 @@
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#include "debug.h"
+#include "x64emu_private.h"
+#include "x87emu_private.h"
+//#include "x64run_private.h"
+
+// Reinitialise the emulated x87 unit: data registers (double and long double
+// copies) zeroed, control word 0x37F, status word cleared, TOP and stack depth
+// at 0, and every register tagged as empty.
+void reset_fpu(x64emu_t* emu)
+{
+    memset(emu->fpu, 0, sizeof(emu->fpu));
+    memset(emu->fpu_ld, 0, sizeof(emu->fpu_ld));
+    emu->cw = 0x37F;
+    emu->sw.x16 = 0x0000;
+    emu->top = 0;
+    emu->fpu_stack = 0;
+    // 8 registers only: every other walk over p_regs in the x87 code uses 0..7,
+    // going up to index 8 stored one tag past the last register
+    for(int i=0; i<8; ++i)
+        emu->p_regs[i].tag = 0b11;  // STx is empty
+}
+
+// Store ST0 as 10 bytes of packed BCD at d (two decimal digits per byte, least
+// significant pair first). The last byte holds one more digit in its low
+// nibble and the sign in bit 7. Very approximate (the value is peeled digit by
+// digit with floor()), and no status flag is updated.
+void fpu_fbst(x64emu_t* emu, uint8_t* d) {
+    double rest = ST0.d;
+    uint8_t signbit = 0x00;
+    if(ST0.d<0.0)
+    {
+        signbit = 0x80;
+        rest = -rest;
+    }
+    double quot;
+    for (int n=0; n<9; ++n) {
+        // low nibble: current units digit
+        quot = floor(rest/10.0);
+        uint8_t pair = (rest - 10.0*quot);
+        rest = quot;
+        // high nibble: next digit
+        quot = floor(rest/10.0);
+        pair |= ((uint8_t)(rest - 10.0*quot))<<4;
+        rest = quot;
+
+        d[n] = pair;
+    }
+    quot = floor(rest/10.0);
+    uint8_t last = (rest - 10.0*quot);
+    last |= signbit;
+    d[9] = last;
+}
+
+// Load 10 bytes of packed BCD from s into ST0: bytes 0..8 carry two decimal
+// digits each (low nibble first), the low nibble of byte 9 is one more digit
+// and bit 7 of byte 9 is the sign.
+void fpu_fbld(x64emu_t* emu, uint8_t* s) {
+    uint64_t acc = 0;
+    uint64_t scale = 1;     // weight of the digit being added (power of 10)
+    for (int n=0; n<9; ++n) {
+        const uint8_t pair = s[n];
+        acc += scale * (pair&0x0f);
+        scale *= 10;
+        acc += scale * ((pair>>4)&0x0f);
+        scale *= 10;
+    }
+    ST0.d = acc;
+    const uint8_t last = s[9];
+    ST0.d += scale * (last&0x0f);
+    if(last&0x80)
+        ST0.d = -ST0.d;
+}
+
+
+#define FPU_t fpu_reg_t
+#define BIAS80 16383
+#define BIAS64 1023
+// long double (80bits) -> double (64bits)
+//
+// ld points at 10 bytes in x87 extended format as laid out in memory on a
+// little-endian host: a 64-bit significand with an explicit integer bit
+// (bit 63), followed by 16 bits holding the sign (bit 15) and the 15-bit
+// biased exponent. d receives the 8 bytes of the resulting double.
+// Both accesses go through memcpy, so neither pointer needs to be aligned.
+//
+// The significand is truncated (not rounded) to 52 bits. Values too small for
+// a normal double (including 80-bit denormals) become a signed zero, values
+// too large become a signed infinity. NaNs come out as a quiet NaN with the
+// original sign; the payload is not preserved.
+void LD2D(void* ld, void* d)
+{
+    uint64_t mant80;    // significand, integer bit included
+    uint16_t se;        // sign + biased exponent
+    memcpy(&mant80, ld, 8);
+    memcpy(&se, (char*)ld+8, 2);
+
+    const uint64_t sign64 = (se&0x8000)?0x8000000000000000ULL:0;
+    const uint32_t exp80 = se&0x7fff;
+    const int32_t exp64 = ((int32_t)exp80 - BIAS80) + BIAS64;
+    uint64_t result;
+
+    if(exp80==0x7fff) {
+        // infinity and nans: infinity has its 63 fraction bits all clear,
+        // any other pattern is a NaN (looking only at the top bits would
+        // mistake a NaN with a small payload for infinity)
+        if((mant80&0x7fffffffffffffffULL)==0)
+            result = 0x7ff0000000000000ULL;
+        else
+            result = 0x7ff8000000000000ULL; // a NaN needs a non-zero mantissa: exponent 0x7ff alone is infinity
+    } else if(exp80==0 || exp64<=0) {
+        // zero, or denormal / value too small for a normal double
+        result = 0;
+    } else if(exp64>=0x7ff) {
+        // too big value...
+        result = 0x7ff0000000000000ULL;
+    } else {
+        // drop the explicit integer bit, keep the 52 upper fraction bits
+        result = ((uint64_t)exp64<<52) | ((mant80>>11)&0xfffffffffffffULL);
+    }
+    result |= sign64;
+    memcpy(d, &result, 8);
+}
+
+// double (64bits) -> long double (80bits)
+//
+// d points at the 8 bytes of an IEEE double; ld receives 10 bytes in x87
+// extended format (64-bit significand with explicit integer bit, then 16 bits
+// of sign + 15-bit biased exponent), little-endian host layout.
+// Both accesses go through memcpy, so neither pointer needs to be aligned.
+//
+// Every finite double is converted exactly (denormal doubles are normalised,
+// the extended format has enough exponent range for them). Infinities keep
+// their sign; any NaN becomes a quiet NaN with the original sign.
+void D2LD(void* d, void* ld)
+{
+    uint64_t src;
+    memcpy(&src, d, 8);
+
+    const uint16_t sign80 = (src>>63)?0x8000:0;     // the sign is bit 63 of the double
+    uint32_t exp = (uint32_t)(src>>52)&0x7ff;
+    const uint64_t frac = src&0x000fffffffffffffULL;
+    uint64_t mant80 = frac<<11;     // fraction aligned just below the integer bit
+
+    if(exp==0x7ff) {
+        // NaN and Infinite
+        exp = 0x7fff;
+        if(frac==0)
+            mant80 = 0x8000000000000000ULL; //infinity
+        else
+            mant80 = 0xc000000000000000ULL; //(quiet)NaN
+    } else if(exp!=0) {
+        // normal number: make the implicit leading 1 explicit and rebias
+        mant80 |= 0x8000000000000000ULL;
+        exp += (BIAS80 - BIAS64);
+    } else if(frac!=0) {
+        // denormal double: value is 0.frac * 2^(1-BIAS64); shift the first set
+        // bit up to the integer position and lower the exponent accordingly
+        exp = 1 + (BIAS80 - BIAS64);
+        while(!(mant80&0x8000000000000000ULL)) {
+            mant80 <<= 1;
+            --exp;
+        }
+    }
+    // else +/-0.0: exponent and significand stay 0, only the sign is kept
+
+    const uint16_t se = sign80|(uint16_t)exp;
+    memcpy(ld, &mant80, 8);
+    memcpy((char*)ld+8, &se, 2);
+}
+
+// Convert the 80-bit extended value stored at ld (see LD2D) and return it as a double.
+double FromLD(void* ld)
+{
+    double converted;
+    LD2D(ld, &converted);
+    return converted;
+}
+
+// Reload the x87 environment from the memory image at p: control word, status
+// word (TOP is taken from it) and the 16-bit tag word, 2 bits per register.
+// With b16 set the fields are 2 bytes apart, otherwise 4 bytes apart.
+// The instruction/operand pointers and opcode that follow are ignored.
+void fpu_loadenv(x64emu_t* emu, char* p, int b16)
+{
+    const int step = (b16)?2:4;     // distance between two consecutive fields
+    emu->cw = *(uint16_t*)p;
+    emu->sw.x16 = *(uint16_t*)(p+step);
+    emu->top = emu->sw.f.F87_TOP;
+    // tagword: 2bits*8, register r uses bits 2r+1..2r
+    const uint16_t tagword = *(uint16_t*)(p+2*step);
+    for(int r=0; r<8; ++r)
+        emu->p_regs[r].tag = (tagword>>(r*2))&0b11;
+}
+
+// Store the x87 environment at p: control word, status word (with TOP
+// refreshed from emu->top) and the tag word, 2 bits per register.
+// With b16 set the fields are packed every 2 bytes; otherwise control and
+// status word are each followed by a zeroed 16-bit filler.
+// Nothing is written after the tag word.
+void fpu_savenv(x64emu_t* emu, char* p, int b16)
+{
+    uint16_t* out = (uint16_t*)p;
+    emu->sw.f.F87_TOP = emu->top&7;
+    *out++ = emu->cw;
+    if(!b16)
+        *out++ = 0;
+    *out++ = emu->sw.x16;
+    if(!b16)
+        *out++ = 0;
+    // tagword: 2bits*8
+    uint16_t tagword = 0;
+    for (int r=0; r<8; ++r)
+        tagword |= (emu->p_regs[r].tag)<<(r*2);
+    *out = tagword;
+}
+
+// Memory image used by fpu_fxsave()/fpu_fxrstor(). The trailing comment on
+// each field is its byte offset (hex) inside the image; the field order and
+// sizes must not change.
+typedef struct xsave_s {
+    uint16_t ControlWord;        /* 000 */
+    uint16_t StatusWord;         /* 002 */
+    uint8_t  TagWord;            /* 004 */  // one byte only for the 8 registers
+    uint8_t  Reserved1;          /* 005 */
+    uint16_t ErrorOpcode;        /* 006 */
+    uint32_t ErrorOffset;        /* 008 */
+    uint16_t ErrorSelector;      /* 00c */
+    uint16_t Reserved2;          /* 00e */
+    uint32_t DataOffset;         /* 010 */
+    uint16_t DataSelector;       /* 014 */
+    uint16_t Reserved3;          /* 016 */
+    uint32_t MxCsr;              /* 018 */
+    uint32_t MxCsr_Mask;         /* 01c */
+    sse_regs_t FloatRegisters[8];/* 020 */  // fpu/mmx are stored in 128-bit slots here
+    sse_regs_t XmmRegisters[16]; /* 0a0 */
+    uint8_t  Reserved4[96];      /* 1a0 */  // fpu_fxsave() puts a magic marker in here
+} xsave_t;
+
+// Dump the x87/MMX/SSE state of emu into the xsave_t image at ed.
+// Error/data pointers, MXCSR and its mask are written as 0. Besides the
+// architectural content, the double-precision x87 registers are stashed in the
+// XMM8-15 slots and a magic marker is written in the reserved area so that
+// fpu_fxrstor() can recognise an image produced here and get them back.
+void fpu_fxsave(x64emu_t* emu, void* ed)
+{
+    xsave_t *p = (xsave_t*)ed;
+    // should save flags & all
+    emu->sw.f.F87_TOP = emu->top&7;
+    p->ControlWord = emu->cw;
+    p->StatusWord = emu->sw.x16;
+    // abridged tag word: one bit per register, set when the register is not empty
+    uint8_t tags = 0;
+    for (int i=0; i<8; ++i)
+        tags |= ((emu->p_regs[i].tag==0b11)?0:1)<<i;
+    p->TagWord = tags;
+    p->ErrorOpcode = 0;
+    p->ErrorOffset = 0;
+    p->ErrorSelector = 0;
+    p->DataOffset = 0;
+    p->DataSelector = 0;
+    p->MxCsr = 0;
+    p->MxCsr_Mask = 0;
+    // copy MMX regs... (each one in the low 64 bits of its own 128-bit slot)
+    for(int i=0; i<8; ++i)
+        memcpy(&p->FloatRegisters[i].q[0], &emu->mmx[i], sizeof(emu->mmx[0]));
+    // copy SSE regs
+    // NOTE(review): if emu->xmm holds 16 registers, XMM8-15 get overwritten by the FPU copy just below - confirm
+    memcpy(&p->XmmRegisters[0], &emu->xmm[0], sizeof(emu->xmm));
+    // put also FPU regs in a reserved area... on XMM 8-15
+    for(int i=0; i<8; ++i)
+        memcpy(&p->XmmRegisters[8+i].q[0], &emu->fpu[i], sizeof(emu->fpu[0]));
+    // put a magic sign in reserved area, box86 specific
+    ((unsigned int *)p->Reserved4)[11] = 0x50515253;
+}
+
+// Reload the x87/MMX/SSE state of emu from the xsave_t image at ed.
+// When the image carries the magic marker written by fpu_fxsave(), the
+// double-precision x87 registers are taken back from the XMM8-15 slots;
+// otherwise they are filled from the MMX registers just restored.
+void fpu_fxrstor(x64emu_t* emu, void* ed)
+{
+    xsave_t *p = (xsave_t*)ed;
+    emu->cw = p->ControlWord;
+    emu->sw.x16 = p->StatusWord;
+    emu->top = emu->sw.f.F87_TOP;
+    // abridged tag word: bit i set means register i holds a value
+    uint8_t tags = p->TagWord;
+    for(int i=0; i<8; ++i)
+        emu->p_regs[i].tag = ((tags>>i)&1)?0:0b11;
+    // copy back MMX regs...
+    for(int i=0; i<8; ++i)
+        memcpy(&emu->mmx[i], &p->FloatRegisters[i].q[0], sizeof(emu->mmx[0]));
+    // copy SSE regs
+    memcpy(&emu->xmm[0], &p->XmmRegisters[0], sizeof(emu->xmm));
+    // check the box86 magic sign in reserved area
+    if(((unsigned int *)p->Reserved4)[11] == 0x50515253) {
+        // also FPU regs where a reserved area... on XMM 8-15?
+        for(int i=0; i<8; ++i)
+            memcpy(&emu->fpu[i], &p->XmmRegisters[8+i].q[0], sizeof(emu->fpu[0]));
+    } else {
+        // copy the mmx to fpu...
+        for(int i=0; i<8; ++i)
+            memcpy(&emu->fpu[i], &emu->mmx[i], sizeof(emu->mmx[0]));
+    }
+}
diff --git a/src/emu/x87emu_private.h b/src/emu/x87emu_private.h
new file mode 100755
index 00000000..b8fcf9ad
--- /dev/null
+++ b/src/emu/x87emu_private.h
@@ -0,0 +1,201 @@
+#ifndef __X87RUN_PRIVATE_H_
+#define __X87RUN_PRIVATE_H_
+
+#include <stdint.h>
+#include <math.h>
+#include "regs.h"
+#include "x64run_private.h"
+#include "debug.h"
+typedef struct x64emu_s x64emu_t;
+
+#define PI		3.14159265358979323846
+#define L2E		1.4426950408889634
+#define L2T		3.3219280948873623
+#define LN2		0.69314718055994531
+#define LG2		0.3010299956639812
+
+//void Run66D9(x64emu_t *emu);
+//void Run66DD(x64emu_t *emu);
+//void RunDF(x64emu_t *emu);
+
+#define ST0 emu->fpu[emu->top]
+#define ST1 emu->fpu[(emu->top+1)&7]
+#define ST(a) emu->fpu[(emu->top+(a))&7]
+
+#define STld(a)  emu->fpu_ld[(emu->top+(a))&7]
+#define STll(a)  emu->fpu_ll[(emu->top+(a))&7]
+
+// Make room for a new ST0: TOP moves down by one (modulo 8) and the register
+// it now designates is tagged as full. The stack depth counter saturates at 8.
+// Overflow (pushing on a non-empty register) is deliberately not detected.
+static inline void fpu_do_push(x64emu_t* emu)
+{
+    const int slot = (emu->top-1)&7;
+    if(emu->fpu_stack<8)
+        emu->fpu_stack += 1;
+    emu->top = slot;
+    emu->p_regs[slot].tag = 0;    // full
+}
+
+// Drop ST0: the register TOP designates is tagged as empty and TOP moves up by
+// one (modulo 8). The stack depth counter stops at 0.
+// Underflow (popping an empty register) is deliberately not detected.
+static inline void fpu_do_pop(x64emu_t* emu)
+{
+    if(emu->fpu_stack>0)
+        emu->fpu_stack -= 1;
+    emu->p_regs[(emu->top)&7].tag = 0b11;    // empty
+    emu->top = (emu->top+1)&7;
+}
+
+// Tag ST(i) as empty. When no register holds a value any more, the stack
+// depth counter is reset to 0 as well.
+static inline void fpu_do_free(x64emu_t* emu, int i)
+{
+    emu->p_regs[(emu->top+i)&7].tag = 0b11;    // empty
+    // look for any register still in use
+    int used = 0;
+    for(int r=0; r<8 && !used; ++r)
+        used = (emu->p_regs[r].tag != 0b11);
+    if(!used)
+        emu->fpu_stack = 0;
+}
+
+void reset_fpu(x64emu_t* emu);
+
+// Compare ST0 with b and report in the status word (C1 is always cleared):
+//   unordered (a NaN on either side) -> C3=1 C2=1 C0=1
+//   ST0 >  b                         -> C3=0 C2=0 C0=0
+//   ST0 <  b                         -> C3=0 C2=0 C0=1
+//   ST0 == b                         -> C3=1 C2=0 C0=0
+static inline void fpu_fcom(x64emu_t* emu, double b)
+{
+    int c0, c2, c3;
+    if(isnan(ST0.d) || isnan(b)) {
+        c0 = 1; c2 = 1; c3 = 1;
+    } else if (isgreater(ST0.d, b)) {
+        c0 = 0; c2 = 0; c3 = 0;
+    } else if (isless(ST0.d, b)) {
+        c0 = 1; c2 = 0; c3 = 0;
+    } else {
+        c0 = 0; c2 = 0; c3 = 1;
+    }
+    emu->sw.f.F87_C1 = 0;
+    emu->sw.f.F87_C0 = c0;
+    emu->sw.f.F87_C2 = c2;
+    emu->sw.f.F87_C3 = c3;
+}
+
+// Compare ST0 with b and report directly in EFLAGS. Pending deferred flags are
+// discarded, AF/OF/SF and the x87 C1 bit are cleared, then:
+//   unordered (a NaN on either side) -> ZF=1 PF=1 CF=1
+//   ST0 >  b                         -> ZF=0 PF=0 CF=0
+//   ST0 <  b                         -> ZF=0 PF=0 CF=1
+//   ST0 == b                         -> ZF=1 PF=0 CF=0
+static inline void fpu_fcomi(x64emu_t* emu, double b)
+{
+    int cf, pf, zf;
+    if(isnan(ST0.d) || isnan(b)) {
+        cf = 1; pf = 1; zf = 1;
+    } else if (isgreater(ST0.d, b)) {
+        cf = 0; pf = 0; zf = 0;
+    } else if (isless(ST0.d, b)) {
+        cf = 1; pf = 0; zf = 0;
+    } else {
+        cf = 0; pf = 0; zf = 1;
+    }
+    RESET_FLAGS(emu);
+    CLEAR_FLAG(F_AF);
+    CLEAR_FLAG(F_OF);
+    CLEAR_FLAG(F_SF);
+    emu->sw.f.F87_C1 = 0;
+    CONDITIONAL_SET_FLAG(cf, F_CF);
+    CONDITIONAL_SET_FLAG(pf, F_PF);
+    CONDITIONAL_SET_FLAG(zf, F_ZF);
+}
+
+// Round d to an integral value following emu->round. Infinities and NaN are
+// returned untouched; any mode other than Nearest/Down/Up truncates.
+static inline double fpu_round(x64emu_t* emu, double d) {
+    if (!isfinite(d))
+        return d;
+    if (emu->round == ROUND_Nearest)
+        return nearbyint(d);
+    if (emu->round == ROUND_Down)
+        return floor(d);
+    if (emu->round == ROUND_Up)
+        return ceil(d);
+    // ROUND_Chop and anything unexpected
+    return trunc(d);
+}
+
+// Classify ST0 in the status word. C1 receives the sign bit of ST0 in every
+// case; C3/C2/C0 encode the class:
+//   empty stack -> 1 0 1      infinity -> 0 1 1      NaN -> 0 0 1
+//   zero        -> 1 0 0      anything else (normal) -> 0 1 0
+// TODO: Unsupported and denormal not analysed...
+static inline void fpu_fxam(x64emu_t* emu) {
+    int c3, c2, c0;
+    if(!emu->fpu_stack) {
+        c3 = 1; c2 = 0; c0 = 1;     // empty
+    } else if(isinf(ST0.d)) {
+        c3 = 0; c2 = 1; c0 = 1;     // infinity
+    } else if(isnan(ST0.d)) {
+        c3 = 0; c2 = 0; c0 = 1;     // NaN
+    } else if(ST0.d==0.0) {
+        c3 = 1; c2 = 0; c0 = 0;     // zero
+    } else {
+        c3 = 0; c2 = 1; c0 = 0;     // normal...
+    }
+    emu->sw.f.F87_C1 = (ST0.l.upper&0x80000000)?1:0;
+    emu->sw.f.F87_C3 = c3;
+    emu->sw.f.F87_C2 = c2;
+    emu->sw.f.F87_C0 = c0;
+}
+
+// Compare ST0 with 0.0 and report in the status word (C1 is always cleared):
+//   unordered (ST0 is a NaN) -> C3=1 C2=1 C0=1
+//   ST0 == 0.0 (+0 or -0)    -> C3=1 C2=0 C0=0
+//   ST0 >  0.0               -> C3=0 C2=0 C0=0
+//   ST0 <  0.0               -> C3=0 C2=0 C0=1
+// Infinities are ordered like any other number (+inf > 0.0, -inf < 0.0), so
+// they go through the sign test and are not reported as unordered.
+static inline void fpu_ftst(x64emu_t* emu) {
+    emu->sw.f.F87_C1 = 0;
+    if(isnan(ST0.d))
+    {  // TODO: Unsuported and denormal not analysed...
+        emu->sw.f.F87_C3 = 1;
+        emu->sw.f.F87_C2 = 1;
+        emu->sw.f.F87_C0 = 1;
+        return;
+    }
+    if(ST0.d==0.0)
+    {
+        emu->sw.f.F87_C3 = 1;
+        emu->sw.f.F87_C2 = 0;
+        emu->sw.f.F87_C0 = 0;
+        return;
+    }
+    // normal or infinite: only the sign matters
+    emu->sw.f.F87_C3 = 0;
+    emu->sw.f.F87_C2 = 0;
+    emu->sw.f.F87_C0 = (ST0.l.upper&0x80000000)?1:0;
+}
+
+void fpu_fbst(x64emu_t* emu, uint8_t* d);
+void fpu_fbld(x64emu_t* emu, uint8_t* s);
+
+void fpu_loadenv(x64emu_t* emu, char* p, int b16);
+void fpu_savenv(x64emu_t* emu, char* p, int b16);
+void fpu_fxsave(x64emu_t* emu, void* ed);
+void fpu_fxrstor(x64emu_t* emu, void* ed);
+
+#endif //__X87RUN_PRIVATE_H_
diff --git a/src/include/box64context.h b/src/include/box64context.h
index 3ee5f927..8f5738f5 100755
--- a/src/include/box64context.h
+++ b/src/include/box64context.h
@@ -5,6 +5,7 @@
 #include "pathcoll.h"
 
 typedef struct elfheader_s elfheader_t;
+typedef struct cleanup_s cleanup_t;
 
 typedef void* (*procaddess_t)(const char* name);
 typedef void* (*vkprocaddess_t)(void* instance, const char* name);
@@ -55,6 +56,12 @@ typedef struct box64context_s {
     void*               tlsdata;    // the initial global tlsdata
     int32_t             tlssize;    // wanted size of tlsdata
 
+    uintptr_t           *auxval_start;
+
+    cleanup_t   *cleanups;          // atexit functions
+    int         clean_sz;
+    int         clean_cap;
+
     //zydis_dec_t         *dec;           // trace
 
     uint8_t             canary[4];
diff --git a/src/include/regs.h b/src/include/regs.h
new file mode 100755
index 00000000..98a6bada
--- /dev/null
+++ b/src/include/regs.h
@@ -0,0 +1,290 @@
+#ifndef __REGS_H_
+#define __REGS_H_
+
+// Indexes of the 16 general purpose registers inside emu->regs[]
+// (used by the R_RAX ... R_R15 / R_EAX ... / R_AX ... / R_AL ... macros).
+enum {
+	_AX, _CX, _DX, _BX,
+	_SP, _BP, _SI, _DI,
+	_R8, _R9,_R10,_R11,
+   _R12,_R13,_R14,_R15
+};
+
+// Indexes of the segment registers inside emu->segs[] (used by R_CS ... R_GS).
+enum {
+    _CS, _DS, _SS, _ES, _FS, _GS
+};
+
+
+// One 64-bit register; every member is a view over the same 8 bytes, as
+// signed or unsigned 64, 32, 16 or 8-bit elements.
+typedef union {
+	int64_t  sq[1];
+	uint64_t q[1];
+	int32_t  sdword[2];
+	uint32_t dword[2];
+	int16_t  sword[4];
+	uint16_t word[4];
+	int8_t   sbyte[8];
+	uint8_t  byte[8];
+} reg64_t;
+
+// Per-register x87 bookkeeping. The x87 helpers store 0 in tag for a register
+// holding a value and 0b11 for an empty one.
+typedef struct {
+    uint32_t tag;
+} fpu_p_reg_t;
+
+// Rounding modes understood by fpu_round() (compared against emu->round).
+typedef enum {
+	ROUND_Nearest = 0,		
+	ROUND_Down    = 1,
+	ROUND_Up      = 2,	
+	ROUND_Chop    = 3
+} fpu_round_t;
+
+// Kind of operation recorded in emu->df; d_none (0) is what RESET_FLAGS()
+// stores. Presumably the operation whose flags are still to be computed -
+// verify against UpdateFlags(). The 8/16/32 suffix is the operand width.
+// The numeric comments give the value of the enumerator they sit on.
+typedef enum {
+	d_none = 0,
+	d_add8,
+	d_add16,
+	d_add32,
+	d_and8,
+	d_and16,
+	d_and32,
+	d_dec8,
+	d_dec16,
+	d_dec32,
+	d_inc8,		//10
+	d_inc16,
+	d_inc32,
+	d_imul8,
+	d_imul16,
+	d_imul32,
+	d_or8,
+	d_or16,
+	d_or32,
+	d_mul8,
+	d_mul16,	//20
+	d_mul32,
+	d_neg8,
+	d_neg16,
+	d_neg32,
+	d_shl8,
+	d_shl16,
+	d_shl32,
+	d_shr8,
+	d_shr16,
+	d_shr32,	//30
+	d_sar8,
+	d_sar16,
+	d_sar32,
+	d_sub8,
+	d_sub16, 
+	d_sub32,
+	d_xor8,
+	d_xor16,
+	d_xor32,
+	d_cmp8,		//40	// those are used only with Dynarec
+	d_cmp16,
+	d_cmp32,
+	d_tst8,
+	d_tst16,
+	d_tst32,
+	d_adc8,
+	d_adc16,
+	d_adc32,
+	d_sbb8,
+	d_sbb16, 
+	d_sbb32,
+	d_rol8,
+	d_rol16,
+	d_rol32,
+	d_ror8,
+	d_ror16,
+	d_ror32,
+	d_rcl8,
+	d_rcl16,
+	d_rcl32,
+	d_rcr8,
+	d_rcr16,
+	d_rcr32,
+	d_unknown	//64
+} defered_flags_t;
+
+#pragma pack(push, 1)
+
+// One x87 data register kept as a double; the other members are views over the
+// same 8 bytes (two 32-bit halves, two floats, or one signed 64-bit integer).
+// The x87 helpers test the double's sign through l.upper & 0x80000000.
+typedef union {
+    double d;
+    struct {
+        uint32_t lower;
+        uint32_t upper;
+    } l;
+    struct {
+        float lower;
+        float upper;
+    } f;
+    int64_t ll;
+} fpu_reg_t;
+
+// Raw storage for an 80-bit value split as 64 + 16 bits (10 bytes, the
+// enclosing #pragma pack(1) removes any padding).
+typedef union {
+	//long double ld;	// works only if 80bits!
+	struct {
+		uint64_t lower;
+		uint16_t upper;
+	} l;
+} longdouble_t;
+
+// Extended-precision companion of an x87 register: a native long double when
+// HAVE_LD80BITS is defined, the raw 10-byte storage otherwise.
+// NOTE(review): ref is presumably the double this value was converted from/to,
+// used to tell whether ld is still valid - confirm against the users of fpu_ld.
+typedef struct {
+	#ifdef HAVE_LD80BITS
+	long double 	ld;
+	#else
+	longdouble_t 	ld;
+	#endif
+	uint64_t		ref;
+} fpu_ld_t;
+
+// 64-bit integer companion of an x87 register.
+// NOTE(review): ref presumably plays the same role as in fpu_ld_t - confirm.
+typedef struct {
+	int64_t			ll;
+	int64_t			ref;
+} fpu_ll_t;
+
+// Flags register, either as individual bit-fields (f) or as one raw 32-bit
+// value (x32). The bit-fields are declared in bit order starting at bit 0 and
+// are reached through ACCESS_FLAG/SET_FLAG/CLEAR_FLAG/CONDITIONAL_SET_FLAG,
+// which prepend '_' to a flags_names_t name.
+typedef union {
+    struct __attribute__ ((__packed__)) {
+        unsigned int _F_CF:1;
+		unsigned int _F_res1:1;
+        unsigned int _F_PF:1;
+		unsigned int _F_res2:1;
+        unsigned int _F_AF:1;
+		unsigned int _F_res3:1;
+        unsigned int _F_ZF:1;
+        unsigned int _F_SF:1;
+        unsigned int _F_TF:1;
+        unsigned int _F_IF:1;
+        unsigned int _F_DF:1;
+        unsigned int _F_OF:1;
+        unsigned int _F_IOPL:2;
+        unsigned int _F_NT:1;
+        unsigned int _F_dummy:1;
+        unsigned int _F_RF:1;
+        unsigned int _F_VM:1;
+        unsigned int _F_AC:1;
+        unsigned int _F_VIF:1; 
+        unsigned int _F_VIP:1;
+        unsigned int _F_ID:1;
+    } f;
+    uint32_t    x32;
+} x86flags_t;
+
+// Names of the flags, in the same order as the bit-fields of x86flags_t; each
+// value is the position of the flag's first bit. F_IOPL (12) is two bits wide,
+// which is why F_NT is forced to 14.
+typedef enum {
+    F_CF = 0,
+	F_res1,
+    F_PF,
+	F_res2,
+    F_AF,
+	F_res3,
+    F_ZF,
+    F_SF,
+    F_TF,
+    F_IF,
+    F_DF,
+    F_OF,
+    F_IOPL, // double
+    F_NT = 14,
+    F_dummy,
+    F_RF,
+    F_VM,
+    F_AC,
+    F_VIF,
+    F_VIP,
+    F_ID,
+	F_LAST
+} flags_names_t;
+
+
+// x87 status word, either as individual bit-fields (f) or as one raw 16-bit
+// value (x16). The bit-fields are declared in bit order starting at bit 0;
+// F87_TOP is the 3-bit top-of-stack index mirrored from/to emu->top.
+typedef union {
+    struct __attribute__ ((__packed__)) {
+        unsigned int F87_IE:1;
+        unsigned int F87_DE:1;
+        unsigned int F87_ZE:1;
+        unsigned int F87_OE:1;
+        unsigned int F87_UE:1;
+        unsigned int F87_PE:1;
+        unsigned int F87_SF:1;
+        unsigned int F87_ES:1;
+        unsigned int F87_C0:1;
+		unsigned int F87_C1:1;
+		unsigned int F87_C2:1;
+		unsigned int F87_TOP:3;
+		unsigned int F87_C3:1;
+		unsigned int F87_B:1;
+    } f;
+    uint16_t    x16;
+} x87flags_t;
+
+// One 64-bit MMX register; every member is a view over the same 8 bytes, as
+// signed or unsigned 64, 32, 16 or 8-bit elements.
+typedef union {
+	uint64_t	q;
+	int64_t		sq;
+	uint32_t	ud[2];
+	int32_t 	sd[2];
+	uint16_t 	uw[4];
+	int16_t 	sw[4];
+	uint8_t 	ub[8];
+	int8_t 		sb[8];
+} mmx_regs_t;
+
+// One 128-bit SSE register; every member is a view over the same 16 bytes, as
+// 64-bit integers, doubles, floats, or 32/16/8-bit signed or unsigned elements.
+typedef union {
+	uint64_t q[2];
+	int64_t sq[2];
+	double d[2];
+	float f[4];
+	uint32_t ud[4];
+	int32_t sd[4];
+	uint16_t uw[8];
+	int16_t sw[8];
+	uint8_t ub[16];
+	int8_t sb[16];
+} sse_regs_t;
+#pragma pack(pop)
+
+#define R_RIP emu->ip.q[0]
+#define R_RAX emu->regs[_AX].q[0]
+#define R_RBX emu->regs[_BX].q[0]
+#define R_RCX emu->regs[_CX].q[0]
+#define R_RDX emu->regs[_DX].q[0]
+#define R_RDI emu->regs[_DI].q[0]
+#define R_RSI emu->regs[_SI].q[0]
+#define R_RSP emu->regs[_SP].q[0]
+#define R_RBP emu->regs[_BP].q[0]
+#define R_R8 emu->regs[_R8].q[0]
+#define R_R9 emu->regs[_R9].q[0]
+#define R_R10 emu->regs[_R10].q[0]
+#define R_R11 emu->regs[_R11].q[0]
+#define R_R12 emu->regs[_R12].q[0]
+#define R_R13 emu->regs[_R13].q[0]
+#define R_R14 emu->regs[_R14].q[0]
+#define R_R15 emu->regs[_R15].q[0]
+#define R_EAX emu->regs[_AX].dword[0]
+#define R_EBX emu->regs[_BX].dword[0]
+#define R_ECX emu->regs[_CX].dword[0]
+#define R_EDX emu->regs[_DX].dword[0]
+#define R_EDI emu->regs[_DI].dword[0]
+#define R_ESI emu->regs[_SI].dword[0]
+#define R_ESP emu->regs[_SP].dword[0]
+#define R_EBP emu->regs[_BP].dword[0]
+#define R_AX emu->regs[_AX].word[0]
+#define R_BX emu->regs[_BX].word[0]
+#define R_CX emu->regs[_CX].word[0]
+#define R_DX emu->regs[_DX].word[0]
+#define R_DI emu->regs[_DI].word[0]
+#define R_SI emu->regs[_SI].word[0]
+#define R_SP emu->regs[_SP].word[0]
+#define R_BP emu->regs[_BP].word[0]
+#define R_AL emu->regs[_AX].byte[0]
+#define R_AH emu->regs[_AX].byte[1]
+#define R_CX emu->regs[_CX].word[0]
+#define R_CL emu->regs[_CX].byte[0]
+#define R_CS emu->segs[_CS]
+#define R_DS emu->segs[_DS]
+#define R_SS emu->segs[_SS]
+#define R_ES emu->segs[_ES]
+#define R_FS emu->segs[_FS]
+#define R_GS emu->segs[_GS]
+
+#define ACCESS_FLAG(F)  emu->eflags.f._##F
+#define SET_FLAG(F)     emu->eflags.f._##F = 1
+#define CLEAR_FLAG(F)   emu->eflags.f._##F = 0
+#define CONDITIONAL_SET_FLAG(COND, F)   emu->eflags.f._##F = (COND)?1:0
+
+#endif //__REGS_H_
diff --git a/src/include/x64emu.h b/src/include/x64emu.h
new file mode 100755
index 00000000..408fd3db
--- /dev/null
+++ b/src/include/x64emu.h
@@ -0,0 +1,63 @@
+#ifndef __X86EMU_H_
+#define __X86EMU_H_
+#include <stdint.h> // uintptr_t / uint16_t / uint32_t / uint64_t are used by the prototypes below
+typedef struct x64emu_s x64emu_t; // only the struct tags are declared here; both types are opaque to includers of this header
+typedef struct box64context_s box64context_t;
+
+x64emu_t *NewX86Emu(box64context_t *context, uintptr_t start, uintptr_t stack, int stacksize, int ownstack);
+x64emu_t *NewX86EmuFromStack(x64emu_t* emu, box64context_t *context, uintptr_t start, uintptr_t stack, int stacksize, int ownstack);
+void SetupX86Emu(x64emu_t *emu);
+void FreeX86Emu(x64emu_t **x64emu);
+void FreeX86EmuFromStack(x64emu_t **emu);
+void CloneEmu(x64emu_t *newemu, const x64emu_t* emu);
+void SetTraceEmu(uintptr_t trace_start, uintptr_t trace_end);
+
+box64context_t* GetEmuContext(x64emu_t* emu);
+uint32_t* GetParityTab(void); // (void) makes this a real prototype; empty parentheses would leave the parameters unchecked
+
+uint32_t GetEAX(x64emu_t *emu);
+uint64_t GetRAX(x64emu_t *emu);
+void SetEAX(x64emu_t *emu, uint32_t v);
+void SetEBX(x64emu_t *emu, uint32_t v);
+void SetECX(x64emu_t *emu, uint32_t v);
+void SetEDX(x64emu_t *emu, uint32_t v);
+void SetEDI(x64emu_t *emu, uint32_t v);
+void SetESI(x64emu_t *emu, uint32_t v);
+void SetEBP(x64emu_t *emu, uint32_t v);
+//void SetESP(x64emu_t *emu, uint32_t v);
+void SetRAX(x64emu_t *emu, uint64_t v);
+void SetRBX(x64emu_t *emu, uint64_t v);
+void SetRCX(x64emu_t *emu, uint64_t v);
+void SetRDX(x64emu_t *emu, uint64_t v);
+void SetRDI(x64emu_t *emu, uint64_t v);
+void SetRSI(x64emu_t *emu, uint64_t v);
+void SetRBP(x64emu_t *emu, uint64_t v);
+void SetRSP(x64emu_t *emu, uint64_t v);
+void SetRIP(x64emu_t *emu, uint64_t v);
+void SetFS(x64emu_t *emu, uint16_t v);
+uint16_t GetFS(x64emu_t *emu);
+uint64_t GetRSP(x64emu_t *emu);
+void ResetFlags(x64emu_t *emu);
+void ResetSegmentsCache(x64emu_t *emu);
+const char* DumpCPURegs(x64emu_t* emu, uintptr_t ip);
+
+void StopEmu(x64emu_t* emu, const char* reason);
+//void PushExit(x64emu_t* emu);
+//void* GetExit();
+void EmuCall(x64emu_t* emu, uintptr_t addr);
+void AddCleanup(x64emu_t *emu, void *p);
+void AddCleanup1Arg(x64emu_t *emu, void *p, void* a);
+void CallCleanup(x64emu_t *emu, void* p);
+void CallAllCleanup(x64emu_t *emu);
+void UnimpOpcode(x64emu_t* emu);
+
+uint64_t ReadTSC(x64emu_t* emu);
+
+double FromLD(void* ld);        // long double (80bits pointer) -> double
+void LD2D(void* ld, void* d);   // long double (80bits) -> double (64bits)
+void D2LD(void* d, void* ld);   // double (64bits) -> long double (80bits)
+
+void printFunctionAddr(uintptr_t nextaddr, const char* text);
+const char* getAddrFunctionName(uintptr_t addr);
+
+#endif //__X86EMU_H_
\ No newline at end of file
diff --git a/src/include/x64run.h b/src/include/x64run.h
new file mode 100755
index 00000000..6b7333f7
--- /dev/null
+++ b/src/include/x64run.h
@@ -0,0 +1,14 @@
+#ifndef __X64RUN_H_
+#define __X64RUN_H_
+#include <stdint.h>
+
+typedef struct x64emu_s x64emu_t; // only the struct tag is declared here; the type is opaque to includers of this header
+//int Run(x64emu_t *emu, int step); // 0 if run was successful, 1 if error in x86 world
+//int DynaRun(x64emu_t *emu);
+
+//uint32_t LibSyscall(x64emu_t *emu);
+//void PltResolver(x64emu_t* emu);
+//extern uintptr_t pltResolver;
+int GetTID(void); // (void) makes this a real prototype; empty parentheses would leave the parameters unchecked
+
+#endif //__X64RUN_H_
\ No newline at end of file
diff --git a/src/main.c b/src/main.c
index d9052a58..f64996cf 100755
--- a/src/main.c
+++ b/src/main.c
@@ -817,6 +817,9 @@ int main(int argc, const char **argv, const char **env) {
         FreeCollection(&ld_preload);
         return -1;
     }
+    // init x86_64 emu
+    //x64emu_t *emu = NewX64Emu(my_context, my_context->ep, (uintptr_t)my_context->stack, my_context->stacksz, 0);
+
 
     return 0;
 }