about summary refs log tree commit diff stats
path: root/src/libtools
diff options
context:
space:
mode:
authorptitSeb <sebastien.chev@gmail.com>2024-08-26 17:45:13 +0200
committerGitHub <noreply@github.com>2024-08-26 17:45:13 +0200
commitb5105a1e57bba3305d5dce93ab4d2f7faab6b34a (patch)
treeab26b700d3c48f2c8e32a1084ae7c2e7a8448b06 /src/libtools
parent9beb745765e9c99bad6410094a97bf0bf9ebc1eb (diff)
downloadbox64-b5105a1e57bba3305d5dce93ab4d2f7faab6b34a.tar.gz
box64-b5105a1e57bba3305d5dce93ab4d2f7faab6b34a.zip
Added preliminary Box32 support (#1760)
* Improve the ReserveHigMemory helper function

* [BOX32] Added some wrapping infrastructure

* [BOX32] More wrapped 32bits lib infrastructure

* [BOX32] Added callback and tls 32bits handling

* [BOX32] Added more 32bits, around wrappers and elfs

* [BOX32] Added the 32bits version of myalign

* [BOX32] More wrapped libs and 32bits fixes and improvements

* [BOX32] Added some 32bits tests

* [BOX32] Try to enable some Box32 build and test on the CI

* [BOX32] Disable Box32 testing on CI platform that use qemu

* [BOX32] Another attempt to disable Box32 testing on CI platform that use qemu

* [BOX32] Small fix for another attempt to disable Box32 testing on CI platform that use qemu

* [BOX32] Yet another fix for another attempt to disable Box32 testing on CI platform that use qemu

* [BOX32] Fixed a typo in CI script

* [BOX32] Better scratch alignment and enabled more tests

* [BOX32] Added (partial) wrapped 32bits librt

* [BOX32] Added mention of Box32 in README

* [BOX32] Added pthread handling, and numerous fixes to 32bits handling. [ARM64_DYNAREC] Fixed access to segment with negative offset

* [BOX32] Added system libs and cpp testing, plus some more fixes

* [BOX32] Fix previous commit

* [BOX32] Better stack adjustment for 32bits processes

* [BOX32] Added getenv wrapped 32bits function and friends

* [BOX32] Don't look for box86 for a Box32 build

* [BOX32] Don't do 32bits cppThreads test for now on CI

* [BOX32] Enabled a few more 32bits tests

* [BOX32] For ld_lib_path for both CppThreads tests

* [BOX32] [ANDROID] Some Fixes for Android Build

* [BOX32] Still need to disable cppThread_32bits test on CI for some reason

* [BOX32] [ANDROID] Don't show PreInit Array Warning (#1751)

* [BOX32] [ANDROID] One More Fix for Android Build That I forgotten to … (#1752)

* [BOX32] [ANDROID] One More Fix for Android Build That I forgotten to push before

* [BOX32] [ANDROID] Try to Create __libc_init

* [BOX32] [ANDROID] Try to disable NEEDED_LIBS for now (libdl is not wrapped)

* [BOX32] Updated generated files

* [BOX32] Added 32bits context functions

* [BOX32] Added 32bits signal handling

* [BOX32] Added some missing 32bits elfloader functions

* [BOX32] Fix build on x86_64 machine

* [BOX32] Better fix for x86_64 build

* [BOX32] Actually added missing libs, and re-enabled cppThreads_32bits test

* [BOX32] Added wrapped 32bits libdl

* [BOX32] Try to re-enabled Box32 test on CI for ARM64 builds

* [BOX32] fine-tuning Box32 test on CI for ARM64 builds

* [BOX32] More fine-tuning to Box32 test on CI for ARM64 builds

* [BOX32] Enabled Box32 test on CI for LA64 and RV64 builds too

* [BOX32] re-Disabled Box32 test on CI for LA64 and RV64 builds, not working for now

* [BOX32] Temporarily disabled cppThreads_32bits test on CI

---------

Co-authored-by: KreitinnSoftware <pablopro5051@gmail.com>
Co-authored-by: KreitinnSoftware <80591934+KreitinnSoftware@users.noreply.github.com>
Diffstat (limited to 'src/libtools')
-rwxr-xr-xsrc/libtools/myalign32.c935
-rwxr-xr-xsrc/libtools/myalign64_32.c104
-rw-r--r--src/libtools/signal32.c842
-rw-r--r--src/libtools/signals.c15
-rw-r--r--src/libtools/threads.c35
-rwxr-xr-xsrc/libtools/threads32.c854
6 files changed, 2775 insertions, 10 deletions
diff --git a/src/libtools/myalign32.c b/src/libtools/myalign32.c
new file mode 100755
index 00000000..ee0b2790
--- /dev/null
+++ b/src/libtools/myalign32.c
@@ -0,0 +1,935 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <wchar.h>
+#include <sys/epoll.h>
+#include <fts.h>
+
+#include "x64emu.h"
+#include "emu/x64emu_private.h"
+#include "myalign32.h"
+#include "debug.h"
+#include "box32.h"
+
+/*
+ * Re-pack the variadic arguments of a 32-bit printf-style call into 64-bit slots.
+ *
+ *  fmt     : narrow printf-style format string; a NULL fmt is ignored.
+ *  st      : source arguments, read as consecutive 32-bit words.
+ *  mystack : destination, written as consecutive 64-bit slots
+ *            (a long double takes two slots).
+ *
+ * `state` tracks the position inside one conversion specification:
+ *   0       outside a conversion
+ *   1..4    after '%': no length modifier / 'l' or 'z' / 'll' / 'L'
+ *   +10     floating-point conversion seen, +20 signed integer, +40 unsigned integer
+ *   30      pointer-like conversion (n, p, s, S)
+ * The conversion character is only consumed (++p) by the handler that copies
+ * the argument, which is why the 10/20/30/40 transitions do not advance p.
+ */
+void myStackAlign32(const char* fmt, uint32_t* st, uint64_t* mystack)
+{
+    if(!fmt)
+        return;
+    // loop...
+    const char* p = fmt;
+    int state = 0;
+    double d;
+    while(*p)
+    {
+        switch(state) {
+            case 0:
+                switch(*p) {
+                    case '%': state = 1; ++p; break;
+                    default:
+                        ++p;
+                }
+                break;
+            case 1: // normal
+            case 2: // l
+            case 3: // ll
+            case 4: // L
+                switch(*p) {
+                    case '%': state = 0;  ++p; break; //%% = back to 0
+                    case 'l': ++state; if (state>3) state=3; ++p; break;
+                    case 'z': state = 2; ++p; break;
+                    case 'L': state = 4; ++p; break;
+                    case 'a':
+                    case 'A':
+                    case 'e':
+                    case 'E':
+                    case 'g':
+                    case 'G':
+                    case 'F':
+                    case 'f': state += 10; break;    //  float
+                    case 'd':
+                    case 'i':
+                    case 'o': state += 20; break;   // int
+                    case 'x':
+                    case 'X':
+                    case 'u': state += 40; break;   // uint
+                    case 'h': ++p; break;  // ignored...
+                    case '\'':
+                    case '0':
+                    case '1':
+                    case '2':
+                    case '3':
+                    case '4':
+                    case '5':
+                    case '6':
+                    case '7':
+                    case '8':
+                    case '9':
+                    case '.':
+                    case '+':
+                    case '-': ++p; break; // formating, ignored
+                    case 'm': state = 0; ++p; break; // no argument
+                    case 'n':
+                    case 'p':
+                    case 'S':
+                    case 's': state = 30; break; // pointers
+                    case '$': ++p; break; // should issue a warning, it's not handled...
+                    case '*': *(mystack++) = *(st++); ++p; break; // fetch an int in the stack....
+                    case ' ': state=0; ++p; break;
+                    default:
+                        state=20; // other stuff, put an int...
+                }
+                break;
+            case 11:    //double
+            case 12:    //%lg, still double
+            case 13:    //%llg, still double
+            case 23:    // 64bits int
+            case 43:    // 64bits uint
+                // st is only a uint32_t*, so it may not be 8-byte aligned:
+                // copy the 8 bytes instead of type-punning to uint64_t
+                memcpy(mystack, st, sizeof(uint64_t));
+                st+=2; mystack+=1;
+                state = 0;
+                ++p;
+                break;
+            case 22:    // long int
+                // 32-bit long widened through from_long()
+                *(int64_t*)mystack = from_long(*(long_t*)st);
+                st+=1; mystack+=1;
+                state = 0;
+                ++p;
+                break;
+            case 42:    // long uint
+                // 32-bit unsigned long widened through from_ulong()
+                *(uint64_t*)mystack = from_ulong(*(ulong_t*)st);
+                st+=1; mystack+=1;
+                state = 0;
+                ++p;
+                break;
+            case 14:    //%LG long double
+                // the source long double occupies 3 words (12 bytes) on the 32-bit side
+                #ifdef HAVE_LD80BITS
+                memcpy(mystack, st, 10);
+                st+=3; mystack+=2;
+                #else
+                LD2D((void*)st, &d);
+                {
+                    // mystack is a uint64_t*, which does not guarantee long double
+                    // alignment: store the converted value bytewise
+                    long double ld = (long double)d;
+                    memcpy(mystack, &ld, sizeof(ld));
+                }
+                st+=3; mystack+=2;
+                #endif
+                state = 0;
+                ++p;
+                break;
+            case 30:    //pointer
+                *(uintptr_t*)mystack = from_ptr(*st);
+                st++; mystack+=1;
+                state = 0;
+                ++p;
+                break;
+            case 20:    // fallback
+            case 21:
+            case 24:    // normal int / pointer
+            case 40:
+            case 41:
+                // one 32-bit word, zero-extended into the 64-bit slot
+                *mystack = *st;
+                ++mystack;
+                ++st;
+                state = 0;
+                ++p;
+                break;
+            default:
+                // whattt?
+                // NOTE(review): state 44 ('L' followed by x/X/u) has no handler and ends
+                // up here, so no argument is consumed for it -- confirm whether intended
+                state = 0;
+        }
+    }
+}
+
+/*
+ * Re-pack the variadic arguments described by a GVariant format string from
+ * 32-bit words (st) into 64-bit slots (mystack).
+ *
+ *  fmt     : GVariant format string; a NULL or empty fmt copies nothing.
+ *  st      : source arguments, read as consecutive 32-bit words.
+ *  mystack : destination, one 64-bit slot per argument. 32-bit values are
+ *            zero-extended; x/t/d are copied as 8 bytes.
+ *
+ * The scan stops at the end of the string, or as soon as one complete type has
+ * been handled (state back to 0 and no open '(' / '{').
+ *
+ * States: 0 plain type, 1 after 'a', 2 after 'm', 3 after '@', 4 after '^',
+ *         5 "^a", 6 "^a&", 7 "^aa" / "^a&a", 8 "^&", 9 "^&a".
+ *
+ * NOTE(review): outside of a tuple, '&' and the 'm' prefix leave state at 0 with
+ * no open block, so the loop ends before the type character that follows them is
+ * processed. Confirm callers only pass such types inside a tuple.
+ */
+void myStackAlignGVariantNew32(const char* fmt, uint32_t* st, uint64_t* mystack)
+{
+    if (!fmt)
+        return;
+
+    const char *p = fmt;
+    int state = 0;
+    int inblocks = 0;   // depth of '(' / '{' opened while in state 0
+    int tmp = 0;        // bracket depth inside an array / '@' type; was read uninitialised before
+
+    do {
+        switch(state) {
+            case 0: // Nothing
+                switch(*p) {
+                    case 'b': // gboolean
+                    case 'y': // guchar
+                    case 'n': // gint16
+                    case 'q': // guint16
+                    case 'i': // gint32
+                    case 'u': // guint32
+                    case 'h': // gint32
+                    case 's': // const gchar*
+                    case 'o':
+                    case 'g':
+                    case 'v': // GVariant*
+                    case '*': // GVariant* of any type
+                    case '?': // GVariant* of basic type
+                    case 'r': // GVariant* of tuple type
+                        *mystack = *st;
+                        ++mystack;
+                        ++st;
+                        break;
+                    case 'x': // gint64
+                    case 't': // guint64
+                    case 'd': // gdouble
+                        // st is only 32-bit aligned: copy the 8 bytes instead of type-punning
+                        memcpy(mystack, st, sizeof(uint64_t));
+                        st+=2; mystack+=1;
+                        break;
+                    case '{':
+                    case '(': ++inblocks; break;
+                    case '}':
+                    case ')': --inblocks; break;
+                    case 'a': state = 1; tmp = 0; break; // GVariantBuilder* or GVariantIter**
+                    case 'm': state = 2; break; // maybe types
+                    case '@': state = 3; tmp = 0; break; // GVariant* of type [type]
+                    case '^': state = 4; break; // pointer value
+                    case '&': break; // pointer: do nothing
+                }
+                break;
+            case 1: // Arrays
+                switch(*p) {
+                    case '{':
+                    case '(': ++tmp; break;
+                    case '}':
+                    case ')': --tmp; break;
+                }
+                if (*p == 'a') break;   // nested array marker: keep scanning the element type
+                if (tmp == 0) {
+                    // element type fully skipped: the array itself is one argument
+                    *mystack = *st;
+                    ++mystack;
+                    ++st;
+                    state = 0;
+                }
+                break;
+            case 2: // Maybe-types
+                switch(*p) {
+                    case 'b': // gboolean
+                    case 'y': // guchar
+                    case 'n': // gint16
+                    case 'q': // guint16
+                    case 'i': // gint32
+                    case 'u': // guint32
+                    case 'h': // gint32
+                    case 'x': // gint64
+                    case 't': // guint64
+                    case 'd': // gdouble
+                    case '{':
+                    case '}':
+                    case '(':
+                    case ')':
+                        // Add a gboolean or gboolean*, no char increment
+                        *mystack = *st;
+                        ++mystack;
+                        ++st;
+                        --p;    // cancels the ++p at the bottom of the loop
+                        state = 0;
+                        break;
+                    case 'a': // GVariantBuilder* or GVariantIter**
+                    case 's': // const gchar*
+                    case 'o':
+                    case 'g':
+                    case 'v': // GVariant*
+                    case '@': // GVariant* of type [type]
+                    case '*': // GVariant* of any type
+                    case '?': // GVariant* of basic type
+                    case 'r': // GVariant* of tuple type
+                    case '&': // pointer
+                    case '^': // pointer value
+                        // Just maybe-NULL
+                        --p;
+                        state = 0;
+                        break;
+
+                    default: // Default to add a gboolean & reinit state?
+                        *mystack = *st;
+                        ++mystack;
+                        ++st;
+                        --p;
+                        state = 0;
+                }
+                break;
+            case 3: // GVariant*
+                switch(*p) {
+                    case '{':
+                    case '(': ++tmp; break;
+                    case '}':
+                    case ')': --tmp; break;
+                    case 'a': // GVariantBuilder* or GVariantIter**
+                        do { ++p; } while(*p == 'a'); // Use next character which is not an array (array definition)
+                        switch(*p) {
+                            case '{':
+                            case '(': ++tmp; break;
+                            case '}':
+                            case ')': --tmp; break;
+                        }
+                        break;
+                }
+                if (tmp == 0) {
+                    *mystack = *st;
+                    ++mystack;
+                    ++st;
+                    state = 0;
+                }
+                break;
+            case 4: // ^
+                if (*p == 'a') state = 5;
+                else if (*p == '&') state = 8;
+                else state = 0; //???
+                break;
+            case 5: // ^a
+                if ((*p == 's') || (*p == 'o') || (*p == 'y')) {
+                    *mystack = *st;
+                    ++mystack;
+                    ++st;
+                    state = 0;
+                } else if (*p == '&') state = 6;
+                else if (*p == 'a') state = 7;
+                else state = 0; //???
+                break;
+            case 6: // ^a&
+                if ((*p == 's') || (*p == 'o')) {
+                    *mystack = *st;
+                    ++mystack;
+                    ++st;
+                    state = 0;
+                } else if (*p == 'a') state = 7;
+                else state = 0; //???
+                break;
+            case 7: // ^aa / ^a&a
+                if (*p == 'y') {
+                    *mystack = *st;
+                    ++mystack;
+                    ++st;
+                    state = 0;
+                } else state = 0; //???
+                break;  // was missing: fell through to 8/9 and copied a second word for "^aay"
+            case 8: // ^&
+                if (*p == 'a') state = 9;
+                else state = 0; //???
+                break;  // was missing: fell through to 9, which reset the state before 'y' was seen
+            case 9: // ^&a
+                if (*p == 'y') {
+                    *mystack = *st;
+                    ++mystack;
+                    ++st;
+                    state = 0;
+                } else state = 0; //???
+                break;
+        }
+        // never step past the terminating NUL (empty format, or NUL reached
+        // while skipping 'a' characters in state 3)
+        if (!*p)
+            break;
+        ++p;
+    } while (*p && (inblocks || state));
+}
+
+/*
+ * Wide-character variant of myStackAlign32: re-pack the variadic arguments of a
+ * 32-bit wprintf-style call into 64-bit slots.
+ *
+ *  fmt     : format string, read as an array of wchar_t; a NULL fmt is ignored.
+ *  st      : source arguments, read as consecutive 32-bit words.
+ *  mystack : destination, written as consecutive 64-bit slots
+ *            (a long double takes two slots).
+ *
+ * `state` tracks the position inside one conversion specification:
+ *   0       outside a conversion
+ *   1..4    after '%': no length modifier / 'l' or 'z' / 'll' / 'L'
+ *   +10     floating-point conversion seen, +20 signed integer, +40 unsigned integer
+ *   30      pointer-like conversion (n, p, s, S)
+ */
+void myStackAlignW32(const char* fmt, uint32_t* st, uint64_t* mystack)
+{
+    // same guard as myStackAlign32: nothing to do without a format
+    if(!fmt)
+        return;
+    // loop...
+    const wchar_t* p = (const wchar_t*)fmt;
+    int state = 0;
+    double d;
+    while(*p)
+    {
+        switch(state) {
+            case 0:
+                switch(*p) {
+                    case '%': state = 1; ++p; break;
+                    default:
+                        ++p;
+                }
+                break;
+            case 1: // normal
+            case 2: // l
+            case 3: // ll
+            case 4: // L
+                switch(*p) {
+                    case '%': state = 0;  ++p; break; //%% = back to 0
+                    case 'l': ++state; if (state>3) state=3; ++p; break;
+                    case 'z': state = 2; ++p; break;
+                    case 'L': state = 4; ++p; break;
+                    case 'a':
+                    case 'A':
+                    case 'e':
+                    case 'E':
+                    case 'g':
+                    case 'G':
+                    case 'F':
+                    case 'f': state += 10; break;    //  float
+                    case 'd':
+                    case 'i':
+                    case 'o': state += 20; break;   // int
+                    case 'x':
+                    case 'X':
+                    case 'u': state += 40; break;   // unsigned
+                    case 'h': ++p; break;  // ignored...
+                    case '\'':
+                    case '0':
+                    case '1':
+                    case '2':
+                    case '3':
+                    case '4':
+                    case '5':
+                    case '6':
+                    case '7':
+                    case '8':
+                    case '9':
+                    case '.':
+                    case '+':
+                    case '-': ++p; break; // formating, ignored
+                    case 'm': state = 0; ++p; break; // no argument
+                    case 'n':
+                    case 'p':
+                    case 'S':
+                    case 's': state = 30; break; // pointers
+                    case '$': ++p; break; // should issue a warning, it's not handled...
+                    case '*': *(mystack++) = *(st++); ++p; break; //fetch an int in the stack
+                    case ' ': state=0; ++p; break;
+                    default:
+                        state=20; // other stuff, put an int...
+                }
+                break;
+            case 11:    //double
+            case 12:    //%lg, still double
+            case 13:    //%llg, still double
+            case 23:    // 64bits int
+            case 43:    // 64bits uint
+                // st is only a uint32_t*, so it may not be 8-byte aligned:
+                // copy the 8 bytes instead of type-punning to uint64_t
+                memcpy(mystack, st, sizeof(uint64_t));
+                st+=2; mystack+=1;
+                state = 0;
+                ++p;
+                break;
+            case 22:    // long int
+                // 32-bit long widened through from_long()
+                *(int64_t*)mystack = from_long(*(long_t*)st);
+                st+=1; mystack+=1;
+                state = 0;
+                ++p;
+                break;
+            case 42:    // long uint
+                // 32-bit unsigned long widened through from_ulong()
+                *(uint64_t*)mystack = from_ulong(*(ulong_t*)st);
+                st+=1; mystack+=1;
+                state = 0;
+                ++p;
+                break;
+            case 14:    //%LG long double
+                // the source long double occupies 3 words (12 bytes) on the 32-bit side
+                #ifdef HAVE_LD80BITS
+                memcpy(mystack, st, 10);
+                st+=3; mystack+=2;
+                #else
+                LD2D((void*)st, &d);
+                {
+                    // mystack is a uint64_t*, which does not guarantee long double
+                    // alignment: store the converted value bytewise
+                    long double ld = (long double)d;
+                    memcpy(mystack, &ld, sizeof(ld));
+                }
+                st+=3; mystack+=2;
+                #endif
+                state = 0;
+                ++p;
+                break;
+            case 30:    //pointer
+                *(uintptr_t*)mystack = from_ptr(*st);
+                st++; mystack+=1;
+                state = 0;
+                ++p;
+                break;
+            case 20:    // fallback
+            case 21:
+            case 24:    // normal int / pointer
+            case 40:
+            case 41:    // plain %u / %x / %X: was missing here, so the argument was never copied
+                // one 32-bit word, zero-extended into the 64-bit slot
+                *mystack = *st;
+                ++mystack;
+                ++st;
+                state = 0;
+                ++p;
+                break;
+            default:
+                // whattt?
+                // NOTE(review): state 44 ('L' followed by x/X/u) has no handler and ends
+                // up here, so no argument is consumed for it -- confirm whether intended
+                state = 0;
+        }
+    }
+}
+
+
+#if 0
+
+typedef struct __attribute__((packed)) {    // packed layout; embedded as member 'os' of OggVorbis_x64. Lives in the #if 0 region, so it is currently not compiled
+  unsigned char   *body_data;
+  long    body_storage;
+  long    body_fill;
+  long    body_returned;
+
+
+  int     *lacing_vals;
+  int64_t *granule_vals;
+  long    lacing_storage;
+  long    lacing_fill;
+  long    lacing_packet;
+  long    lacing_returned;
+
+  unsigned char    header[282];
+  int              header_fill __attribute__ ((aligned (4)));    // explicitly 4-byte aligned although the struct is packed (header[] is 282 bytes long)
+
+  int     e_o_s;
+  int     b_o_s;
+  long    serialno;
+  long    pageno;
+  int64_t  packetno;
+  int64_t   granulepos;
+
+} ogg_stream_state_x64;
+
+typedef struct __attribute__((packed)) vorbis_dsp_state_x64 {    // packed layout; embedded as member 'vd' of OggVorbis_x64 and converted by (Un)AlignVorbisDspState. Not compiled (#if 0 region)
+  int analysisp;
+  void *vi; //vorbis_info
+
+  float **pcm;
+  float **pcmret;
+  int      pcm_storage;
+  int      pcm_current;
+  int      pcm_returned;
+
+  int  preextrapolate;
+  int  eofflag;
+
+  long lW;
+  long W;
+  long nW;
+  long centerW;
+
+  int64_t granulepos;
+  int64_t sequence;
+
+  int64_t glue_bits;
+  int64_t time_bits;
+  int64_t floor_bits;
+  int64_t res_bits;
+
+  void       *backend_state;
+} vorbis_dsp_state_x64;
+
+typedef struct __attribute__((packed)) {    // packed layout; embedded as member 'opb' of vorbis_block_x64. Not compiled (#if 0 region)
+  long endbyte;
+  int  endbit;
+
+  unsigned char *buffer;
+  unsigned char *ptr;
+  long storage;
+} oggpack_buffer_x64;
+
+typedef struct __attribute__((packed)) vorbis_block_x64 {    // packed layout; embedded as member 'vb' of OggVorbis_x64 and converted by (Un)AlignVorbisBlock. Not compiled (#if 0 region)
+
+  float  **pcm;
+  oggpack_buffer_x64 opb;
+
+  long  lW;
+  long  W;
+  long  nW;
+  int   pcmend;
+  int   mode;
+
+  int         eofflag;
+  int64_t granulepos;
+  int64_t sequence;
+  void *vd;    // compared against &src->vd and re-pointed at &dst->vd by AlignOggVorbis / UnalignOggVorbis
+  
+  void               *localstore;
+  long                localtop;
+  long                localalloc;
+  long                totaluse;
+  void *reap;
+
+  long glue_bits;    // declared 'long' here, while vorbis_dsp_state_x64 declares its *_bits members as int64_t
+  long time_bits;
+  long floor_bits;
+  long res_bits;
+
+  void *internal;
+
+} vorbis_block_x64;
+
+typedef struct __attribute__((packed)) OggVorbis_x64  {    // packed layout converted field by field by AlignOggVorbis / UnalignOggVorbis. Not compiled (#if 0 region)
+  void            *datasource; /* Pointer to a FILE *, etc. */
+  int              seekable;
+  int64_t      offset;
+  int64_t      end;
+  ogg_sync_state   oy;    // copied as one opaque blob: GOM(oy, sizeof(ogg_sync_state)) in the TRANSFERT list
+
+  /* If the FILE handle isn't seekable (eg, a pipe), only the current
+     stream appears */
+  int              links;
+  int64_t     *offsets;
+  int64_t     *dataoffsets;
+  long            *serialnos;
+  int64_t     *pcmlengths; /* overloaded to maintain binary
+                                  compatibility; x2 size, stores both
+                                  beginning and end values */
+  void     *vi; //vorbis_info
+  void  *vc;    //vorbis_comment
+
+  /* Decoding working state local storage */
+  int64_t      pcm_offset;
+  int              ready_state;
+  long             current_serialno;
+  int              current_link;
+
+  double           bittrack;    // copied together with samptrack by GOM(bittrack, 16)
+  double           samptrack;
+
+  ogg_stream_state_x64 os; /* take physical pages, weld into a logical
+                          stream of packets */
+  vorbis_dsp_state_x64 vd; /* central working state for the packet->PCM decoder */
+  vorbis_block_x64     vb; /* local working space for packet->PCM decode */
+
+  ov_callbacks callbacks;    // copied as one opaque blob: GOM(callbacks, sizeof(ov_callbacks))
+
+} OggVorbis_x64;
+
+#define TRANSFERT /* field list shared by AlignOggVorbis and UnalignOggVorbis: GO(field) is a plain assignment, GOM(field, size) a memcpy of size bytes */ \
+GO(datasource) \
+GO(seekable) \
+GO(offset) \
+GO(end) \
+GOM(oy, sizeof(ogg_sync_state)) \
+GO(links) \
+GO(offsets) \
+GO(dataoffsets) \
+GO(serialnos) \
+GO(pcmlengths) \
+GO(vi) \
+GO(vc) \
+GO(pcm_offset) \
+GO(ready_state) \
+GO(current_serialno) \
+GO(current_link) \
+GOM(bittrack, 16) /* 16 bytes: bittrack plus the samptrack double that follows it in OggVorbis_x64 */ \
+GO(os.body_data) \
+GO(os.body_storage) \
+GO(os.body_fill) \
+GO(os.body_returned) \
+GO(os.lacing_vals) \
+GO(os.granule_vals) \
+GO(os.lacing_storage) \
+GO(os.lacing_fill) \
+GO(os.lacing_packet) \
+GO(os.lacing_returned) \
+GOM(os.header, 282) \
+GO(os.header_fill) \
+GO(os.e_o_s) \
+GO(os.b_o_s) \
+GO(os.serialno) \
+GO(os.pageno) \
+GO(os.packetno) \
+GO(os.granulepos) \
+GO(vd.analysisp) \
+GO(vd.vi) \
+GO(vd.pcm) \
+GO(vd.pcmret) \
+GO(vd.pcm_storage) \
+GO(vd.pcm_current) \
+GO(vd.pcm_returned) \
+GO(vd.preextrapolate) \
+GO(vd.eofflag) \
+GO(vd.lW) \
+GO(vd.W) \
+GO(vd.nW) \
+GO(vd.centerW) \
+GO(vd.granulepos) \
+GO(vd.sequence) \
+GO(vd.glue_bits) \
+GO(vd.time_bits) \
+GO(vd.floor_bits) \
+GO(vd.res_bits) \
+GO(vd.backend_state) \
+GO(vb.pcm) \
+GO(vb.opb.endbyte) \
+GO(vb.opb.endbit) \
+GO(vb.opb.buffer) \
+GO(vb.opb.ptr) \
+GO(vb.opb.storage) \
+GO(vb.lW) \
+GO(vb.W) \
+GO(vb.nW) \
+GO(vb.pcmend) \
+GO(vb.mode) \
+GO(vb.eofflag) \
+GO(vb.granulepos) \
+GO(vb.sequence) \
+GO(vb.localstore) \
+GO(vb.localtop) \
+GO(vb.localalloc) \
+GO(vb.totaluse) \
+GO(vb.reap) \
+GO(vb.glue_bits) \
+GO(vb.time_bits) \
+GO(vb.floor_bits) \
+GO(vb.res_bits) \
+GO(vb.internal) \
+GOM(callbacks, sizeof(ov_callbacks))
+
+void AlignOggVorbis(void* dest, void* source)
+{
+     // x64 -> Arm: reads the packed OggVorbis_x64 (source) and fills the OggVorbis (dest)
+     OggVorbis_x64* src = (OggVorbis_x64*)source;
+     OggVorbis*     dst = (OggVorbis*)dest;
+
+     #define GO(A) dst->A = src->A;
+     #define GOM(A, S) memcpy(&dst->A, &src->A, S);
+     TRANSFERT    // one GO()/GOM() per listed field; note vb.vd is not in the list
+     #undef GO
+     #undef GOM
+     dst->vb.vd = (src->vb.vd == &src->vd)?&dst->vd:(vorbis_dsp_state*)src->vb.vd;    // a vb.vd that pointed at the embedded vd is re-pointed at dst's own vd; any other value is copied as is
+}
+void UnalignOggVorbis(void* dest, void* source)
+{
+    // Arm -> x64: reads the OggVorbis (source) and fills the packed OggVorbis_x64 (dest)
+     OggVorbis_x64* dst = (OggVorbis_x64*)dest;
+     OggVorbis*     src = (OggVorbis*)source;
+
+     #define GO(A) dst->A = src->A;
+     #define GOM(A, S) memcpy(&dst->A, &src->A, S);
+     TRANSFERT    // one GO()/GOM() per listed field; note vb.vd is not in the list
+     #undef GO
+     #undef GOM
+     dst->vb.vd = (src->vb.vd == &src->vd)?&dst->vd:(vorbis_dsp_state_x64*)src->vb.vd;    // a vb.vd that pointed at the embedded vd is re-pointed at dst's own vd; any other value is copied as is
+}
+#undef TRANSFERT
+
+#define TRANSFERT /* field list shared by UnalignVorbisDspState and AlignVorbisDspState; only GO() entries, GOM() is not used by this list */ \
+GO(analysisp) \
+GO(vi) \
+GO(pcm) \
+GO(pcmret) \
+GO(pcm_storage) \
+GO(pcm_current) \
+GO(pcm_returned) \
+GO(preextrapolate) \
+GO(eofflag) \
+GO(lW) \
+GO(W) \
+GO(nW) \
+GO(centerW) \
+GO(granulepos) \
+GO(sequence) \
+GO(glue_bits) \
+GO(time_bits) \
+GO(floor_bits) \
+GO(res_bits) \
+GO(backend_state)
+
+void UnalignVorbisDspState(void* dest, void* source)
+{
+    // Arm -> x64: source is read as vorbis_dsp_state, dest is written as the packed vorbis_dsp_state_x64
+     #define GO(A) ((vorbis_dsp_state_x64*)dest)->A = ((vorbis_dsp_state*)source)->A;
+     #define GOM(A, S) memcpy(&((vorbis_dsp_state_x64*)dest)->A, &((vorbis_dsp_state*)source)->A, S);
+     TRANSFERT    // expands to one assignment per listed field
+     #undef GO
+     #undef GOM
+}
+void AlignVorbisDspState(void* dest, void* source)
+{
+    // x64 -> Arm: source is read as the packed vorbis_dsp_state_x64, dest is written as vorbis_dsp_state
+     #define GO(A) ((vorbis_dsp_state*)dest)->A = ((vorbis_dsp_state_x64*)source)->A;
+     #define GOM(A, S) memcpy(&((vorbis_dsp_state*)dest)->A, &((vorbis_dsp_state_x64*)source)->A, S);
+     TRANSFERT    // expands to one assignment per listed field
+     #undef GO
+     #undef GOM
+}
+#undef TRANSFERT
+
+#define TRANSFERT /* field list shared by UnalignVorbisBlock and AlignVorbisBlock; only GO() entries, and vd is copied as a plain pointer value */ \
+GO(pcm) \
+GO(opb.endbyte) \
+GO(opb.endbit) \
+GO(opb.buffer) \
+GO(opb.ptr) \
+GO(opb.storage) \
+GO(lW) \
+GO(W) \
+GO(nW) \
+GO(pcmend) \
+GO(mode) \
+GO(eofflag) \
+GO(granulepos) \
+GO(sequence) \
+GO(vd) \
+GO(localstore) \
+GO(localtop) \
+GO(localalloc) \
+GO(totaluse) \
+GO(reap) \
+GO(glue_bits) \
+GO(time_bits) \
+GO(floor_bits) \
+GO(res_bits) \
+GO(internal)
+
+void UnalignVorbisBlock(void* dest, void* source)
+{
+    // Arm -> x64: source is read as vorbis_block, dest is written as the packed vorbis_block_x64
+     #define GO(A) ((vorbis_block_x64*)dest)->A = ((vorbis_block*)source)->A;
+     #define GOM(A, S) memcpy(&((vorbis_block_x64*)dest)->A, &((vorbis_block*)source)->A, S);
+     TRANSFERT    // expands to one assignment per listed field
+     #undef GO
+     #undef GOM
+}
+void AlignVorbisBlock(void* dest, void* source)
+{
+    // x64 -> Arm: source is read as the packed vorbis_block_x64, dest is written as vorbis_block
+     #define GO(A) ((vorbis_block*)dest)->A = ((vorbis_block_x64*)source)->A;
+     #define GOM(A, S) memcpy(&((vorbis_block*)dest)->A, &((vorbis_block_x64*)source)->A, S);
+     TRANSFERT    // expands to one assignment per listed field
+     #undef GO
+     #undef GOM
+}
+
+#undef TRANSFERT
+
+typedef union __attribute__((packed)) x64_epoll_data {    // packed union used as the 'data' member of struct x64_epoll_event; the converters below only access it through u64
+    void    *ptr;
+    int      fd;
+    uint32_t u32;
+    uint64_t u64;
+} x64_epoll_data_t;
+
+struct __attribute__((packed)) x64_epoll_event {    // packed: 'data' directly follows the 32-bit 'events' field, with no padding in between
+    uint32_t            events;
+    x64_epoll_data_t    data;
+};
+// Arm -> x64: converts nbr consecutive struct epoll_event entries (source) into packed struct x64_epoll_event entries (dest)
+void UnalignEpollEvent(void* dest, void* source, int nbr)
+{
+    struct x64_epoll_event *x64_struct = (struct x64_epoll_event*)dest;
+    struct epoll_event *arm_struct = (struct epoll_event*)source;
+    while(nbr) {    // nbr is counted down to 0; a negative nbr would not terminate here
+        x64_struct->events = arm_struct->events;
+        x64_struct->data.u64 = arm_struct->data.u64;    // the whole data union is carried over through its 64-bit member
+        ++x64_struct;
+        ++arm_struct;
+        --nbr;
+    }
+}
+
+// x64 -> Arm: converts nbr consecutive packed struct x64_epoll_event entries (source) into struct epoll_event entries (dest)
+void AlignEpollEvent(void* dest, void* source, int nbr)
+{
+    struct x64_epoll_event *x64_struct = (struct x64_epoll_event*)source;
+    struct epoll_event *arm_struct = (struct epoll_event*)dest;
+    while(nbr) {    // nbr is counted down to 0; a negative nbr would not terminate here
+        arm_struct->events = x64_struct->events;
+        arm_struct->data.u64 = x64_struct->data.u64;    // the whole data union is carried over through its 64-bit member
+        ++x64_struct;
+        ++arm_struct;
+        --nbr;
+    }
+}
+
+typedef struct __attribute__((packed)) x64_SMPEG_Info_s {    // packed counterpart of my_SMPEG_Info_t, converted by UnalignSmpegInfo / AlignSmpegInfo. Not compiled (#if 0 region)
+    int has_audio;
+    int has_video;
+    int width;
+    int height;
+    int current_frame;
+    double current_fps;
+    char audio_string[80];    // copied with GOM(audio_string, 80)
+    int  audio_current_frame;
+    uint32_t current_offset;
+    uint32_t total_size;
+    double current_time;
+    double total_time;
+} x64_SMPEG_Info_t;
+
+#define TRANSFERT /* field list shared by UnalignSmpegInfo and AlignSmpegInfo: GO() assigns a field, GOM() memcpy's the 80-byte audio_string */ \
+GO(has_audio) \
+GO(has_video) \
+GO(width) \
+GO(height) \
+GO(current_frame) \
+GO(current_fps) \
+GOM(audio_string, 80) \
+GO(audio_current_frame) \
+GO(current_offset) \
+GO(total_size) \
+GO(current_time) \
+GO(total_time)
+
+
+// Arm -> x64: source is read as my_SMPEG_Info_t, dest is written as the packed x64_SMPEG_Info_t
+void UnalignSmpegInfo(void* dest, void* source)
+{
+    #define GO(A) ((x64_SMPEG_Info_t*)dest)->A = ((my_SMPEG_Info_t*)source)->A;
+    #define GOM(A, S) memcpy(&((x64_SMPEG_Info_t*)dest)->A, &((my_SMPEG_Info_t*)source)->A, S);
+    TRANSFERT    // expands to one GO()/GOM() per listed field
+    #undef GO
+    #undef GOM
+}
+// x64 -> Arm: source is read as the packed x64_SMPEG_Info_t, dest is written as my_SMPEG_Info_t
+void AlignSmpegInfo(void* dest, void* source)
+{
+    #define GO(A) ((my_SMPEG_Info_t*)dest)->A = ((x64_SMPEG_Info_t*)source)->A;
+    #define GOM(A, S) memcpy(&((my_SMPEG_Info_t*)dest)->A, &((x64_SMPEG_Info_t*)source)->A, S);
+    TRANSFERT    // expands to one GO()/GOM() per listed field
+    #undef GO
+    #undef GOM
+}
+#undef TRANSFERT
+
+#define TRANSFERT   /* field list shared by UnalignFTSENT and AlignFTSENT: GOV() assigns through a (void*) cast, GO() assigns directly, GOM() uses memcpy */ \
+GOV(fts_cycle)      \
+GOV(fts_parent)     \
+GOV(fts_link)       \
+GO(fts_number)      \
+GO(fts_pointer)     \
+GO(fts_accpath)     \
+GO(fts_path)        \
+GO(fts_errno)       \
+GO(fts_symfd)       \
+GO(fts_pathlen)     \
+GO(fts_namelen)     \
+GO(fts_ino)         \
+GO(fts_dev)         \
+GO(fts_nlink)       \
+GO(fts_level)       \
+GO(fts_info)        \
+GO(fts_flags)       \
+GO(fts_instr)       \
+GO(fts_statp)       \
+GOM(fts_name, sizeof(void*)) /* NOTE(review): only sizeof(void*) bytes of fts_name are copied -- confirm this is the intended amount */
+
+// Arm -> x64: source is read as FTSENT, dest is written as x64_ftsent_t (a type not declared in this file)
+void UnalignFTSENT(void* dest, void* source)
+{
+    #define GO(A) ((x64_ftsent_t*)dest)->A = ((FTSENT*)source)->A;
+    #define GOV(A) ((x64_ftsent_t*)dest)->A = (void*)((FTSENT*)source)->A;
+    #define GOM(A, S) memcpy(&((x64_ftsent_t*)dest)->A, &((FTSENT*)source)->A, S);
+    TRANSFERT    // expands to one GO()/GOV()/GOM() per listed field
+    #undef GO
+    #undef GOV
+    #undef GOM
+}
+// x64 -> Arm: source is read as x64_ftsent_t (a type not declared in this file), dest is written as FTSENT
+void AlignFTSENT(void* dest, void* source)
+{
+    #define GO(A) ((FTSENT*)dest)->A = ((x64_ftsent_t*)source)->A;
+    #define GOV(A) ((FTSENT*)dest)->A = (void*)((x64_ftsent_t*)source)->A;
+    #define GOM(A, S) memcpy(&((FTSENT*)dest)->A, &((x64_ftsent_t*)source)->A, S);
+    TRANSFERT    // expands to one GO()/GOV()/GOM() per listed field
+    #undef GO
+    #undef GOV
+    #undef GOM
+}
+#undef TRANSFERT
+
+void alignNGValue(my_GValue_t* v, void* value, int n)    // unpacks n records from the byte buffer 'value' into the my_GValue_t array 'v'
+{
+    while(n) {    // n is counted down to 0
+        v->g_type = *(int*)value;    // each packed record starts with a 4-byte type
+        memcpy(v->data, value+4, 2*sizeof(double));    // followed by 2*sizeof(double) bytes of payload
+        ++v;
+        value+=4+2*sizeof(double);    // arithmetic on void* (compiler extension): advance to the next packed record
+        --n;
+    }
+}
+void unalignNGValue(void* value, my_GValue_t* v, int n)    // packs n my_GValue_t entries from 'v' into the byte buffer 'value'
+{
+    while(n) {    // n is counted down to 0
+        *(int*)value = v->g_type;    // each packed record starts with a 4-byte type
+        memcpy(value+4, v->data, 2*sizeof(double));    // followed by 2*sizeof(double) bytes of payload
+        ++v;
+        value+=4+2*sizeof(double);    // arithmetic on void* (compiler extension): advance to the next packed record
+        --n;
+    }
+}
+
+#endif
\ No newline at end of file
diff --git a/src/libtools/myalign64_32.c b/src/libtools/myalign64_32.c
new file mode 100755
index 00000000..8f11c5b2
--- /dev/null
+++ b/src/libtools/myalign64_32.c
@@ -0,0 +1,104 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <asm/stat.h>
+#include <sys/vfs.h>
+
+#include "x64emu.h"
+#include "emu/x64emu_private.h"
+#include "myalign32.h"
+#include "box32.h"
+
+// Fill the i386 stat64 structure at dest from the struct stat at source.
+// The two padding arrays are zeroed; the inode is written to both __st_ino
+// and st_ino.
+void UnalignStat64_32(const void* source, void* dest)
+{
+    struct stat *native = (struct stat*) source;
+    struct i386_stat64 *out = (struct i386_stat64*)dest;
+
+    // clear the padding areas first
+    memset(out->__pad0, 0, sizeof(out->__pad0));
+    memset(out->__pad3, 0, sizeof(out->__pad3));
+
+    // file identity and ownership
+    out->st_dev     = native->st_dev;
+    out->__st_ino   = native->st_ino;
+    out->st_ino     = native->st_ino;
+    out->st_mode    = native->st_mode;
+    out->st_nlink   = native->st_nlink;
+    out->st_uid     = native->st_uid;
+    out->st_gid     = native->st_gid;
+    out->st_rdev    = native->st_rdev;
+
+    // sizes
+    out->st_size    = native->st_size;
+    out->st_blksize = native->st_blksize;
+    out->st_blocks  = native->st_blocks;
+
+    // timestamps (seconds + nanoseconds)
+    out->st_atime       = native->st_atime;
+    out->st_atime_nsec  = native->st_atime_nsec;
+    out->st_mtime       = native->st_mtime;
+    out->st_mtime_nsec  = native->st_mtime_nsec;
+    out->st_ctime       = native->st_ctime;
+    out->st_ctime_nsec  = native->st_ctime_nsec;
+}
+
+// Filesystem id as embedded in struct native_statfs64: two ints.
+struct native_fsid {
+  int     val[2];
+};
+
+// Layout that UnalignStatFS64_32() uses to read its source buffer.
+// NOTE(review): presumably mirrors the host's statfs64 layout - confirm per target.
+struct native_statfs64 {
+  uint32_t    f_type;
+  uint32_t    f_bsize;
+  uint64_t    f_blocks;
+  uint64_t    f_bfree;
+  uint64_t    f_bavail;
+  uint64_t    f_files;
+  uint64_t    f_ffree;
+  struct native_fsid f_fsid;
+  uint32_t    f_namelen;
+  uint32_t    f_frsize;
+  uint32_t    f_flags;
+  uint32_t    f_spare[4];
+};  // f_flags is not always defined, but then f_spare is [5] in that case
+
+
+// Fill the i386 statfs64 structure at dest from the native_statfs64 at source.
+// f_fsid is copied bytewise (sized by the destination field); the four
+// f_spare words are copied one by one.
+void UnalignStatFS64_32(const void* source, void* dest)
+{
+    struct native_statfs64 *native = (struct native_statfs64*) source;
+    struct i386_statfs64 *out = (struct i386_statfs64*)dest;
+
+    out->f_type     = native->f_type;
+    out->f_bsize    = native->f_bsize;
+    out->f_blocks   = native->f_blocks;
+    out->f_bfree    = native->f_bfree;
+    out->f_bavail   = native->f_bavail;
+    out->f_files    = native->f_files;
+    out->f_ffree    = native->f_ffree;
+    memcpy(&out->f_fsid, &native->f_fsid, sizeof(out->f_fsid));
+    out->f_namelen  = native->f_namelen;
+    out->f_frsize   = native->f_frsize;
+    out->f_flags    = native->f_flags;
+    for(int i=0; i<4; ++i)
+        out->f_spare[i] = native->f_spare[i];
+}
+// The flock64 conversion helpers below are compiled out by this #if 0.
+#if 0
+#define TRANSFERT   \
+GO(l_type)          \
+GO(l_whence)        \
+GO(l_start)         \
+GO(l_len)           \
+GO(l_pid)
+
+// Arm -> x64
+// Assigns each TRANSFERT field from the my_flock64_t at source to the x64_flock64_t at dest.
+void UnalignFlock64_32(void* dest, void* source)
+{
+    #define GO(A) ((x64_flock64_t*)dest)->A = ((my_flock64_t*)source)->A;
+    TRANSFERT
+    #undef GO
+}
+
+// x64 -> Arm
+// Assigns each TRANSFERT field from the x64_flock64_t at source to the my_flock64_t at dest.
+void AlignFlock64_32(void* dest, void* source)
+{
+    #define GO(A) ((my_flock64_t*)dest)->A = ((x64_flock64_t*)source)->A;
+    TRANSFERT
+    #undef GO
+}
+#undef TRANSFERT
+#endif
\ No newline at end of file
diff --git a/src/libtools/signal32.c b/src/libtools/signal32.c
new file mode 100644
index 00000000..54d68143
--- /dev/null
+++ b/src/libtools/signal32.c
@@ -0,0 +1,842 @@
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <signal.h>
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+#include <syscall.h>
+#include <stddef.h>
+#include <stdarg.h>
+#include <ucontext.h>
+#include <setjmp.h>
+#include <sys/mman.h>
+#include <pthread.h>
+#ifndef ANDROID
+#include <execinfo.h>
+#endif
+
+#include "box32context.h"
+#include "debug.h"
+#include "x64emu.h"
+#include "emu/x64emu_private.h"
+#include "emu/x64run_private.h"
+#include "signals.h"
+#include "box64stack.h"
+#include "dynarec.h"
+#include "callback.h"
+#include "x64run.h"
+#include "elfloader.h"
+#include "threads.h"
+#include "emu/x87emu_private.h"
+#include "custommem.h"
+#ifdef DYNAREC
+#include "dynablock.h"
+#include "../dynarec/dynablock_private.h"
+#include "dynarec_native.h"
+#endif
+
+
+/* Definitions taken from the kernel headers.  */
+/* Indices into i386_gregset_t (I386_GS = 0 ... I386_SS = 18). Each enumerator
+   is also defined as a macro expanding to itself. */
+enum
+{
+  I386_GS = 0,
+# define I386_GS         I386_GS
+  I386_FS,
+# define I386_FS         I386_FS
+  I386_ES,
+# define I386_ES         I386_ES
+  I386_DS,
+# define I386_DS         I386_DS
+  I386_EDI,
+# define I386_EDI        I386_EDI
+  I386_ESI,
+# define I386_ESI        I386_ESI
+  I386_EBP,
+# define I386_EBP        I386_EBP
+  I386_ESP,
+# define I386_ESP        I386_ESP
+  I386_EBX,
+# define I386_EBX        I386_EBX
+  I386_EDX,
+# define I386_EDX        I386_EDX
+  I386_ECX,
+# define I386_ECX        I386_ECX
+  I386_EAX,
+# define I386_EAX        I386_EAX
+  I386_TRAPNO,
+# define I386_TRAPNO        I386_TRAPNO
+  I386_ERR,
+# define I386_ERR        I386_ERR
+  I386_EIP,
+# define I386_EIP        I386_EIP
+  I386_CS,
+# define I386_CS                I386_CS
+  I386_EFL,
+# define I386_EFL        I386_EFL
+  I386_UESP,
+# define I386_UESP        I386_UESP
+  I386_SS
+# define I386_SS        I386_SS
+};
+
+/* One 32-bit slot per enumerator above (19 entries). */
+typedef uint32_t i386_gregset_t[19];
+/* One x87 register as stored in i386_fpstate._st: 4x16-bit significand plus a
+   16-bit exponent, packed (10 bytes). */
+struct i386_fpreg
+{
+  uint16_t significand[4];
+  uint16_t exponent;
+}__attribute__((packed));
+
+/* One x87 register as stored in i386_fpstate._fxsr_st: same significand and
+   exponent fields as i386_fpreg, followed by three 16-bit padding words. */
+struct i386_fpxreg
+{
+  unsigned short significand[4];
+  unsigned short exponent;
+  unsigned short padding[3];
+}__attribute__((packed));
+
+/* One XMM register as stored in i386_fpstate._xmm: four 32-bit elements. */
+struct i386_xmmreg
+{
+  uint32_t          element[4];
+}__attribute__((packed));
+
+/* Floating-point state block referenced by i386_mcontext_t.fpregs.
+   save_fpreg()/load_fpreg() hand the address of _fxsr_env to
+   fpu_fxsave32()/fpu_fxrstor32(). */
+struct i386_fpstate
+{
+  /* Regular FPU environment.  */
+  uint32_t          cw;
+  uint32_t          sw;
+  uint32_t          tag;
+  uint32_t          ipoff;
+  uint32_t          cssel;
+  uint32_t          dataoff;
+  uint32_t          datasel;
+  struct i386_fpreg _st[8];
+  uint32_t          status_magic;
+  /* FXSR FPU environment.  */
+  uint32_t          _fxsr_env[6];
+  uint32_t          mxcsr;
+  uint32_t          reserved;
+  struct i386_fpxreg _fxsr_st[8];
+  struct i386_xmmreg _xmm[8];
+  uint32_t          padding[56];
+}__attribute__((packed));
+
+/* Pointer form of the FP state. */
+typedef struct i386_fpstate *i386_fpregset_t;
+
+// Store the emulator's FPU state into state: the status word (after writing
+// the current top-of-stack index into it), the control word, then the
+// fxsave image starting at _fxsr_env.
+static void save_fpreg(x64emu_t* emu, struct i386_fpstate* state)
+{
+    // refresh the TOP field of the status word before copying it out
+    emu->sw.f.F87_TOP = emu->top&7;
+    state->sw = emu->sw.x16;
+    state->cw = emu->cw.x16;
+    // save SSE and MMX regs
+    fpu_fxsave32(emu, &state->_fxsr_env);
+}
+// Reload the emulator's FPU state from state: the fxrstor image starting at
+// _fxsr_env first, then the control and status words; emu->top is taken from
+// the TOP field of the restored status word.
+static void load_fpreg(x64emu_t* emu, struct i386_fpstate* state)
+{
+    // copy SSE and MMX regs
+    fpu_fxrstor32(emu, &state->_fxsr_env);
+    emu->cw.x16 = state->cw;
+    emu->sw.x16 = state->sw;
+    emu->top = emu->sw.f.F87_TOP&7;
+}
+
+// 32-bit form of the signal stack description: the stack pointer is a ptr_t
+// and the size a long_t, instead of the void* and size_t of x64_stack_t.
+typedef struct
+  {
+    ptr_t ss_sp;
+    int ss_flags;
+    long_t ss_size;
+  } i386_stack_t;
+
+// Signal stack description with a native pointer and size_t; this is the type
+// taken by my_sigaltstack() and returned by sigstack_getstack().
+typedef struct x64_stack_s
+{
+    void *ss_sp;
+    int ss_flags;
+    size_t ss_size;
+} x64_stack_t;
+
+
+/*
+another way to see the sigcontext
+struct sigcontext
+{
+  unsigned short gs, __gsh;
+  unsigned short fs, __fsh;
+  unsigned short es, __esh;
+  unsigned short ds, __dsh;
+  unsigned long edi;
+  unsigned long esi;
+  unsigned long ebp;
+  unsigned long esp;
+  unsigned long ebx;
+  unsigned long edx;
+  unsigned long ecx;
+  unsigned long eax;
+  unsigned long trapno;
+  unsigned long err;
+  unsigned long eip;
+  unsigned short cs, __csh;
+  unsigned long eflags;
+  unsigned long esp_at_signal;
+  unsigned short ss, __ssh;
+  struct _fpstate * fpstate;
+  unsigned long oldmask;
+  unsigned long cr2;
+};
+*/
+// Machine context: the 19 general/segment register slots (indexed with the
+// I386_* enumerators), a 32-bit pointer to the FP state, then oldmask and cr2.
+typedef struct
+  {
+    i386_gregset_t gregs;
+    ptr_t fpregs;   //i386_fpregset_t
+    uint32_t oldmask;
+    uint32_t cr2;
+  } i386_mcontext_t;
+
+// /!\ signal sig_set is different than glibc __sig_set
+// 64 signals stored in 32-bit words. The words must be uint32_t and not
+// `unsigned long`: on a 64-bit host `unsigned long` is 8 bytes, which would
+// double the size of the set and force 8-byte alignment, moving uc_sigmask
+// away from its i386 offset inside i386_ucontext_t.
+#define _NSIG_WORDS (64 / 32)
+typedef uint32_t i386_old_sigset_t;
+typedef struct {
+    uint32_t sig[_NSIG_WORDS];
+} i386_sigset_t;
+
+// Header placed after the fpstate inside struct i386_xstate: one 64-bit
+// xstate_bv word followed by seven reserved 64-bit words (64 bytes in total).
+struct i386_xsave_hdr_struct {
+ 	uint64_t xstate_bv;
+ 	uint64_t reserved1[2];
+ 	uint64_t reserved2[5];
+};
+
+// FP state followed by the xsave header; the structure is 64-byte aligned.
+struct i386_xstate {
+	/*
+	 * Applications need to refer to fpstate through fpstate pointer
+	 * in sigcontext. Not here directly.
+	 */
+ 	struct i386_fpstate fpstate;
+ 	struct i386_xsave_hdr_struct xsave_hdr;
+ 	/* new processor state extensions will go here */
+} __attribute__ ((aligned (64)));
+
+// Descriptor holding a 32-bit pointer to an i386_xstate plus size and mask words.
+// Only referenced from a commented-out member of i386_ucontext_t in this file.
+struct i386_xstate_cntxt {
+	ptr_t               xstate; //struct  i386_xstate *xstate;
+	uint32_t	        size;
+	uint32_t 	        lmask;
+	uint32_t	        hmask;
+};
+
+// User context as used by the 32-bit signal and *context functions of this
+// file: flags, 32-bit link pointer, signal stack, machine context, signal
+// mask, then an embedded xstate area.
+// NOTE(review): the length of unused[] is derived from the host's sigset_t -
+// confirm that this gives the intended layout for 32-bit programs.
+typedef struct i386_ucontext_s
+{
+    uint32_t uc_flags;
+    ptr_t uc_link;  //struct i386_ucontext_s *uc_link;
+    i386_stack_t uc_stack;
+    i386_mcontext_t uc_mcontext;
+    i386_sigset_t uc_sigmask;
+	/* Allow for uc_sigmask growth.  Glibc uses a 1024-bit sigset_t.  */
+	int		  unused[32 - (sizeof (sigset_t) / sizeof (int))];
+	//struct i386_xstate_cntxt  uc_xstate;
+    struct i386_xstate  xstate;
+} i386_ucontext_t;
+
+// Signal frame description: return-code pointer, signal number, CPU state,
+// FP state, extra mask words and 8 bytes of return code.
+// NOTE(review): not referenced by the functions visible in this part of the file.
+typedef struct i386_sigframe_s {
+    ptr_t           pretcode;   // pointer to retcode
+    int             sig;
+    i386_mcontext_t cpustate;
+    struct i386_xstate fpstate;
+    ptr_t           extramask[64-1];
+    char            retcode[8];
+} i386_sigframe_t;
+
+// sigaction description with native pointers and unsigned long fields:
+// handler, flags, restorer, and the mask split into two words.
+// NOTE(review): presumably the layout of the raw sigaction syscall argument - confirm.
+struct kernel_sigaction {
+        void (*k_sa_handler) (int);
+        unsigned long sa_flags;
+        void (*sa_restorer) (void);
+        unsigned long sa_mask;
+        unsigned long sa_mask2;
+};
+#ifdef DYNAREC
+uintptr_t getX64Address(dynablock_t* db, uintptr_t arm_addr);
+#endif
+
+x64_stack_t* sigstack_getstack();
+int my_sigaltstack(x64emu_t* emu, const x64_stack_t* ss, x64_stack_t* oss);
+// 32-bit entry point for sigaltstack(): converts the optional i386_stack_t
+// input to an x64_stack_t, forwards to my_sigaltstack(), and on success
+// (return value 0) converts the previous stack back into oss when requested.
+EXPORT int my32_sigaltstack(x64emu_t* emu, const i386_stack_t* ss, i386_stack_t* oss)
+{
+    x64_stack_t new_stack = {0};
+    x64_stack_t old_stack = {0};
+    const x64_stack_t* in = NULL;
+    x64_stack_t* out = NULL;
+
+    if(ss) {
+        new_stack.ss_flags = ss->ss_flags;
+        new_stack.ss_sp = from_ptrv(ss->ss_sp);
+        new_stack.ss_size = ss->ss_size;
+        in = &new_stack;
+    }
+    if(oss)
+        out = &old_stack;
+
+    int ret = my_sigaltstack(emu, in, out);
+    // nothing to report back on failure or when the caller did not ask for it
+    if(ret || !oss)
+        return ret;
+
+    oss->ss_flags = old_stack.ss_flags;
+    oss->ss_sp = to_ptrv(old_stack.ss_sp);
+    oss->ss_size = old_stack.ss_size;
+    return ret;
+}
+
+
+// Run the 32-bit emulated function fnc on the current thread's emulator with
+// nargs 32-bit arguments pushed on the emulated stack.
+//   exit       : when non-NULL, receives emu->exit after the call
+//   dynarec    : non-zero to run through DynaCall, zero for EmuCall
+//                (forced to 0 when USE_CUSTOM_MEM is not defined)
+//   sigcontext : when non-NULL and the callee did a longjmp, receives the
+//                resulting registers, flags and segments
+//   fnc        : 0 (SIG_DFL) logs and aborts, 1 (SIG_IGN) logs and returns 0
+//   ...        : nargs values, each read back with va_arg(va, uint32_t)
+// Returns the value of EAX after the call.
+uint32_t RunFunctionHandler32(int* exit, int dynarec, i386_ucontext_t* sigcontext, ptr_t fnc, int nargs, ...)
+{
+    if(fnc==0 || fnc==1) {
+        va_list va;
+        va_start (va, nargs);
+        int sig = va_arg(va, int);
+        va_end (va);
+        printf_log(LOG_NONE, "%04d|BOX32: Warning, calling Signal %d function handler %s\n", GetTID(), sig, fnc?"SIG_IGN":"SIG_DFL");
+        if(fnc==0) {
+            printf_log(LOG_NONE, "Unhandled signal caught, aborting\n");
+            abort();
+        }
+        return 0;
+    }
+#ifdef HAVE_TRACE
+    uintptr_t old_start = trace_start, old_end = trace_end;
+#if 0
+    trace_start = 0; trace_end = 1; // disabling trace, globably for now...
+#endif
+#endif
+#ifndef USE_CUSTOM_MEM
+    // because a signal can interupt a malloc-like function
+    // Dynarec cannot be used in signal handling unless custom malloc is used
+    dynarec = 0;
+#endif
+
+    x64emu_t *emu = thread_get_emu();
+    #ifdef DYNAREC
+    if(box64_dynarec_test)
+        emu->test.test = 0;
+    #endif
+
+    /*SetFS(emu, default_fs);*/
+    for (int i=0; i<6; ++i)
+        emu->segs_serial[i] = 0;
+
+    // reserve one 32-bit stack slot per argument
+    R_ESP -= nargs * sizeof(ptr_t);
+
+    uint32_t *p = (uint32_t*)from_ptrv(R_ESP);
+
+    // copy the arguments, first argument at the lowest address
+    va_list va;
+    va_start (va, nargs);
+    for (int i=0; i<nargs; ++i) {
+        uint32_t v = va_arg(va, uint32_t);
+        *p = v;
+        p++;
+    }
+    va_end (va);
+
+    printf_log(LOG_DEBUG, "%04d|signal #%d function handler %p called, RSP=%p\n", GetTID(), R_EDI, from_ptrv(fnc), from_ptrv(R_ESP));
+
+    int oldquitonlongjmp = emu->flags.quitonlongjmp;
+    emu->flags.quitonlongjmp = 2;
+    int old_cs = R_CS;
+    R_CS = 0x23;
+
+    emu->eflags.x64 &= ~(1<<F_TF); // this one needs to cleared
+
+    if(dynarec)
+        DynaCall(emu, fnc);
+    else
+        EmuCall(emu, fnc);
+
+    // drop the argument slots and restore CS, unless the callee left through a longjmp
+    if(!emu->flags.longjmp)
+        R_ESP+=nargs*sizeof(ptr_t);
+
+    if(!emu->flags.longjmp && R_CS==0x23)
+        R_CS = old_cs;
+
+    emu->flags.quitonlongjmp = oldquitonlongjmp;
+
+    #ifdef DYNAREC
+    if(box64_dynarec_test) {
+        emu->test.test = 0;
+        emu->test.clean = 0;
+    }
+    #endif
+
+    if(emu->flags.longjmp) {
+        // longjmp inside signal handler, lets grab all relevent value and do the actual longjmp in the signal handler
+        emu->flags.longjmp = 0;
+        if(sigcontext) {
+            sigcontext->uc_mcontext.gregs[I386_EAX] = R_EAX;
+            sigcontext->uc_mcontext.gregs[I386_ECX] = R_ECX;
+            sigcontext->uc_mcontext.gregs[I386_EDX] = R_EDX;
+            sigcontext->uc_mcontext.gregs[I386_EDI] = R_EDI;
+            sigcontext->uc_mcontext.gregs[I386_ESI] = R_ESI;
+            sigcontext->uc_mcontext.gregs[I386_EBP] = R_EBP;
+            sigcontext->uc_mcontext.gregs[I386_ESP] = R_ESP;
+            sigcontext->uc_mcontext.gregs[I386_EBX] = R_EBX;
+            sigcontext->uc_mcontext.gregs[I386_EIP] = R_EIP;
+            // flags
+            sigcontext->uc_mcontext.gregs[I386_EFL] = emu->eflags.x64;
+            // get segments
+            sigcontext->uc_mcontext.gregs[I386_CS] = R_CS;
+            sigcontext->uc_mcontext.gregs[I386_DS] = R_DS;
+            sigcontext->uc_mcontext.gregs[I386_ES] = R_ES;
+            sigcontext->uc_mcontext.gregs[I386_SS] = R_SS;
+            sigcontext->uc_mcontext.gregs[I386_FS] = R_FS;
+            sigcontext->uc_mcontext.gregs[I386_GS] = R_GS;
+        } else {
+            printf_log(LOG_NONE, "Warning, longjmp in signal but no sigcontext to change\n");
+        }
+    }
+    if(exit)
+        *exit = emu->exit;
+
+    uint32_t ret = R_EAX;
+
+#ifdef HAVE_TRACE
+    trace_start = old_start; trace_end = old_end;
+#endif
+
+    return ret;
+}
+
+// bits tested in the value returned by unlockMutex()
+#define is_memprot_locked (1<<1)
+#define is_dyndump_locked (1<<8)
+// 32-bit signal delivery. Builds an xstate area, a copy of the siginfo and an
+// i386_ucontext_t on the emulated stack (or on the alternate signal stack),
+// runs the registered handler through RunFunctionHandler32(), then either
+// siglongjmps back into the emulator when the handler modified the context
+// and a jmpbuf is available, or restores the registers from the context and
+// runs the restorer, if any.
+//   sig      : signal number, used to look up handler, restorer and onstack
+//   simple   : not used by this function
+//   info     : native siginfo; si_errno values 0x1234, 0xdead and 0xcafe mark
+//              special cases handled below
+//   ucntx    : native ucontext (only read when built with DYNAREC)
+//   old_code : when non-NULL, set to -1 to allow another fault at the same place
+//   cur_db   : current dynablock, or NULL (only read when built with DYNAREC)
+void my_sigactionhandler_oldcode_32(int32_t sig, int simple, siginfo_t* info, void * ucntx, int* old_code, void* cur_db)
+{
+    int Locks = unlockMutex();
+
+    printf_log(LOG_DEBUG, "Sigactionhanlder for signal #%d called (jump to %p/%s)\n", sig, (void*)my_context->signals[sig], GetNativeName((void*)my_context->signals[sig]));
+
+    uintptr_t restorer = my_context->restorer[sig];
+    // get that actual ESP first!
+    x64emu_t *emu = thread_get_emu();
+    uintptr_t frame = R_RSP;
+#if defined(DYNAREC)
+#if defined(ARM64)
+    dynablock_t* db = (dynablock_t*)cur_db;//FindDynablockFromNativeAddress(pc);
+    ucontext_t *p = (ucontext_t *)ucntx;
+    void* pc = NULL;
+    if(p) {
+        pc = (void*)p->uc_mcontext.pc;
+        if(db)
+            frame = (uintptr_t)p->uc_mcontext.regs[10+_SP];
+    }
+#elif defined(LA64)
+    dynablock_t* db = (dynablock_t*)cur_db;//FindDynablockFromNativeAddress(pc);
+    ucontext_t *p = (ucontext_t *)ucntx;
+    void* pc = NULL;
+    if(p) {
+        pc = (void*)p->uc_mcontext.__pc;
+        if(db)
+            frame = (uintptr_t)p->uc_mcontext.__gregs[12+_SP];
+    }
+#elif defined(RV64)
+    dynablock_t* db = (dynablock_t*)cur_db;//FindDynablockFromNativeAddress(pc);
+    ucontext_t *p = (ucontext_t *)ucntx;
+    void* pc = NULL;
+    if(p) {
+        pc = (void*)p->uc_mcontext.__gregs[0];
+        if(db)
+            frame = (uintptr_t)p->uc_mcontext.__gregs[16+_SP];
+    }
+#else
+#error Unsupported architecture
+#endif
+#else
+    (void)ucntx; (void)cur_db;
+#endif
+    // setup libc context stack frame, on caller stack
+    frame = frame&~15;
+
+    // stack tracking
+    x64_stack_t *new_ss = my_context->onstack[sig]?sigstack_getstack():NULL;
+    int used_stack = 0;
+    if(new_ss) {
+        if(new_ss->ss_flags == SS_ONSTACK) { // already using it!
+            frame = ((uintptr_t)emu->regs[_SP].q[0] - 128) & ~0x0f;
+        } else {
+            frame = (uintptr_t)(((uintptr_t)new_ss->ss_sp + new_ss->ss_size - 16) & ~0x0f);
+            used_stack = 1;
+            new_ss->ss_flags = SS_ONSTACK;
+        }
+    } else {
+        frame -= 0x200; // redzone
+    }
+
+    // TODO: do I need to really setup 2 stack frame? That doesn't seems right!
+    // setup stack frame
+    frame -= 512+64+16*16;
+    void* xstate = (void*)frame;
+    frame -= sizeof(siginfo_t);
+    siginfo_t* info2 = (siginfo_t*)frame;
+    memcpy(info2, info, sizeof(siginfo_t));
+    // try to fill some sigcontext....
+    frame -= sizeof(i386_ucontext_t);
+    i386_ucontext_t   *sigcontext = (i386_ucontext_t*)frame;
+    // get general register
+    sigcontext->uc_mcontext.gregs[I386_EAX] = R_EAX;
+    sigcontext->uc_mcontext.gregs[I386_ECX] = R_ECX;
+    sigcontext->uc_mcontext.gregs[I386_EDX] = R_EDX;
+    sigcontext->uc_mcontext.gregs[I386_EDI] = R_EDI;
+    sigcontext->uc_mcontext.gregs[I386_ESI] = R_ESI;
+    sigcontext->uc_mcontext.gregs[I386_EBP] = R_EBP;
+    sigcontext->uc_mcontext.gregs[I386_ESP] = R_ESP;
+    sigcontext->uc_mcontext.gregs[I386_EBX] = R_EBX;
+    sigcontext->uc_mcontext.gregs[I386_EIP] = R_EIP;//emu->old_ip;   // old_ip should be more accurate as the "current" IP, but it's not always up-to-date
+    // flags
+    sigcontext->uc_mcontext.gregs[I386_EFL] = emu->eflags.x64;
+    // get segments
+    sigcontext->uc_mcontext.gregs[I386_CS] = R_CS;
+    sigcontext->uc_mcontext.gregs[I386_DS] = R_DS;
+    sigcontext->uc_mcontext.gregs[I386_ES] = R_ES;
+    sigcontext->uc_mcontext.gregs[I386_SS] = R_SS;
+    sigcontext->uc_mcontext.gregs[I386_FS] = R_FS;
+    sigcontext->uc_mcontext.gregs[I386_GS] = R_GS;
+    // inside a dynablock, the registers come from the native context instead
+#if defined(DYNAREC)
+#if defined(ARM64)
+    if(db && p) {
+        sigcontext->uc_mcontext.gregs[I386_EAX] = p->uc_mcontext.regs[10];
+        sigcontext->uc_mcontext.gregs[I386_ECX] = p->uc_mcontext.regs[11];
+        sigcontext->uc_mcontext.gregs[I386_EDX] = p->uc_mcontext.regs[12];
+        sigcontext->uc_mcontext.gregs[I386_EBX] = p->uc_mcontext.regs[13];
+        sigcontext->uc_mcontext.gregs[I386_ESP] = p->uc_mcontext.regs[14];
+        sigcontext->uc_mcontext.gregs[I386_EBP] = p->uc_mcontext.regs[15];
+        sigcontext->uc_mcontext.gregs[I386_ESI] = p->uc_mcontext.regs[16];
+        sigcontext->uc_mcontext.gregs[I386_EDI] = p->uc_mcontext.regs[17];
+        sigcontext->uc_mcontext.gregs[I386_EIP] = getX64Address(db, (uintptr_t)pc);
+    }
+#elif defined(LA64)
+    if(db && p) {
+        sigcontext->uc_mcontext.gregs[I386_EAX] = p->uc_mcontext.__gregs[12];
+        sigcontext->uc_mcontext.gregs[I386_ECX] = p->uc_mcontext.__gregs[13];
+        sigcontext->uc_mcontext.gregs[I386_EDX] = p->uc_mcontext.__gregs[14];
+        sigcontext->uc_mcontext.gregs[I386_EBX] = p->uc_mcontext.__gregs[15];
+        sigcontext->uc_mcontext.gregs[I386_ESP] = p->uc_mcontext.__gregs[16];
+        sigcontext->uc_mcontext.gregs[I386_EBP] = p->uc_mcontext.__gregs[17];
+        sigcontext->uc_mcontext.gregs[I386_ESI] = p->uc_mcontext.__gregs[18];
+        sigcontext->uc_mcontext.gregs[I386_EDI] = p->uc_mcontext.__gregs[19];
+        sigcontext->uc_mcontext.gregs[I386_EIP] = getX64Address(db, (uintptr_t)pc);
+    }
+#elif defined(RV64)
+    if(db && p) {
+        sigcontext->uc_mcontext.gregs[I386_EAX] = p->uc_mcontext.__gregs[16];
+        sigcontext->uc_mcontext.gregs[I386_ECX] = p->uc_mcontext.__gregs[17];
+        sigcontext->uc_mcontext.gregs[I386_EDX] = p->uc_mcontext.__gregs[18];
+        sigcontext->uc_mcontext.gregs[I386_EBX] = p->uc_mcontext.__gregs[19];
+        sigcontext->uc_mcontext.gregs[I386_ESP] = p->uc_mcontext.__gregs[20];
+        sigcontext->uc_mcontext.gregs[I386_EBP] = p->uc_mcontext.__gregs[21];
+        sigcontext->uc_mcontext.gregs[I386_ESI] = p->uc_mcontext.__gregs[22];
+        sigcontext->uc_mcontext.gregs[I386_EDI] = p->uc_mcontext.__gregs[23];
+        sigcontext->uc_mcontext.gregs[I386_EIP] = getX64Address(db, (uintptr_t)pc);
+    }
+#else
+#error Unsupported architecture
+#endif
+#endif
+    // get FloatPoint status
+    sigcontext->uc_mcontext.fpregs = to_ptrv(xstate);//(struct x64_libc_fpstate*)&sigcontext->xstate;
+    fpu_xsave_mask(emu, xstate, 1, 0b111);
+    memcpy(&sigcontext->xstate, xstate, sizeof(sigcontext->xstate));
+    ((struct i386_fpstate*)xstate)->status_magic = 0x46505853;   // magic number to signal an XSTATE type of fpregs
+    // get signal mask
+
+    if(new_ss) {
+        sigcontext->uc_stack.ss_sp = to_ptrv(new_ss->ss_sp);
+        sigcontext->uc_stack.ss_size = new_ss->ss_size;
+        sigcontext->uc_stack.ss_flags = new_ss->ss_flags;
+    } else
+        sigcontext->uc_stack.ss_flags = SS_DISABLE;
+    // Try to guess some X64_TRAPNO
+    /*
+    TRAP_x86_DIVIDE     = 0,   // Division by zero exception
+    TRAP_x86_TRCTRAP    = 1,   // Single-step exception
+    TRAP_x86_NMI        = 2,   // NMI interrupt
+    TRAP_x86_BPTFLT     = 3,   // Breakpoint exception
+    TRAP_x86_OFLOW      = 4,   // Overflow exception
+    TRAP_x86_BOUND      = 5,   // Bound range exception
+    TRAP_x86_PRIVINFLT  = 6,   // Invalid opcode exception
+    TRAP_x86_DNA        = 7,   // Device not available exception
+    TRAP_x86_DOUBLEFLT  = 8,   // Double fault exception
+    TRAP_x86_FPOPFLT    = 9,   // Coprocessor segment overrun
+    TRAP_x86_TSSFLT     = 10,  // Invalid TSS exception
+    TRAP_x86_SEGNPFLT   = 11,  // Segment not present exception
+    TRAP_x86_STKFLT     = 12,  // Stack fault
+    TRAP_x86_PROTFLT    = 13,  // General protection fault
+    TRAP_x86_PAGEFLT    = 14,  // Page fault
+    TRAP_x86_ARITHTRAP  = 16,  // Floating point exception
+    TRAP_x86_ALIGNFLT   = 17,  // Alignment check exception
+    TRAP_x86_MCHK       = 18,  // Machine check exception
+    TRAP_x86_CACHEFLT   = 19   // SIMD exception (via SIGFPE) if CPU is SSE capable otherwise Cache flush exception (via SIGSEV)
+    */
+    uint32_t prot = getProtection((uintptr_t)info->si_addr);
+    if(sig==SIGBUS)
+        sigcontext->uc_mcontext.gregs[I386_TRAPNO] = 17;
+    else if(sig==SIGSEGV) {
+        if((uintptr_t)info->si_addr == sigcontext->uc_mcontext.gregs[I386_EIP]) {
+            sigcontext->uc_mcontext.gregs[I386_ERR] = (info->si_errno==0x1234)?0:((info->si_errno==0xdead)?(0x2|(info->si_code<<3)):0x0010);    // execution flag issue (probably), unless it's a #GP(0)
+            sigcontext->uc_mcontext.gregs[I386_TRAPNO] = ((info->si_code==SEGV_ACCERR) || (info->si_errno==0x1234) || (info->si_errno==0xdead) || ((uintptr_t)info->si_addr==0))?13:14;
+        } else if(info->si_code==SEGV_ACCERR && !(prot&PROT_WRITE)) {
+            sigcontext->uc_mcontext.gregs[I386_ERR] = 0x0002;    // write flag issue
+            sigcontext->uc_mcontext.gregs[I386_TRAPNO] = 14;
+        } else {
+            if((info->si_code!=SEGV_ACCERR) && labs((intptr_t)info->si_addr-(intptr_t)sigcontext->uc_mcontext.gregs[I386_ESP])<16)
+                sigcontext->uc_mcontext.gregs[I386_TRAPNO] = 12; // stack overflow probably
+            else
+                sigcontext->uc_mcontext.gregs[I386_TRAPNO] = (info->si_code == SEGV_ACCERR)?13:14;
+            //I386_ERR seems to be INT:8 CODE:8. So for write access segfault it's 0x0002 For a read it's 0x0004 (and 8 for exec). For an int 2d it could be 0x2D01 for example
+            sigcontext->uc_mcontext.gregs[I386_ERR] = 0x0004;    // read error? there is no execute control in box64 anyway
+        }
+        if(info->si_code == SEGV_ACCERR && old_code)
+            *old_code = -1;
+        if(info->si_errno==0x1234) {
+            info2->si_errno = 0;
+        } else if(info->si_errno==0xdead) {
+            // INT x
+            uint8_t int_n = info2->si_code;
+            info2->si_errno = 0;
+            info2->si_code = info->si_code;
+            info2->si_addr = NULL;
+            // some special cases...
+            if(int_n==3) {
+                info2->si_signo = SIGTRAP;
+                sigcontext->uc_mcontext.gregs[I386_TRAPNO] = 3;
+                sigcontext->uc_mcontext.gregs[I386_ERR] = 0;
+            } else if(int_n==0x04) {
+                sigcontext->uc_mcontext.gregs[I386_TRAPNO] = 4;
+                sigcontext->uc_mcontext.gregs[I386_ERR] = 0;
+            } else if (int_n==0x29 || int_n==0x2c || int_n==0x2d) {
+                sigcontext->uc_mcontext.gregs[I386_ERR] = 0x02|(int_n<<3);
+            } else {
+                sigcontext->uc_mcontext.gregs[I386_ERR] = 0x0a|(int_n<<3);
+            }
+        } else if(info->si_errno==0xcafe) {
+            info2->si_errno = 0;
+            sigcontext->uc_mcontext.gregs[I386_TRAPNO] = 0;
+            info2->si_signo = SIGFPE;
+        }
+    } else if(sig==SIGFPE) {
+        if (info->si_code == FPE_INTOVF)
+            sigcontext->uc_mcontext.gregs[I386_TRAPNO] = 4;
+        else
+            sigcontext->uc_mcontext.gregs[I386_TRAPNO] = 19;
+    } else if(sig==SIGILL)
+        sigcontext->uc_mcontext.gregs[I386_TRAPNO] = 6;
+    else if(sig==SIGTRAP) {
+        info2->si_code = 128;
+        sigcontext->uc_mcontext.gregs[I386_TRAPNO] = info->si_code;
+        sigcontext->uc_mcontext.gregs[I386_ERR] = 0;
+    }
+    //TODO: SIGABRT generate what?
+    printf_log(LOG_DEBUG, "Signal %d: si_addr=%p, TRAPNO=%d, ERR=%d, RIP=%p\n", sig, (void*)info2->si_addr, sigcontext->uc_mcontext.gregs[I386_TRAPNO], sigcontext->uc_mcontext.gregs[I386_ERR],from_ptrv(sigcontext->uc_mcontext.gregs[I386_EIP]));
+    // call the signal handler
+    // keep a copy of the context, to detect changes made by the handler
+    i386_ucontext_t sigcontext_copy = *sigcontext;
+    // save old value from emu
+    #define GO(A) uint32_t old_##A = R_##A
+    GO(EAX);
+    GO(EDI);
+    GO(ESI);
+    GO(EDX);
+    GO(ECX);
+    GO(EBP);
+    #undef GO
+    // set stack pointer
+    R_ESP = frame;
+    // set frame pointer
+    R_EBP = sigcontext->uc_mcontext.gregs[I386_EBP];
+
+    int exits = 0;
+    int ret;
+    int dynarec = 0;
+    #ifdef DYNAREC
+    if(sig!=SIGSEGV && !(Locks&is_dyndump_locked) && !(Locks&is_memprot_locked))
+        dynarec = 1;
+    #endif
+    // RunFunctionHandler32 reads every variadic argument with va_arg(va, uint32_t),
+    // so the two pointers are passed as 32-bit values and not as native pointers
+    ret = RunFunctionHandler32(&exits, dynarec, sigcontext, my_context->signals[info2->si_signo], 3, info2->si_signo, to_ptrv(info2), to_ptrv(sigcontext));
+    // restore old value from emu
+    if(used_stack)  // release stack
+        new_ss->ss_flags = 0;
+    #define GO(A) R_##A = old_##A
+    GO(EAX);
+    GO(EDI);
+    GO(ESI);
+    GO(EDX);
+    GO(ECX);
+    GO(EBP);
+    #undef GO
+
+    if(memcmp(sigcontext, &sigcontext_copy, sizeof(i386_ucontext_t))) {
+        if(emu->jmpbuf) {
+            #define GO(R)   emu->regs[_##R].q[0]=sigcontext->uc_mcontext.gregs[I386_E##R]
+            GO(AX);
+            GO(CX);
+            GO(DX);
+            GO(DI);
+            GO(SI);
+            GO(BP);
+            GO(SP);
+            GO(BX);
+            #undef GO
+            emu->ip.q[0]=sigcontext->uc_mcontext.gregs[I386_EIP];
+            // flags
+            emu->eflags.x64=sigcontext->uc_mcontext.gregs[I386_EFL];
+            // get segments
+            #define GO(S) if(emu->segs[_##S]!=sigcontext->uc_mcontext.gregs[I386_##S])  emu->segs[_##S]=sigcontext->uc_mcontext.gregs[I386_##S]
+            GO(CS);
+            GO(DS);
+            GO(ES);
+            GO(SS);
+            GO(GS);
+            GO(FS);
+            #undef GO
+            for(int i=0; i<6; ++i)
+                emu->segs_serial[i] = 0;
+            printf_log(LOG_DEBUG, "Context has been changed in Sigactionhanlder, doing siglongjmp to resume emu at %p, RSP=%p\n", (void*)R_RIP, (void*)R_RSP);
+            if(old_code)
+                *old_code = -1;    // re-init the value to allow another segfault at the same place
+            //relockMutex(Locks);   // do not relock mutex, because of the siglongjmp, whatever was running is canceled
+            #ifdef DYNAREC
+            if(Locks & is_dyndump_locked)
+                CancelBlock64(1);
+            #endif
+            #ifdef RV64
+            emu->xSPSave = emu->old_savedsp;
+            #endif
+            #ifdef ANDROID
+            siglongjmp(*emu->jmpbuf, 1);
+            #else
+            siglongjmp(emu->jmpbuf, 1);
+            #endif
+        }
+        printf_log(LOG_INFO, "Warning, context has been changed in Sigactionhanlder%s\n", (sigcontext->uc_mcontext.gregs[I386_EIP]!=sigcontext_copy.uc_mcontext.gregs[I386_EIP])?" (EIP changed)":"");
+    }
+    // restore regs...
+    #define GO(R)   R_##R=sigcontext->uc_mcontext.gregs[I386_##R]
+    GO(EAX);
+    GO(ECX);
+    GO(EDX);
+    GO(EDI);
+    GO(ESI);
+    GO(EBP);
+    GO(ESP);
+    GO(EBX);
+    #undef GO
+    emu->eflags.x64=sigcontext->uc_mcontext.gregs[I386_EFL];
+    #define GO(R)   R_##R=sigcontext->uc_mcontext.gregs[I386_##R]
+    GO(CS);
+    GO(DS);
+    GO(ES);
+    GO(SS);
+    GO(GS);
+    GO(FS);
+    #undef GO
+
+    printf_log(LOG_DEBUG, "Sigactionhanlder main function returned (exit=%d, restorer=%p)\n", exits, (void*)restorer);
+    if(exits) {
+        //relockMutex(Locks);   // the thread will exit, so no relock there
+        #ifdef DYNAREC
+        if(Locks & is_dyndump_locked)
+            CancelBlock64(1);
+        #endif
+        exit(ret);
+    }
+    if(restorer)
+        RunFunctionHandler32(&exits, 0, NULL, restorer, 0);
+    relockMutex(Locks);
+}
+
+// 32-bit getcontext(): snapshot the emulated CPU state into the
+// i386_ucontext_t at ucp. The saved EIP is the 32-bit value currently on top
+// of the emulated stack and the saved ESP is ESP+4. Always returns 0.
+EXPORT int my32_getcontext(x64emu_t* emu, void* ucp)
+{
+    i386_ucontext_t *ctx = (i386_ucontext_t*)ucp;
+    // stack tracking: not filled in yet, both fields are simply zeroed
+    ctx->uc_stack.ss_sp = 0;
+    ctx->uc_stack.ss_size = 0;
+    // general registers and segments that are copied verbatim
+    #define GO(R) ctx->uc_mcontext.gregs[I386_##R] = R_##R
+    GO(EAX);
+    GO(ECX);
+    GO(EDX);
+    GO(EDI);
+    GO(ESI);
+    GO(EBP);
+    GO(EBX);
+    GO(GS);
+    GO(FS);
+    GO(ES);
+    GO(DS);
+    GO(CS);
+    GO(SS);
+    #undef GO
+    // EIP and ESP: top-of-stack value, and the stack pointer just above it
+    ctx->uc_mcontext.gregs[I386_EIP] = *(uint32_t*)from_ptrv(R_ESP);
+    ctx->uc_mcontext.gregs[I386_ESP] = R_ESP+4;
+    // FPU state, only when the context provides an fpregs area
+    if(ctx->uc_mcontext.fpregs)
+        save_fpreg(emu, from_ptrv(ctx->uc_mcontext.fpregs));
+    // current signal mask (queried only, nothing is changed)
+    sigprocmask(SIG_SETMASK, NULL, (sigset_t*)&ctx->uc_sigmask);
+    // make sure uc_link is initialised
+    ctx->uc_link = to_ptrv(emu->uc_link);
+
+    return 0;
+}
+
+// 32-bit setcontext(): load the emulated CPU state from the i386_ucontext_t
+// at ucp (stack bounds, registers, segments, FPU state when present, signal
+// mask and uc_link). Clears errno and returns the restored EAX.
+EXPORT int my32_setcontext(x64emu_t* emu, void* ucp)
+{
+    i386_ucontext_t *ctx = (i386_ucontext_t*)ucp;
+    // stack tracking
+    emu->init_stack = from_ptrv(ctx->uc_stack.ss_sp);
+    emu->size_stack = from_ulong(ctx->uc_stack.ss_size);
+    // general registers (EIP and ESP included), then segments
+    #define GO(R) R_##R = ctx->uc_mcontext.gregs[I386_##R]
+    GO(EAX);
+    GO(ECX);
+    GO(EDX);
+    GO(EDI);
+    GO(ESI);
+    GO(EBP);
+    GO(EIP);
+    GO(ESP);
+    GO(EBX);
+    GO(GS);
+    GO(FS);
+    GO(ES);
+    GO(DS);
+    GO(CS);
+    GO(SS);
+    #undef GO
+    // FPU state, only when the context provides an fpregs area
+    if(ctx->uc_mcontext.fpregs)
+        load_fpreg(emu, from_ptrv(ctx->uc_mcontext.fpregs));
+    // install the saved signal mask
+    sigprocmask(SIG_SETMASK, (sigset_t*)&ctx->uc_sigmask, NULL);
+    // remember the successor context
+    emu->uc_link = from_ptrv(ctx->uc_link);
+    errno = 0;
+    return R_EAX;
+}
+
+// 32-bit makecontext(): prepare the i386_ucontext_t at ucp so that it starts
+// executing fnc on the context's own stack. The argc values of argv are
+// pushed so that argv[0] ends up at the lowest address, followed by the exit
+// bridge as return address. Always returns 0.
+EXPORT int my32_makecontext(x64emu_t* emu, void* ucp, void* fnc, int32_t argc, int32_t* argv)
+{
+    i386_ucontext_t *ctx = (i386_ucontext_t*)ucp;
+    // start 4 bytes below the end of the context's stack
+    ctx->uc_mcontext.gregs[I386_ESP] = to_ptr(ctx->uc_stack.ss_sp + ctx->uc_stack.ss_size - 4);
+    // entry point
+    ctx->uc_mcontext.gregs[I386_EIP] = to_ptrv(fnc);
+    // push the arguments, last one first
+    uint32_t* sp = (uint32_t*)from_ptr(ctx->uc_mcontext.gregs[I386_ESP]);
+    for (int left=argc; left>0; --left)
+        *--sp = argv[left-1];
+    // push the return value
+    *--sp = to_ptr(my_context->exit_bridge);
+    ctx->uc_mcontext.gregs[I386_ESP] = (uintptr_t)sp;
+
+    return 0;
+}
+
+// 32-bit swapcontext(): save the running context into ucp1 with
+// my32_getcontext(), then switch to ucp2 with my32_setcontext().
+// Always returns 0.
+EXPORT int my32_swapcontext(x64emu_t* emu, void* ucp1, void* ucp2)
+{
+    // snapshot first, then activate the other context
+    (void)my32_getcontext(emu, ucp1);
+    (void)my32_setcontext(emu, ucp2);
+    return 0;
+}
diff --git a/src/libtools/signals.c b/src/libtools/signals.c
index e58081c1..a3030e40 100644
--- a/src/libtools/signals.c
+++ b/src/libtools/signals.c
@@ -269,6 +269,10 @@ static void sigstack_key_alloc() {
     pthread_key_create(&sigstack_key, sigstack_destroy);
 }
 
+// Return the value stored under sigstack_key for the calling thread, as an
+// x64_stack_t pointer (NULL when nothing is stored).
+x64_stack_t* sigstack_getstack() {
+    void* stored = pthread_getspecific(sigstack_key);
+    return (x64_stack_t*)stored;
+}
+
 // this allow handling "safe" function that just abort if accessing a bad address
 static __thread JUMPBUFF signal_jmpbuf;
 #ifdef ANDROID
@@ -923,8 +927,17 @@ int sigbus_specialcases(siginfo_t* info, void * ucntx, void* pc, void* _fpsimd)
     return 0;
 }
 
+#ifdef BOX32
+void my_sigactionhandler_oldcode_32(int32_t sig, int simple, siginfo_t* info, void * ucntx, int* old_code, void* cur_db);
+#endif
 void my_sigactionhandler_oldcode(int32_t sig, int simple, siginfo_t* info, void * ucntx, int* old_code, void* cur_db)
 {
+    #ifdef BOX32
+    if(box64_is32bits) {
+        my_sigactionhandler_oldcode_32(sig, simple, info, ucntx, old_code, cur_db);
+        return;
+    }
+    #endif
     int Locks = unlockMutex();
 
     printf_log(LOG_DEBUG, "Sigactionhanlder for signal #%d called (jump to %p/%s)\n", sig, (void*)my_context->signals[sig], GetNativeName((void*)my_context->signals[sig]));
@@ -2184,7 +2197,7 @@ EXPORT int my_getcontext(x64emu_t* emu, void* ucp)
     // get signal mask
     sigprocmask(SIG_SETMASK, NULL, (sigset_t*)&u->uc_sigmask);
     // ensure uc_link is properly initialized
-    u->uc_link = emu->uc_link;
+    u->uc_link = (x64_ucontext_t*)emu->uc_link;
 
     return 0;
 }
diff --git a/src/libtools/threads.c b/src/libtools/threads.c
index 074068c0..0973d658 100644
--- a/src/libtools/threads.c
+++ b/src/libtools/threads.c
@@ -29,6 +29,9 @@
 #include "dynablock.h"
 #include "dynarec/native_lock.h"
 #endif
+#ifdef BOX32
+#include "box32.h"
+#endif
 
 //void _pthread_cleanup_push_defer(void* buffer, void* routine, void* arg);	// declare hidden functions
 //void _pthread_cleanup_pop_restore(void* buffer, int exec);
@@ -124,14 +127,6 @@ int GetStackSize(x64emu_t* emu, uintptr_t attr, void** stack, size_t* stacksize)
 
 void my_longjmp(x64emu_t* emu, /*struct __jmp_buf_tag __env[1]*/void *p, int32_t __val);
 
-typedef struct emuthread_s {
-	uintptr_t 	fnc;
-	void*		arg;
-	x64emu_t*	emu;
-	int			cancel_cap, cancel_size;
-	x64_unwind_buff_t **cancels;
-} emuthread_t;
-
 static pthread_key_t thread_key;
 
 static void emuthread_destroy(void* p)
@@ -144,6 +139,10 @@ static void emuthread_destroy(void* p)
 	if (my_context && (ptr = pthread_getspecific(my_context->tlskey)) != NULL)
         free_tlsdatasize(ptr);*/
 	// free x64emu
+	#ifdef BOX32
+	if(box64_is32bits && !et->join)
+		to_hash_d(et->self);
+	#endif
 	if(et) {
 		FreeX64Emu(&et->emu);
 		box_free(et);
@@ -158,9 +157,13 @@ static void emuthread_cancel(void* p)
 	// check cancels threads
 	for(int i=et->cancel_size-1; i>=0; --i) {
 		et->emu->flags.quitonlongjmp = 0;
-		my_longjmp(et->emu, et->cancels[i]->__cancel_jmp_buf, 1);
+		my_longjmp(et->emu, ((x64_unwind_buff_t*)et->cancels[i])->__cancel_jmp_buf, 1);
 		DynaRun(et->emu);	// will return after a __pthread_unwind_next()
 	}
+	#ifdef BOX32
+	if(box64_is32bits)
+		to_hash_d(et->self);
+	#endif
 	box_free(et->cancels);
 	et->cancels=NULL;
 	et->cancel_size = et->cancel_cap = 0;
@@ -182,6 +185,12 @@ void thread_set_emu(x64emu_t* emu)
 	}
 	et->emu = emu;
 	et->emu->type = EMUTYPE_MAIN;
+	#ifdef BOX32
+	if(box64_is32bits) {
+		et->self = (uintptr_t)pthread_self();
+		et->hself = to_hash(et->self);
+	}
+	#endif
 	pthread_setspecific(thread_key, et);
 }
 
@@ -1097,6 +1106,10 @@ EXPORT int my_pthread_barrier_init(x64emu_t* emu, pthread_barrier_t* bar, my_bar
 
 void init_pthread_helper()
 {
+	#ifdef BOX32
+	if(box64_is32bits)
+		init_pthread_helper_32();
+	#endif
 	real_pthread_cleanup_push_defer = (vFppp_t)dlsym(NULL, "_pthread_cleanup_push_defer");
 	real_pthread_cleanup_pop_restore = (vFpi_t)dlsym(NULL, "_pthread_cleanup_pop_restore");
 	real_pthread_cond_clockwait = (iFppip_t)dlsym(NULL, "pthread_cond_clockwait");
@@ -1129,6 +1142,10 @@ void clean_current_emuthread()
 
+// Tear down the pthread helper: on a BOX32 build running a 32-bit process the
+// 32-bit helper is finalized first, then the stack-size records are cleaned
+// and the emuthread of the calling thread is released.
 void fini_pthread_helper(box64context_t* context)
 {
+	#ifdef BOX32
+	if(box64_is32bits)
+		fini_pthread_helper_32(context);
+	#endif
 	CleanStackSize(context);
 	clean_current_emuthread();
 }
diff --git a/src/libtools/threads32.c b/src/libtools/threads32.c
new file mode 100755
index 00000000..1fc3a2df
--- /dev/null
+++ b/src/libtools/threads32.c
@@ -0,0 +1,854 @@
+// __USE_UNIX98 is needed for the settype / gettype definitions
+#define __USE_UNIX98
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <signal.h>
+#include <errno.h>
+#include <setjmp.h>
+#include <sys/mman.h>
+#include <dlfcn.h>
+
+#include "debug.h"
+#include "box32context.h"
+#include "threads.h"
+#include "emu/x64emu_private.h"
+#include "tools/bridge_private.h"
+#include "x64run.h"
+#include "x64emu.h"
+#include "box64stack.h"
+#include "callback.h"
+#include "custommem.h"
+#include "khash.h"
+#include "emu/x64run_private.h"
+#include "x64trace.h"
+#include "dynarec.h"
+#include "bridge.h"
+#ifdef DYNAREC
+#include "dynablock.h"
+#endif
+
+typedef void (*vFppp_t)(void*, void*, void*);
+typedef void (*vFpi_t)(void*, int);
+//starting with glibc 2.34+, those 2 functions are in libc.so as versionned symbol only
+// So use dlsym to get the symbol unversionned, as simple link will not work.
+static vFppp_t real_pthread_cleanup_push_defer = NULL;
+static vFpi_t real_pthread_cleanup_pop_restore = NULL;
+// those function can be used simply
+void _pthread_cleanup_push(void* buffer, void* routine, void* arg);	// declare hidden functions
+void _pthread_cleanup_pop(void* buffer, int exec);
+
+// Value type of the "threadstack" hash map: the stack address and stack size
+// recorded for a pthread attribute (see AddStackSize / GetStackSize).
+typedef struct threadstack_s {
+	void* 	stack;
+	size_t 	stacksize;
+} threadstack_t;
+
+// longjmp / setjmp
+// Register block of a 32-bit jump buffer: six 32-bit slots, in this order.
+typedef struct jump_buff_i386_s {
+ uint32_t save_ebx;
+ uint32_t save_esi;
+ uint32_t save_edi;
+ uint32_t save_ebp;
+ uint32_t save_esp;
+ uint32_t save_eip;
+} jump_buff_i386_t;
+
+// 32-bit jump buffer with its signal-mask bookkeeping.
+// sigset_t should have the same size on 32bits and 64bits machine (64bits)
+typedef struct __jmp_buf_tag_s {
+    jump_buff_i386_t __jmpbuf;          // saved registers
+    int              __mask_was_saved;
+    sigset_t         __saved_mask;
+} __jmp_buf_tag_t;
+
+// Cancellation unwind buffer as handled by the 32-bit wrappers: one jump
+// buffer entry (registers + mask flag) followed by padding fields.
+// NOTE(review): presumably mirrors the guest __pthread_unwind_buf_t layout - confirm.
+typedef struct x64_unwind_buff_s {
+	struct {
+		jump_buff_i386_t	__cancel_jmp_buf;	
+		int					__mask_was_saved;
+	} __cancel_jmp_buf[1];
+	ptr_t __pad[2];
+	void* __pad3;
+} x64_unwind_buff_t __attribute__((__aligned__));
+
+static pthread_attr_t* get_attr(void* attr);
+static void del_attr(void* attr);
+
+typedef void(*vFv_t)();
+
+KHASH_MAP_INIT_INT(threadstack, threadstack_t)
+#ifndef ANDROID
+KHASH_MAP_INIT_INT(cancelthread, __pthread_unwind_buf_t*)
+#endif
+
+void CleanStackSize(box64context_t* context);
+void FreeStackSize(kh_threadstack_t* map, uintptr_t attr);
+void AddStackSize(kh_threadstack_t* map, uintptr_t attr, void* stack, size_t stacksize);
+int GetStackSize(x64emu_t* emu, uintptr_t attr, void** stack, size_t* stacksize);
+
+static pthread_key_t thread_key;
+
+void my32_longjmp(x64emu_t* emu, /*struct __jmp_buf_tag __env[1]*/void *p, int32_t __val);
+
+// Release an emuthread_t (used as destructor of thread_key).
+// A NULL pointer is ignored. When the thread is not joinable its hash key is
+// destroyed with to_hash_d; then the emulator and the structure are freed.
+static void emuthread_destroy(void* p)
+{
+	emuthread_t *thread = (emuthread_t*)p;
+	if(thread==NULL)
+		return;
+	// destroy the hash key if thread is not joinable
+	if(thread->join==0)
+		to_hash_d(thread->self);
+	// destroy thread emu and all
+	FreeX64Emu(&thread->emu);
+	free(thread);
+}
+
+// Cleanup handler pushed by pthread_routine around DynaRun.
+// Walks the registered cancel buffers from the most recent to the oldest,
+// long-jumping the emulator into each one and running it, then frees the
+// cancel list and destroys the thread hash key.
+static void emuthread_cancel(void* p)
+{
+	emuthread_t *thread = (emuthread_t*)p;
+	if(thread==NULL)
+		return;
+	// check cancels threads
+	int idx = thread->cancel_size;
+	while(--idx >= 0) {
+		thread->emu->flags.quitonlongjmp = 0;
+		x64_unwind_buff_t* ub = (x64_unwind_buff_t*)thread->cancels[idx];
+		my32_longjmp(thread->emu, ub->__cancel_jmp_buf, 1);
+		DynaRun(thread->emu);	// will return after a __pthread_unwind_next()
+	}
+	free(thread->cancels);
+	to_hash_d(thread->self);
+	thread->cancels = NULL;
+	thread->cancel_cap = 0;
+	thread->cancel_size = 0;
+}
+
+// Native entry point of every emulated 32-bit thread.
+// `p` is the emuthread_t prepared by my32_pthread_create / my32_prepare_thread.
+// It is stored under thread_key, the emulated stack is set up with a fake
+// frame, the argument and the exit return address, then the emulator runs the
+// guest function. The value found in EAX afterwards is returned.
+static void* pthread_routine(void* p)
+{
+	// free current emuthread if it exist
+	{
+		void* t = pthread_getspecific(thread_key);
+		if(t) {
+			// not sure how this could happens
+			printf_log(LOG_INFO, "Clean of an existing ET for Thread %04d\n", GetTID());
+			emuthread_destroy(t);
+		}
+	}
+	pthread_setspecific(thread_key, p);
+	// call the function
+	emuthread_t *et = (emuthread_t*)p;
+	et->emu->type = EMUTYPE_MAIN;
+	// record the native thread id and its hash
+	et->self = (uintptr_t)pthread_self();
+	et->hself = to_hash(et->self);
+	// setup callstack and run...
+	x64emu_t* emu = et->emu;
+	Push_32(emu, 0);	// PUSH 0 (backtrace marker: return address is 0)
+	Push_32(emu, 0);	// PUSH BP
+	R_EBP = R_ESP;	// MOV BP, SP
+    R_ESP -= 32;	// guard area
+	R_ESP &=~15;	// align the stack pointer on 16 bytes
+	Push_32(emu, to_ptrv(et->arg));	// single argument of the thread function
+	PushExit_32(emu);
+	R_EIP = to_ptr(et->fnc);
+	// emuthread_cancel is registered as cleanup handler while the guest code runs
+	pthread_cleanup_push(emuthread_cancel, p);
+	DynaRun(et->emu);
+	pthread_cleanup_pop(0);
+	void* ret = from_ptrv(R_EAX);
+	return ret;
+}
+
+// Destroy a guest pthread attribute: forget the stack recorded for it (when
+// the stack-size map exists), destroy the native attribute obtained from
+// get_attr, then release the wrapper with del_attr.
+// Returns the result of pthread_attr_destroy.
+EXPORT int my32_pthread_attr_destroy(x64emu_t* emu, void* attr)
+{
+	if(emu->context->stacksizes != NULL)
+		FreeStackSize(emu->context->stacksizes, (uintptr_t)attr);
+	pthread_attr_t* native = get_attr(attr);
+	int result = pthread_attr_destroy(native);
+	del_attr(attr);
+	return result;
+}
+
+// Query the stack address and size of a guest pthread attribute.
+// The native values are read into host-sized locals (and replaced by the
+// values recorded through my32_pthread_attr_setstack when GetStackSize has
+// them), then stored through the guest pointers as 32-bit ptr_t / ulong_t,
+// the same way my32_pthread_attr_getstackaddr / getstacksize do. Writing a
+// host void* / size_t directly would store 8 bytes in each guest slot.
+// NOTE(review): assumes the wrapper hands over raw guest pointers - confirm
+// against the wrapper signature.
+// Returns the result of pthread_attr_getstack; nothing is written on failure.
+EXPORT int my32_pthread_attr_getstack(x64emu_t* emu, void* attr, void** stackaddr, size_t* stacksize)
+{
+	void* stack = NULL;
+	size_t size = 0;
+	int ret = pthread_attr_getstack(get_attr(attr), &stack, &size);
+	if (ret!=0)
+		return ret;
+	GetStackSize(emu, (uintptr_t)attr, &stack, &size);
+	if(stackaddr)
+		*(ptr_t*)stackaddr = to_ptrv(stack);
+	if(stacksize)
+		*(ulong_t*)stacksize = to_ulong(size);
+	return ret;
+}
+
+// Record the stack requested by the guest for this attribute in the
+// stack-size map (created on first use). The native attribute only receives
+// the size: pthread_attr_setstack is deliberately not called.
+// Returns the result of pthread_attr_setstacksize.
+EXPORT int my32_pthread_attr_setstack(x64emu_t* emu, void* attr, void* stackaddr, size_t stacksize)
+{
+	if(emu->context->stacksizes == NULL)
+		emu->context->stacksizes = kh_init(threadstack);
+	AddStackSize(emu->context->stacksizes, (uintptr_t)attr, stackaddr, stacksize);
+	//Don't call actual setstack...
+	pthread_attr_t* native = get_attr(attr);
+	return pthread_attr_setstacksize(native, stacksize);
+}
+
+// Create an emulated 32-bit thread.
+//  t             : where pthread_create stores the new thread id
+//  attr          : guest attribute (may be NULL), translated with get_attr
+//  start_routine : guest function to run, arg: its argument
+// The emulated stack is either the one recorded for `attr` or a fresh
+// anonymous mapping (2Mo by default, or the attribute stack size).
+// Returns the pthread_create result, or EAGAIN when the stack or the
+// emuthread cannot be allocated. When pthread_create fails, the emulator and
+// the emuthread are released (same sequence as emuthread_destroy).
+EXPORT int my32_pthread_create(x64emu_t *emu, void* t, void* attr, void* start_routine, void* arg)
+{
+	int stacksize = 2*1024*1024;	//default stack size is 2Mo
+	void* attr_stack;
+	size_t attr_stacksize;
+	int own;
+	void* stack;
+
+	if(attr) {
+		size_t stsize;
+		if(pthread_attr_getstacksize(get_attr(attr), &stsize)==0)
+			stacksize = stsize;
+		if(stacksize<512*1024)	// emu and all needs some stack space, don't go too low
+			pthread_attr_setstacksize(get_attr(attr), 512*1024);
+	}
+	if(GetStackSize(emu, (uintptr_t)attr, &attr_stack, &attr_stacksize))
+	{
+		stack = attr_stack;
+		stacksize = attr_stacksize;
+		own = 0;
+	} else {
+		stack = mmap64(NULL, stacksize, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_GROWSDOWN, -1, 0);
+		if(stack==MAP_FAILED)
+			return EAGAIN;	// no memory for the emulated stack
+		own = 1;
+	}
+
+	emuthread_t *et = (emuthread_t*)calloc(1, sizeof(emuthread_t));
+	if(!et) {
+		if(own)
+			munmap(stack, stacksize);
+		return EAGAIN;
+	}
+    x64emu_t *emuthread = NewX64Emu(my_context, (uintptr_t)start_routine, (uintptr_t)stack, stacksize, own);
+	SetupX64Emu(emuthread, emu);
+	et->emu = emuthread;
+	et->fnc = (uintptr_t)start_routine;
+	et->arg = arg;
+	// threads are joinable unless the attribute says otherwise
+	et->join = 1;
+	if(attr) {
+		int j = PTHREAD_CREATE_JOINABLE;
+		pthread_attr_getdetachstate(get_attr(attr), &j);
+		et->join = (j==PTHREAD_CREATE_JOINABLE)?1:0;
+	}
+	#ifdef DYNAREC
+	if(box64_dynarec) {
+		// pre-creation of the JIT code for the entry point of the thread
+		DBGetBlock(emu, (uintptr_t)start_routine, 1, 1);
+	}
+	#endif
+	// create thread
+	int ret = pthread_create((pthread_t*)t, get_attr(attr), 
+		pthread_routine, et);
+	if(ret) {
+		// the thread never started: nobody else will release these
+		FreeX64Emu(&et->emu);
+		free(et);
+	}
+	return ret;
+}
+
+// Detach thread `p`. When a thread detaches itself, its emuthread (if one is
+// registered under thread_key) is flagged as not joinable so that
+// emuthread_destroy destroys its hash key.
+// pthread_getspecific may return NULL for a thread that never went through
+// pthread_routine, so the emuthread pointer is checked before use.
+// Returns the result of pthread_detach.
+EXPORT int my32_pthread_detach(x64emu_t* emu, pthread_t p)
+{
+	if(pthread_equal(p ,pthread_self())) {
+		emuthread_t *et = (emuthread_t*)pthread_getspecific(thread_key);
+		if(et)
+			et->join = 0;
+	}
+	return pthread_detach(p);
+}
+
+// Build everything needed to start an emulated thread without creating it:
+// maps a stack (`ssize` bytes, or 2Mo when ssize is 0), allocates the
+// emuthread and its emulator for guest function `f` with argument `arg`.
+// The emuthread is stored in *pet; the native routine to pass to the thread
+// creation function (pthread_routine) is returned.
+void* my32_prepare_thread(x64emu_t *emu, void* f, void* arg, int ssize, void** pet)
+{
+	int stacksize = 2*1024*1024;	//default stack size is 2Mo
+	if(ssize)
+		stacksize = ssize;
+	void* stack = mmap64(NULL, stacksize, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_GROWSDOWN, -1, 0);
+	emuthread_t *thread = (emuthread_t*)calloc(1, sizeof(emuthread_t));
+    x64emu_t *child = NewX64Emu(emu->context, (uintptr_t)f, (uintptr_t)stack, stacksize, 1);
+	SetupX64Emu(child, emu);
+	thread->emu = child;
+	thread->fnc = (uintptr_t)f;
+	thread->arg = arg;
+	#ifdef DYNAREC
+	if(box64_dynarec) {
+		// pre-creation of the JIT code for the entry point of the thread
+		DBGetBlock(emu, (uintptr_t)f, 1, 1);
+	}
+	#endif
+	*pet = thread;
+	return pthread_routine;
+}
+
+void my32_longjmp(x64emu_t* emu, /*struct __jmp_buf_tag __env[1]*/void *p, int32_t __val);
+
+// Register a cancellation unwind buffer for the calling thread.
+// The real argument is read from EAX (the stack-based `buff` is ignored).
+// The buffer is appended to the emuthread cancel list, which grows by 8
+// entries at a time. Nothing is registered when the thread has no emuthread
+// under thread_key or when the list cannot be grown (the previous list is
+// kept intact in that case).
+EXPORT void my32___pthread_register_cancel(x64emu_t* emu, x64_unwind_buff_t* buff)
+{
+	buff = (x64_unwind_buff_t*)from_ptr(R_EAX);	// param is in fact on register
+	emuthread_t *et = (emuthread_t*)pthread_getspecific(thread_key);
+	if(!et)
+		return;
+	if(et->cancel_cap == et->cancel_size) {
+		int newcap = et->cancel_cap + 8;
+		void* grown = realloc(et->cancels, sizeof(x64_unwind_buff_t*)*newcap);
+		if(!grown)
+			return;
+		et->cancels = grown;
+		et->cancel_cap = newcap;
+	}
+	et->cancels[et->cancel_size++] = buff;
+}
+
+// Remove a cancellation unwind buffer from the calling thread's cancel list.
+// Like my32___pthread_register_cancel, the argument is taken from EAX: on
+// i386 glibc declares both functions with regparm(1), so the stack-based
+// `buff` does not hold the real value.
+// Every entry equal to the buffer is removed; nothing is done when the
+// thread has no emuthread under thread_key.
+EXPORT void my32___pthread_unregister_cancel(x64emu_t* emu, x64_unwind_buff_t* buff)
+{
+	buff = (x64_unwind_buff_t*)from_ptr(R_EAX);	// param is in fact on register
+	emuthread_t *et = (emuthread_t*)pthread_getspecific(thread_key);
+	if(!et)
+		return;
+	for (int i=et->cancel_size-1; i>=0; --i) {
+		if(et->cancels[i] == buff) {
+			// close the gap left by the removed entry
+			if(i!=et->cancel_size-1)
+				memmove(et->cancels+i, et->cancels+i+1, sizeof(x64_unwind_buff_t*)*(et->cancel_size-i-1));
+			et->cancel_size--;
+		}
+	}
+}
+
+// Wrapper for __pthread_unwind_next: only sets the quit flag of the emulator
+// (emuthread_cancel relies on DynaRun returning after this call).
+// `p` is not used.
+EXPORT void my32___pthread_unwind_next(x64emu_t* emu, void* p)
+{
+	emu->quit = 1;
+}
+
+KHASH_MAP_INIT_INT(once, int)
+
+#define SUPER() \
+GO(0)			\
+GO(1)			\
+GO(2)			\
+GO(3)			\
+GO(4)			\
+GO(5)			\
+GO(6)			\
+GO(7)			\
+GO(8)			\
+GO(9)			\
+GO(10)			\
+GO(11)			\
+GO(12)			\
+GO(13)			\
+GO(14)			\
+GO(15)			\
+GO(16)			\
+GO(17)			\
+GO(18)			\
+GO(19)			\
+GO(20)			\
+GO(21)			\
+GO(22)			\
+GO(23)			\
+GO(24)			\
+GO(25)			\
+GO(26)			\
+GO(27)			\
+GO(28)			\
+GO(29)			
+
+// cleanup_routine
+#define GO(A)   \
+static uintptr_t my32_cleanup_routine_fct_##A = 0;  						\
+static void my32_cleanup_routine_##A(void* a)    							\
+{                                       								\
+    RunFunctionFmt(my32_cleanup_routine_fct_##A, "p", to_ptrv(a));	\
+}
+SUPER()
+#undef GO
+// Map a guest cleanup routine to a native callable.
+// NULL is returned as is; a function known to GetNativeFnc is returned
+// directly. Otherwise the slot already bound to `fct` is reused, or the first
+// free slot is bound to it. Returns NULL (after a warning) when all slots
+// are taken.
+static void* findcleanup_routineFct(void* fct)
+{
+    if(fct==NULL) return NULL;
+    void* native = GetNativeFnc((uintptr_t)fct);
+    if(native) return native;
+    // already bound to a slot?
+    #define GO(A) if(my32_cleanup_routine_fct_##A == (uintptr_t)fct) return my32_cleanup_routine_##A;
+    SUPER()
+    #undef GO
+    // bind the first free slot
+    #define GO(A) if(my32_cleanup_routine_fct_##A == 0) {my32_cleanup_routine_fct_##A = (uintptr_t)fct; return my32_cleanup_routine_##A; }
+    SUPER()
+    #undef GO
+    printf_log(LOG_NONE, "Warning, no more slot for pthread cleanup_routine callback\n");
+    return NULL;
+}
+
+// key_destructor
+#define GO(A)   \
+static uintptr_t my32_key_destructor_fct_##A = 0;  						\
+static void my32_key_destructor_##A(void* a)    							\
+{                                       								\
+    RunFunctionFmt(my32_key_destructor_fct_##A, "p", to_ptrv(a));	\
+}
+SUPER()
+#undef GO
+// Map a guest key destructor to a native callable.
+// NULL is returned as is; a function known to GetNativeFnc is returned
+// directly. Otherwise the slot already bound to `fct` is reused, or the first
+// free slot is bound to it. Returns NULL (after a warning) when all slots
+// are taken.
+static void* findkey_destructorFct(void* fct)
+{
+    if(fct==NULL) return NULL;
+    void* native = GetNativeFnc((uintptr_t)fct);
+    if(native) return native;
+    // already bound to a slot?
+    #define GO(A) if(my32_key_destructor_fct_##A == (uintptr_t)fct) return my32_key_destructor_##A;
+    SUPER()
+    #undef GO
+    // bind the first free slot
+    #define GO(A) if(my32_key_destructor_fct_##A == 0) {my32_key_destructor_fct_##A = (uintptr_t)fct; return my32_key_destructor_##A; }
+    SUPER()
+    #undef GO
+    printf_log(LOG_NONE, "Warning, no more slot for pthread key_destructor callback\n");
+    return NULL;
+}
+
+#undef SUPER
+
+
+// Emulated pthread_once: runs guest callback `cb` the first time `*once` is
+// seen as 0, and always returns 0.
+// `*once` is set to 1 before the callback runs (atomically exchanged with
+// DYNAREC, under my_context->mutex_lock otherwise); any caller that then reads
+// a non-zero value returns at once.
+int EXPORT my32_pthread_once(x64emu_t* emu, int* once, void* cb)
+{
+	if(*once)	// quick test first
+		return 0;
+	// slow test now
+	#ifdef DYNAREC
+	int old = native_lock_xchg_d(once, 1);
+	#else
+	int old = *once;	// outside of the mutex in case once is badly formed
+	pthread_mutex_lock(&my_context->mutex_lock);
+	old = *once;
+	*once = 1;
+	pthread_mutex_unlock(&my_context->mutex_lock);
+	#endif
+	if(old)
+		return 0;
+    // make some room and align ESP before doing the call (maybe it would be simpler to just use Callback functions)
+    Push_32(emu, R_EBP); // push ebp
+    R_EBP = R_ESP;      // mov ebp, esp
+    R_ESP -= 0x200;     // reserve 0x200 bytes
+    R_ESP &= ~63LL;     // align on 64 bytes
+	DynaCall(emu, (uintptr_t)cb);
+	R_ESP = R_EBP;          // mov esp, ebp
+	R_EBP = Pop32(emu);     // pop ebp
+	return 0;
+}
+EXPORT int my32___pthread_once(x64emu_t* emu, void* once, void* cb) __attribute__((alias("my32_pthread_once")));
+
+// Create a thread-specific key; the guest destructor is first translated to
+// a native callable with findkey_destructorFct.
+// Returns the result of pthread_key_create.
+EXPORT int my32_pthread_key_create(x64emu_t* emu, void* key, void* dtor)
+{
+	void* native_dtor = findkey_destructorFct(dtor);
+	return pthread_key_create(key, native_dtor);
+}
+EXPORT int my32___pthread_key_create(x64emu_t* emu, void* key, void* dtor) __attribute__((alias("my32_pthread_key_create")));
+
+// pthread_cond_init with a null attr seems to only write 1 (NULL) dword on x64, while it's 48 bytes on ARM. 
+// Not sure why, as sizeof(pthread_cond_t) is 48 on both platforms... But Neverwinter Nights init seems to rely on that
+// What about conds that are statically initialized? 
+// Note, this is a versioned function (the pthread_cond_*), and this seems to correspond to an old behaviour
+
+KHASH_MAP_INIT_INT(mapcond, pthread_cond_t*);
+
+// should all access to that map be behind a mutex?
+kh_mapcond_t *mapcond = NULL;
+
+// Return the native condition variable associated with guest address `cond`
+// in the mapcond table, allocating a zeroed one when the address is new.
+// The table is accessed with my_context->mutex_thread held.
+static pthread_cond_t* add_cond(void* cond)
+{
+	int absent;
+	pthread_cond_t *native;
+	mutex_lock(&my_context->mutex_thread);
+	khint_t slot = kh_put(mapcond, mapcond, (uintptr_t)cond, &absent);
+	if(absent) {
+		native = (pthread_cond_t*)calloc(1, sizeof(pthread_cond_t));
+		kh_value(mapcond, slot) = native;
+	} else {
+		// already there... reinit an existing one?
+		native = kh_value(mapcond, slot);
+	}
+	mutex_unlock(&my_context->mutex_thread);
+	return native;
+}
+// Find the native condition variable for guest `cond`.
+// The table is searched first with the value stored at `cond`, then with the
+// address `cond` itself. When both lookups fail, a new zeroed condition is
+// allocated, registered under the address and initialized with default
+// attributes. The table is accessed with my_context->mutex_thread held.
+static pthread_cond_t* get_cond(void* cond)
+{
+	pthread_cond_t* native;
+	mutex_lock(&my_context->mutex_thread);
+	khint_t slot = kh_get(mapcond, mapcond, *(uintptr_t*)cond);
+	if(slot==kh_end(mapcond))
+		slot = kh_get(mapcond, mapcond, (uintptr_t)cond);
+	if(slot!=kh_end(mapcond)) {
+		native = kh_value(mapcond, slot);
+	} else {
+		int absent;
+		printf_log(LOG_DEBUG, "BOX32: Note: phtread_cond not found, create a new empty one\n");
+		native = (pthread_cond_t*)calloc(1, sizeof(pthread_cond_t));
+		slot = kh_put(mapcond, mapcond, (uintptr_t)cond, &absent);
+		kh_value(mapcond, slot) = native;
+		pthread_cond_init(native, NULL);
+	}
+	mutex_unlock(&my_context->mutex_thread);
+	return native;
+}
+// Remove the native condition variable associated with guest `cond` from the
+// mapcond table and free it. Does nothing when the table does not exist or
+// when no entry is found.
+// The lookup follows the same order as get_cond: first the value stored at
+// `cond`, then the address `cond` itself. add_cond and get_cond insert
+// entries under the address, so looking up the stored value only would never
+// find them.
+static void del_cond(void* cond)
+{
+	if(!mapcond)
+		return;
+	mutex_lock(&my_context->mutex_thread);
+	khint_t k = kh_get(mapcond, mapcond, *(uintptr_t*)cond);
+	if(k==kh_end(mapcond))
+		k = kh_get(mapcond, mapcond, (uintptr_t)cond);
+	if(k!=kh_end(mapcond)) {
+		free(kh_value(mapcond, k));
+		kh_del(mapcond, mapcond, k);
+	}
+	mutex_unlock(&my_context->mutex_thread);
+}
+pthread_mutex_t* getAlignedMutex(pthread_mutex_t* m);
+
+// Old-version pthread_cond_broadcast: acts on the native condition that
+// get_cond associates with the guest one.
+EXPORT int my32_pthread_cond_broadcast_old(x64emu_t* emu, void* cond)
+{
+	return pthread_cond_broadcast(get_cond(cond));
+}
+// Old-version pthread_cond_destroy: destroys the native condition associated
+// with the guest one, then drops the association with del_cond (unless the
+// native pointer is the guest pointer itself).
+// Returns the result of pthread_cond_destroy.
+EXPORT int my32_pthread_cond_destroy_old(x64emu_t* emu, void* cond)
+{
+	pthread_cond_t* native = get_cond(cond);
+	int result = pthread_cond_destroy(native);
+	if((void*)native != cond)
+		del_cond(cond);
+	return result;
+}
+// Old-version pthread_cond_init: initializes the native condition that
+// add_cond associates with the guest address; `attr` is passed through.
+EXPORT int my32_pthread_cond_init_old(x64emu_t* emu, void* cond, void* attr)
+{
+	const pthread_condattr_t* native_attr = (const pthread_condattr_t*)attr;
+	return pthread_cond_init(add_cond(cond), native_attr);
+}
+// Old-version pthread_cond_signal: acts on the native condition that
+// get_cond associates with the guest one.
+EXPORT int my32_pthread_cond_signal_old(x64emu_t* emu, void* cond)
+{
+	return pthread_cond_signal(get_cond(cond));
+}
+// Old-version pthread_cond_timedwait: waits on the native condition given by
+// get_cond, with the native mutex given by getAlignedMutex; `abstime` is
+// passed through as a struct timespec.
+EXPORT int my32_pthread_cond_timedwait_old(x64emu_t* emu, void* cond, void* mutex, void* abstime)
+{
+	pthread_cond_t* native_cond = get_cond(cond);
+	pthread_mutex_t* native_mutex = getAlignedMutex((pthread_mutex_t*)mutex);
+	const struct timespec* deadline = (const struct timespec*)abstime;
+	return pthread_cond_timedwait(native_cond, native_mutex, deadline);
+}
+// Old-version pthread_cond_wait: waits on the native condition given by
+// get_cond, with the native mutex given by getAlignedMutex.
+EXPORT int my32_pthread_cond_wait_old(x64emu_t* emu, void* cond, void* mutex)
+{
+	pthread_cond_t* native_cond = get_cond(cond);
+	pthread_mutex_t* native_mutex = getAlignedMutex((pthread_mutex_t*)mutex);
+	return pthread_cond_wait(native_cond, native_mutex);
+}
+
+// pthread_cond_timedwait: the condition and `abstime` are used as given, the
+// mutex is replaced by the native one returned by getAlignedMutex.
+EXPORT int my32_pthread_cond_timedwait(x64emu_t* emu, void* cond, void* mutex, void* abstime)
+{
+	pthread_mutex_t* native_mutex = getAlignedMutex((pthread_mutex_t*)mutex);
+	const struct timespec* deadline = (const struct timespec*)abstime;
+	return pthread_cond_timedwait((pthread_cond_t*)cond, native_mutex, deadline);
+}
+// pthread_cond_wait: the condition is used as given, the mutex is replaced
+// by the native one returned by getAlignedMutex.
+EXPORT int my32_pthread_cond_wait(x64emu_t* emu, void* cond, void* mutex)
+{
+	pthread_mutex_t* native_mutex = getAlignedMutex((pthread_mutex_t*)mutex);
+	return pthread_cond_wait((pthread_cond_t*)cond, native_mutex);
+}
+
+// pthread_mutexattr_setkind_np is forwarded to pthread_mutexattr_settype with
+// `kind` unchanged.
+EXPORT int my32_pthread_mutexattr_setkind_np(x64emu_t* emu, void* t, int kind)
+{
+    // does "kind" needs some type of translation?
+    int result = pthread_mutexattr_settype(t, kind);
+    return result;
+}
+
+// pthread_attr_t on x64 is 36 bytes
+static uint64_t ATTR_SIGN = 0xA055E10CDE98LL;	// random signature
+// Layout written over the guest pthread attribute by get_attr: a signature
+// (ATTR_SIGN once set up) and a pointer to the heap-allocated native attribute.
+typedef struct my32_x64_attr_s {
+	uint64_t		sign;
+	pthread_attr_t*	attr;
+} my32_x64_attr_t;
+
+// Return the native pthread_attr_t behind a guest attribute (NULL for NULL).
+// When the guest memory does not carry ATTR_SIGN yet, a zeroed native
+// attribute is allocated and the guest memory is stamped with the signature
+// and the pointer.
+static pthread_attr_t* get_attr(void* attr)
+{
+	my32_x64_attr_t* wrapped = (my32_x64_attr_t*)attr;
+	if(wrapped==NULL)
+		return NULL;
+	if(wrapped->sign==ATTR_SIGN)
+		return wrapped->attr;
+	// first use of this guest attribute
+	wrapped->attr = (pthread_attr_t*)calloc(1, sizeof(pthread_attr_t));
+	wrapped->sign = ATTR_SIGN;
+	return wrapped->attr;
+}
+// Release the native attribute attached to a guest attribute by get_attr.
+// Nothing is done for NULL or for memory that does not carry ATTR_SIGN; the
+// signature is cleared before the native attribute is freed.
+static void del_attr(void* attr)
+{
+	my32_x64_attr_t* wrapped = (my32_x64_attr_t*)attr;
+	if(wrapped==NULL || wrapped->sign!=ATTR_SIGN)
+		return;
+	wrapped->sign = 0;
+	free(wrapped->attr);
+}
+
+// pthread_attr_init on the native attribute that get_attr attaches to the
+// guest one.
+EXPORT int my32_pthread_attr_init(x64emu_t* emu, void* attr)
+{
+	pthread_attr_t* native = get_attr(attr);
+	return pthread_attr_init(native);
+}
+
+// pthread_attr_getdetachstate on the native attribute; the result is stored
+// through `p`.
+EXPORT int my32_pthread_attr_getdetachstate(x64emu_t* emu, void* attr, void* p)
+{
+	pthread_attr_t* native = get_attr(attr);
+	return pthread_attr_getdetachstate(native, p);
+}
+// pthread_attr_getguardsize on the native attribute; the result is stored
+// through `p`.
+EXPORT int my32_pthread_attr_getguardsize(x64emu_t* emu, void* attr, void* p)
+{
+	pthread_attr_t* native = get_attr(attr);
+	return pthread_attr_getguardsize(native, p);
+}
+// pthread_attr_getinheritsched on the native attribute; the result is stored
+// through `p`.
+EXPORT int my32_pthread_attr_getinheritsched(x64emu_t* emu, void* attr, void* p)
+{
+	pthread_attr_t* native = get_attr(attr);
+	return pthread_attr_getinheritsched(native, p);
+}
+// pthread_attr_getschedparam on the native attribute; the result is stored
+// through `p`.
+EXPORT int my32_pthread_attr_getschedparam(x64emu_t* emu, void* attr, void* p)
+{
+	pthread_attr_t* native = get_attr(attr);
+	return pthread_attr_getschedparam(native, p);
+}
+// pthread_attr_getschedpolicy on the native attribute; the result is stored
+// through `p`.
+EXPORT int my32_pthread_attr_getschedpolicy(x64emu_t* emu, void* attr, void* p)
+{
+	pthread_attr_t* native = get_attr(attr);
+	return pthread_attr_getschedpolicy(native, p);
+}
+// pthread_attr_getscope on the native attribute; the result is stored
+// through `p`.
+EXPORT int my32_pthread_attr_getscope(x64emu_t* emu, void* attr, void* p)
+{
+	pthread_attr_t* native = get_attr(attr);
+	return pthread_attr_getscope(native, p);
+}
+// Read the stack address of the native attribute with pthread_attr_getstack
+// and store it in *p as a 32-bit guest pointer.
+// Returns the result of pthread_attr_getstack.
+EXPORT int my32_pthread_attr_getstackaddr(x64emu_t* emu, void* attr, ptr_t* p)
+{
+	void* addr;
+	size_t ignored_size;
+	pthread_attr_t* native = get_attr(attr);
+	int result = pthread_attr_getstack(native, &addr, &ignored_size);
+	*p = to_ptrv(addr);
+	return result;
+}
+// Read the stack size of the native attribute and store it in *p as a 32-bit
+// guest value.
+// pthread_attr_getstacksize is used instead of pthread_attr_getstack because
+// the latter hands back the raw stored size, which is 0 for an attribute whose
+// size was never set. *p is only written on success, so no uninitialized value
+// is ever stored.
+// Returns the result of pthread_attr_getstacksize.
+EXPORT int my32_pthread_attr_getstacksize(x64emu_t* emu, void* attr, ulong_t* p)
+{
+	size_t size = 0;
+	int ret = pthread_attr_getstacksize(get_attr(attr), &size);
+	if(ret==0)
+		*p = to_ulong(size);
+	return ret;
+}
+// pthread_attr_setdetachstate on the native attribute, with `p` unchanged.
+EXPORT int my32_pthread_attr_setdetachstate(x64emu_t* emu, void* attr, int p)
+{
+	pthread_attr_t* native = get_attr(attr);
+	return pthread_attr_setdetachstate(native, p);
+}
+// pthread_attr_setguardsize on the native attribute, with `p` unchanged.
+EXPORT int my32_pthread_attr_setguardsize(x64emu_t* emu, void* attr, size_t p)
+{
+	pthread_attr_t* native = get_attr(attr);
+	return pthread_attr_setguardsize(native, p);
+}
+// pthread_attr_setinheritsched on the native attribute, with `p` unchanged.
+EXPORT int my32_pthread_attr_setinheritsched(x64emu_t* emu, void* attr, int p)
+{
+	pthread_attr_t* native = get_attr(attr);
+	return pthread_attr_setinheritsched(native, p);
+}
+// Set the scheduling parameter of the native attribute, but only when the
+// priority read from `param` lies within the min/max priority of the
+// attribute's current policy. Otherwise (or when `param` is NULL) the call
+// is logged, ignored, and 0 is returned to fake success.
+EXPORT int my32_pthread_attr_setschedparam(x64emu_t* emu, void* attr, void* param)
+{
+    pthread_attr_t* native = get_attr(attr);
+    int policy;
+    pthread_attr_getschedpolicy(native, &policy);
+    int lowest = sched_get_priority_min(policy);
+    int highest = sched_get_priority_max(policy);
+    int prio = param ? *(int*)param : -1;
+    if(param && prio>=lowest && prio<=highest)
+        return pthread_attr_setschedparam(native, param);
+    printf_log(LOG_INFO, "Warning, call to pthread_attr_setschedparam(%p, %p[%d]) ignored\n", attr, param, prio);
+    return 0;   // faking success
+}
+// pthread_attr_setschedpolicy on the native attribute, with `p` unchanged.
+EXPORT int my32_pthread_attr_setschedpolicy(x64emu_t* emu, void* attr, int p)
+{
+	pthread_attr_t* native = get_attr(attr);
+	return pthread_attr_setschedpolicy(native, p);
+}
+// Set the stack address of the native attribute with pthread_attr_setstack,
+// using the size reported by my32_pthread_attr_getstacksize (the local starts
+// at 2Mo and keeps that value only if the query does not overwrite it).
+EXPORT int my32_pthread_attr_setstackaddr(x64emu_t* emu, void* attr, void* p)
+{
+	ulong_t current_size = 2*1024*1024;
+	my32_pthread_attr_getstacksize(emu, attr, &current_size);
+	pthread_attr_t* native = get_attr(attr);
+	return pthread_attr_setstack(native, p, current_size);
+}
+// Set the stack size of the native attribute.
+// pthread_attr_setstacksize is called directly, as my32_pthread_attr_setstack
+// already does. Going through pthread_attr_setstack with the queried stack
+// address would register a caller-provided stack at that address, which is
+// NULL when the guest never set one.
+// Returns the result of pthread_attr_setstacksize.
+EXPORT int my32_pthread_attr_setstacksize(x64emu_t* emu, void* attr, size_t p)
+{
+	return pthread_attr_setstacksize(get_attr(attr), p);
+}
+
+
+// Set the contention scope of the native attribute. The scope is either
+// PTHREAD_SCOPE_SYSTEM or PTHREAD_SCOPE_PROCESS, but PTHREAD_SCOPE_PROCESS
+// doesn't seem supported on ARM linux and PTHREAD_SCOPE_SYSTEM is the default,
+// so PTHREAD_SCOPE_SYSTEM is always applied; any other request is logged.
+EXPORT int my32_pthread_attr_setscope(x64emu_t* emu, void* attr, int scope)
+{
+    if(scope!=PTHREAD_SCOPE_SYSTEM) {
+        printf_log(LOG_INFO, "Warning, scope of call to pthread_attr_setscope(...) changed from %d to PTHREAD_SCOPE_SYSTEM\n", scope);
+    }
+    pthread_attr_t* native = get_attr(attr);
+    return pthread_attr_setscope(native, PTHREAD_SCOPE_SYSTEM);
+}
+
+#ifndef ANDROID
+// Forward to _pthread_cleanup_push_defer (resolved with dlsym in
+// init_pthread_helper_32); the guest routine is translated with
+// findcleanup_routineFct, `buffer` and `arg` are passed through.
+EXPORT void my32__pthread_cleanup_push_defer(x64emu_t* emu, void* buffer, void* routine, void* arg)
+{
+	real_pthread_cleanup_push_defer(buffer, findcleanup_routineFct(routine), arg);
+}
+
+// Forward to _pthread_cleanup_push; the guest routine is translated with
+// findcleanup_routineFct, `buffer` and `arg` are passed through.
+EXPORT void my32__pthread_cleanup_push(x64emu_t* emu, void* buffer, void* routine, void* arg)
+{
+	_pthread_cleanup_push(buffer, findcleanup_routineFct(routine), arg);
+}
+
+// Forward to _pthread_cleanup_pop_restore (resolved with dlsym in
+// init_pthread_helper_32) with the arguments unchanged.
+EXPORT void my32__pthread_cleanup_pop_restore(x64emu_t* emu, void* buffer, int exec)
+{
+	real_pthread_cleanup_pop_restore(buffer, exec);
+}
+
+// Forward to _pthread_cleanup_pop with the arguments unchanged.
+EXPORT void my32__pthread_cleanup_pop(x64emu_t* emu, void* buffer, int exec)
+{
+	_pthread_cleanup_pop(buffer, exec);
+}
+
+// getaffinity_np (pthread or attr) has an "old" version (glibc-2.3.3) that only has 2 args, cpusetsize is omitted
+// Wrapper for pthread_getaffinity_np.
+// When `cpusetsize` is larger than 0x1000 it is taken to be the cpuset
+// pointer of the old 2-argument version, and sizeof(cpu_set_t) is used as
+// size. The comparison is unsigned: a guest pointer above 2GB is negative as
+// an int and would otherwise be mistaken for a size.
+// pthread functions report failure through a non-zero return value and leave
+// errno alone, so the warning is triggered by, and prints, that value.
+EXPORT int my32_pthread_getaffinity_np(x64emu_t* emu, pthread_t thread, int cpusetsize, void* cpuset)
+{
+	if((uint32_t)cpusetsize>0x1000) {
+		// probably old version of the function, that didn't have cpusetsize....
+		cpuset = from_ptrv(cpusetsize);
+		cpusetsize = sizeof(cpu_set_t);
+	} 
+
+	int ret = pthread_getaffinity_np(thread, cpusetsize, cpuset);
+	if(ret!=0) {
+		printf_log(LOG_INFO, "Warning, pthread_getaffinity_np(%p, %d, %p) errored, with errno=%d\n", (void*)thread, cpusetsize, cpuset, ret);
+	}
+
+    return ret;
+}
+
+// Wrapper for pthread_setaffinity_np.
+// When `cpusetsize` is larger than 0x1000 it is taken to be the cpuset
+// pointer of the old 2-argument version, and sizeof(cpu_set_t) is used as
+// size. The comparison is unsigned: a guest pointer above 2GB is negative as
+// an int and would otherwise be mistaken for a size.
+// pthread functions report failure through a non-zero return value and leave
+// errno alone, so the warning is triggered by, and prints, that value.
+EXPORT int my32_pthread_setaffinity_np(x64emu_t* emu, pthread_t thread, int cpusetsize, void* cpuset)
+{
+	if((uint32_t)cpusetsize>0x1000) {
+		// probably old version of the function, that didn't have cpusetsize....
+		cpuset = from_ptrv(cpusetsize);
+		cpusetsize = sizeof(cpu_set_t);
+	} 
+
+	int ret = pthread_setaffinity_np(thread, cpusetsize, cpuset);
+	if(ret!=0) {
+		printf_log(LOG_INFO, "Warning, pthread_setaffinity_np(%p, %d, %p) errored, with errno=%d\n", (void*)thread, cpusetsize, cpuset, ret);
+	}
+
+    return ret;
+}
+
+// Wrapper for pthread_attr_setaffinity_np.
+// The call is made on the native attribute returned by get_attr, like every
+// other attribute wrapper in this file; the guest attribute memory only holds
+// the signature and a pointer, not a native pthread_attr_t.
+// When `cpusetsize` is larger than 0x1000 it is taken to be the cpuset
+// pointer of the old 2-argument version, and sizeof(cpu_set_t) is used.
+// pthread functions report failure through a non-zero return value and leave
+// errno alone, so the warning is triggered by, and prints, that value.
+EXPORT int my32_pthread_attr_setaffinity_np(x64emu_t* emu, void* attr, uint32_t cpusetsize, void* cpuset)
+{
+	if(cpusetsize>0x1000) {
+		// probably old version of the function, that didn't have cpusetsize....
+		cpuset = from_ptrv(cpusetsize);
+		cpusetsize = sizeof(cpu_set_t);
+	} 
+
+	int ret = pthread_attr_setaffinity_np(get_attr(attr), cpusetsize, cpuset);
+	if(ret!=0) {
+		printf_log(LOG_INFO, "Warning, pthread_attr_setaffinity_np(%p, %d, %p) errored, with errno=%d\n", attr, cpusetsize, cpuset, ret);
+	}
+
+    return ret;
+}
+#endif
+
+// Wrapper for pthread_kill. The old "is everything ok?" probe (NULL thread
+// with signal 0) is redirected to the calling thread.
+EXPORT int my32_pthread_kill(x64emu_t* emu, void* thread, int sig)
+{
+    pthread_t target = (pthread_t)thread;
+    // check for old "is everything ok?"
+    if(thread==NULL && sig==0)
+        target = pthread_self();
+    return pthread_kill(target, sig);
+}
+
+//EXPORT void my32_pthread_exit(x64emu_t* emu, void* retval)
+//{
+//	emu->quit = 1;	// to be safe
+//	pthread_exit(retval);
+//}
+
+// TODO: find a better way for mutex. It should be possible to use the actual mutex most of the time, especially for simple ones
+// Having the mutex table behind a mutex is far from ideal!
+
+KHASH_MAP_INIT_INT(mutex, pthread_mutex_t*)
+static kh_mutex_t* unaligned_mutex = NULL;
+static pthread_rwlock_t m_lock = {0};
+// Return the native mutex associated with guest mutex `m` in the
+// unaligned_mutex table, creating it on first use (zeroed, then seeded with
+// the first 24 bytes of the guest mutex).
+// The lookup runs under the read lock. On a miss the read lock is dropped and
+// the write lock taken, so another thread may have inserted the same key in
+// between: the result of kh_put is checked and an existing entry is reused
+// instead of being overwritten by a second allocation.
+pthread_mutex_t* getAlignedMutex(pthread_mutex_t* m)
+{
+	pthread_mutex_t* ret = NULL;
+	pthread_rwlock_rdlock(&m_lock);
+	khint_t k = kh_get(mutex, unaligned_mutex, (uintptr_t)m);
+	if(k!=kh_end(unaligned_mutex))
+		ret = kh_value(unaligned_mutex, k);
+	pthread_rwlock_unlock(&m_lock);
+	if(ret)
+		return ret;
+	// not found: insert under the write lock
+	int r;
+	pthread_rwlock_wrlock(&m_lock);
+	k = kh_put(mutex, unaligned_mutex, (uintptr_t)m, &r);
+	if(r) {
+		// key was absent: create the native mutex
+		ret = kh_value(unaligned_mutex, k) = (pthread_mutex_t*)calloc(1, sizeof(pthread_mutex_t));
+		memcpy(ret, m, 24);
+	} else {
+		// inserted by another thread while the lock was released
+		ret = kh_value(unaligned_mutex, k);
+	}
+	pthread_rwlock_unlock(&m_lock);
+	return ret;
+}
+// Destroy the mutex behind guest mutex `m`.
+// When `m` has a native mutex in the unaligned_mutex table, the entry is
+// removed, the native mutex destroyed and freed. Otherwise
+// pthread_mutex_destroy is called on `m` itself.
+// The write lock is released on every path; leaving it held after a
+// successful removal would block all later mutex wrappers.
+// Returns the result of pthread_mutex_destroy.
+EXPORT int my32_pthread_mutex_destroy(pthread_mutex_t *m)
+{
+	pthread_mutex_t *n = NULL;
+	pthread_rwlock_wrlock(&m_lock);
+	khint_t k = kh_get(mutex, unaligned_mutex, (uintptr_t)m);
+	if(k!=kh_end(unaligned_mutex)) {
+		n = kh_value(unaligned_mutex, k);
+		kh_del(mutex, unaligned_mutex, k);
+	}
+	pthread_rwlock_unlock(&m_lock);
+	if(!n)
+		return pthread_mutex_destroy(m);
+	int ret = pthread_mutex_destroy(n);
+	free(n);
+	return ret;
+}
+#define getAlignedMutexWithInit(A, B)	getAlignedMutex(A)
+
+EXPORT int my32___pthread_mutex_destroy(pthread_mutex_t *m) __attribute__((alias("my32_pthread_mutex_destroy")));
+
+// pthread_mutex_init on the native mutex associated with guest mutex `m`.
+EXPORT int my32_pthread_mutex_init(pthread_mutex_t *m, pthread_mutexattr_t *att)
+{
+	pthread_mutex_t* native = getAlignedMutexWithInit(m, 0);
+	return pthread_mutex_init(native, att);
+}
+EXPORT int my32___pthread_mutex_init(pthread_mutex_t *m, pthread_mutexattr_t *att) __attribute__((alias("my32_pthread_mutex_init")));
+
+// pthread_mutex_lock on the native mutex associated with guest mutex `m`.
+EXPORT int my32_pthread_mutex_lock(pthread_mutex_t *m)
+{
+	pthread_mutex_t* native = getAlignedMutex(m);
+	return pthread_mutex_lock(native);
+}
+EXPORT int my32___pthread_mutex_lock(pthread_mutex_t *m) __attribute__((alias("my32_pthread_mutex_lock")));
+
+// pthread_mutex_timedlock on the native mutex associated with guest mutex
+// `m`; `t` is passed through.
+EXPORT int my32_pthread_mutex_timedlock(pthread_mutex_t *m, const struct timespec * t)
+{
+	pthread_mutex_t* native = getAlignedMutex(m);
+	return pthread_mutex_timedlock(native, t);
+}
+
+// pthread_mutex_trylock on the native mutex associated with guest mutex `m`.
+EXPORT int my32_pthread_mutex_trylock(pthread_mutex_t *m)
+{
+	pthread_mutex_t* native = getAlignedMutex(m);
+	return pthread_mutex_trylock(native);
+}
+EXPORT int my32___pthread_mutex_trylock(pthread_mutex_t *m) __attribute__((alias("my32_pthread_mutex_trylock")));
+
+// pthread_mutex_unlock on the native mutex associated with guest mutex `m`.
+EXPORT int my32_pthread_mutex_unlock(pthread_mutex_t *m)
+{
+	pthread_mutex_t* native = getAlignedMutex(m);
+	return pthread_mutex_unlock(native);
+}
+EXPORT int my32___pthread_mutex_unlock(pthread_mutex_t *m) __attribute__((alias("my32_pthread_mutex_unlock")));
+
+static int done = 0;
+// One-time setup of the 32-bit pthread helper (guarded by `done`): resolves
+// the two cleanup functions with dlsym, creates the condition and mutex
+// tables, and creates thread_key with emuthread_destroy as destructor.
+void init_pthread_helper_32()
+{
+	if(!done) {
+		done = 1;
+		real_pthread_cleanup_push_defer = (vFppp_t)dlsym(NULL, "_pthread_cleanup_push_defer");
+		real_pthread_cleanup_pop_restore = (vFpi_t)dlsym(NULL, "_pthread_cleanup_pop_restore");
+
+		mapcond = kh_init(mapcond);
+		unaligned_mutex = kh_init(mutex);
+		pthread_key_create(&thread_key, emuthread_destroy);
+		pthread_setspecific(thread_key, NULL);
+	}
+}
+
+// Destroy the emuthread stored under thread_key for the calling thread, if
+// any, and clear the key value.
+void clean_current_emuthread_32()
+{
+	void* current = pthread_getspecific(thread_key);
+	if(current==NULL)
+		return;
+	emuthread_destroy(current);
+	pthread_setspecific(thread_key, NULL);
+}
+
+// Undo init_pthread_helper_32 (no-op when the helper is not initialized):
+// destroys and frees every native condition and mutex held in the tables,
+// destroys both tables, then releases the emuthread of the calling thread.
+// Both table pointers are reset to NULL after kh_destroy so that neither is
+// left pointing at freed memory. `context` is not used.
+void fini_pthread_helper_32(box64context_t* context)
+{
+	if(!done)
+		return;
+	done = 0;
+	//CleanStackSize(context);
+	pthread_cond_t *cond;
+	kh_foreach_value(mapcond, cond, 
+		pthread_cond_destroy(cond);
+		free(cond);
+	);
+	kh_destroy(mapcond, mapcond);
+	mapcond = NULL;
+	pthread_mutex_t *m;
+	kh_foreach_value(unaligned_mutex, m, 
+		pthread_mutex_destroy(m);
+		free(m);
+	);
+	kh_destroy(mutex, unaligned_mutex);
+	unaligned_mutex = NULL;
+
+	clean_current_emuthread_32();
+}