about summary refs log tree commit diff stats
path: root/src/dynarec
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2024-08-26 17:45:13 +0200
committer GitHub <noreply@github.com> 2024-08-26 17:45:13 +0200
commit b5105a1e57bba3305d5dce93ab4d2f7faab6b34a (patch)
tree ab26b700d3c48f2c8e32a1084ae7c2e7a8448b06 /src/dynarec
parent 9beb745765e9c99bad6410094a97bf0bf9ebc1eb (diff)
download box64-b5105a1e57bba3305d5dce93ab4d2f7faab6b34a.tar.gz
box64-b5105a1e57bba3305d5dce93ab4d2f7faab6b34a.zip
Added preliminary Box32 support (#1760)
* Improve the ReserveHighMemory helper function

* [BOX32] Added some wrapping infrastructure

* [BOX32] More wrapped 32bits lib infrastructure

* [BOX32] Added callback and tls 32bits handling

* [BOX32] Added more 32bits, around wrappers and elfs

* [BOX32] Added the 32bits version of myalign

* [BOX32] More wrapped libs and 32bits fixes and improvements

* [BOX32] Added some 32bits tests

* [BOX32] Try to enable some Box32 build and test on the CI

* [BOX32] Disable Box32 testing on CI platform that use qemu

* [BOX32] Another attempt to disable Box32 testing on CI platform that use qemu

* [BOX32] Small fix for another attempt to disable Box32 testing on CI platform that use qemu

* [BOX32] Yet another fix for another attempt to disable Box32 testing on CI platform that use qemu

* [BOX32] Fixed a typo in CI script

* [BOX32] Better scratch alignment and enabled more tests

* [BOX32] Added (partial) wrapped 32bits librt

* [BOX32] Added mention of Box32 in README

* [BOX32] Added pthread handling, and numerous fixes to 32bits handling. [ARM64_DYNAREC] Fixed access to segment with negative offset

* [BOX32] Added system libs and cpp testing, plus some more fixes

* [BOX32] Fix previous commit

* [BOX32] Better stack adjustment for 32bits processes

* [BOX32] Added getenv wrapped 32bits function and friends

* [BOX32] Don't look for box86 for a Box32 build

* [BOX32] Don't do 32bits cppThreads test for now on CI

* [BOX32] Enabled a few more 32bits tests

* [BOX32] For ld_lib_path for both CppThreads tests

* [BOX32] [ANDROID] Some Fixes for Android Build

* [BOX32] Still need to disable cppThread_32bits test on CI for some reason

* [BOX32] [ANDROID] Don't show PreInit Array Warning (#1751)

* [BOX32] [ANDROID] One More Fix for Android Build That I forgot to … (#1752)

* [BOX32] [ANDROID] One More Fix for Android Build That I forgot to push before

* [BOX32] [ANDROID] Try to Create __libc_init

* [BOX32] [ANDROID] Try to disable NEEDED_LIBS for now (libdl is not wrapped)

* [BOX32] Updated generated files

* [BOX32] Added 32bits context functions

* [BOX32] Added 32bits signal handling

* [BOX32] Added some missing 32bits elfloader functions

* [BOX32] Fix build on x86_64 machine

* [BOX32] Better fix for x86_64 build

* [BOX32] Actually added missing libs, and re-enabled cppThreads_32bits test

* [BOX32] Added wrapped 32bits libdl

* [BOX32] Try to re-enabled Box32 test on CI for ARM64 builds

* [BOX32] fine-tuning Box32 test on CI for ARM64 builds

* [BOX32] More fine-tuning to Box32 test on CI for ARM64 builds

* [BOX32] Enabled Box32 test on CI for LA64 and RV64 builds too

* [BOX32] re-Disabled Box32 test on CI for LA64 and RV64 builds, not working for now

* [BOX32] Temporarily disabled cppThreads_32bits test on CI

---------

Co-authored-by: KreitinnSoftware <pablopro5051@gmail.com>
Co-authored-by: KreitinnSoftware <80591934+KreitinnSoftware@users.noreply.github.com>
Diffstat (limited to 'src/dynarec')
-rw-r--r-- src/dynarec/arm64/arm64_emitter.h 19
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_64.c 92
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_6664.c 30
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_helper.h 26
-rw-r--r-- src/dynarec/dynarec.c 19
-rw-r--r-- src/dynarec/dynarec_native_pass.c 2
6 files changed, 151 insertions, 37 deletions
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h
index 0070af36..781e7980 100644
--- a/src/dynarec/arm64/arm64_emitter.h
+++ b/src/dynarec/arm64/arm64_emitter.h
@@ -283,13 +283,19 @@ int convert_bitmask(uint64_t bitmask);
 #define LDRx_REG(Rt, Rn, Rm)            EMIT(LDR_REG_gen(0b11, Rm, 0b011, 0, Rn, Rt))
 #define LDRx_REG_LSL3(Rt, Rn, Rm)       EMIT(LDR_REG_gen(0b11, Rm, 0b011, 1, Rn, Rt))
 #define LDRx_REG_UXTW3(Rt, Rn, Rm)      EMIT(LDR_REG_gen(0b11, Rm, 0b010, 1, Rn, Rt))
+#define LDRx_REG_SXTW(Rt, Rn, Rm)       EMIT(LDR_REG_gen(0b11, Rm, 0b110, 0, Rn, Rt))
 #define LDRw_REG(Rt, Rn, Rm)            EMIT(LDR_REG_gen(0b10, Rm, 0b011, 0, Rn, Rt))
 #define LDRw_REG_LSL2(Rt, Rn, Rm)       EMIT(LDR_REG_gen(0b10, Rm, 0b011, 1, Rn, Rt))
+#define LDRw_REG_SXTW(Rt, Rn, Rm)       EMIT(LDR_REG_gen(0b10, Rm, 0b110, 0, Rn, Rt))
 #define LDRxw_REG(Rt, Rn, Rm)           EMIT(LDR_REG_gen(0b10+rex.w, Rm, 0b011, 0, Rn, Rt))
 #define LDRz_REG(Rt, Rn, Rm)            EMIT(LDR_REG_gen(rex.is32bits?0b10:0b11, Rm, 0b011, 0, Rn, Rt))
+#define LDRxw_REG_SXTW(Rt, Rn, Rm)      EMIT(LDR_REG_gen(0b10+rex.w, Rm, 0b110, 0, Rn, Rt))
+#define LDRz_REG_SXTW(Rt, Rn, Rm)       EMIT(LDR_REG_gen(rex.is32bits?0b10:0b11, Rm, 0b110, 0, Rn, Rt))
 #define LDRB_REG(Rt, Rn, Rm)            EMIT(LDR_REG_gen(0b00, Rm, 0b011, 0, Rn, Rt))
 #define LDRB_REG_UXTW(Rt, Rn, Rm)       EMIT(LDR_REG_gen(0b00, Rm, 0b010, 0, Rn, Rt))
+#define LDRB_REG_SXTW(Rt, Rn, Rm)       EMIT(LDR_REG_gen(0b00, Rm, 0b110, 0, Rn, Rt))
 #define LDRH_REG(Rt, Rn, Rm)            EMIT(LDR_REG_gen(0b01, Rm, 0b011, 0, Rn, Rt))
+#define LDRH_REG_SXTW(Rt, Rn, Rm)       EMIT(LDR_REG_gen(0b01, Rm, 0b110, 0, Rn, Rt))
 
 #define LDRS_U12_gen(size, op1, opc, imm12, Rn, Rt)    ((size)<<30 | 0b111<<27 | (op1)<<24 | (opc)<<22 | (imm12)<<10 | (Rn)<<5 | (Rt))
 #define LDRSHx_U12(Rt, Rn, imm12)           EMIT(LDRS_U12_gen(0b01, 0b01, 0b10, ((uint32_t)(imm12>>1))&0xfff, Rn, Rt))
@@ -301,6 +307,7 @@ int convert_bitmask(uint64_t bitmask);
 
 #define LDRS_REG_gen(size, Rm, option, S, Rn, Rt)    ((size)<<30 | 0b111<<27 | 0b10<<22 | 1<<21 | (Rm)<<16 | (option)<<13 | (S)<<12 | (0b10)<<10 | (Rn)<<5 | (Rt))
 #define LDRSW_REG(Rt, Rn, Rm)           EMIT(LDRS_REG_gen(0b10, Rm, 0b011, 0, Rn, Rt))
+#define LDRSW_REG_SXTW(Rt, Rn, Rm)      EMIT(LDRS_REG_gen(0b10, Rm, 0b110, 0, Rn, Rt))
 
 #define LDR_PC_gen(opc, imm19, Rt)      ((opc)<<30 | 0b011<<27 | (imm19)<<5 | (Rt))
 #define LDRx_literal(Rt, imm19)         EMIT(LDR_PC_gen(0b01, ((imm19)>>2)&0x7FFFF, Rt))
@@ -371,12 +378,18 @@ int convert_bitmask(uint64_t bitmask);
 #define STRx_REG(Rt, Rn, Rm)            EMIT(STR_REG_gen(0b11, Rm, 0b011, 0, Rn, Rt))
 #define STRx_REG_LSL3(Rt, Rn, Rm)       EMIT(STR_REG_gen(0b11, Rm, 0b011, 1, Rn, Rt))
 #define STRx_REG_UXTW(Rt, Rn, Rm)       EMIT(STR_REG_gen(0b11, Rm, 0b010, 0, Rn, Rt))
+#define STRx_REG_SXTW(Rt, Rn, Rm)       EMIT(STR_REG_gen(0b11, Rm, 0b110, 0, Rn, Rt))
 #define STRw_REG(Rt, Rn, Rm)            EMIT(STR_REG_gen(0b10, Rm, 0b011, 0, Rn, Rt))
 #define STRw_REG_LSL2(Rt, Rn, Rm)       EMIT(STR_REG_gen(0b10, Rm, 0b011, 1, Rn, Rt))
+#define STRw_REG_SXTW(Rt, Rn, Rm)       EMIT(STR_REG_gen(0b10, Rm, 0b110, 0, Rn, Rt))
 #define STRB_REG(Rt, Rn, Rm)            EMIT(STR_REG_gen(0b00, Rm, 0b011, 0, Rn, Rt))
+#define STRB_REG_SXTW(Rt, Rn, Rm)       EMIT(STR_REG_gen(0b00, Rm, 0b110, 0, Rn, Rt))
 #define STRH_REG(Rt, Rn, Rm)            EMIT(STR_REG_gen(0b01, Rm, 0b011, 0, Rn, Rt))
+#define STRH_REG_SXTW(Rt, Rn, Rm)       EMIT(STR_REG_gen(0b01, Rm, 0b110, 0, Rn, Rt))
 #define STRxw_REG(Rt, Rn, Rm)           EMIT(STR_REG_gen(rex.w?0b11:0b10, Rm, 0b011, 0, Rn, Rt))
+#define STRxw_REG_SXTW(Rt, Rn, Rm)      EMIT(STR_REG_gen(rex.w?0b11:0b10, Rm, 0b110, 0, Rn, Rt))
 #define STRz_REG(Rt, Rn, Rm)            EMIT(STR_REG_gen(rex.is32bits?0b10:0b11, Rm, 0b011, 0, Rn, Rt))
+#define STRz_REG_SXTW(Rt, Rn, Rm)       EMIT(STR_REG_gen(rex.is32bits?0b10:0b11, Rm, 0b110, 0, Rn, Rt))
 
 // LOAD/STORE PAIR
 #define MEMPAIR_gen(size, L, op2, imm7, Rt2, Rn, Rt)    ((size)<<31 | 0b101<<27 | (op2)<<23 | (L)<<22 | (imm7)<<15 | (Rt2)<<10 | (Rn)<<5 | (Rt))
@@ -896,18 +909,24 @@ int convert_bitmask(uint64_t bitmask);
 #define VMEM_REG_gen(size, opc, Rm, option, S, Rn, Rt)  ((size)<<30 | 0b111<<27 | 1<<26 | (opc)<<22 | 1<<21 | (Rm)<<16 | (option)<<13 | (S)<<12 | 0b10<<10 | (Rn)<<5 | (Rt))
 
 #define VLDR32_REG(Dt, Rn, Rm)              EMIT(VMEM_REG_gen(0b10, 0b01, Rm, 0b011, 0, Rn, Dt))
+#define VLDR32_REG_SXTW(Dt, Rn, Rm)         EMIT(VMEM_REG_gen(0b10, 0b01, Rm, 0b110, 0, Rn, Dt))
 #define VLDR32_REG_LSL2(Dt, Rn, Rm)         EMIT(VMEM_REG_gen(0b10, 0b01, Rm, 0b011, 1, Rn, Dt))
 #define VLDR64_REG(Dt, Rn, Rm)              EMIT(VMEM_REG_gen(0b11, 0b01, Rm, 0b011, 0, Rn, Dt))
+#define VLDR64_REG_SXTW(Dt, Rn, Rm)         EMIT(VMEM_REG_gen(0b11, 0b01, Rm, 0b110, 0, Rn, Dt))
 #define VLDR64_REG_LSL3(Dt, Rn, Rm)         EMIT(VMEM_REG_gen(0b11, 0b01, Rm, 0b011, 1, Rn, Dt))
 #define VLDR128_REG(Qt, Rn, Rm)             EMIT(VMEM_REG_gen(0b00, 0b11, Rm, 0b011, 0, Rn, Qt))
 #define VLDR128_REG_LSL4(Qt, Rn, Rm)        EMIT(VMEM_REG_gen(0b00, 0b11, Rm, 0b011, 1, Rn, Qt))
+#define VLDR128_REG_SXTW(Qt, Rn, Rm)        EMIT(VMEM_REG_gen(0b00, 0b11, Rm, 0b110, 0, Rn, Qt))
 
 #define VSTR32_REG(Dt, Rn, Rm)              EMIT(VMEM_REG_gen(0b10, 0b00, Rm, 0b011, 0, Rn, Dt))
 #define VSTR32_REG_LSL2(Dt, Rn, Rm)         EMIT(VMEM_REG_gen(0b10, 0b00, Rm, 0b011, 1, Rn, Dt))
+#define VSTR32_REG_SXTW(Dt, Rn, Rm)         EMIT(VMEM_REG_gen(0b10, 0b00, Rm, 0b110, 0, Rn, Dt))
 #define VSTR64_REG(Dt, Rn, Rm)              EMIT(VMEM_REG_gen(0b11, 0b00, Rm, 0b011, 0, Rn, Dt))
 #define VSTR64_REG_LSL3(Dt, Rn, Rm)         EMIT(VMEM_REG_gen(0b11, 0b00, Rm, 0b011, 1, Rn, Dt))
+#define VSTR64_REG_SXTW(Dt, Rn, Rm)         EMIT(VMEM_REG_gen(0b11, 0b00, Rm, 0b110, 0, Rn, Dt))
 #define VSTR128_REG(Qt, Rn, Rm)             EMIT(VMEM_REG_gen(0b00, 0b10, Rm, 0b011, 0, Rn, Qt))
 #define VSTR128_REG_LSL4(Qt, Rn, Rm)        EMIT(VMEM_REG_gen(0b00, 0b10, Rm, 0b011, 1, Rn, Qt))
+#define VSTR128_REG_SXTW(Qt, Rn, Rm)        EMIT(VMEM_REG_gen(0b00, 0b10, Rm, 0b110, 0, Rn, Qt))
 
 #define VLDR_PC_gen(opc, imm19, Rt)         ((opc)<<30 | 0b011<<27 | 1<<26 | (imm19)<<5 | (Rt))
 #define VLDR32_literal(Vt, imm19)           EMIT(VLDR_PC_gen(0b00, ((imm19)>>2)&0x7FFFF, Vt))
diff --git a/src/dynarec/arm64/dynarec_arm64_64.c b/src/dynarec/arm64/dynarec_arm64_64.c
index 85b89e3a..6ff6d594 100644
--- a/src/dynarec/arm64/dynarec_arm64_64.c
+++ b/src/dynarec/arm64/dynarec_arm64_64.c
@@ -108,7 +108,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                                 v0 = sse_get_reg_empty(dyn, ninst, x1, gd);
                                 SMREAD();
                                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0);
-                                ADDx_REG(x4, x4, ed);
+                                ADDz_REG(x4, x4, ed);
                                 VLD64(v0, x4, fixedaddress); // upper part reseted
                             }
                             break;
@@ -126,7 +126,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                                 v0 = sse_get_reg_empty(dyn, ninst, x1, gd);
                                 SMREAD();
                                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
-                                ADDx_REG(x4, x4, ed);
+                                ADDz_REG(x4, x4, ed);
                                 VLD32(v0, x4, fixedaddress);
                             }
                             break;
@@ -148,7 +148,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                             } else {
                                 grab_segdata(dyn, addr, ninst, x4, seg);
                                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<4, 15, rex, NULL, 0, 0);
-                                ADDx_REG(x4, x4, ed);
+                                ADDz_REG(x4, x4, ed);
                                 VST128(v0, x4, fixedaddress);
                                 SMWRITE2();
                             }
@@ -165,7 +165,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                             } else {
                                 grab_segdata(dyn, addr, ninst, x4, seg);
                                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0);
-                                ADDx_REG(x4, x4, ed);
+                                ADDz_REG(x4, x4, ed);
                                 VST64(v0, x4, fixedaddress);
                                 SMWRITE2();
                             }
@@ -182,7 +182,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                             } else {
                                 grab_segdata(dyn, addr, ninst, x4, seg);
                                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
-                                ADDx_REG(x4, x4, ed);
+                                ADDz_REG(x4, x4, ed);
                                 VST32(v0, x4, fixedaddress);
                                 SMWRITE2();
                             }
@@ -206,7 +206,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                                 grab_segdata(dyn, addr, ninst, x4, seg);
                                 SMREAD();
                                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<4, 15, rex, NULL, 0, 0);
-                                ADDx_REG(x4, x4, ed);
+                                ADDz_REG(x4, x4, ed);
                                 VLD128(v0, ed, fixedaddress);
                             }
                             break;
@@ -275,7 +275,10 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                             } else {
                                 SMREAD();
                                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
-                                LDRB_REG(gd, ed, x4);
+                                if(rex.is32bits)
+                                    LDRB_REG_SXTW(gd, x4, ed);
+                                else
+                                    LDRB_REG(gd, ed, x4);
                             }
                             break;
                         default:
@@ -397,7 +400,10 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         grab_segdata(dyn, addr, ninst, x4, seg);
                         SMREAD();
                         addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
-                        LDRSW_REG(gd, ed, x4);
+                        if(rex.is32bits)
+                            LDRSW_REG_SXTW(gd, x4, ed);
+                        else
+                            LDRSW_REG(gd, ed, x4);
                     }
                 } else {
                     if(MODREG) {   // reg <= reg
@@ -406,7 +412,10 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         grab_segdata(dyn, addr, ninst, x4, seg);
                         SMREAD();
                         addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
-                        LDRw_REG(gd, ed, x4);
+                        if(rex.is32bits)
+                            LDRw_REG_SXTW(gd, x4, ed);
+                        else
+                            LDRw_REG(gd, ed, x4);
                     }
                 }
             }
@@ -639,7 +648,10 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 grab_segdata(dyn, addr, ninst, x4, seg);
                 SMREAD();
                 addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
-                LDRB_REG(x4, wback, x4);
+                if(rex.is32bits)
+                    LDRB_REG_SXTW(x4, x4, wback);
+                else
+                    LDRB_REG(x4, wback, x4);
                 ed = x4;
             }
             BFIx(gb1, ed, gb2, 8);
@@ -653,7 +665,10 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 MOVxw_REG(xRAX+(nextop&7)+(rex.b<<3), gd);
             } else {                    // mem <= reg
                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
-                STRxw_REG(gd, ed, x4);
+                if(rex.is32bits)
+                    STRxw_REG_SXTW(gd, x4, ed);
+                else
+                    STRxw_REG(gd, ed, x4);
                 SMWRITE2();
             }
             break;
@@ -668,7 +683,10 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             } else {                    // mem <= reg
                 SMREAD();
                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
-                LDRxw_REG(gd, ed, x4);
+                if(rex.is32bits)
+                    LDRxw_REG_SXTW(gd, x4, ed);
+                else
+                    LDRxw_REG(gd, ed, x4);
             }
             break;
 
@@ -698,7 +716,10 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             } else {
                 SMREAD();
                 addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
-                LDRH_REG(x1, wback, x4);
+                if(rex.is32bits)
+                    LDRH_REG_SXTW(x1, x4, wback);
+                else
+                    LDRH_REG(x1, wback, x4);
                 ed = x1;
             }
             STRH_U12(ed, xEmu, offsetof(x64emu_t, segs[u8]));
@@ -714,11 +735,17 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 POP1z(x2); // so this can handle POP [ESP] and maybe some variant too
                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0, 0, rex, NULL, 0, 0);
                 if(ed==xRSP) {
-                    STRz_REG(x2, ed, x4);
+                    if(rex.is32bits)
+                        STRz_REG_SXTW(x2, x4, ed);
+                    else
+                        STRz_REG(x2, ed, x4);
                 } else {
                     // complicated to just allow a segfault that can be recovered correctly
                     SUBz_U12(xRSP, xRSP, rex.is32bits?4:8);
-                    STRz_REG(x2, ed, x4);
+                    if(rex.is32bits)
+                        STRz_REG_SXTW(x2, x4, ed);
+                    else
+                        STRz_REG(x2, ed, x4);
                     ADDz_U12(xRSP, xRSP, rex.is32bits?4:8);
                 }
             }
@@ -735,9 +762,25 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             else
                 u64 = F64;
             MOV64z(x1, u64);
-            LDRxw_REG(xRAX, x1, x4);
+            if(rex.is32bits)
+                LDRxw_REG_SXTW(xRAX, x4, x1);
+            else
+                LDRxw_REG(xRAX, x4, x1);
+            break;
+        case 0xA2:
+            INST_NAME("MOV FS:Od,AL");
+            grab_segdata(dyn, addr, ninst, x4, seg);
+            if(rex.is32bits)
+                u64 = F32;
+            else
+                u64 = F64;
+            MOV64z(x1, u64);
+            if(rex.is32bits)
+                STRB_REG_SXTW(xRAX, x4, x1);
+            else
+                STRB_REG(xRAX, x4, x1);
+            SMWRITE2();
             break;
-
         case 0xA3:
             INST_NAME("MOV FS:Od,EAX");
             grab_segdata(dyn, addr, ninst, x4, seg);
@@ -746,7 +789,10 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             else
                 u64 = F64;
             MOV64z(x1, u64);
-            STRxw_REG(xRAX, x1, x4);
+            if(rex.is32bits)
+                STRxw_REG_SXTW(xRAX, x4, x1);
+            else
+                STRxw_REG(xRAX, x4, x1);
             SMWRITE2();
             break;
 
@@ -770,7 +816,10 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 1);
                 u8 = F8;
                 MOV32w(x3, u8);
-                STRB_REG(x3, ed, x4);
+                if(rex.is32bits)
+                    STRB_REG_SXTW(x3, x4, ed);
+                else
+                    STRB_REG(x3, ed, x4);
                 SMWRITE2();
             }
             break;
@@ -786,7 +835,10 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 4);
                 i64 = F32S;
                 MOV64xw(x3, i64);
-                STRxw_REG(x3, ed, x4);
+                if(rex.is32bits)
+                    STRxw_REG_SXTW(x3, x4, ed);
+                else
+                    STRxw_REG(x3, ed, x4);
                 SMWRITE2();
             }
             break;
diff --git a/src/dynarec/arm64/dynarec_arm64_6664.c b/src/dynarec/arm64/dynarec_arm64_6664.c
index 4b33fab6..30a28012 100644
--- a/src/dynarec/arm64/dynarec_arm64_6664.c
+++ b/src/dynarec/arm64/dynarec_arm64_6664.c
@@ -62,7 +62,10 @@ uintptr_t dynarec64_6664(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                         SMREAD();
                         addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
                         v1 = fpu_get_scratch(dyn, ninst);
-                        VLDR64_REG(v1, ed, x4);
+                        if(rex.is32bits)
+                            VLDR64_REG_SXTW(v1, x4, ed);
+                        else
+                            VLDR64_REG(v1, ed, x4);
                     }
                     FCMPD(v0, v1);
                     FCOMI(x1, x2);
@@ -80,7 +83,10 @@ uintptr_t dynarec64_6664(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                         grab_segdata(dyn, addr, ninst, x4, seg);
                         addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
                         SMREAD();
-                        VLDR128_REG(v0, ed, x4);
+                        if(rex.is32bits)
+                            VLDR128_REG_SXTW(v0, x4, ed);
+                        else
+                            VLDR128_REG(v0, ed, x4);
                     }
                     break;
 
@@ -94,7 +100,10 @@ uintptr_t dynarec64_6664(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                     } else {
                         grab_segdata(dyn, addr, ninst, x4, seg);
                         addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
-                        VSTR128_REG(v0, ed, x4);
+                        if(rex.is32bits)
+                            VSTR128_REG_SXTW(v0, x4, ed);
+                        else
+                            VSTR128_REG(v0, ed, x4);
                         SMWRITE2();
                     }
                     break;
@@ -110,7 +119,10 @@ uintptr_t dynarec64_6664(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                     } else {
                         grab_segdata(dyn, addr, ninst, x4, seg);
                         addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
-                        VSTR64_REG(v0, ed, x4);
+                        if(rex.is32bits)
+                            VSTR64_REG_SXTW(v0, x4, ed);
+                        else
+                            VSTR64_REG(v0, ed, x4);
                         SMWRITE();
                     }
                     break;
@@ -229,7 +241,10 @@ uintptr_t dynarec64_6664(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 if(rex.w) {
                     STRx_REG(gd, ed, x4);
                 } else {
-                    STRH_REG(gd, ed, x4);
+                    if(rex.is32bits)
+                        STRH_REG_SXTW(gd, x4, ed);
+                    else
+                        STRH_REG(gd, ed, x4);
                 }
                 SMWRITE();
             }
@@ -255,7 +270,10 @@ uintptr_t dynarec64_6664(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 if(rex.w) {
                     LDRx_REG(gd, ed, x4);
                 } else {
-                    LDRH_REG(x1, ed, x4);
+                    if(rex.is32bits)
+                        LDRH_REG_SXTW(x1, x4, ed);
+                    else
+                        LDRH_REG(x1, ed, x4);
                     BFIx(gd, x1, 0, 16);
                 }
             }
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h
index 7810293b..06e7705b 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.h
+++ b/src/dynarec/arm64/dynarec_arm64_helper.h
@@ -213,10 +213,13 @@
                 } else {                                \
                     SMREAD();                           \
                     addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, NULL, 0, D); \
-                    LDRxw_REG(x1, wback, O);            \
+                    if(rex.is32bits)                    \
+                        LDRxw_REG_SXTW(x1, O, wback);   \
+                    else                                \
+                        LDRxw_REG(x1, wback, O);        \
                     ed = x1;                            \
                 }
-#define WBACKO(O)   if(wback) {STRxw_REG(ed, wback, O); SMWRITE2();}
+#define WBACKO(O)   if(wback) {if(rex.is32bits) STRxw_REG_SXTW(ed, O, wback); else STRxw_REG(ed, wback, O); SMWRITE2();}
 //GETEDOx can use r1 for ed, and r2 for wback. wback is 0 if ed is xEAX..xEDI
 #define GETEDOx(O, D)  if(MODREG) {                     \
                     ed = xRAX+(nextop&7)+(rex.b<<3);    \
@@ -224,7 +227,10 @@
                 } else {                                \
                     SMREAD();                           \
                     addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, NULL, 0, D); \
-                    LDRx_REG(x1, wback, O);             \
+                    if(rex.is32bits)                    \
+                        LDRx_REG_SXTW(x1, O, wback);    \
+                    else                                \
+                        LDRx_REG(x1, wback, O);         \
                     ed = x1;                            \
                 }
 //GETEDOz can use r1 for ed, and r2 for wback. wback is 0 if ed is xEAX..xEDI
@@ -234,7 +240,10 @@
                 } else {                                \
                     SMREAD();                           \
                     addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, NULL, 0, D); \
-                    LDRz_REG(x1, wback, O);             \
+                    if(rex.is32bits)                    \
+                        LDRz_REG_SXTW(x1, O, wback);    \
+                    else                                \
+                        LDRz_REG(x1, wback, O);         \
                     ed = x1;                            \
                 }
 #define GETSEDOw(O, D)  if((nextop&0xC0)==0xC0) {       \
@@ -245,7 +254,10 @@
                 } else {                                \
                     SMREAD();                           \
                     addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, NULL, 0, D); \
-                    LDRSW_REG(x1, wback, O);            \
+                    if(rex.is32bits)                    \
+                        LDRSW_REG_SXTW(x1, O, wback);   \
+                    else                                \
+                        LDRSW_REG(x1, wback, O);        \
                     wb = ed = x1;                       \
                 }
 //FAKEELike GETED, but doesn't get anything
@@ -304,7 +316,7 @@
                 } else {                        \
                     SMREAD();                   \
                     addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<1, (1<<1)-1, rex, NULL, 0, D); \
-                    ADDx_REG(x3, wback, i);     \
+                    ADDz_REG(x3, wback, i);     \
                     if(wback!=x3) wback = x3;   \
                     LDH(i, wback, fixedaddress);\
                     wb1 = 1;                    \
@@ -365,7 +377,7 @@
                 } else {                        \
                     SMREAD();                   \
                     addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff, 0, rex, NULL, 0, D); \
-                    ADDx_REG(x3, wback, i);     \
+                    ADDz_REG(x3, wback, i);     \
                     if(wback!=x3) wback = x3;   \
                     LDB(i, wback, fixedaddress);\
                     wb1 = 1;                    \
diff --git a/src/dynarec/dynarec.c b/src/dynarec/dynarec.c
index 1a389b94..915044eb 100644
--- a/src/dynarec/dynarec.c
+++ b/src/dynarec/dynarec.c
@@ -104,10 +104,15 @@ void DynaCall(x64emu_t* emu, uintptr_t addr)
     multiuint_t old_res_sav= emu->res_sav;
     deferred_flags_t old_df_sav= emu->df_sav;
     // uc_link
-    x64_ucontext_t* old_uc_link = emu->uc_link;
+    void* old_uc_link = emu->uc_link;
     emu->uc_link = NULL;
 
-    PushExit(emu);
+    #ifdef BOX32
+    if(box64_is32bits)
+        PushExit_32(emu);
+    else
+    #endif
+        PushExit(emu);
     R_RIP = addr;
     emu->df = d_none;
     DynaRun(emu);
@@ -137,6 +142,9 @@ void DynaCall(x64emu_t* emu, uintptr_t addr)
 }
 
 int my_setcontext(x64emu_t* emu, void* ucp);
+#ifdef BOX32
+int my32_setcontext(x64emu_t* emu, void* ucp);
+#endif
 void DynaRun(x64emu_t* emu)
 {
     // prepare setjump for signal handling
@@ -206,7 +214,12 @@ void DynaRun(x64emu_t* emu)
             }
             if(emu->quit && emu->uc_link) {
                 emu->quit = 0;
-                my_setcontext(emu, emu->uc_link);
+                #ifdef BOX32
+                if(box64_is32bits)
+                    my32_setcontext(emu, emu->uc_link);
+                else
+                #endif
+                    my_setcontext(emu, emu->uc_link);
             }
         }
 #endif
diff --git a/src/dynarec/dynarec_native_pass.c b/src/dynarec/dynarec_native_pass.c
index 9c212fea..24528fa1 100644
--- a/src/dynarec/dynarec_native_pass.c
+++ b/src/dynarec/dynarec_native_pass.c
@@ -111,7 +111,7 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr, int alternate, int
         #endif
         dyn->f.dfnone_here = 0;
         NEW_INST;
-        MESSAGE(LOG_DUMP, "New Instruction x64:%p, native:%p\n", (void*)addr, (void*)dyn->block);
+        MESSAGE(LOG_DUMP, "New Instruction %s:%p, native:%p\n", is32bits?"x86":"x64",(void*)addr, (void*)dyn->block);
         if(!ninst) {
             GOTEST(x1, x2);
         }