Diffstat (limited to 'src')
-rwxr-xr-x  src/dynarec/arm64_emitter.h             84
-rwxr-xr-x  src/dynarec/arm64_printer.c             77
-rwxr-xr-x  src/dynarec/dynarec_arm64_00.c          98
-rwxr-xr-x  src/dynarec/dynarec_arm64_emit_shift.c  461
-rwxr-xr-x  src/dynarec/dynarec_arm64_helper.c      12
-rwxr-xr-x  src/dynarec/dynarec_arm64_helper.h      42
6 files changed, 738 insertions, 36 deletions
diff --git a/src/dynarec/arm64_emitter.h b/src/dynarec/arm64_emitter.h
index 738035f5..0d182425 100755
--- a/src/dynarec/arm64_emitter.h
+++ b/src/dynarec/arm64_emitter.h
@@ -131,6 +131,7 @@
 #define ADDSw_REG(Rd, Rn, Rm)               EMIT(ADDSUB_REG_gen(0, 0, 1, 0b00, Rm, 0, Rn, Rd))
 #define ADDxw_REG(Rd, Rn, Rm)               EMIT(ADDSUB_REG_gen(rex.w, 0, 0, 0b00, Rm, 0, Rn, Rd))
 #define ADDSxw_REG(Rd, Rn, Rm)              EMIT(ADDSUB_REG_gen(rex.w, 0, 1, 0b00, Rm, 0, Rn, Rd))
+#define ADDxw_REG_LSR(Rd, Rn, Rm, lsr)      EMIT(ADDSUB_REG_gen(rex.w, 0, 0, 0b01, Rm, lsr, Rn, Rd))
 
 #define ADDSUB_IMM_gen(sf, op, S, shift, imm12, Rn, Rd)    ((sf)<<31 | (op)<<30 | (S)<<29 | 0b10001<<24 | (shift)<<22 | (imm12)<<10 | (Rn)<<5 | (Rd))
 #define ADDx_U12(Rd, Rn, imm12)     EMIT(ADDSUB_IMM_gen(1, 0, 0, 0b00, (imm12)&0xfff, Rn, Rd))
@@ -214,6 +215,29 @@
 #define STRB_REG(Rt, Rn, Rm)            EMIT(STR_REG_gen(0b00, Rm, 0b011, 0, Rn, Rt))
 #define STRH_REG(Rt, Rn, Rm)            EMIT(STR_REG_gen(0b01, Rm, 0b011, 0, Rn, Rt))
 
+// LOAD/STORE PAIR
+#define MEMPAIR_gen(size, L, op2, imm7, Rt2, Rn, Rt)    ((size)<<31 | 0b101<<27 | (op2)<<23 | (L)<<22 | (imm7)<<15 | (Rt2)<<10 | (Rn)<<5 | (Rt))
+
+#define LDPx_S7_postindex(Rt, Rt2, Rn, imm)             EMIT(MEMPAIR_gen(1, 1, 0b01, (imm>>3)&0x7f, Rt2, Rn, Rt))
+#define LDPw_S7_postindex(Rt, Rt2, Rn, imm)             EMIT(MEMPAIR_gen(0, 1, 0b01, (imm>>2)&0x7f, Rt2, Rn, Rt))
+#define LDPxw_S7_postindex(Rt, Rt2, Rn, imm)            EMIT(MEMPAIR_gen(rex.w, 1, 0b01, (imm>>(2+rex.w)), Rt2, Rn, Rt))
+#define LDPx_S7_preindex(Rt, Rt2, Rn, imm)              EMIT(MEMPAIR_gen(1, 1, 0b11, (imm>>3)&0x7f, Rt2, Rn, Rt))
+#define LDPw_S7_preindex(Rt, Rt2, Rn, imm)              EMIT(MEMPAIR_gen(0, 1, 0b11, (imm>>2)&0x7f, Rt2, Rn, Rt))
+#define LDPxw_S7_preindex(Rt, Rt2, Rn, imm)             EMIT(MEMPAIR_gen(rex.w, 1, 0b11, (imm>>(2+rex.w)), Rt2, Rn, Rt))
+#define LDPx_S7_offset(Rt, Rt2, Rn, imm)                EMIT(MEMPAIR_gen(1, 1, 0b10, (imm>>3)&0x7f, Rt2, Rn, Rt))
+#define LDPw_S7_offset(Rt, Rt2, Rn, imm)                EMIT(MEMPAIR_gen(0, 1, 0b10, (imm>>2)&0x7f, Rt2, Rn, Rt))
+#define LDPxw_S7_offset(Rt, Rt2, Rn, imm)               EMIT(MEMPAIR_gen(rex.w, 1, 0b10, (imm>>(2+rex.w)), Rt2, Rn, Rt))
+
+#define STPx_S7_postindex(Rt, Rt2, Rn, imm)             EMIT(MEMPAIR_gen(1, 0, 0b01, (imm>>3)&0x7f, Rt2, Rn, Rt))
+#define STPw_S7_postindex(Rt, Rt2, Rn, imm)             EMIT(MEMPAIR_gen(0, 0, 0b01, (imm>>2)&0x7f, Rt2, Rn, Rt))
+#define STPxw_S7_postindex(Rt, Rt2, Rn, imm)            EMIT(MEMPAIR_gen(rex.w, 0, 0b01, (imm>>(2+rex.w)), Rt2, Rn, Rt))
+#define STPx_S7_preindex(Rt, Rt2, Rn, imm)              EMIT(MEMPAIR_gen(1, 0, 0b11, (imm>>3)&0x7f, Rt2, Rn, Rt))
+#define STPw_S7_preindex(Rt, Rt2, Rn, imm)              EMIT(MEMPAIR_gen(0, 0, 0b11, (imm>>2)&0x7f, Rt2, Rn, Rt))
+#define STPxw_S7_preindex(Rt, Rt2, Rn, imm)             EMIT(MEMPAIR_gen(rex.w, 0, 0b11, (imm>>(2+rex.w)), Rt2, Rn, Rt))
+#define STPx_S7_offset(Rt, Rt2, Rn, imm)                EMIT(MEMPAIR_gen(1, 0, 0b10, (imm>>3)&0x7f, Rt2, Rn, Rt))
+#define STPw_S7_offset(Rt, Rt2, Rn, imm)                EMIT(MEMPAIR_gen(0, 0, 0b10, (imm>>2)&0x7f, Rt2, Rn, Rt))
+#define STPxw_S7_offset(Rt, Rt2, Rn, imm)               EMIT(MEMPAIR_gen(rex.w, 0, 0b10, (imm>>(2+rex.w)), Rt2, Rn, Rt))
+
 // PUSH / POP helper
 #define POP1(reg)       LDRx_S9_postindex(reg, xRSP, 8)
 #define PUSH1(reg)      STRx_S9_preindex(reg, xRSP, -8)
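
The S7 pair macros above pack the byte offset into LDP/STP's scaled, signed 7-bit imm7 field, which is why they shift the offset right by 2 (W pairs) or 3 (X pairs) before masking with 0x7f. A standalone sketch of that encode/decode round trip for the 64-bit case, illustrative only and not part of the commit:

#include <stdio.h>

int main(void)
{
    /* e.g. STPx_S7_preindex(Rt, Rt2, xSP, -16): byte offset -16, scale 8 */
    int offset = -16;                                /* must be a multiple of 8 */
    unsigned imm7 = ((unsigned)(offset / 8)) & 0x7f; /* encoded field: 0x7e */
    int decoded = ((int)(imm7 ^ 0x40) - 0x40) * 8;   /* sign-extend 7 bits, rescale */
    printf("imm7=0x%02x -> %d bytes\n", imm7, decoded); /* prints imm7=0x7e -> -16 bytes */
    return 0;
}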
@@ -232,6 +256,11 @@
 #define Bcond_gen(imm19, cond)          (0b0101010<<25 | (imm19)<<5 | (cond))
 #define Bcond(cond, imm19)              EMIT(Bcond_gen(((imm19)>>2)&0x7FFFF, cond))
 
+#define B_gen(imm26)                    (0b000101<<26 | (imm26))
+#define B(imm26)                        EMIT(B_gen(((imm26)>>2)&0x3ffffff))
+
+#define NOP                             EMIT(0b11010101000000110010000000011111)
+
 // AND / ORR
 #define LOGIC_gen(sf, opc, N, immr, imms, Rn, Rd)  ((sf)<<31 | (opc)<<29 | 0b100100<<23 | (N)<<22 | (immr)<<16 | (imms)<<10 | (Rn)<<5 | Rd)
 // logic to get the mask is ... convoluted... list of possible value there: https://gist.github.com/dinfuehr/51a01ac58c0b23e4de9aac313ed6a06a
@@ -241,8 +270,11 @@
 #define ANDSw_mask(Rd, Rn, immr, imms)      EMIT(LOGIC_gen(0, 0b11, 0, immr, imms, Rn, Rd))
 #define ORRx_mask(Rd, Rn, N, immr, imms)    EMIT(LOGIC_gen(1, 0b01, N, immr, imms, Rn, Rd))
 #define ORRw_mask(Rd, Rn, immr, imms)       EMIT(LOGIC_gen(0, 0b01, 0, immr, imms, Rn, Rd))
+#define EORx_mask(Rd, Rn, N, immr, imms)    EMIT(LOGIC_gen(1, 0b10, N, immr, imms, Rn, Rd))
+#define EORw_mask(Rd, Rn, immr, imms)       EMIT(LOGIC_gen(0, 0b10, 0, immr, imms, Rn, Rd))
 #define TSTx_mask(Rn, immr, imms)           ANDSx_mask(xZR, Rn, immr, imms)
 #define TSTw_mask(Rn, immr, imms)           ANDSw_mask(wZR, Rn, immr, imms)
+#define TSTxw_mask(Rn, immr, imms)          ANDSxw_mask(xZR, Rn, immr, imms)
 
 #define LOGIC_REG_gen(sf, opc, shift, N, Rm, imm6, Rn, Rd)    ((sf)<<31 | (opc)<<29 | 0b01010<<24 | (shift)<<22 | (N)<<21 | (Rm)<<16 | (imm6)<<10 | (Rn)<<5 | (Rd))
 #define ANDx_REG(Rd, Rn, Rm)            EMIT(LOGIC_REG_gen(1, 0b00, 0b00, 0, Rm, 0, Rn, Rd))
@@ -265,6 +297,9 @@
 #define EORx_REG_LSL(Rd, Rn, Rm, lsl)   EMIT(LOGIC_REG_gen(1, 0b10, 0b00, 0, Rm, lsl, Rn, Rd))
 #define EORw_REG_LSL(Rd, Rn, Rm, lsl)   EMIT(LOGIC_REG_gen(0, 0b10, 0b00, 0, Rm, lsl, Rn, Rd))
 #define EORxw_REG_LSL(Rd, Rn, Rm, lsl)  EMIT(LOGIC_REG_gen(rex.w, 0b10, 0b00, 0, Rm, lsl, Rn, Rd))
+#define EORx_REG_LSR(Rd, Rn, Rm, lsr)   EMIT(LOGIC_REG_gen(1, 0b10, 0b01, 0, Rm, lsr, Rn, Rd))
+#define EORw_REG_LSR(Rd, Rn, Rm, lsr)   EMIT(LOGIC_REG_gen(0, 0b10, 0b01, 0, Rm, lsr, Rn, Rd))
+#define EORxw_REG_LSR(Rd, Rn, Rm, lsr)  EMIT(LOGIC_REG_gen(rex.w, 0b10, 0b01, 0, Rm, lsr, Rn, Rd))
 #define MOVx_REG(Rd, Rm)                ORRx_REG(Rd, xZR, Rm)
 #define MOVw_REG(Rd, Rm)                ORRw_REG(Rd, xZR, Rm)
 #define MOVxw_REG(Rd, Rm)               ORRxw_REG(Rd, xZR, Rm)
@@ -295,9 +330,9 @@
 #define BFIx(Rd, Rn, lsb, width)        BFMx(Rd, Rn, ((-lsb)%64)&0x3f, (width)-1)
 #define BFIw(Rd, Rn, lsb, width)        BFMw(Rd, Rn, ((-lsb)%32)&0x1f, (width)-1)
 #define BFIxw(Rd, Rn, lsb, width)       if(rex.w) {BFIx(Rd, Rn, lsb, width);} else {BFIw(Rd, Rn, lsb, width);}
-#define BFCx(Rd, Rn, lsb, width)        BFMx(Rd, xZR, ((-lsb)%64)&0x3f, (width)-1)
-#define BFCw(Rd, Rn, lsb, width)        BFMw(Rd, xZR, ((-lsb)%32)&0x1f, (width)-1)
-#define BFCxw(Rd, Rn, lsb, width)       if(rex.w) {BFCx(Rd, Rn, lsb, width);} else {BFCw(Rd, Rn, lsb, width);}
+#define BFCx(Rd, lsb, width)            BFMx(Rd, xZR, ((-lsb)%64)&0x3f, (width)-1)
+#define BFCw(Rd, lsb, width)            BFMw(Rd, xZR, ((-lsb)%32)&0x1f, (width)-1)
+#define BFCxw(Rd, lsb, width)           BFMxw(Rd, xZR, rex.w?(((-lsb)%64)&0x3f):(((-lsb)%32)&0x1f), (width)-1)
 
 // UBFX
 #define UBFM_gen(sf, N, immr, imms, Rn, Rd)    ((sf)<<31 | 0b10<<29 | 0b100110<<23 | (N)<<22 | (immr)<<16 | (imms)<<10 | (Rn)<<5 | (Rd))
@@ -315,12 +350,45 @@
 #define LSRxw(Rd, Rn, shift)            EMIT(UBFM_gen(rex.w, rex.w, shift, (rex.w)?63:31, Rn, Rd))
 #define LSLx(Rd, Rn, lsl)               UBFMx(Rd, Rn, ((-(lsl))%64)&63, 63-(lsl))
 #define LSLw(Rd, Rn, lsl)               UBFMw(Rd, Rn, ((-(lsl))%32)&31, 31-(lsl))
+#define LSLxw(Rd, Rn, lsl)              UBFMxw(Rd, Rn, rex.w?(((-(lsl))%64)&63):(((-(lsl))%32)&31), (rex.w?63:31)-(lsl))
+
+// SBFM
+#define SBFM_gen(sf, N, immr, imms, Rn, Rd)    ((sf)<<31 | 0b00<<29 | 0b100110<<23 | (N)<<22 | (immr)<<16 | (imms)<<10 | (Rn)<<5 | (Rd))
+#define SBFMx(Rd, Rn, immr, imms)       EMIT(SBFM_gen(1, 1, immr, imms, Rn, Rd))
+#define SBFMw(Rd, Rn, immr, imms)       EMIT(SBFM_gen(0, 0, immr, imms, Rn, Rd))
+#define SBFMxw(Rd, Rn, immr, imms)      EMIT(SBFM_gen(rex.w, rex.w, immr, imms, Rn, Rd))
+#define SBFXx(Rd, Rn, lsb, width)       SBFMx(Rd, Rn, lsb, lsb+width-1)
+#define SBFXw(Rd, Rn, lsb, width)       SBFMw(Rd, Rn, lsb, lsb+width-1)
+#define SBFXxw(Rd, Rn, lsb, width)      SBFMxw(Rd, Rn, lsb, lsb+width-1)
+#define SXTBx(Rd, Rn)                   SBFMx(Rd, Rn, 0, 7)
+#define SXTBw(Rd, Rn)                   SBFMw(Rd, Rn, 0, 7)
+#define SXTHx(Rd, Rn)                   SBFMx(Rd, Rn, 0, 15)
+#define SXTHw(Rd, Rn)                   SBFMw(Rd, Rn, 0, 15)
+#define SXTWx(Rd, Rn)                   SBFMx(Rd, Rn, 0, 31)
+#define ASRx(Rd, Rn, shift)             SBFMx(Rd, Rn, shift, 63)
+#define ASRw(Rd, Rn, shift)             SBFMw(Rd, Rn, shift, 31)
+#define ASRxw(Rd, Rn, shift)            SBFMxw(Rd, Rn, shift, rex.w?63:31)
+
+// EXTR
+#define EXTR_gen(sf, N, Rm, imms, Rn, Rd)   ((sf)<<31 | 0b00<<29 | 0b100111<<23 | (N)<<22 | (Rm)<<16 | (imms)<<10 | (Rn)<<5 | (Rd))
+#define EXTRx(Rd, Rn, Rm, lsb)          EMIT(EXTR_gen(1, 1, Rm, lsb, Rn, Rd))
+#define EXTRw(Rd, Rn, Rm, lsb)          EMIT(EXTR_gen(0, 0, Rm, lsb, Rn, Rd))
+#define EXTRxw(Rd, Rn, Rm, lsb)         EMIT(EXTR_gen(rex.w, rex.w, Rm, lsb, Rn, Rd))
+#define RORx(Rd, Rn, lsb)               EMIT(EXTR_gen(1, 1, Rn, lsb, Rn, Rd))
+#define RORw(Rd, Rn, lsb)               EMIT(EXTR_gen(0, 0, Rn, lsb, Rn, Rd))
+#define RORxw(Rd, Rn, lsb)              EMIT(EXTR_gen(rex.w, rex.w, Rn, lsb, Rn, Rd))
+
+// LSRV / LSLV
+#define LS_V_gen(sf, Rm, op2, Rn, Rd)   ((sf)<<31 | 0b11010110<<21 | (Rm)<<16 | 0b0010<<12 | (op2)<<10 | (Rn)<<5 | (Rd))
+#define LSRx_REG(Rd, Rn, Rm)            EMIT(LS_V_gen(1, Rm, 0b01, Rn, Rd))
+#define LSRw_REG(Rd, Rn, Rm)            EMIT(LS_V_gen(0, Rm, 0b01, Rn, Rd))
+#define LSRxw_REG(Rd, Rn, Rm)           EMIT(LS_V_gen(rex.w, Rm, 0b01, Rn, Rd))
+
+#define LSLx_REG(Rd, Rn, Rm)            EMIT(LS_V_gen(1, Rm, 0b00, Rn, Rd))
+#define LSLw_REG(Rd, Rn, Rm)            EMIT(LS_V_gen(0, Rm, 0b00, Rn, Rd))
+#define LSLxw_REG(Rd, Rn, Rm)           EMIT(LS_V_gen(rex.w, Rm, 0b00, Rn, Rd))
+
 
-// LSRV
-#define LSRV_gen(sf, Rm, op2, Rn, Rd)   ((sf)<<31 | 0b11010110<<21 | (Rm)<<16 | 0b0010<<12 | (op2)<<10 | (Rn)<<5 | (Rd))
-#define LSRx_REG(Rd, Rn, Rm)            EMIT(LSRV_gen(1, Rm, 0b01, Rn, Rd))
-#define LSRw_REG(Rd, Rn, Rm)            EMIT(LSRV_gen(0, Rm, 0b01, Rn, Rd))
-#define LSRxw_REG(Rd, Rn, Rm)           EMIT(LSRV_gen(rex.w, Rm, 0b01, Rn, Rd))
 
 // MRS
 #define MRS_gen(L, o0, op1, CRn, CRm, op2, Rt)  (0b1101010100<<22 | (L)<<21 | 1<<20 | (o0)<<19 | (op1)<<16 | (CRn)<<12 | (CRm)<<8 | (op2)<<5 | (Rt))
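
For the *_mask logical-immediate forms (including the new EORx_mask/EORw_mask above), the (N, immr, imms) triple does not hold a literal constant: it encodes a rotated, replicated bit pattern, which is what the linked gist enumerates. A rough, simplified decoder for the 32-bit, N=0 case, just to show how the ORRw_mask(xFlags, xFlags, 0b011010, 0) calls in the new shift emitters end up meaning "OR with 0x40". This is a sketch only, assuming valid encodings; the function name is made up and this is not the emitter's own logic:

#include <stdint.h>
#include <stdio.h>

static uint32_t decode_logic_imm32(unsigned immr, unsigned imms)
{
    /* element size is 2^len, where len is the highest clear bit of imms (N==0) */
    unsigned len = 0;
    for (int i = 5; i >= 0; i--)
        if (!((imms >> i) & 1)) { len = i; break; }
    unsigned esize = 1u << len;
    unsigned S = imms & (esize - 1);   /* number of set bits, minus one */
    unsigned R = immr & (esize - 1);   /* rotate-right amount */
    uint64_t ones = (1ull << (S + 1)) - 1;
    uint64_t elem = ((ones >> R) | (ones << (esize - R))) & ((1ull << esize) - 1);
    uint32_t mask = 0;
    for (unsigned pos = 0; pos < 32; pos += esize)   /* replicate the element */
        mask |= (uint32_t)(elem << pos);
    return mask;
}

int main(void)
{
    printf("0x%x\n", decode_logic_imm32(0b011010, 0));   /* prints 0x40, i.e. the ZF bit */
    return 0;
}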
diff --git a/src/dynarec/arm64_printer.c b/src/dynarec/arm64_printer.c
index 3137d33a..082260b9 100755
--- a/src/dynarec/arm64_printer.c
+++ b/src/dynarec/arm64_printer.c
@@ -17,7 +17,7 @@ static const char* conds[] = {"cEQ", "cNE", "cCS", "cCC", "cMI", "cPL", "cVS", "
 
 typedef struct arm64_print_s {
     int N, S;
-    int t, n, m, d;
+    int t, n, m, d, t2;
     int f, c, o, h;
     int i, r, s;
     int x, w;
@@ -60,6 +60,7 @@ int isMask(uint32_t opcode, const char* mask, arm64_print_t *a)
             case 'N': a->N = (a->N<<1) | v; break;
             case 'S': a->S = (a->S<<1) | v; break;
             case 't': a->t = (a->t<<1) | v; break;
+            case '2': a->t2 = (a->t2<<1) | v; break;
             case 'n': a->n = (a->n<<1) | v; break;
             case 'm': a->m = (a->m<<1) | v; break;
             case 'd': a->d = (a->d<<1) | v; break;
@@ -96,6 +97,7 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
     arm64_print_t a;
     #define Rn a.n
     #define Rt a.t
+    #define Rt2 a.t2
     #define Rm a.m
     #define Rd a.d
     #define sf a.f
@@ -106,7 +108,47 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
     #define cond a.c
     #define immr a.r
     #define imms a.s
+    if(isMask(opcode, "11010101000000110010000000011111", &a)) {
+        snprintf(buff, sizeof(buff), "NOP");
+        return buff;
+    }
     // --- LDR / STR
+    if(isMask(opcode, "f010100011iiiiiii22222nnnnnttttt", &a)) {
+        int offset = signExtend(imm, 7)<<(2+sf);
+        snprintf(buff, sizeof(buff), "LDP %s, %s, [%s], %s0x%x", sf?Xt[Rt]:Wt[Rt], sf?Xt[Rt2]:Wt[Rt2], XtSp[Rn], (offset<0)?"-":"", abs(offset));
+        return buff;
+    }
+    if(isMask(opcode, "f010100111iiiiiii22222nnnnnttttt", &a)) {
+        int offset = signExtend(imm, 7)<<(2+sf);
+        snprintf(buff, sizeof(buff), "LDP %s, %s, [%s, %s0x%x]!", sf?Xt[Rt]:Wt[Rt], sf?Xt[Rt2]:Wt[Rt2], XtSp[Rn], (offset<0)?"-":"", abs(offset));
+        return buff;
+    }
+    if(isMask(opcode, "f010100101iiiiiii22222nnnnnttttt", &a)) {
+        int offset = signExtend(imm, 7)<<(2+sf);
+        if(!offset)
+            snprintf(buff, sizeof(buff), "LDP %s, %s, [%s]", sf?Xt[Rt]:Wt[Rt], sf?Xt[Rt2]:Wt[Rt2], XtSp[Rn]);
+        else
+            snprintf(buff, sizeof(buff), "LDP %s, %s, [%s, %s0x%x]", sf?Xt[Rt]:Wt[Rt], sf?Xt[Rt2]:Wt[Rt2], XtSp[Rn], (offset<0)?"-":"", abs(offset));
+        return buff;
+    }
+    if(isMask(opcode, "f010100010iiiiiii22222nnnnnttttt", &a)) {
+        int offset = signExtend(imm, 7)<<(2+sf);
+        snprintf(buff, sizeof(buff), "STP %s, %s, [%s], %s0x%x", sf?Xt[Rt]:Wt[Rt], sf?Xt[Rt2]:Wt[Rt2], XtSp[Rn], (offset<0)?"-":"", abs(offset));
+        return buff;
+    }
+    if(isMask(opcode, "f010100110iiiiiii22222nnnnnttttt", &a)) {
+        int offset = signExtend(imm, 7)<<(2+sf);
+        snprintf(buff, sizeof(buff), "STP %s, %s, [%s, %s0x%x]!", sf?Xt[Rt]:Wt[Rt], sf?Xt[Rt2]:Wt[Rt2], XtSp[Rn], (offset<0)?"-":"", abs(offset));
+        return buff;
+    }
+    if(isMask(opcode, "f010100100iiiiiii22222nnnnnttttt", &a)) {
+        int offset = signExtend(imm, 7)<<(2+sf);
+        if(!offset)
+            snprintf(buff, sizeof(buff), "STP %s, %s, [%s]", sf?Xt[Rt]:Wt[Rt], sf?Xt[Rt2]:Wt[Rt2], XtSp[Rn]);
+        else
+            snprintf(buff, sizeof(buff), "STP %s, %s, [%s, %s0x%x]", sf?Xt[Rt]:Wt[Rt], sf?Xt[Rt2]:Wt[Rt2], XtSp[Rn], (offset<0)?"-":"", abs(offset));
+        return buff;
+    }
     if(isMask(opcode, "1x111000010iiiiiiiii01nnnnnttttt", &a)) {
         int size = (opcode>>30)&3;
         int offset = signExtend(imm, 9);
@@ -387,12 +429,38 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
         return buff;
     }
 
+    if(isMask(opcode, "f00100110Nrrrrrrssssssnnnnnddddd", &a)) {
+        if(sf && imms==0b111111)
+            snprintf(buff, sizeof(buff), "ASR %s, %s, %d", Xt[Rd], Xt[Rn], immr);
+        else if(!sf && imms==0b011111)
+            snprintf(buff, sizeof(buff), "ASR %s, %s, %d", Wt[Rd], Wt[Rn], immr);
+        else if(immr==0 && imms==0b000111)
+            snprintf(buff, sizeof(buff), "SXTB %s, %s", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn]);
+        else if(immr==0 && imms==0b001111)
+            snprintf(buff, sizeof(buff), "SXTH %s, %s", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn]);
+        else if(sf && immr==0 && imms==0b011111)
+            snprintf(buff, sizeof(buff), "SXTW %s, %s", Xt[Rd], Xt[Rn]);
+        else if(imms>=immr)
+            snprintf(buff, sizeof(buff), "SBFX %s, %s, %d, %d", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], immr, imms-immr+1);
+        else
+            snprintf(buff, sizeof(buff), "SBFM %s, %s, %d, %d", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], immr, imms);
+        return buff;
+    }
+
+    if(isMask(opcode, "f00100111N0mmmmmssssssnnnnnddddd", &a)) {
+        if(Rn==Rm)
+            snprintf(buff, sizeof(buff), "ROR %s, %s, %d", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], imms);
+        else
+            snprintf(buff, sizeof(buff), "EXTR %s, %s, %s, %d", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], sf?Xt[Rm]:Wt[Rm], imms);
+        return buff;
+    }
+
     if(isMask(opcode, "f01100110Nrrrrrrssssssnnnnnddddd", &a)) {
         if(imms<immr) {
             int width = imms + 1;
             int lsb = ((-immr)%(sf?64:32))&(sf?0x3f:0x1f);
             if(Rn==31)
-                snprintf(buff, sizeof(buff), "BFC %s, %s, %d, %d", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], lsb, width);
+                snprintf(buff, sizeof(buff), "BFC %s, %d, %d", sf?Xt[Rd]:Wt[Rd], lsb, width);
             else
                 snprintf(buff, sizeof(buff), "BFI %s, %s, %d, %d", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], lsb, width);
         } else
@@ -413,6 +481,11 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
         snprintf(buff, sizeof(buff), "B.%s #+%d\t; %p", conds[cond], offset, (void*)(addr + offset));
         return buff;
     }
+    if(isMask(opcode, "000101iiiiiiiiiiiiiiiiiiiiiiiiii", &a)) {
+        int offset = signExtend(imm, 26)<<2;
+        snprintf(buff, sizeof(buff), "B #+%d\t; %p", offset, (void*)(addr + offset));
+        return buff;
+    }
     if(isMask(opcode, "f0110100iiiiiiiiiiiiiiiiiiittttt", &a)) {
         int offset = signExtend(imm, 19)<<2;
         snprintf(buff, sizeof(buff), "CBZ %s, #%+d\t; %p", Xt[Rt], offset, (void*)(addr + offset));

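All the printer entries added in this commit go through isMask(), which compares the opcode against a 32-character pattern: '0' and '1' are fixed bits, and any other character accumulates that bit, MSB first, into the field named by that letter (t, n, m, d, ... and now '2' for Rt2). A stripped-down illustration of the matching scheme, with a made-up name; the real isMask collects more field letters into arm64_print_t:

#include <stdint.h>
#include <string.h>

/* Returns 1 on a match and fills fields['x'] for every non-fixed letter x. */
static int match_mask(uint32_t opcode, const char *mask, int fields[128])
{
    if (strlen(mask) != 32) return 0;
    memset(fields, 0, 128 * sizeof(int));
    for (int i = 0; i < 32; i++) {
        int bit = (opcode >> (31 - i)) & 1;     /* walk MSB first, like the pattern */
        char c = mask[i];
        if (c == '0' || c == '1') {
            if (bit != c - '0') return 0;       /* fixed bit mismatch */
        } else {
            fields[(unsigned char)c] = (fields[(unsigned char)c] << 1) | bit;
        }
    }
    return 1;   /* e.g. fields['n'] now holds Rn, fields['2'] holds Rt2 */
}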
diff --git a/src/dynarec/dynarec_arm64_00.c b/src/dynarec/dynarec_arm64_00.c
index 5c7cff03..6b5ebbf9 100755
--- a/src/dynarec/dynarec_arm64_00.c
+++ b/src/dynarec/dynarec_arm64_00.c
@@ -326,6 +326,76 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             }
             break;
 
+        case 0xC1:
+            nextop = F8;
+            switch((nextop>>3)&7) {
+                case 0:
+                    INST_NAME("ROL Ed, Ib");
+                    SETFLAGS(X_OF|X_CF, SF_SUBSET);
+                    GETED(1);
+                    u8 = (F8)&(rex.w?0x3f:0x1f);
+                    emit_rol32c(dyn, ninst, rex, ed, u8, x3, x4);
+                    if(u8) { WBACK; }
+                    break;
+                case 1:
+                    INST_NAME("ROR Ed, Ib");
+                    SETFLAGS(X_OF|X_CF, SF_SUBSET);
+                    GETED(1);
+                    u8 = (F8)&(rex.w?0x3f:0x1f);
+                    emit_ror32c(dyn, ninst, rex, ed, u8, x3, x4);
+                    if(u8) { WBACK; }
+                    break;
+                case 2:
+                    INST_NAME("RCL Ed, Ib");
+                    READFLAGS(X_CF);
+                    SETFLAGS(X_OF|X_CF, SF_SET);
+                    GETEDW(x4, x1, 1);
+                    u8 = F8;
+                    MOV32w(x2, u8);
+                    CALL_(rex.w?((void*)rcl64):((void*)rcl32), ed, x4);
+                    WBACK;
+                    break;
+                case 3:
+                    INST_NAME("RCR Ed, Ib");
+                    READFLAGS(X_CF);
+                    SETFLAGS(X_OF|X_CF, SF_SET);
+                    GETEDW(x4, x1, 1);
+                    u8 = F8;
+                    MOV32w(x2, u8);
+                    CALL_(rex.w?((void*)rcr64):((void*)rcr32), ed, x4);
+                    WBACK;
+                    break;
+                case 4:
+                case 6:
+                    INST_NAME("SHL Ed, Ib");
+                    SETFLAGS(X_ALL, SF_SET);    // some flags are left undefined
+                    GETED(1);
+                    u8 = (F8)&(rex.w?0x3f:0x1f);
+                    emit_shl32c(dyn, ninst, rex, ed, u8, x3, x4);
+                    WBACK;
+                    break;
+                case 5:
+                    INST_NAME("SHR Ed, Ib");
+                    SETFLAGS(X_ALL, SF_SET);    // some flags are left undefined
+                    GETED(1);
+                    u8 = (F8)&(rex.w?0x3f:0x1f);
+                    emit_shr32c(dyn, ninst, rex, ed, u8, x3, x4);
+                    if(u8) {
+                        WBACK;
+                    }
+                    break;
+                case 7:
+                    INST_NAME("SAR Ed, Ib");
+                    SETFLAGS(X_ALL, SF_SET);    // some flags are left undefined
+                    GETED(1);
+                    u8 = (F8)&(rex.w?0x3f:0x1f);
+                    emit_sar32c(dyn, ninst, rex, ed, u8, x3, x4);
+                    if(u8) {
+                        WBACK;
+                    }
+                    break;
+            }
+            break;
         case 0xC2:
             INST_NAME("RETN");
             //SETFLAGS(X_ALL, SF_SET);    // Hack, set all flags (to an unknown state...)
@@ -423,6 +493,34 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     break;
             }
             break;
+        case 0xE9:
+        case 0xEB:
+            BARRIER(1);
+            if(opcode==0xE9) {
+                INST_NAME("JMP Id");
+                i32 = F32S;
+            } else {
+                INST_NAME("JMP Ib");
+                i32 = F8S;
+            }
+            JUMP(addr+i32);
+            if(dyn->insts) {
+                PASS2IF(dyn->insts[ninst].x64.jmp_insts==-1, 1) {
+                    // out of the block
+                    jump_to_next(dyn, addr+i32, 0, ninst);
+                } else {
+                    // inside the block
+                    tmp = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->arm_size);
+                    if(tmp==4) {
+                        NOP;
+                    } else {
+                        B(tmp);
+                    }
+                }
+            }
+            *need_epilog = 0;
+            *ok = 0;
+            break;
 
         case 0xFF:
             nextop = F8;
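
The new 0xC1 cases mask the immediate count with 0x1f (0x3f under REX.W) before emitting anything, mirroring the x86 rule that a masked count of zero leaves both the destination and the flags untouched; that is why most sub-cases only WBACK when u8 is non-zero. A hypothetical helper spelling out that rule for SHL, illustration only, not the emitted code:

#include <stdint.h>

static uint64_t shl_ed_ib(uint64_t v, unsigned count, int rexw)
{
    count &= rexw ? 0x3f : 0x1f;  /* hardware masks the count first */
    if (!count)
        return v;                 /* masked count 0: value and flags unchanged */
    return rexw ? (v << count)
                : (uint64_t)(uint32_t)((uint32_t)v << count); /* 32-bit op zero-extends */
}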
diff --git a/src/dynarec/dynarec_arm64_emit_shift.c b/src/dynarec/dynarec_arm64_emit_shift.c
new file mode 100755
index 00000000..fef4b4e5
--- /dev/null
+++ b/src/dynarec/dynarec_arm64_emit_shift.c
@@ -0,0 +1,461 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <pthread.h>
+#include <errno.h>
+
+#include "debug.h"
+#include "box64context.h"
+#include "dynarec.h"
+#include "emu/x64emu_private.h"
+#include "emu/x64run_private.h"
+#include "x64run.h"
+#include "x64emu.h"
+#include "box64stack.h"
+#include "callback.h"
+#include "emu/x64run_private.h"
+#include "x64trace.h"
+#include "dynarec_arm64.h"
+#include "dynarec_arm64_private.h"
+#include "arm64_printer.h"
+#include "../tools/bridge_private.h"
+
+#include "dynarec_arm64_functions.h"
+#include "dynarec_arm64_helper.h"
+
+// emit SHL32 instruction, from s1, shift amount in s2, store result in s1, using s3 and s4 as scratch. s3 can be the same as s2
+void emit_shl32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4)
+{
+    int32_t j32;
+    MAYUSE(j32);
+
+    IFX(X_PEND) {
+        STRxw_U12(s1, xEmu, offsetof(x64emu_t, op1));
+        STRxw_U12(s2, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s4, rex.w?d_shl64:d_shl32);
+    } else IFX(X_ALL) {
+        SET_DFNONE(s4);
+    }
+    IFX(F_OF) {
+        CMPSxw_U12(s2, 0);
+        IFX(F_OF) {
+            Bcond(cNE, +8);
+            BFCx(xFlags, F_OF, 1);
+        }
+        IFX(X_PEND) {
+            Bcond(cNE, +8);
+            STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
+        }
+        B_NEXT(cEQ);
+    }
+    IFX(X_CF | X_OF) {
+        MOV32w(s4, rex.w?64:32);
+        SUBxw_REG(s4, s4, s2);
+        LSRxw_REG(s4, s1, s4);
+        BFIw(xFlags, s4, F_CF, 1);
+    }
+    LSLxw_REG(s1, s1, s2);
+    IFX(X_PEND) {
+        STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_ZF) {
+        TSTw_REG(s1, s1);
+        BFCx(xFlags, F_ZF, 1);
+        Bcond(cNE, +8);
+        ORRw_mask(xFlags, xFlags, 0b011010, 0); // mask=0x40
+    }
+    IFX(X_SF) {
+        LSRxw(s4, s1, (rex.w)?63:31);
+        BFIx(xFlags, s4, F_SF, 1);
+    }
+    IFX(X_OF) {
+        CMPSxw_U12(s2, 1);   // if s2==1
+            IFX(X_SF) {} else {LSRxw(s4, s1, (rex.w)?63:31);}
+            BFCw(xFlags, F_OF, 1);
+            Bcond(cNE, +12);
+            EORxw_REG(s4, s4, xFlags);  // OF = MSB(result) ^ CF (xFlags bit 0)
+            BFIw(xFlags, s4, F_OF, 1);
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
+
+// emit SHL32 instruction, from s1, constant c, store result in s1, using s3 and s4 as scratch
+void emit_shl32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, int s3, int s4)
+{
+    IFX(X_PEND) {
+        MOV32w(s3, c);
+        STRxw_U12(s1, xEmu, offsetof(x64emu_t, op1));
+        STRxw_U12(s3, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s4, rex.w?d_shl64:d_shl32);
+    } else IFX(X_ALL) {
+        SET_DFNONE(s4);
+    }
+    if(c==0) {
+        IFX(F_OF) {
+            BFCx(xFlags, F_OF, 1);
+        }
+        IFX(X_PEND) {
+            STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
+        }
+        return;
+    }
+    IFX(X_CF) {
+        LSRxw(s3, s1, (rex.w?64:32)-c);
+        BFIxw(xFlags, s3, F_CF, 1);
+    }
+    LSLxw(s1, s1, c);
+
+    IFX(X_PEND) {
+        STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_ZF) {
+        TSTw_REG(s1, s1);
+        BFCx(xFlags, F_ZF, 1);
+        Bcond(cNE, +8);
+        ORRw_mask(xFlags, xFlags, 0b011010, 0); // mask=0x40
+    }
+    IFX(X_SF) {
+        LSRxw(s4, s1, (rex.w)?63:31);
+        BFIx(xFlags, s4, F_SF, 1);
+    }
+    IFX(X_OF) {
+        if(c==1) {
+            IFX(X_SF) {} else {LSRxw(s4, s1, (rex.w)?63:31);}
+            Bcond(cNE, +12);
+            EORxw_REG(s4, s4, xFlags);  // OF = MSB(result) ^ CF (xFlags bit 0)
+            BFIw(xFlags, s4, F_OF, 1);
+        } else {
+            BFCw(xFlags, F_OF, 1);
+        }
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
+
+// emit SHR32 instruction, from s1, shift amount in s2, store result in s1, using s3 and s4 as scratch. s2 can be the same as s3
+void emit_shr32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4)
+{
+    int32_t j32;
+    MAYUSE(j32);
+
+    IFX(X_PEND) {
+        STRxw_U12(s1, xEmu, offsetof(x64emu_t, op1));
+        STRxw_U12(s2, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s4, rex.w?d_shr64:d_shr32);
+    } else IFX(X_ALL) {
+        SET_DFNONE(s4);
+    }
+    IFX(X_ALL) {
+        CMPSxw_U12(s2, 0); //if(!c)
+            IFX(X_PEND) {
+                Bcond(cNE, +12);
+                STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
+            }
+            B_NEXT(cEQ);
+    }
+    LSRxw_REG(s1, s1, s2);
+    IFX(X_PEND) {
+        STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_ZF) {
+        TSTw_REG(s1, s1);
+        BFCx(xFlags, F_ZF, 1);
+        Bcond(cNE, +8);
+        ORRw_mask(xFlags, xFlags, 0b011010, 0); // mask=0x40
+    }
+    IFX(X_CF) {
+        SUBxw_U12(s3, s2, 1);
+        LSRxw_REG(s3, s1, s3);
+        BFIw(xFlags, s3, 0, 1);
+    }
+    IFX(X_SF) {
+        LSRxw(s4, s1, (rex.w)?63:31);
+        BFIx(xFlags, s4, F_SF, 1);
+    }
+    IFX(X_OF) {
+        CMPSxw_U12(s2, 1);   // if s2==1
+            Bcond(cNE, 4+3*4);
+            if(rex.w) {
+                LSRx(s4, s1, 62);
+            } else {
+                LSRw(s4, s1, 30);
+            }
+            EORw_mask(s4, s4, 0, 0);  // CF is set if OF is asked
+            BFIw(xFlags, s4, F_OF, 1);
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
+
+// emit SHR32 instruction, from s1, constant c, store result in s1, using s3 and s4 as scratch
+void emit_shr32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, int s3, int s4)
+{
+    IFX(X_PEND) {
+        MOV32w(s3, c);
+        STRxw_U12(s1, xEmu, offsetof(x64emu_t, op1));
+        STRxw_U12(s3, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s4, rex.w?d_shr64:d_shr32);
+    } else IFX(X_ALL) {
+        SET_DFNONE(s4);
+    }
+    if(!c) {
+        IFX(X_PEND) {
+            STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
+        }
+        return;
+    }
+    LSRxw(s1, s1, c);
+    IFX(X_PEND) {
+        STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_ZF) {
+        TSTw_REG(s1, s1);
+        BFCx(xFlags, F_ZF, 1);
+        Bcond(cNE, +8);
+        ORRw_mask(xFlags, xFlags, 0b011010, 0); // mask=0x40
+    }
+    IFX(X_CF) {
+        LSRxw_REG(s3, s1, c-1);
+        BFIw(xFlags, s3, 0, 1);
+    }
+    IFX(X_SF) {
+        LSRxw(s4, s1, (rex.w)?63:31);
+        BFIx(xFlags, s4, F_SF, 1);
+    }
+    IFX(X_OF) {
+        if(c==1) {
+            LSRxw(s4, s1, rex.w?62:30);
+            EORw_mask(s4, s4, 0, 0);
+            BFIw(xFlags, s4, F_OF, 1);
+        }
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
+
+// emit SAR32 instruction, from s1, constant c, store result in s1, using s3 and s4 as scratch
+void emit_sar32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, int s3, int s4)
+{
+    IFX(X_PEND) {
+        MOV32w(s3, c);
+        STRxw_U12(s1, xEmu, offsetof(x64emu_t, op1));
+        STRxw_U12(s3, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s4, rex.w?d_sar64:d_sar32);
+    } else IFX(X_ALL) {
+        SET_DFNONE(s4);
+    }
+    if(!c) {
+        IFX(X_PEND) {
+            STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
+        }
+        return;
+    }
+    ASRxw(s1, s1, c);
+    IFX(X_PEND) {
+        STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_ZF) {
+        TSTw_REG(s1, s1);
+        BFCx(xFlags, F_ZF, 1);
+        Bcond(cNE, +8);
+        ORRw_mask(xFlags, xFlags, 0b011010, 0); // mask=0x40
+    }
+    IFX(X_CF) {
+        ASRxw(s3, s1, c-1);
+        BFIw(xFlags, s3, 0, 1);
+    }
+    IFX(X_SF) {
+        LSRxw(s4, s1, (rex.w)?63:31);
+        BFIx(xFlags, s4, F_SF, 1);
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
+
+// emit ROL32 instruction, from s1, constant c, store result in s1, using s3 and s4 as scratch
+void emit_rol32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, int s3, int s4)
+{
+    IFX(X_PEND) {
+        MOV32w(s3, c);
+        STRxw_U12(s3, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s4, d_rol32);
+    } else IFX(X_ALL) {
+        SET_DFNONE(s4);
+    }
+    if(!c) {
+        IFX(X_PEND) {
+            STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
+        }
+        return;
+    }
+    RORxw(s1, s1, (rex.w?64:32)-c);
+    IFX(X_PEND) {
+        STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_CF) {
+        BFIw(xFlags, s1, F_CF, 1);
+    }
+    IFX(X_OF) {
+        if(c==1) {
+            ADDxw_REG_LSR(s3, s1, s1, rex.w?63:31);
+            BFIw(xFlags, s3, F_OF, 1);
+        }
+    }
+}
+
+// emit ROR32 instruction, from s1, constant c, store result in s1, using s3 and s4 as scratch
+void emit_ror32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, int s3, int s4)
+{
+    IFX(X_PEND) {
+        MOV32w(s3, c);
+        STRxw_U12(s3, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s4, rex.w?d_ror64:d_ror32);
+    } else IFX(X_ALL) {
+        SET_DFNONE(s4);
+    }
+    if(!c) {
+        IFX(X_PEND) {
+            STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
+        }
+        return;
+    }
+    RORxw(s1, s1, c);
+    IFX(X_PEND) {
+        STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_CF) {
+        LSRxw(s3, s1, rex.w?63:31);
+        BFIw(xFlags, s3, F_CF, 1);
+    }
+    IFX(X_OF) {
+        if(c==1) {
+            LSRxw(s3, s1, rex.w?62:30);
+            EORxw_REG_LSR(s3, s3, s3, 1);
+            BFIw(xFlags, s3, F_OF, 1);
+        }
+    }
+}
+
+// emit SHRD32 instruction, from s1, fill s2, constant c, store result in s1, using s3 and s4 as scratch
+//void emit_shrd32c(dynarec_arm_t* dyn, int ninst, int s1, int s2, int32_t c, int s3, int s4)
+//{
+//    c&=0x1f;
+//    IFX(X_PEND) {
+//        MOVW(s3, c);
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1));
+//        STR_IMM9(s3, xEmu, offsetof(x64emu_t, op2));
+//        // same flags calc as shr32
+//        SET_DF(s4, d_shr32);
+//    } else IFX(X_ALL) {
+//        SET_DFNONE(s4);
+//    }
+//    if(!c) {
+//        IFX(X_PEND) {
+//            STR_IMM9(s1, xEmu, offsetof(x64emu_t, res));
+//        }
+//        return;
+//    }
+//    IFX(X_CF) {
+//        MOVS_REG_LSR_IMM5(s1, s1, c);
+//    } else {
+//        MOV_REG_LSR_IMM5(s1, s1, c);
+//    }
+//    IFX(X_ZF|X_CF) {
+//        BIC_IMM8(xFlags, xFlags, (1<<F_ZF)|(1<<F_CF), 0);
+//    }
+//    IFX(X_CF) {
+//        ORR_IMM8_COND(cCS, xFlags, xFlags, 1<<F_CF, 0);
+//    }
+//    IFX(X_ZF) {
+//        ORRS_REG_LSL_IMM5(s1, s1, s2, 32-c);
+//    } else {
+//        ORR_REG_LSL_IMM5(s1, s1, s2, 32-c);
+//    }
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, res));
+//    }
+//    IFX(X_ZF) {
+//        ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<<F_ZF, 0);
+//    }
+//    IFX(X_SF) {
+//        MOV_REG_LSR_IMM5(s3, s1, 31);
+//        BFI(xFlags, s3, F_SF, 1);
+//    }
+//    IFX(X_OF) {
+//        if(c==1) {
+//            MOV_REG_LSR_IMM5(s4, s1, 30);
+//            XOR_REG_LSR_IMM8(s4, s4, s4, 1);
+//            BFI(xFlags, s4, F_OF, 1);
+//        }
+//    }
+//    IFX(X_PF) {
+//        emit_pf(dyn, ninst, s1, s3, s4);
+//    }
+//}
+
+//void emit_shld32c(dynarec_arm_t* dyn, int ninst, int s1, int s2, int32_t c, int s3, int s4)
+//{
+//    c&=0x1f;
+//    IFX(X_PEND) {
+//        MOVW(s3, c);
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1));
+//        STR_IMM9(s3, xEmu, offsetof(x64emu_t, op2));
+//        // same flags computation as with shl32
+//        SET_DF(s4, d_shl32);
+//    } else IFX(X_ALL) {
+//        SET_DFNONE(s4);
+//    }
+//    if(c==0) {
+//        IFX(F_OF) {
+//            BFC(xFlags, F_OF, 1);
+//        }
+//        IFX(X_PEND) {
+//            STR_IMM9(s1, xEmu, offsetof(x64emu_t, res));
+//        }
+//        return;
+//    }
+//    IFX(X_CF) {
+//        MOV_REG_LSR_IMM5(s3, s1, 32-c);
+//        BFI(xFlags, s3, F_CF, 1);
+//    }
+//    IFX(X_OF) {
+//        MOVS_REG_LSL_IMM5(s1, s1, c);
+//    } else {
+//        MOV_REG_LSL_IMM5(s1, s1, c);
+//    }
+//    IFX(X_OF) {
+//        if(c==1) {
+//            UBFX(s3, s2, 0, 1);
+//            XOR_IMM8_COND(cCS, s3, s3, 1);
+//            BFI(xFlags, s3, F_OF, 1);
+//        } else {
+//            BFC(xFlags, F_OF, 1);
+//        }
+//    }
+//    IFX(X_ZF) {
+//        ORRS_REG_LSR_IMM5(s1, s1, s2, 32-c);
+//    } else {
+//        ORR_REG_LSR_IMM5(s1, s1, s2, 32-c);
+//    }
+//
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, res));
+//    }
+//    IFX(X_ZF) {
+//        ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<<F_ZF, 0);
+//        BIC_IMM8_COND(cNE, xFlags, xFlags, 1<<F_ZF, 0);
+//    }
+//    IFX(X_SF) {
+//        MOV_REG_LSR_IMM5(s3, s1, 31);
+//        BFI(xFlags, s3, F_SF, 1);
+//    }
+//    IFX(X_PF) {
+//        emit_pf(dyn, ninst, s1, s3, s4);
+//    }
+//}
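For reference, the flag recipes these emitters aim at, taking SHL as the example: CF is the last bit shifted out, ZF and SF come from the result, and OF is only defined for a count of 1, where it equals MSB(result) XOR CF, the relation the EORxw_REG against xFlags computes. A compact sketch of those target semantics for the 32-bit case, reference behaviour only with made-up names, not emitter code:

#include <stdint.h>

typedef struct { int cf, zf, sf, of; } flags_t;

static uint32_t shl32_ref(uint32_t v, unsigned c, flags_t *f)
{
    c &= 0x1f;
    if (!c) return v;                    /* flags unchanged */
    f->cf = (v >> (32 - c)) & 1;         /* last bit shifted out */
    uint32_t r = v << c;
    f->zf = (r == 0);
    f->sf = (int)(r >> 31);
    if (c == 1)
        f->of = (int)(r >> 31) ^ f->cf;  /* OF = MSB(result) ^ CF */
    return r;
}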
diff --git a/src/dynarec/dynarec_arm64_helper.c b/src/dynarec/dynarec_arm64_helper.c
index 07c744e8..2f257929 100755
--- a/src/dynarec/dynarec_arm64_helper.c
+++ b/src/dynarec/dynarec_arm64_helper.c
@@ -327,10 +327,12 @@ void iret_to_epilog(dynarec_arm_t* dyn, int ninst)
     BR(x2);
 }
 
-void call_c(dynarec_arm_t* dyn, int ninst, void* fnc, int reg, int ret, int saveflags)
+void call_c(dynarec_arm_t* dyn, int ninst, void* fnc, int reg, int ret, int saveflags, int savereg)
 {
+    if(savereg==0)
+        savereg = 7;
     if(ret!=-2) {
-        STRx_S9_preindex(xEmu, xSP, -16);   // ARM64 stack needs to be 16byte aligned
+        STPx_S7_preindex(xEmu, savereg, xSP, -16);   // ARM64 stack needs to be 16byte aligned
     }
     fpu_pushcache(dyn, ninst, reg);
     if(saveflags) {
@@ -343,7 +345,7 @@ void call_c(dynarec_arm_t* dyn, int ninst, void* fnc, int reg, int ret, int save
         MOVx_REG(ret, xEmu);
     }
     if(ret!=-2) {
-        LDRx_S9_postindex(xEmu, xSP, 16);
+        LDPx_S7_postindex(xEmu, savereg, xSP, 16);
     }
     if(saveflags) {
         LDRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags));
@@ -366,7 +368,7 @@ void grab_tlsdata(dynarec_arm_t* dyn, uintptr_t addr, int ninst, int reg)
     SUBx_REG(t1, t1, t2);
     CBZx_MARKSEG(t1);
     MOVZw(x1, _GS);
-    call_c(dyn, ninst, GetSegmentBaseEmu, t2, reg, 1);
+    call_c(dyn, ninst, GetSegmentBaseEmu, t2, reg, 1, 0);
     MARKSEG;
     MESSAGE(LOG_DUMP, "----TLSData\n");
 }
@@ -383,7 +385,7 @@ void grab_fsdata(dynarec_arm_t* dyn, uintptr_t addr, int ninst, int reg)
     LDRx_U12(reg, xEmu, offsetof(x64emu_t, segs_offs[_FS]));
     CBZx_MARKSEG(t2);
     MOVZw(x1, _FS);
-    call_c(dyn, ninst, GetSegmentBaseEmu, t2, reg, 1);
+    call_c(dyn, ninst, GetSegmentBaseEmu, t2, reg, 1, 0);
     MARKSEG;
     MESSAGE(LOG_DUMP, "----FS: Offset\n");
 }
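call_c now saves a second, caller-chosen register (x7 when savereg is 0) together with xEmu through the new STP/LDP pair macros; besides preserving that register across the C helper, storing the two as a pair keeps the stack adjustment a multiple of 16, which AArch64 requires of SP. A trivial sketch of that invariant, illustrative only:

#include <assert.h>
#include <stdint.h>

int main(void)
{
    uintptr_t sp = 0x10000;           /* pretend SP, 16-byte aligned */
    sp -= 16;                         /* STPx_S7_preindex(xEmu, savereg, xSP, -16) */
    assert(sp % 16 == 0);             /* still aligned around the C call */
    sp += 16;                         /* LDPx_S7_postindex(xEmu, savereg, xSP, 16) */
    assert(sp == 0x10000);
    return 0;
}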
diff --git a/src/dynarec/dynarec_arm64_helper.h b/src/dynarec/dynarec_arm64_helper.h
index 43861e39..6cad1fd9 100755
--- a/src/dynarec/dynarec_arm64_helper.h
+++ b/src/dynarec/dynarec_arm64_helper.h
@@ -65,14 +65,14 @@
                     ed = hint;                            \
                 }
 //GETEDW can use hint for wback and ret for ed. wback is 0 if ed is xEAX..xEDI
-#define GETEDW(hint, ret)   if((nextop&0xC0)==0xC0) {   \
-                    ed = xEAX+(nextop&7);   \
-                    MOV_REG(ret, ed);       \
-                    wback = 0;              \
-                } else {                    \
-                    addr = geted(dyn, addr, ninst, nextop, &wback, hint, &fixedaddress, 4095, 0); \
-                    ed = ret;               \
-                    LDR_IMM9(ed, wback, fixedaddress); \
+#define GETEDW(hint, ret, D)   if((nextop&0xC0)==0xC0) {\
+                    ed = xRAX+(nextop&7)+(rex.b<<3);    \
+                    MOVxw_REG(ret, ed);                 \
+                    wback = 0;                          \
+                } else {                                \
+                    addr = geted(dyn, addr, ninst, nextop, &wback, hint, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, 0, D); \
+                    ed = ret;                           \
+                    LDRxw_U12(ed, wback, fixedaddress); \
                 }
 // Write back ed in wback (if wback not 0)
 #define WBACK       if(wback) {STRxw_U12(ed, wback, fixedaddress);}
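
The new limits GETEDW hands to geted() come straight from the LDR/STR unsigned-offset form used by LDRxw_U12: the 12-bit immediate is scaled by the access size, so a displacement can only be folded into the load when it is in range and size-aligned. A hypothetical check expressing that constraint, illustration only (geted()'s real logic also covers other addressing forms):

#include <stdint.h>

static int fits_u12_scaled(int64_t disp, int rexw)
{
    int scale = 2 + rexw;                        /* 4- or 8-byte access */
    if (disp < 0 || disp > (0xfffLL << scale))   /* matches 0xfff<<(2+rex.w) */
        return 0;
    if (disp & ((1LL << scale) - 1))             /* matches the (1<<(2+rex.w))-1 mask */
        return 0;
    return 1;
}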
@@ -81,7 +81,7 @@
 // Write back ed in wback (if wback not 0)
 #define WBACKw      if(wback) {STRw_U12(ed, wback, fixedaddress);}
 // Send back wb to either ed or wback
-#define SBACK(wb)   if(wback) {STR_IMM9(wb, wback, fixedaddress);} else {MOV_REG(ed, wb);}
+#define SBACK(wb)   if(wback) {STRxw_U12(wb, wback, fixedaddress);} else {MOVxw_REG(ed, wb);}
 //GETEDO can use r1 for ed, and r2 for wback. wback is 0 if ed is xEAX..xEDI
 #define GETEDO(O)   if((nextop&0xC0)==0xC0) {   \
                     ed = xEAX+(nextop&7)+(rex.b<<3);   \
@@ -209,13 +209,13 @@
 
 // CALL will use x7 for the call address. Return value can be put in ret (unless ret is -1)
 // R0 will not be pushed/popped if ret is -2
-#define CALL(F, ret) call_c(dyn, ninst, F, x7, ret, 1)
+#define CALL(F, ret) call_c(dyn, ninst, F, x7, ret, 1, 0)
 // CALL_ will use x3 for the call address. Return value can be put in ret (unless ret is -1)
 // R0 will not be pushed/popped if ret is -2
-#define CALL_(F, ret) call_c(dyn, ninst, F, x3, ret, 1)
+#define CALL_(F, ret, reg) call_c(dyn, ninst, F, x3, ret, 1, reg)
 // CALL_S will use x3 for the call address. Return value can be put in ret (unless ret is -1)
 // R0 will not be pushed/popped if ret is -2. Flags are not saved/restored
-#define CALL_S(F, ret) call_c(dyn, ninst, F, x3, ret, 0)
+#define CALL_S(F, ret) call_c(dyn, ninst, F, x3, ret, 0, 0)
 
 #define MARK    if(dyn->insts) {dyn->insts[ninst].mark = (uintptr_t)dyn->arm_size;}
 #define GETMARK ((dyn->insts)?dyn->insts[ninst].mark:(dyn->arm_size+4))
@@ -388,7 +388,7 @@
             j32 = (GETMARKF)-(dyn->arm_size);           \
             CBZw(x3, j32);                              \
         }                                               \
-        CALL_(UpdateFlags, -1);                         \
+        CALL_(UpdateFlags, -1, 0);                      \
         MARKF;                                          \
         dyn->state_flags = SF_SET;                      \
         SET_DFOK();                                     \
@@ -610,7 +610,7 @@ void jump_to_next(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst);
 void ret_to_epilog(dynarec_arm_t* dyn, int ninst);
 void retn_to_epilog(dynarec_arm_t* dyn, int ninst, int n);
 void iret_to_epilog(dynarec_arm_t* dyn, int ninst);
-void call_c(dynarec_arm_t* dyn, int ninst, void* fnc, int reg, int ret, int saveflags);
+void call_c(dynarec_arm_t* dyn, int ninst, void* fnc, int reg, int ret, int saveflags, int save_reg);
 //void grab_fsdata(dynarec_arm_t* dyn, uintptr_t addr, int ninst, int reg);
 //void grab_tlsdata(dynarec_arm_t* dyn, uintptr_t addr, int ninst, int reg);
 //void emit_cmp8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4);
@@ -673,13 +673,13 @@ void emit_dec32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4
 //void emit_neg32(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4);
 //void emit_neg16(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4);
 //void emit_neg8(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4);
-//void emit_shl32(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4);
-//void emit_shl32c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
-//void emit_shr32(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4);
-//void emit_shr32c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
-//void emit_sar32c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
-//void emit_rol32c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
-//void emit_ror32c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
+void emit_shl32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
+void emit_shl32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, int s3, int s4);
+void emit_shr32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
+void emit_shr32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, int s3, int s4);
+void emit_sar32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, int s3, int s4);
+void emit_rol32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, int s3, int s4);
+void emit_ror32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, int s3, int s4);
 //void emit_shrd32c(dynarec_arm_t* dyn, int ninst, int s1, int s2, int32_t c, int s3, int s4);
 //void emit_shld32c(dynarec_arm_t* dyn, int ninst, int s1, int s2, int32_t c, int s3, int s4);