author     ptitSeb <sebastien.chev@gmail.com>    2024-10-20 21:47:21 +0200
committer  ptitSeb <sebastien.chev@gmail.com>    2024-10-20 21:47:21 +0200
commit     fc7d204d4bca211f2462a0534e77f0929f121330 (patch)
tree       682dc885fbd4572c0cd96dd23dab25ac07d13855
parent     2153e7098ccaa287f9377914be20219dad9a3044 (diff)
download   box64-fc7d204d4bca211f2462a0534e77f0929f121330.tar.gz
           box64-fc7d204d4bca211f2462a0534e77f0929f121330.zip
[ARM64_DYNAREC] More work around native flags handling
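
Roughly, the hunks below do the following:

* Flag-setting ALU forms used only to feed a conditional branch (ANDS/SUBS/CMPS/TST + B.cond) are replaced by their non-flag-setting forms plus CBZ/CBNZ/TBNZ, leaving the native NZCV flags untouched.
* Several explicit ZF materializations (MOV32w/CSETw + BFIw of F_ZF) are now skipped under IFNATIVE(NF_EQ), when the native Z flag already tracks the result.
* SAR Ed, CL gets a dedicated emit_sar32() helper and moves from SF_PENDING to SF_SET_PENDING.
* New branch helper macros are added: CBZw_MARK, CBNZxw_MARK, CBNZxw_MARK2, CBZxw_NEXT and CBNZw_NEXT.

A typical before/after of the first point, excerpted from the shift-by-CL handlers (illustrative only; see the hunks below for the exact context):

    // before: ANDS sets NZCV and B.cond consumes it, clobbering native flag state
    ANDSw_mask(x3, xRCX, 0, 0b00100);   // x3 = CL & 0x1f, sets flags
    B_NEXT(cEQ);                        // shift count == 0 -> nothing to do

    // after: plain AND plus compare-and-branch-on-zero, NZCV preserved
    ANDw_mask(x3, xRCX, 0, 0b00100);    // x3 = CL & 0x1f, flags untouched
    CBZw_NEXT(x3);                      // shift count == 0 -> nothing to do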
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_00.c          |  11
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_0f.c          |  59
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_64.c          |  60
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_66.c          |  38
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_660f.c        |  43
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_67.c          |   8
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_67_32.c       |   8
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_emit_shift.c  |  53
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_f0.c          |  22
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_helper.h      | 105
10 files changed, 242 insertions, 165 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_00.c b/src/dynarec/arm64/dynarec_arm64_00.c
index 53dec770..5a55e015 100644
--- a/src/dynarec/arm64/dynarec_arm64_00.c
+++ b/src/dynarec/arm64/dynarec_arm64_00.c
@@ -2753,13 +2753,13 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     UFLAG_IF {
                         UFLAG_DF(x2, d_none);
                     }
-                    MOV64xw(x4, (rex.w?64:32));
-                    SUBx_REG(x3, x4, x3);
                     GETED(0);
                     UFLAG_IF {
                         if(!rex.w && !rex.is32bits && MODREG) {MOVw_REG(ed, ed);}
                         CBZw_NEXT(x3);
                     }
+                    MOV64xw(x4, (rex.w?64:32));
+                    SUBx_REG(x3, x4, x3);
                     RORxw_REG(ed, ed, x3);
                     WBACK;
                     UFLAG_IF {  // calculate flags directly
@@ -2884,7 +2884,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     break;
                 case 7:
                     INST_NAME("SAR Ed, CL");
-                    SETFLAGS(X_ALL, SF_PENDING);
+                    SETFLAGS(X_ALL, SF_SET_PENDING);
                     if(box64_dynarec_safeflags>1)
                         MAYSETFLAGS();
                     if(rex.w) {
@@ -2897,11 +2897,8 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         if(!rex.w && !rex.is32bits && MODREG) {MOVw_REG(ed, ed);}
                         CBZw_NEXT(x3);
                     }
-                    UFLAG_OP12(ed, x3);
-                    ASRxw_REG(ed, ed, x3);
+                    emit_sar32(dyn, ninst, rex, ed, x3, x5, x4);
                     WBACK;
-                    UFLAG_RES(ed);
-                    UFLAG_DF(x3, rex.w?d_sar64:d_sar32);
                     break;
             }
             break;
diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c
index 79c0c916..3969a0cf 100644
--- a/src/dynarec/arm64/dynarec_arm64_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_0f.c
@@ -69,8 +69,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             switch(nextop) {
                 case 0xD0:
                     INST_NAME("XGETBV");
-                    CMPSw_REG(xRCX, xZR);
-                    B_MARK(cEQ);
+                    CBZw_MARK(xRCX);
                     UDF(0);
                     MARK;
                     MOV32w(xRAX, 0b111);
@@ -154,8 +153,8 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             CALL_S(x64Syscall, -1);
             LOAD_XEMU_CALL(xRIP);
             TABLE64(x3, addr); // expected return address
-            CMPSx_REG(xRIP, x3);
-            B_MARK(cNE);
+            SUBx_REG(x3, x3, xRIP);
+            CBNZx_MARK(x3);
             LDRw_U12(w1, xEmu, offsetof(x64emu_t, quit));
             CBZw_NEXT(w1);
             MARK;
@@ -1679,11 +1678,11 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             GETED(0);
             if(!rex.w && !rex.is32bits && MODREG) {MOVw_REG(ed, ed);}
             if(rex.w) {
-                ANDSx_mask(x3, xRCX, 1, 0, 0b00101);  //mask=0x000000000000003f
+                ANDx_mask(x3, xRCX, 1, 0, 0b00101);  //mask=0x000000000000003f
             } else {
-                ANDSw_mask(x3, xRCX, 0, 0b00100);  //mask=0x00000001f
+                ANDw_mask(x3, xRCX, 0, 0b00100);  //mask=0x00000001f
             }
-            B_NEXT(cEQ);
+            CBZw_NEXT(x3);
             emit_shld32(dyn, ninst, rex, ed, gd, x3, x5, x4);
             WBACK;
             break;
@@ -1760,11 +1759,11 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             GETED(0);
             if(!rex.w && !rex.is32bits && MODREG) {MOVw_REG(ed, ed);}
             if(rex.w) {
-                ANDSx_mask(x3, xRCX, 1, 0, 0b00101);  //mask=0x000000000000003f
+                ANDx_mask(x3, xRCX, 1, 0, 0b00101);  //mask=0x000000000000003f
             } else {
-                ANDSw_mask(x3, xRCX, 0, 0b00100);  //mask=0x00000001f
+                ANDw_mask(x3, xRCX, 0, 0b00100);  //mask=0x00000001f
             }
-            B_NEXT(cEQ);
+            CBZw_NEXT(x3);
             emit_shrd32(dyn, ninst, rex, ed, gd, x3, x5, x4);
             WBACK;
             break;
@@ -1936,20 +1935,16 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 wback = 0;
                 UFLAG_IF {emit_cmp32(dyn, ninst, rex, xRAX, ed, x3, x4, x5);}
                 MOVxw_REG(x1, ed);  // save value
-                CMPSxw_REG(xRAX, x1);
-                if(rex.w) {
-                    CSELxw(ed, gd, ed, cEQ);
-                } else {
-                    B_MARK2(cNE);
-                    MOVw_REG(ed, gd);
-                    MARK2;
-                }
+                SUBxw_REG(x4, xRAX, x1);
+                CBNZxw_MARK2(x4);
+                MOVxw_REG(ed, gd);
+                MARK2;
             } else {
                 addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, &unscaled, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, NULL, 0, 0);
                 LDxw(x1, wback, fixedaddress);
                 UFLAG_IF {emit_cmp32(dyn, ninst, rex, xRAX, x1, x3, x4, x5);}
-                CMPSxw_REG(xRAX, x1);
-                B_MARK(cNE);
+                SUBxw_REG(x4, xRAX, x1);
+                CBNZxw_MARK(x4);
                 // EAX == Ed
                 STxw(gd, wback, fixedaddress);
                 MARK;
@@ -2167,8 +2162,12 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             nextop = F8;
             GETED(0);
             GETGD;
-            TSTxw_REG(ed, ed);
-            B_MARK(cEQ);
+            IFX(X_ZF) {
+                TSTxw_REG(ed, ed);
+                B_MARK(cEQ);
+            } else {
+                CBZxw_MARK(ed);
+            }
             RBITxw(x1, ed);   // reverse
             CLZxw(gd, x1);    // x2 gets leading 0 == BSF
             MARK;
@@ -2186,8 +2185,12 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             nextop = F8;
             GETED(0);
             GETGD;
-            TSTxw_REG(ed, ed);
-            B_MARK(cEQ);
+            IFX(X_ZF) {
+                TSTxw_REG(ed, ed);
+                B_MARK(cEQ);
+            } else {
+                CBZxw_MARK(ed);
+            }
             CLZxw(gd, ed);    // x2 gets leading 0
             SUBxw_U12(gd, gd, rex.w?63:31);
             NEGxw_REG(gd, gd);   // complement
@@ -2411,18 +2414,20 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 B_MARK(cNE);    // EAX!=ED[0] || EDX!=Ed[1]
                 STPxw_S7_offset(xRBX, xRCX, wback, 0);
                 UFLAG_IF {
-                    MOV32w(x1, 1);
+                    IFNATIVE(NF_EQ) {} else {MOV32w(x1, 1);}
                 }
                 B_MARK3_nocond;
                 MARK;
                 MOVxw_REG(xRAX, x2);
                 MOVxw_REG(xRDX, x3);
                 UFLAG_IF {
-                    MOV32w(x1, 0);
+                    IFNATIVE(NF_EQ) {} else {MOV32w(x1, 0);}
                 }
                 MARK3;
                 UFLAG_IF {
-                    BFIw(xFlags, x1, F_ZF, 1);
+                    IFNATIVE(NF_EQ) {} else {
+                        BFIw(xFlags, x1, F_ZF, 1);
+                    }
                 }
                 SMWRITE();
                 break;
diff --git a/src/dynarec/arm64/dynarec_arm64_64.c b/src/dynarec/arm64/dynarec_arm64_64.c
index 956ee565..de3a4041 100644
--- a/src/dynarec/arm64/dynarec_arm64_64.c
+++ b/src/dynarec/arm64/dynarec_arm64_64.c
@@ -1015,22 +1015,21 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     INST_NAME("ROL Ed, CL");
                     SETFLAGS(X_OF|X_CF, SF_SUBSET);
                     if(rex.w) {
-                        ANDSx_mask(x3, xRCX, 1, 0, 0b00101);  //mask=0x000000000000003f
+                        ANDx_mask(x3, xRCX, 1, 0, 0b00101);  //mask=0x000000000000003f
                     } else {
-                        ANDSw_mask(x3, xRCX, 0, 0b00100);  //mask=0x00000001f
+                        ANDw_mask(x3, xRCX, 0, 0b00100);  //mask=0x00000001f
                     }
                     MOV64xw(x4, (rex.w?64:32));
                     SUBx_REG(x3, x4, x3);
                     GETEDO(x6, 0);
                     if(!rex.w && MODREG) {MOVw_REG(ed, ed);}
-                    B_NEXT(cEQ);
+                    CBZw_NEXT(x3);
                     RORxw_REG(ed, ed, x3);
                     WBACKO(x6);
                     UFLAG_IF {  // calculate flags directly
-                        CMPSw_U12(x3, rex.w?63:31);
-                        B_MARK(cNE);
-                            LSRxw(x4, ed, rex.w?63:31);
-                            ADDxw_REG(x4, x4, ed);
+                        SUBw_U12(x4, x3, rex.w?63:31);
+                        CBNZw_MARK(x4);
+                            EORw_REG_LSR(x4, ed, ed, rex.w?63:31);
                             BFIw(xFlags, x4, F_OF, 1);
                         MARK;
                         BFIw(xFlags, ed, F_CF, 1);
@@ -1041,18 +1040,18 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     INST_NAME("ROR Ed, CL");
                     SETFLAGS(X_OF|X_CF, SF_SUBSET);
                     if(rex.w) {
-                        ANDSx_mask(x3, xRCX, 1, 0, 0b00101);  //mask=0x000000000000003f
+                        ANDx_mask(x3, xRCX, 1, 0, 0b00101);  //mask=0x000000000000003f
                     } else {
-                        ANDSw_mask(x3, xRCX, 0, 0b00100);  //mask=0x00000001f
+                        ANDw_mask(x3, xRCX, 0, 0b00100);  //mask=0x00000001f
                     }
                     GETEDO(x6, 0);
                     if(!rex.w && MODREG) {MOVw_REG(ed, ed);}
-                    B_NEXT(cEQ);
+                    CBZw_NEXT(x3);
                     RORxw_REG(ed, ed, x3);
                     WBACKO(x6);
                     UFLAG_IF {  // calculate flags directly
-                        CMPSw_U12(x3, 1);
-                        B_MARK(cNE);
+                        SUBw_U12(x2, x3, 1);
+                        CBNZw_MARK(x2);
                             LSRxw(x2, ed, rex.w?62:30); // x2 = d>>30
                             EORw_REG_LSR(x2, x2, x2, 1); // x2 = ((d>>30) ^ ((d>>30)>>1))
                             BFIw(xFlags, x2, F_OF, 1);
@@ -1068,14 +1067,14 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     READFLAGS(X_CF);
                     SETFLAGS(X_OF|X_CF, SF_SET_DF);
                     if(rex.w) {
-                        ANDSx_mask(x2, xRCX, 1, 0, 0b00101);  //mask=0x000000000000003f
+                        ANDx_mask(x2, xRCX, 1, 0, 0b00101);  //mask=0x000000000000003f
                     } else {
-                        ANDSw_mask(x2, xRCX, 0, 0b00100);  //mask=0x00000001f
+                        ANDw_mask(x2, xRCX, 0, 0b00100);  //mask=0x00000001f
                     }
                     GETEDO(x6, 0);
                     if(wback) {ADDx_REG(x6, x6, wback); wback=x6;}
                     if(!rex.w && MODREG) {MOVw_REG(ed, ed);}
-                    B_NEXT(cEQ);
+                    CBZw_NEXT(x2);
                     CALL_(rex.w?((void*)rcl64):((void*)rcl32), ed, x6);
                     WBACK;
                     break;
@@ -1085,14 +1084,14 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     READFLAGS(X_CF);
                     SETFLAGS(X_OF|X_CF, SF_SET_DF);
                     if(rex.w) {
-                        ANDSx_mask(x2, xRCX, 1, 0, 0b00101);  //mask=0x000000000000003f
+                        ANDx_mask(x2, xRCX, 1, 0, 0b00101);  //mask=0x000000000000003f
                     } else {
-                        ANDSw_mask(x2, xRCX, 0, 0b00100);  //mask=0x00000001f
+                        ANDw_mask(x2, xRCX, 0, 0b00100);  //mask=0x00000001f
                     }
                     GETEDO(x6, 0);
                     if(wback) {ADDx_REG(x6, x6, wback); wback=x6;}
                     if(!rex.w && MODREG) {MOVw_REG(ed, ed);}
-                    B_NEXT(cEQ);
+                    CBZw_NEXT(x2);
                     CALL_(rex.w?((void*)rcr64):((void*)rcr32), ed, x6);
                     WBACK;
                     break;
@@ -1101,13 +1100,13 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     INST_NAME("SHL Ed, CL");
                     SETFLAGS(X_ALL, SF_SET_PENDING);    // some flags are left undefined
                     if(rex.w) {
-                        ANDSx_mask(x3, xRCX, 1, 0, 0b00101);  //mask=0x000000000000003f
+                        ANDx_mask(x3, xRCX, 1, 0, 0b00101);  //mask=0x000000000000003f
                     } else {
-                        ANDSw_mask(x3, xRCX, 0, 0b00100);  //mask=0x00000001f
+                        ANDw_mask(x3, xRCX, 0, 0b00100);  //mask=0x00000001f
                     }
                     GETEDO(x6, 0);
                     if(!rex.w && MODREG) {MOVw_REG(ed, ed);}
-                    B_NEXT(cEQ);
+                    CBZw_NEXT(x3);
                     emit_shl32(dyn, ninst, rex, ed, x3, x5, x4);
                     WBACKO(x6);
                     break;
@@ -1115,32 +1114,29 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     INST_NAME("SHR Ed, CL");
                     SETFLAGS(X_ALL, SF_SET_PENDING);    // some flags are left undefined
                     if(rex.w) {
-                        ANDSx_mask(x3, xRCX, 1, 0, 0b00101);  //mask=0x000000000000003f
+                        ANDx_mask(x3, xRCX, 1, 0, 0b00101);  //mask=0x000000000000003f
                     } else {
-                        ANDSw_mask(x3, xRCX, 0, 0b00100);  //mask=0x00000001f
+                        ANDw_mask(x3, xRCX, 0, 0b00100);  //mask=0x00000001f
                     }
                     GETEDO(x6, 0);
                     if(!rex.w && MODREG) {MOVw_REG(ed, ed);}
-                    B_NEXT(cEQ);
+                    CBZw_NEXT(x3);
                     emit_shr32(dyn, ninst, rex, ed, x3, x5, x4);
                     WBACKO(x6);
                     break;
                 case 7:
                     INST_NAME("SAR Ed, CL");
-                    SETFLAGS(X_ALL, SF_PENDING);
+                    SETFLAGS(X_ALL, SF_SET_PENDING);
                     if(rex.w) {
-                        ANDSx_mask(x3, xRCX, 1, 0, 0b00101);  //mask=0x000000000000003f
+                        ANDx_mask(x3, xRCX, 1, 0, 0b00101);  //mask=0x000000000000003f
                     } else {
-                        ANDSw_mask(x3, xRCX, 0, 0b00100);  //mask=0x00000001f
+                        ANDw_mask(x3, xRCX, 0, 0b00100);  //mask=0x00000001f
                     }
                     GETEDO(x6, 0);
                     if(!rex.w && MODREG) {MOVw_REG(ed, ed);}
-                    B_NEXT(cEQ);
-                    UFLAG_OP12(ed, x3);
-                    ASRxw_REG(ed, ed, x3);
+                    CBZw_NEXT(x3);
+                    emit_sar32(dyn, ninst, rex, ed, x3, x5, x4);
                     WBACKO(x6);
-                    UFLAG_RES(ed);
-                    UFLAG_DF(x3, rex.w?d_sar64:d_sar32);
                     break;
             }
             break;
diff --git a/src/dynarec/arm64/dynarec_arm64_66.c b/src/dynarec/arm64/dynarec_arm64_66.c
index aa1333e9..8a270619 100644
--- a/src/dynarec/arm64/dynarec_arm64_66.c
+++ b/src/dynarec/arm64/dynarec_arm64_66.c
@@ -581,8 +581,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 GETGD;
                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
                 if(!ALIGNED_ATOMICH) {
-                    TSTx_mask(ed, 1, 0, 0);    // mask=1
-                    B_MARK(cNE);
+                    TBNZ_MARK(ed, 0);
                 }
                 if(arm64_atomics) {
                     SWPALH(gd, x1, ed);
@@ -1161,8 +1160,8 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     if(box64_dynarec_safeflags>1)
                         MAYSETFLAGS();
                     UFLAG_IF {
-                        TSTw_mask(xRCX, 0, 0b00100);  //mask=0x00000001f
-                        B_NEXT(cEQ);
+                        ANDw_mask(x2, xRCX, 0, 0b00100);  //mask=0x00000001f
+                        CBZw_NEXT(x2);
                     }
                     ANDw_mask(x2, xRCX, 0, 0b00011);  //mask=0x00000000f
                     MOV32w(x4, 16);
@@ -1172,9 +1171,9 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     LSRw_REG(ed, ed, x2);
                     EWBACK;
                     UFLAG_IF {  // calculate flags directly
-                        CMPSw_U12(x2, 15);
-                        B_MARK(cNE);
-                            ADDxw_REG_LSR(x3, ed, ed, 15);
+                        SUBw_U12(x2, x2, 15);
+                        CBNZw_MARK(x2);
+                            EORw_REG_LSR(x3, ed, ed, 15);
                             BFIw(xFlags, x3, F_OF, 1);
                         MARK;
                         BFIw(xFlags, ed, F_CF, 1);
@@ -1187,8 +1186,8 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     if(box64_dynarec_safeflags>1)
                         MAYSETFLAGS();
                     UFLAG_IF {
-                        TSTw_mask(xRCX, 0, 0b00100);  //mask=0x00000001f
-                        B_NEXT(cEQ);
+                        ANDw_mask(x2, xRCX, 0, 0b00100);  //mask=0x00000001f
+                        CBZw_NEXT(x2);
                     }
                     ANDw_mask(x2, xRCX, 0, 0b00011);  //mask=0x00000000f
                     GETEW(x1, 0);
@@ -1196,8 +1195,8 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     LSRw_REG(ed, ed, x2);
                     EWBACK;
                     UFLAG_IF {  // calculate flags directly
-                        CMPSw_U12(x2, 1);
-                        B_MARK(cNE);
+                        SUBw_U12(x2, x2, 1);
+                        CBNZw_MARK(x2);
                             LSRxw(x2, ed, 14); // x2 = d>>14
                             EORw_REG_LSR(x2, x2, x2, 1); // x2 = ((d>>14) ^ ((d>>14)>>1))
                             BFIw(xFlags, x2, F_OF, 1);
@@ -1236,11 +1235,9 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     SETFLAGS(X_ALL, SF_SET_PENDING);    // some flags are left undefined
                     if(box64_dynarec_safeflags>1)
                         MAYSETFLAGS();
+                    ANDw_mask(x2, xRCX, 0, 0b00100);  //mask=0x00000001f
                     UFLAG_IF {
-                        ANDSw_mask(x2, xRCX, 0, 0b00100);  //mask=0x00000001f
-                        B_NEXT(cEQ);
-                    } else {
-                        ANDw_mask(x2, xRCX, 0, 0b00100);  //mask=0x00000001f
+                        CBZw_NEXT(x2);
                     }
                     GETEW(x1, 0);
                     emit_shl16(dyn, ninst, x1, x2, x5, x4);
@@ -1251,11 +1248,9 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     SETFLAGS(X_ALL, SF_SET_PENDING);    // some flags are left undefined
                     if(box64_dynarec_safeflags>1)
                         MAYSETFLAGS();
+                    ANDw_mask(x2, xRCX, 0, 0b00100);  //mask=0x00000001f
                     UFLAG_IF {
-                        ANDSw_mask(x2, xRCX, 0, 0b00100);  //mask=0x00000001f
-                        B_NEXT(cEQ);
-                    } else {
-                        ANDw_mask(x2, xRCX, 0, 0b00100);  //mask=0x00000001f
+                        CBZw_NEXT(x2);
                     }
                     GETEW(x1, 0);
                     emit_shr16(dyn, ninst, x1, x2, x5, x4);
@@ -1266,11 +1261,10 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     SETFLAGS(X_ALL, SF_SET_PENDING);    // some flags are left undefined
                     if(box64_dynarec_safeflags>1)
                         MAYSETFLAGS();
+                    ANDw_mask(x2, xRCX, 0, 0b00100);  //mask=0x00000001f
                     UFLAG_IF {
-                        ANDSw_mask(x2, xRCX, 0, 0b00100);  //mask=0x00000001f
-                        B_NEXT(cEQ);
+                        CBZw_NEXT(x2);
                     } else {
-                        ANDw_mask(x2, xRCX, 0, 0b00100);  //mask=0x00000001f
                     }
                     GETSEW(x1, 0);
                     emit_sar16(dyn, ninst, x1, x2, x5, x4);
diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c
index 8a385ca4..2ddb2212 100644
--- a/src/dynarec/arm64/dynarec_arm64_660f.c
+++ b/src/dynarec/arm64/dynarec_arm64_660f.c
@@ -2344,11 +2344,9 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 MAYSETFLAGS();
             GETGW(x2);
             GETEW(x1, 0);
+            ANDw_mask(x4, xRCX, 0, 0b00100);  //mask=0x00000001f
             UFLAG_IF {
-                ANDSw_mask(x4, xRCX, 0, 0b00100);  //mask=0x00000001f
-                B_NEXT(cEQ);
-            } else {
-                ANDw_mask(x4, xRCX, 0, 0b00100);  //mask=0x00000001f
+                CBZw_NEXT(x4);
             }
             emit_shld16(dyn, ninst, ed, gd, x4, x5, x6);
             EWBACK;
@@ -2401,11 +2399,10 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 MAYSETFLAGS();
             GETGW(x2);
             GETEW(x1, 0);
+            ANDw_mask(x4, xRCX, 0, 0b00100);  //mask=0x00000001f
             UFLAG_IF {
-                ANDSw_mask(x4, xRCX, 0, 0b00100);  //mask=0x00000001f
-                B_NEXT(cEQ);
+                CBZw_NEXT(x4);
             } else {
-                ANDw_mask(x4, xRCX, 0, 0b00100);  //mask=0x00000001f
             }
             emit_shrd16(dyn, ninst, ed, gd, x4, x5, x6);
             EWBACK;
@@ -2586,14 +2583,22 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             nextop = F8;
             GETGD;
             GETEW(x1, 0);  // Get EW
-            TSTw_REG(x1, x1);
-            B_MARK(cEQ);
+            IFX(X_ZF) {
+                TSTw_REG(x1, x1);
+                B_MARK(cEQ);
+            } else {
+                CBZw_MARK(x1);
+            }
             RBITw(x1, x1);   // reverse
             CLZw(x2, x1);    // x2 gets leading 0 == BSF
             BFIx(gd, x2, 0, 16);
             MARK;
-            CSETw(x1, cEQ);    //ZF not set
-            BFIw(xFlags, x1, F_ZF, 1);
+            IFX(X_ZF) {
+                IFNATIVE(NF_EQ) {} else {
+                    CSETw(x1, cEQ);    //ZF not set
+                    BFIw(xFlags, x1, F_ZF, 1);
+                }
+            }
             break;
         case 0xBD:
             INST_NAME("BSR Gw,Ew");
@@ -2602,16 +2607,24 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             nextop = F8;
             GETGD;
             GETEW(x1, 0);  // Get EW
-            TSTw_REG(x1, x1);   // Don't use CBZ here, as the flag is reused later
-            B_MARK(cEQ);
+            IFX(X_ZF) {
+                TSTw_REG(x1, x1);   // Don't use CBZ here, as the flag is reused later
+                B_MARK(cEQ);
+            } else {
+                CBZw_MARK(x1);
+            }
             LSLw(x1, x1, 16);   // put bits on top
             CLZw(x2, x1);       // x2 gets leading 0
             SUBw_U12(x2, x2, 15);
             NEGw_REG(x2, x2);   // complement
             BFIx(gd, x2, 0, 16);
             MARK;
-            CSETw(x1, cEQ);    //ZF not set
-            BFIw(xFlags, x1, F_ZF, 1);
+            IFX(X_ZF) {
+                IFNATIVE(NF_EQ) {} else {
+                    CSETw(x1, cEQ);    //ZF not set
+                    BFIw(xFlags, x1, F_ZF, 1);
+                }
+            }
             break;
         case 0xBE:
             INST_NAME("MOVSX Gw, Eb");
diff --git a/src/dynarec/arm64/dynarec_arm64_67.c b/src/dynarec/arm64/dynarec_arm64_67.c
index 111bcf8e..94202ba1 100644
--- a/src/dynarec/arm64/dynarec_arm64_67.c
+++ b/src/dynarec/arm64/dynarec_arm64_67.c
@@ -1321,9 +1321,9 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             READFLAGS(X_ZF);
             i8 = F8S;
             MOVw_REG(x1, xRCX);
-            SUBSw_U12(x1, x1, 1);
+            SUBw_U12(x1, x1, 1);
             BFIx(xRCX, x1, 0, 32);
-            B_NEXT(cEQ);    // ECX is 0, no LOOP
+            CBZw_NEXT(x1);    // ECX is 0, no LOOP
             TSTw_mask(xFlags, 0b011010, 0); //mask=0x40
             GO(cNE, cEQ);
             break;
@@ -1332,9 +1332,9 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             READFLAGS(X_ZF);
             i8 = F8S;
             MOVw_REG(x1, xRCX);
-            SUBSw_U12(x1, x1, 1);
+            SUBw_U12(x1, x1, 1);
             BFIx(xRCX, x1, 0, 32);
-            B_NEXT(cEQ);    // ECX is 0, no LOOP
+            CBZw_NEXT(x1);    // ECX is 0, no LOOP
             TSTw_mask(xFlags, 0b011010, 0); //mask=0x40
             GO(cEQ, cNE);
             break;
diff --git a/src/dynarec/arm64/dynarec_arm64_67_32.c b/src/dynarec/arm64/dynarec_arm64_67_32.c
index 691c470c..61556716 100644
--- a/src/dynarec/arm64/dynarec_arm64_67_32.c
+++ b/src/dynarec/arm64/dynarec_arm64_67_32.c
@@ -92,9 +92,9 @@ uintptr_t dynarec64_67_32(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int
             READFLAGS(X_ZF);
             i8 = F8S;
             UXTHw(x1, xRCX);
-            SUBSw_U12(x1, x1, 1);
+            SUBw_U12(x1, x1, 1);
             BFIx(xRCX, x1, 0, 16);
-            B_NEXT(cEQ);    // ECX is 0, no LOOP
+            CBZw_NEXT(x1);    // ECX is 0, no LOOP
             TSTw_mask(xFlags, 0b011010, 0); //mask=0x40
             GO(cNE, cEQ);
             break;
@@ -103,9 +103,9 @@ uintptr_t dynarec64_67_32(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int
             READFLAGS(X_ZF);
             i8 = F8S;
             UXTHw(x1, xRCX);
-            SUBSw_U12(x1, x1, 1);
+            SUBw_U12(x1, x1, 1);
             BFIx(xRCX, x1, 0, 16);
-            B_NEXT(cEQ);    // ECX is 0, no LOOP
+            CBZw_NEXT(x1);    // ECX is 0, no LOOP
             TSTw_mask(xFlags, 0b011010, 0); //mask=0x40
             GO(cEQ, cNE);
             break;
diff --git a/src/dynarec/arm64/dynarec_arm64_emit_shift.c b/src/dynarec/arm64/dynarec_arm64_emit_shift.c
index c87a43ab..c2d74576 100644
--- a/src/dynarec/arm64/dynarec_arm64_emit_shift.c
+++ b/src/dynarec/arm64/dynarec_arm64_emit_shift.c
@@ -266,6 +266,57 @@ void emit_shr32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, i
     }
 }
 
+// emit SAR32 instruction, from s1 , s2, store result in s1 using s3 and s4 as scratch, s2 can be same as s3
+void emit_sar32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4)
+{
+    MAYUSE(s2);
+    int64_t j64;
+    MAYUSE(j64);
+
+    IFX(X_PEND) {
+        STRxw_U12(s1, xEmu, offsetof(x64emu_t, op1));
+        STRxw_U12(s2, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s4, rex.w?d_sar64:d_sar32);
+    } else IFX(X_ALL) {
+        SET_DFNONE(s4);
+    }
+    IFX(X_CF) {
+        SUBxw_U12(s3, s2, 1);
+        ASRxw_REG(s3, s1, s3);
+        BFIw(xFlags, s3, 0, 1);
+    }
+    IFX(X_OF) {
+        BFCw(xFlags, F_OF, 1);
+    }
+    ASRxw_REG(s1, s1, s2);
+    IFX(X_PEND) {
+        STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    int need_tst = 0;
+    IFX(X_ZF) need_tst = 1;
+    IFXNATIVE(X_SF, NF_SF) need_tst = 1;
+    if(need_tst) TSTxw_REG(s1, s1);
+    IFX(X_ZF) {
+        IFNATIVE(NF_EQ) {} else {
+            CSETw(s4, cEQ);
+            BFIw(xFlags, s4, F_ZF, 1);
+        }
+    }
+    IFX(X_SF) {
+        IFNATIVE(NF_SF) {} else {
+            LSRxw(s4, s1, (rex.w)?63:31);
+            BFIx(xFlags, s4, F_SF, 1);
+        }
+    }
+    if(box64_dynarec_test)
+        IFX(X_AF) {
+            BFCw(xFlags, F_AF, 1);
+        }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s4);
+    }
+}
+
 // emit SAR32 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch
 void emit_sar32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4)
 {
@@ -304,7 +355,7 @@ void emit_sar32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, i
         }
     }
     IFX(X_OF)
-        if(c==1) {
+        if(c==1 || box64_dynarec_test) {
             BFCw(xFlags, F_OF, 1);
     }
     if(box64_dynarec_test)
diff --git a/src/dynarec/arm64/dynarec_arm64_f0.c b/src/dynarec/arm64/dynarec_arm64_f0.c
index c6ded5d0..4767f36a 100644
--- a/src/dynarec/arm64/dynarec_arm64_f0.c
+++ b/src/dynarec/arm64/dynarec_arm64_f0.c
@@ -96,7 +96,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
             GETGD;
-            if((nextop&0xC0)==0xC0) {
+            if(MODREG) {
                 ed = xRAX+(nextop&7)+(rex.b<<3);
                 emit_add32(dyn, ninst, rex, ed, gd, x3, x4);
             } else {
@@ -271,8 +271,8 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                                 wb1 = 0;
                                 ed = x2;
                                 UFLAG_IF {emit_cmp8(dyn, ninst, x6, ed, x3, x4, x5);}
-                                CMPSxw_REG(x6, x2);
-                                B_MARK2(cNE);
+                                SUBxw_REG(x6, x6, x2);
+                                CBNZxw_MARK2(x6);
                                 BFIx(wback, x2, wb2, 8);
                                 MOVxw_REG(ed, gd);
                                 MARK2;
@@ -321,8 +321,8 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                                 wback = 0;
                                 UFLAG_IF {emit_cmp32(dyn, ninst, rex, xRAX, ed, x3, x4, x5);}
                                 MOVxw_REG(x1, ed);  // save value
-                                CMPSxw_REG(xRAX, x1);
-                                B_MARK2(cNE);
+                                SUBxw_REG(x3, xRAX, x1);
+                                CBNZxw_MARK2(x3);
                                 MOVxw_REG(ed, gd);
                                 MARK2;
                                 MOVxw_REG(xRAX, x1);
@@ -719,7 +719,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                             UFLAG_IF {
                                 CMPSxw_REG(x2, xRAX);
                                 CCMPxw(x3, xRDX, 0, cEQ);
-                                CSETw(x1, cEQ);
+                                IFNATIVE(NF_EQ) {} else {CSETw(x1, cEQ);}
                             }
                             MOVx_REG(xRAX, x2);
                             MOVx_REG(xRDX, x3);
@@ -735,7 +735,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                             STLXPxw(x4, xRBX, xRCX, wback);
                             CBNZx_MARKLOCK(x4);
                             UFLAG_IF {
-                                MOV32w(x1, 1);
+                                IFNATIVE(NF_EQ) {} else {MOV32w(x1, 1);}
                             }
                             B_MARK3_nocond;
                             MARK;
@@ -744,7 +744,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                             MOVxw_REG(xRAX, x2);
                             MOVxw_REG(xRDX, x3);
                             UFLAG_IF {
-                                MOV32w(x1, 0);
+                                IFNATIVE(NF_EQ) {} else {MOV32w(x1, 0);}
                             }
                             if(!ALIGNED_ATOMICxw) {
                                 B_MARK3_nocond;
@@ -761,7 +761,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                             CBNZx_MARK2(x4);
                             STPxw_S7_offset(xRBX, xRCX, wback, 0);
                             UFLAG_IF {
-                                MOV32w(x1, 1);
+                                IFNATIVE(NF_EQ) {} else {MOV32w(x1, 1);}
                             }
                             B_MARK3_nocond;
                             MARKSEG;
@@ -770,13 +770,13 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                             MOVxw_REG(xRAX, x2);
                             MOVxw_REG(xRDX, x3);
                             UFLAG_IF {
-                                MOV32w(x1, 0);
+                                IFNATIVE(NF_EQ) {} else {MOV32w(x1, 0);}
                             }
                         }
                         MARK3;
                         SMDMB();
                         UFLAG_IF {
-                            BFIw(xFlags, x1, F_ZF, 1);
+                            IFNATIVE(NF_EQ) {} else {BFIw(xFlags, x1, F_ZF, 1);}
                         }
                         break;
                     default:
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h
index 47c3b806..dc4ad69b 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.h
+++ b/src/dynarec/arm64/dynarec_arm64_helper.h
@@ -752,88 +752,99 @@
 #define GETMARKLOCK dyn->insts[ninst].marklock
 
 // Branch to MARK if cond (use j64)
-#define B_MARK(cond)                \
-    j64 = GETMARK-(dyn->native_size);  \
+#define B_MARK(cond)                    \
+    j64 = GETMARK-(dyn->native_size);   \
     Bcond(cond, j64)
 // Branch to MARK unconditionnal (use j64)
-#define B_MARK_nocond               \
-    j64 = GETMARK-(dyn->native_size);  \
+#define B_MARK_nocond                   \
+    j64 = GETMARK-(dyn->native_size);   \
     B(j64)
 // Branch to MARK if reg is 0 (use j64)
-#define CBZxw_MARK(reg)             \
-    j64 = GETMARK-(dyn->native_size);  \
+#define CBZw_MARK(reg)                  \
+    j64 = GETMARK-(dyn->native_size);   \
+    CBZw(reg, j64)
+// Branch to MARK if reg is 0 (use j64)
+#define CBZxw_MARK(reg)                 \
+    j64 = GETMARK-(dyn->native_size);   \
     CBZxw(reg, j64)
 // Branch to MARK if reg is not 0 (use j64)
-#define CBNZx_MARK(reg)             \
-    j64 = GETMARK-(dyn->native_size);  \
+#define CBNZx_MARK(reg)                 \
+    j64 = GETMARK-(dyn->native_size);   \
     CBNZx(reg, j64)
 // Branch to MARK if reg is not 0 (use j64)
-#define CBNZw_MARK(reg)             \
-    j64 = GETMARK-(dyn->native_size);  \
+#define CBNZxw_MARK(reg)                \
+    j64 = GETMARK-(dyn->native_size);   \
+    CBNZxw(reg, j64)
+// Branch to MARK if reg is not 0 (use j64)
+#define CBNZw_MARK(reg)                 \
+    j64 = GETMARK-(dyn->native_size);   \
     CBNZw(reg, j64)
 // Test bit N of A and branch to MARK if not set
-#define TBZ_MARK(A, N)              \
-    j64 = GETMARK-(dyn->native_size);  \
+#define TBZ_MARK(A, N)                  \
+    j64 = GETMARK-(dyn->native_size);   \
     TBZ(A, N, j64)
 // Test bit N of A and branch to MARK if set
-#define TBNZ_MARK(A, N)             \
-    j64 = GETMARK-(dyn->native_size);  \
+#define TBNZ_MARK(A, N)                 \
+    j64 = GETMARK-(dyn->native_size);   \
     TBNZ(A, N, j64)
 // Branch to MARK2 if cond (use j64)
-#define B_MARK2(cond)               \
-    j64 = GETMARK2-(dyn->native_size); \
+#define B_MARK2(cond)                   \
+    j64 = GETMARK2-(dyn->native_size);  \
     Bcond(cond, j64)
 // Branch to MARK2 unconditionnal (use j64)
-#define B_MARK2_nocond              \
-    j64 = GETMARK2-(dyn->native_size); \
+#define B_MARK2_nocond                  \
+    j64 = GETMARK2-(dyn->native_size);  \
     B(j64)
 // Branch to MARK2 if reg is 0 (use j64)
-#define CBZx_MARK2(reg)             \
-    j64 = GETMARK2-(dyn->native_size); \
+#define CBZx_MARK2(reg)                 \
+    j64 = GETMARK2-(dyn->native_size);  \
     CBZx(reg, j64)
 // Branch to MARK2 if reg is not 0 (use j64)
-#define CBNZx_MARK2(reg)            \
-    j64 = GETMARK2-(dyn->native_size); \
+#define CBNZx_MARK2(reg)                \
+    j64 = GETMARK2-(dyn->native_size);  \
     CBNZx(reg, j64)
+#define CBNZxw_MARK2(reg)               \
+    j64 = GETMARK2-(dyn->native_size);  \
+    CBNZxw(reg, j64)
 // Test bit N of A and branch to MARK2 if set
-#define TBNZ_MARK2(A, N)            \
-    j64 = GETMARK2-(dyn->native_size); \
+#define TBNZ_MARK2(A, N)                \
+    j64 = GETMARK2-(dyn->native_size);  \
     TBNZ(A, N, j64)
 // Branch to MARK3 if cond (use j64)
-#define B_MARK3(cond)               \
-    j64 = GETMARK3-(dyn->native_size); \
+#define B_MARK3(cond)                   \
+    j64 = GETMARK3-(dyn->native_size);  \
     Bcond(cond, j64)
 // Test bit N of A and branch to MARK3 if not set
-#define TBZ_MARK2(A, N)            \
-    j64 = GETMARK2-(dyn->native_size); \
+#define TBZ_MARK2(A, N)                 \
+    j64 = GETMARK2-(dyn->native_size);  \
     TBZ(A, N, j64)
 // Branch to MARK3 unconditionnal (use j64)
-#define B_MARK3_nocond              \
-    j64 = GETMARK3-(dyn->native_size); \
+#define B_MARK3_nocond                  \
+    j64 = GETMARK3-(dyn->native_size);  \
     B(j64)
 // Branch to MARK3 if reg is not 0 (use j64)
-#define CBNZx_MARK3(reg)            \
-    j64 = GETMARK3-(dyn->native_size); \
+#define CBNZx_MARK3(reg)                \
+    j64 = GETMARK3-(dyn->native_size);  \
     CBNZx(reg, j64)
 // Branch to MARK3 if reg is not 0 (use j64)
-#define CBNZw_MARK3(reg)            \
-    j64 = GETMARK3-(dyn->native_size); \
+#define CBNZw_MARK3(reg)                \
+    j64 = GETMARK3-(dyn->native_size);  \
     CBNZw(reg, j64)
 // Branch to MARK3 if reg is 0 (use j64)
-#define CBZx_MARK3(reg)             \
-    j64 = GETMARK3-(dyn->native_size); \
+#define CBZx_MARK3(reg)                 \
+    j64 = GETMARK3-(dyn->native_size);  \
     CBZx(reg, j64)
 // Branch to MARK3 if reg is 0 (use j64)
-#define CBZw_MARK3(reg)             \
-    j64 = GETMARK3-(dyn->native_size); \
+#define CBZw_MARK3(reg)                 \
+    j64 = GETMARK3-(dyn->native_size);  \
     CBZw(reg, j64)
 // Test bit N of A and branch to MARK3 if not set
-#define TBZ_MARK3(A, N)             \
-    j64 = GETMARK3-(dyn->native_size); \
+#define TBZ_MARK3(A, N)                 \
+    j64 = GETMARK3-(dyn->native_size);  \
     TBZ(A, N, j64)
 // Test bit N of A and branch to MARK3 if set
-#define TBNZ_MARK3(A, N)            \
-    j64 = GETMARK3-(dyn->native_size); \
+#define TBNZ_MARK3(A, N)                \
+    j64 = GETMARK3-(dyn->native_size);  \
     TBNZ(A, N, j64)
 // Branch to next instruction if cond (use j64)
 #define B_NEXT(cond)     \
@@ -851,6 +862,14 @@
 #define CBZx_NEXT(reg)    \
     j64 =  (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0; \
     CBZx(reg, j64)
+// Branch to next instruction if reg is 0 (use j64)
+#define CBZxw_NEXT(reg)    \
+    j64 =  (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0; \
+    CBZxw(reg, j64)
+// Branch to next instruction if reg is not 0 (use j64)
+#define CBNZw_NEXT(reg)   \
+    j64 =  (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0; \
+    CBNZw(reg, j64)
 // Branch to next instruction if reg is not 0 (use j64)
 #define CBNZx_NEXT(reg)   \
     j64 =  (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0; \
@@ -1295,6 +1314,7 @@ void* arm64_next(x64emu_t* emu, uintptr_t addr);
 #define emit_shl32c     STEPNAME(emit_shl32c)
 #define emit_shr32      STEPNAME(emit_shr32)
 #define emit_shr32c     STEPNAME(emit_shr32c)
+#define emit_sar32      STEPNAME(emit_sar32)
 #define emit_sar32c     STEPNAME(emit_sar32c)
 #define emit_shl8       STEPNAME(emit_shl8)
 #define emit_shl8c      STEPNAME(emit_shl8c)
@@ -1459,6 +1479,7 @@ void emit_shl32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3
 void emit_shl32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
 void emit_shr32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
 void emit_shr32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
+void emit_sar32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
 void emit_sar32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
 void emit_shl8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4);
 void emit_shl8c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4);