about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
authorptitSeb <sebastien.chev@gmail.com>2021-03-16 20:32:37 +0100
committerptitSeb <sebastien.chev@gmail.com>2021-03-16 20:32:37 +0100
commitb553c3f3ac7fe752dca6c27be72bec5969a1c955 (patch)
treeaaff157a1de3962b8190609c49ba70f3a1edde73 /src
parent551542f99a2bcd690f19e1bb17ac16172feef583 (diff)
downloadbox64-b553c3f3ac7fe752dca6c27be72bec5969a1c955.tar.gz
box64-b553c3f3ac7fe752dca6c27be72bec5969a1c955.zip
[DYNAREC] Added 70..7F opcodes (and fixed ORR/AND bitmask coding/decoding, that's some convoluted encoding here)
Diffstat (limited to 'src')
-rwxr-xr-xsrc/dynarec/arm64_emitter.h38
-rwxr-xr-xsrc/dynarec/arm64_printer.c59
-rwxr-xr-xsrc/dynarec/dynarec_arm64_00.c109
-rwxr-xr-xsrc/dynarec/dynarec_arm64_emit_math.c38
-rwxr-xr-xsrc/dynarec/dynarec_arm64_emit_tests.c16
-rwxr-xr-xsrc/dynarec/dynarec_arm64_helper.c26
-rwxr-xr-xsrc/dynarec/dynarec_arm64_pass.c8
-rwxr-xr-xsrc/emu/x64emu.c7
-rwxr-xr-xsrc/include/regs.h3
-rwxr-xr-xsrc/libtools/signals.c4
10 files changed, 238 insertions, 70 deletions
diff --git a/src/dynarec/arm64_emitter.h b/src/dynarec/arm64_emitter.h
index 7fabde02..f3750b07 100755
--- a/src/dynarec/arm64_emitter.h
+++ b/src/dynarec/arm64_emitter.h
@@ -224,15 +224,16 @@
 #define Bcond(cond, imm19)              EMIT(Bcond_gen(((imm19)>>2)&0x7FFFF, cond))
 
 // AND / ORR
-#define LOGIC_gen(sf, opc, N, immr, imms, Rn, Rd)  ((sf)<<31 | (opc)<<29 | 0b100100<<23 | (N)<<22 | (immr)<<16 | (imms)<<10 | (Rn) | Rd)
-#define ANDx_U13(Rd, Rn, imm13)         EMIT(LOGIC_gen(1, 0b00, ((imm13)>>12)&1, (imm13)&0b111111, ((imm13)>>6)&0b111111, Rn, Rd))
-#define ANDw_U12(Rd, Rn, imm12)         EMIT(LOGIC_gen(0, 0b00, 0, (imm12)&0b111111, ((imm12)>>6)&0b111111, Rn, Rd))
-#define ANDxw_U12(Rd, Rn, imm12)        EMIT(LOGIC_gen(rex.w, 0b00, 0, (imm12)&0b111111, ((imm12)>>6)&0b111111, Rn, Rd))
-#define ANDSx_U13(Rd, Rn, imm13)        EMIT(LOGIC_gen(1, 0b11, ((imm13)>>12)&1, (imm13)&0b111111, ((imm13)>>6)&0b111111, Rn, Rd))
-#define ANDSw_U12(Rd, Rn, imm12)        EMIT(LOGIC_gen(0, 0b11, 0, (imm12)&0b111111, ((imm12)>>6)&0b111111, Rn, Rd))
-#define ORRx_U13(Rd, Rn, imm13)         EMIT(LOGIC_gen(1, 0b01, ((imm13)>>12)&1, (imm13)&0b111111, ((imm13)>>6)&0b111111, Rn, Rd))
-#define ORRw_U12(Rd, Rn, imm12)         EMIT(LOGIC_gen(0, 0b01, 0, (imm12)&0b111111, ((imm12)>>6)&0b111111, Rn, Rd))
-#define ORRxw_U12(Rd, Rn, imm12)        EMIT(LOGIC_gen(rex.w, 0b01, 0, (imm12)&0b111111, ((imm12)>>6)&0b111111, Rn, Rd))
+#define LOGIC_gen(sf, opc, N, immr, imms, Rn, Rd)  ((sf)<<31 | (opc)<<29 | 0b100100<<23 | (N)<<22 | (immr)<<16 | (imms)<<10 | (Rn)<<5 | Rd)
+// logic to get the mask is ... convoluted... list of possible value there: https://gist.github.com/dinfuehr/51a01ac58c0b23e4de9aac313ed6a06a
+#define ANDx_mask(Rd, Rn, N, immr, imms)    EMIT(LOGIC_gen(1, 0b00, N, immr, imms, Rn, Rd))
+#define ANDw_mask(Rd, Rn, immr, imms)       EMIT(LOGIC_gen(0, 0b00, 0, immr, imms, Rn, Rd))
+#define ANDSx_mask(Rd, Rn, N, immr, imms)   EMIT(LOGIC_gen(1, 0b11, N, immr, imms, Rn, Rd))
+#define ANDSw_mask(Rd, Rn, immr, imms)      EMIT(LOGIC_gen(0, 0b11, 0, immr, imms, Rn, Rd))
+#define ORRx_mask(Rd, Rn, N, immr, imms)    EMIT(LOGIC_gen(1, 0b01, N, immr, imms, Rn, Rd))
+#define ORRw_mask(Rd, Rn, immr, imms)       EMIT(LOGIC_gen(0, 0b01, 0, immr, imms, Rn, Rd))
+#define TSTx_mask(Rn, immr, imms)           ANDSx_mask(xZR, Rn, immr, imms)
+#define TSTw_mask(Rn, immr, imms)           ANDSw_mask(wZR, Rn, immr, imms)
 
 #define LOGIC_REG_gen(sf, opc, shift, N, Rm, imm6, Rn, Rd)    ((sf)<<31 | (opc)<<29 | 0b01010<<24 | (shift)<<22 | (N)<<21 | (Rm)<<16 | (imm6)<<10 | (Rn)<<5 | (Rd))
 #define ANDx_REG(Rd, Rn, Rm)            EMIT(LOGIC_REG_gen(1, 0b00, 0b00, 0, Rm, 0, Rn, Rd))
@@ -243,11 +244,18 @@
 #define ANDSxw_REG(Rd, Rn, Rm)          EMIT(LOGIC_REG_gen(rex.w, 0b11, 0b00, 0, Rm, 0, Rn, Rd))
 #define ORRx_REG(Rd, Rn, Rm)            EMIT(LOGIC_REG_gen(1, 0b01, 0b00, 0, Rm, 0, Rn, Rd))
 #define ORRx_REG_LSL(Rd, Rn, Rm, lsl)   EMIT(LOGIC_REG_gen(1, 0b01, lsl, 0, Rm, 0, Rn, Rd))
+#define ORRw_REG_LSL(Rd, Rn, Rm, lsl)   EMIT(LOGIC_REG_gen(0, 0b01, lsl, 0, Rm, 0, Rn, Rd))
 #define ORRxw_REG(Rd, Rn, Rm)           EMIT(LOGIC_REG_gen(rex.w, 0b01, 0b00, 0, Rm, 0, Rn, Rd))
 #define ORRw_REG(Rd, Rn, Rm)            EMIT(LOGIC_REG_gen(0, 0b01, 0b00, 0, Rm, 0, Rn, Rd))
 #define ORNx_REG(Rd, Rn, Rm)            EMIT(LOGIC_REG_gen(1, 0b01, 0b00, 1, Rm, 0, Rn, Rd))
 #define ORNw_REG(Rd, Rn, Rm)            EMIT(LOGIC_REG_gen(0, 0b01, 0b00, 1, Rm, 0, Rn, Rd))
 #define ORNxw_REG(Rd, Rn, Rm)           EMIT(LOGIC_REG_gen(rex.w, 0b01, 0b00, 1, Rm, 0, Rn, Rd))
+#define EORx_REG(Rd, Rn, Rm)            EMIT(LOGIC_REG_gen(1, 0b10, 0b00, 0, Rm, 0, Rn, Rd))
+#define EORw_REG(Rd, Rn, Rm)            EMIT(LOGIC_REG_gen(0, 0b10, 0b00, 0, Rm, 0, Rn, Rd))
+#define EORxw_REG(Rd, Rn, Rm)           EMIT(LOGIC_REG_gen(rex.w, 0b10, 0b00, 0, Rm, 0, Rn, Rd))
+#define EORx_REG_LSL(Rd, Rn, Rm, imm6)  EMIT(LOGIC_REG_gen(1, 0b10, 0b00, 0, Rm, imm6, Rn, Rd))
+#define EORw_REG_LSL(Rd, Rn, Rm, imm6)  EMIT(LOGIC_REG_gen(0, 0b10, 0b00, 0, Rm, imm6, Rn, Rd))
+#define EORxw_REG_LSL(Rd, Rn, Rm, imm6) EMIT(LOGIC_REG_gen(rex.w, 0b10, 0b00, 0, Rm, imm6, Rn, Rd))
 #define MOVx(Rd, Rm)                    ORRx_REG(Rd, xZR, Rm)
 #define MOVw(Rd, Rm)                    ORRw_REG(Rd, xZR, Rm)
 #define MOVxw(Rd, Rm)                   ORRxw_REG(Rd, xZR, Rm)
@@ -266,6 +274,8 @@
 #define BICx_REG    BICx
 #define BICw_REG    BICw
 #define BICxw_REG   BICxw
+#define TSTx_REG(Rn, Rm)                ANDSx_REG(xZR, Rn, Rm)
+#define TSTw_REG(Rn, Rm)                ANDSw_REG(wZR, Rn, Rm)
 
 
 // BFI
@@ -273,12 +283,10 @@
 #define BFMx(Rd, Rn, immr, imms)        EMIT(BFM_gen(1, 0b01, 1, immr, imms, Rn, Rd))
 #define BFMw(Rd, Rn, immr, imms)        EMIT(BFM_gen(0, 0b01, 0, immr, imms, Rn, Rd))
 #define BFMxw(Rd, Rn, immr, imms)       EMIT(BFM_gen(rex.w, 0b01, rex.w, immr, imms, Rn, Rd))
-#define BFIx(Rd, Rn, lsb, width)        BFMx(Rd, Rn, (-lsb)%64, (width)-1)
-#define BFIw(Rd, Rn, lsb, width)        BFMw(Rd, Rn, (-lsb)%32, (width)-1)
-#define BFIxw(Rd, Rn, lsb, width)       BFMxw(Rd, Rn, (-lsb)%(rex.w?64:32), (width)-1)
-#define BFCx(Rd, Rn, lsb, width)        BFMx(Rd, xZR, (-lsb)%64, (width)-1)
-#define BFCw(Rd, Rn, lsb, width)        BFMw(Rd, xZR, (-lsb)%32, (width)-1)
-#define BFCxw(Rd, Rn, lsb, width)       BFMxw(Rd, xZR, (-lsb)%(rex.w?64:32), (width)-1)
+#define BFIx(Rd, Rn, lsb, width)        BFMx(Rd, Rn, ((-lsb)%64)&0x3f, (width)-1)
+#define BFIw(Rd, Rn, lsb, width)        BFMw(Rd, Rn, ((-lsb)%32)&0x1f, (width)-1)
+#define BFCx(Rd, Rn, lsb, width)        BFMx(Rd, xZR, ((-lsb)%64)&0x3f, (width)-1)
+#define BFCw(Rd, Rn, lsb, width)        BFMw(Rd, xZR, ((-lsb)%32)&0x1f, (width)-1)
 
 // UBFX
 #define UBFM_gen(sf, N, immr, imms, Rn, Rd)    ((sf)<<31 | 0b10<<29 | 0b100110<<23 | (N)<<22 | (immr)<<16 | (imms)<<10 | (Rn)<<5 | (Rd))
diff --git a/src/dynarec/arm64_printer.c b/src/dynarec/arm64_printer.c
index 2a66f553..2e0c62c1 100755
--- a/src/dynarec/arm64_printer.c
+++ b/src/dynarec/arm64_printer.c
@@ -21,6 +21,27 @@ typedef struct arm64_print_s {
     int x, w;

 } arm64_print_t;

 

+uint64_t DecodeBitMasks(int N, int imms, int immr)

+{

+    int len = 31-__builtin_clz(N<<6 | ((~imms)&0b111111));

+    if(len<1) return 0;

+    int levels = (1<<len) - 1;

+    int s = imms & levels;

+    int r = immr & levels;  // this the ROR factor

+    int e = 1<<len; // actual number of bits

+    if(s==levels) return 0;

+    uint64_t mask = (1LL<<(s+1))-1;

+    if(r) { // rotate

+         mask=(mask>>r)|(mask<<(e-r));

+         mask&=((1LL<<e)-1);

+    }

+    while (e<64) {  // replicate

+        mask|=(mask<<e);

+        e<<=1;

+    }

+    return mask;

+}

+

 int isMask(uint32_t opcode, const char* mask, arm64_print_t *a)

 {

     if(strlen(mask)!=32) {

@@ -259,13 +280,14 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
     }

     // ---- LOGIC

     if(isMask(opcode, "f11100100Nrrrrrrssssssnnnnnddddd", &a)) {

-        int i = (a.N<<12)|(imms<<6)|immr;

+        uint64_t i = DecodeBitMasks(a.N, imms, immr);

+        if(!sf) i&=0xffffffff;

         if(sf==0 && a.N==1)

-            snprintf(buff, sizeof(buff), "invalid ANDS %s, %s, 0x%x", Wt[Rd], Wt[Rn], i);

+            snprintf(buff, sizeof(buff), "invalid ANDS %s, %s, 0x%lx", Wt[Rd], Wt[Rn], i);

         else if(Rd==31)

-            snprintf(buff, sizeof(buff), "TST %s, 0x%x", sf?Xt[Rn]:Wt[Rn], i);

+            snprintf(buff, sizeof(buff), "TST %s, 0x%lx", sf?Xt[Rn]:Wt[Rn], i);

         else

-            snprintf(buff, sizeof(buff), "ANDS %s, %s, 0x%x", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], i);

+            snprintf(buff, sizeof(buff), "ANDS %s, %s, 0x%lx", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], i);

         return buff;

     }

     if(isMask(opcode, "f1101010hh0mmmmmiiiiiinnnnnddddd", &a)) {

@@ -283,6 +305,23 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
         }

         return buff;

     }

+    if(isMask(opcode, "f0001010hh1mmmmmiiiiiinnnnnddddd", &a)) {

+        const char* shifts[] = { "LSL", "LSR", "ASR", "ROR" };

+        if(shift==0 && imm==0)

+            snprintf(buff, sizeof(buff), "BIC %s, %s, %s", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], sf?Xt[Rm]:Wt[Rm]);

+        else

+            snprintf(buff, sizeof(buff), "BIC %s, %s, %s, %s %d", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], sf?Xt[Rm]:Wt[Rm], shifts[shift], imm);

+        return buff;

+    }

+    if(isMask(opcode, "f01100100Nrrrrrrssssssnnnnnddddd", &a)) {

+        uint64_t i = DecodeBitMasks(a.N, imms, immr);

+        if(!sf) i&=0xffffffff;

+        if(sf==0 && a.N==1)

+            snprintf(buff, sizeof(buff), "invalid ORR %s, %s, 0x%lx", Wt[Rd], Wt[Rn], i);

+        else

+            snprintf(buff, sizeof(buff), "ORR %s, %s, 0x%lx", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], i);

+        return buff;

+    }

 

     // ---- SHIFT

     if(isMask(opcode, "f10100110Nrrrrrrssssssnnnnnddddd", &a)) {

@@ -304,6 +343,18 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
         return buff;

     }

 

+    if(isMask(opcode, "f01100110Nrrrrrrssssssnnnnnddddd", &a)) {

+        if(imms<immr) {

+            int width = imms + 1;

+            int lsb = ((-immr)%(sf?64:32))&(sf?0x3f:0x1f);

+            if(Rn==31)

+                snprintf(buff, sizeof(buff), "BFC %s, %s, %d, %d", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], lsb, width);

+            else

+                snprintf(buff, sizeof(buff), "BFI %s, %s, %d, %d", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], lsb, width);

+        } else

+            snprintf(buff, sizeof(buff), "BFM %s, %s, %d, %d", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], immr, imms);

+        return buff;

+    }

     // ---- BRANCH / TEST

     if(isMask(opcode, "1101011000011111000000nnnnn00000", &a)) {

         snprintf(buff, sizeof(buff), "BR %s", Xt[Rn]);

diff --git a/src/dynarec/dynarec_arm64_00.c b/src/dynarec/dynarec_arm64_00.c
index 4c786ebd..b3a4f6f5 100755
--- a/src/dynarec/dynarec_arm64_00.c
+++ b/src/dynarec/dynarec_arm64_00.c
@@ -117,6 +117,115 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             }
             break;
 
+        #define GO(GETFLAGS, NO, YES, F)    \
+            READFLAGS(F);                   \
+            i8 = F8S;   \
+            BARRIER(2); \
+            JUMP(addr+i8);\
+            GETFLAGS;   \
+            if(dyn->insts) {    \
+                if(dyn->insts[ninst].x64.jmp_insts==-1) {   \
+                    /* out of the block */                  \
+                    i32 = dyn->insts[ninst+1].address-(dyn->arm_size); \
+                    Bcond(NO, i32);     \
+                    jump_to_next(dyn, addr+i8, 0, ninst); \
+                } else {    \
+                    /* inside the block */  \
+                    i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->arm_size);    \
+                    Bcond(YES, i32);    \
+                }   \
+            }
+
+        case 0x70:
+            INST_NAME("JO ib");
+            GO( TSTw_mask(xFlags, 0b010101, 0)
+                , cEQ, cNE, X_OF)
+            break;
+        case 0x71:
+            INST_NAME("JNO ib");
+            GO( TSTw_mask(xFlags, 0b010101, 0)
+                , cNE, cEQ, X_OF)
+            break;
+        case 0x72:
+            INST_NAME("JC ib");
+            GO( TSTw_mask(xFlags, 0, 0)
+                , cEQ, cNE, X_CF)
+            break;
+        case 0x73:
+            INST_NAME("JNC ib");
+            GO( TSTw_mask(xFlags, 0, 0)
+                , cNE, cEQ, X_CF)
+            break;
+        case 0x74:
+            INST_NAME("JZ ib");
+            GO( TSTw_mask(xFlags, 0b011010, 0)
+                , cEQ, cNE, X_ZF)
+            break;
+        case 0x75:
+            INST_NAME("JNZ ib");
+            GO( TSTw_mask(xFlags, 0b011010, 0)
+                , cNE, cEQ, X_ZF)
+            break;
+        case 0x76:
+            INST_NAME("JBE ib");
+            GO( MOV32w(x1, (1<<F_CF)|(1<<F_ZF));
+                TSTw_REG(xFlags, x1)
+                , cEQ, cNE, X_CF|X_ZF)
+            break;
+        case 0x77:
+            INST_NAME("JNBE ib");
+            GO( MOV32w(x1, (1<<F_CF)|(1<<F_ZF));
+                TSTw_REG(xFlags, x1)
+                , cNE, cEQ, X_CF|X_ZF)
+            break;
+        case 0x78:
+            INST_NAME("JS ib");
+            GO( TSTw_mask(xFlags, 0b011001, 0)  // 0X80
+                , cEQ, cNE, X_SF)
+            break;
+        case 0x79:
+            INST_NAME("JNS ib");
+            GO( TSTw_mask(xFlags, 0b011001, 0)
+                , cNE, cEQ, X_SF)
+            break;
+        case 0x7A:
+            INST_NAME("JP ib");
+            GO( TSTw_mask(xFlags, 0b011110, 0)
+                , cEQ, cNE, X_PF)
+            break;
+        case 0x7B:
+            INST_NAME("JNP ib");
+            GO( TSTw_mask(xFlags, 0b011110, 0)
+                , cNE, cEQ, X_PF)
+            break;
+        case 0x7C:
+            INST_NAME("JL ib");
+            GO( EORw_REG_LSL(x1, xFlags, xFlags, F_OF-F_SF);
+                TSTw_mask(x1, 0b010101, 0)
+                , cEQ, cNE, X_SF|X_OF)
+            break;
+        case 0x7D:
+            INST_NAME("JGE ib");
+            GO( EORw_REG_LSL(x1, xFlags, xFlags, F_OF-F_SF);
+                TSTw_mask(x1, 0b010101, 0)
+                , cNE, cEQ, X_SF|X_OF)
+            break;
+        case 0x7E:
+            INST_NAME("JLE ib");
+            GO( EORw_REG_LSL(x1, xFlags, xFlags, F_OF-F_SF);
+                ORRw_REG_LSL(x1, x1, xFlags, F_OF-F_ZF);
+                TSTw_mask(x1, 0b010101, 0)
+                , cEQ, cNE, X_SF|X_OF|X_ZF)
+            break;
+        case 0x7F:
+            INST_NAME("JG ib");
+            GO( EORw_REG_LSL(x1, xFlags, xFlags, F_OF-F_SF);
+                ORRw_REG_LSL(x1, x1, xFlags, F_OF-F_ZF);
+                TSTw_mask(x1, 0b010101, 0)
+                , cNE, cEQ, X_SF|X_OF|X_ZF)
+            break;
+        #undef GO
+        
         case 0x81:
         case 0x83:
             nextop = F8;
diff --git a/src/dynarec/dynarec_arm64_emit_math.c b/src/dynarec/dynarec_arm64_emit_math.c
index 1c14991e..0d867ecc 100755
--- a/src/dynarec/dynarec_arm64_emit_math.c
+++ b/src/dynarec/dynarec_arm64_emit_math.c
@@ -189,17 +189,17 @@ void emit_sub32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3
         BICx(xFlags, xFlags, s5);
     }
     IFX(X_ZF) {
-        Bcond(cNE, +4);
-        ORRw_U12(xFlags, xFlags, 1<<F_ZF);
+        Bcond(cNE, +8);
+        ORRw_mask(xFlags, xFlags, 0b011010, 0); // mask=0x40
     }
     IFX(X_CF) {
         // inverted carry
-        Bcond(cCS, +4);
-        ORRw_U12(xFlags, xFlags, 1<<F_CF);
+        Bcond(cCS, +8);
+        ORRw_mask(xFlags, xFlags, 0, 0);    // mask=0x01
     }
     IFX(X_OF) {
-        Bcond(cVC, +4);
-        ORRw_U12(xFlags, xFlags, 1<<F_OF);
+        Bcond(cVC, +8);
+        ORRw_mask(xFlags, xFlags, 0b010101, 0);  // mask=0x800
     }
     IFX(X_SF) {
         LSRxw(s3, s1, (rex.w)?63:31);
@@ -233,16 +233,10 @@ void emit_sub32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, in
         SET_DFNONE(s4);
     }
     IFX(X_AF) {
-        if(c>=0 && c<0x1000) {
-            MVNxw(s3, s1);
-            ANDxw_U12(s4, s3, c);                // s4 = ~op1 & op2
-            ORRxw_U12(s3, s3, c);             // s3 = ~op1 | op2
-        } else {
-            MOV64x(s5, c);
-            MVNxw(s4, s1);
-            ORRxw_REG(s3, s4, s5);
-            BICxw_REG(s4, s5, s1);
-        }
+        MOV64x(s5, c);
+        MVNxw(s4, s1);
+        ORRxw_REG(s3, s4, s5);      // s3 = ~op1 | op2
+        BICxw_REG(s4, s5, s1);      // s4 = ~op1 & op2
     }
     if(c>=0 && c<0x1000) {
         IFX(X_ALL) {
@@ -272,17 +266,17 @@ void emit_sub32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, in
         BICx(xFlags, xFlags, s5);
     }
     IFX(X_ZF) {
-        Bcond(cNE, +4);
-        ORRw_U12(xFlags, xFlags, 1<<F_ZF);
+        Bcond(cNE, +8);
+        ORRw_mask(xFlags, xFlags, 0b011010, 0); // mask=0x40
     }
     IFX(X_CF) {
         // inverted carry
-        Bcond(cCS, +4);
-        ORRw_U12(xFlags, xFlags, 1<<F_CF);
+        Bcond(cCS, +8);
+        ORRw_mask(xFlags, xFlags, 0, 0);    // mask=0x01
     }
     IFX(X_OF) {
-        Bcond(cVC, +4);
-        ORRw_U12(xFlags, xFlags, 1<<F_OF);
+        Bcond(cVC, +8);
+        ORRw_mask(xFlags, xFlags, 0b010101, 0);  // mask=0x800
     }
     IFX(X_SF) {
         LSRxw(s3, s1, (rex.w)?63:31);
diff --git a/src/dynarec/dynarec_arm64_emit_tests.c b/src/dynarec/dynarec_arm64_emit_tests.c
index cfd7f6b4..986334e2 100755
--- a/src/dynarec/dynarec_arm64_emit_tests.c
+++ b/src/dynarec/dynarec_arm64_emit_tests.c
@@ -305,31 +305,31 @@ void emit_test32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
         SET_DFNONE(s4);
     }
     IFX(X_ZF|X_CF|X_OF) {
-        MOVw(s5, (1<<F_ZF)|(1<<F_CF)|(1<<F_OF));
-        BICx(xFlags, xFlags, s5);
+        MOV32w(s5, (1<<F_ZF)|(1<<F_CF)|(1<<F_OF));
+        BICw(xFlags, xFlags, s5);
     }
     ANDSxw_REG(s3, s1, s2);   // res = s1 & s2
     IFX(X_PEND) {
         STRx_U12(s3, xEmu, offsetof(x64emu_t, res));
     }
     IFX(X_ZF) {
-        Bcond(cNE, +4);
-        ORRw_U12(xFlags, xFlags, 1<<F_ZF);
+        Bcond(cNE, +8);
+        ORRw_mask(xFlags, xFlags, 0b011010, 0); // mask=0x40
     }
     IFX(X_SF) {
         LSRxw(s4, s3, rex.w?63:31);
-        BFIx(xFlags, s4, F_SF, 1);
+        BFIw(xFlags, s4, F_SF, 1);
     }
     // PF: (((emu->x64emu_parity_tab[(res) / 32] >> ((res) % 32)) & 1) == 0)
     IFX(X_PF) {
-        ANDw_U12(s3, s3, 0xE0); // lsr 5 masking pre-applied
+        ANDw_mask(s3, s3, 0b011011, 0b000010); // 0xE0
         LSRw(s3, s3, 5);
         MOV64x(s4, (uintptr_t)GetParityTab());
         LDRw_REG_LSL2(s4, s4, s3);
-        ANDw_U12(s3, s1, 31);
+        ANDw_mask(s3, s1, 0, 0b000100);   // 0x1f
         LSRw_REG(s4, s4, s3);
         MVNx(s4, s4);
-        BFIx(xFlags, s4, F_PF, 1);
+        BFIw(xFlags, s4, F_PF, 1);
     }
 }
 
diff --git a/src/dynarec/dynarec_arm64_helper.c b/src/dynarec/dynarec_arm64_helper.c
index f6b7ed71..1bf37ab3 100755
--- a/src/dynarec/dynarec_arm64_helper.c
+++ b/src/dynarec/dynarec_arm64_helper.c
@@ -313,7 +313,7 @@ void iret_to_epilog(dynarec_arm_t* dyn, int ninst)
     POP1(xFlags);
     MOV32w(x1, 0x3F7FD7);
     ANDx_REG(xFlags, xFlags, x1);
-    ORRx_U13(xFlags, xFlags, 2);
+    ORRx_mask(xFlags, xFlags, 1, 0b111111, 0);
     SET_DFNONE(x1);
     // Ret....
     MOV64x(x2, (uintptr_t)arm64_epilog);  // epilog on purpose, CS might have changed!
@@ -329,7 +329,7 @@ void call_c(dynarec_arm_t* dyn, int ninst, void* fnc, int reg, int ret, int save
     if(saveflags) {
         STRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags));
     }
-    MOV64x(reg, (uintptr_t)fnc);
+    TABLE64(reg, (uintptr_t)fnc);
     BLR(reg);
     fpu_popcache(dyn, ninst, reg);
     if(ret>=0) {
@@ -498,7 +498,7 @@ static void x87_purgecache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3
             ADDx_U12(s1, xEmu, offsetof(x64emu_t, p_regs));
             for (int i=0; i<a; ++i) {
                 SUBw_U12(s2, s2, 1);
-                ANDw_U12(s2, s2, 7);    // (emu->top + st)&7
+                ANDw_mask(s2, s2, 0, 0b000010);    // (emu->top + st)&7
                 STRw_REG_LSL2(s3, s1, s2);
             }
         } else {
@@ -508,7 +508,7 @@ static void x87_purgecache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3
             for (int i=0; i<-a; ++i) {
                 STRw_REG_LSL2(s3, s1, s2);
                 ADDw_U12(s2, s2, 1);
-                ANDw_U12(s2, s2, 7);    // (emu->top + st)&7
+                ANDw_mask(s2, s2, 0, 0b000010);    // (emu->top + st)&7
             }
         }
         STRw_U12(s2, xEmu, offsetof(x64emu_t, top));
@@ -524,7 +524,7 @@ static void x87_purgecache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3
         for (int i=0; i<8; ++i)
             if(dyn->x87cache[i]!=-1) {
                 ADDw_U12(s3, s2, dyn->x87cache[i]);
-                ANDw_U12(s3, s3, 7);    // (emu->top + st)&7
+                ANDw_mask(s3, s3, 0, 0b000010);    // (emu->top + st)&7
                 VSTR64_REG_LSL3(dyn->x87reg[i], s1, s3);
                 fpu_free_reg_double(dyn, dyn->x87reg[i]);
                 dyn->x87reg[i] = -1;
@@ -553,7 +553,7 @@ static void x87_reflectcache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int
     for (int i=0; i<8; ++i)
         if(dyn->x87cache[i]!=-1) {
             ADDw_U12(s3, s2, dyn->x87cache[i]);
-            ANDw_U12(s3, s3, 7);    // (emu->top + i)&7
+            ANDw_mask(s3, s3, 0, 0b000010);    // (emu->top + i)&7
             VLDR64_REG_LSL3(dyn->x87reg[i], s1, s3);
         }
 #endif
@@ -585,7 +585,7 @@ int x87_get_cache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st)
         } else {
             ADDw_U12(s2, s2, a);
         }
-        ANDw_U12(s2, s2, 7);
+        ANDw_mask(s2, s2, 0, 0b000010);
     }
     VLDR64_REG_LSL3(dyn->x87reg[ret], s1, s2);
     MESSAGE(LOG_DUMP, "\t-------x87 Cache for ST%d\n", st);
@@ -624,7 +624,7 @@ void x87_refresh(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st)
     // Update
     if(st) {
         ADDw_U12(s2, s2, st);
-        ANDw_U12(s2, s2, 7);    // (emu->top + i)&7
+        ANDw_mask(s2, s2, 0, 0b000010);    // (emu->top + i)&7
     }
     VLDR64_REG_LSL3(dyn->x87reg[ret], s1, s2);
     MESSAGE(LOG_DUMP, "\t--------x87 Cache for ST%d\n", st);
@@ -649,7 +649,7 @@ void x87_forget(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st)
     // Update
     if(st) {
         ADDw_U12(s2, s2, st);
-        ANDw_U12(s2, s2, 7);    // (emu->top + i)&7
+        ANDw_mask(s2, s2, 0, 0b000010);    // (emu->top + i)&7
     }
     VLDR64_REG_LSL3(dyn->x87reg[ret], s1, s2);
     MESSAGE(LOG_DUMP, "\t--------x87 Cache for ST%d\n", st);
@@ -676,7 +676,7 @@ void x87_reget_st(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st)
             } else {
                 ADDw_U12(s2, s2, a);
             }
-            ANDw_U12(s2, s2, 7);    // (emu->top + i)&7
+            ANDw_mask(s2, s2, 0, 0b000010);    // (emu->top + i)&7
             VLDR64_REG_LSL3(dyn->x87reg[i], s1, s2);
             MESSAGE(LOG_DUMP, "\t-------x87 Cache for ST%d\n", st);
             // ok
@@ -700,7 +700,7 @@ void x87_reget_st(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st)
     } else {
         ADDw_U12(s2, s2, a);
     }
-    ANDw_U12(s2, s2, 7);    // (emu->top + i)&7
+    ANDw_mask(s2, s2, 0, 0b000010);    // (emu->top + i)&7
     VLDR64_REG_LSL3(dyn->x87reg[ret], s1, s2);
     MESSAGE(LOG_DUMP, "\t-------x87 Cache for ST%d\n", st);
 #endif
@@ -968,11 +968,11 @@ void fpu_reset(dynarec_arm_t* dyn, int ninst)
 void emit_pf(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4)
 {
     // PF: (((emu->x64emu_parity_tab[(res) / 32] >> ((res) % 32)) & 1) == 0)
-    ANDw_U12(s3, s1, 0xE0); // lsr 5 masking pre-applied
+    ANDw_mask(s3, s1, 0b011011, 0b000010); // lsr 5 masking pre-applied
     LSRw(s3, s3, 5);
     MOV64x(s4, (uintptr_t)GetParityTab());
     LDRw_REG_LSL2(s4, s4, s3);
-    ANDw_U12(s3, s1, 31);
+    ANDw_mask(s3, s1, 0, 0b000100); //0x1f
     LSRw_REG(s4, s4, s3);
     MVNx(s4, s4);
     BFIx(xFlags, s4, F_PF, 1);
diff --git a/src/dynarec/dynarec_arm64_pass.c b/src/dynarec/dynarec_arm64_pass.c
index 09d09b30..12f22acf 100755
--- a/src/dynarec/dynarec_arm64_pass.c
+++ b/src/dynarec/dynarec_arm64_pass.c
@@ -54,10 +54,12 @@ void arm_pass(dynarec_arm_t* dyn, uintptr_t addr)
             || ((ip >= trace_start) && (ip < trace_end)))  {
                 MESSAGE(LOG_DUMP, "TRACE ----\n");
                 fpu_reflectcache(dyn, ninst, x1, x2, x3);
-                MOV64x(x1, ip);
-                STORE_XEMU_REGS(x1);
-                MOVw(x2, 1);
+                GETIP(ip);
+                MOVx(x1, xRIP);
+                STORE_XEMU_REGS(xRIP);
+                MOV32w(x2, 1);
                 CALL(PrintTrace, -1);
+                LOAD_XEMU_REGS(xRIP);
                 MESSAGE(LOG_DUMP, "----------\n");
             }
         }
diff --git a/src/emu/x64emu.c b/src/emu/x64emu.c
index 2a167f81..c80e6d56 100755
--- a/src/emu/x64emu.c
+++ b/src/emu/x64emu.c
@@ -62,7 +62,7 @@ static void internalX64Setup(x64emu_t* emu, box64context_t *context, uintptr_t s
         emu->sbiidx[i] = &emu->regs[i];
     emu->sbiidx[4] = &emu->zero;
     emu->x64emu_parity_tab = x86emu_parity_tab;
-    emu->eflags.x32 = 0x202; // default flags?
+    emu->eflags.x64 = 0x202; // default flags?
     // own stack?
     emu->stack2free = (ownstack)?(void*)stack:NULL;
     emu->init_stack = (void*)stack;
@@ -368,7 +368,10 @@ const char* DumpCPURegs(x64emu_t* emu, uintptr_t ip)
         if (i%5==4) {
             if(i==4) {
                 if(emu->df) {
-                    strcat(buff, "FLAGS=??????\n");
+#define FLAG_CHAR(f) (ACCESS_FLAG(F_##f##F)) ? #f : "?"
+                    sprintf(tmp, "flags=%s%s%s%s%s%s\n", FLAG_CHAR(O), FLAG_CHAR(C), FLAG_CHAR(P), FLAG_CHAR(A), FLAG_CHAR(Z), FLAG_CHAR(S));
+                    strcat(buff, tmp);
+#undef FLAG_CHAR
                 } else {
 #define FLAG_CHAR(f) (ACCESS_FLAG(F_##f##F)) ? #f : "-"
                     sprintf(tmp, "FLAGS=%s%s%s%s%s%s\n", FLAG_CHAR(O), FLAG_CHAR(C), FLAG_CHAR(P), FLAG_CHAR(A), FLAG_CHAR(Z), FLAG_CHAR(S));
diff --git a/src/include/regs.h b/src/include/regs.h
index aee12996..3e9efb0a 100755
--- a/src/include/regs.h
+++ b/src/include/regs.h
@@ -179,8 +179,9 @@ typedef union {
         unsigned int _F_VIF:1; 
         unsigned int _F_VIP:1;
         unsigned int _F_ID:1;
+		unsigned int _F_fill1:10;
+		uint32_t	_F_fill2;
     } f;
-    uint32_t    x32;
 	uint64_t 	x64;
 } x64flags_t;
 
diff --git a/src/libtools/signals.c b/src/libtools/signals.c
index e3d3002a..fe30adf3 100755
--- a/src/libtools/signals.c
+++ b/src/libtools/signals.c
@@ -492,7 +492,7 @@ void my_sigactionhandler_oldcode(int32_t sig, siginfo_t* info, void * ucntx, int
     sigcontext->uc_mcontext.gregs[X64_RSP] = R_RSP;
     sigcontext->uc_mcontext.gregs[X64_RBX] = R_RBX;
     // flags
-    sigcontext->uc_mcontext.gregs[X64_EFL] = emu->eflags.x32;
+    sigcontext->uc_mcontext.gregs[X64_EFL] = emu->eflags.x64;
     // get segments
     sigcontext->uc_mcontext.gregs[X64_CSGSFS] = ((uint64_t)(R_CS)) | (((uint64_t)(R_GS))<<16) | (((uint64_t)(R_FS))<<32);
 #if defined(DYNAREC) && defined(__aarch64__)
@@ -616,7 +616,7 @@ void my_sigactionhandler_oldcode(int32_t sig, siginfo_t* info, void * ucntx, int
             if(sigcontext->uc_mcontext.gregs[X64_RIP]!=sigcontext_copy.uc_mcontext.gregs[X64_RIP]) ejb->emu->ip.dword[0]=sigcontext->uc_mcontext.gregs[X64_RIP];
             sigcontext->uc_mcontext.gregs[X64_RIP] = R_RIP;
             // flags
-            if(sigcontext->uc_mcontext.gregs[X64_EFL]!=sigcontext_copy.uc_mcontext.gregs[X64_EFL]) ejb->emu->eflags.x32=sigcontext->uc_mcontext.gregs[X64_EFL];
+            if(sigcontext->uc_mcontext.gregs[X64_EFL]!=sigcontext_copy.uc_mcontext.gregs[X64_EFL]) ejb->emu->eflags.x64=sigcontext->uc_mcontext.gregs[X64_EFL];
             // get segments
             uint16_t seg;
             seg = (sigcontext->uc_mcontext.gregs[X64_CSGSFS] >> 0)&0xffff;