Diffstat (limited to 'src/dynarec/arm64/updateflags_arm64_pass.c')
-rw-r--r--  src/dynarec/arm64/updateflags_arm64_pass.c  | 1005
1 file changed, 1005 insertions(+), 0 deletions(-)
diff --git a/src/dynarec/arm64/updateflags_arm64_pass.c b/src/dynarec/arm64/updateflags_arm64_pass.c
new file mode 100644
index 00000000..8b4dfe39
--- /dev/null
+++ b/src/dynarec/arm64/updateflags_arm64_pass.c
@@ -0,0 +1,1005 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <errno.h>
+#include <string.h>
+
+#include "os.h"
+#include "debug.h"
+#include "box64context.h"
+#include "box64cpu.h"
+#include "emu/x64emu_private.h"
+#include "emu/x64run_private.h"
+#include "x64emu.h"
+#include "box64stack.h"
+#include "x64trace.h"
+#include "dynablock.h"
+#include "dynarec_native.h"
+#include "../dynablock_private.h"
+#include "custommem.h"
+#include "x64test.h"
+#include "pe_tools.h"
+
+#include "../dynarec_arch.h"
+
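+// This file is compiled once per dynarec pass, selected by STEP:
+//  - STEP 0 and STEP 2 only account for the size of each opcode (4 bytes),
+//    STEP 2 additionally recording the offset of every SETMARK'd handler
+//    in jmp_df[] so the jump table gets its final targets;
+//  - STEP 1 emits nothing and just resets the marks;
+//  - STEP 3 writes the actual opcodes (and optionally dumps them).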
+#if STEP == 0
+    #define EMIT(A)         dyn->native_size+=4
+    #define SETMARK(A)      jmp_df[A] = 1
+#elif STEP == 1
+    #define EMIT(A)         do {} while (0)
+    #define SETMARK(A)      jmp_df[A] = 0
+#elif STEP == 2
+    #define EMIT(A)         dyn->native_size+=4
+    #define SETMARK(A)      jmp_df[A] = dyn->native_size
+#elif STEP == 3
+    #define MESSAGE(A, ...)                                                   \
+        do {                                                                  \
+            if (dyn->need_dump) dynarec_log_prefix(0, LOG_NONE, __VA_ARGS__); \
+        } while (0)
+    #define EMIT(A)                                         \
+        do{                                                 \
+            if(dyn->need_dump) print_opcode(dyn, ninst, (uint32_t)(A)); \
+            *(uint32_t*)(dyn->block) = (uint32_t)(A);       \
+            dyn->block += 4; dyn->native_size += 4;         \
+            dyn->insts[ninst].size2 += 4;                   \
+        }while(0)
+    #define SETMARK(A)      MESSAGE(LOG_DUMP, "Mark(%d)=%p\n", A, dyn->block)
+#else
+#error Meh!
+#endif
+#define STEP_PASS
+#include "../dynarec_helper.h"
+
+/*
+    Will generate a dynablock that performs UpdateFlags. On entry, x0 = x64emu_t*, x1 = df.
+    It reads the current df, resets df to d_none, and jumps to the matching
+    handler through a static jump table.
+    Only the x1..x5 registers are used, so no SIMD registers need saving.
+    LR is used for the return address, and x0 must be correctly set up as xEmu.
+    The x86 flags will be changed.
+*/
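+/*
+    Rough shape of the emitted dispatch (a sketch: labels are illustrative,
+    actual offsets are resolved across the passes):
+        ldr  w1, [xEmu, #df]      ; read the deferred-flags type
+        str  wzr, [xEmu, #df]     ; reset df to d_none
+        cmp  w1, #d_unknown
+        b.lt 1f
+        ret                       ; df out of range: nothing to update
+    1:  adr  x2, 2f               ; address of the jump table below
+        add  x1, x2, x1, lsl #2   ; one 4-byte slot per df value
+        br   x1
+    2:  b    <handler for d_none>
+        b    <handler for d_add8>
+        ...
+*/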
+
+void updateflags_pass(dynarec_arm_t* dyn, uint64_t jmp_df[])
+{
+    int ninst = 0;
+    rex_t rex = {0};
+    LDRw_U12(x1, xEmu, offsetof(x64emu_t, df));
+    STRw_U12(xZR, xEmu, offsetof(x64emu_t, df));
+    CMPSw_U12(x1, d_unknown);
+    Bcond(cLT, 4+4);
+    RET(xLR);
+    ADR_S20(x2, 4+8);
+    ADDx_REG_LSL(x1, x2, x1, 2);
+    BR(x1);
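+    // static jump table: one 4-byte B per deferred-flags type, using the
+    // handler offsets collected in jmp_df[] by the earlier passes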
+    for(int i=d_none; i<d_unknown; ++i)
+        B(jmp_df[i] - dyn->native_size);
+SETMARK(d_none);
+    RET(xLR);
+SETMARK(d_add8);
+SETMARK(d_add8b);
+    LDRB_U12(x1, xEmu, offsetof(x64emu_t, op1));
+    LDRB_U12(x2, xEmu, offsetof(x64emu_t, op2));
+    emit_add8(dyn, ninst, x1, x2, x3, x4);
+    RET(xLR);
+SETMARK(d_add16);
+SETMARK(d_add16b);
+    LDRH_U12(x1, xEmu, offsetof(x64emu_t, op1));
+    LDRH_U12(x2, xEmu, offsetof(x64emu_t, op2));
+    emit_add16(dyn, ninst, x1, x2, x3, x4);
+    RET(xLR);
+SETMARK(d_add32);
+SETMARK(d_add32b);
+    LDRw_U12(x1, xEmu, offsetof(x64emu_t, op1));
+    LDRw_U12(x2, xEmu, offsetof(x64emu_t, op2));
+    rex.w = 0;
+    emit_add32(dyn, ninst, rex, x1, x2, x3, x4);
+    RET(xLR);
+SETMARK(d_add64);
+    LDRx_U12(x1, xEmu, offsetof(x64emu_t, op1));
+    LDRx_U12(x2, xEmu, offsetof(x64emu_t, op2));
+    rex.w = 1;
+    emit_add32(dyn, ninst, rex, x1, x2, x3, x4);
+    rex.w = 0;
+    RET(xLR);
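+// For the logical operations only res matters, so the flags are recomputed
+// by replaying the emit helper with a neutral second operand
+// (x & ~0 == x, x | 0 == x, x ^ 0 == x).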
+SETMARK(d_and8);
+    LDRB_U12(x1, xEmu, offsetof(x64emu_t, res));
+    MOV32w(x2, 0xff);
+    emit_and8(dyn, ninst, x1, x2, x3, x4);
+    RET(xLR);
+SETMARK(d_and16);
+    LDRH_U12(x1, xEmu, offsetof(x64emu_t, res));
+    MOV32w(x2, 0xffff);
+    emit_and16(dyn, ninst, x1, x2, x3, x4);
+    RET(xLR);
+SETMARK(d_and32);
+    LDRw_U12(x1, xEmu, offsetof(x64emu_t, res));
+    MOV32w(x2, 0xffffffff);
+    rex.w = 0;
+    emit_and32(dyn, ninst, rex, x1, x2, x3, x4);
+    RET(xLR);
+SETMARK(d_and64);
+    LDRx_U12(x1, xEmu, offsetof(x64emu_t, res));
+    MOV64x(x2, 0xffffffffffffffffULL);
+    rex.w = 1;
+    emit_and32(dyn, ninst, rex, x1, x2, x3, x4);
+    rex.w = 0;
+    RET(xLR);
+SETMARK(d_dec8);
+    LDRB_U12(x1, xEmu, offsetof(x64emu_t, op1));
+    emit_dec8(dyn, ninst, x1, x3, x4);
+    RET(xLR);
+SETMARK(d_dec16);
+    LDRH_U12(x1, xEmu, offsetof(x64emu_t, op1));
+    emit_dec16(dyn, ninst, x1, x3, x4);
+    RET(xLR);
+SETMARK(d_dec32);
+    LDRw_U12(x1, xEmu, offsetof(x64emu_t, op1));
+    rex.w = 0;
+    emit_dec32(dyn, ninst, rex, x1, x3, x4);
+    RET(xLR);
+SETMARK(d_dec64);
+    LDRx_U12(x1, xEmu, offsetof(x64emu_t, op1));
+    rex.w = 1;
+    emit_dec32(dyn, ninst, rex, x1, x3, x4);
+    rex.w = 0;
+    RET(xLR);
+SETMARK(d_inc8);
+    LDRB_U12(x1, xEmu, offsetof(x64emu_t, op1));
+    emit_inc8(dyn, ninst, x1, x3, x4);
+    RET(xLR);
+SETMARK(d_inc16);
+    LDRH_U12(x1, xEmu, offsetof(x64emu_t, op1));
+    emit_inc16(dyn, ninst, x1, x3, x4);
+    RET(xLR);
+SETMARK(d_inc32);
+    LDRw_U12(x1, xEmu, offsetof(x64emu_t, op1));
+    rex.w = 0;
+    emit_inc32(dyn, ninst, rex, x1, x3, x4);
+    RET(xLR);
+SETMARK(d_inc64);
+    LDRx_U12(x1, xEmu, offsetof(x64emu_t, op1));
+    rex.w = 1;
+    emit_inc32(dyn, ninst, rex, x1, x3, x4);
+    rex.w = 0;
+    RET(xLR);
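+// For IMUL, CF and OF are set when the result no longer fits sign-extended
+// in its lower half; the other flags are architecturally undefined and are
+// only filled in for one of the emulated CPU types (BOX64ENV(cputype)).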
+SETMARK(d_imul8);
+    LDRSH_U12(x1, xEmu, offsetof(x64emu_t, res));
+    ASRxw(x2, x1, 8);
+    CMPSw_REG_ASR(x2, x1, 16);
+    CSETw(x3, cNE);
+    BFIw(xFlags, x3, F_CF, 1);
+    BFIw(xFlags, x3, F_OF, 1);
+    if(!BOX64ENV(cputype)) {
+        LSRw(x2, x1, 7);
+        BFIw(xFlags, x2, F_SF, 1);
+        BFCw(xFlags, F_ZF, 1);
+        BFCw(xFlags, F_AF, 1);
+        emit_pf(dyn, ninst, x1, x4);
+    }
+    RET(xLR);
+SETMARK(d_imul16);
+    LDRw_U12(x1, xEmu, offsetof(x64emu_t, res));
+    ASRw(x2, x1, 16);
+    CMPSw_REG_ASR(x2, x1, 31);
+    CSETw(x3, cNE);
+    BFIw(xFlags, x3, F_CF, 1);
+    BFIw(xFlags, x3, F_OF, 1);
+    if(!BOX64ENV(cputype)) {
+        LSRw(x2, x1, 15);
+        BFIw(xFlags, x2, F_SF, 1);
+        BFCw(xFlags, F_ZF, 1);
+        BFCw(xFlags, F_AF, 1);
+        emit_pf(dyn, ninst, x1, x4);
+    }
+    RET(xLR);
+SETMARK(d_imul32);
+    LDRw_U12(x1, xEmu, offsetof(x64emu_t, res));
+    LDRw_U12(x2, xEmu, offsetof(x64emu_t, op1));
+    CMPSw_REG_ASR(x2, x1, 31);
+    CSETw(x3, cNE);
+    BFIw(xFlags, x3, F_CF, 1);
+    BFIw(xFlags, x3, F_OF, 1);
+    if(!BOX64ENV(cputype)) {
+        LSRw(x2, x1, 31);
+        BFIw(xFlags, x2, F_SF, 1);
+        BFCw(xFlags, F_ZF, 1);
+        BFCw(xFlags, F_AF, 1);
+        emit_pf(dyn, ninst, x1, x4);
+    }
+    RET(xLR);
+SETMARK(d_imul64);
+    LDRx_U12(x1, xEmu, offsetof(x64emu_t, res));
+    LDRx_U12(x2, xEmu, offsetof(x64emu_t, op1));
+    CMPSx_REG_ASR(x2, x1, 63);
+    CSETw(x3, cNE);
+    BFIw(xFlags, x3, F_CF, 1);
+    BFIw(xFlags, x3, F_OF, 1);
+    if(!BOX64ENV(cputype)) {
+        LSRx(x2, x1, 63);
+        BFIw(xFlags, x2, F_SF, 1);
+        BFCw(xFlags, F_ZF, 1);
+        BFCw(xFlags, F_AF, 1);
+        emit_pf(dyn, ninst, x1, x4);
+    }
+    RET(xLR);
+SETMARK(d_or8);
+    LDRB_U12(x1, xEmu, offsetof(x64emu_t, res));
+    MOV32w(x2, 0);
+    emit_or8(dyn, ninst, x1, x2, x3, x4);
+    RET(xLR);
+SETMARK(d_or16);
+    LDRH_U12(x1, xEmu, offsetof(x64emu_t, res));
+    MOV32w(x2, 0);
+    emit_or16(dyn, ninst, x1, x2, x3, x4);
+    RET(xLR);
+SETMARK(d_or32);
+    LDRw_U12(x1, xEmu, offsetof(x64emu_t, res));
+    MOV32w(x2, 0);
+    rex.w = 0;
+    emit_or32(dyn, ninst, rex, x1, x2, x3, x4);
+    RET(xLR);
+SETMARK(d_or64);
+    LDRx_U12(x1, xEmu, offsetof(x64emu_t, res));
+    MOV64x(x2, 0);
+    rex.w = 1;
+    emit_or32(dyn, ninst, rex, x1, x2, x3, x4);
+    rex.w = 0;
+    RET(xLR);
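+// For MUL, CF and OF are set when the upper half of the result is non-zero.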
+SETMARK(d_mul8);
+    LDRH_U12(x1, xEmu, offsetof(x64emu_t, res));
+    CMPSw_REG_LSR(xZR, x1, 8);
+    CSETw(x3, cNE);
+    BFIw(xFlags, x3, F_CF, 1);
+    BFIw(xFlags, x3, F_OF, 1);
+    if(!BOX64ENV(cputype)) {
+        LSRw(x2, x1, 7);
+        BFIw(xFlags, x2, F_SF, 1);
+        BFCw(xFlags, F_ZF, 1);
+        BFCw(xFlags, F_AF, 1);
+        emit_pf(dyn, ninst, x1, x4);
+    }
+    RET(xLR);
+SETMARK(d_mul16);
+    LDRw_U12(x1, xEmu, offsetof(x64emu_t, res));
+    CMPSw_REG_LSR(xZR, x1, 16);
+    CSETw(x3, cNE);
+    BFIw(xFlags, x3, F_CF, 1);
+    BFIw(xFlags, x3, F_OF, 1);
+    if(!BOX64ENV(cputype)) {
+        LSRw(x2, x1, 15);
+        BFIw(xFlags, x2, F_SF, 1);
+        BFCw(xFlags, F_ZF, 1);
+        BFCw(xFlags, F_AF, 1);
+        emit_pf(dyn, ninst, x1, x4);
+    }
+    RET(xLR);
+SETMARK(d_mul32);
+    LDRw_U12(x2, xEmu, offsetof(x64emu_t, op1));
+    CMPSw_U12(x2, 0);
+    CSETw(x3, cNE);
+    BFIw(xFlags, x3, F_CF, 1);
+    BFIw(xFlags, x3, F_OF, 1);
+    if(!BOX64ENV(cputype)) {
+        LDRw_U12(x1, xEmu, offsetof(x64emu_t, res));
+        LSRw(x2, x1, 31);
+        BFIw(xFlags, x2, F_SF, 1);
+        BFCw(xFlags, F_ZF, 1);
+        BFCw(xFlags, F_AF, 1);
+        emit_pf(dyn, ninst, x1, x4);
+    }
+    RET(xLR);
+SETMARK(d_mul64);
+    LDRx_U12(x2, xEmu, offsetof(x64emu_t, op1));
+    CMPSx_U12(x2, 0);
+    CSETw(x3, cNE);
+    BFIw(xFlags, x3, F_CF, 1);
+    BFIw(xFlags, x3, F_OF, 1);
+    if(!BOX64ENV(cputype)) {
+        LDRx_U12(x1, xEmu, offsetof(x64emu_t, res));
+        LSRx(x2, x1, 63);
+        BFIw(xFlags, x2, F_SF, 1);
+        BFCw(xFlags, F_ZF, 1);
+        BFCw(xFlags, F_AF, 1);
+        emit_pf(dyn, ninst, x1, x4);
+    }
+    RET(xLR);
+SETMARK(d_neg8);
+    LDRB_U12(x1, xEmu, offsetof(x64emu_t, op1));
+    emit_neg8(dyn, ninst, x1, x3, x4);
+    RET(xLR);
+SETMARK(d_neg16);
+    LDRH_U12(x1, xEmu, offsetof(x64emu_t, op1));
+    emit_neg16(dyn, ninst, x1, x3, x4);
+    RET(xLR);
+SETMARK(d_neg32);
+    LDRw_U12(x1, xEmu, offsetof(x64emu_t, op1));
+    rex.w = 0;
+    emit_neg32(dyn, ninst, rex, x1, x3, x4);
+    RET(xLR);
+SETMARK(d_neg64);
+    LDRx_U12(x1, xEmu, offsetof(x64emu_t, op1));
+    rex.w = 1;
+    emit_neg32(dyn, ninst, rex, x1, x3, x4);
+    rex.w = 0;
+    RET(xLR);
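+// Shifts: the count is masked to 5 bits (6 bits for the 64-bit variants),
+// as x86 does; a masked count of zero leaves the flags untouched, hence the
+// early RET when the ANDS result is zero.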
+SETMARK(d_shl8);
+    LDRB_U12(x1, xEmu, offsetof(x64emu_t, op1));
+    LDRB_U12(x2, xEmu, offsetof(x64emu_t, op2));
+    ANDSw_mask(x2, x2, 0, 4);
+    Bcond(cNE, 4+4);
+    RET(xLR);
+    emit_shl8(dyn, ninst, x1, x2, x3, x4);
+    RET(xLR);
+SETMARK(d_shl16);
+    LDRH_U12(x1, xEmu, offsetof(x64emu_t, op1));
+    LDRH_U12(x2, xEmu, offsetof(x64emu_t, op2));
+    ANDSw_mask(x2, x2, 0, 4);
+    Bcond(cNE, 4+4);
+    RET(xLR);
+    emit_shl16(dyn, ninst, x1, x2, x3, x4);
+    RET(xLR);
+SETMARK(d_shl32);
+    LDRw_U12(x1, xEmu, offsetof(x64emu_t, op1));
+    LDRw_U12(x2, xEmu, offsetof(x64emu_t, op2));
+    ANDSw_mask(x2, x2, 0, 4);
+    Bcond(cNE, 4+4);
+    RET(xLR);
+    rex.w = 0;
+    emit_shl32(dyn, ninst, rex, x1, x2, x3, x4);
+    RET(xLR);
+SETMARK(d_shl64);
+    LDRx_U12(x1, xEmu, offsetof(x64emu_t, op1));
+    LDRx_U12(x2, xEmu, offsetof(x64emu_t, op2));
+    ANDSw_mask(x2, x2, 0, 5);
+    Bcond(cNE, 4+4);
+    RET(xLR);
+    rex.w = 1;
+    emit_shl32(dyn, ninst, rex, x1, x2, x3, x4);
+    rex.w = 0;
+    RET(xLR);
+SETMARK(d_shr8);
+    LDRB_U12(x1, xEmu, offsetof(x64emu_t, op1));
+    LDRB_U12(x2, xEmu, offsetof(x64emu_t, op2));
+    ANDSw_mask(x2, x2, 0, 4);
+    Bcond(cNE, 4+4);
+    RET(xLR);
+    emit_shr8(dyn, ninst, x1, x2, x3, x4);
+    RET(xLR);
+SETMARK(d_shr16);
+    LDRH_U12(x1, xEmu, offsetof(x64emu_t, op1));
+    LDRH_U12(x2, xEmu, offsetof(x64emu_t, op2));
+    ANDSw_mask(x2, x2, 0, 4);
+    Bcond(cNE, 4+4);
+    RET(xLR);
+    emit_shr16(dyn, ninst, x1, x2, x3, x4);
+    RET(xLR);
+SETMARK(d_shr32);
+    LDRw_U12(x1, xEmu, offsetof(x64emu_t, op1));
+    LDRw_U12(x2, xEmu, offsetof(x64emu_t, op2));
+    ANDSw_mask(x2, x2, 0, 4);
+    Bcond(cNE, 4+4);
+    RET(xLR);
+    rex.w = 0;
+    emit_shr32(dyn, ninst, rex, x1, x2, x3, x4);
+    RET(xLR);
+SETMARK(d_shr64);
+    LDRx_U12(x1, xEmu, offsetof(x64emu_t, op1));
+    LDRx_U12(x2, xEmu, offsetof(x64emu_t, op2));
+    ANDSw_mask(x2, x2, 0, 5);
+    Bcond(cNE, 4+4);
+    RET(xLR);
+    rex.w = 1;
+    emit_shr32(dyn, ninst, rex, x1, x2, x3, x4);
+    rex.w = 0;
+    RET(xLR);
+SETMARK(d_sar8);
+    LDRSB_U12(x1, xEmu, offsetof(x64emu_t, op1));
+    LDRB_U12(x2, xEmu, offsetof(x64emu_t, op2));
+    ANDSw_mask(x2, x2, 0, 4);
+    Bcond(cNE, 4+4);
+    RET(xLR);
+    emit_sar8(dyn, ninst, x1, x2, x3, x4);
+    RET(xLR);
+SETMARK(d_sar16);
+    LDRSH_U12(x1, xEmu, offsetof(x64emu_t, op1));
+    LDRH_U12(x2, xEmu, offsetof(x64emu_t, op2));
+    ANDSw_mask(x2, x2, 0, 4);
+    Bcond(cNE, 4+4);
+    RET(xLR);
+    emit_sar16(dyn, ninst, x1, x2, x3, x4);
+    RET(xLR);
+SETMARK(d_sar32);
+    LDRw_U12(x1, xEmu, offsetof(x64emu_t, op1));
+    LDRw_U12(x2, xEmu, offsetof(x64emu_t, op2));
+    ANDSw_mask(x2, x2, 0, 4);
+    Bcond(cNE, 4+4);
+    RET(xLR);
+    rex.w = 0;
+    emit_sar32(dyn, ninst, rex, x1, x2, x3, x4);
+    RET(xLR);
+SETMARK(d_sar64);
+    LDRx_U12(x1, xEmu, offsetof(x64emu_t, op1));
+    LDRx_U12(x2, xEmu, offsetof(x64emu_t, op2));
+    ANDSw_mask(x2, x2, 0, 5);
+    Bcond(cNE, 4+4);
+    RET(xLR);
+    rex.w = 1;
+    emit_sar32(dyn, ninst, rex, x1, x2, x3, x4);
+    rex.w = 0;
+    RET(xLR);
+SETMARK(d_sub8);
+    LDRB_U12(x1, xEmu, offsetof(x64emu_t, op1));
+    LDRB_U12(x2, xEmu, offsetof(x64emu_t, op2));
+    emit_sub8(dyn, ninst, x1, x2, x3, x4);
+    RET(xLR);
+SETMARK(d_sub16);
+    LDRH_U12(x1, xEmu, offsetof(x64emu_t, op1));
+    LDRH_U12(x2, xEmu, offsetof(x64emu_t, op2));
+    emit_sub16(dyn, ninst, x1, x2, x3, x4);
+    RET(xLR);
+SETMARK(d_sub32);
+    LDRw_U12(x1, xEmu, offsetof(x64emu_t, op1));
+    LDRw_U12(x2, xEmu, offsetof(x64emu_t, op2));
+    rex.w = 0;
+    emit_sub32(dyn, ninst, rex, x1, x2, x3, x4);
+    RET(xLR);
+SETMARK(d_sub64);
+    LDRx_U12(x1, xEmu, offsetof(x64emu_t, op1));
+    LDRx_U12(x2, xEmu, offsetof(x64emu_t, op2));
+    rex.w = 1;
+    emit_sub32(dyn, ninst, rex, x1, x2, x3, x4);
+    rex.w = 0;
+    RET(xLR);
+SETMARK(d_xor8);
+    LDRB_U12(x1, xEmu, offsetof(x64emu_t, res));
+    MOV32w(x2, 0);
+    emit_xor8(dyn, ninst, x1, x2, x3, x4);
+    RET(xLR);
+SETMARK(d_xor16);
+    LDRH_U12(x1, xEmu, offsetof(x64emu_t, res));
+    MOV32w(x2, 0);
+    emit_xor16(dyn, ninst, x1, x2, x3, x4);
+    RET(xLR);
+SETMARK(d_xor32);
+    LDRw_U12(x1, xEmu, offsetof(x64emu_t, res));
+    MOV32w(x2, 0);
+    rex.w = 0;
+    emit_xor32(dyn, ninst, rex, x1, x2, x3, x4);
+    RET(xLR);
+SETMARK(d_xor64);
+    LDRx_U12(x1, xEmu, offsetof(x64emu_t, res));
+    MOV64x(x2, 0);
+    rex.w = 1;
+    emit_xor32(dyn, ninst, rex, x1, x2, x3, x4);
+    rex.w = 0;
+    RET(xLR);
+SETMARK(d_cmp8);
+    LDRB_U12(x1, xEmu, offsetof(x64emu_t, op1));
+    LDRB_U12(x2, xEmu, offsetof(x64emu_t, op2));
+    emit_cmp8(dyn, ninst, x1, x2, x3, x4, x5);
+    RET(xLR);
+SETMARK(d_cmp16);
+    LDRH_U12(x1, xEmu, offsetof(x64emu_t, op1));
+    LDRH_U12(x2, xEmu, offsetof(x64emu_t, op2));
+    emit_cmp16(dyn, ninst, x1, x2, x3, x4, x5);
+    RET(xLR);
+SETMARK(d_cmp32);
+    LDRw_U12(x1, xEmu, offsetof(x64emu_t, op1));
+    LDRw_U12(x2, xEmu, offsetof(x64emu_t, op2));
+    rex.w = 0;
+    emit_cmp32(dyn, ninst, rex, x1, x2, x3, x4, x5);
+    RET(xLR);
+SETMARK(d_cmp64);
+    LDRx_U12(x1, xEmu, offsetof(x64emu_t, op1));
+    LDRx_U12(x2, xEmu, offsetof(x64emu_t, op2));
+    rex.w = 1;
+    emit_cmp32(dyn, ninst, rex, x1, x2, x3, x4, x5);
+    rex.w = 0;
+    RET(xLR);
+SETMARK(d_tst8);
+    LDRB_U12(x1, xEmu, offsetof(x64emu_t, res));
+    MOV32w(x2, 0xff);
+    emit_test8(dyn, ninst, x1, x2, x3, x4, x5);
+    RET(xLR);
+SETMARK(d_tst16);
+    LDRH_U12(x1, xEmu, offsetof(x64emu_t, res));
+    MOV32w(x2, 0xffff);
+    emit_test16(dyn, ninst, x1, x2, x3, x4, x5);
+    RET(xLR);
+SETMARK(d_tst32);
+    LDRw_U12(x1, xEmu, offsetof(x64emu_t, res));
+    MOV32w(x2, 0xffffffff);
+    rex.w = 0;
+    emit_test32(dyn, ninst, rex, x1, x2, x3, x4, x5);
+    RET(xLR);
+SETMARK(d_tst64);
+    LDRx_U12(x1, xEmu, offsetof(x64emu_t, res));
+    MOV64x(x2, 0xffffffffffffffffULL);
+    rex.w = 1;
+    emit_test32(dyn, ninst, rex, x1, x2, x3, x4, x5);
+    rex.w = 0;
+    RET(xLR);
+// For ADC & SBB, the emit_adcX/emit_sbbX helpers cannot be used because the
+// CF value at the time of the operation was not saved; the carry chain is
+// rebuilt from op1, op2 and res instead.
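+// The per-bit carry of op1+op2+carry can be recovered without the carry-in:
+// carry_chain = (op1 & op2) | (~res & (op1 | op2)). AF is the carry into
+// bit 4 (bit 3 of the chain), OF is carry-into-MSB XOR carry-out-of-MSB
+// (two adjacent chain bits XORed together).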
+SETMARK(d_adc8);
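+    // res was presumably stored unmasked: the 8-bit result is loaded as
+    // 16 bits so that bit 8 directly provides CF (same trick, with wider
+    // loads, for the 16- and 32-bit variants below)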
+    LDRH_U12(x1, xEmu, offsetof(x64emu_t, res));
+    BFXILw(xFlags, x1, 8, 1);   //F_CF
+    LSRw(x2, x1, 7);
+    BFIw(xFlags, x2, F_SF, 1);
+    TSTw_mask(x1, 0, 7);    // mask 0xff
+    CSETw(x2, cEQ);
+    BFIw(xFlags, x2, F_ZF, 1);
+    LDRB_U12(x2, xEmu, offsetof(x64emu_t, op1));
+    LDRB_U12(x3, xEmu, offsetof(x64emu_t, op2));
+    ANDw_REG(x4, x2, x3);   // op1 & op2
+    ORRw_REG(x2, x2, x3);   // op1 | op2
+    BICw_REG(x2, x2, x1);   // ~res & (op1 | op2)
+    ORRw_REG(x2, x2, x4);   // CC = carry chain
+    LSRw(x3, x2, 3);
+    BFIw(xFlags, x3, F_AF, 1);
+    LSRw(x3, x2, 6);
+    EORw_REG_LSR(x3, x3, x3, 1);
+    BFIw(xFlags, x3, F_OF, 1);
+    emit_pf(dyn, ninst, x1, x4);
+    RET(xLR);
+SETMARK(d_adc16);
+    LDRw_U12(x1, xEmu, offsetof(x64emu_t, res));
+    BFXILw(xFlags, x1, 16, 1);   //F_CF
+    LSRw(x2, x1, 15);
+    BFIw(xFlags, x2, F_SF, 1);
+    TSTw_mask(x1, 0, 15);    // mask 0xffff
+    CSETw(x2, cEQ);
+    BFIw(xFlags, x2, F_ZF, 1);
+    LDRH_U12(x2, xEmu, offsetof(x64emu_t, op1));
+    LDRH_U12(x3, xEmu, offsetof(x64emu_t, op2));
+    ANDw_REG(x4, x2, x3);   // op1 & op2
+    ORRw_REG(x2, x2, x3);   // op1 | op2
+    BICw_REG(x2, x2, x1);   // ~res & (op1 | op2)
+    ORRw_REG(x2, x2, x4); // CC
+    LSRw(x3, x2, 3);
+    BFIw(xFlags, x3, F_AF, 1);
+    LSRw(x3, x2, 14);
+    EORw_REG_LSR(x3, x3, x3, 1);
+    BFIw(xFlags, x3, F_OF, 1);
+    emit_pf(dyn, ninst, x1, x4);
+    RET(xLR);
+SETMARK(d_adc32);
+    LDRx_U12(x1, xEmu, offsetof(x64emu_t, res));
+    BFXILx(xFlags, x1, 32, 1);   //F_CF
+    LSRw(x2, x1, 31);
+    BFIw(xFlags, x2, F_SF, 1);
+    TSTw_REG(x1, x1);
+    CSETw(x2, cEQ);
+    BFIw(xFlags, x2, F_ZF, 1);
+    LDRw_U12(x2, xEmu, offsetof(x64emu_t, op1));
+    LDRw_U12(x3, xEmu, offsetof(x64emu_t, op2));
+    ANDw_REG(x4, x2, x3);   // op1 & op2
+    ORRw_REG(x2, x2, x3);   // op1 | op2
+    BICw_REG(x2, x2, x1);   // ~res & (op1 | op2)
+    ORRw_REG(x2, x2, x4); // CC
+    LSRw(x3, x2, 3);
+    BFIw(xFlags, x3, F_AF, 1);
+    LSRw(x3, x2, 30);
+    EORw_REG_LSR(x3, x3, x3, 1);
+    BFIw(xFlags, x3, F_OF, 1);
+    emit_pf(dyn, ninst, x1, x4);
+    RET(xLR);
+SETMARK(d_adc32b);
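+    // here only the masked 32-bit res was kept, so CF is recomputed:
+    // carry-in = (res != op1+op2), then op1+op2+carry-in is redone in 64-bit
+    // registers and bit 32 gives CF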
+    LDRw_U12(x1, xEmu, offsetof(x64emu_t, res));
+    LSRw(x2, x1, 31);
+    BFIw(xFlags, x2, F_SF, 1);
+    TSTw_REG(x1, x1);
+    CSETw(x2, cEQ);
+    BFIw(xFlags, x2, F_ZF, 1);
+    LDRw_U12(x2, xEmu, offsetof(x64emu_t, op1));
+    LDRw_U12(x3, xEmu, offsetof(x64emu_t, op2));
+    ADDw_REG(x4, x2, x3);
+    CMPSw_REG(x1, x4);
+    CSETw(x4, cNE);
+    ADDx_REG(x4, x4, x2);
+    ADDx_REG(x4, x4, x3);
+    BFXILx(xFlags, x4, 32, 1);   //F_CF
+    ANDw_REG(x4, x2, x3);   // op1 & op2
+    ORRw_REG(x2, x2, x3);   // op1 | op2
+    BICw_REG(x2, x2, x1);   // ~res & (op1 | op2)
+    ORRw_REG(x2, x2, x4); // CC
+    LSRw(x3, x2, 3);
+    BFIw(xFlags, x3, F_AF, 1);
+    LSRw(x3, x2, 30);
+    EORw_REG_LSR(x3, x3, x3, 1);
+    BFIw(xFlags, x3, F_OF, 1);
+    emit_pf(dyn, ninst, x1, x4);
+    RET(xLR);
+SETMARK(d_adc64);
+    LDRx_U12(x1, xEmu, offsetof(x64emu_t, res));
+    LSRx(x2, x1, 63);
+    BFIw(xFlags, x2, F_SF, 1);
+    TSTx_REG(x1, x1);
+    CSETw(x2, cEQ);
+    BFIw(xFlags, x2, F_ZF, 1);
+    LDRx_U12(x2, xEmu, offsetof(x64emu_t, op1));
+    LDRx_U12(x3, xEmu, offsetof(x64emu_t, op2));
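+    // no wider register holds a 64-bit carry-out, so the add is replayed:
+    // carry-in = (res != op1+op2), then the sum is rebuilt in two 32-bit
+    // halves and bit 32 of the high-half sum gives CF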
+    ADDx_REG(x4, x2, x3);
+    CMPSx_REG(x1, x4);
+    CSETw(x4, cNE);
+    ADDx_UXTW(x4, x4, x2);
+    ADDx_UXTW(x4, x4, x3);  // x4 = lo
+    LSRx(x4, x4, 32);
+    ADDx_REG_LSR(x4, x4, x2, 32);
+    ADDx_REG_LSR(x4, x4, x3, 32);   // hi
+    BFXILx(xFlags, x4, 32, 1);   //F_CF
+    ANDx_REG(x4, x2, x3);   // op1 & op2
+    ORRx_REG(x2, x2, x3);   // op1 | op2
+    BICx_REG(x2, x2, x1);   // ~res & (op1 | op2)
+    ORRx_REG(x2, x2, x4); // CC
+    LSRx(x3, x2, 3);
+    BFIx(xFlags, x3, F_AF, 1);
+    LSRx(x3, x2, 62);
+    EORw_REG_LSR(x3, x3, x3, 1);
+    BFIw(xFlags, x3, F_OF, 1);
+    emit_pf(dyn, ninst, x1, x4);
+    RET(xLR);
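+// SBB uses the same scheme with the borrow chain of op1-op2-borrow:
+// borrow_chain = (~op1 & op2) | (res & (~op1 | op2)); its MSB gives CF.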
+SETMARK(d_sbb8);
+    LDRB_U12(x1, xEmu, offsetof(x64emu_t, res));
+    LSRw(x2, x1, 7);
+    BFIw(xFlags, x2, F_SF, 1);
+    TSTw_mask(x1, 0, 7);    // mask 0xff
+    CSETw(x2, cEQ);
+    BFIw(xFlags, x2, F_ZF, 1);
+    LDRB_U12(x2, xEmu, offsetof(x64emu_t, op1));
+    LDRB_U12(x3, xEmu, offsetof(x64emu_t, op2));
+    BICw_REG(x4, x3, x2);   // ~op1 & op2
+    ORNw_REG(x2, x3, x2);   // ~op1 | op2
+    ANDw_REG(x2, x2, x1);   // res & (~op1 | op2)
+    ORRw_REG(x2, x2, x4);   // CC = borrow chain
+    BFXILw(xFlags, x2, 7, 1);
+    LSRw(x3, x2, 3);
+    BFIw(xFlags, x3, F_AF, 1);
+    LSRw(x3, x2, 6);
+    EORw_REG_LSR(x3, x3, x3, 1);
+    BFIw(xFlags, x3, F_OF, 1);
+    emit_pf(dyn, ninst, x1, x4);
+    RET(xLR);
+SETMARK(d_sbb16);
+    LDRH_U12(x1, xEmu, offsetof(x64emu_t, res));
+    LSRw(x2, x1, 15);
+    BFIw(xFlags, x2, F_SF, 1);
+    TSTw_mask(x1, 0, 15);    // mask 0xffff
+    CSETw(x2, cEQ);
+    BFIw(xFlags, x2, F_ZF, 1);
+    LDRH_U12(x2, xEmu, offsetof(x64emu_t, op1));
+    LDRH_U12(x3, xEmu, offsetof(x64emu_t, op2));
+    BICw_REG(x4, x3, x2);   // ~op1 & op2
+    ORNw_REG(x2, x3, x2);   // ~op1 | op2
+    ANDw_REG(x2, x2, x1);   // res & (~op1 | op2)
+    ORRw_REG(x2, x2, x4); // CC
+    BFXILw(xFlags, x2, 15, 1);
+    LSRw(x3, x2, 3);
+    BFIw(xFlags, x3, F_AF, 1);
+    LSRw(x3, x2, 14);
+    EORw_REG_LSR(x3, x3, x3, 1);
+    BFIw(xFlags, x3, F_OF, 1);
+    emit_pf(dyn, ninst, x1, x4);
+    RET(xLR);
+SETMARK(d_sbb32);
+    LDRw_U12(x1, xEmu, offsetof(x64emu_t, res));
+    LSRw(x2, x1, 31);
+    BFIw(xFlags, x2, F_SF, 1);
+    TSTw_REG(x1, x1);
+    CSETw(x2, cEQ);
+    BFIw(xFlags, x2, F_ZF, 1);
+    LDRw_U12(x2, xEmu, offsetof(x64emu_t, op1));
+    LDRw_U12(x3, xEmu, offsetof(x64emu_t, op2));
+    BICw_REG(x4, x3, x2);   // ~op1 & op2
+    ORNw_REG(x2, x3, x2);   // ~op1 | op2
+    ANDw_REG(x2, x2, x1);   // res & (~op1 | op2)
+    ORRw_REG(x2, x2, x4); // CC
+    BFXILw(xFlags, x2, 31, 1);
+    LSRw(x3, x2, 3);
+    BFIw(xFlags, x3, F_AF, 1);
+    LSRw(x3, x2, 30);
+    EORw_REG_LSR(x3, x3, x3, 1);
+    BFIw(xFlags, x3, F_OF, 1);
+    emit_pf(dyn, ninst, x1, x4);
+    RET(xLR);
+SETMARK(d_sbb64);
+    LDRx_U12(x1, xEmu, offsetof(x64emu_t, res));
+    LSRx(x2, x1, 63);
+    BFIw(xFlags, x2, F_SF, 1);
+    TSTx_REG(x1, x1);
+    CSETw(x2, cEQ);
+    BFIw(xFlags, x2, F_ZF, 1);
+    LDRx_U12(x2, xEmu, offsetof(x64emu_t, op1));
+    LDRx_U12(x3, xEmu, offsetof(x64emu_t, op2));
+    BICx_REG(x4, x3, x2);   // ~op1 & op2
+    ORNx_REG(x2, x3, x2);   // ~op1 | op2
+    ANDx_REG(x2, x2, x1);   // res & (~op1 | op2)
+    ORRx_REG(x2, x2, x4); // CC
+    BFXILx(xFlags, x2, 63, 1);
+    LSRw(x3, x2, 3);
+    BFIw(xFlags, x3, F_AF, 1);
+    LSRx(x3, x2, 62);
+    EORw_REG_LSR(x3, x3, x3, 1);
+    BFIw(xFlags, x3, F_OF, 1);
+    emit_pf(dyn, ninst, x1, x4);
+    RET(xLR);
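+// ROL/ROR only update CF (the bit that wrapped around the boundary) and OF;
+// the OF formula (and whether it uses res or op1) depends on the emulated
+// CPU type.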
+SETMARK(d_rol8);
+    LDRB_U12(x1, xEmu, offsetof(x64emu_t, res));
+    if(BOX64ENV(cputype)) {
+        EORw_REG_LSR(x2, x1, x1, 7);
+        BFIw(xFlags, x2, F_OF, 1);
+    } else {
+        LDRB_U12(x2, xEmu, offsetof(x64emu_t, op1));
+        LSLw_IMM(x3, x2, 6);
+        EORw_REG_LSR(x3, x3, x3, 1);
+        BFIw(xFlags, x3, F_OF, 1);
+    }
+    BFXILw(xFlags, x1, 0, 1);
+    RET(xLR);
+SETMARK(d_rol16);
+    LDRH_U12(x1, xEmu, offsetof(x64emu_t, res));
+    if(BOX64ENV(cputype)) {
+        EORw_REG_LSR(x2, x1, x1, 15);
+        BFIw(xFlags, x2, F_OF, 1);
+    } else {
+        LDRH_U12(x2, xEmu, offsetof(x64emu_t, op1));
+        LSLw_IMM(x3, x2, 14);
+        EORw_REG_LSR(x3, x3, x3, 1);
+        BFIw(xFlags, x3, F_OF, 1);
+    }
+    BFXILw(xFlags, x1, 0, 1);
+    RET(xLR);
+SETMARK(d_rol32);
+    LDRw_U12(x1, xEmu, offsetof(x64emu_t, res));
+    if(BOX64ENV(cputype)) {
+        EORw_REG_LSR(x2, x1, x1, 31);
+        BFIw(xFlags, x2, F_OF, 1);
+    } else {
+        LDRw_U12(x2, xEmu, offsetof(x64emu_t, op1));
+        LSLw_IMM(x3, x2, 30);
+        EORw_REG_LSR(x3, x3, x3, 1);
+        BFIw(xFlags, x3, F_OF, 1);
+    }
+    BFXILw(xFlags, x1, 0, 1);
+    RET(xLR);
+SETMARK(d_rol64);
+    LDRx_U12(x1, xEmu, offsetof(x64emu_t, res));
+    if(BOX64ENV(cputype)) {
+        EORx_REG_LSR(x2, x1, x1, 63);
+        BFIw(xFlags, x2, F_OF, 1);
+    } else {
+        LDRx_U12(x2, xEmu, offsetof(x64emu_t, op1));
+        LSLx_IMM(x3, x2, 62);
+        EORw_REG_LSR(x3, x3, x3, 1);
+        BFIw(xFlags, x3, F_OF, 1);
+    }
+    BFXILw(xFlags, x1, 0, 1);
+    RET(xLR);
+SETMARK(d_ror8);
+    LDRB_U12(x1, xEmu, offsetof(x64emu_t, res));
+    if(BOX64ENV(cputype)) {
+        LSLw_IMM(x2, x1, 6);
+        EORw_REG_LSR(x3, x2, x2, 1);
+        BFIw(xFlags, x3, F_OF, 1);
+    } else {
+        LDRB_U12(x2, xEmu, offsetof(x64emu_t, op1));
+        EORw_REG_LSR(x3, x2, x2, 7);
+        BFIw(xFlags, x3, F_OF, 1);
+    }
+    BFXILw(xFlags, x1, 7, 1);
+    RET(xLR);
+SETMARK(d_ror16);
+    LDRH_U12(x1, xEmu, offsetof(x64emu_t, res));
+    if(BOX64ENV(cputype)) {
+        LSLw_IMM(x2, x1, 14);
+        EORw_REG_LSR(x3, x2, x2, 1);
+        BFIw(xFlags, x3, F_OF, 1);
+    } else {
+        LDRH_U12(x2, xEmu, offsetof(x64emu_t, op1));
+        EORw_REG_LSR(x3, x2, x2, 15);
+        BFIw(xFlags, x3, F_OF, 1);
+    }
+    BFXILw(xFlags, x1, 15, 1);
+    RET(xLR);
+SETMARK(d_ror32);
+    LDRw_U12(x1, xEmu, offsetof(x64emu_t, res));
+    if(BOX64ENV(cputype)) {
+        LSLw_IMM(x2, x1, 30);
+        EORw_REG_LSR(x3, x2, x2, 1);
+        BFIw(xFlags, x3, F_OF, 1);
+    } else {
+        LDRw_U12(x2, xEmu, offsetof(x64emu_t, op1));
+        EORw_REG_LSR(x3, x2, x2, 31);
+        BFIw(xFlags, x3, F_OF, 1);
+    }
+    BFXILw(xFlags, x1, 31, 1);
+    RET(xLR);
+SETMARK(d_ror64);
+    LDRx_U12(x1, xEmu, offsetof(x64emu_t, res));
+    if(BOX64ENV(cputype)) {
+        LSLx_IMM(x2, x1, 62);
+        EORw_REG_LSR(x3, x2, x2, 1);
+        BFIw(xFlags, x3, F_OF, 1);
+    } else {
+        LDRx_U12(x2, xEmu, offsetof(x64emu_t, op1));
+        EORx_REG_LSR(x3, x2, x2, 63);
+        BFIw(xFlags, x3, F_OF, 1);
+    }
+    BFXILx(xFlags, x1, 63, 1);
+    RET(xLR);
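+// SHRD/SHLD (double-precision shifts): CF is the last bit shifted out of
+// op1, and a shift count of zero leaves all the flags unchanged (early RET).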
+SETMARK(d_shrd16);
+    LDRH_U12(x1, xEmu, offsetof(x64emu_t, res));
+    LDRH_U12(x2, xEmu, offsetof(x64emu_t, op1));
+    LDRH_U12(x3, xEmu, offsetof(x64emu_t, op2));
+    if(BOX64ENV(cputype)) {
+        LSRw(x4, x1, 14);
+        EORw_REG_LSR(x4, x4, x4, 1);
+        BFIw(xFlags, x4, F_OF, 1);
+        ORRw_mask(xFlags, xFlags, 28, 0); // mask = 0x10
+    } else {
+        ANDw_mask(x4, x3, 0, 3);    // mask = 0x0f
+        SUBw_U12(x4, x4, 16);
+        NEGw_REG(x4, x4);
+        LSRw_REG(x4, x1, x4);
+        EORw_REG_LSR(x4, x4, x2, 15);
+        BFIw(xFlags, x4, F_OF, 1);
+        BFCw(xFlags, F_AF, 1);
+    }
+    CBZw(x3, 4+4);
+    RET(xLR);
+    SUBw_U12(x4, x3, 1);
+    LSRw_REG(x4, x2, x4);
+    if(BOX64ENV(cputype)) {
+        CMPSw_U12(x3, 15);
+        CSELw(x4, x4, xZR, cGT);
+    }
+    BFIw(xFlags, x4, F_CF, 1);
+    LSRw(x4, x1, 15);
+    BFIw(xFlags, x4, F_SF, 1);
+    TSTw_mask(x1, 0, 15);
+    CSETw(x4, cEQ);
+    BFIw(xFlags, x4, F_ZF, 1);
+    emit_pf(dyn, ninst, x1, x4);
+    RET(xLR);
+SETMARK(d_shrd32);
+    LDRw_U12(x1, xEmu, offsetof(x64emu_t, res));
+    LDRw_U12(x2, xEmu, offsetof(x64emu_t, op1));
+    LDRw_U12(x3, xEmu, offsetof(x64emu_t, op2));
+    CBNZw(x3, 4+4);
+    RET(xLR);
+    if(BOX64ENV(cputype)) {
+        LSRw(x4, x1, 30);
+        EORw_REG_LSR(x4, x4, x4, 1);
+        BFIw(xFlags, x4, F_OF, 1);
+        ORRw_mask(xFlags, xFlags, 28, 0); // mask = 0x10
+    } else {
+        SUBw_U12(x4, x3, 32);
+        NEGw_REG(x4, x4);
+        LSRw_REG(x4, x1, x4);
+        EORw_REG_LSR(x4, x4, x2, 31);
+        BFIw(xFlags, x4, F_OF, 1);
+        BFCw(xFlags, F_AF, 1);
+    }
+    SUBw_U12(x4, x3, 1);
+    LSRw_REG(x4, x2, x4);
+    BFIw(xFlags, x4, F_CF, 1);
+    LSRw(x4, x1, 31);
+    BFIw(xFlags, x4, F_SF, 1);
+    TSTw_REG(x1, x1);
+    CSETw(x4, cEQ);
+    BFIw(xFlags, x4, F_ZF, 1);
+    emit_pf(dyn, ninst, x1, x4);
+    RET(xLR);
+SETMARK(d_shrd64);
+    LDRx_U12(x1, xEmu, offsetof(x64emu_t, res));
+    LDRx_U12(x2, xEmu, offsetof(x64emu_t, op1));
+    LDRx_U12(x3, xEmu, offsetof(x64emu_t, op2));
+    CBNZw(x3, 4+4);
+    RET(xLR);
+    if(BOX64ENV(cputype)) {
+        LSRx(x4, x1, 62);
+        EORw_REG_LSR(x4, x4, x4, 1);
+        BFIw(xFlags, x4, F_OF, 1);
+        ORRw_mask(xFlags, xFlags, 28, 0); // mask = 0x10
+    } else {
+        SUBw_U12(x4, x3, 64);
+        NEGw_REG(x4, x4);
+        LSRx_REG(x4, x1, x4);
+        EORx_REG_LSR(x4, x4, x2, 63);
+        BFIw(xFlags, x4, F_OF, 1);
+        BFCw(xFlags, F_AF, 1);
+    }
+    SUBx_U12(x4, x3, 1);
+    LSRx_REG(x4, x2, x4);
+    BFIw(xFlags, x4, F_CF, 1);
+    LSRx(x4, x1, 63);
+    BFIw(xFlags, x4, F_SF, 1);
+    TSTx_REG(x1, x1);
+    CSETw(x4, cEQ);
+    BFIw(xFlags, x4, F_ZF, 1);
+    emit_pf(dyn, ninst, x1, x4);
+    RET(xLR);
+SETMARK(d_shld16);
+    LDRH_U12(x1, xEmu, offsetof(x64emu_t, res));
+    LDRH_U12(x2, xEmu, offsetof(x64emu_t, op1));
+    LDRH_U12(x3, xEmu, offsetof(x64emu_t, op2));
+    CBNZw(x3, 4+4);
+    RET(xLR);
+    SUBw_U12(x4, x3, 16);
+    NEGw_REG(x4, x4);
+    LSRw_REG(x4, x2, x4);
+    BFIw(xFlags, x4, F_CF, 1);
+    if(BOX64ENV(cputype)) {
+        EORw_REG_LSR(x4, xFlags, x1, 15);
+        CMPSw_U12(x3, 15);
+        CSELw(x4, x4, xFlags, cGT);
+        BFIw(xFlags, x4, F_OF, 1);
+        ORRw_mask(xFlags, xFlags, 28, 0); // mask = 0x10
+    } else {
+        LSRw(x4, x2, 14);
+        EORw_REG_LSR(x4, x4, x4, 1);
+        BFIw(xFlags, x4, F_OF, 1);
+        BFCw(xFlags, F_AF, 1);
+    }
+    LSRw(x4, x1, 15);
+    BFIw(xFlags, x4, F_SF, 1);
+    TSTw_mask(x1, 0, 15);
+    CSETw(x4, cEQ);
+    BFIw(xFlags, x4, F_ZF, 1);
+    emit_pf(dyn, ninst, x1, x4);
+    RET(xLR);
+SETMARK(d_shld32);
+    LDRw_U12(x1, xEmu, offsetof(x64emu_t, res));
+    LDRw_U12(x2, xEmu, offsetof(x64emu_t, op1));
+    LDRw_U12(x3, xEmu, offsetof(x64emu_t, op2));
+    CBNZw(x3, 4+4);
+    RET(xLR);
+    SUBw_U12(x4, x3, 32);
+    NEGw_REG(x4, x4);
+    LSRw_REG(x4, x2, x4);
+    BFIw(xFlags, x4, F_CF, 1);
+    if(BOX64ENV(cputype)) {
+        EORw_REG_LSR(x4, xFlags, x1, 31);
+        BFIw(xFlags, x4, F_OF, 1);
+        ORRw_mask(xFlags, xFlags, 28, 0); // mask = 0x10
+    } else {
+        LSRw(x4, x2, 30);
+        EORw_REG_LSR(x4, x4, x4, 1);
+        BFIw(xFlags, x4, F_OF, 1);
+        BFCw(xFlags, F_AF, 1);
+    }
+    LSRw(x4, x1, 31);
+    BFIw(xFlags, x4, F_SF, 1);
+    TSTw_REG(x1, x1);
+    CSETw(x4, cEQ);
+    BFIw(xFlags, x4, F_ZF, 1);
+    emit_pf(dyn, ninst, x1, x4);
+    RET(xLR);
+SETMARK(d_shld64);
+    LDRx_U12(x1, xEmu, offsetof(x64emu_t, res));
+    LDRx_U12(x2, xEmu, offsetof(x64emu_t, op1));
+    LDRx_U12(x3, xEmu, offsetof(x64emu_t, op2));
+    CBNZx(x3, 4+4);
+    RET(xLR);
+    MOV32w(x4, 64);
+    SUBw_REG(x4, x4, x3);
+    LSRx_REG(x4, x2, x4);
+    BFIw(xFlags, x4, F_CF, 1);
+    if(BOX64ENV(cputype)) {
+        EORx_REG_LSR(x4, xFlags, x1, 63);
+        BFIw(xFlags, x4, F_OF, 1);
+        ORRw_mask(xFlags, xFlags, 28, 0); // mask = 0x10
+    } else {
+        LSRx(x4, x2, 62);
+        EORw_REG_LSR(x4, x4, x4, 1);
+        BFIw(xFlags, x4, F_OF, 1);
+        BFCw(xFlags, F_AF, 1);
+    }
+    LSRx(x4, x1, 63);
+    BFIw(xFlags, x4, F_SF, 1);
+    TSTx_REG(x1, x1);
+    CSETw(x4, cEQ);
+    BFIw(xFlags, x4, F_ZF, 1);
+    emit_pf(dyn, ninst, x1, x4);
+    RET(xLR);
+// all done!
+}
\ No newline at end of file