From 0cdb134d45ba7249a9f15d10e46b10e926abae60 Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Sun, 27 Feb 2022 09:48:00 +0100 Subject: [DYNAREC] Refactored dynarec to ease the future adding of new target architecture --- src/custommem.c | 59 +- src/dynarec/arm64/arm64_emitter.h | 1755 ++++++++++++++++++ src/dynarec/arm64/arm64_epilog.S | 54 + src/dynarec/arm64/arm64_lock.S | 131 ++ src/dynarec/arm64/arm64_lock.h | 39 + src/dynarec/arm64/arm64_next.S | 39 + src/dynarec/arm64/arm64_printer.c | 1353 ++++++++++++++ src/dynarec/arm64/arm64_printer.h | 6 + src/dynarec/arm64/arm64_prolog.S | 35 + src/dynarec/arm64/dynarec_arm64_00.c | 2453 ++++++++++++++++++++++++++ src/dynarec/arm64/dynarec_arm64_0f.c | 1911 ++++++++++++++++++++ src/dynarec/arm64/dynarec_arm64_64.c | 880 +++++++++ src/dynarec/arm64/dynarec_arm64_66.c | 871 +++++++++ src/dynarec/arm64/dynarec_arm64_660f.c | 1946 ++++++++++++++++++++ src/dynarec/arm64/dynarec_arm64_6664.c | 129 ++ src/dynarec/arm64/dynarec_arm64_67.c | 428 +++++ src/dynarec/arm64/dynarec_arm64_d8.c | 232 +++ src/dynarec/arm64/dynarec_arm64_d9.c | 356 ++++ src/dynarec/arm64/dynarec_arm64_db.c | 307 ++++ src/dynarec/arm64/dynarec_arm64_dc.c | 219 +++ src/dynarec/arm64/dynarec_arm64_dd.c | 205 +++ src/dynarec/arm64/dynarec_arm64_df.c | 295 ++++ src/dynarec/arm64/dynarec_arm64_emit_logic.c | 679 +++++++ src/dynarec/arm64/dynarec_arm64_emit_math.c | 1849 +++++++++++++++++++ src/dynarec/arm64/dynarec_arm64_emit_shift.c | 449 +++++ src/dynarec/arm64/dynarec_arm64_emit_tests.c | 374 ++++ src/dynarec/arm64/dynarec_arm64_f0.c | 939 ++++++++++ src/dynarec/arm64/dynarec_arm64_f20f.c | 367 ++++ src/dynarec/arm64/dynarec_arm64_f30f.c | 440 +++++ src/dynarec/arm64/dynarec_arm64_functions.c | 466 +++++ src/dynarec/arm64/dynarec_arm64_functions.h | 64 + src/dynarec/arm64/dynarec_arm64_helper.c | 1280 ++++++++++++++ src/dynarec/arm64/dynarec_arm64_helper.h | 1087 ++++++++++++ src/dynarec/arm64/dynarec_arm64_pass.c | 146 ++ src/dynarec/arm64/dynarec_arm64_pass0.h | 39 + src/dynarec/arm64/dynarec_arm64_pass1.h | 10 + src/dynarec/arm64/dynarec_arm64_pass2.h | 11 + src/dynarec/arm64/dynarec_arm64_pass3.h | 36 + src/dynarec/arm64/dynarec_arm64_private.h | 64 + src/dynarec/arm64_emitter.h | 1755 ------------------ src/dynarec/arm64_epilog.S | 54 - src/dynarec/arm64_lock.S | 131 -- src/dynarec/arm64_lock.h | 39 - src/dynarec/arm64_next.S | 39 - src/dynarec/arm64_printer.c | 1353 -------------- src/dynarec/arm64_printer.h | 6 - src/dynarec/arm64_prolog.S | 35 - src/dynarec/dynablock.c | 22 +- src/dynarec/dynarec_arch.h | 24 + src/dynarec/dynarec_arm64.c | 541 ------ src/dynarec/dynarec_arm64_00.c | 2453 -------------------------- src/dynarec/dynarec_arm64_0f.c | 1911 -------------------- src/dynarec/dynarec_arm64_64.c | 880 --------- src/dynarec/dynarec_arm64_66.c | 871 --------- src/dynarec/dynarec_arm64_660f.c | 1946 -------------------- src/dynarec/dynarec_arm64_6664.c | 129 -- src/dynarec/dynarec_arm64_67.c | 428 ----- src/dynarec/dynarec_arm64_d8.c | 232 --- src/dynarec/dynarec_arm64_d9.c | 356 ---- src/dynarec/dynarec_arm64_db.c | 307 ---- src/dynarec/dynarec_arm64_dc.c | 219 --- src/dynarec/dynarec_arm64_dd.c | 205 --- src/dynarec/dynarec_arm64_df.c | 295 ---- src/dynarec/dynarec_arm64_emit_logic.c | 679 ------- src/dynarec/dynarec_arm64_emit_math.c | 1849 ------------------- src/dynarec/dynarec_arm64_emit_shift.c | 449 ----- src/dynarec/dynarec_arm64_emit_tests.c | 374 ---- src/dynarec/dynarec_arm64_f0.c | 939 ---------- src/dynarec/dynarec_arm64_f20f.c | 367 ---- 
src/dynarec/dynarec_arm64_f30f.c | 440 ----- src/dynarec/dynarec_arm64_functions.c | 466 ----- src/dynarec/dynarec_arm64_functions.h | 64 - src/dynarec/dynarec_arm64_helper.c | 1280 -------------- src/dynarec/dynarec_arm64_helper.h | 1087 ------------ src/dynarec/dynarec_arm64_pass.c | 145 -- src/dynarec/dynarec_arm64_pass0.h | 39 - src/dynarec/dynarec_arm64_pass1.h | 10 - src/dynarec/dynarec_arm64_pass2.h | 11 - src/dynarec/dynarec_arm64_pass3.h | 36 - src/dynarec/dynarec_arm64_private.h | 64 - src/dynarec/dynarec_native.c | 535 ++++++ src/dynarec/native_lock.h | 25 + src/emu/x64run.c | 2 +- src/emu/x64run0f.c | 2 +- src/emu/x64run66.c | 2 +- src/emu/x64run66f0.c | 58 +- src/emu/x64run670f.c | 2 +- src/emu/x64run6766.c | 2 +- src/emu/x64runf0.c | 170 +- src/include/dynarec_arm64.h | 10 - src/include/dynarec_native.h | 10 + 91 files changed, 22699 insertions(+), 22652 deletions(-) create mode 100755 src/dynarec/arm64/arm64_emitter.h create mode 100755 src/dynarec/arm64/arm64_epilog.S create mode 100755 src/dynarec/arm64/arm64_lock.S create mode 100755 src/dynarec/arm64/arm64_lock.h create mode 100755 src/dynarec/arm64/arm64_next.S create mode 100755 src/dynarec/arm64/arm64_printer.c create mode 100644 src/dynarec/arm64/arm64_printer.h create mode 100755 src/dynarec/arm64/arm64_prolog.S create mode 100755 src/dynarec/arm64/dynarec_arm64_00.c create mode 100755 src/dynarec/arm64/dynarec_arm64_0f.c create mode 100644 src/dynarec/arm64/dynarec_arm64_64.c create mode 100755 src/dynarec/arm64/dynarec_arm64_66.c create mode 100755 src/dynarec/arm64/dynarec_arm64_660f.c create mode 100644 src/dynarec/arm64/dynarec_arm64_6664.c create mode 100755 src/dynarec/arm64/dynarec_arm64_67.c create mode 100644 src/dynarec/arm64/dynarec_arm64_d8.c create mode 100644 src/dynarec/arm64/dynarec_arm64_d9.c create mode 100644 src/dynarec/arm64/dynarec_arm64_db.c create mode 100644 src/dynarec/arm64/dynarec_arm64_dc.c create mode 100644 src/dynarec/arm64/dynarec_arm64_dd.c create mode 100644 src/dynarec/arm64/dynarec_arm64_df.c create mode 100755 src/dynarec/arm64/dynarec_arm64_emit_logic.c create mode 100755 src/dynarec/arm64/dynarec_arm64_emit_math.c create mode 100755 src/dynarec/arm64/dynarec_arm64_emit_shift.c create mode 100755 src/dynarec/arm64/dynarec_arm64_emit_tests.c create mode 100644 src/dynarec/arm64/dynarec_arm64_f0.c create mode 100755 src/dynarec/arm64/dynarec_arm64_f20f.c create mode 100755 src/dynarec/arm64/dynarec_arm64_f30f.c create mode 100755 src/dynarec/arm64/dynarec_arm64_functions.c create mode 100755 src/dynarec/arm64/dynarec_arm64_functions.h create mode 100755 src/dynarec/arm64/dynarec_arm64_helper.c create mode 100755 src/dynarec/arm64/dynarec_arm64_helper.h create mode 100755 src/dynarec/arm64/dynarec_arm64_pass.c create mode 100755 src/dynarec/arm64/dynarec_arm64_pass0.h create mode 100755 src/dynarec/arm64/dynarec_arm64_pass1.h create mode 100755 src/dynarec/arm64/dynarec_arm64_pass2.h create mode 100755 src/dynarec/arm64/dynarec_arm64_pass3.h create mode 100755 src/dynarec/arm64/dynarec_arm64_private.h delete mode 100755 src/dynarec/arm64_emitter.h delete mode 100755 src/dynarec/arm64_epilog.S delete mode 100755 src/dynarec/arm64_lock.S delete mode 100755 src/dynarec/arm64_lock.h delete mode 100755 src/dynarec/arm64_next.S delete mode 100755 src/dynarec/arm64_printer.c delete mode 100644 src/dynarec/arm64_printer.h delete mode 100755 src/dynarec/arm64_prolog.S create mode 100755 src/dynarec/dynarec_arch.h delete mode 100755 src/dynarec/dynarec_arm64.c delete mode 100755 
src/dynarec/dynarec_arm64_00.c delete mode 100755 src/dynarec/dynarec_arm64_0f.c delete mode 100644 src/dynarec/dynarec_arm64_64.c delete mode 100755 src/dynarec/dynarec_arm64_66.c delete mode 100755 src/dynarec/dynarec_arm64_660f.c delete mode 100644 src/dynarec/dynarec_arm64_6664.c delete mode 100755 src/dynarec/dynarec_arm64_67.c delete mode 100644 src/dynarec/dynarec_arm64_d8.c delete mode 100644 src/dynarec/dynarec_arm64_d9.c delete mode 100644 src/dynarec/dynarec_arm64_db.c delete mode 100644 src/dynarec/dynarec_arm64_dc.c delete mode 100644 src/dynarec/dynarec_arm64_dd.c delete mode 100644 src/dynarec/dynarec_arm64_df.c delete mode 100755 src/dynarec/dynarec_arm64_emit_logic.c delete mode 100755 src/dynarec/dynarec_arm64_emit_math.c delete mode 100755 src/dynarec/dynarec_arm64_emit_shift.c delete mode 100755 src/dynarec/dynarec_arm64_emit_tests.c delete mode 100644 src/dynarec/dynarec_arm64_f0.c delete mode 100755 src/dynarec/dynarec_arm64_f20f.c delete mode 100755 src/dynarec/dynarec_arm64_f30f.c delete mode 100755 src/dynarec/dynarec_arm64_functions.c delete mode 100755 src/dynarec/dynarec_arm64_functions.h delete mode 100755 src/dynarec/dynarec_arm64_helper.c delete mode 100755 src/dynarec/dynarec_arm64_helper.h delete mode 100755 src/dynarec/dynarec_arm64_pass.c delete mode 100755 src/dynarec/dynarec_arm64_pass0.h delete mode 100755 src/dynarec/dynarec_arm64_pass1.h delete mode 100755 src/dynarec/dynarec_arm64_pass2.h delete mode 100755 src/dynarec/dynarec_arm64_pass3.h delete mode 100755 src/dynarec/dynarec_arm64_private.h create mode 100755 src/dynarec/dynarec_native.c create mode 100755 src/dynarec/native_lock.h delete mode 100755 src/include/dynarec_arm64.h create mode 100755 src/include/dynarec_native.h (limited to 'src') diff --git a/src/custommem.c b/src/custommem.c index ff777bc4..c92b7b03 100644 --- a/src/custommem.c +++ b/src/custommem.c @@ -24,7 +24,7 @@ #include "threads.h" #ifdef DYNAREC #include "dynablock.h" -#include "dynarec/arm64_lock.h" +#include "dynarec/native_lock.h" //#define USE_MMAP @@ -579,17 +579,17 @@ void addDBFromAddressRange(uintptr_t addr, size_t size) int idx1 = (i )&((1<>16); i<=(end>>16); ++i) if(memprot[i]==memprot_default) { uint8_t* newblock = calloc(1<<16, sizeof(uint8_t)); - /*if (arm64_lock_storeifref(&memprot[i], newblock, memprot_default) != newblock) { + /*if (native_lock_storeifref(&memprot[i], newblock, memprot_default) != newblock) { free(newblock); }*/ memprot[i] = newblock; @@ -795,7 +798,7 @@ void unprotectDB(uintptr_t addr, size_t size) for (uintptr_t i=(idx>>16); i<=(end>>16); ++i) if(memprot[i]==memprot_default) { uint8_t* newblock = calloc(1<<16, sizeof(uint8_t)); - /*if (arm64_lock_storeifref(&memprot[i], newblock, memprot_default) != newblock) { + /*if (native_lock_storeifref(&memprot[i], newblock, memprot_default) != newblock) { free(newblock); }*/ memprot[i] = newblock; @@ -851,7 +854,7 @@ void updateProtection(uintptr_t addr, size_t size, uint32_t prot) if(memprot[i]==memprot_default) { uint8_t* newblock = calloc(1<<16, sizeof(uint8_t)); #if 0 //def ARM64 //disabled for now, not usefull with the mutex - if (arm64_lock_storeifref(&memprot[i], newblock, memprot_default) != newblock) { + if (native_lock_storeifref(&memprot[i], newblock, memprot_default) != newblock) { free(newblock); } #else @@ -880,7 +883,7 @@ void setProtection(uintptr_t addr, size_t size, uint32_t prot) if(memprot[i]==memprot_default) { uint8_t* newblock = calloc(MEMPROT_SIZE, sizeof(uint8_t)); #if 0 //def ARM64 //disabled for now, not usefull with the 
mutex - if (arm64_lock_storeifref(&memprot[i], newblock, memprot_default) != newblock) { + if (native_lock_storeifref(&memprot[i], newblock, memprot_default) != newblock) { free(newblock); } #else @@ -906,7 +909,7 @@ void allocProtection(uintptr_t addr, size_t size, uint32_t prot) if(memprot[i]==memprot_default) { uint8_t* newblock = calloc(1<<16, sizeof(uint8_t)); #if 0 //def ARM64 //disabled for now, not usefull with the mutex - if (arm64_lock_storeifref(&memprot[i], newblock, memprot_default) != newblock) { + if (native_lock_storeifref(&memprot[i], newblock, memprot_default) != newblock) { free(newblock); } #else @@ -973,9 +976,9 @@ void freeProtection(uintptr_t addr, size_t size) memset(block+start, 0, finish-start+1); #if 0 //def ARM64 //disabled for now, not usefull with the mutex if (blockempty(block)) { - block = (void*)arm64_lock_xchg(&memprot[key], (uintptr_t)memprot_default); + block = (void*)native_lock_xchg(&memprot[key], (uintptr_t)memprot_default); if(!blockempty(block)) { - block = (void*)arm64_lock_xchg(&memprot[key], (uintptr_t)block); + block = (void*)native_lock_xchg(&memprot[key], (uintptr_t)block); for (int i = 0; i < 0x10000; ++i) { memprot[key][i] |= block[i]; } @@ -1162,7 +1165,7 @@ void init_custommem_helper(box64context_t* ctx) #ifdef ARM64 if(box64_dynarec) for(int i=0; i<(1<>16)&0xffff, 16);} \ + } +#define MOV64x(Rd, imm64) \ + if(~((uint64_t)(imm64))<0xffff) { \ + MOVNx(Rd, (~(uint64_t)(imm64))&0xffff); \ + } else { \ + MOVZx(Rd, ((uint64_t)(imm64))&0xffff); \ + if(((uint64_t)(imm64))&0xffff0000) {MOVKx_LSL(Rd, (((uint64_t)(imm64))>>16)&0xffff, 16);} \ + if(((uint64_t)(imm64))&0xffff00000000LL) {MOVKx_LSL(Rd, (((uint64_t)(imm64))>>32)&0xffff, 32);} \ + if(((uint64_t)(imm64))&0xffff000000000000LL) {MOVKx_LSL(Rd, (((uint64_t)(imm64))>>48)&0xffff, 48);} \ + } + +#define MOV64xw(Rd, imm64) if(rex.w) {MOV64x(Rd, imm64);} else {MOV32w(Rd, imm64);} + + +// ADD / SUB +#define ADDSUB_REG_gen(sf, op, S, shift, Rm, imm6, Rn, Rd) ((sf)<<31 | (op)<<30 | (S)<<29 | 0b01011<<24 | (shift)<<22 | (Rm)<<16 | (imm6)<<10 | (Rn)<<5 | (Rd)) +#define ADDx_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(1, 0, 0, 0b00, Rm, 0, Rn, Rd)) +#define ADDSx_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(1, 0, 1, 0b00, Rm, 0, Rn, Rd)) +#define ADDx_REG_LSL(Rd, Rn, Rm, lsl) EMIT(ADDSUB_REG_gen(1, 0, 0, 0b00, Rm, lsl, Rn, Rd)) +#define ADDw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(0, 0, 0, 0b00, Rm, 0, Rn, Rd)) +#define ADDSw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(0, 0, 1, 0b00, Rm, 0, Rn, Rd)) +#define ADDw_REG_LSL(Rd, Rn, Rm, lsl) EMIT(ADDSUB_REG_gen(0, 0, 0, 0b00, Rm, lsl, Rn, Rd)) +#define ADDxw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(rex.w, 0, 0, 0b00, Rm, 0, Rn, Rd)) +#define ADDSxw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(rex.w, 0, 1, 0b00, Rm, 0, Rn, Rd)) +#define ADDxw_REG_LSR(Rd, Rn, Rm, lsr) EMIT(ADDSUB_REG_gen(rex.w, 0, 0, 0b01, Rm, lsr, Rn, Rd)) + +#define ADDSUB_IMM_gen(sf, op, S, shift, imm12, Rn, Rd) ((sf)<<31 | (op)<<30 | (S)<<29 | 0b10001<<24 | (shift)<<22 | (imm12)<<10 | (Rn)<<5 | (Rd)) +#define ADDx_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(1, 0, 0, 0b00, (imm12)&0xfff, Rn, Rd)) +#define ADDSx_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(1, 0, 1, 0b00, (imm12)&0xfff, Rn, Rd)) +#define ADDw_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(0, 0, 0, 0b00, (imm12)&0xfff, Rn, Rd)) +#define ADDSw_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(0, 0, 1, 0b00, (imm12)&0xfff, Rn, Rd)) +#define ADDxw_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(rex.w, 0, 0, 0b00, (imm12)&0xfff, Rn, Rd)) +#define ADDSxw_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(rex.w, 0, 1, 0b00, 
(imm12)&0xfff, Rn, Rd))
+
+#define SUBx_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(1, 1, 0, 0b00, Rm, 0, Rn, Rd))
+#define SUBSx_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(1, 1, 1, 0b00, Rm, 0, Rn, Rd))
+#define SUBx_REG_LSL(Rd, Rn, Rm, lsl) EMIT(ADDSUB_REG_gen(1, 1, 0, 0b00, Rm, lsl, Rn, Rd))
+#define SUBw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(0, 1, 0, 0b00, Rm, 0, Rn, Rd))
+#define SUBw_REG_LSL(Rd, Rn, Rm, lsl) EMIT(ADDSUB_REG_gen(0, 1, 0, 0b00, Rm, lsl, Rn, Rd))
+#define SUBSw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(0, 1, 1, 0b00, Rm, 0, Rn, Rd))
+#define SUBSw_REG_LSL(Rd, Rn, Rm, lsl) EMIT(ADDSUB_REG_gen(0, 1, 1, 0b00, Rm, lsl, Rn, Rd))
+#define SUBxw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(rex.w, 1, 0, 0b00, Rm, 0, Rn, Rd))
+#define SUBSxw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(rex.w, 1, 1, 0b00, Rm, 0, Rn, Rd))
+#define CMPSx_REG(Rn, Rm) SUBSx_REG(xZR, Rn, Rm)
+#define CMPSw_REG(Rn, Rm) SUBSw_REG(wZR, Rn, Rm)
+#define CMPSxw_REG(Rn, Rm) SUBSxw_REG(xZR, Rn, Rm)
+#define NEGx_REG(Rd, Rm) SUBx_REG(Rd, xZR, Rm);
+#define NEGw_REG(Rd, Rm) SUBw_REG(Rd, wZR, Rm);
+#define NEGxw_REG(Rd, Rm) SUBxw_REG(Rd, xZR, Rm);
+#define NEGSx_REG(Rd, Rm) SUBSx_REG(Rd, xZR, Rm);
+#define NEGSw_REG(Rd, Rm) SUBSw_REG(Rd, wZR, Rm);
+#define NEGSxw_REG(Rd, Rm) SUBSxw_REG(Rd, xZR, Rm);
+
+#define SUBx_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(1, 1, 0, 0b00, (imm12)&0xfff, Rn, Rd))
+#define SUBSx_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(1, 1, 1, 0b00, (imm12)&0xfff, Rn, Rd))
+#define SUBw_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(0, 1, 0, 0b00, (imm12)&0xfff, Rn, Rd))
+#define SUBSw_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(0, 1, 1, 0b00, (imm12)&0xfff, Rn, Rd))
+#define SUBxw_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(rex.w, 1, 0, 0b00, (imm12)&0xfff, Rn, Rd))
+#define SUBSxw_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(rex.w, 1, 1, 0b00, (imm12)&0xfff, Rn, Rd))
+#define CMPSx_U12(Rn, imm12) SUBSx_U12(xZR, Rn, imm12)
+#define CMPSw_U12(Rn, imm12) SUBSw_U12(wZR, Rn, imm12)
+#define CMPSxw_U12(Rn, imm12) SUBSxw_U12(xZR, Rn, imm12)
+
+#define ADDSUBC_gen(sf, op, S, Rm, Rn, Rd) ((sf)<<31 | (op)<<30 | (S)<<29 | 0b11010000<<21 | (Rm)<<16 | (Rn)<<5 | (Rd))
+#define ADCx_REG(Rd, Rn, Rm) EMIT(ADDSUBC_gen(1, 0, 0, Rm, Rn, Rd))
+#define ADCw_REG(Rd, Rn, Rm) EMIT(ADDSUBC_gen(0, 0, 0, Rm, Rn, Rd))
+#define ADCxw_REG(Rd, Rn, Rm) EMIT(ADDSUBC_gen(rex.w, 0, 0, Rm, Rn, Rd))
+#define SBCx_REG(Rd, Rn, Rm) EMIT(ADDSUBC_gen(1, 1, 0, Rm, Rn, Rd))
+#define SBCw_REG(Rd, Rn, Rm) EMIT(ADDSUBC_gen(0, 1, 0, Rm, Rn, Rd))
+#define SBCxw_REG(Rd, Rn, Rm) EMIT(ADDSUBC_gen(rex.w, 1, 0, Rm, Rn, Rd))
+#define ADCSx_REG(Rd, Rn, Rm) EMIT(ADDSUBC_gen(1, 0, 1, Rm, Rn, Rd))
+#define ADCSw_REG(Rd, Rn, Rm) EMIT(ADDSUBC_gen(0, 0, 1, Rm, Rn, Rd))
+#define ADCSxw_REG(Rd, Rn, Rm) EMIT(ADDSUBC_gen(rex.w, 0, 1, Rm, Rn, Rd))
+#define SBCSx_REG(Rd, Rn, Rm) EMIT(ADDSUBC_gen(1, 1, 1, Rm, Rn, Rd))
+#define SBCSw_REG(Rd, Rn, Rm) EMIT(ADDSUBC_gen(0, 1, 1, Rm, Rn, Rd))
+#define SBCSxw_REG(Rd, Rn, Rm) EMIT(ADDSUBC_gen(rex.w, 1, 1, Rm, Rn, Rd))
+
+// ADR
+#define ADR_gen(immlo, immhi, Rd) ((immlo)<<29 | 0b10000<<24 | (immhi)<<5 | (Rd))
+#define ADR_S20(Rd, imm) EMIT(ADR_gen((imm)&3, ((imm)>>2)&0x7ffff, (Rd)))
+
+// LDR
+#define LDR_gen(size, op1, imm9, op2, Rn, Rt) ((size)<<30 | 0b111<<27 | (op1)<<24 | 0b01<<22 | (imm9)<<12 | (op2)<<10 | (Rn)<<5 | (Rt))
+#define LDRx_S9_postindex(Rt, Rn, imm9) EMIT(LDR_gen(0b11, 0b00, (imm9)&0x1ff, 0b01, Rn, Rt))
+#define LDRx_S9_preindex(Rt, Rn, imm9) EMIT(LDR_gen(0b11, 0b00, (imm9)&0x1ff, 0b11, Rn, Rt))
+#define LDRw_S9_postindex(Rt, Rn, imm9) EMIT(LDR_gen(0b10, 0b00, (imm9)&0x1ff,
0b01, Rn, Rt)) +#define LDRw_S9_preindex(Rt, Rn, imm9) EMIT(LDR_gen(0b10, 0b00, (imm9)&0x1ff, 0b11, Rn, Rt)) +#define LDRB_S9_postindex(Rt, Rn, imm9) EMIT(LDR_gen(0b00, 0b00, (imm9)&0x1ff, 0b01, Rn, Rt)) +#define LDRB_S9_preindex(Rt, Rn, imm9) EMIT(LDR_gen(0b00, 0b00, (imm9)&0x1ff, 0b11, Rn, Rt)) +#define LDRH_S9_postindex(Rt, Rn, imm9) EMIT(LDR_gen(0b01, 0b00, (imm9)&0x1ff, 0b01, Rn, Rt)) +#define LDRH_S9_preindex(Rt, Rn, imm9) EMIT(LDR_gen(0b01, 0b00, (imm9)&0x1ff, 0b11, Rn, Rt)) +#define LDRxw_S9_postindex(Rt, Rn, imm9) EMIT(LDR_gen(rex.w?0b11:0b10, 0b00, (imm9)&0x1ff, 0b01, Rn, Rt)) + +#define LDRS_gen(size, op1, imm9, op2, Rn, Rt) ((size)<<30 | 0b111<<27 | (op1)<<24 | 0b10<<22 | (imm9)<<12 | (op2)<<10 | (Rn)<<5 | (Rt)) +#define LDRSW_S9_postindex(Rt, Rn, imm9) EMIT(LDRS_gen(0b10, 0b00, (imm9)&0x1ff, 0b01, Rn, Rt)) +#define LDRSW_S9_preindex(Rt, Rn, imm9) EMIT(LDRS_gen(0b10, 0b00, (imm9)&0x1ff, 0b11, Rn, Rt)) + +#define LD_gen(size, op1, imm12, Rn, Rt) ((size)<<30 | 0b111<<27 | (op1)<<24 | 0b01<<22 | (imm12)<<10 | (Rn)<<5 | (Rt)) +#define LDRx_U12(Rt, Rn, imm12) EMIT(LD_gen(0b11, 0b01, ((uint32_t)((imm12)>>3))&0xfff, Rn, Rt)) +#define LDRw_U12(Rt, Rn, imm12) EMIT(LD_gen(0b10, 0b01, ((uint32_t)((imm12)>>2))&0xfff, Rn, Rt)) +#define LDRB_U12(Rt, Rn, imm12) EMIT(LD_gen(0b00, 0b01, ((uint32_t)((imm12)))&0xfff, Rn, Rt)) +#define LDRH_U12(Rt, Rn, imm12) EMIT(LD_gen(0b01, 0b01, ((uint32_t)((imm12)>>1))&0xfff, Rn, Rt)) +#define LDRxw_U12(Rt, Rn, imm12) EMIT(LD_gen((rex.w)?0b11:0b10, 0b01, ((uint32_t)((imm12)>>(2+rex.w)))&0xfff, Rn, Rt)) + +#define LDS_gen(size, op1, imm12, Rn, Rt) ((size)<<30 | 0b111<<27 | (op1)<<24 | 0b10<<22 | (imm12)<<10 | (Rn)<<5 | (Rt)) +#define LDRSW_U12(Rt, Rn, imm12) EMIT(LDS_gen(0b10, 0b01, ((uint32_t)((imm12)>>2))&0xfff, Rn, Rt)) + +#define LDR_REG_gen(size, Rm, option, S, Rn, Rt) ((size)<<30 | 0b111<<27 | 0b01<<22 | 1<<21 | (Rm)<<16 | (option)<<13 | (S)<<12 | (0b10)<<10 | (Rn)<<5 | (Rt)) +#define LDRx_REG(Rt, Rn, Rm) EMIT(LDR_REG_gen(0b11, Rm, 0b011, 0, Rn, Rt)) +#define LDRx_REG_LSL3(Rt, Rn, Rm) EMIT(LDR_REG_gen(0b11, Rm, 0b011, 1, Rn, Rt)) +#define LDRx_REG_UXTW3(Rt, Rn, Rm) EMIT(LDR_REG_gen(0b11, Rm, 0b010, 1, Rn, Rt)) +#define LDRw_REG(Rt, Rn, Rm) EMIT(LDR_REG_gen(0b10, Rm, 0b011, 0, Rn, Rt)) +#define LDRw_REG_LSL2(Rt, Rn, Rm) EMIT(LDR_REG_gen(0b10, Rm, 0b011, 1, Rn, Rt)) +#define LDRxw_REG(Rt, Rn, Rm) EMIT(LDR_REG_gen(0b10+rex.w, Rm, 0b011, 0, Rn, Rt)) +#define LDRB_REG(Rt, Rn, Rm) EMIT(LDR_REG_gen(0b00, Rm, 0b011, 0, Rn, Rt)) +#define LDRH_REG(Rt, Rn, Rm) EMIT(LDR_REG_gen(0b01, Rm, 0b011, 0, Rn, Rt)) + +#define LDRS_U12_gen(size, op1, opc, imm12, Rn, Rt) ((size)<<30 | 0b111<<27 | (op1)<<24 | (opc)<<22 | (imm12)<<10 | (Rn)<<5 | (Rt)) +#define LDRSHx_U12(Rt, Rn, imm12) EMIT(LDRS_U12_gen(0b01, 0b01, 0b10, ((uint32_t)(imm12>>1))&0xfff, Rn, Rt)) +#define LDRSHw_U12(Rt, Rn, imm12) EMIT(LDRS_U12_gen(0b01, 0b01, 0b11, ((uint32_t)(imm12>>1))&0xfff, Rn, Rt)) +#define LDRSHxw_U12(Rt, Rn, imm12) EMIT(LDRS_U12_gen(0b01, 0b01, rex.w?0b10:0b11, ((uint32_t)(imm12>>1))&0xfff, Rn, Rt)) +#define LDRSBx_U12(Rt, Rn, imm12) EMIT(LDRS_U12_gen(0b00, 0b01, 0b10, ((uint32_t)(imm12>>0))&0xfff, Rn, Rt)) +#define LDRSBw_U12(Rt, Rn, imm12) EMIT(LDRS_U12_gen(0b00, 0b01, 0b11, ((uint32_t)(imm12>>0))&0xfff, Rn, Rt)) +#define LDRSBxw_U12(Rt, Rn, imm12) EMIT(LDRS_U12_gen(0b00, 0b01, rex.w?0b10:0b11, ((uint32_t)(imm12>>0))&0xfff, Rn, Rt)) + +#define LDRS_REG_gen(size, Rm, option, S, Rn, Rt) ((size)<<30 | 0b111<<27 | 0b10<<22 | 1<<21 | (Rm)<<16 | (option)<<13 | (S)<<12 | (0b10)<<10 | (Rn)<<5 | 
(Rt)) +#define LDRSW_REG(Rt, Rn, Rm) EMIT(LDRS_REG_gen(0b10, Rm, 0b011, 0, Rn, Rt)) + +#define LDR_PC_gen(opc, imm19, Rt) ((opc)<<30 | 0b011<<27 | (imm19)<<5 | (Rt)) +#define LDRx_literal(Rt, imm19) EMIT(LDR_PC_gen(0b01, ((imm19)>>2)&0x7FFFF, Rt)) + +// STR +#define STR_gen(size, op1, imm9, op2, Rn, Rt) ((size)<<30 | 0b111<<27 | (op1)<<24 | 0b00<<22 | (imm9)<<12 | (op2)<<10 | (Rn)<<5 | (Rt)) +#define STRx_S9_postindex(Rt, Rn, imm9) EMIT(STR_gen(0b11, 0b00, (imm9)&0x1ff, 0b01, Rn, Rt)) +#define STRx_S9_preindex(Rt, Rn, imm9) EMIT(STR_gen(0b11, 0b00, (imm9)&0x1ff, 0b11, Rn, Rt)) +#define STRw_S9_postindex(Rt, Rn, imm9) EMIT(STR_gen(0b10, 0b00, (imm9)&0x1ff, 0b01, Rn, Rt)) +#define STRw_S9_preindex(Rt, Rn, imm9) EMIT(STR_gen(0b10, 0b00, (imm9)&0x1ff, 0b11, Rn, Rt)) +#define STRxw_S9_postindex(Rt, Rn, imm9) EMIT(STR_gen(rex.w?0b11:0b10, 0b00, (imm9)&0x1ff, 0b01, Rn, Rt)) +#define STRB_S9_postindex(Rt, Rn, imm9) EMIT(STR_gen(0b00, 0b00, (imm9)&0x1ff, 0b01, Rn, Rt)) +#define STRH_S9_postindex(Rt, Rn, imm9) EMIT(STR_gen(0b01, 0b00, (imm9)&0x1ff, 0b01, Rn, Rt)) + +#define ST_gen(size, op1, imm12, Rn, Rt) ((size)<<30 | 0b111<<27 | (op1)<<24 | 0b00<<22 | (imm12)<<10 | (Rn)<<5 | (Rt)) +#define STRx_U12(Rt, Rn, imm12) EMIT(ST_gen(0b11, 0b01, ((uint32_t)((imm12)>>3))&0xfff, Rn, Rt)) +#define STRw_U12(Rt, Rn, imm12) EMIT(ST_gen(0b10, 0b01, ((uint32_t)((imm12)>>2))&0xfff, Rn, Rt)) +#define STRB_U12(Rt, Rn, imm12) EMIT(ST_gen(0b00, 0b01, ((uint32_t)((imm12)))&0xfff, Rn, Rt)) +#define STRH_U12(Rt, Rn, imm12) EMIT(ST_gen(0b01, 0b01, ((uint32_t)((imm12)>>1))&0xfff, Rn, Rt)) +#define STRxw_U12(Rt, Rn, imm12) EMIT(ST_gen((rex.w)?0b11:0b10, 0b01, ((uint32_t)((imm12)>>(2+rex.w)))&0xfff, Rn, Rt)) + +#define STR_REG_gen(size, Rm, option, S, Rn, Rt) ((size)<<30 | 0b111<<27 | 0b00<<22 | 1<<21 | (Rm)<<16 | (option)<<13 | (S)<<12 | (0b10)<<10 | (Rn)<<5 | (Rt)) +#define STRx_REG(Rt, Rn, Rm) EMIT(STR_REG_gen(0b11, Rm, 0b011, 0, Rn, Rt)) +#define STRx_REG_LSL3(Rt, Rn, Rm) EMIT(STR_REG_gen(0b11, Rm, 0b011, 1, Rn, Rt)) +#define STRx_REG_UXTW(Rt, Rn, Rm) EMIT(STR_REG_gen(0b11, Rm, 0b010, 0, Rn, Rt)) +#define STRw_REG(Rt, Rn, Rm) EMIT(STR_REG_gen(0b10, Rm, 0b011, 0, Rn, Rt)) +#define STRw_REG_LSL2(Rt, Rn, Rm) EMIT(STR_REG_gen(0b10, Rm, 0b011, 1, Rn, Rt)) +#define STRB_REG(Rt, Rn, Rm) EMIT(STR_REG_gen(0b00, Rm, 0b011, 0, Rn, Rt)) +#define STRH_REG(Rt, Rn, Rm) EMIT(STR_REG_gen(0b01, Rm, 0b011, 0, Rn, Rt)) +#define STRxw_REG(Rt, Rn, Rm) EMIT(STR_REG_gen(rex.w?0b11:0b10, Rm, 0b011, 0, Rn, Rt)) + +// LOAD/STORE PAIR +#define MEMPAIR_gen(size, L, op2, imm7, Rt2, Rn, Rt) ((size)<<31 | 0b101<<27 | (op2)<<23 | (L)<<22 | (imm7)<<15 | (Rt2)<<10 | (Rn)<<5 | (Rt)) + +#define LDPx_S7_postindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(1, 1, 0b01, (((uint32_t)(imm))>>3)&0x7f, Rt2, Rn, Rt)) +#define LDPw_S7_postindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(0, 1, 0b01, (((uint32_t)(imm))>>2)&0x7f, Rt2, Rn, Rt)) +#define LDPxw_S7_postindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(rex.w, 1, 0b01, (((uint32_t)(imm))>>(2+rex.w)), Rt2, Rn, Rt)) +#define LDPx_S7_preindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(1, 1, 0b11, (((uint32_t)(imm))>>3)&0x7f, Rt2, Rn, Rt)) +#define LDPw_S7_preindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(0, 1, 0b11, (((uint32_t)(imm))>>2)&0x7f, Rt2, Rn, Rt)) +#define LDPxw_S7_preindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(rex.w, 1, 0b11, (((uint32_t)(imm))>>(2+rex.w)), Rt2, Rn, Rt)) +#define LDPx_S7_offset(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(1, 1, 0b10, (((uint32_t)(imm))>>3)&0x7f, Rt2, Rn, Rt)) +#define LDPw_S7_offset(Rt, Rt2, Rn, imm) 
EMIT(MEMPAIR_gen(0, 1, 0b10, (((uint32_t)(imm))>>2)&0x7f, Rt2, Rn, Rt)) +#define LDPxw_S7_offset(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(rex.w, 1, 0b10, (((uint32_t)(imm))>>(2+rex.w)), Rt2, Rn, Rt)) + +#define STPx_S7_postindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(1, 0, 0b01, (((uint32_t)(imm))>>3)&0x7f, Rt2, Rn, Rt)) +#define STPw_S7_postindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(0, 0, 0b01, (((uint32_t)(imm))>>2)&0x7f, Rt2, Rn, Rt)) +#define STPxw_S7_postindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(rex.w, 0, 0b01, (((uint32_t)(imm))>>(2+rex.w)), Rt2, Rn, Rt)) +#define STPx_S7_preindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(1, 0, 0b11, (((uint32_t)(imm))>>3)&0x7f, Rt2, Rn, Rt)) +#define STPw_S7_preindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(0, 0, 0b11, (((uint32_t)(imm))>>2)&0x7f, Rt2, Rn, Rt)) +#define STPxw_S7_preindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(rex.w, 0, 0b11, (((uint32_t)(imm))>>(2+rex.w)), Rt2, Rn, Rt)) +#define STPx_S7_offset(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(1, 0, 0b10, (((uint32_t)(imm))>>3)&0x7f, Rt2, Rn, Rt)) +#define STPw_S7_offset(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(0, 0, 0b10, (((uint32_t)(imm))>>2)&0x7f, Rt2, Rn, Rt)) +#define STPxw_S7_offset(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(rex.w, 0, 0b10, (((uint32_t)(imm))>>(2+rex.w)), Rt2, Rn, Rt)) + +// PUSH / POP helper +#define POP1(reg) LDRx_S9_postindex(reg, xRSP, 8) +#define PUSH1(reg) STRx_S9_preindex(reg, xRSP, -8) + +// LOAD/STORE Acquire Exclusive +#define MEMAX_gen(size, L, Rs, Rn, Rt) ((size)<<30 | 0b001000<<24 | (L)<<22 | (Rs)<<16 | 1<<15 | 0b11111<<10 | (Rn)<<5 | (Rt)) +#define LDAXRB(Rt, Rn) EMIT(MEMAX_gen(0b00, 1, 31, Rn, Rt)) +#define STLXRB(Rs, Rt, Rn) EMIT(MEMAX_gen(0b00, 0, Rs, Rn, Rt)) +#define LDAXRH(Rt, Rn) EMIT(MEMAX_gen(0b01, 1, 31, Rn, Rt)) +#define STLXRH(Rs, Rt, Rn) EMIT(MEMAX_gen(0b01, 0, Rs, Rn, Rt)) +#define LDAXRw(Rt, Rn) EMIT(MEMAX_gen(0b10, 1, 31, Rn, Rt)) +#define STLXRw(Rs, Rt, Rn) EMIT(MEMAX_gen(0b10, 0, Rs, Rn, Rt)) +#define LDAXRx(Rt, Rn) EMIT(MEMAX_gen(0b11, 1, 31, Rn, Rt)) +#define STLXRx(Rs, Rt, Rn) EMIT(MEMAX_gen(0b11, 0, Rs, Rn, Rt)) +#define LDAXRxw(Rt, Rn) EMIT(MEMAX_gen(2+rex.w, 1, 31, Rn, Rt)) +#define STLXRxw(Rs, Rt, Rn) EMIT(MEMAX_gen(2+rex.w, 0, Rs, Rn, Rt)) + +#define MEMAX_pair(size, L, Rs, Rt2, Rn, Rt) (1<<31 | (size)<<30 | 0b001000<<24 | (L)<<22 | 1<<21 | (Rs)<<16 | 1<<15 | (Rt2)<<10 | (Rn)<<5 | (Rt)) +#define LDAXPx(Rt, Rt2, Rn) EMIT(MEMAX_pair(1, 1, 31, Rt2, Rn, Rt)) +#define LDAXPw(Rt, Rt2, Rn) EMIT(MEMAX_pair(0, 1, 31, Rt2, Rn, Rt)) +#define LDAXPxw(Rt, Rt2, Rn) EMIT(MEMAX_pair(rex.w, 1, 31, Rt2, Rn, Rt)) +#define STLXPx(Rs, Rt, Rt2, Rn) EMIT(MEMAX_pair(1, 0, Rs, Rt2, Rn, Rt)) +#define STLXPw(Rs, Rt, Rt2, Rn) EMIT(MEMAX_pair(0, 0, Rs, Rt2, Rn, Rt)) +#define STLXPxw(Rs, Rt, Rt2, Rn) EMIT(MEMAX_pair(rex.w, 0, Rs, Rt2, Rn, Rt)) + +// LOAD/STORE Exclusive +#define MEMX_gen(size, L, Rs, Rn, Rt) ((size)<<30 | 0b001000<<24 | (L)<<22 | (Rs)<<16 | 0<<15 | 0b11111<<10 | (Rn)<<5 | (Rt)) +#define LDXRB(Rt, Rn) EMIT(MEMX_gen(0b00, 1, 31, Rn, Rt)) +#define STXRB(Rs, Rt, Rn) EMIT(MEMX_gen(0b00, 0, Rs, Rn, Rt)) +#define LDXRH(Rt, Rn) EMIT(MEMX_gen(0b01, 1, 31, Rn, Rt)) +#define STXRH(Rs, Rt, Rn) EMIT(MEMX_gen(0b01, 0, Rs, Rn, Rt)) +#define LDXRw(Rt, Rn) EMIT(MEMX_gen(0b10, 1, 31, Rn, Rt)) +#define STXRw(Rs, Rt, Rn) EMIT(MEMX_gen(0b10, 0, Rs, Rn, Rt)) +#define LDXRx(Rt, Rn) EMIT(MEMX_gen(0b11, 1, 31, Rn, Rt)) +#define STXRx(Rs, Rt, Rn) EMIT(MEMX_gen(0b11, 0, Rs, Rn, Rt)) +#define LDXRxw(Rt, Rn) EMIT(MEMX_gen(2+rex.w, 1, 31, Rn, Rt)) +#define STXRxw(Rs, Rt, Rn) EMIT(MEMX_gen(2+rex.w, 0, Rs, Rn, Rt)) + +// 
Prefetch +#define PRFM_register(Rm, option, S, Rn, Rt) (0b11<<30 | 0b111<<27 | 0b10<<22 | 1<<21 | (Rm)<<16 | (option)<<13 | (S)<<12 | 0b10<<10 | (Rn)<<5 | (Rt)) +#define PLD_L1_KEEP(Rn, Rm) EMIT(PRFM_register(Rm, 0b011, 0, Rn, 0b00000)) +#define PLD_L2_KEEP(Rn, Rm) EMIT(PRFM_register(Rm, 0b011, 0, Rn, 0b00010)) +#define PLD_L3_KEEP(Rn, Rm) EMIT(PRFM_register(Rm, 0b011, 0, Rn, 0b00100)) +#define PLD_L1_STREAM(Rn, Rm) EMIT(PRFM_register(Rm, 0b011, 0, Rn, 0b00001)) +#define PLD_L2_STREAM(Rn, Rm) EMIT(PRFM_register(Rm, 0b011, 0, Rn, 0b00011)) +#define PLD_L3_STREAM(Rn, Rm) EMIT(PRFM_register(Rm, 0b011, 0, Rn, 0b00101)) + +#define PRFM_imm(imm12, Rn, Rt) (0b11<<30 | 0b111<<27 | 0b01<<24 | 0b10<<22 | (imm12)<<10 | (Rn)<<5 | (Rt)) +#define PLD_L1_KEEP_U12(Rn, imm12) EMIT(PRFM_imm(((imm12)>>3)&0xfff, Rn, 0b00000)) +#define PLD_L2_KEEP_U12(Rn, imm12) EMIT(PRFM_imm(((imm12)>>3)&0xfff, Rn, 0b00010)) +#define PLD_L3_KEEP_U12(Rn, imm12) EMIT(PRFM_imm(((imm12)>>3)&0xfff, Rn, 0b00100)) +#define PLD_L1_STREAM_U12(Rn, imm12) EMIT(PRFM_imm(((imm12)>>3)&0xfff, Rn, 0b00001)) +#define PLD_L2_STREAM_U12(Rn, imm12) EMIT(PRFM_imm(((imm12)>>3)&0xfff, Rn, 0b00011)) +#define PLD_L3_STREAM_U12(Rn, imm12) EMIT(PRFM_imm(((imm12)>>3)&0xfff, Rn, 0b00101)) + +#define PST_L1_STREAM_U12(Rn, imm12) EMIT(PRFM_imm(((imm12)>>3)&0xfff, Rn, 0b01001)) + +// Data Memory Barrier +#define DMB_gen(CRm) (0b1101010100<<22 | 0b011<<16 | 0b0011<<12 | (CRm)<<8 | 1<<7 | 0b01<<5 | 0b11111) +#define DMB_ISH() EMIT(DMB_gen(0b1011)) + +// BR and Branches +#define BR_gen(Z, op, A, M, Rn, Rm) (0b1101011<<25 | (Z)<<24 | (op)<<21 | 0b11111<<16 | (A)<<11 | (M)<<10 | (Rn)<<5 | (Rm)) +#define BR(Rn) EMIT(BR_gen(0, 0b00, 0, 0, Rn, 0)) +#define BLR(Rn) EMIT(BR_gen(0, 0b01, 0, 0, Rn, 0)) + +#define CB_gen(sf, op, imm19, Rt) ((sf)<<31 | 0b011010<<25 | (op)<<24 | (imm19)<<5 | (Rt)) +#define CBNZx(Rt, imm19) EMIT(CB_gen(1, 1, ((imm19)>>2)&0x7FFFF, Rt)) +#define CBNZw(Rt, imm19) EMIT(CB_gen(0, 1, ((imm19)>>2)&0x7FFFF, Rt)) +#define CBNZxw(Rt, imm19) EMIT(CB_gen(rex.w, 1, ((imm19)>>2)&0x7FFFF, Rt)) +#define CBZx(Rt, imm19) EMIT(CB_gen(1, 0, ((imm19)>>2)&0x7FFFF, Rt)) +#define CBZw(Rt, imm19) EMIT(CB_gen(0, 0, ((imm19)>>2)&0x7FFFF, Rt)) +#define CBZxw(Rt, imm19) EMIT(CB_gen(rex.w, 0, ((imm19)>>2)&0x7FFFF, Rt)) + +#define TB_gen(b5, op, b40, imm14, Rt) ((b5)<<31 | 0b011011<<25 | (op)<<24 | (b40)<<19 | (imm14)<<5 | (Rt)) +#define TBZ(Rt, bit, imm16) EMIT(TB_gen(((bit)>>5)&1, 0, (bit)&0x1f, ((imm16)>>2)&0x3FFF, Rt)) +#define TBNZ(Rt, bit, imm16) EMIT(TB_gen(((bit)>>5)&1, 1, (bit)&0x1f, ((imm16)>>2)&0x3FFF, Rt)) + +#define Bcond_gen(imm19, cond) (0b0101010<<25 | (imm19)<<5 | (cond)) +#define Bcond(cond, imm19) EMIT(Bcond_gen(((imm19)>>2)&0x7FFFF, cond)) + +#define B_gen(imm26) (0b000101<<26 | (imm26)) +#define B(imm26) EMIT(B_gen(((imm26)>>2)&0x3ffffff)) + +#define BL_gen(imm26) (0b100101<<26 | (imm26)) +#define BL(imm26) EMIT(BL_gen(((imm26)>>2)&0x3ffffff)) + +#define NOP EMIT(0b11010101000000110010000000011111) + +#define CSINC_gen(sf, Rm, cond, Rn, Rd) ((sf)<<31 | 0b11010100<<21 | (Rm)<<16 | (cond)<<12 | 1<<10 | (Rn)<<5 | (Rd)) +#define CSINCx(Rd, Rn, Rm, cond) EMIT(CSINC_gen(1, Rm, cond, Rn, Rd)) +#define CSINCw(Rd, Rn, Rm, cond) EMIT(CSINC_gen(0, Rm, cond, Rn, Rd)) +#define CSINCxw(Rd, Rn, Rm, cond) EMIT(CSINC_gen(rex.w, Rm, cond, Rn, Rd)) +#define CSETx(Rd, cond) CSINCx(Rd, xZR, xZR, invCond(cond)) +#define CSETw(Rd, cond) CSINCw(Rd, xZR, xZR, invCond(cond)) +#define CSETxw(Rd, cond) CSINCxw(Rd, xZR, xZR, invCond(cond)) + +#define CSINV_gen(sf, Rm, cond, Rn, 
Rd) ((sf)<<31 | 1<<30 | 0b11010100<<21 | (Rm)<<16 | (cond)<<12 | (Rn)<<5 | (Rd))
+#define CSINVx(Rd, Rn, Rm, cond) EMIT(CSINV_gen(1, Rm, cond, Rn, Rd))
+#define CSINVw(Rd, Rn, Rm, cond) EMIT(CSINV_gen(0, Rm, cond, Rn, Rd))
+#define CSINVxw(Rd, Rn, Rm, cond) EMIT(CSINV_gen(rex.w, Rm, cond, Rn, Rd))
+#define CINVx(Rd, Rn, cond) CSINVx(Rd, Rn, Rn, invCond(cond))
+#define CINVw(Rd, Rn, cond) CSINVw(Rd, Rn, Rn, invCond(cond))
+#define CINVxw(Rd, Rn, cond) CSINVxw(Rd, Rn, Rn, invCond(cond))
+#define CSETMx(Rd, cond) CSINVx(Rd, xZR, xZR, invCond(cond))
+#define CSETMw(Rd, cond) CSINVw(Rd, xZR, xZR, invCond(cond))
+#define CSETMxw(Rd, cond) CSINVxw(Rd, xZR, xZR, invCond(cond))
+
+#define CSEL_gen(sf, Rm, cond, Rn, Rd) ((sf<<31) | 0b11010100<<21 | (Rm)<<16 | (cond)<<12 | (Rn)<<5 | Rd)
+#define CSELx(Rd, Rn, Rm, cond) EMIT(CSEL_gen(1, Rm, cond, Rn, Rd))
+#define CSELw(Rd, Rn, Rm, cond) EMIT(CSEL_gen(0, Rm, cond, Rn, Rd))
+#define CSELxw(Rd, Rn, Rm, cond) EMIT(CSEL_gen(rex.w, Rm, cond, Rn, Rd))
+
+#define CSNEG_gen(sf, Rm, cond, Rn, Rd) ((sf)<<31 | 1<<30 | 0b11010100<<21 | (Rm)<<16 | (cond)<<12 | 1<<10 | (Rn)<<5 | (Rd))
+#define CSNEGx(Rd, Rn, Rm, cond) EMIT(CSNEG_gen(1, Rm, cond, Rn, Rd))
+#define CSNEGw(Rd, Rn, Rm, cond) EMIT(CSNEG_gen(0, Rm, cond, Rn, Rd))
+#define CSNEGxw(Rd, Rn, Rm, cond) EMIT(CSNEG_gen(rex.w, Rm, cond, Rn, Rd))
+#define CNEGx(Rd, Rn, cond) CSNEGx(Rd, Rn, Rn, invCond(cond))
+#define CNEGw(Rd, Rn, cond) CSNEGw(Rd, Rn, Rn, invCond(cond))
+#define CNEGxw(Rd, Rn, cond) CSNEGxw(Rd, Rn, Rn, invCond(cond))
+
+// AND / ORR
+#define LOGIC_gen(sf, opc, N, immr, imms, Rn, Rd) ((sf)<<31 | (opc)<<29 | 0b100100<<23 | (N)<<22 | (immr)<<16 | (imms)<<10 | (Rn)<<5 | Rd)
+// logic to get the mask is ... convoluted... list of possible value there: https://gist.github.com/dinfuehr/51a01ac58c0b23e4de9aac313ed6a06a
+#define ANDx_mask(Rd, Rn, N, immr, imms) EMIT(LOGIC_gen(1, 0b00, N, immr, imms, Rn, Rd))
+#define ANDw_mask(Rd, Rn, immr, imms) EMIT(LOGIC_gen(0, 0b00, 0, immr, imms, Rn, Rd))
+#define ANDSx_mask(Rd, Rn, N, immr, imms) EMIT(LOGIC_gen(1, 0b11, N, immr, imms, Rn, Rd))
+#define ANDSw_mask(Rd, Rn, immr, imms) EMIT(LOGIC_gen(0, 0b11, 0, immr, imms, Rn, Rd))
+#define ORRx_mask(Rd, Rn, N, immr, imms) EMIT(LOGIC_gen(1, 0b01, N, immr, imms, Rn, Rd))
+#define ORRw_mask(Rd, Rn, immr, imms) EMIT(LOGIC_gen(0, 0b01, 0, immr, imms, Rn, Rd))
+#define EORx_mask(Rd, Rn, N, immr, imms) EMIT(LOGIC_gen(1, 0b10, N, immr, imms, Rn, Rd))
+#define EORw_mask(Rd, Rn, immr, imms) EMIT(LOGIC_gen(0, 0b10, 0, immr, imms, Rn, Rd))
+#define TSTx_mask(Rn, N, immr, imms) ANDSx_mask(xZR, Rn, N, immr, imms)
+#define TSTw_mask(Rn, immr, imms) ANDSw_mask(wZR, Rn, immr, imms)
+
+#define LOGIC_REG_gen(sf, opc, shift, N, Rm, imm6, Rn, Rd) ((sf)<<31 | (opc)<<29 | 0b01010<<24 | (shift)<<22 | (N)<<21 | (Rm)<<16 | (imm6)<<10 | (Rn)<<5 | (Rd))
+#define ANDx_REG(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(1, 0b00, 0b00, 0, Rm, 0, Rn, Rd))
+#define ANDw_REG(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(0, 0b00, 0b00, 0, Rm, 0, Rn, Rd))
+#define ANDxw_REG(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(rex.w, 0b00, 0b00, 0, Rm, 0, Rn, Rd))
+#define ANDSx_REG(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(1, 0b11, 0b00, 0, Rm, 0, Rn, Rd))
+#define ANDSw_REG(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(0, 0b11, 0b00, 0, Rm, 0, Rn, Rd))
+#define ANDSxw_REG(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(rex.w, 0b11, 0b00, 0, Rm, 0, Rn, Rd))
+#define ORRx_REG(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(1, 0b01, 0b00, 0, Rm, 0, Rn, Rd))
+#define ORRx_REG_LSL(Rd, Rn, Rm, lsl) EMIT(LOGIC_REG_gen(1, 0b01, 0b00, 0, Rm, lsl, Rn, Rd))
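[Editor's illustration, not part of the patch] The N/immr/imms triplets taken by the *_mask macros above are ARM64 "bitmask immediates"; the gist linked in the comment enumerates every legal value. For the simplest case -- a mask of the k lowest bits with no rotation -- the encoding reduces to the hedged sketch below (the helper name is hypothetical; the dynarec code itself just hard-codes the triplet at each call site):

#include <stdint.h>
/* k lowest bits set, k = 1..63 for a 64-bit mask (1..31 for 32-bit); an all-ones mask is not encodable */
typedef struct { uint32_t N, immr, imms; } bitmask_imm_t;
static bitmask_imm_t low_ones_bitmask(int k, int is64)
{
    bitmask_imm_t m;
    m.N    = is64 ? 1 : 0;    /* N=1 selects the 64-bit element size */
    m.immr = 0;               /* rotate right by 0 */
    m.imms = (uint32_t)k - 1; /* (number of consecutive ones) - 1 */
    return m;
}
/* Example: low_ones_bitmask(8, 1) yields {1, 0, 7}, so ANDx_mask(Rd, Rn, 1, 0, 0b000111) assembles AND Rd, Rn, #0xff */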
+#define ORRw_REG_LSL(Rd, Rn, Rm, lsl) EMIT(LOGIC_REG_gen(0, 0b01, 0b00, 0, Rm, lsl, Rn, Rd)) +#define ORRxw_REG_LSL(Rd, Rn, Rm, lsl) EMIT(LOGIC_REG_gen(rex.w, 0b01, 0b00, 0, Rm, lsl, Rn, Rd)) +#define ORRx_REG_LSR(Rd, Rn, Rm, lsr) EMIT(LOGIC_REG_gen(1, 0b01, 0b01, 0, Rm, lsr, Rn, Rd)) +#define ORRw_REG_LSR(Rd, Rn, Rm, lsr) EMIT(LOGIC_REG_gen(0, 0b01, 0b01, 0, Rm, lsr, Rn, Rd)) +#define ORRxw_REG_LSR(Rd, Rn, Rm, lsr) EMIT(LOGIC_REG_gen(rex.w, 0b01, 0b01, 0, Rm, lsr, Rn, Rd)) +#define ORRxw_REG(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(rex.w, 0b01, 0b00, 0, Rm, 0, Rn, Rd)) +#define ORRw_REG(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(0, 0b01, 0b00, 0, Rm, 0, Rn, Rd)) +#define ORNx_REG(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(1, 0b01, 0b00, 1, Rm, 0, Rn, Rd)) +#define ORNw_REG(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(0, 0b01, 0b00, 1, Rm, 0, Rn, Rd)) +#define ORNxw_REG(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(rex.w, 0b01, 0b00, 1, Rm, 0, Rn, Rd)) +#define ORNx_REG_LSL(Rd, Rn, Rm, lsl) EMIT(LOGIC_REG_gen(1, 0b01, 0b00, 1, Rm, lsl, Rn, Rd)) +#define EORx_REG(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(1, 0b10, 0b00, 0, Rm, 0, Rn, Rd)) +#define EORw_REG(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(0, 0b10, 0b00, 0, Rm, 0, Rn, Rd)) +#define EORxw_REG(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(rex.w, 0b10, 0b00, 0, Rm, 0, Rn, Rd)) +#define EORx_REG_LSL(Rd, Rn, Rm, lsl) EMIT(LOGIC_REG_gen(1, 0b10, 0b00, 0, Rm, lsl, Rn, Rd)) +#define EORw_REG_LSL(Rd, Rn, Rm, lsl) EMIT(LOGIC_REG_gen(0, 0b10, 0b00, 0, Rm, lsl, Rn, Rd)) +#define EORxw_REG_LSL(Rd, Rn, Rm, lsl) EMIT(LOGIC_REG_gen(rex.w, 0b10, 0b00, 0, Rm, lsl, Rn, Rd)) +#define EORx_REG_LSR(Rd, Rn, Rm, lsr) EMIT(LOGIC_REG_gen(1, 0b10, 0b01, 0, Rm, lsr, Rn, Rd)) +#define EORw_REG_LSR(Rd, Rn, Rm, lsr) EMIT(LOGIC_REG_gen(0, 0b10, 0b01, 0, Rm, lsr, Rn, Rd)) +#define EORxw_REG_LSR(Rd, Rn, Rm, lsr) EMIT(LOGIC_REG_gen(rex.w, 0b10, 0b01, 0, Rm, lsr, Rn, Rd)) +#define MOVx_REG(Rd, Rm) ORRx_REG(Rd, xZR, Rm) +#define MOVw_REG(Rd, Rm) ORRw_REG(Rd, xZR, Rm) +#define MOVxw_REG(Rd, Rm) ORRxw_REG(Rd, xZR, Rm) +#define LSLw_IMM(Rd, Rm, lsl) ORRw_REG_LSL(Rd, xZR, Rm, lsl) +#define LSLx_IMM(Rd, Rm, lsl) ORRx_REG_LSL(Rd, xZR, Rm, lsl) +#define LSLxw_IMM(Rd, Rm, lsl) ORRxw_REG_LSL(Rd, xZR, Rm, lsl) +#define LSRw_IMM(Rd, Rm, lsr) ORRw_REG_LSR(Rd, xZR, Rm, lsr) +#define LSRx_IMM(Rd, Rm, lsr) ORRx_REG_LSR(Rd, xZR, Rm, lsr) +#define LSRxw_IMM(Rd, Rm, lsr) ORRxw_REG_LSR(Rd, xZR, Rm, lsr) +#define MVNx_REG(Rd, Rm) ORNx_REG(Rd, xZR, Rm) +#define MVNx_REG_LSL(Rd, Rm, lsl) ORNx_REG_LSL(Rd, xZR, Rm, lsl) +#define MVNw_REG(Rd, Rm) ORNw_REG(Rd, xZR, Rm) +#define MVNxw_REG(Rd, Rm) ORNxw_REG(Rd, xZR, Rm) +#define MOV_frmSP(Rd) ADDx_U12(Rd, xSP, 0) +#define MOV_toSP(Rm) ADDx_U12(xSP, Rm, 0) +#define BICx(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(1, 0b00, 0b00, 1, Rm, 0, Rn, Rd)) +#define BICw(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(0, 0b00, 0b00, 1, Rm, 0, Rn, Rd)) +#define BICw_LSL(Rd, Rn, Rm, lsl) EMIT(LOGIC_REG_gen(0, 0b00, 0b00, 1, Rm, lsl, Rn, Rd)) +#define BICSx(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(1, 0b00, 0b00, 1, Rm, 0, Rn, Rd)) +#define BICSw(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(0, 0b00, 0b00, 1, Rm, 0, Rn, Rd)) +#define BICxw(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(rex.w, 0b00, 0b00, 1, Rm, 0, Rn, Rd)) +#define BICSxw(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(rex.w, 0b00, 0b00, 1, Rm, 0, Rn, Rd)) +#define BICx_REG BICx +#define BICw_REG BICw +#define BICxw_REG BICxw +#define TSTx_REG(Rn, Rm) ANDSx_REG(xZR, Rn, Rm) +#define TSTw_REG(Rn, Rm) ANDSw_REG(wZR, Rn, Rm) +#define TSTxw_REG(Rn, Rm) ANDSxw_REG(xZR, Rn, Rm) + +// ASRV +#define ASRV_gen(sf, Rm, Rn, Rd) ((sf)<<31 | 0b11010110<<21 | (Rm)<<16 | 0b0010<<12 | 0b10<<10 | 
(Rn)<<5 | (Rd)) +#define ASRx_REG(Rd, Rn, Rm) EMIT(ASRV_gen(1, Rm, Rn, Rd)) +#define ASRw_REG(Rd, Rn, Rm) EMIT(ASRV_gen(0, Rm, Rn, Rd)) +#define ASRxw_REG(Rd, Rn, Rm) EMIT(ASRV_gen(rex.w, Rm, Rn, Rd)) + +// BFI +#define BFM_gen(sf, opc, N, immr, imms, Rn, Rd) ((sf)<<31 | (opc)<<29 | 0b100110<<23 | (N)<<22 | (immr)<<16 | (imms)<<10 | (Rn)<<5 | (Rd)) +#define BFMx(Rd, Rn, immr, imms) EMIT(BFM_gen(1, 0b01, 1, immr, imms, Rn, Rd)) +#define BFMw(Rd, Rn, immr, imms) EMIT(BFM_gen(0, 0b01, 0, immr, imms, Rn, Rd)) +#define BFMxw(Rd, Rn, immr, imms) EMIT(BFM_gen(rex.w, 0b01, rex.w, immr, imms, Rn, Rd)) +#define BFIx(Rd, Rn, lsb, width) BFMx(Rd, Rn, ((-lsb)%64)&0x3f, (width)-1) +#define BFIw(Rd, Rn, lsb, width) BFMw(Rd, Rn, ((-lsb)%32)&0x1f, (width)-1) +#define BFIxw(Rd, Rn, lsb, width) if(rex.w) {BFIx(Rd, Rn, lsb, width);} else {BFIw(Rd, Rn, lsb, width);} +#define BFCx(Rd, lsb, width) BFMx(Rd, xZR, ((-lsb)%64)&0x3f, (width)-1) +#define BFCw(Rd, lsb, width) BFMw(Rd, xZR, ((-lsb)%32)&0x1f, (width)-1) +#define BFCxw(Rd, lsb, width) BFMxw(Rd, xZR, rex.w?(((-lsb)%64)&0x3f):(((-lsb)%32)&0x1f), (width)-1) +// Insert lsb:width part of Rn into low part of Rd (leaving rest of Rd untouched) +#define BFXILx(Rd, Rn, lsb, width) EMIT(BFM_gen(1, 0b01, 1, (lsb), (lsb)+(width)-1, Rn, Rd)) +// Insert lsb:width part of Rn into low part of Rd (leaving rest of Rd untouched) +#define BFXILw(Rd, Rn, lsb, width) EMIT(BFM_gen(0, 0b01, 0, (lsb), (lsb)+(width)-1, Rn, Rd)) +// Insert lsb:width part of Rn into low part of Rd (leaving rest of Rd untouched) +#define BFXILxw(Rd, Rn, lsb, width) EMIT(BFM_gen(rex.w, 0b01, rex.w, (lsb), (lsb)+(width)-1, Rn, Rd)) + +// UBFX +#define UBFM_gen(sf, N, immr, imms, Rn, Rd) ((sf)<<31 | 0b10<<29 | 0b100110<<23 | (N)<<22 | (immr)<<16 | (imms)<<10 | (Rn)<<5 | (Rd)) +#define UBFMx(Rd, Rn, immr, imms) EMIT(UBFM_gen(1, 1, immr, imms, Rn, Rd)) +#define UBFMw(Rd, Rn, immr, imms) EMIT(UBFM_gen(0, 0, immr, imms, Rn, Rd)) +#define UBFMxw(Rd, Rn, immr, imms) EMIT(UBFM_gen(rex.w, rex.w, immr, imms, Rn, Rd)) +#define UBFXx(Rd, Rn, lsb, width) EMIT(UBFM_gen(1, 1, (lsb), (lsb)+(width)-1, Rn, Rd)) +#define UBFXw(Rd, Rn, lsb, width) EMIT(UBFM_gen(0, 0, (lsb), (lsb)+(width)-1, Rn, Rd)) +#define UBFXxw(Rd, Rn, lsb, width) EMIT(UBFM_gen(rex.w, rex.w, (lsb), (lsb)+(width)-1, Rn, Rd)) +#define UXTBx(Rd, Rn) EMIT(UBFM_gen(1, 1, 0, 7, Rn, Rd)) +#define UXTBw(Rd, Rn) EMIT(UBFM_gen(0, 0, 0, 7, Rn, Rd)) +#define UXTBxw(Rd, Rn) EMIT(UBFM_gen(rex.w, rex.w, 0, 7, Rn, Rd)) +#define UXTHx(Rd, Rn) EMIT(UBFM_gen(1, 1, 0, 15, Rn, Rd)) +#define UXTHw(Rd, Rn) EMIT(UBFM_gen(0, 0, 0, 15, Rn, Rd)) +#define LSRx(Rd, Rn, shift) EMIT(UBFM_gen(1, 1, shift, 63, Rn, Rd)) +#define LSRw(Rd, Rn, shift) EMIT(UBFM_gen(0, 0, shift, 31, Rn, Rd)) +#define LSRxw(Rd, Rn, shift) EMIT(UBFM_gen(rex.w, rex.w, shift, (rex.w)?63:31, Rn, Rd)) +#define LSLx(Rd, Rn, lsl) UBFMx(Rd, Rn, ((-(lsl))%64)&63, 63-(lsl)) +#define LSLw(Rd, Rn, lsl) UBFMw(Rd, Rn, ((-(lsl))%32)&31, 31-(lsl)) +#define LSLxw(Rd, Rn, lsl) UBFMxw(Rd, Rn, rex.w?(((-(lsl))%64)&63):(((-(lsl))%32)&31), (rex.w?63:31)-(lsl)) +// Take width first bits from Rn, LSL lsb and create Rd +#define UBFIZx(Rd, Rn, lsb, width) UBFMx(Rd, Rn, ((-(lsb))%64)&63, width-1) +// Take width first bits from Rn, LSL lsb and create Rd +#define UBFIZw(Rd, Rn, lsb, width) UBFMw(Rd, Rn, ((-(lsb))%32)&31, width-1) +// Take width first bits from Rn, LSL lsb and create Rd +#define UBFIZxw(Rd, Rn, lsb, width) UBFMxw(Rd, Rn, rex.w?(((-(lsb))%64)&63):(((-(lsb))%32)&31), width-1) + +// SBFM +#define SBFM_gen(sf, N, immr, 
imms, Rn, Rd) ((sf)<<31 | 0b00<<29 | 0b100110<<23 | (N)<<22 | (immr)<<16 | (imms)<<10 | (Rn)<<5 | (Rd))
+#define SBFMx(Rd, Rn, immr, imms) EMIT(SBFM_gen(1, 1, immr, imms, Rn, Rd))
+#define SBFMw(Rd, Rn, immr, imms) EMIT(SBFM_gen(0, 0, immr, imms, Rn, Rd))
+#define SBFMxw(Rd, Rn, immr, imms) EMIT(SBFM_gen(rex.w, rex.w, immr, imms, Rn, Rd))
+#define SBFXx(Rd, Rn, lsb, width) SBFMx(Rd, Rn, lsb, lsb+width-1)
+#define SBFXw(Rd, Rn, lsb, width) SBFMw(Rd, Rn, lsb, lsb+width-1)
+#define SBFXxw(Rd, Rn, lsb, width) SBFMxw(Rd, Rn, lsb, lsb+width-1)
+#define SXTBx(Rd, Rn) SBFMx(Rd, Rn, 0, 7)
+#define SXTBw(Rd, Rn) SBFMw(Rd, Rn, 0, 7)
+#define SXTHx(Rd, Rn) SBFMx(Rd, Rn, 0, 15)
+#define SXTHw(Rd, Rn) SBFMw(Rd, Rn, 0, 15)
+#define SXTHxw(Rd, Rn) SBFMxw(Rd, Rn, 0, 15)
+#define SXTWx(Rd, Rn) SBFMx(Rd, Rn, 0, 31)
+#define ASRx(Rd, Rn, shift) SBFMx(Rd, Rn, shift, 63)
+#define ASRw(Rd, Rn, shift) SBFMw(Rd, Rn, shift, 31)
+#define ASRxw(Rd, Rn, shift) SBFMxw(Rd, Rn, shift, rex.w?63:31)
+#define SBFIZx(Rd, Rn, lsb, width) SBFMx(Rd, Rn, ((-(lsb))%64), (width)-1)
+#define SBFIZw(Rd, Rn, lsb, width) SBFMw(Rd, Rn, ((-(lsb))%32), (width)-1)
+#define SBFIZxw(Rd, Rn, lsb, width) SBFMxw(Rd, Rn, ((-(lsb))%(rex.w?64:32)), (width)-1)
+
+// EXTR
+#define EXTR_gen(sf, N, Rm, imms, Rn, Rd) ((sf)<<31 | 0b00<<29 | 0b100111<<23 | (N)<<22 | (Rm)<<16 | (imms)<<10 | (Rn)<<5 | (Rd))
+#define EXTRx(Rd, Rn, Rm, lsb) EMIT(EXTR_gen(1, 1, Rm, lsb, Rn, Rd))
+#define EXTRw(Rd, Rn, Rm, lsb) EMIT(EXTR_gen(0, 0, Rm, lsb, Rn, Rd))
+#define EXTRxw(Rd, Rn, Rm, lsb) EMIT(EXTR_gen(rex.w, rex.w, Rm, lsb, Rn, Rd))
+#define RORx(Rd, Rn, lsb) EMIT(EXTR_gen(1, 1, Rn, lsb, Rn, Rd))
+#define RORw(Rd, Rn, lsb) EMIT(EXTR_gen(0, 0, Rn, lsb, Rn, Rd))
+#define RORxw(Rd, Rn, lsb) EMIT(EXTR_gen(rex.w, rex.w, Rn, lsb, Rn, Rd))
+
+// RORV
+#define RORV_gen(sf, Rm, Rn, Rd) ((sf)<<31 | 0b11010110<<21 | (Rm)<<16 | 0b0010<<12 | 0b11<<10 | (Rn)<<5 | (Rd))
+#define RORx_REG(Rd, Rn, Rm) EMIT(RORV_gen(1, Rm, Rn, Rd))
+#define RORw_REG(Rd, Rn, Rm) EMIT(RORV_gen(0, Rm, Rn, Rd))
+#define RORxw_REG(Rd, Rn, Rm) EMIT(RORV_gen(rex.w, Rm, Rn, Rd))
+
+
+// LSRV / LSLV
+#define LS_V_gen(sf, Rm, op2, Rn, Rd) ((sf)<<31 | 0b11010110<<21 | (Rm)<<16 | 0b0010<<12 | (op2)<<10 | (Rn)<<5 | (Rd))
+#define LSRx_REG(Rd, Rn, Rm) EMIT(LS_V_gen(1, Rm, 0b01, Rn, Rd))
+#define LSRw_REG(Rd, Rn, Rm) EMIT(LS_V_gen(0, Rm, 0b01, Rn, Rd))
+#define LSRxw_REG(Rd, Rn, Rm) EMIT(LS_V_gen(rex.w, Rm, 0b01, Rn, Rd))
+
+#define LSLx_REG(Rd, Rn, Rm) EMIT(LS_V_gen(1, Rm, 0b00, Rn, Rd))
+#define LSLw_REG(Rd, Rn, Rm) EMIT(LS_V_gen(0, Rm, 0b00, Rn, Rd))
+#define LSLxw_REG(Rd, Rn, Rm) EMIT(LS_V_gen(rex.w, Rm, 0b00, Rn, Rd))
+
+// UMULL / SMULL
+#define MADDL_gen(U, Rm, o0, Ra, Rn, Rd) (1<<31 | 0b11011<<24 | (U)<<23 | 0b01<<21 | (Rm)<<16 | (o0)<<15 | (Ra)<<10 | (Rn)<<5 | (Rd))
+#define UMADDL(Xd, Wn, Wm, Xa) EMIT(MADDL_gen(1, Wm, 0, Xa, Wn, Xd))
+#define UMULL(Xd, Wn, Wm) UMADDL(Xd, Wn, Wm, xZR)
+#define SMADDL(Xd, Wn, Wm, Xa) EMIT(MADDL_gen(0, Wm, 0, Xa, Wn, Xd))
+#define SMULL(Xd, Wn, Wm) SMADDL(Xd, Wn, Wm, xZR)
+
+#define MULH_gen(U, Rm, Rn, Rd) (1<<31 | 0b11011<<24 | (U)<<23 | 0b10<<21 | (Rm)<<16 | 0b11111<<10 | (Rn)<<5 | (Rd))
+#define UMULH(Xd, Xn, Xm) EMIT(MULH_gen(1, Xm, Xn, Xd))
+#define SMULH(Xd, Xn, Xm) EMIT(MULH_gen(0, Xm, Xn, Xd))
+
+#define MADD_gen(sf, Rm, o0, Ra, Rn, Rd) ((sf)<<31 | 0b11011<<24 | (Rm)<<16 | (o0)<<15 | (Ra)<<10 | (Rn)<<5 | (Rd))
+#define MADDx(Rd, Rn, Rm, Ra) EMIT(MADD_gen(1, Rm, 0, Ra, Rn, Rd))
+#define MADDw(Rd, Rn, Rm, Ra) EMIT(MADD_gen(0, Rm, 0, Ra, Rn, Rd))
+#define MADDxw(Rd,
Rn, Rm, Ra) EMIT(MADD_gen(rex.w, Rm, 0, Ra, Rn, Rd)) +#define MULx(Rd, Rn, Rm) MADDx(Rd, Rn, Rm, xZR) +#define MULw(Rd, Rn, Rm) MADDw(Rd, Rn, Rm, xZR) +#define MULxw(Rd, Rn, Rm) MADDxw(Rd, Rn, Rm, xZR) +#define MSUBx(Rd, Rn, Rm, Ra) EMIT(MADD_gen(1, Rm, 1, Ra, Rn, Rd)) +#define MSUBw(Rd, Rn, Rm, Ra) EMIT(MADD_gen(0, Rm, 1, Ra, Rn, Rd)) +#define MSUBxw(Rd, Rn, Rm, Ra) EMIT(MADD_gen(rex.w, Rm, 1, Ra, Rn, Rd)) +#define MNEGx(Rd, Rn, Rm) EMIT(MADD_gen(1, Rm, 1, xZR, Rn, Rd)) +#define MNEGw(Rd, Rn, Rm) EMIT(MADD_gen(0, Rm, 1, xZR, Rn, Rd)) +#define MNEGxw(Rd, Rn, Rm) EMIT(MADD_gen(rex.w, Rm, 1, xZR, Rn, Rd)) + + +// DIV +#define DIV_gen(sf, Rm, o1, Rn, Rd) ((sf)<<31 | 0b11010110<<21 | (Rm)<<16 | 0b00001<<11 | (o1)<<10 | (Rn)<<5 | (Rd)) +#define UDIVw(Wd, Wn, Wm) EMIT(DIV_gen(0, Wm, 0, Wn, Wd)) +#define UDIVx(Xd, Xn, Xm) EMIT(DIV_gen(1, Xm, 0, Xn, Xd)) +#define SDIVw(Wd, Wn, Wm) EMIT(DIV_gen(0, Wm, 1, Wn, Wd)) +#define SDIVx(Xd, Xn, Xm) EMIT(DIV_gen(1, Xm, 1, Xn, Xd)) + +// CLZ +#define CL_gen(sf, op, Rn, Rd) ((sf)<<31 | 1<<30 | 0b11010110<<21 | 0b00010<<11 | (op)<<10 | (Rn)<<5 | (Rd)) +#define CLZx(Rd, Rn) EMIT(CL_gen(1, 0, Rn, Rd)) +#define CLZw(Rd, Rn) EMIT(CL_gen(0, 0, Rn, Rd)) +#define CLZxw(Rd, Rn) EMIT(CL_gen(rex.w, 0, Rn, Rd)) +#define CLSx(Rd, Rn) EMIT(CL_gen(1, 1, Rn, Rd)) +#define CLSw(Rd, Rn) EMIT(CL_gen(0, 1, Rn, Rd)) +#define CLSxw(Rd, Rn) EMIT(CL_gen(rex.w, 1, Rn, Rd)) + +// RBIT +#define RBIT_gen(sf, Rn, Rd) ((sf)<<31 | 1<<30 | 0b11010110<<21 | (Rn)<<5 | (Rd)) +#define RBITx(Rd, Rn) EMIT(RBIT_gen(1, Rn, Rd)) +#define RBITw(Rd, Rn) EMIT(RBIT_gen(0, Rn, Rd)) +#define RBITxw(Rd, Rn) EMIT(RBIT_gen(rex.w, Rn, Rd)) + +// REV +#define REV_gen(sf, opc, Rn, Rd) ((sf)<<31 | 1<<30 | 0b11010110<<21 | (opc)<<10 | (Rn)<<5 | (Rd)) +#define REV64x(Rd, Rn) EMIT(REV_gen(1, 0b11, Rn, Rd)) +#define REV32w(Rd, Rn) EMIT(REV_gen(0, 0b10, Rn, Rd)) +#define REVxw(Rd, Rn) EMIT(REV_gen(rex.w, 0b10|rex.w, Rn, Rd)) +#define REV16w(Rd, Rn) EMIT(REV_gen(0, 0b01, Rn, Rd)) +#define REV16x(Rd, Rn) EMIT(REV_gen(1, 0b01, Rn, Rd)) + +// MRS +#define MRS_gen(L, o0, op1, CRn, CRm, op2, Rt) (0b1101010100<<22 | (L)<<21 | 1<<20 | (o0)<<19 | (op1)<<16 | (CRn)<<12 | (CRm)<<8 | (op2)<<5 | (Rt)) +// mrs x0, nzcv : 1101010100 1 1 1 011 0100 0010 000 00000 o0=1(op0=3), op1=0b011(3) CRn=0b0100(4) CRm=0b0010(2) op2=0 +// MRS : from System register +#define MRS_nzvc(Rt) EMIT(MRS_gen(1, 1, 3, 4, 2, 0, Rt)) +// MSR : to System register +#define MSR_nzvc(Rt) EMIT(MRS_gen(0, 1, 3, 4, 2, 0, Rt)) +// mrs x0, fpcr : 1101010100 1 1 1 011 0100 0100 000 00000 o0=1(op0=3), op1=0b011(3) CRn=0b0100(4) CRm=0b0100(4) op2=0 +#define MRS_fpcr(Rt) EMIT(MRS_gen(1, 1, 3, 4, 4, 0, Rt)) +#define MSR_fpcr(Rt) EMIT(MRS_gen(0, 1, 3, 4, 4, 0, Rt)) +// mrs x0, fpsr : 1101010100 1 1 1 011 0100 0100 001 00000 o0=1(op0=3), op1=0b011(3) CRn=0b0100(4) CRm=0b0100(4) op2=1 +#define MRS_fpsr(Rt) EMIT(MRS_gen(1, 1, 3, 4, 4, 1, Rt)) +#define MSR_fpsr(Rt) EMIT(MRS_gen(0, 1, 3, 4, 4, 1, Rt)) +// NEON Saturation Bit +#define FPSR_QC 27 +// NEON Input Denormal Cumulative +#define FPSR_IDC 7 +// NEON IneXact Cumulative +#define FPSR_IXC 4 +// NEON Underflow Cumulative +#define FPSR_UFC 3 +// NEON Overflow Cumulative +#define FPSR_OFC 2 +// NEON Divide by 0 Cumulative +#define FPSR_DZC 1 +// NEON Invalid Operation Cumulative +#define FPSR_IOC 0 + +// FCSEL +#define FCSEL_scalar(type, Rm, cond, Rn, Rd) (0b11110<<24 | (type)<<22 | 1<<21 | (Rm)<<16 | (cond)<<12 | 0b11<<10 | (Rn)<<5 | (Rd)) +#define FCSELS(Sd, Sn, Sm, cond) EMIT(FCSEL_scalar(0b00, Sm, cond, Sn, Sd)) +#define 
FCSELD(Dd, Dn, Dm, cond) EMIT(FCSEL_scalar(0b01, Dm, cond, Dn, Dd))
+
+// VLDR
+#define VMEM_gen(size, opc, imm12, Rn, Rt) ((size)<<30 | 0b111<<27 | 1<<26 | 0b01<<24 | (opc)<<22 | (imm12)<<10 | (Rn)<<5 | (Rt))
+// imm14 must be 3-aligned
+#define VLDR32_U12(Dt, Rn, imm14) EMIT(VMEM_gen(0b10, 0b01, ((uint32_t)((imm14)>>2))&0xfff, Rn, Dt))
+// imm15 must be 3-aligned
+#define VLDR64_U12(Dt, Rn, imm15) EMIT(VMEM_gen(0b11, 0b01, ((uint32_t)((imm15)>>3))&0xfff, Rn, Dt))
+// imm16 must be 4-aligned
+#define VLDR128_U12(Qt, Rn, imm16) EMIT(VMEM_gen(0b00, 0b11, ((uint32_t)((imm16)>>4))&0xfff, Rn, Qt))
+// (imm14) must be 3-aligned
+#define VSTR32_U12(Dt, Rn, imm14) EMIT(VMEM_gen(0b10, 0b00, ((uint32_t)(imm14>>2))&0xfff, Rn, Dt))
+// (imm15) must be 3-aligned
+#define VSTR64_U12(Dt, Rn, imm15) EMIT(VMEM_gen(0b11, 0b00, ((uint32_t)(imm15>>3))&0xfff, Rn, Dt))
+// imm16 must be 4-aligned
+#define VSTR128_U12(Qt, Rn, imm16) EMIT(VMEM_gen(0b00, 0b10, ((uint32_t)((imm16)>>4))&0xfff, Rn, Qt))
+// (imm14) must be 1-aligned
+#define VSTR16_U12(Ht, Rn, imm14) EMIT(VMEM_gen(0b01, 0b00, ((uint32_t)(imm14>>1))&0xfff, Rn, Ht))
+
+#define VMEMUR_vector(size, opc, imm9, Rn, Rt) ((size)<<30 | 0b111<<27 | 1<<26 | (opc)<<22 | (imm9)<<12 | (Rn)<<5 | (Rt))
+// signed offset, no alignement!
+#define VLDR8_I9(Vt, Rn, imm9) EMIT(VMEMUR_vector(0b00, 0b01, (imm9)&0b111111111, Rn, Vt))
+#define VLDR16_I9(Vt, Rn, imm9) EMIT(VMEMUR_vector(0b01, 0b01, (imm9)&0b111111111, Rn, Vt))
+#define VLDR32_I9(Vt, Rn, imm9) EMIT(VMEMUR_vector(0b10, 0b01, (imm9)&0b111111111, Rn, Vt))
+#define VLDR64_I9(Vt, Rn, imm9) EMIT(VMEMUR_vector(0b11, 0b01, (imm9)&0b111111111, Rn, Vt))
+#define VLDR128_I9(Vt, Rn, imm9) EMIT(VMEMUR_vector(0b00, 0b11, (imm9)&0b111111111, Rn, Vt))
+// signed offset, no alignement!
+#define VSTR8_I9(Vt, Rn, imm9) EMIT(VMEMUR_vector(0b00, 0b00, (imm9)&0b111111111, Rn, Vt))
+#define VSTR16_I9(Vt, Rn, imm9) EMIT(VMEMUR_vector(0b01, 0b00, (imm9)&0b111111111, Rn, Vt))
+#define VSTR32_I9(Vt, Rn, imm9) EMIT(VMEMUR_vector(0b10, 0b00, (imm9)&0b111111111, Rn, Vt))
+#define VSTR64_I9(Vt, Rn, imm9) EMIT(VMEMUR_vector(0b11, 0b00, (imm9)&0b111111111, Rn, Vt))
+#define VSTR128_I9(Vt, Rn, imm9) EMIT(VMEMUR_vector(0b00, 0b10, (imm9)&0b111111111, Rn, Vt))
+
+#define VMEMW_gen(size, opc, imm9, op2, Rn, Rt) ((size)<<30 | 0b111<<27 | 1<<26 | (opc)<<22 | (imm9)<<12 | (op2)<<10 | 0b01<<10 | (Rn)<<5 | (Rt))
+#define VLDR64_S9_postindex(Rt, Rn, imm9) EMIT(VMEMW_gen(0b11, 0b01, (imm9)&0x1ff, 0b01, Rn, Rt))
+#define VLDR64_S9_preindex(Rt, Rn, imm9) EMIT(VMEMW_gen(0b11, 0b01, (imm9)&0x1ff, 0b11, Rn, Rt))
+#define VLDR128_S9_postindex(Rt, Rn, imm9) EMIT(VMEMW_gen(0b11, 0b11, (imm9)&0x1ff, 0b01, Rn, Rt))
+#define VLDR128_S9_preindex(Rt, Rn, imm9) EMIT(VMEMW_gen(0b11, 0b11, (imm9)&0x1ff, 0b11, Rn, Rt))
+#define VSTR64_S9_postindex(Rt, Rn, imm9) EMIT(VMEMW_gen(0b11, 0b00, (imm9)&0x1ff, 0b01, Rn, Rt))
+#define VSTR64_S9_preindex(Rt, Rn, imm9) EMIT(VMEMW_gen(0b11, 0b00, (imm9)&0x1ff, 0b11, Rn, Rt))
+#define VSTR128_S9_postindex(Rt, Rn, imm9) EMIT(VMEMW_gen(0b11, 0b10, (imm9)&0x1ff, 0b01, Rn, Rt))
+#define VSTR128_S9_preindex(Rt, Rn, imm9) EMIT(VMEMW_gen(0b11, 0b10, (imm9)&0x1ff, 0b11, Rn, Rt))
+
+#define VMEM_REG_gen(size, opc, Rm, option, S, Rn, Rt) ((size)<<30 | 0b111<<27 | 1<<26 | (opc)<<22 | 1<<21 | (Rm)<<16 | (option)<<13 | (S)<<12 | 0b10<<10 | (Rn)<<5 | (Rt))
+
+#define VLDR32_REG(Dt, Rn, Rm) EMIT(VMEM_REG_gen(0b10, 0b01, Rm, 0b011, 0, Rn, Dt))
+#define VLDR32_REG_LSL3(Dt, Rn, Rm) EMIT(VMEM_REG_gen(0b10, 0b01, Rm, 0b011, 1, Rn, Dt))
+#define VLDR64_REG(Dt, Rn, Rm) EMIT(VMEM_REG_gen(0b11, 0b01, Rm, 0b011, 0, Rn, Dt))
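[Editor's illustration, not part of the patch] The *_U12 forms above store the byte offset divided by the access size, so VLDR128_U12/VSTR128_U12 can only reach 16-byte-aligned offsets from 0 to 65520. A hedged sketch of that range check (the helper name is hypothetical):

#include <stdint.h>
/* Non-zero when a byte offset is encodable by VLDR128_U12/VSTR128_U12:
   it must be non-negative, a multiple of 16, and fit in 12 bits once divided by 16. */
static int vmem128_u12_encodable(int64_t offset)
{
    return offset >= 0 && (offset & 15) == 0 && (offset >> 4) <= 0xfff;
}
/* If the check fails, the usual fallbacks are the unscaled signed form (VLDR128_I9, offsets -256..255),
   or computing the address into a scratch register and using the register-offset form VLDR128_REG. */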
+#define VLDR64_REG_LSL3(Dt, Rn, Rm) EMIT(VMEM_REG_gen(0b11, 0b01, Rm, 0b011, 1, Rn, Dt))
+#define VLDR128_REG(Qt, Rn, Rm) EMIT(VMEM_REG_gen(0b00, 0b11, Rm, 0b011, 0, Rn, Qt))
+#define VLDR128_REG_LSL4(Qt, Rn, Rm) EMIT(VMEM_REG_gen(0b00, 0b11, Rm, 0b011, 1, Rn, Qt))
+
+#define VSTR32_REG(Dt, Rn, Rm) EMIT(VMEM_REG_gen(0b10, 0b00, Rm, 0b011, 0, Rn, Dt))
+#define VSTR32_REG_LSL3(Dt, Rn, Rm) EMIT(VMEM_REG_gen(0b10, 0b00, Rm, 0b011, 1, Rn, Dt))
+#define VSTR64_REG(Dt, Rn, Rm) EMIT(VMEM_REG_gen(0b11, 0b00, Rm, 0b011, 0, Rn, Dt))
+#define VSTR64_REG_LSL3(Dt, Rn, Rm) EMIT(VMEM_REG_gen(0b11, 0b00, Rm, 0b011, 1, Rn, Dt))
+#define VSTR128_REG(Qt, Rn, Rm) EMIT(VMEM_REG_gen(0b00, 0b10, Rm, 0b011, 0, Rn, Qt))
+#define VSTR128_REG_LSL4(Qt, Rn, Rm) EMIT(VMEM_REG_gen(0b00, 0b10, Rm, 0b011, 1, Rn, Qt))
+
+#define VLDR_PC_gen(opc, imm19, Rt) ((opc)<<30 | 0b011<<27 | 1<<26 | (imm19)<<5 | (Rt))
+#define VLDR32_literal(Vt, imm19) EMIT(VLDR_PC_gen(0b00, ((imm19)>>2)&0x7FFFF, Vt))
+#define VLDR64_literal(Vt, imm19) EMIT(VLDR_PC_gen(0b01, ((imm19)>>2)&0x7FFFF, Vt))
+#define VLDR128_literal(Vt, imm19) EMIT(VLDR_PC_gen(0b10, ((imm19)>>2)&0x7FFFF, Vt))
+
+
+#define LD1R_gen(Q, size, Rn, Rt) ((Q)<<30 | 0b0011010<<23 | 1<<22 | 0<<21 | 0b110<<13 | (size)<<10 | (Rn)<<5 | (Rt))
+#define VLDQ1R_8(Vt, Rn) EMIT(LD1R_gen(1, 0b00, Rn, Vt))
+#define VLDQ1R_16(Vt, Rn) EMIT(LD1R_gen(1, 0b01, Rn, Vt))
+#define VLDQ1R_32(Vt, Rn) EMIT(LD1R_gen(1, 0b10, Rn, Vt))
+#define VLDQ1R_64(Vt, Rn) EMIT(LD1R_gen(1, 0b11, Rn, Vt))
+#define VLD1R_8(Vt, Rn) EMIT(LD1R_gen(0, 0b00, Rn, Vt))
+#define VLD1R_16(Vt, Rn) EMIT(LD1R_gen(0, 0b01, Rn, Vt))
+#define VLD1R_32(Vt, Rn) EMIT(LD1R_gen(0, 0b10, Rn, Vt))
+
+#define LD1_single(Q, opcode, S, size, Rn, Rt) ((Q)<<30 | 0b0011010<<23 | 1<<22 | 0<<21 | (opcode)<<13 | (S)<<12 | (size)<<10 | (Rn)<<5 | (Rt))
+#define VLD1_8(Vt, index, Rn) EMIT(LD1_single(((index)>>3)&1, 0b000, ((index)>>2)&1, (index)&3, Rn, Vt))
+#define VLD1_16(Vt, index, Rn) EMIT(LD1_single(((index)>>2)&1, 0b010, ((index)>>1)&1, ((index)&1)<<1, Rn, Vt))
+#define VLD1_32(Vt, index, Rn) EMIT(LD1_single(((index)>>1)&1, 0b100, ((index))&1, 0b00, Rn, Vt))
+#define VLD1_64(Vt, index, Rn) EMIT(LD1_single(((index))&1, 0b100, 0, 0b01, Rn, Vt))
+
+#define ST1_single(Q, opcode, S, size, Rn, Rt) ((Q)<<30 | 0b0011010<<23 | 0<<22 | 0<<21 | (opcode)<<13 | (S)<<12 | (size)<<10 | (Rn)<<5 | (Rt))
+#define VST1_8(Vt, index, Rn) EMIT(ST1_single(((index)>>3)&1, 0b000, ((index)>>2)&1, (index)&3, Rn, Vt))
+#define VST1_16(Vt, index, Rn) EMIT(ST1_single(((index)>>2)&1, 0b010, ((index)>>1)&1, ((index)&1)<<1, Rn, Vt))
+#define VST1_32(Vt, index, Rn) EMIT(ST1_single(((index)>>1)&1, 0b100, ((index))&1, 0b00, Rn, Vt))
+#define VST1_64(Vt, index, Rn) EMIT(ST1_single(((index))&1, 0b100, 0, 0b01, Rn, Vt))
+
+// LOGIC
+#define VLOGIC_gen(Q, opc2, Rm, Rn, Rd) ((Q)<<30 | 1<<29 | 0b01110<<24 | (opc2)<<22 | 1<<21 | (Rm)<<16 | 0b00011<<11 | 1<<10 | (Rn)<<5 | (Rd))
+#define VEORQ(Vd, Vn, Vm) EMIT(VLOGIC_gen(1, 0b00, Vm, Vn, Vd))
+#define VEOR(Vd, Vn, Vm) EMIT(VLOGIC_gen(0, 0b00, Vm, Vn, Vd))
+
+#define VLOGIC_immediate(Q, op, abc, cmode, defgh, Rd) ((Q)<<30 | (op)<<29 | 0b0111100000<<19 | (abc)<<16 | (cmode)<<12 | 1<<10 | (defgh)<<5 | (Rd))
+//#define V
+
+#define SHL_vector(Q, immh, immb, Rn, Rd) ((Q)<<30 | 0b011110<<23 | (immh)<<19 | (immb)<<16 | 0b01010<<11 | 1<<10 | (Rn)<<5 | (Rd))
+#define VSHLQ_8(Vd, Vn, shift) EMIT(SHL_vector(1, 0b0001, (shift)&7, Vn, Vd))
+#define VSHLQ_16(Vd, Vn, shift) EMIT(SHL_vector(1, 0b0010 | (((shift)>>3)&1), (shift)&7, Vn, Vd))
+#define
VSHLQ_32(Vd, Vn, shift) EMIT(SHL_vector(1, 0b0100 | (((shift)>>3)&3), (shift)&7, Vn, Vd)) +#define VSHLQ_64(Vd, Vn, shift) EMIT(SHL_vector(1, 0b1000 | (((shift)>>3)&7), (shift)&7, Vn, Vd)) +#define VSHL_8(Vd, Vn, shift) EMIT(SHL_vector(0, 0b0001, (shift)&7, Vn, Vd)) +#define VSHL_16(Vd, Vn, shift) EMIT(SHL_vector(0, 0b0010 | (((shift)>>3)&1), (shift)&7, Vn, Vd)) +#define VSHL_32(Vd, Vn, shift) EMIT(SHL_vector(0, 0b0100 | (((shift)>>3)&3), (shift)&7, Vn, Vd)) + +#define SHL_scalar(U, size, Rm, R, S, Rn, Rd) (0b01<<30 | (U)<<29 | 0b11110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 0b010<<13 | (R)<<12 | (S)<<11 | 1<<10 | (Rn)<<5 | (Rd)) +#define SSHL_R_64(Vd, Vn, Vm) EMIT(SHL_scalar(0, 0b11, Vm, 0, 0, Vn, Vd)) +#define USHL_R_64(Vd, Vn, Vm) EMIT(SHL_scalar(1, 0b11, Vm, 0, 0, Vn, Vd)) + +#define SHL_scalar_imm(U, immh, immb, Rn, Rd) (0b01<<30 | 0b111110<<23 | (immh)<<19 | (immb)<<16 | 0b01010<<11 | 1<<10 | (Rn)<<5 | (Rd)) +#define SHL_64(Vd, Vn, shift) EMIT(SHL_scalar_imm(0, 0b1000 | (((shift)>>3)&7), (shift)&7, Vn, Vd)) + +#define SHL_vector_vector(Q, U, size, Rm, R, S, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 0b010<<13 | (R)<<12 | (S)<<11 | 1<<10 | (Rn)<<5 | (Rd)) +#define SSHL_8(Vd, Vn, Vm) EMIT(SHL_vector_vector(0, 0, 0b00, Vm, 0, 0, Vn, Vd)) +#define SSHL_16(Vd, Vn, Vm) EMIT(SHL_vector_vector(0, 0, 0b01, Vm, 0, 0, Vn, Vd)) +#define SSHL_32(Vd, Vn, Vm) EMIT(SHL_vector_vector(0, 0, 0b10, Vm, 0, 0, Vn, Vd)) +#define SSHLQ_8(Vd, Vn, Vm) EMIT(SHL_vector_vector(1, 0, 0b00, Vm, 0, 0, Vn, Vd)) +#define SSHLQ_16(Vd, Vn, Vm) EMIT(SHL_vector_vector(1, 0, 0b01, Vm, 0, 0, Vn, Vd)) +#define SSHLQ_32(Vd, Vn, Vm) EMIT(SHL_vector_vector(1, 0, 0b10, Vm, 0, 0, Vn, Vd)) +#define SSHLQ_64(Vd, Vn, Vm) EMIT(SHL_vector_vector(1, 0, 0b11, Vm, 0, 0, Vn, Vd)) +#define USHL_8(Vd, Vn, Vm) EMIT(SHL_vector_vector(0, 1, 0b00, Vm, 0, 0, Vn, Vd)) +#define USHL_16(Vd, Vn, Vm) EMIT(SHL_vector_vector(0, 1, 0b01, Vm, 0, 0, Vn, Vd)) +#define USHL_32(Vd, Vn, Vm) EMIT(SHL_vector_vector(0, 1, 0b10, Vm, 0, 0, Vn, Vd)) +#define USHLQ_8(Vd, Vn, Vm) EMIT(SHL_vector_vector(1, 1, 0b00, Vm, 0, 0, Vn, Vd)) +#define USHLQ_16(Vd, Vn, Vm) EMIT(SHL_vector_vector(1, 1, 0b01, Vm, 0, 0, Vn, Vd)) +#define USHLQ_32(Vd, Vn, Vm) EMIT(SHL_vector_vector(1, 1, 0b10, Vm, 0, 0, Vn, Vd)) +#define USHLQ_64(Vd, Vn, Vm) EMIT(SHL_vector_vector(1, 1, 0b11, Vm, 0, 0, Vn, Vd)) + +#define SHR_vector(Q, U, immh, immb, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b011110<<23 | (immh)<<19 | (immb)<<16 | 0b00000<<11 | 1<<10 | (Rn)<<5 | (Rd)) +#define VSHRQ_8(Vd, Vn, shift) EMIT(SHR_vector(1, 1, 0b0001, (8-(shift))&7, Vn, Vd)) +#define VSHRQ_16(Vd, Vn, shift) EMIT(SHR_vector(1, 1, 0b0010 | (((16-(shift))>>3)&1), (16-(shift))&7, Vn, Vd)) +#define VSHRQ_32(Vd, Vn, shift) EMIT(SHR_vector(1, 1, 0b0100 | (((32-(shift))>>3)&3), (32-(shift))&7, Vn, Vd)) +#define VSHRQ_64(Vd, Vn, shift) EMIT(SHR_vector(1, 1, 0b1000 | (((64-(shift))>>3)&7), (64-(shift))&7, Vn, Vd)) +#define VSHR_8(Vd, Vn, shift) EMIT(SHR_vector(0, 1, 0b0001, (8-(shift))&7, Vn, Vd)) +#define VSHR_16(Vd, Vn, shift) EMIT(SHR_vector(0, 1, 0b0010 | (((16-(shift))>>3)&1), (16-(shift))&7, Vn, Vd)) +#define VSHR_32(Vd, Vn, shift) EMIT(SHR_vector(0, 1, 0b0100 | (((32-(shift))>>3)&3), (32-(shift))&7, Vn, Vd)) +#define VSSHRQ_8(Vd, Vn, shift) EMIT(SHR_vector(1, 0, 0b0001, (8-(shift))&7, Vn, Vd)) +#define VSSHRQ_16(Vd, Vn, shift) EMIT(SHR_vector(1, 0, 0b0010 | (((16-(shift))>>3)&1), (16-(shift))&7, Vn, Vd)) +#define VSSHRQ_32(Vd, Vn, shift) EMIT(SHR_vector(1, 0, 0b0100 | (((32-(shift))>>3)&3), 
(32-(shift))&7, Vn, Vd)) +#define VSSHRQ_64(Vd, Vn, shift) EMIT(SHR_vector(1, 0, 0b1000 | (((64-(shift))>>3)&7), (64-(shift))&7, Vn, Vd)) +#define VSSHR_8(Vd, Vn, shift) EMIT(SHR_vector(0, 0, 0b0001, (8-(shift))&7, Vn, Vd)) +#define VSSHR_16(Vd, Vn, shift) EMIT(SHR_vector(0, 0, 0b0010 | (((16-(shift))>>3)&1), (16-(shift))&7, Vn, Vd)) +#define VSSHR_32(Vd, Vn, shift) EMIT(SHR_vector(0, 0, 0b0100 | (((32-(shift))>>3)&3), (32-(shift))&7, Vn, Vd)) + +#define SHR_scalar_imm(U, immh, immb, o1, o0, Rn, Rd) (0b01<<30 | (U)<<29 | 0b111110<<23 | (immh)<<19 | (immb)<<16 | (o1)<<13 | (o0)<<12 | 1<<10 | (Rn)<<5 | (Rd)) +#define SSHR_64(Vd, Vn, shift) EMIT(SHR_scalar_imm(0, 0b1000 | (((64-(shift))>>3)&7), (64-(shift))&7, 0, 0, Vn, Vd)) +#define USHR_64(Vd, Vn, shift) EMIT(SHR_scalar_imm(1, 0b1000 | (((64-(shift))>>3)&7), (64-(shift))&7, 0, 0, Vn, Vd)) + +#define EXT_vector(Q, Rm, imm4, Rn, Rd) ((Q)<<30 | 0b101110<<24 | (Rm)<<16 | (imm4)<<11 | (Rn)<<5 | (Rd)) +#define VEXT_8(Rd, Rn, Rm, index) EMIT(EXT_vector(0, Rm, index, Rn, Rd)) +#define VEXTQ_8(Rd, Rn, Rm, index) EMIT(EXT_vector(1, Rm, index, Rn, Rd)) + +// Shift Left and Insert (not touching lower part of dest) +#define SLI_vector(Q, immh, immb, Rn, Rd) ((Q)<<30 | 1<<29 | 0b011110<<23 | (immh)<<19 | (immb)<<16 | 0b01010<<11 | 1<<10 | (Rn)<<5 | (Rd)) +#define VSLIQ_8(Vd, Vn, shift) EMIT(SLI_vector(1, 0b0001, (shift)&7, Vn, Vd)) +#define VSLIQ_16(Vd, Vn, shift) EMIT(SLI_vector(1, 0b0010 | ((shift)>>3)&1, (shift)&7, Vn, Vd)) +#define VSLIQ_32(Vd, Vn, shift) EMIT(SLI_vector(1, 0b0100 | (((shift)>>3)&3), (shift)&7, Vn, Vd)) +#define VSLIQ_64(Vd, Vn, shift) EMIT(SLI_vector(1, 0b1000 | (((shift)>>3)&7), (shift)&7, Vn, Vd)) +#define VSLI_8(Vd, Vn, shift) EMIT(SLI_vector(0, 0b0001, (shift)&7, Vn, Vd)) +#define VSLI_16(Vd, Vn, shift) EMIT(SLI_vector(0, 0b0010 | ((shift)>>3)&1, (shift)&7, Vn, Vd)) +#define VSLI_32(Vd, Vn, shift) EMIT(SLI_vector(0, 0b0100 | (((shift)>>3)&3), (shift)&7, Vn, Vd)) + +// Shift Right and Insert (not touching higher part of dest) +#define SRI_vector(Q, immh, immb, Rn, Rd) ((Q)<<30 | 1<<29 | 0b011110<<23 | (immh)<<19 | (immb)<<16 | 0b01000<<11 | 1<<10 | (Rn)<<5 | (Rd)) +#define VSRIQ_8(Vd, Vn, shift) EMIT(SRI_vector(1, 0b0001, (shift)&7, Vn, Vd)) +#define VSRIQ_16(Vd, Vn, shift) EMIT(SRI_vector(1, 0b0010 | ((shift)>>3)&1, (shift)&7, Vn, Vd)) +#define VSRIQ_32(Vd, Vn, shift) EMIT(SRI_vector(1, 0b0100 | (((shift)>>3)&3), (shift)&7, Vn, Vd)) +#define VSRIQ_64(Vd, Vn, shift) EMIT(SRI_vector(1, 0b1000 | (((shift)>>3)&7), (shift)&7, Vn, Vd)) +#define VSRI_8(Vd, Vn, shift) EMIT(SRI_vector(0, 0b0001, (shift)&7, Vn, Vd)) +#define VSRI_16(Vd, Vn, shift) EMIT(SRI_vector(0, 0b0010 | ((shift)>>3)&1, (shift)&7, Vn, Vd)) +#define VSRI_32(Vd, Vn, shift) EMIT(SRI_vector(0, 0b0100 | (((shift)>>3)&3), (shift)&7, Vn, Vd)) + +// Integer MATH +#define ADDSUB_vector(Q, U, size, Rm, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 0b10000<<11 | 1<<10 | (Rn)<<5 | (Rd)) +#define VADDQ_8(Vd, Vn, Vm) EMIT(ADDSUB_vector(1, 0, 0b00, Vm, Vn, Vd)) +#define VADDQ_16(Vd, Vn, Vm) EMIT(ADDSUB_vector(1, 0, 0b01, Vm, Vn, Vd)) +#define VADDQ_32(Vd, Vn, Vm) EMIT(ADDSUB_vector(1, 0, 0b10, Vm, Vn, Vd)) +#define VADDQ_64(Vd, Vn, Vm) EMIT(ADDSUB_vector(1, 0, 0b11, Vm, Vn, Vd)) +#define VADD_8(Vd, Vn, Vm) EMIT(ADDSUB_vector(0, 0, 0b00, Vm, Vn, Vd)) +#define VADD_16(Vd, Vn, Vm) EMIT(ADDSUB_vector(0, 0, 0b01, Vm, Vn, Vd)) +#define VADD_32(Vd, Vn, Vm) EMIT(ADDSUB_vector(0, 0, 0b10, Vm, Vn, Vd)) +#define VSUBQ_8(Vd, Vn, Vm) EMIT(ADDSUB_vector(1,
1, 0b00, Vm, Vn, Vd)) +#define VSUBQ_16(Vd, Vn, Vm) EMIT(ADDSUB_vector(1, 1, 0b01, Vm, Vn, Vd)) +#define VSUBQ_32(Vd, Vn, Vm) EMIT(ADDSUB_vector(1, 1, 0b10, Vm, Vn, Vd)) +#define VSUBQ_64(Vd, Vn, Vm) EMIT(ADDSUB_vector(1, 1, 0b11, Vm, Vn, Vd)) +#define VSUB_8(Vd, Vn, Vm) EMIT(ADDSUB_vector(0, 1, 0b00, Vm, Vn, Vd)) +#define VSUB_16(Vd, Vn, Vm) EMIT(ADDSUB_vector(0, 1, 0b01, Vm, Vn, Vd)) +#define VSUB_32(Vd, Vn, Vm) EMIT(ADDSUB_vector(0, 1, 0b10, Vm, Vn, Vd)) + +#define NEGABS_vector(Q, U, size, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 0b10000<<17 | 0b01011<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) +#define NEG_8(Vd, Vn) EMIT(NEGABS_vector(0, 1, 0b00, Vn, Vd)) +#define NEG_16(Vd, Vn) EMIT(NEGABS_vector(0, 1, 0b01, Vn, Vd)) +#define NEG_32(Vd, Vn) EMIT(NEGABS_vector(0, 1, 0b10, Vn, Vd)) +#define NEGQ_8(Vd, Vn) EMIT(NEGABS_vector(1, 1, 0b00, Vn, Vd)) +#define NEGQ_16(Vd, Vn) EMIT(NEGABS_vector(1, 1, 0b01, Vn, Vd)) +#define NEGQ_32(Vd, Vn) EMIT(NEGABS_vector(1, 1, 0b10, Vn, Vd)) +#define NEGQ_64(Vd, Vn) EMIT(NEGABS_vector(1, 1, 0b11, Vn, Vd)) +#define ABS_8(Vd, Vn) EMIT(NEGABS_vector(0, 0, 0b00, Vn, Vd)) +#define ABS_16(Vd, Vn) EMIT(NEGABS_vector(0, 0, 0b01, Vn, Vd)) +#define ABS_32(Vd, Vn) EMIT(NEGABS_vector(0, 0, 0b10, Vn, Vd)) +#define ABSQ_8(Vd, Vn) EMIT(NEGABS_vector(1, 0, 0b00, Vn, Vd)) +#define ABSQ_16(Vd, Vn) EMIT(NEGABS_vector(1, 0, 0b01, Vn, Vd)) +#define ABSQ_32(Vd, Vn) EMIT(NEGABS_vector(1, 0, 0b10, Vn, Vd)) +#define ABSQ_64(Vd, Vn) EMIT(NEGABS_vector(1, 0, 0b11, Vn, Vd)) + +#define NEGABS_vector_scalar(U, size, Rn, Rd) (0b01<<30 | (U)<<29 | 0b11110<<24 | (size)<<22 | 0b10000<<17 | 0b01011<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) +#define NEG_64(Vd, Vn) EMIT(NEGABS_vector_scalar(1, 0b11, Vn, Vd)) +#define ABS_64(Vd, Vn) EMIT(NEGABS_vector_scalar(0, 0b11, Vn, Vd)) + +// FMOV +#define FMOV_general(sf, type, mode, opcode, Rn, Rd) ((sf)<<31 | 0b11110<<24 | (type)<<22 | 1<<21 | (mode)<<19 | (opcode)<<16 | (Rn)<<5 | (Rd)) +// 32-bit to single-precision +#define FMOVSw(Sd, Wn) EMIT(FMOV_general(0, 0b00, 0b00, 0b111, Wn, Sd)) +// Single-precision to 32-bit +#define FMOVwS(Wd, Sn) EMIT(FMOV_general(0, 0b00, 0b00, 0b110, Sn, Wd)) +// 64-bit to double-precision +#define FMOVDx(Dd, Xn) EMIT(FMOV_general(1, 0b01, 0b00, 0b111, Xn, Dd)) +// 64-bit to top half of 128-bit +#define FMOVD1x(Vd, Xn) EMIT(FMOV_general(1, 0b10, 0b01, 0b111, Xn, Vd)) +// Double-precision to 64-bit +#define FMOVxD(Xd, Dn) EMIT(FMOV_general(1, 0b01, 0b00, 0b110, Dn, Xd)) +// Top half of 128-bit to 64-bit +#define FMOVxD1(Xd, Vn) EMIT(FMOV_general(1, 0b10, 0b01, 0b110, Vn, Xd)) + +#define FMOV_register(type, Rn, Rd) (0b11110<<24 | (type)<<22 | 1<<21 | 0b10000<<10 | (Rn)<<5 | (Rd)) +#define FMOVS(Sd, Sn) EMIT(FMOV_register(0b00, Sn, Sd)) +#define FMOVD(Dd, Dn) EMIT(FMOV_register(0b01, Dn, Dd)) + +#define FMOV_vector_imm(Q, op, abc, defgh, Rd) ((Q)<<30 | (op)<<29 | 0b0111100000<<19 | (abc)<<16 | 0b1111<<12 | 1<<10 | (defgh)<<5 | (Rd)) +#define VFMOVS_8(Vd, u8) EMIT(FMOV_vector_imm(0, 0, ((u8)>>5)&0b111, (u8)&0b11111, Vd)) +#define VFMOVSQ_8(Vd, u8) EMIT(FMOV_vector_imm(1, 0, ((u8)>>5)&0b111, (u8)&0b11111, Vd)) +#define VFMOVDQ_8(Vd, u8) EMIT(FMOV_vector_imm(1, 1, ((u8)>>5)&0b111, (u8)&0b11111, Vd)) + +#define FMOV_scalar_imm(type, imm8, Rd) (0b11110<<24 | (type)<<22 | 1<<21 | (imm8)<<13 | 0b100<<10 | (Rd)) +#define FMOVS_8(Sd, u8) EMIT(FMOV_scalar_imm(0b00, u8, Sd)) +#define FMOVD_8(Dd, u8) EMIT(FMOV_scalar_imm(0b01, u8, Dd)) + +// VMOV +#define VMOV_element(imm5, imm4, Rn, Rd) (1<<30 | 1<<29 | 0b01110000<<21 | (imm5)<<16 | 
(imm4)<<11 | 1<<10 | (Rn)<<5 | (Rd)) +#define VMOVeB(Vd, i1, Vn, i2) EMIT(VMOV_element(((i1)<<1) | 1, (i2), Vn, Vd)) +#define VMOVeH(Vd, i1, Vn, i2) EMIT(VMOV_element(((i1)<<2) | 2, (i2)<<1, Vn, Vd)) +#define VMOVeS(Vd, i1, Vn, i2) EMIT(VMOV_element(((i1)<<3) | 4, (i2)<<2, Vn, Vd)) +#define VMOVeD(Vd, i1, Vn, i2) EMIT(VMOV_element(((i1)<<4) | 8, (i2)<<3, Vn, Vd)) + +#define VMOV_from(imm5, Rn, Rd) (1<<30 | 0<<29 | 0b01110000<<21 | (imm5)<<16 | 0b0011<<11 | 1<<10 | (Rn)<<5 | (Rd)) +#define VMOVQBfrom(Vd, index, Wn) EMIT(VMOV_from(((index)<<1) | 1, Wn, Vd)) +#define VMOVQHfrom(Vd, index, Wn) EMIT(VMOV_from(((index)<<2) | 2, Wn, Vd)) +#define VMOVQSfrom(Vd, index, Wn) EMIT(VMOV_from(((index)<<3) | 4, Wn, Vd)) +#define VMOVQDfrom(Vd, index, Xn) EMIT(VMOV_from(((index)<<4) | 8, Xn, Vd)) + +#define UMOV_gen(Q, imm5, Rn, Rd) ((Q)<<30 | 0b01110000<<21 | (imm5)<<16 | 0b01<<13 | 1<<12 | 1<<11 | 1<<10 | (Rn)<<5 | (Rd)) +#define VMOVQDto(Xd, Vn, index) EMIT(UMOV_gen(1, ((index)<<4) | 8, Vn, Xd)) +#define VMOVBto(Wd, Vn, index) EMIT(UMOV_gen(0, ((index)<<1) | 1, Vn, Wd)) +#define VMOVHto(Wd, Vn, index) EMIT(UMOV_gen(0, ((index)<<2) | 2, Vn, Wd)) +#define VMOVSto(Wd, Vn, index) EMIT(UMOV_gen(0, ((index)<<3) | 4, Vn, Wd)) + +#define MVN_vector(Q, Rn, Rd) ((Q)<<30 | 1<<29 | 0b01110<<24 | 0b10000<<17 | 0b00101<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) +#define VMVNQ(Rd, Rn) EMIT(MVN_vector(1, Rn, Rd)) + +// VORR +#define ORR_vector(Q, Rm, Rn, Rd) ((Q)<<30 | 0b01110<<24 | 0b10<<22 | 1<<21 | (Rm)<<16 | 0b00011<<11 | 1<<10 | (Rn)<<5 | (Rd)) +#define VORRQ(Vd, Vn, Vm) EMIT(ORR_vector(1, Vm, Vn, Vd)) +#define VORR(Dd, Dn, Dm) EMIT(ORR_vector(0, Dm, Dn, Dd)) +#define VMOVQ(Vd, Vn) EMIT(ORR_vector(1, Vn, Vn, Vd)) +#define VMOV(Dd, Dn) EMIT(ORR_vector(0, Dn, Dn, Dd)) + +// VAND +#define AND_vector(Q, Rm, Rn, Rd) ((Q)<<30 | 0b01110<<24 | 0b00<<22 | 1<<21 | (Rm)<<16 | 0b00011<<11 | 1<<10 | (Rn)<<5 | (Rd)) +#define VANDQ(Vd, Vn, Vm) EMIT(AND_vector(1, Vm, Vn, Vd)) +#define VAND(Dd, Dn, Dm) EMIT(AND_vector(0, Dm, Dn, Dd)) + +// VBIC +#define BIC_vector(Q, Rm, Rn, Rd) ((Q)<<30 | 0b01110<<24 | 0b01<<22 | 1<<21 | (Rm)<<16 | 0b00011<<11 | 1<<10 | (Rn)<<5 | (Rd)) +#define VBICQ(Vd, Vn, Vm) EMIT(BIC_vector(1, Vm, Vn, Vd)) +#define VBIC(Dd, Dn, Dm) EMIT(BIC_vector(0, Dm, Dn, Dd)) + +// VORN +#define ORN_vector(Q, Rm, Rn, Rd) ((Q)<<30 | 0b01110<<24 | 0b11<<22 | 1<<21 | (Rm)<<16 | 0b00011<<11 | 1<<10 | (Rn)<<5 | (Rd)) +#define VORNQ(Vd, Vn, Vm) EMIT(ORN_vector(1, Vm, Vn, Vd)) +#define VORN(Dd, Dn, Dm) EMIT(ORN_vector(0, Dm, Dn, Dd)) + +// ADD / SUB +#define FADDSUB_vector(Q, U, op, sz, Rm, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (op)<<23 | (sz)<<22 | 1<<21 | (Rm)<<16 | 0b11010<<11 | 1<<10 | (Rn)<<5 | (Rd)) +#define VFADDQS(Vd, Vn, Vm) EMIT(FADDSUB_vector(1, 0, 0, 0, Vm, Vn, Vd)) +#define VFADDQD(Vd, Vn, Vm) EMIT(FADDSUB_vector(1, 0, 0, 1, Vm, Vn, Vd)) +#define VFADDS(Dd, Dn, Dm) EMIT(FADDSUB_vector(0, 0, 0, 0, Dm, Dn, Dd)) + +#define VFSUBQS(Vd, Vn, Vm) EMIT(FADDSUB_vector(1, 0, 1, 0, Vm, Vn, Vd)) +#define VFSUBQD(Vd, Vn, Vm) EMIT(FADDSUB_vector(1, 0, 1, 1, Vm, Vn, Vd)) +#define VFSUBS(Dd, Dn, Dm) EMIT(FADDSUB_vector(0, 0, 1, 0, Dm, Dn, Dd)) + +#define FADDSUB_scalar(type, Rm, op, Rn, Rd) (0b11110<<24 | (type)<<22 | 1<<21 | (Rm)<<16 | 0b001<<13 | (op)<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) +#define FADDS(Sd, Sn, Sm) EMIT(FADDSUB_scalar(0b00, Sm, 0, Sn, Sd)) +#define FADDD(Dd, Dn, Dm) EMIT(FADDSUB_scalar(0b01, Dm, 0, Dn, Dd)) + +#define FSUBS(Sd, Sn, Sm) EMIT(FADDSUB_scalar(0b00, Sm, 1, Sn, Sd)) +#define FSUBD(Dd, Dn, Dm) 
EMIT(FADDSUB_scalar(0b01, Dm, 1, Dn, Dd)) + +// ADD Pair +#define ADDP_vector(Q, size, Rm, Rn, Rd) ((Q)<<30 | 0b01110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 0b10111<<11 | 1<<10 | (Rn)<<5 | (Rd)) +#define VADDPQ_8(Vd, Vn, Vm) EMIT(ADDP_vector(1, 0b00, Vm, Vn, Vd)) +#define VADDPQ_16(Vd, Vn, Vm) EMIT(ADDP_vector(1, 0b01, Vm, Vn, Vd)) +#define VADDPQ_32(Vd, Vn, Vm) EMIT(ADDP_vector(1, 0b10, Vm, Vn, Vd)) +#define VADDPQ_64(Vd, Vn, Vm) EMIT(ADDP_vector(1, 0b11, Vm, Vn, Vd)) +#define VADDP_8(Vd, Vn, Vm) EMIT(ADDP_vector(0, 0b00, Vm, Vn, Vd)) +#define VADDP_16(Vd, Vn, Vm) EMIT(ADDP_vector(0, 0b01, Vm, Vn, Vd)) +#define VADDP_32(Vd, Vn, Vm) EMIT(ADDP_vector(0, 0b10, Vm, Vn, Vd)) + +#define FADDP_vector(Q, sz, Rm, Rn, Rd) ((Q)<<30 | 1<<29 | 0b01110<<24 | (sz)<<22 | 1<<21 | (Rm)<<16 | 0b11010<<11 | 1<<10 | (Rn)<<5 | (Rd)) +#define VFADDPQS(Vd, Vn, Vm) EMIT(FADDP_vector(1, 0, Vm, Vn, Vd)) +#define VFADDPQD(Vd, Vn, Vm) EMIT(FADDP_vector(1, 1, Vm, Vn, Vd)) + +// NEG / ABS +#define FNEGABS_scalar(type, opc, Rn, Rd) (0b11110<<24 | (type)<<22 | 1<<21 | (opc)<<15 | 0b10000<<10 | (Rn)<<5 | (Rd)) +#define FNEGS(Sd, Sn) EMIT(FNEGABS_scalar(0b00, 0b10, Sn, Sd)) +#define FNEGD(Dd, Dn) EMIT(FNEGABS_scalar(0b01, 0b10, Dn, Dd)) + +#define FABSS(Sd, Sn) EMIT(FNEGABS_scalar(0b00, 0b01, Sn, Sd)) +#define FABSD(Dd, Dn) EMIT(FNEGABS_scalar(0b01, 0b01, Dn, Dd)) + + +// MUL +#define FMUL_vector(Q, sz, Rm, Rn, Rd) ((Q)<<30 | 1<<29 | 0b01110<<24 | (sz)<<22 | 1<<21 | (Rm)<<16 | 0b11011<<11 | 1<<10 | (Rn)<<5 | (Rd)) +#define VFMULS(Sd, Sn, Sm) EMIT(FMUL_vector(0, 0, Sm, Sn, Sd)) +#define VFMULQS(Sd, Sn, Sm) EMIT(FMUL_vector(1, 0, Sm, Sn, Sd)) +#define VFMULQD(Sd, Sn, Sm) EMIT(FMUL_vector(1, 1, Sm, Sn, Sd)) + +#define FMUL_scalar(type, Rm, Rn, Rd) (0b11110<<24 | (type)<<22 | 1<<21 | (Rm)<<16 | 0b10<<10 | (Rn)<<5 | (Rd)) +#define FMULS(Sd, Sn, Sm) EMIT(FMUL_scalar(0b00, Sm, Sn, Sd)) +#define FMULD(Dd, Dn, Dm) EMIT(FMUL_scalar(0b01, Dm, Dn, Dd)) + +#define FMLA_vector(Q, op, sz, Rm, Rn, Rd) ((Q)<<30 | 0b01110<<24 | (op)<<23 | (sz)<<22 | 1<<21 | (Rm)<<16 | 0b11001<<11 | 1<<10 | (Rn)<<5 | (Rd)) +#define VFMLAS(Sd, Sn, Sm) EMIT(FMLA_vector(0, 0, 0, Sm, Sn, Sd)) +#define VFMLAQS(Sd, Sn, Sm) EMIT(FMLA_vector(1, 0, 0, Sm, Sn, Sd)) +#define VFMLAQD(Dd, Dn, Dm) EMIT(FMLA_vector(1, 0, 1, Dm, Dn, Dd)) + +// DIV +#define FDIV_vector(Q, sz, Rm, Rn, Rd) ((Q)<<30 | 1<<29 | 0b01110<<24 | (sz)<<22 | 1<<21 | (Rm)<<16 | 0b11111<<11 | 1<<10 | (Rn)<<5 | (Rd)) +#define VFDIVS(Sd, Sn, Sm) EMIT(FDIV_vector(0, 0, Sm, Sn, Sd)) +#define VFDIVQS(Sd, Sn, Sm) EMIT(FDIV_vector(1, 0, Sm, Sn, Sd)) +#define VFDIVQD(Sd, Sn, Sm) EMIT(FDIV_vector(1, 1, Sm, Sn, Sd)) + +#define FDIV_scalar(type, Rm, Rn, Rd) (0b11110<<24 | (type)<<22 | 1<<21 | (Rm)<<16 | 0b0001<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) +#define FDIVS(Sd, Sn, Sm) EMIT(FDIV_scalar(0b00, Sm, Sn, Sd)) +#define FDIVD(Dd, Dn, Dm) EMIT(FDIV_scalar(0b01, Dm, Dn, Dd)) + +#define FRECPE_vector(Q, sz, Rn, Rd) ((Q)<<30 | 0<<29 | 0b01110<<24 | 1<<23 | (sz)<<22 | 0b10000<<17 | 0b11101<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) +#define VFRECPES(Vd, Vn) EMIT(FRECPE_vector(0, 0, Vn, Vd)) +#define VFRECPEQS(Vd, Vn) EMIT(FRECPE_vector(1, 0, Vn, Vd)) +#define VFRECPEQD(Vd, Vn) EMIT(FRECPE_vector(1, 1, Vn, Vd)) + +#define FRECPS_vector(Q, sz, Rm, Rn, Rd) ((Q)<<30 | 0<<29 | 0b01110<<24 | 0<<23 | (sz)<<22 | 1<<21 | (Rm)<<16 | 0b11111<<11 | 1<<10 | (Rn)<<5 | (Rd)) +#define VFRECPSS(Vd, Vn, Vm) EMIT(FRECPS_vector(0, 0, Vm, Vn, Vd)) +#define VFRECPSQS(Vd, Vn, Vm) EMIT(FRECPS_vector(1, 0, Vm, Vn, Vd)) +#define VFRECPSQD(Vd, Vn, Vm) 
EMIT(FRECPS_vector(1, 1, Vm, Vn, Vd)) + +// SQRT +#define FSQRT_vector(Q, sz, Rn, Rd) ((Q)<<30 | 1<<29 | 0b01110<<24 | 1<<23 | (sz)<<22 | 0b10000<<17 | 0b11111<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) +#define VFSQRTS(Sd, Sn) EMIT(FSQRT_vector(0, 0, Sn, Sd)) +#define VFSQRTQS(Sd, Sn) EMIT(FSQRT_vector(1, 0, Sn, Sd)) +#define VFSQRTQD(Sd, Sn) EMIT(FSQRT_vector(1, 1, Sn, Sd)) + +#define FSQRT_scalar(type, Rn, Rd) (0b11110<<24 | (type)<<22 | 1<<21 | 0b11<<15 | 0b10000<<10 | (Rn)<<5 | (Rd)) +#define FSQRTS(Sd, Sn) EMIT(FSQRT_scalar(0b00, Sn, Sd)) +#define FSQRTD(Dd, Dn) EMIT(FSQRT_scalar(0b01, Dn, Dd)) + +#define FRSQRTE_vector(Q, sz, Rn, Rd) ((Q)<<30 | 1<<29 | 0b01110<<24 | 1<<23 | (sz)<<22 | 0b10000<<17 | 0b11101<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) +#define VFRSQRTES(Vd, Vn) EMIT(FRSQRTE_vector(0, 0, Vn, Vd)) +#define VFRSQRTEQS(Vd, Vn) EMIT(FRSQRTE_vector(1, 0, Vn, Vd)) +#define VFRSQRTEQD(Vd, Vn) EMIT(FRSQRTE_vector(1, 1, Vn, Vd)) + +#define FRSQRTS_vector(Q, sz, Rm, Rn, Rd) ((Q)<<30 | 0<<29 | 0b01110<<24 | 1<<23 | (sz)<<22 | 1<<21 | (Rm)<<16 | 0b11111<<11 | 1<<10 | (Rn)<<5 | (Rd)) +#define VFRSQRTSS(Vd, Vn, Vm) EMIT(FRSQRTS_vector(0, 0, Vm, Vn, Vd)) +#define VFRSQRTSQS(Vd, Vn, Vm) EMIT(FRSQRTS_vector(1, 0, Vm, Vn, Vd)) +#define VFRSQRTSQD(Vd, Vn, Vm) EMIT(FRSQRTS_vector(1, 1, Vm, Vn, Vd)) + +// CMP +#define FCMP_scalar(type, Rn, Rm, opc) (0b11110<<24 | (type)<<22 | 1<<21 | (Rm)<<16 | 0b1000<<10 | (Rn)<<5 | (opc)<<3) +#define FCMPS(Sn, Sm) EMIT(FCMP_scalar(0b00, Sn, Sm, 0b00)) +#define FCMPD(Dn, Dm) EMIT(FCMP_scalar(0b01, Dn, Dm, 0b00)) +#define FCMPS_0(Sn) EMIT(FCMP_scalar(0b00, Sn, 0, 0b01)) +#define FCMPD_0(Dn) EMIT(FCMP_scalar(0b01, Dn, 0, 0b01)) + +// CVT +#define FCVT_scalar(sf, type, rmode, opcode, Rn, Rd) ((sf)<<31 | 0b11110<<24 | (type)<<22 | 1<<21 | (rmode)<<19 | (opcode)<<16 | (Rn)<<5 | (Rd)) +// Floating-point Convert to Signed integer, rounding to nearest with ties to Away +#define FCVTASwS(Wd, Sn) EMIT(FCVT_scalar(0, 0b00, 0b00, 0b100, Sn, Wd)) +#define FCVTASxS(Xd, Sn) EMIT(FCVT_scalar(1, 0b00, 0b00, 0b100, Sn, Xd)) +#define FCVTASwD(Wd, Dn) EMIT(FCVT_scalar(0, 0b01, 0b00, 0b100, Dn, Wd)) +#define FCVTASxD(Xd, Dn) EMIT(FCVT_scalar(1, 0b01, 0b00, 0b100, Dn, Xd)) +// Floating-point Convert to Unsigned integer, rounding to nearest with ties to Away +#define FCVTAUwS(Wd, Sn) EMIT(FCVT_scalar(0, 0b00, 0b00, 0b101, Sn, Wd)) +#define FCVTAUxS(Xd, Sn) EMIT(FCVT_scalar(1, 0b00, 0b00, 0b101, Sn, Xd)) +#define FCVTAUwD(Wd, Dn) EMIT(FCVT_scalar(0, 0b01, 0b00, 0b101, Dn, Wd)) +#define FCVTAUxD(Xd, Dn) EMIT(FCVT_scalar(1, 0b01, 0b00, 0b101, Dn, Xd)) +// Floating-point Convert to Signed integer, rounding toward Minus infinity +#define FCVTMSwS(Wd, Sn) EMIT(FCVT_scalar(0, 0b00, 0b10, 0b000, Sn, Wd)) +#define FCVTMSxS(Xd, Sn) EMIT(FCVT_scalar(1, 0b00, 0b10, 0b000, Sn, Xd)) +#define FCVTMSxwS(Xd, Sn) EMIT(FCVT_scalar(rex.w, 0b00, 0b10, 0b000, Sn, Xd)) +#define FCVTMSwD(Wd, Dn) EMIT(FCVT_scalar(0, 0b01, 0b10, 0b000, Dn, Wd)) +#define FCVTMSxD(Xd, Dn) EMIT(FCVT_scalar(1, 0b01, 0b10, 0b000, Dn, Xd)) +#define FCVTMSxwD(Xd, Dn) EMIT(FCVT_scalar(rex.w, 0b01, 0b10, 0b000, Dn, Xd)) +// Floating-point Convert to Unsigned integer, rounding toward Minus infinity +#define FCVTMUwS(Wd, Sn) EMIT(FCVT_scalar(0, 0b00, 0b10, 0b001, Sn, Wd)) +#define FCVTMUxS(Xd, Sn) EMIT(FCVT_scalar(1, 0b00, 0b10, 0b001, Sn, Xd)) +#define FCVTMUxwS(Xd, Sn) EMIT(FCVT_scalar(rex.w, 0b00, 0b10, 0b001, Sn, Xd)) +#define FCVTMUwD(Wd, Dn) EMIT(FCVT_scalar(0, 0b01, 0b10, 0b001, Dn, Wd)) +#define FCVTMUxD(Xd, Dn) EMIT(FCVT_scalar(1, 0b01, 0b10, 
0b001, Dn, Xd)) +#define FCVTMUxwD(Xd, Dn) EMIT(FCVT_scalar(rex.w, 0b01, 0b10, 0b001, Dn, Xd)) +// Floating-point Convert to Signed integer, rounding to nearest with ties to even +#define FCVTNSwS(Wd, Sn) EMIT(FCVT_scalar(0, 0b00, 0b00, 0b000, Sn, Wd)) +#define FCVTNSxS(Xd, Sn) EMIT(FCVT_scalar(1, 0b00, 0b00, 0b000, Sn, Xd)) +#define FCVTNSxwS(Xd, Sn) EMIT(FCVT_scalar(rex.w, 0b00, 0b00, 0b000, Sn, Xd)) +#define FCVTNSwD(Wd, Dn) EMIT(FCVT_scalar(0, 0b01, 0b00, 0b000, Dn, Wd)) +#define FCVTNSxD(Xd, Dn) EMIT(FCVT_scalar(1, 0b01, 0b00, 0b000, Dn, Xd)) +#define FCVTNSxwD(Xd, Dn) EMIT(FCVT_scalar(rex.w, 0b01, 0b00, 0b000, Dn, Xd)) +// Floating-point Convert to Unsigned integer, rounding to nearest with ties to even +#define FCVTNUwS(Wd, Sn) EMIT(FCVT_scalar(0, 0b00, 0b00, 0b001, Sn, Wd)) +#define FCVTNUxS(Xd, Sn) EMIT(FCVT_scalar(1, 0b00, 0b00, 0b001, Sn, Xd)) +#define FCVTNUxwS(Xd, Sn) EMIT(FCVT_scalar(rex.w, 0b00, 0b00, 0b001, Sn, Xd)) +#define FCVTNUwD(Wd, Dn) EMIT(FCVT_scalar(0, 0b01, 0b00, 0b001, Dn, Wd)) +#define FCVTNUxD(Xd, Dn) EMIT(FCVT_scalar(1, 0b01, 0b00, 0b001, Dn, Xd)) +#define FCVTNUxwD(Xd, Dn) EMIT(FCVT_scalar(rex.w, 0b01, 0b00, 0b001, Dn, Xd)) +// Floating-point Convert to Signed integer, rounding toward Plus infinity +#define FCVTPSwS(Wd, Sn) EMIT(FCVT_scalar(0, 0b00, 0b01, 0b000, Sn, Wd)) +#define FCVTPSxS(Xd, Sn) EMIT(FCVT_scalar(1, 0b00, 0b01, 0b000, Sn, Xd)) +#define FCVTPSxwS(Xd, Sn) EMIT(FCVT_scalar(rex.w, 0b00, 0b01, 0b000, Sn, Xd)) +#define FCVTPSwD(Wd, Dn) EMIT(FCVT_scalar(0, 0b01, 0b01, 0b000, Dn, Wd)) +#define FCVTPSxD(Xd, Dn) EMIT(FCVT_scalar(1, 0b01, 0b01, 0b000, Dn, Xd)) +#define FCVTPSxwD(Xd, Dn) EMIT(FCVT_scalar(rex.w, 0b01, 0b01, 0b000, Dn, Xd)) +// Floating-point Convert to Unsigned integer, rounding toward Plus infinity +#define FCVTPUwS(Wd, Sn) EMIT(FCVT_scalar(0, 0b00, 0b01, 0b001, Sn, Wd)) +#define FCVTPUxS(Xd, Sn) EMIT(FCVT_scalar(1, 0b00, 0b01, 0b001, Sn, Xd)) +#define FCVTPUwD(Wd, Dn) EMIT(FCVT_scalar(0, 0b01, 0b01, 0b001, Dn, Wd)) +#define FCVTPUxD(Xd, Dn) EMIT(FCVT_scalar(1, 0b01, 0b01, 0b001, Dn, Xd)) +// Floating-point Convert to Signed integer, rounding toward Zero +#define FCVTZSwS(Wd, Sn) EMIT(FCVT_scalar(0, 0b00, 0b11, 0b000, Sn, Wd)) +#define FCVTZSxS(Xd, Sn) EMIT(FCVT_scalar(1, 0b00, 0b11, 0b000, Sn, Xd)) +#define FCVTZSxwS(Xd, Sn) EMIT(FCVT_scalar(rex.w, 0b00, 0b11, 0b000, Sn, Xd)) +#define FCVTZSwD(Wd, Dn) EMIT(FCVT_scalar(0, 0b01, 0b11, 0b000, Dn, Wd)) +#define FCVTZSxD(Xd, Dn) EMIT(FCVT_scalar(1, 0b01, 0b11, 0b000, Dn, Xd)) +#define FCVTZSxwD(Xd, Dn) EMIT(FCVT_scalar(rex.w, 0b01, 0b11, 0b000, Dn, Xd)) +// Floating-point Convert to Unsigned integer, rounding toward Zero +#define FCVTZUwS(Wd, Sn) EMIT(FCVT_scalar(0, 0b00, 0b11, 0b001, Sn, Wd)) +#define FCVTZUxS(Xd, Sn) EMIT(FCVT_scalar(1, 0b00, 0b11, 0b001, Sn, Xd)) +#define FCVTZUxwS(Xd, Sn) EMIT(FCVT_scalar(rex.w, 0b00, 0b11, 0b001, Sn, Xd)) +#define FCVTZUwD(Wd, Dn) EMIT(FCVT_scalar(0, 0b01, 0b11, 0b001, Dn, Wd)) +#define FCVTZUxD(Xd, Dn) EMIT(FCVT_scalar(1, 0b01, 0b11, 0b001, Dn, Xd)) +#define FCVTZUxwD(Xd, Dn) EMIT(FCVT_scalar(rex.w, 0b01, 0b11, 0b001, Dn, Xd)) + +#define FCVT_vector_scalar(U, o2, sz, o1, Rn, Rd) (0b01<<30 | (U)<<29 | 0b11110<<24 | (o2)<<23 | (sz)<<22 | 0b10000<<17 | 0b1110<<13 | (o1)<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) +// Floating-point Convert to (Un)signed integer, rounding to nearest with ties to Away +#define VFCVTASs(Vd, Vn) EMIT(FCVT_vector_scalar(0, 0, 0, 0, Vn, Vd)) +#define VFCVTASd(Vd, Vn) EMIT(FCVT_vector_scalar(0, 0, 1, 0, Vn, Vd)) +#define VFCVTAUs(Vd, Vn) 
EMIT(FCVT_vector_scalar(1, 0, 0, 0, Vn, Vd)) +#define VFCVTAUd(Vd, Vn) EMIT(FCVT_vector_scalar(1, 0, 1, 0, Vn, Vd)) +// Floating-point Convert to (Un)signed integer, rounding toward Minus infinity +#define VFCVTMSs(Vd, Vn) EMIT(FCVT2_vector_scalar(0, 0, 0, 1, Vn, Vd)) +#define VFCVTMSd(Vd, Vn) EMIT(FCVT2_vector_scalar(0, 0, 1, 1, Vn, Vd)) +#define VFCVTMUs(Vd, Vn) EMIT(FCVT2_vector_scalar(1, 0, 0, 1, Vn, Vd)) +#define VFCVTMUd(Vd, Vn) EMIT(FCVT2_vector_scalar(1, 0, 1, 1, Vn, Vd)) + +#define FCVT2_vector_scalar(U, o2, sz, o1, Rn, Rd) (0b01<<30 | (U)<<29 | 0b11110<<24 | (o2)<<23 | (sz)<<22 | 0b10000<<17 | 0b1101<<13 | (o1)<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) +// Floating-point Convert to (Un)signed integer, rounding to nearest with ties to even +#define VFCVTNSs(Vd, Vn) EMIT(FCVT2_vector_scalar(0, 0, 0, 0, Vn, Vd)) +#define VFCVTNSd(Vd, Vn) EMIT(FCVT2_vector_scalar(0, 0, 1, 0, Vn, Vd)) +#define VFCVTNUs(Vd, Vn) EMIT(FCVT2_vector_scalar(1, 0, 0, 0, Vn, Vd)) +#define VFCVTNUd(Vd, Vn) EMIT(FCVT2_vector_scalar(1, 0, 1, 0, Vn, Vd)) +// Floating-point Convert to (Un)signed integer, rounding toward Plus infinity +#define VFCVTPSs(Vd, Vn) EMIT(FCVT2_vector_scalar(0, 1, 0, 0, Vn, Vd)) +#define VFCVTPSd(Vd, Vn) EMIT(FCVT2_vector_scalar(0, 1, 1, 0, Vn, Vd)) +#define VFCVTPUs(Vd, Vn) EMIT(FCVT2_vector_scalar(1, 1, 0, 0, Vn, Vd)) +#define VFCVTPUd(Vd, Vn) EMIT(FCVT2_vector_scalar(1, 1, 1, 0, Vn, Vd)) +// Floating-point Convert to (Un)signed integer, rounding toward Zero +#define VFCVTZSs(Vd, Vn) EMIT(FCVT2_vector_scalar(0, 1, 0, 1, Vn, Vd)) +#define VFCVTZSd(Vd, Vn) EMIT(FCVT2_vector_scalar(0, 1, 1, 1, Vn, Vd)) +#define VFCVTZUs(Vd, Vn) EMIT(FCVT2_vector_scalar(1, 1, 0, 1, Vn, Vd)) +#define VFCVTZUd(Vd, Vn) EMIT(FCVT2_vector_scalar(1, 1, 1, 1, Vn, Vd)) + +#define FCVT_vector(Q, U, o2, sz, o1, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (o2)<<23 | (sz)<<22 | 0b10000<<17 | 0b1110<<13 | (o1)<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) +// Floating-point Convert to (Un)signed integer, rounding to nearest with ties to Away +#define VFCVTASS(Vd, Vn) EMIT(FCVT_vector(0, 0, 0, 0, 0, Vn, Vd)) +#define VFCVTASD(Vd, Vn) EMIT(FCVT_vector(0, 0, 0, 1, 0, Vn, Vd)) +#define VFCVTASQS(Vd, Vn) EMIT(FCVT_vector(1, 0, 0, 0, 0, Vn, Vd)) +#define VFCVTASQD(Vd, Vn) EMIT(FCVT_vector(1, 0, 0, 1, 0, Vn, Vd)) +#define VFCVTAUS(Vd, Vn) EMIT(FCVT_vector(0, 1, 0, 0, 0, Vn, Vd)) +#define VFCVTAUD(Vd, Vn) EMIT(FCVT_vector(0, 1, 0, 1, 0, Vn, Vd)) +#define VFCVTAUQS(Vd, Vn) EMIT(FCVT_vector(1, 1, 0, 0, 0, Vn, Vd)) +#define VFCVTAUQD(Vd, Vn) EMIT(FCVT_vector(1, 1, 0, 1, 0, Vn, Vd)) +// Floating-point Convert to (Un)signed integer, rounding toward Minus infinity +#define VFCVTMSS(Vd, Vn) EMIT(FCVT2_vector(0, 0, 0, 0, 1, Vn, Vd)) +#define VFCVTMSD(Vd, Vn) EMIT(FCVT2_vector(0, 0, 0, 1, 1, Vn, Vd)) +#define VFCVTMSQS(Vd, Vn) EMIT(FCVT2_vector(1, 0, 0, 0, 1, Vn, Vd)) +#define VFCVTMSQD(Vd, Vn) EMIT(FCVT2_vector(1, 0, 0, 1, 1, Vn, Vd)) +#define VFCVTMUS(Vd, Vn) EMIT(FCVT2_vector(0, 1, 0, 0, 1, Vn, Vd)) +#define VFCVTMUD(Vd, Vn) EMIT(FCVT2_vector(0, 1, 0, 1, 1, Vn, Vd)) +#define VFCVTMUQS(Vd, Vn) EMIT(FCVT2_vector(1, 1, 0, 0, 1, Vn, Vd)) +#define VFCVTMUQD(Vd, Vn) EMIT(FCVT2_vector(1, 1, 0, 1, 1, Vn, Vd)) + +#define FCVT2_vector(Q, U, o2, sz, o1, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (o2)<<23 | (sz)<<22 | 0b10000<<17 | 0b1101<<13 | (o1)<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) +// Floating-point Convert to (Un)signed integer, rounding to nearest with ties to even +#define VFCVTNSS(Vd, Vn) EMIT(FCVT2_vector(0, 0, 0, 0, 0, Vn, Vd)) +#define VFCVTNSD(Vd, Vn) 
EMIT(FCVT2_vector(0, 0, 0, 1, 0, Vn, Vd)) +#define VFCVTNSQS(Vd, Vn) EMIT(FCVT2_vector(1, 0, 0, 0, 0, Vn, Vd)) +#define VFCVTNSQD(Vd, Vn) EMIT(FCVT2_vector(1, 0, 0, 1, 0, Vn, Vd)) +#define VFCVTNUS(Vd, Vn) EMIT(FCVT2_vector(0, 1, 0, 0, 0, Vn, Vd)) +#define VFCVTNUD(Vd, Vn) EMIT(FCVT2_vector(0, 1, 0, 1, 0, Vn, Vd)) +#define VFCVTNUQS(Vd, Vn) EMIT(FCVT2_vector(1, 1, 0, 0, 0, Vn, Vd)) +#define VFCVTNUQD(Vd, Vn) EMIT(FCVT2_vector(1, 1, 0, 1, 0, Vn, Vd)) +// Floating-point Convert to (Un)signed integer, rounding toward Plus infinity +#define VFCVTPSS(Vd, Vn) EMIT(FCVT2_vector(0, 0, 1, 0, 0, Vn, Vd)) +#define VFCVTPSD(Vd, Vn) EMIT(FCVT2_vector(0, 0, 1, 1, 0, Vn, Vd)) +#define VFCVTPSQS(Vd, Vn) EMIT(FCVT2_vector(1, 0, 1, 0, 0, Vn, Vd)) +#define VFCVTPSQD(Vd, Vn) EMIT(FCVT2_vector(1, 0, 1, 1, 0, Vn, Vd)) +#define VFCVTPUS(Vd, Vn) EMIT(FCVT2_vector(0, 1, 1, 0, 0, Vn, Vd)) +#define VFCVTPUD(Vd, Vn) EMIT(FCVT2_vector(0, 1, 1, 1, 0, Vn, Vd)) +#define VFCVTPUQS(Vd, Vn) EMIT(FCVT2_vector(1, 1, 1, 0, 0, Vn, Vd)) +#define VFCVTPUQD(Vd, Vn) EMIT(FCVT2_vector(1, 1, 1, 1, 0, Vn, Vd)) +// Floating-point Convert to (Un)signed integer, rounding toward Zero +#define VFCVTZSS(Vd, Vn) EMIT(FCVT2_vector(0, 0, 1, 0, 1, Vn, Vd)) +#define VFCVTZSD(Vd, Vn) EMIT(FCVT2_vector(0, 0, 1, 1, 1, Vn, Vd)) +#define VFCVTZSQS(Vd, Vn) EMIT(FCVT2_vector(1, 0, 1, 0, 1, Vn, Vd)) +#define VFCVTZSQD(Vd, Vn) EMIT(FCVT2_vector(1, 0, 1, 1, 1, Vn, Vd)) +#define VFCVTZUS(Vd, Vn) EMIT(FCVT2_vector(0, 1, 1, 0, 1, Vn, Vd)) +#define VFCVTZUD(Vd, Vn) EMIT(FCVT2_vector(0, 1, 1, 1, 1, Vn, Vd)) +#define VFCVTZUQS(Vd, Vn) EMIT(FCVT2_vector(1, 1, 1, 0, 1, Vn, Vd)) +#define VFCVTZUQD(Vd, Vn) EMIT(FCVT2_vector(1, 1, 1, 1, 1, Vn, Vd)) + +#define FCVT_precision(type, opc, Rn, Rd) (0b11110<<24 | (type)<<22 | 1<<21 | 0b0001<<17 | (opc)<<15 | 0b10000<<10 | (Rn)<<5 | (Rd)) +#define FCVT_D_S(Dd, Sn) EMIT(FCVT_precision(0b00, 0b01, Sn, Dd)) +#define FCVT_S_D(Sd, Dn) EMIT(FCVT_precision(0b01, 0b00, Dn, Sd)) + +#define FCVTXN_vector(Q, sz, Rn, Rd) ((Q)<<30 | 1<<29 | 0b01110<<24 | (sz)<<22 | 0b10000<<17 | 0b10110<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) +// Convert Vn from 2*Double to lower Vd as 2*float and clears the upper half +#define FCVTXN(Vd, Vn) EMIT(FCVTXN_vector(0, 1, Vn, Vd)) +// Convert Vn from 2*Double to higher Vd as 2*float +#define FCVTXN2(Vd, Vn) EMIT(FCVTXN_vector(1, 1, Vn, Vd)) + +#define FCVTL_vector(Q, sz, Rn, Rd) ((Q)<<30 | 0<<29 | 0b01110<<24 | (sz)<<22 | 0b10000<<17 | 0b10111<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) +// Convert lower Vn from 2*float to Vd as 2*double +#define FCVTL(Vd, Vn) EMIT(FCVTL_vector(0, 1, Vn, Vd)) +// Convert higher Vn from 2*float to Vd as 2*double +#define FCVTL2(Vd, Vn) EMIT(FCVTL_vector(1, 1, Vn, Vd)) + +#define SCVTF_scalar(sf, type, rmode, opcode, Rn, Rd) ((sf)<<31 | 0b11110<<24 | (type)<<22 | 1<<21 | (rmode)<<19 | (opcode)<<16 | (Rn)<<5 | (Rd)) +#define SCVTFSw(Sd, Wn) EMIT(SCVTF_scalar(0, 0b00, 0b00, 0b010, Wn, Sd)) +#define SCVTFDw(Dd, Wn) EMIT(SCVTF_scalar(0, 0b01, 0b00, 0b010, Wn, Dd)) +#define SCVTFSx(Sd, Xn) EMIT(SCVTF_scalar(1, 0b00, 0b00, 0b010, Xn, Sd)) +#define SCVTFDx(Dd, Xn) EMIT(SCVTF_scalar(1, 0b01, 0b00, 0b010, Xn, Dd)) + +#define SCVTF_vector_scalar(U, sz, Rn, Rd) (1<<30 | (U)<<29 | 0b11110<<24 | (sz)<<22 | 0b10000<<17 | 0b11101<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) +#define SCVTFSS(Vd, Vn) EMIT(SCVTF_vector_scalar(0, 0, Vn, Vd)) +#define SCVTFDD(Vd, Vn) EMIT(SCVTF_vector_scalar(0, 1, Vn, Vd)) + +#define SCVTF_vector(Q, U, sz, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (sz)<<22 | 0b10000<<17 | 0b11101<<12 | 
0b10<<10 | (Rn)<<5 | (Rd)) +#define SCVTFS(Vd, Vn) EMIT(SCVTF_vector(0, 0, 0, Vn, Vd)) +#define SCVTFD(Vd, Vn) EMIT(SCVTF_vector(0, 0, 1, Vn, Vd)) +#define SCVTQFS(Vd, Vn) EMIT(SCVTF_vector(1, 0, 0, Vn, Vd)) +#define SCVTQFD(Vd, Vn) EMIT(SCVTF_vector(1, 0, 1, Vn, Vd)) + +// FRINTI Floating-point Round to Integral, using current rounding mode from FPCR (vector). +#define FRINT_vector(Q, U, o2, sz, o1, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (o2)<<23 | (sz)<<22 | 0b10000<<17 | 0b1100<<13 | (o1)<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) +#define VFRINTIS(Vd,Vn) EMIT(FRINT_vector(0, 1, 1, 0, 1, Vn, Vd)) +#define VFRINTISQ(Vd,Vn) EMIT(FRINT_vector(1, 1, 1, 0, 1, Vn, Vd)) +#define VFRINTIDQ(Vd,Vn) EMIT(FRINT_vector(1, 1, 1, 1, 1, Vn, Vd)) + +#define FRINTxx_scalar(type, op, Rn, Rd) (0b11110<<24 | (type)<<22 | 1<<21 | 0b0100<<17 | (op)<<15 | 0b10000<<10 | (Rn)<<5 | (Rd)) +#define FRINT32ZS(Sd, Sn) EMIT(FRINTxx_scalar(0b00, 0b00, Sn, Sd)) +#define FRINT32ZD(Dd, Dn) EMIT(FRINTxx_scalar(0b01, 0b00, Dn, Dd)) +#define FRINT32XS(Sd, Sn) EMIT(FRINTxx_scalar(0b00, 0b01, Sn, Sd)) +#define FRINT32XD(Dd, Dn) EMIT(FRINTxx_scalar(0b01, 0b01, Dn, Dd)) +#define FRINT64ZS(Sd, Sn) EMIT(FRINTxx_scalar(0b00, 0b10, Sn, Sd)) +#define FRINT64ZD(Dd, Dn) EMIT(FRINTxx_scalar(0b01, 0b10, Dn, Dd)) +#define FRINT64XS(Sd, Sn) EMIT(FRINTxx_scalar(0b00, 0b11, Sn, Sd)) +#define FRINT64XD(Dd, Dn) EMIT(FRINTxx_scalar(0b01, 0b11, Dn, Dd)) + +#define FRINT_scalar(type, rmode, Rn, Rd) (0b11110<<24 | (type)<<22 | 1<<21 | 0b001<<18 | (rmode)<<15 | 0b10000<<10 | (Rn)<<5 | (Rd)) +// round toward 0 (truncate) +#define FRINTZS(Sd, Sn) EMIT(FRINT_scalar(0b00, 0b011, Sn, Sd)) +// round toward 0 (truncate) +#define FRINTZD(Sd, Sn) EMIT(FRINT_scalar(0b01, 0b011, Sn, Sd)) +// round with current FPCR mode +#define FRINTXS(Sd, Sn) EMIT(FRINT_scalar(0b00, 0b110, Sn, Sd)) +// round with current FPCR mode +#define FRINTXD(Sd, Sn) EMIT(FRINT_scalar(0b01, 0b110, Sn, Sd)) +// round with mode, mode is 0 = TieEven, 1=+inf, 2=-inf, 3=zero +#define FRINTRRS(Sd, Sn, mode) EMIT(FRINT_scalar(0b00, ((mode)&3), Sn, Sd)) +// round with mode, mode is 0 = TieEven, 1=+inf, 2=-inf, 3=zero +#define FRINTRRD(Dd, Dn, mode) EMIT(FRINT_scalar(0b01, ((mode)&3), Dn, Dd)) + +// FMAX / FMIN +#define FMINMAX_vector(Q, U, o1, sz, Rm, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (o1)<<23 | (sz)<<22 | 0b1<<21 | (Rm)<<16 | 0b11110<<11 | 1<<10 | (Rn)<<5 | (Rd)) +#define VFMINS(Vd, Vn, Vm) EMIT(FMINMAX_vector(0, 0, 1, 0, Vm, Vn, Vd)) +#define VFMAXS(Vd, Vn, Vm) EMIT(FMINMAX_vector(0, 0, 0, 0, Vm, Vn, Vd)) +#define VFMINQS(Vd, Vn, Vm) EMIT(FMINMAX_vector(1, 0, 1, 0, Vm, Vn, Vd)) +#define VFMAXQS(Vd, Vn, Vm) EMIT(FMINMAX_vector(1, 0, 0, 0, Vm, Vn, Vd)) +#define VFMINQD(Vd, Vn, Vm) EMIT(FMINMAX_vector(1, 0, 1, 1, Vm, Vn, Vd)) +#define VFMAXQD(Vd, Vn, Vm) EMIT(FMINMAX_vector(1, 0, 0, 1, Vm, Vn, Vd)) + +#define FMINMAX_scalar(type, Rm, op, Rn, Rd) (0b11110<<24 | (type)<<22 | 1<<21 | (Rm)<<16 | 0b01<<14 | (op)<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) +#define FMINS(Sd, Sn, Sm) EMIT(FMINMAX_scalar(0b00, Sm, 0b01, Sn, Sd)) +#define FMIND(Dd, Dn, Dm) EMIT(FMINMAX_scalar(0b01, Dm, 0b01, Dn, Dd)) +#define FMAXS(Sd, Sn, Sm) EMIT(FMINMAX_scalar(0b00, Sm, 0b00, Sn, Sd)) +#define FMAXD(Dd, Dn, Dm) EMIT(FMINMAX_scalar(0b01, Dm, 0b00, Dn, Dd)) +// FMINNM NaN vs Number: number is picked +#define FMINNMS(Sd, Sn, Sm) EMIT(FMINMAX_scalar(0b00, Sm, 0b11, Sn, Sd)) +// FMINNM NaN vs Number: number is picked +#define FMINNMD(Dd, Dn, Dm) EMIT(FMINMAX_scalar(0b01, Dm, 0b11, Dn, Dd)) +// FMAXNM NaN vs Number: number is 
picked +#define FMAXNMS(Sd, Sn, Sm) EMIT(FMINMAX_scalar(0b00, Sm, 0b10, Sn, Sd)) +// FMAXNM NaN vs Number: number is picked +#define FMAXNMD(Dd, Dn, Dm) EMIT(FMINMAX_scalar(0b01, Dm, 0b10, Dn, Dd)) + +// ZIP / UZP +#define ZIP_gen(Q, size, Rm, op, Rn, Rd) ((Q)<<30 | 0b001110<<24 | (size)<<22 | (Rm)<<16 | (op)<<14 | 0b11<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) +#define VZIP1Q_8(Rt, Rn, Rm) EMIT(ZIP_gen(1, 0b00, Rm, 0, Rn, Rt)) +#define VZIP2Q_8(Rt, Rn, Rm) EMIT(ZIP_gen(1, 0b00, Rm, 1, Rn, Rt)) +#define VZIP1_8(Rt, Rn, Rm) EMIT(ZIP_gen(0, 0b00, Rm, 0, Rn, Rt)) +#define VZIP2_8(Rt, Rn, Rm) EMIT(ZIP_gen(0, 0b00, Rm, 1, Rn, Rt)) +#define VZIP1Q_16(Rt, Rn, Rm) EMIT(ZIP_gen(1, 0b01, Rm, 0, Rn, Rt)) +#define VZIP2Q_16(Rt, Rn, Rm) EMIT(ZIP_gen(1, 0b01, Rm, 1, Rn, Rt)) +#define VZIP1_16(Rt, Rn, Rm) EMIT(ZIP_gen(0, 0b01, Rm, 0, Rn, Rt)) +#define VZIP2_16(Rt, Rn, Rm) EMIT(ZIP_gen(0, 0b01, Rm, 1, Rn, Rt)) +#define VZIP1Q_32(Rt, Rn, Rm) EMIT(ZIP_gen(1, 0b10, Rm, 0, Rn, Rt)) +#define VZIP2Q_32(Rt, Rn, Rm) EMIT(ZIP_gen(1, 0b10, Rm, 1, Rn, Rt)) +#define VZIP1_32(Rt, Rn, Rm) EMIT(ZIP_gen(0, 0b10, Rm, 0, Rn, Rt)) +#define VZIP2_32(Rt, Rn, Rm) EMIT(ZIP_gen(0, 0b10, Rm, 1, Rn, Rt)) +#define VZIP1Q_64(Rt, Rn, Rm) EMIT(ZIP_gen(1, 0b11, Rm, 0, Rn, Rt)) +#define VZIP2Q_64(Rt, Rn, Rm) EMIT(ZIP_gen(1, 0b11, Rm, 1, Rn, Rt)) + +#define UZP_gen(Q, size, Rm, op, Rn, Rd) ((Q)<<30 | 0b001110<<24 | (size)<<22 | (Rm)<<16 | (op)<<14 | 0b01<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) +#define VUZP1Q_8(Rt, Rn, Rm) EMIT(UZP_gen(1, 0b00, Rm, 0, Rn, Rt)) +#define VUZP2Q_8(Rt, Rn, Rm) EMIT(UZP_gen(1, 0b00, Rm, 1, Rn, Rt)) +#define VUZP1_8(Rt, Rn, Rm) EMIT(UZP_gen(0, 0b00, Rm, 0, Rn, Rt)) +#define VUZP2_8(Rt, Rn, Rm) EMIT(UZP_gen(0, 0b00, Rm, 1, Rn, Rt)) +#define VUZP1Q_16(Rt, Rn, Rm) EMIT(UZP_gen(1, 0b01, Rm, 0, Rn, Rt)) +#define VUZP2Q_16(Rt, Rn, Rm) EMIT(UZP_gen(1, 0b01, Rm, 1, Rn, Rt)) +#define VUZP1_16(Rt, Rn, Rm) EMIT(UZP_gen(0, 0b01, Rm, 0, Rn, Rt)) +#define VUZP2_16(Rt, Rn, Rm) EMIT(UZP_gen(0, 0b01, Rm, 1, Rn, Rt)) +#define VUZP1Q_32(Rt, Rn, Rm) EMIT(UZP_gen(1, 0b10, Rm, 0, Rn, Rt)) +#define VUZP2Q_32(Rt, Rn, Rm) EMIT(UZP_gen(1, 0b10, Rm, 1, Rn, Rt)) +#define VUZP1_32(Rt, Rn, Rm) EMIT(UZP_gen(0, 0b10, Rm, 0, Rn, Rt)) +#define VUZP2_32(Rt, Rn, Rm) EMIT(UZP_gen(0, 0b10, Rm, 1, Rn, Rt)) +#define VUZP1Q_64(Rt, Rn, Rm) EMIT(UZP_gen(1, 0b11, Rm, 0, Rn, Rt)) +#define VUZP2Q_64(Rt, Rn, Rm) EMIT(UZP_gen(1, 0b11, Rm, 1, Rn, Rt)) + +#define DUP_gen(Q, imm5, Rn, Rd) ((Q)<<30 | 0b01110000<<21 | (imm5)<<16 | 1<<10 | (Rn)<<5 | (Rd)) +#define VDUP_8(Vd, Vn, idx) EMIT(DUP_gen(0, ((idx)<<1|1), Vn, Vd)) +#define VDUPQ_8(Vd, Vn, idx) EMIT(DUP_gen(1, ((idx)<<1|1), Vn, Vd)) +#define VDUP_16(Vd, Vn, idx) EMIT(DUP_gen(0, ((idx)<<2|0b10), Vn, Vd)) +#define VDUPQ_16(Vd, Vn, idx) EMIT(DUP_gen(1, ((idx)<<2|0b10), Vn, Vd)) +#define VDUP_32(Vd, Vn, idx) EMIT(DUP_gen(0, ((idx)<<3|0b100), Vn, Vd)) +#define VDUPQ_32(Vd, Vn, idx) EMIT(DUP_gen(1, ((idx)<<3|0b100), Vn, Vd)) +#define VDUPQ_64(Vd, Vn, idx) EMIT(DUP_gen(1, ((idx)<<4|0b1000), Vn, Vd)) + +// TBL +#define TBL_gen(Q, Rm, len, op, Rn, Rd) ((Q)<<30 | 0b001110<<24 | (Rm)<<16 | (len)<<13 | (op)<<12 | (Rn)<<5 | (Rd)) +//Use Rm[] to pick from Rn element and store in Rd. Out-of-range element gets 0 +#define VTBLQ1_8(Rd, Rn, Rm) EMIT(TBL_gen(1, Rm, 0b00, 0, Rn, Rd)) +#define VTBL1_8(Rd, Rn, Rm) EMIT(TBL_gen(0, Rm, 0b00, 0, Rn, Rd)) +//Use Rm[] to pick from Rn, Rn+1 element and store in Rd. 
Out-of-range element gets 0 +#define VTBLQ2_8(Rd, Rn, Rm) EMIT(TBL_gen(1, Rm, 0b01, 0, Rn, Rd)) +//Use Rm[] to pick from Rn, Rn+1, Rn+2 element and store in Rd. Out-of-range element gets 0 +#define VTBLQ3_8(Rd, Rn, Rm) EMIT(TBL_gen(1, Rm, 0b10, 0, Rn, Rd)) +//Use Rm[] to pick from Rn, Rn+1, Rn+2, Rn+3 element and store in Rd. Out-of-range element gets 0 +#define VTBLQ4_8(Rd, Rn, Rm) EMIT(TBL_gen(1, Rm, 0b11, 0, Rn, Rd)) +//Use Rm[] to pick from Rn element and store in Rd. Out-of-range element stays untouched +#define VTBXQ1_8(Rd, Rn, Rm) EMIT(TBL_gen(1, Rm, 0b00, 1, Rn, Rd)) +//Use Rm[] to pick from Rn, Rn+1 element and store in Rd. Out-of-range element stays untouched +#define VTBXQ2_8(Rd, Rn, Rm) EMIT(TBL_gen(1, Rm, 0b01, 1, Rn, Rd)) +//Use Rm[] to pick from Rn, Rn+1, Rn+2 element and store in Rd. Out-of-range element stays untouched +#define VTBXQ3_8(Rd, Rn, Rm) EMIT(TBL_gen(1, Rm, 0b10, 1, Rn, Rd)) +//Use Rm[] to pick from Rn, Rn+1, Rn+2, Rn+3 element and store in Rd. Out-of-range element stays untouched +#define VTBXQ4_8(Rd, Rn, Rm) EMIT(TBL_gen(1, Rm, 0b11, 1, Rn, Rd)) + +// TRN +#define TRN_gen(Q, size, Rm, op, Rn, Rd) ((Q)<<30 | 0b001110<<24 | (size)<<22 | (Rm)<<16 | (op)<<14 | 0b10<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) +#define VTRNQ1_64(Vd, Vn, Vm) EMIT(TRN_gen(1, 0b11, Vm, 0, Vn, Vd)) +#define VTRNQ1_32(Vd, Vn, Vm) EMIT(TRN_gen(1, 0b10, Vm, 0, Vn, Vd)) +#define VTRNQ1_16(Vd, Vn, Vm) EMIT(TRN_gen(1, 0b01, Vm, 0, Vn, Vd)) +#define VTRNQ1_8(Vd, Vn, Vm) EMIT(TRN_gen(1, 0b00, Vm, 0, Vn, Vd)) +#define VSWP(Vd, Vn) VTRNQ1_64(Vd, Vn, Vn) +#define VTRNQ2_64(Vd, Vn, Vm) EMIT(TRN_gen(1, 0b11, Vm, 1, Vn, Vd)) +#define VTRNQ2_32(Vd, Vn, Vm) EMIT(TRN_gen(1, 0b10, Vm, 1, Vn, Vd)) +#define VTRNQ2_16(Vd, Vn, Vm) EMIT(TRN_gen(1, 0b01, Vm, 1, Vn, Vd)) +#define VTRNQ2_8(Vd, Vn, Vm) EMIT(TRN_gen(1, 0b00, Vm, 1, Vn, Vd)) + +// QXTN / QXTN2 +#define QXTN_scalar(U, size, Rn, Rd) (0b01<<30 | (U)<<29 | 0b11110<<24 | (size)<<22 | 0b10000<<17 | 0b10100<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) +// Signed saturating extract Narrow, from D to S +#define SQXTN_S_D(Sd, Dn) EMIT(QXTN_scalar(0, 0b10, Dn, Sd)) +// Signed saturating extract Narrow, from S to H +#define SQXTN_H_S(Hd, Sn) EMIT(QXTN_scalar(0, 0b01, Sn, Hd)) +// Signed saturating extract Narrow, from H to B +#define SQXTN_B_H(Bd, Hn) EMIT(QXTN_scalar(0, 0b00, Hn, Bd)) + +#define QXTN_vector(Q, U, size, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 0b10000<<17 | 0b10100<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) +// Signed saturating extract Narrow, takes Rn element and reduce 64->32 with Signed saturation and fit lower part of Rd +#define SQXTN_32(Rd, Rn) EMIT(QXTN_vector(0, 0, 0b10, Rn, Rd)) +// Signed saturating extract Narrow, takes Rn element and reduce 64->32 with Signed saturation and fit higher part of Rd +#define SQXTN2_32(Rd, Rn) EMIT(QXTN_vector(1, 0, 0b10, Rn, Rd)) +// Signed saturating extract Narrow, takes Rn element and reduce 32->16 with Signed saturation and fit lower part of Rd +#define SQXTN_16(Rd, Rn) EMIT(QXTN_vector(0, 0, 0b01, Rn, Rd)) +// Signed saturating extract Narrow, takes Rn element and reduce 32->16 with Signed saturation and fit higher part of Rd +#define SQXTN2_16(Rd, Rn) EMIT(QXTN_vector(1, 0, 0b01, Rn, Rd)) +// Signed saturating extract Narrow, takes Rn element and reduce 16->8 with Signed saturation and fit lower part of Rd +#define SQXTN_8(Rd, Rn) EMIT(QXTN_vector(0, 0, 0b00, Rn, Rd)) +// Signed saturating extract Narrow, takes Rn element and reduce 16->8 with Signed saturation and fit higher part of Rd +#define SQXTN2_8(Rd, Rn) 
EMIT(QXTN_vector(1, 0, 0b00, Rn, Rd)) +// Unsigned saturating Extract Narrow, takes Rn element and reduce 64->32 with Unsigned saturation and fit lower part of Rd +#define UQXTN_32(Rd, Rn) EMIT(QXTN_vector(0, 1, 0b10, Rn, Rd)) +// Unsigned saturating Extract Narrow, takes Rn element and reduce 64->32 with Unsigned saturation and fit higher part of Rd +#define UQXTN2_32(Rd, Rn) EMIT(QXTN_vector(1, 1, 0b10, Rn, Rd)) +// Unsigned saturating extract Narrow, takes Rn element and reduce 32->16 with Unsigned saturation and fit lower part of Rd +#define UQXTN_16(Rd, Rn) EMIT(QXTN_vector(0, 1, 0b01, Rn, Rd)) +// Unsigned saturating extract Narrow, takes Rn element and reduce 32->16 with Unsigned saturation and fit higher part of Rd +#define UQXTN2_16(Rd, Rn) EMIT(QXTN_vector(1, 1, 0b01, Rn, Rd)) +// Unsigned saturating extract Narrow, takes Rn element and reduce 16->8 with Unsigned saturation and fit lower part of Rd +#define UQXTN_8(Rd, Rn) EMIT(QXTN_vector(0, 1, 0b00, Rn, Rd)) +// Unsigned saturating extract Narrow, takes Rn element and reduce 16->8 with Unsigned saturation and fit higher part of Rd +#define UQXTN2_8(Rd, Rn) EMIT(QXTN_vector(1, 1, 0b00, Rn, Rd)) + +#define QXTUN_vector(Q, U, size, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 0b10000<<17 | 0b10010<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) +// Signed saturating extract Unsigned Narrow, takes Rn element and reduce 64->32 with Unsigned saturation and fit lower part of Rd +#define SQXTUN_32(Rd, Rn) EMIT(QXTUN_vector(0, 1, 0b10, Rn, Rd)) +// Signed saturating extract Unsigned Narrow, takes Rn element and reduce 64->32 with Unsigned saturation and fit higher part of Rd +#define SQXTUN2_32(Rd, Rn) EMIT(QXTUN_vector(1, 1, 0b10, Rn, Rd)) +// Signed saturating extract Unsigned Narrow, takes Rn element and reduce 32->16 with Unsigned saturation and fit lower part of Rd +#define SQXTUN_16(Rd, Rn) EMIT(QXTUN_vector(0, 1, 0b01, Rn, Rd)) +// Signed saturating extract Unsigned Narrow, takes Rn element and reduce 32->16 with Unsigned saturation and fit higher part of Rd +#define SQXTUN2_16(Rd, Rn) EMIT(QXTUN_vector(1, 1, 0b01, Rn, Rd)) +// Signed saturating extract Unsigned Narrow, takes Rn element and reduce 16->8 with Unsigned saturation and fit lower part of Rd +#define SQXTUN_8(Rd, Rn) EMIT(QXTUN_vector(0, 1, 0b00, Rn, Rd)) +// Signed saturating extract Unsigned Narrow, takes Rn element and reduce 16->8 with Unsigned saturation and fit higher part of Rd +#define SQXTUN2_8(Rd, Rn) EMIT(QXTUN_vector(1, 1, 0b00, Rn, Rd)) + +// Integer CMP +// EQual +#define CMEQ_vector(Q, U, size, Rm, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 0b10001<<11 | 1<<10 | (Rn)<<5 | (Rd)) +#define VCMEQ_8(Rd, Rn, Rm) EMIT(CMEQ_vector(0, 1, 0b00, Rm, Rn, Rd)) +#define VCMEQ_16(Rd, Rn, Rm) EMIT(CMEQ_vector(0, 1, 0b01, Rm, Rn, Rd)) +#define VCMEQ_32(Rd, Rn, Rm) EMIT(CMEQ_vector(0, 1, 0b10, Rm, Rn, Rd)) +#define VCMEQQ_8(Rd, Rn, Rm) EMIT(CMEQ_vector(1, 1, 0b00, Rm, Rn, Rd)) +#define VCMEQQ_16(Rd, Rn, Rm) EMIT(CMEQ_vector(1, 1, 0b01, Rm, Rn, Rd)) +#define VCMEQQ_32(Rd, Rn, Rm) EMIT(CMEQ_vector(1, 1, 0b10, Rm, Rn, Rd)) +#define VCMEQQ_64(Rd, Rn, Rm) EMIT(CMEQ_vector(1, 1, 0b11, Rm, Rn, Rd)) +// Greater test +#define CMG_vector(Q, U, size, eq, Rm, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 0b0011<<12 | (eq)<<11 | 1<<10 | (Rn)<<5 | (Rd)) +// Signed Greater or Equal +#define VCMGEQ_8(Rd, Rn, Rm) EMIT(CMG_vector(1, 0, 0b00, 1, Rm, Rn, Rd)) +#define VCMGEQ_16(Rd, Rn, Rm) EMIT(CMG_vector(1, 0, 0b01, 1, Rm, Rn, Rd)) 
+#define VCMGEQ_32(Rd, Rn, Rm) EMIT(CMG_vector(1, 0, 0b10, 1, Rm, Rn, Rd)) +#define VCMGEQ_64(Rd, Rn, Rm) EMIT(CMG_vector(1, 0, 0b11, 1, Rm, Rn, Rd)) +// Unsigned Higher or Same +#define VCMHSQ_8(Rd, Rn, Rm) EMIT(CMG_vector(1, 1, 0b00, 1, Rm, Rn, Rd)) +#define VCMHSQ_16(Rd, Rn, Rm) EMIT(CMG_vector(1, 1, 0b01, 1, Rm, Rn, Rd)) +#define VCMHSQ_32(Rd, Rn, Rm) EMIT(CMG_vector(1, 1, 0b10, 1, Rm, Rn, Rd)) +#define VCMHSQ_64(Rd, Rn, Rm) EMIT(CMG_vector(1, 1, 0b11, 1, Rm, Rn, Rd)) +// Signed Greater Than +#define VCMGTQ_8(Rd, Rn, Rm) EMIT(CMG_vector(1, 0, 0b00, 0, Rm, Rn, Rd)) +#define VCMGTQ_16(Rd, Rn, Rm) EMIT(CMG_vector(1, 0, 0b01, 0, Rm, Rn, Rd)) +#define VCMGTQ_32(Rd, Rn, Rm) EMIT(CMG_vector(1, 0, 0b10, 0, Rm, Rn, Rd)) +#define VCMGTQ_64(Rd, Rn, Rm) EMIT(CMG_vector(1, 0, 0b11, 0, Rm, Rn, Rd)) +#define VCMGT_8(Rd, Rn, Rm) EMIT(CMG_vector(0, 0, 0b00, 0, Rm, Rn, Rd)) +#define VCMGT_16(Rd, Rn, Rm) EMIT(CMG_vector(0, 0, 0b01, 0, Rm, Rn, Rd)) +#define VCMGT_32(Rd, Rn, Rm) EMIT(CMG_vector(0, 0, 0b10, 0, Rm, Rn, Rd)) +// Unsigned Higher +#define VCHIQQ_8(Rd, Rn, Rm) EMIT(CMG_vector(1, 1, 0b00, 0, Rm, Rn, Rd)) +#define VCHIQQ_16(Rd, Rn, Rm) EMIT(CMG_vector(1, 1, 0b01, 0, Rm, Rn, Rd)) +#define VCHIQQ_32(Rd, Rn, Rm) EMIT(CMG_vector(1, 1, 0b10, 0, Rm, Rn, Rd)) +#define VCHIQQ_64(Rd, Rn, Rm) EMIT(CMG_vector(1, 1, 0b11, 0, Rm, Rn, Rd)) + +// Less Than 0 +#define CMLT_0_vector(Q, size, Rn, Rd) ((Q)<<30 | 0b01110<<24 | (size)<<22 | 0b10000<<17 | 0b01010<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) +#define CMLT_0_8(Rd, Rn) EMIT(CMLT_0_vector(0, 0b00, Rn, Rd)) +#define CMLT_0_16(Rd, Rn) EMIT(CMLT_0_vector(0, 0b01, Rn, Rd)) +#define CMLT_0_32(Rd, Rn) EMIT(CMLT_0_vector(0, 0b10, Rn, Rd)) +#define CMLTQ_0_8(Rd, Rn) EMIT(CMLT_0_vector(1, 0b00, Rn, Rd)) +#define CMLTQ_0_16(Rd, Rn) EMIT(CMLT_0_vector(1, 0b01, Rn, Rd)) +#define CMLTQ_0_32(Rd, Rn) EMIT(CMLT_0_vector(1, 0b10, Rn, Rd)) +#define CMLTQ_0_64(Rd, Rn) EMIT(CMLT_0_vector(1, 0b11, Rn, Rd)) +// Equal 0 +#define CMEQ_0_vector(Q, size, Rn, Rd) ((Q)<<30 | 0b01110<<24 | (size)<<22 | 0b10000<<17 | 0b0100<<13 | 1<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) +#define CMEQ_0_8(Rd, Rn) EMIT(CMEQ_0_vector(0, 0b00, Rn, Rd)) +#define CMEQ_0_16(Rd, Rn) EMIT(CMEQ_0_vector(0, 0b01, Rn, Rd)) +#define CMEQ_0_32(Rd, Rn) EMIT(CMEQ_0_vector(0, 0b10, Rn, Rd)) +#define CMEQQ_0_8(Rd, Rn) EMIT(CMEQ_0_vector(1, 0b00, Rn, Rd)) +#define CMEQQ_0_16(Rd, Rn) EMIT(CMEQ_0_vector(1, 0b01, Rn, Rd)) +#define CMEQQ_0_32(Rd, Rn) EMIT(CMEQ_0_vector(1, 0b10, Rn, Rd)) +#define CMEQQ_0_64(Rd, Rn) EMIT(CMEQ_0_vector(1, 0b11, Rn, Rd)) + +// Vector Float CMP +// EQual +#define FCMP_vector(Q, U, E, sz, Rm, ac, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (E)<<23 | (sz)<<22 | 1<<21 | (Rm)<<16 | 0b1110<<12 | (ac)<<11 | 1<<10 | (Rn)<<5 | (Rd)) +#define FCMEQQD(Rd, Rn, Rm) EMIT(FCMP_vector(1, 0, 0, 1, Rm, 0, Rn, Rd)) +#define FCMEQQS(Rd, Rn, Rm) EMIT(FCMP_vector(1, 0, 0, 0, Rm, 0, Rn, Rd)) +// Greater or Equal +#define FCMGEQD(Rd, Rn, Rm) EMIT(FCMP_vector(1, 1, 0, 1, Rm, 0, Rn, Rd)) +#define FCMGEQS(Rd, Rn, Rm) EMIT(FCMP_vector(1, 1, 0, 0, Rm, 0, Rn, Rd)) +#define FCMGEQD_ABS(Rd, Rn, Rm) EMIT(FCMP_vector(1, 1, 0, 1, Rm, 1, Rn, Rd)) +#define FCMGEQS_ABS(Rd, Rn, Rm) EMIT(FCMP_vector(1, 1, 0, 0, Rm, 1, Rn, Rd)) +// Greater Than +#define FCMGTQD(Rd, Rn, Rm) EMIT(FCMP_vector(1, 1, 1, 1, Rm, 0, Rn, Rd)) +#define FCMGTQS(Rd, Rn, Rm) EMIT(FCMP_vector(1, 1, 1, 0, Rm, 0, Rn, Rd)) +#define FCMGTQD_ABS(Rd, Rn, Rm) EMIT(FCMP_vector(1, 1, 1, 1, Rm, 1, Rn, Rd)) +#define FCMGTQS_ABS(Rd, Rn, Rm) EMIT(FCMP_vector(1, 1, 1, 0, Rm, 1, Rn, Rd)) + +// UMULL 
/ SMULL +#define MULL_vector(Q, U, size, Rm, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 0b1100<<12 |(Rn)<<5 |(Rd)) +#define VUMULL_8(Rd, Rn, Rm) EMIT(MULL_vector(0, 1, 0b00, Rm, Rn, Rd)) +#define VUMULL_16(Rd, Rn, Rm) EMIT(MULL_vector(0, 1, 0b01, Rm, Rn, Rd)) +#define VUMULL_32(Rd, Rn, Rm) EMIT(MULL_vector(0, 1, 0b10, Rm, Rn, Rd)) +#define VUMULL2_8(Rd, Rn, Rm) EMIT(MULL_vector(1, 1, 0b00, Rm, Rn, Rd)) +#define VUMULL2_16(Rd, Rn, Rm) EMIT(MULL_vector(1, 1, 0b01, Rm, Rn, Rd)) +#define VUMULL2_32(Rd, Rn, Rm) EMIT(MULL_vector(1, 1, 0b10, Rm, Rn, Rd)) +#define VSMULL_8(Rd, Rn, Rm) EMIT(MULL_vector(0, 0, 0b00, Rm, Rn, Rd)) +#define VSMULL_16(Rd, Rn, Rm) EMIT(MULL_vector(0, 0, 0b01, Rm, Rn, Rd)) +#define VSMULL_32(Rd, Rn, Rm) EMIT(MULL_vector(0, 0, 0b10, Rm, Rn, Rd)) +#define VSMULL2_8(Rd, Rn, Rm) EMIT(MULL_vector(1, 0, 0b00, Rm, Rn, Rd)) +#define VSMULL2_16(Rd, Rn, Rm) EMIT(MULL_vector(1, 0, 0b01, Rm, Rn, Rd)) +#define VSMULL2_32(Rd, Rn, Rm) EMIT(MULL_vector(1, 0, 0b10, Rm, Rn, Rd)) + +// MUL +#define MUL_vector(Q, size, Rm, Rn, Rd) ((Q)<<30 | 0b01110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 0b10011<<11 | 1<<10 | (Rn)<<5 | (Rd)) +#define VMUL_8(Vd, Vn, Vm) EMIT(MUL_vector(0, 0b00, Vm, Vn, Vd)) +#define VMUL_16(Vd, Vn, Vm) EMIT(MUL_vector(0, 0b01, Vm, Vn, Vd)) +#define VMUL_32(Vd, Vn, Vm) EMIT(MUL_vector(0, 0b10, Vm, Vn, Vd)) +#define VMULQ_8(Vd, Vn, Vm) EMIT(MUL_vector(1, 0b00, Vm, Vn, Vd)) +#define VMULQ_16(Vd, Vn, Vm) EMIT(MUL_vector(1, 0b01, Vm, Vn, Vd)) +#define VMULQ_32(Vd, Vn, Vm) EMIT(MUL_vector(1, 0b10, Vm, Vn, Vd)) + +// (S/Q)ADD +#define QADD_vector(Q, U, size, Rm, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 0b00001<<11 | 1<<10 | (Rn)<<5 | (Rd)) +#define SQADDQ_8(Vd, Vn, Vm) EMIT(QADD_vector(1, 0, 0b00, Vm, Vn, Vd)) +#define SQADDQ_16(Vd, Vn, Vm) EMIT(QADD_vector(1, 0, 0b01, Vm, Vn, Vd)) +#define SQADDQ_32(Vd, Vn, Vm) EMIT(QADD_vector(1, 0, 0b10, Vm, Vn, Vd)) +#define SQADDQ_64(Vd, Vn, Vm) EMIT(QADD_vector(1, 0, 0b11, Vm, Vn, Vd)) +#define UQADDQ_8(Vd, Vn, Vm) EMIT(QADD_vector(1, 1, 0b00, Vm, Vn, Vd)) +#define UQADDQ_16(Vd, Vn, Vm) EMIT(QADD_vector(1, 1, 0b01, Vm, Vn, Vd)) +#define UQADDQ_32(Vd, Vn, Vm) EMIT(QADD_vector(1, 1, 0b10, Vm, Vn, Vd)) +#define UQADDQ_64(Vd, Vn, Vm) EMIT(QADD_vector(1, 1, 0b11, Vm, Vn, Vd)) +#define SQADD_8(Vd, Vn, Vm) EMIT(QADD_vector(0, 0, 0b00, Vm, Vn, Vd)) +#define SQADD_16(Vd, Vn, Vm) EMIT(QADD_vector(0, 0, 0b01, Vm, Vn, Vd)) +#define SQADD_32(Vd, Vn, Vm) EMIT(QADD_vector(0, 0, 0b10, Vm, Vn, Vd)) +#define SQADD_64(Vd, Vn, Vm) EMIT(QADD_vector(0, 0, 0b11, Vm, Vn, Vd)) +#define UQADD_8(Vd, Vn, Vm) EMIT(QADD_vector(0, 1, 0b00, Vm, Vn, Vd)) +#define UQADD_16(Vd, Vn, Vm) EMIT(QADD_vector(0, 1, 0b01, Vm, Vn, Vd)) +#define UQADD_32(Vd, Vn, Vm) EMIT(QADD_vector(0, 1, 0b10, Vm, Vn, Vd)) +#define UQADD_64(Vd, Vn, Vm) EMIT(QADD_vector(0, 1, 0b11, Vm, Vn, Vd)) + +// Absolute Difference +#define AD_vector(Q, U, size, Rm, ac, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 0b0111<<12 | (ac)<<11 | 1<<10 | (Rn)<<5 | (Rd)) +// Signed Absolute Difference and accumulate +#define SABAQ_8(Rd, Rn, Rm) EMIT(AD_vector(1, 0, 0b00, Rm, 1, Rn, Rd)) +#define SABAQ_16(Rd, Rn, Rm) EMIT(AD_vector(1, 0, 0b01, Rm, 1, Rn, Rd)) +#define SABAQ_32(Rd, Rn, Rm) EMIT(AD_vector(1, 0, 0b10, Rm, 1, Rn, Rd)) +#define SABA_8(Rd, Rn, Rm) EMIT(AD_vector(0, 0, 0b00, Rm, 1, Rn, Rd)) +#define SABA_16(Rd, Rn, Rm) EMIT(AD_vector(0, 0, 0b01, Rm, 1, Rn, Rd)) +#define SABA_32(Rd, Rn, Rm) EMIT(AD_vector(0, 0, 0b10, Rm, 1, 
Rn, Rd)) +// Signed Absolute Difference +#define SABDQ_8(Rd, Rn, Rm) EMIT(AD_vector(1, 0, 0b00, Rm, 0, Rn, Rd)) +#define SABDQ_16(Rd, Rn, Rm) EMIT(AD_vector(1, 0, 0b01, Rm, 0, Rn, Rd)) +#define SABDQ_32(Rd, Rn, Rm) EMIT(AD_vector(1, 0, 0b10, Rm, 0, Rn, Rd)) +#define SABD_8(Rd, Rn, Rm) EMIT(AD_vector(0, 0, 0b00, Rm, 0, Rn, Rd)) +#define SABD_16(Rd, Rn, Rm) EMIT(AD_vector(0, 0, 0b01, Rm, 0, Rn, Rd)) +#define SABD_32(Rd, Rn, Rm) EMIT(AD_vector(0, 0, 0b10, Rm, 0, Rn, Rd)) + +#define ADL_vector(Q, U, size, Rm, op, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 0b01<<14 | (op)<<13 | 1<<12 | (Rn)<<5 | (Rd)) +#define SABAL_8(Rd, Rn, Rm) EMIT(ADL_vector(0, 0, 0b00, Rm, 0, Rn, Rd)) +#define SABAL2_8(Rd, Rn, Rm) EMIT(ADL_vector(1, 0, 0b00, Rm, 0, Rn, Rd)) +#define SABAL_16(Rd, Rn, Rm) EMIT(ADL_vector(0, 0, 0b01, Rm, 0, Rn, Rd)) +#define SABAL2_16(Rd, Rn, Rm) EMIT(ADL_vector(1, 0, 0b01, Rm, 0, Rn, Rd)) +#define SABAL_32(Rd, Rn, Rm) EMIT(ADL_vector(0, 0, 0b10, Rm, 0, Rn, Rd)) +#define SABAL2_32(Rd, Rn, Rm) EMIT(ADL_vector(1, 0, 0b10, Rm, 0, Rn, Rd)) +#define UABAL_8(Rd, Rn, Rm) EMIT(ADL_vector(0, 1, 0b00, Rm, 0, Rn, Rd)) +#define UABAL2_8(Rd, Rn, Rm) EMIT(ADL_vector(1, 1, 0b00, Rm, 0, Rn, Rd)) +#define UABAL_16(Rd, Rn, Rm) EMIT(ADL_vector(0, 1, 0b01, Rm, 0, Rn, Rd)) +#define UABAL2_16(Rd, Rn, Rm) EMIT(ADL_vector(1, 1, 0b01, Rm, 0, Rn, Rd)) +#define UABAL_32(Rd, Rn, Rm) EMIT(ADL_vector(0, 1, 0b10, Rm, 0, Rn, Rd)) +#define UABAL2_32(Rd, Rn, Rm) EMIT(ADL_vector(1, 1, 0b10, Rm, 0, Rn, Rd)) +#define SABDL_8(Rd, Rn, Rm) EMIT(ADL_vector(0, 0, 0b00, Rm, 1, Rn, Rd)) +#define SABDL2_8(Rd, Rn, Rm) EMIT(ADL_vector(1, 0, 0b00, Rm, 1, Rn, Rd)) +#define SABDL_16(Rd, Rn, Rm) EMIT(ADL_vector(0, 0, 0b01, Rm, 1, Rn, Rd)) +#define SABDL2_16(Rd, Rn, Rm) EMIT(ADL_vector(1, 0, 0b01, Rm, 1, Rn, Rd)) +#define SABDL_32(Rd, Rn, Rm) EMIT(ADL_vector(0, 0, 0b10, Rm, 1, Rn, Rd)) +#define SABDL2_32(Rd, Rn, Rm) EMIT(ADL_vector(1, 0, 0b10, Rm, 1, Rn, Rd)) +#define UABDL_8(Rd, Rn, Rm) EMIT(ADL_vector(0, 1, 0b00, Rm, 1, Rn, Rd)) +#define UABDL2_8(Rd, Rn, Rm) EMIT(ADL_vector(1, 1, 0b00, Rm, 1, Rn, Rd)) +#define UABDL_16(Rd, Rn, Rm) EMIT(ADL_vector(0, 1, 0b01, Rm, 1, Rn, Rd)) +#define UABDL2_16(Rd, Rn, Rm) EMIT(ADL_vector(1, 1, 0b01, Rm, 1, Rn, Rd)) +#define UABDL_32(Rd, Rn, Rm) EMIT(ADL_vector(0, 1, 0b10, Rm, 1, Rn, Rd)) +#define UABDL2_32(Rd, Rn, Rm) EMIT(ADL_vector(1, 1, 0b10, Rm, 1, Rn, Rd)) + +// Add Pairwise +#define ADDLP_vector(Q, U, size, op, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 1<<21 | (op)<<14 | 0b10<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) +#define SADDLPQ_8(Rd, Rn) EMIT(ADDLP_vector(1, 0, 0b00, 0, Rn, Rd)) +#define SADDLPQ_16(Rd, Rn) EMIT(ADDLP_vector(1, 0, 0b01, 0, Rn, Rd)) +#define SADDLPQ_32(Rd, Rn) EMIT(ADDLP_vector(1, 0, 0b10, 0, Rn, Rd)) +#define SADDLP_8(Rd, Rn) EMIT(ADDLP_vector(0, 0, 0b00, 0, Rn, Rd)) +#define SADDLP_16(Rd, Rn) EMIT(ADDLP_vector(0, 0, 0b01, 0, Rn, Rd)) +#define SADDLP_32(Rd, Rn) EMIT(ADDLP_vector(0, 0, 0b10, 0, Rn, Rd)) +#define UADDLPQ_8(Rd, Rn) EMIT(ADDLP_vector(1, 1, 0b00, 0, Rn, Rd)) +#define UADDLPQ_16(Rd, Rn) EMIT(ADDLP_vector(1, 1, 0b01, 0, Rn, Rd)) +#define UADDLPQ_32(Rd, Rn) EMIT(ADDLP_vector(1, 1, 0b10, 0, Rn, Rd)) +#define UADDLP_8(Rd, Rn) EMIT(ADDLP_vector(0, 1, 0b00, 0, Rn, Rd)) +#define UADDLP_16(Rd, Rn) EMIT(ADDLP_vector(0, 1, 0b01, 0, Rn, Rd)) +#define UADDLP_32(Rd, Rn) EMIT(ADDLP_vector(0, 1, 0b10, 0, Rn, Rd)) + +// Add across vector +#define ADDLV_vector(Q, U, size, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 0b11000<<17 | 
0b00011<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) +#define SADDLVQ_8(Rd, Rn) EMIT(ADDLV_vector(1, 0, 0b00, Rn, Rd)) +#define SADDLVQ_16(Rd, Rn) EMIT(ADDLV_vector(1, 0, 0b01, Rn, Rd)) +#define SADDLVQ_32(Rd, Rn) EMIT(ADDLV_vector(1, 0, 0b10, Rn, Rd)) +#define SADDLV_8(Rd, Rn) EMIT(ADDLV_vector(0, 0, 0b00, Rn, Rd)) +#define SADDLV_16(Rd, Rn) EMIT(ADDLV_vector(0, 0, 0b01, Rn, Rd)) +#define SADDLV_32(Rd, Rn) EMIT(ADDLV_vector(0, 0, 0b10, Rn, Rd)) +#define UADDLVQ_8(Rd, Rn) EMIT(ADDLV_vector(1, 1, 0b00, Rn, Rd)) +#define UADDLVQ_16(Rd, Rn) EMIT(ADDLV_vector(1, 1, 0b01, Rn, Rd)) +#define UADDLVQ_32(Rd, Rn) EMIT(ADDLV_vector(1, 1, 0b10, Rn, Rd)) +#define UADDLV_8(Rd, Rn) EMIT(ADDLV_vector(0, 1, 0b00, Rn, Rd)) +#define UADDLV_16(Rd, Rn) EMIT(ADDLV_vector(0, 1, 0b01, Rn, Rd)) +#define UADDLV_32(Rd, Rn) EMIT(ADDLV_vector(0, 1, 0b10, Rn, Rd)) + +// MOV Immediate +#define MOVI_vector(Q, op, abc, cmode, defgh, Rd) ((Q)<<30 | (op)<<29 | 0b0111100000<<19 | (abc)<<16 | (cmode)<<12 | 1<<10 | (defgh)<<5 | (Rd)) +#define MOVIQ_8(Rd, imm8) EMIT(MOVI_vector(1, 0, (((imm8)>>5)&0b111), 0b1110, ((imm8)&0b11111), Rd)) +#define MOVI_8(Rd, imm8) EMIT(MOVI_vector(0, 0, (((imm8)>>5)&0b111), 0b1110, ((imm8)&0b11111), Rd)) + +// SHLL and eXtend Long +#define SHLL_vector(Q, U, immh, immb, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b011110<<23 | (immh)<<19 | (immb)<<16 | 0b10100<<11 | 1<<10 | (Rn)<<5 | (Rd)) +#define USHLL2_8(Vd, Vn, imm) EMIT(SHLL_vector(1, 1, 0b0001, (imm)&0x7, Vn, Vd)) +#define USHLL_8(Vd, Vn, imm) EMIT(SHLL_vector(0, 1, 0b0001, (imm)&0x7, Vn, Vd)) +#define SSHLL2_8(Vd, Vn, imm) EMIT(SHLL_vector(1, 0, 0b0001, (imm)&0x7, Vn, Vd)) +#define SSHLL_8(Vd, Vn, imm) EMIT(SHLL_vector(0, 0, 0b0001, (imm)&0x7, Vn, Vd)) +#define USHLL2_16(Vd, Vn, imm) EMIT(SHLL_vector(1, 1, 0b0010|(((imm)>>3)&1), (imm)&0x7, Vn, Vd)) +#define USHLL_16(Vd, Vn, imm) EMIT(SHLL_vector(0, 1, 0b0010|(((imm)>>3)&1), (imm)&0x7, Vn, Vd)) +#define SSHLL2_16(Vd, Vn, imm) EMIT(SHLL_vector(1, 0, 0b0010|(((imm)>>3)&1), (imm)&0x7, Vn, Vd)) +#define SSHLL_16(Vd, Vn, imm) EMIT(SHLL_vector(0, 0, 0b0010|(((imm)>>3)&1), (imm)&0x7, Vn, Vd)) +#define USHLL2_32(Vd, Vn, imm) EMIT(SHLL_vector(1, 1, 0b0100|(((imm)>>3)&3), (imm)&0x7, Vn, Vd)) +#define USHLL_32(Vd, Vn, imm) EMIT(SHLL_vector(0, 1, 0b0100|(((imm)>>3)&3), (imm)&0x7, Vn, Vd)) +#define SSHLL2_32(Vd, Vn, imm) EMIT(SHLL_vector(1, 0, 0b0100|(((imm)>>3)&3), (imm)&0x7, Vn, Vd)) +#define SSHLL_32(Vd, Vn, imm) EMIT(SHLL_vector(0, 0, 0b0100|(((imm)>>3)&3), (imm)&0x7, Vn, Vd)) + +#define UXTL_8(Vd, Vn) USHLL_8(Vd, Vn, 0) +#define UXTL2_8(Vd, Vn) USHLL2_8(Vd, Vn, 0) +#define UXTL_16(Vd, Vn) USHLL_16(Vd, Vn, 0) +#define UXTL2_16(Vd, Vn) USHLL2_16(Vd, Vn, 0) +#define UXTL_32(Vd, Vn) USHLL_32(Vd, Vn, 0) +#define UXTL2_32(Vd, Vn) USHLL2_32(Vd, Vn, 0) + +#define SXTL_8(Vd, Vn) SSHLL_8(Vd, Vn, 0) +#define SXTL2_8(Vd, Vn) SSHLL2_8(Vd, Vn, 0) +#define SXTL_16(Vd, Vn) SSHLL_16(Vd, Vn, 0) +#define SXTL2_16(Vd, Vn) SSHLL2_16(Vd, Vn, 0) +#define SXTL_32(Vd, Vn) SSHLL_32(Vd, Vn, 0) +#define SXTL2_32(Vd, Vn) SSHLL2_32(Vd, Vn, 0) + +// SHRN +#define QSHRN_vector(Q, U, immh, immb, op, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b011110<<23 | (immh)<<19 | (immb)<<16 | 0b1001<<12 | (op)<<11 | 1<<10 | (Rn)<<5 | (Rd)) +#define UQSHRN_8(Vd, Vn, imm) EMIT(QSHRN_vector(0, 1, 0b0001, (8-(imm))&0x7, 0, Vn, Vd)) +#define UQSHRN2_8(Vd, Vn, imm) EMIT(QSHRN_vector(1, 1, 0b0001, (8-(imm))&0x7, 0, Vn, Vd)) +#define SQSHRN_8(Vd, Vn, imm) EMIT(QSHRN_vector(0, 0, 0b0001, (8-(imm))&0x7, 0, Vn, Vd)) +#define SQSHRN2_8(Vd, Vn, imm) EMIT(QSHRN_vector(1, 0, 0b0001, (8-(imm))&0x7, 
0, Vn, Vd)) +#define UQSHRN_16(Vd, Vn, imm) EMIT(QSHRN_vector(0, 1, 0b0010|(((16-(imm))>>3)&1), (16-(imm))&0x7, 0, Vn, Vd)) +#define UQSHRN2_16(Vd, Vn, imm) EMIT(QSHRN_vector(1, 1, 0b0010|(((16-(imm))>>3)&1), (16-(imm))&0x7, 0, Vn, Vd)) +#define SQSHRN_16(Vd, Vn, imm) EMIT(QSHRN_vector(0, 0, 0b0010|(((16-(imm))>>3)&1), (16-(imm))&0x7, 0, Vn, Vd)) +#define SQSHRN2_16(Vd, Vn, imm) EMIT(QSHRN_vector(1, 0, 0b0010|(((16-(imm))>>3)&1), (16-(imm))&0x7, 0, Vn, Vd)) +#define UQSHRN_32(Vd, Vn, imm) EMIT(QSHRN_vector(0, 1, 0b0100|(((32-(imm))>>3)&3), (32-(imm))&0x7, 0, Vn, Vd)) +#define UQSHRN2_32(Vd, Vn, imm) EMIT(QSHRN_vector(1, 1, 0b0100|(((32-(imm))>>3)&3), (32-(imm))&0x7, 0, Vn, Vd)) +#define SQSHRN_32(Vd, Vn, imm) EMIT(QSHRN_vector(0, 0, 0b0100|(((32-(imm))>>3)&3), (32-(imm))&0x7, 0, Vn, Vd)) +#define SQSHRN2_32(Vd, Vn, imm) EMIT(QSHRN_vector(1, 0, 0b0100|(((32-(imm))>>3)&3), (32-(imm))&0x7, 0, Vn, Vd)) + +// UQSUB +#define QSUB_vector(Q, U, size, Rm, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 0b00101<<11 | 1<<10 | (Rn)<<5 | (Rd)) +#define UQSUB_8(Vd, Vn, Vm) EMIT(QSUB_vector(0, 1, 0b00, Vm, Vn, Vd)) +#define UQSUB_16(Vd, Vn, Vm) EMIT(QSUB_vector(0, 1, 0b01, Vm, Vn, Vd)) +#define UQSUB_32(Vd, Vn, Vm) EMIT(QSUB_vector(0, 1, 0b10, Vm, Vn, Vd)) +#define UQSUB_64(Vd, Vn, Vm) EMIT(QSUB_vector(0, 1, 0b11, Vm, Vn, Vd)) +#define SQSUB_8(Vd, Vn, Vm) EMIT(QSUB_vector(0, 0, 0b00, Vm, Vn, Vd)) +#define SQSUB_16(Vd, Vn, Vm) EMIT(QSUB_vector(0, 0, 0b01, Vm, Vn, Vd)) +#define SQSUB_32(Vd, Vn, Vm) EMIT(QSUB_vector(0, 0, 0b10, Vm, Vn, Vd)) +#define SQSUB_64(Vd, Vn, Vm) EMIT(QSUB_vector(0, 0, 0b11, Vm, Vn, Vd)) +#define UQSUBQ_8(Vd, Vn, Vm) EMIT(QSUB_vector(1, 1, 0b00, Vm, Vn, Vd)) +#define UQSUBQ_16(Vd, Vn, Vm) EMIT(QSUB_vector(1, 1, 0b01, Vm, Vn, Vd)) +#define UQSUBQ_32(Vd, Vn, Vm) EMIT(QSUB_vector(1, 1, 0b10, Vm, Vn, Vd)) +#define UQSUBQ_64(Vd, Vn, Vm) EMIT(QSUB_vector(1, 1, 0b11, Vm, Vn, Vd)) +#define SQSUBQ_8(Vd, Vn, Vm) EMIT(QSUB_vector(1, 0, 0b00, Vm, Vn, Vd)) +#define SQSUBQ_16(Vd, Vn, Vm) EMIT(QSUB_vector(1, 0, 0b01, Vm, Vn, Vd)) +#define SQSUBQ_32(Vd, Vn, Vm) EMIT(QSUB_vector(1, 0, 0b10, Vm, Vn, Vd)) +#define SQSUBQ_64(Vd, Vn, Vm) EMIT(QSUB_vector(1, 0, 0b11, Vm, Vn, Vd)) + +// MAX/MIN vector +#define MINMAX_vector(Q, U, size, Rm, op, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 0b0110<<12 | (op)<<11 | 1<<10 | (Rn)<<5 | (Rd)) +#define SMAX_8(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 0, 0b00, Vm, 0, Vn, Vd)) +#define SMAX_16(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 0, 0b01, Vm, 0, Vn, Vd)) +#define SMAX_32(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 0, 0b10, Vm, 0, Vn, Vd)) +#define SMAX_64(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 0, 0b11, Vm, 0, Vn, Vd)) +#define UMAX_8(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 1, 0b00, Vm, 0, Vn, Vd)) +#define UMAX_16(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 1, 0b01, Vm, 0, Vn, Vd)) +#define UMAX_32(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 1, 0b10, Vm, 0, Vn, Vd)) +#define UMAX_64(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 1, 0b11, Vm, 0, Vn, Vd)) +#define SMIN_8(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 0, 0b00, Vm, 1, Vn, Vd)) +#define SMIN_16(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 0, 0b01, Vm, 1, Vn, Vd)) +#define SMIN_32(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 0, 0b10, Vm, 1, Vn, Vd)) +#define SMIN_64(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 0, 0b11, Vm, 1, Vn, Vd)) +#define UMIN_8(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 1, 0b00, Vm, 1, Vn, Vd)) +#define UMIN_16(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 1, 0b01, Vm, 1, Vn, Vd)) +#define UMIN_32(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 1, 0b10, Vm, 1, Vn, Vd)) 
+#define UMIN_64(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 1, 0b11, Vm, 1, Vn, Vd)) +#define SMAXQ_8(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 0, 0b00, Vm, 0, Vn, Vd)) +#define SMAXQ_16(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 0, 0b01, Vm, 0, Vn, Vd)) +#define SMAXQ_32(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 0, 0b10, Vm, 0, Vn, Vd)) +#define SMAXQ_64(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 0, 0b11, Vm, 0, Vn, Vd)) +#define UMAXQ_8(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 1, 0b00, Vm, 0, Vn, Vd)) +#define UMAXQ_16(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 1, 0b01, Vm, 0, Vn, Vd)) +#define UMAXQ_32(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 1, 0b10, Vm, 0, Vn, Vd)) +#define UMAXQ_64(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 1, 0b11, Vm, 0, Vn, Vd)) +#define SMINQ_8(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 0, 0b00, Vm, 1, Vn, Vd)) +#define SMINQ_16(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 0, 0b01, Vm, 1, Vn, Vd)) +#define SMINQ_32(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 0, 0b10, Vm, 1, Vn, Vd)) +#define SMINQ_64(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 0, 0b11, Vm, 1, Vn, Vd)) +#define UMINQ_8(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 1, 0b00, Vm, 1, Vn, Vd)) +#define UMINQ_16(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 1, 0b01, Vm, 1, Vn, Vd)) +#define UMINQ_32(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 1, 0b10, Vm, 1, Vn, Vd)) +#define UMINQ_64(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 1, 0b11, Vm, 1, Vn, Vd)) + +// HADD vector +#define HADD_vector(Q, U, size, Rm, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 1<<10 | (Rn)<<5 | (Rd)) +#define SHADD_8(Vd, Vn, Vm) EMIT(HADD_vector(0, 0, 0b00, Vm, Vn, Vd)) +#define SHADD_16(Vd, Vn, Vm) EMIT(HADD_vector(0, 0, 0b01, Vm, Vn, Vd)) +#define SHADD_32(Vd, Vn, Vm) EMIT(HADD_vector(0, 0, 0b10, Vm, Vn, Vd)) +#define SHADDQ_8(Vd, Vn, Vm) EMIT(HADD_vector(1, 0, 0b00, Vm, Vn, Vd)) +#define SHADDQ_16(Vd, Vn, Vm) EMIT(HADD_vector(1, 0, 0b01, Vm, Vn, Vd)) +#define SHADDQ_32(Vd, Vn, Vm) EMIT(HADD_vector(1, 0, 0b10, Vm, Vn, Vd)) +#define UHADD_8(Vd, Vn, Vm) EMIT(HADD_vector(0, 1, 0b00, Vm, Vn, Vd)) +#define UHADD_16(Vd, Vn, Vm) EMIT(HADD_vector(0, 1, 0b01, Vm, Vn, Vd)) +#define UHADD_32(Vd, Vn, Vm) EMIT(HADD_vector(0, 1, 0b10, Vm, Vn, Vd)) +#define UHADDQ_8(Vd, Vn, Vm) EMIT(HADD_vector(1, 1, 0b00, Vm, Vn, Vd)) +#define UHADDQ_16(Vd, Vn, Vm) EMIT(HADD_vector(1, 1, 0b01, Vm, Vn, Vd)) +#define UHADDQ_32(Vd, Vn, Vm) EMIT(HADD_vector(1, 1, 0b10, Vm, Vn, Vd)) + +#define RHADD_vector(Q, U, size, Rm, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 0b00010<<11 | 1<<10 | (Rn)<<5 | (Rd)) +#define SRHADD_8(Vd, Vn, Vm) EMIT(RHADD_vector(0, 0, 0b00, Vm, Vn, Vd)) +#define SRHADD_16(Vd, Vn, Vm) EMIT(RHADD_vector(0, 0, 0b01, Vm, Vn, Vd)) +#define SRHADD_32(Vd, Vn, Vm) EMIT(RHADD_vector(0, 0, 0b10, Vm, Vn, Vd)) +#define SRHADDQ_8(Vd, Vn, Vm) EMIT(RHADD_vector(1, 0, 0b00, Vm, Vn, Vd)) +#define SRHADDQ_16(Vd, Vn, Vm) EMIT(RHADD_vector(1, 0, 0b01, Vm, Vn, Vd)) +#define SRHADDQ_32(Vd, Vn, Vm) EMIT(RHADD_vector(1, 0, 0b10, Vm, Vn, Vd)) +#define URHADD_8(Vd, Vn, Vm) EMIT(RHADD_vector(0, 1, 0b00, Vm, Vn, Vd)) +#define URHADD_16(Vd, Vn, Vm) EMIT(RHADD_vector(0, 1, 0b01, Vm, Vn, Vd)) +#define URHADD_32(Vd, Vn, Vm) EMIT(RHADD_vector(0, 1, 0b10, Vm, Vn, Vd)) +#define URHADDQ_8(Vd, Vn, Vm) EMIT(RHADD_vector(1, 1, 0b00, Vm, Vn, Vd)) +#define URHADDQ_16(Vd, Vn, Vm) EMIT(RHADD_vector(1, 1, 0b01, Vm, Vn, Vd)) +#define URHADDQ_32(Vd, Vn, Vm) EMIT(RHADD_vector(1, 1, 0b10, Vm, Vn, Vd)) + +// QRDMULH Signed saturating (Rounding) Doubling Multiply returning High half +#define QDMULH_vector(Q, U, size, Rm, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 1<<21 | 
(Rm)<<16 | 0b10110<<11 | 1<<10 | (Rn)<<5 | (Rd)) +#define SQRDMULH_8(Vd, Vn, Vm) EMIT(QDMULH_vector(0, 1, 0b00, Vm, Vn, Vd)) +#define SQRDMULH_16(Vd, Vn, Vm) EMIT(QDMULH_vector(0, 1, 0b01, Vm, Vn, Vd)) +#define SQRDMULH_32(Vd, Vn, Vm) EMIT(QDMULH_vector(0, 1, 0b10, Vm, Vn, Vd)) +#define SQRDMULHQ_8(Vd, Vn, Vm) EMIT(QDMULH_vector(1, 1, 0b00, Vm, Vn, Vd)) +#define SQRDMULHQ_16(Vd, Vn, Vm) EMIT(QDMULH_vector(1, 1, 0b01, Vm, Vn, Vd)) +#define SQRDMULHQ_32(Vd, Vn, Vm) EMIT(QDMULH_vector(1, 1, 0b10, Vm, Vn, Vd)) +#define SQDMULH_8(Vd, Vn, Vm) EMIT(QDMULH_vector(0, 0, 0b00, Vm, Vn, Vd)) +#define SQDMULH_16(Vd, Vn, Vm) EMIT(QDMULH_vector(0, 0, 0b01, Vm, Vn, Vd)) +#define SQDMULH_32(Vd, Vn, Vm) EMIT(QDMULH_vector(0, 0, 0b10, Vm, Vn, Vd)) +#define SQDMULHQ_8(Vd, Vn, Vm) EMIT(QDMULH_vector(1, 0, 0b00, Vm, Vn, Vd)) +#define SQDMULHQ_16(Vd, Vn, Vm) EMIT(QDMULH_vector(1, 0, 0b01, Vm, Vn, Vd)) +#define SQDMULHQ_32(Vd, Vn, Vm) EMIT(QDMULH_vector(1, 0, 0b10, Vm, Vn, Vd)) + +// AES extensions +#define AES_gen(D, Rn, Rd) (0b01001110<<24 | 0b00<<22 | 0b10100<<17 | 0b0010<<13 | (D)<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) +#define AESD(Vd, Vn) EMIT(AES_gen(1, Vn, Vd)) +#define AESE(Vd, Vn) EMIT(AES_gen(0, Vn, Vd)) + +#define AESMC_gen(D, Rn, Rd) (0b01001110<<24 | 0b00<<22 | 0b10100<<17 | 0b0011<<13 | (D)<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) +#define AESIMC(Vd, Vn) EMIT(AESMC_gen(1, Vn, Vd)) +#define AESMC(Vd, Vn) EMIT(AESMC_gen(0, Vn, Vd)) + +#endif //__ARM64_EMITTER_H__ diff --git a/src/dynarec/arm64/arm64_epilog.S b/src/dynarec/arm64/arm64_epilog.S new file mode 100755 index 00000000..4b73803a --- /dev/null +++ b/src/dynarec/arm64/arm64_epilog.S @@ -0,0 +1,54 @@ +//arm epilog for dynarec +//Save stuff, prepare stack and register +//called with pointer to emu as 1st parameter +//and address to jump to as 2nd parameter + +.text +.align 4 + +.global arm64_epilog +arm64_epilog: + //update register -> emu + stp x10, x11, [x0, (8 * 0)] + stp x12, x13, [x0, (8 * 2)] + stp x14, x15, [x0, (8 * 4)] + stp x16, x17, [x0, (8 * 6)] + stp x18, x19, [x0, (8 * 8)] + stp x20, x21, [x0, (8 * 10)] + stp x22, x23, [x0, (8 * 12)] + stp x24, x25, [x0, (8 * 14)] + stp x26, x27, [x0, (8 * 16)] // put back reg value in emu, including EIP (so x27 must be EIP now) + //restore all used register + //vpop {d8-d15} + ldp x19, x20, [sp, (8 * 0)] + ldp x21, x22, [sp, (8 * 2)] + ldp x23, x24, [sp, (8 * 4)] + ldp x25, x26, [sp, (8 * 6)] + ldr x27, [sp, (8 * 8)] + ldp d8, d9, [sp, (8 *10)] + ldp d10, d11, [sp, (8 *12)] + ldp d12, d13, [sp, (8 *14)] + ldp d14, d15, [sp, (8 *16)] + add sp, sp, (8 * 18) + ldp lr, fp, [sp], 16 // saved lr + //end, return... + ret + + +.global arm64_epilog_fast +arm64_epilog_fast: + //restore all used register + //vpop {d8-d15} + ldp x19, x20, [sp, (8 * 0)] + ldp x21, x22, [sp, (8 * 2)] + ldp x23, x24, [sp, (8 * 4)] + ldp x25, x26, [sp, (8 * 6)] + ldr x27, [sp, (8 * 8)] + ldp d8, d9, [sp, (8 *10)] + ldp d10, d11, [sp, (8 *12)] + ldp d12, d13, [sp, (8 *14)] + ldp d14, d15, [sp, (8 *16)] + add sp, sp, (8 * 18) + ldp lr, fp, [sp], 16 // saved lr + //end, return... 
+ ret diff --git a/src/dynarec/arm64/arm64_lock.S b/src/dynarec/arm64/arm64_lock.S new file mode 100755 index 00000000..9d097ff5 --- /dev/null +++ b/src/dynarec/arm64/arm64_lock.S @@ -0,0 +1,131 @@ +//arm lock helper +//there is 2 part: read and write +// write return 0 on success, 1 on fail (value has been changed) + +.text +.align 4 + +.global arm64_lock_read_b +.global arm64_lock_write_b +.global arm64_lock_read_h +.global arm64_lock_write_h +.global arm64_lock_read_d +.global arm64_lock_write_d +.global arm64_lock_read_dd +.global arm64_lock_write_dd +.global arm64_lock_read_dq +.global arm64_lock_write_dq +.global arm64_lock_xchg +.global arm64_lock_storeifnull +.global arm64_lock_storeifref + + +arm64_lock_read_b: + dmb ish + // address is x0, return is x0 + ldaxrb w0, [x0] + ret + +arm64_lock_write_b: + // address is x0, value is x1, return is x0 + mov x2, x0 + stlxrb w0, w1, [x2] + dmb ish + ret + +arm64_lock_read_h: + dmb ish + // address is x0, return is x0 + ldaxrh w0, [x0] + ret + +arm64_lock_write_h: + // address is x0, value is x1, return is x0 + mov x2, x0 + stlxrh w0, w1, [x2] + dmb ish + ret + +arm64_lock_read_d: + dmb ish + // address is x0, return is x0 + ldaxr w0, [x0] + ret + +arm64_lock_write_d: + // address is x0, value is w1, return is x0 + mov x2, x0 + stlxr w0, w1, [x2] + dmb ish + ret + +arm64_lock_read_dd: + dmb ish + // address is x0, return is x0 + ldaxr x0, [x0] + ret + +arm64_lock_write_dd: + // address is x0, value is x1, return is x0 + mov x2, x0 + stlxr w0, x1, [x2] + dmb ish + ret + +arm64_lock_read_dq: + dmb ish + // address is r2, return is r0, r1 + ldaxp x4, x3, [x2] + str x4, [x0] + str x3, [x1] + ret + +arm64_lock_write_dq: + // address is r2, value is r0, r1, return is r0 + // r0 needs to be aligned + stlxp w3, x0, x1, [x2] + mov w0, w3 + dmb ish + ret + + +arm64_lock_xchg: + dmb ish +arm64_lock_xchg_0: + // address is x0, value is x1, return old value in x0 + ldaxr x2, [x0] + stlxr w3, x1, [x0] + cbnz w3, arm64_lock_xchg_0 + dmb ish + mov x0, x2 + ret + +arm64_lock_storeifnull: + dmb ish +arm64_lock_storeifnull_0: + // address is x0, value is x1, x1 store to x0 only if [x0] is 0. return new [x0] value (so x1 or old value) + ldaxr x2, [x0] + cbnz x2, arm64_lock_storeifnull_exit + mov x2, x1 + stlxr w3, x2, [x0] + cbnz w3, arm64_lock_storeifnull_0 +arm64_lock_storeifnull_exit: + dmb ish + mov x0, x2 + ret + +arm64_lock_storeifref: + dmb ish +arm64_lock_storeifref_0: + // address is x0, value is x1, x1 store to x0 only if [x0] is x3. 
return new [x0] value (so x1 or old value) + ldaxr x3, [x0] + cmp x2, x3 + bne arm64_lock_storeifref_exit + stlxr w4, x1, [x0] + cbnz w4, arm64_lock_storeifref_0 + mov x0, x1 + ret +arm64_lock_storeifref_exit: + dmb ish + mov x0, x3 + ret diff --git a/src/dynarec/arm64/arm64_lock.h b/src/dynarec/arm64/arm64_lock.h new file mode 100755 index 00000000..8f6bd14d --- /dev/null +++ b/src/dynarec/arm64/arm64_lock.h @@ -0,0 +1,39 @@ +#ifndef __ARM64_LOCK__H__ +#define __ARM64_LOCK__H__ +#include + +// LDAXRB of ADDR +extern uint8_t arm64_lock_read_b(void* addr); +// STLXRB of ADDR, return 0 if ok, 1 if not +extern int arm64_lock_write_b(void* addr, uint8_t val); + +// LDAXRH of ADDR +extern uint16_t arm64_lock_read_h(void* addr); +// STLXRH of ADDR, return 0 if ok, 1 if not +extern int arm64_lock_write_h(void* addr, uint16_t val); + +// LDAXR of ADDR +extern uint32_t arm64_lock_read_d(void* addr); +// STLXR of ADDR, return 0 if ok, 1 if not +extern int arm64_lock_write_d(void* addr, uint32_t val); + +// LDAXR of ADDR +extern uint64_t arm64_lock_read_dd(void* addr); +// STLXR of ADDR, return 0 if ok, 1 if not +extern int arm64_lock_write_dd(void* addr, uint64_t val); + +// LDAXRD of ADDR +extern void arm64_lock_read_dq(uint64_t * a, uint64_t* b, void* addr); +// STLXRD of ADDR, return 0 if ok, 1 if not +extern int arm64_lock_write_dq(uint64_t a, uint64_t b, void* addr); + +// Atomicaly exchange value at [p] with val, return old p +extern uintptr_t arm64_lock_xchg(void* p, uintptr_t val); + +// Atomicaly store value to [p] only if [p] is NULL. Return new [p] value (so val or old) +extern void* arm64_lock_storeifnull(void*p, void* val); + +// Atomicaly store value to [p] only if [p] is ref. Return new [p] value (so val or old) +extern void* arm64_lock_storeifref(void*p, void* val, void* ref); + +#endif //__ARM64_LOCK__H__ \ No newline at end of file diff --git a/src/dynarec/arm64/arm64_next.S b/src/dynarec/arm64/arm64_next.S new file mode 100755 index 00000000..5e890137 --- /dev/null +++ b/src/dynarec/arm64/arm64_next.S @@ -0,0 +1,39 @@ +//arm update linker table for dynarec +//called with pointer to emu as 1st parameter +//and address of table to as 2nd parameter +//ip is at r12 + +.text +.align 4 + +.extern LinkNext + +.global arm64_next +arm64_next: + // emu is r0 + // IP address is r1 + sub sp, sp, (8 * 12) + stp x0, x1, [sp, (8 * 0)] + stp x10, x11, [sp, (8 * 2)] + stp x12, x13, [sp, (8 * 4)] + stp x14, x15, [sp, (8 * 6)] + stp x16, x17, [sp, (8 * 8)] + stp x18, x27, [sp, (8 * 10)] // also save x27(rip) to allow change in LinkNext + + mov x2, lr // "from" is in lr, so put in x2 + add x3, sp, 8*11 // x3 is address to change rip + // call the function + bl LinkNext + // preserve return value + mov x3, x0 + // pop regs + ldp x0, x1, [sp, (8 * 0)] + ldp x10, x11, [sp, (8 * 2)] + ldp x12, x13, [sp, (8 * 4)] + ldp x14, x15, [sp, (8 * 6)] + ldp x16, x17, [sp, (8 * 8)] + ldp x18, x27, [sp, (8 * 10)] + add sp, sp, (8 * 12) + // return offset is jump address + br x3 + diff --git a/src/dynarec/arm64/arm64_printer.c b/src/dynarec/arm64/arm64_printer.c new file mode 100755 index 00000000..b07d40a4 --- /dev/null +++ b/src/dynarec/arm64/arm64_printer.c @@ -0,0 +1,1353 @@ +#include +#include +#include +#include + +#include "arm64_printer.h" +#include "debug.h" + +static const char* Xt[] = {"xEmu", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "xRAX", "xRCX", "xRDX", "xRBX", "xRSP", "xRBP", "xRSI", "xRDI", "xR8", "xR9", "xR10", "xR11", "xR12", "xR13", "xR14", "xR15", "xFlags", "xRIP", "x28", "FP", "LR", 
"xZR"}; +static const char* XtSp[] = {"xEmu", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "xRAX", "xRCX", "xRDX", "xRBX", "xRSP", "xRBP", "xRSI", "xRDI", "xR8", "xR9", "xR10", "xR11", "xR12", "xR13", "xR14", "xR15", "xFlags", "xRIP", "x28", "FP", "LR", "SP"}; +static const char* Wt[] = {"w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7", "w8", "w9", "wEAX", "wECX", "wEDX", "wEBX", "wESP", "wEBP", "wESI", "wEDI", "wR8", "wR9", "wR10", "wR11", "wR12", "wR13", "wR14", "wR15", "wFlags", "w27", "w28", "w29", "w30", "wZR"}; +static const char* WtSp[] = {"w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7", "w8", "w9", "wEAX", "wECX", "wEDX", "wEBX", "wESP", "wEBP", "wESI", "wEDI", "wR8", "wR9", "wR10", "wR11", "wR12", "wR13", "wR14", "wR15", "wFlags", "w27", "w28", "w29", "w30", "wSP"}; + +static const char* conds[] = {"cEQ", "cNE", "cCS", "cCC", "cMI", "cPL", "cVS", "cVC", "cHI", "cLS", "cGE", "cLT", "cGT", "cLE", "c__", "inv"}; + +#define abs(A) (((A)<0)?(-(A)):(A)) + +typedef struct arm64_print_s { + int N, S, U, L, Q; + int t, n, m, d, t2, a; + int f, c, o, h, p; + int i, r, s; + int x, w; +} arm64_print_t; + +uint64_t DecodeBitMasks(int N, int imms, int immr) +{ + int len = 31-__builtin_clz(N<<6 | ((~imms)&0b111111)); + if(len<1) return 0; + int levels = (1<>r)|(mask<<(e-r)); + mask&=((1LL<>i)&1; + switch(*mask) { + case '0': if(v!=0) return 0; break; + case '1': if(v!=1) return 0; break; + case 'N': a->N = (a->N<<1) | v; break; + case 'S': a->S = (a->S<<1) | v; break; + case 'U': a->U = (a->U<<1) | v; break; + case 'L': a->L = (a->L<<1) | v; break; + case 'Q': a->Q = (a->Q<<1) | v; break; + case 't': a->t = (a->t<<1) | v; break; + case '2': a->t2 = (a->t2<<1) | v; break; + case 'n': a->n = (a->n<<1) | v; break; + case 'p': a->p = (a->p<<1) | v; break; + case 'm': a->m = (a->m<<1) | v; break; + case 'a': a->a = (a->a<<1) | v; break; + case 'd': a->d = (a->d<<1) | v; break; + case 'f': a->f = (a->f<<1) | v; break; + case 'c': a->c = (a->c<<1) | v; break; + case 'i': a->i = (a->i<<1) | v; break; + case 'r': a->r = (a->r<<1) | v; break; + case 's': a->s = (a->s<<1) | v; break; + case 'o': a->o = (a->o<<1) | v; break; + case 'h': a->h = (a->h<<1) | v; break; + case 'w': a->w = (a->w<<1) | v; break; + case 'x': a->x = (a->x<<1) | v; break; + default: + printf_log(LOG_NONE, "Warning, printer mask use unhandled '%c'\n", *mask); + } + mask++; + --i; + } + + return 1; +} + +int64_t signExtend(uint32_t val, int sz) +{ + int64_t ret = val; + if((val>>(sz-1))&1) + ret |= (0xffffffffffffffffll<>30)&3; + int offset = signExtend(imm, 9); + snprintf(buff, sizeof(buff), "LDR %s, [%s], %s0x%x", (size==0b10)?Wt[Rt]:Xt[Rt], XtSp[Rn], (offset<0)?"-":"", abs(offset)); + return buff; + } + if(isMask(opcode, "1x111000010iiiiiiiii11nnnnnttttt", &a)) { + int size = (opcode>>30)&3; + int offset = signExtend(imm, 9); + snprintf(buff, sizeof(buff), "LDR %s, [%s, %s0x%x]!", (size==0b10)?Wt[Rt]:Xt[Rt], XtSp[Rn], (offset<0)?"-":"", abs(offset)); + return buff; + } + if(isMask(opcode, "1x11100101iiiiiiiiiiiinnnnnttttt", &a)) { + int size = (opcode>>30)&3; + int offset = (imm)<>30)&1)?3:2; + int offset = signExtend(imm, 19)<<2; + snprintf(buff, sizeof(buff), "LDR %s, [#%+d]\t;%p", (size==2)?Wt[Rt]:Xt[Rt], offset, (void*)(addr+offset)); + return buff; + } + if(isMask(opcode, "10011000iiiiiiiiiiiiiiiiiiittttt", &a)) { + int offset = signExtend(imm, 19)<<2; + snprintf(buff, sizeof(buff), "LDRSW %s, [#%+d]\t;%p", Xt[Rt], offset, (void*)(addr+offset)); + return buff; + } + if(isMask(opcode, "ff011100iiiiiiiiiiiiiiiiiiittttt", 
&a)) { + int offset = signExtend(imm, 19)<<2; + const char* Y[] = {"S", "D", "Q", "?"}; + snprintf(buff, sizeof(buff), "LDR %s%d, [#%+d]\t;%p", Y[sf], Rt, offset, (void*)(addr+offset)); + return buff; + } + if(isMask(opcode, "1x111000011mmmmmoooS10nnnnnttttt", &a)) { + int size = (opcode>>30)&3; + const char* extend[] = {"?0", "?1", "UXTW", "LSL", "?4", "?5", "SXTW", "SXTX"}; + int amount = size*a.S; + if(option==3 && !amount) + snprintf(buff, sizeof(buff), "LDR %s, [%s, %s]", (size==2)?Wt[Rt]:Xt[Rt], XtSp[Rn], ((option&1)==0)?Wt[Rm]:Xt[Rm]); + else + snprintf(buff, sizeof(buff), "LDR %s, [%s, %s, %s %d]", (size==2)?Wt[Rt]:Xt[Rt], XtSp[Rn], ((option&1)==0)?Wt[Rm]:Xt[Rm], extend[option], amount); + return buff; + } + if(isMask(opcode, "1x111000000iiiiiiiii01nnnnnttttt", &a)) { + int size = (opcode>>30)&3; + int offset = signExtend(imm, 9); + snprintf(buff, sizeof(buff), "STR %s, [%s], %s0x%x", (size==0b10)?Wt[Rt]:Xt[Rt], XtSp[Rn], (offset<0)?"-":"", abs(offset)); + return buff; + } + if(isMask(opcode, "1x111000000iiiiiiiii11nnnnnttttt", &a)) { + int size = (opcode>>30)&3; + int offset = signExtend(imm, 9); + snprintf(buff, sizeof(buff), "STR %s, [%s, %s0x%x]!", (size==0b10)?Wt[Rt]:Xt[Rt], XtSp[Rn], (offset<0)?"-":"", abs(offset)); + return buff; + } + if(isMask(opcode, "1x11100100iiiiiiiiiiiinnnnnttttt", &a)) { + int size = (opcode>>30)&3; + int offset = (imm)<>30)&3; + const char* extend[] = {"?0", "?1", "UXTW", "LSL", "?4", "?5", "SXTW", "SXTX"}; + int amount = size*a.S; + if(option==3 && !amount) + snprintf(buff, sizeof(buff), "STR %s, [%s, %s]", (size==2)?Wt[Rt]:Xt[Rt], XtSp[Rn], ((option&1)==0)?Wt[Rm]:Xt[Rm]); + else + snprintf(buff, sizeof(buff), "STR %s, [%s, %s, %s %d]", (size==2)?Wt[Rt]:Xt[Rt], XtSp[Rn], ((option&1)==0)?Wt[Rm]:Xt[Rm], extend[option], amount); + return buff; + } + if(isMask(opcode, "0x111000010iiiiiiiii01nnnnnttttt", &a)) { + int size = a.x; + int offset = signExtend(imm, 9); + snprintf(buff, sizeof(buff), "LDR%c %s, [%s], %s0x%x", size?'H':'B', Xt[Rt], XtSp[Rn], (offset<0)?"-":"", abs(offset)); + return buff; + } + if(isMask(opcode, "0x111000010iiiiiiiii11nnnnnttttt", &a)) { + int size = a.x; + int offset = signExtend(imm, 9); + snprintf(buff, sizeof(buff), "LDR%c %s, [%s, %s0x%x]!", size?'H':'B', Xt[Rt], XtSp[Rn], (offset<0)?"-":"", abs(offset)); + return buff; + } + if(isMask(opcode, "0x11100101iiiiiiiiiiiinnnnnttttt", &a)) { + int size = a.x; + int offset = (imm)<=immr) + snprintf(buff, sizeof(buff), "UBFX %s, %s, %d, %d", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], immr, imms-immr+1); + else + snprintf(buff, sizeof(buff), "UBFM %s, %s, %d, %d", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], immr, imms); + + return buff; + } + + if(isMask(opcode, "f0011010110mmmmm001010nnnnnddddd", &a)) { + snprintf(buff, sizeof(buff), "ASR %s, %s, %s", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], sf?Xt[Rm]:Wt[Rm]); + return buff; + } + + if(isMask(opcode, "f00100110Nrrrrrrssssssnnnnnddddd", &a)) { + if(sf && imms==0b111111) + snprintf(buff, sizeof(buff), "ASR %s, %s, %d", Xt[Rd], Xt[Rn], immr); + else if(!sf && imms==0b011111) + snprintf(buff, sizeof(buff), "ASR %s, %s, %d", Wt[Rd], Wt[Rn], immr); + else if(immr==0 && imms==0b000111) + snprintf(buff, sizeof(buff), "SXTB %s, %s", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn]); + else if(immr==0 && imms==0b001111) + snprintf(buff, sizeof(buff), "SXTH %s, %s", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn]); + else if(sf && immr==0 && imms==0b011111) + snprintf(buff, sizeof(buff), "SXTW %s, %s", Xt[Rd], Wt[Rn]); + else if(imms>=immr) + snprintf(buff, sizeof(buff), "SBFX %s, %s, 
%d, %d", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], immr, imms-immr+1); + else + snprintf(buff, sizeof(buff), "SBFM %s, %s, %d, %d", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], immr, imms); + return buff; + } + + if(isMask(opcode, "f00100111N0mmmmmssssssnnnnnddddd", &a)) { + if(Rn==Rm) + snprintf(buff, sizeof(buff), "ROR %s, %s, %d", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], imms); + else + snprintf(buff, sizeof(buff), "EXTR %s, %s, %s, %d", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], sf?Xt[Rm]:Wt[Rm], imms); + return buff; + } + + if(isMask(opcode, "f0011010110mmmmm001011nnnnnddddd", &a)) { + snprintf(buff, sizeof(buff), "ROR %s, %s, %s", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], sf?Xt[Rm]:Wt[Rm]); + return buff; + } + + if(isMask(opcode, "f0011010110mmmmm001001nnnnnddddd", &a)) { + snprintf(buff, sizeof(buff), "LSR %s, %s, %s", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], sf?Xt[Rm]:Wt[Rm]); + return buff; + } + + if(isMask(opcode, "f0011010110mmmmm001000nnnnnddddd", &a)) { + snprintf(buff, sizeof(buff), "LSL %s, %s, %s", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], sf?Xt[Rm]:Wt[Rm]); + return buff; + } + + if(isMask(opcode, "f01100110Nrrrrrrssssssnnnnnddddd", &a)) { + if(imms>2, (void*)(addr + offset)); + return buff; + } + if(isMask(opcode, "000101iiiiiiiiiiiiiiiiiiiiiiiiii", &a)) { + int offset = signExtend(imm, 26)<<2; + snprintf(buff, sizeof(buff), "B #+%di\t; %p", offset>>2, (void*)(addr + offset)); + return buff; + } + if(isMask(opcode, "f0110100iiiiiiiiiiiiiiiiiiittttt", &a)) { + int offset = signExtend(imm, 19)<<2; + snprintf(buff, sizeof(buff), "CBZ %s, #%+di\t; %p", Xt[Rt], offset>>2, (void*)(addr + offset)); + return buff; + } + if(isMask(opcode, "f0110101iiiiiiiiiiiiiiiiiiittttt", &a)) { + int offset = signExtend(imm, 19)<<2; + snprintf(buff, sizeof(buff), "CBNZ %s, #%+di\t; %p", Xt[Rt], offset>>2, (void*)(addr + offset)); + return buff; + } + if(isMask(opcode, "f0110100iiiiiiiiiiiiiiiiiiittttt", &a)) { + int offset = signExtend(imm, 19)<<2; + snprintf(buff, sizeof(buff), "CBZ %s, #%+di\t; %p", Xt[Rt], offset>>2, (void*)(addr + offset)); + return buff; + } + if(isMask(opcode, "s0110110sssssiiiiiiiiiiiiiittttt", &a)) { + int offset = signExtend(imm, 14)<<2; + snprintf(buff, sizeof(buff), "TBZ %s, 0x%x, #%+di\t; %p", (imms<31)?Xt[Rt]:Wt[Rt], imms, offset>>2, (void*)(addr + offset)); + return buff; + } + if(isMask(opcode, "s0110111sssssiiiiiiiiiiiiiittttt", &a)) { + int offset = signExtend(imm, 14)<<2; + snprintf(buff, sizeof(buff), "TBNZ %s, 0x%x, #%+di\t; %p", (imms<31)?Xt[Rt]:Wt[Rt], imms, offset>>2, (void*)(addr + offset)); + return buff; + } + + if(isMask(opcode, "f0011010100mmmmmcccc01nnnnnddddd", &a)) { + if(Rm!=31 && (cond&0b1110)!=0b1110 && Rn!=31 && Rn==Rm) + snprintf(buff, sizeof(buff), "CINC %s, %s, %s, %s", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], sf?Xt[Rm]:Wt[Rm], conds[cond^1]); + else if(Rm==31 && (cond&0b1110)!=0b1110 && Rn==31) + snprintf(buff, sizeof(buff), "CSET %s,%s", sf?Xt[Rd]:Wt[Rd], conds[cond^1]); + else + snprintf(buff, sizeof(buff), "CSINC %s, %s, %s, %s", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], sf?Xt[Rm]:Wt[Rm], conds[cond]); + return buff; + } + + if(isMask(opcode, "f1011010100mmmmmcccc00nnnnnddddd", &a)) { + if(Rm!=31 && (cond&0b1110)!=0b1110 && Rn!=31 && Rn==Rm) + snprintf(buff, sizeof(buff), "CINV %s, %s, %s, %s", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], sf?Xt[Rm]:Wt[Rm], conds[cond^1]); + else if(Rm==31 && (cond&0b1110)!=0b1110 && Rn==31) + snprintf(buff, sizeof(buff), "CSETM %s,%s", sf?Xt[Rd]:Wt[Rd], conds[cond^1]); + else + snprintf(buff, sizeof(buff), "CSINV %s, %s, %s, %s", sf?Xt[Rd]:Wt[Rd], 
sf?Xt[Rn]:Wt[Rn], sf?Xt[Rm]:Wt[Rm], conds[cond]); + return buff; + } + + if(isMask(opcode, "f1011010100mmmmmcccc01nnnnnddddd", &a)) { + if((cond&0b1110)!=0b1110 && Rn==Rm) + snprintf(buff, sizeof(buff), "CNEG %s, %s, %s", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], conds[cond^1]); + else + snprintf(buff, sizeof(buff), "CSNEG %s, %s, %s, %s", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], sf?Xt[Rm]:Wt[Rm], conds[cond]); + return buff; + } + if(isMask(opcode, "f0011010100mmmmmcccc00nnnnnddddd", &a)) { + snprintf(buff, sizeof(buff), "CSEL %s, %s, %s, %s", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], sf?Xt[Rm]:Wt[Rm], conds[cond]); + return buff; + } + // MISC Bits + if(isMask(opcode, "f10110101100000000010onnnnnddddd", &a)) { + snprintf(buff, sizeof(buff), "CL%c %s, %s", option?'S':'Z', sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn]); + return buff; + } + if(isMask(opcode, "f101101011000000000000nnnnnddddd", &a)) { + snprintf(buff, sizeof(buff), "RBIT %s, %s", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn]); + return buff; + } + if(isMask(opcode, "f1011010110000000000oonnnnnddddd", &a)) { + if(!sf && option==2) + snprintf(buff, sizeof(buff), "REV %s, %s", Wt[Rd], Wt[Rn]); + else if (sf && option==3) + snprintf(buff, sizeof(buff), "REV %s, %s", Xt[Rd], Xt[Rn]); + else + snprintf(buff, sizeof(buff), "REV%d %s, %s", 8< nzcv + //o0=1(op0=3), op1=0b011(3) CRn=0b0100(4) CRm=0b0100(4) op2=2 => fpcr + if(a.o==1 && a.p==3 && a.n==4 && a.m==2 && a.t2==0) + reg="nzcv"; + else if(a.o==1 && a.p==3 && a.n==4 && a.m==4 && a.t2==2) + reg="fpcr"; + + if(!reg) + snprintf(buff, sizeof(buff), "MSR S%d_%d_%d_%d_%d, %s", 2+a.o, a.p, a.n, a.m, a.t2, Xt[Rt]); + else + snprintf(buff, sizeof(buff), "MSR %s, %s", reg, Xt[Rt]); + return buff; + } + if(isMask(opcode, "110101010011opppnnnnmmmm222ttttt", &a)) { + const char* reg=NULL; + //o0=1(op0=3), op1=0b011(3) CRn=0b0100(4) CRm=0b0010(2) op2=0 => nzcv + //o0=1(op0=3), op1=0b011(3) CRn=0b0100(4) CRm=0b0100(4) op2=2 => fpcr + if(a.o==1 && a.p==3 && a.n==4 && a.m==2 && a.t2==0) + reg="nzcv"; + else if(a.o==1 && a.p==3 && a.n==4 && a.m==4 && a.t2==2) + reg="fpcr"; + + if(!reg) + snprintf(buff, sizeof(buff), "MRS %s, S%d_%d_%d_%d_%d", Xt[Rt], 2+a.o, a.p, a.n, a.m, a.t2); + else + snprintf(buff, sizeof(buff), "MRS %s, %s", Xt[Rt], reg); + return buff; + } + + // ----------- NEON / FPU + + // VORR/VAND/VBIC/VORN + if(isMask(opcode, "0Q001110101mmmmm000111nnnnnddddd", &a)) { + char q = a.Q?'Q':'D'; + if(Rn==Rm) + snprintf(buff, sizeof(buff), "VMOV %c%d, %c%d", q, Rd, q, Rn); + else + snprintf(buff, sizeof(buff), "VORR %c%d, %c%d, %c%d", q, Rd, q, Rn, q, Rm); + return buff; + } + if(isMask(opcode, "0Q001110111mmmmm000111nnnnnddddd", &a)) { + char q = a.Q?'Q':'D'; + snprintf(buff, sizeof(buff), "VORN %c%d, %c%d, %c%d", q, Rd, q, Rn, q, Rm); + return buff; + } + if(isMask(opcode, "0Q001110001mmmmm000111nnnnnddddd", &a)) { + char q = a.Q?'Q':'D'; + snprintf(buff, sizeof(buff), "VAND %c%d, %c%d, %c%d", q, Rd, q, Rn, q, Rm); + return buff; + } + if(isMask(opcode, "0Q001110011mmmmm000111nnnnnddddd", &a)) { + char q = a.Q?'Q':'D'; + snprintf(buff, sizeof(buff), "VBIC %c%d, %c%d, %c%d", q, Rd, q, Rn, q, Rm); + return buff; + } + // UMOV + if(isMask(opcode, "0Q001110000rrrrr001111nnnnnddddd", &a)) { + char q = a.Q?'Q':'D'; + char s = '?'; + int sz=0; + if(a.Q==0 && immr&1) {s='B'; sz=0; } + else if(a.Q==0 && (immr&3)==2) {s='H'; sz=1; } + else if(a.Q==0 && (immr&7)==4) {s='S'; sz=2; } + else if(a.Q==1 && (immr&15)==8) {s='D'; sz=3; } + int index = (immr)>>(sz+1); + if(sz>2) + snprintf(buff, sizeof(buff), "MOV %s, %c%d.%c[%d]", 
a.Q?Xt[Rd]:Wt[Rd], q, Rn, s, index); + else + snprintf(buff, sizeof(buff), "UMOV %s, %c%d.%c[%d]", a.Q?Xt[Rd]:Wt[Rd], q, Rn, s, index); + return buff; + } + // VEOR + if(isMask(opcode, "0Q101110001mmmmm000111nnnnnddddd", &a)) { + char q = a.Q?'Q':'D'; + snprintf(buff, sizeof(buff), "VEOR %c%d, %c%d, %c%d", q, Rd, q, Rn, q, Rm); + return buff; + } + + // VADD / VSUB + if(isMask(opcode, "0QU01110ff1mmmmm100001nnnnnddddd", &a)) { + const char* Y[] = {"8B", "16B", "4H", "8H", "2S", "4S", "??", "2D"}; + const char* Vd = Y[((sf)<<1) | a.Q]; + snprintf(buff, sizeof(buff), "V%s V%d.%s, V%d.%s, V%d.%s", a.U?"SUB":"ADD", Rd, Vd, Rn, Vd, Rm, Vd); + return buff; + } + + // VMUL + if(isMask(opcode, "0Q001110ff1mmmmm100111nnnnnddddd", &a)) { + const char* Y[] = {"8B", "16B", "4H", "8H", "2S", "4S", "??", "2D"}; + const char* Vd = Y[((sf)<<1) | a.Q]; + snprintf(buff, sizeof(buff), "VMUL V%d.%s, V%d.%s, V%d.%s", Rd, Vd, Rn, Vd, Rm, Vd); + return buff; + } + // CMP + if(isMask(opcode, "0Q101110ff1mmmmm100011nnnnnddddd", &a)) { + const char* Y[] = {"8B", "16B", "4H", "8H", "2S", "4S", "??", "2D"}; + const char* Vd = Y[((sf)<<1) | a.Q]; + snprintf(buff, sizeof(buff), "VCMEQ V%d.%s, V%d.%s, V%d.%s", Rd, Vd, Rn, Vd, Rm, Vd); + return buff; + } + + // Shift + if(isMask(opcode, "0QU011110hhhhrrr000001nnnnnddddd", &a)) { + const char* Y[] = {"8B", "16B", "4H", "8H", "2S", "4S", "??", "2D"}; + const char* Vd ="??"; + int s = 0; + if(shift==0b0001) {Vd = Y[a.Q]; s=16-((shift)<<3 | immr);} + else if((shift&0b1110)==0b0010) {Vd = Y[2+a.Q]; s=32-((shift)<<3 | immr);} + else if((shift&0b1100)==0b0100) {Vd = Y[4+a.Q]; s=64-((shift)<<3 | immr);} + else if((shift&0b1000)==0b1000) {Vd = Y[6+a.Q]; s=128-((shift)<<3 | immr);} + snprintf(buff, sizeof(buff), "%cSHR V%d.%s, V%d.%s, #%d", a.U?'U':'S', Rd, Vd, Rn, Vd, s); + return buff; + } + + // INS + if(isMask(opcode, "01101110000rrrrr0ssss1nnnnnddddd", &a)) { + char s = '?'; + int idx1=0, idx2=0; + if(immr&1) {s='B'; idx1=(immr)>>1; idx2 = imms; } + else if((immr&3)==2) {s='H'; idx1=(immr)>>2; idx2=(imms)>>1;} + else if((immr&7)==4) {s='S'; idx1=(immr)>>3; idx2=(imms)>>2;} + else if((immr&15)==8) {s='D'; idx1=(immr)>>4; idx2=(imms)>>3;} + snprintf(buff, sizeof(buff), "INS V%d.%c[%d], V%d.%c[%d]", Rd, s, idx1, Rn, s, idx2); + return buff; + } + if(isMask(opcode, "01001110000rrrrr000111nnnnnddddd", &a)) { + char s = '?', R = 0; + int idx1=0; + if(immr&1) {s='B'; idx1=(immr)>>1; } + else if((immr&3)==2) {s='H'; idx1=(immr)>>2;} + else if((immr&7)==4) {s='S'; idx1=(immr)>>3;} + else if((immr&15)==8) {s='D'; idx1=(immr)>>4; R=1;} + snprintf(buff, sizeof(buff), "INS V%d.%c[%d], %s", Rd, s, idx1, R?Xt[Rn]:Wt[Rn]); + return buff; + } + + // ADR + if(isMask(opcode, "0ss10000iiiiiiiiiiiiiiiiiiiddddd", &a)) { + snprintf(buff, sizeof(buff), "ADR, %s, %ld", Xt[Rd], signExtend((imm)<<2|(imms), 20)); + return buff; + } + + // LDR / STR + if(isMask(opcode, "ss111101cciiiiiiiiiiiinnnnnttttt", &a)) { + char s = '?'; + int size=imms; + int op=0; + if(size==0 && opc==1) {s='B';} + else if(size==1 && opc==1) {s='H';} + else if(size==2 && opc==1) {s='S';} + else if(size==3 && opc==1) {s='D';} + else if(size==0 && opc==3) {s='Q'; size = 4;} + else if(size==0 && opc==0) {s='B'; op=1;} + else if(size==1 && opc==0) {s='H'; op=1;} + else if(size==2 && opc==0) {s='S'; op=1;} + else if(size==3 && opc==0) {s='D'; op=1;} + else if(size==0 && opc==2) {s='Q'; op=1; size = 4;} + + int offset = imm<>1); break; + case 2: if(!(sf&1)) + idx = (a.Q<<1) | a.S; + else { + scale = 3; + idx = a.Q; + } + break; + } + 
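+            // single-structure LD1/ST1: Y[scale] is the element size and idx the lane index, both decoded from the opcode fields above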
snprintf(buff, sizeof(buff), "%s1 {V%d.%s}[%d], [%s]", a.L?"LD":"ST", Rt, Y[scale], idx, XtSp[Rn]); + return buff; + } + // LDUR/STUR + if(isMask(opcode, "ff111100cL0iiiiiiiii00nnnnnttttt", &a)) { + const char* Y[] = {"B", "H", "S", "D", "Q"}; + int sz = sf; + if(sz==0 && a.c) + sz = 4; + int offset = signExtend(imm, 9); + if(!offset) + snprintf(buff, sizeof(buff), "%sUR %s%d, [%s]", a.L?"LD":"ST", Y[sz], Rd, XtSp[Rn]); + else + snprintf(buff, sizeof(buff), "%sUR %s%d, [%s, %+d]", a.L?"LD":"ST", Y[sz], Rd, XtSp[Rn], imm); + return buff; + } + // LDR/STR vector immediate + if(isMask(opcode, "ff111101cLiiiiiiiiiiiinnnnnttttt", &a)) { + const char* Y[] = {"B", "H", "S", "D", "Q"}; + int sz = sf; + if(sz==0 && a.c) + sz = 4; + int offset = imm< register + ldp x10, x11, [x0, (8 * 0)] + ldp x12, x13, [x0, (8 * 2)] + ldp x14, x15, [x0, (8 * 4)] + ldp x16, x17, [x0, (8 * 6)] + ldp x18, x19, [x0, (8 * 8)] + ldp x20, x21, [x0, (8 * 10)] + ldp x22, x23, [x0, (8 * 12)] + ldp x24, x25, [x0, (8 * 14)] + ldp x26, x27, [x0, (8 * 16)] + //jump to function + br x1 diff --git a/src/dynarec/arm64/dynarec_arm64_00.c b/src/dynarec/arm64/dynarec_arm64_00.c new file mode 100755 index 00000000..3e6098de --- /dev/null +++ b/src/dynarec/arm64/dynarec_arm64_00.c @@ -0,0 +1,2453 @@ +#include +#include +#include +#include +#include +#include + +#include "debug.h" +#include "box64context.h" +#include "dynarec.h" +#include "emu/x64emu_private.h" +#include "emu/x64run_private.h" +#include "x64run.h" +#include "x64emu.h" +#include "box64stack.h" +#include "callback.h" +#include "bridge.h" +#include "emu/x64run_private.h" +#include "x64trace.h" +#include "dynarec_native.h" + +#include "arm64_printer.h" +#include "dynarec_arm64_private.h" +#include "dynarec_arm64_functions.h" +#include "dynarec_arm64_helper.h" + +int isSimpleWrapper(wrapper_t fun); + +uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog) +{ + uint8_t nextop, opcode; + uint8_t gd, ed; + int8_t i8; + int32_t i32, tmp; + int64_t i64, j64; + uint8_t u8; + uint8_t gb1, gb2, eb1, eb2; + uint32_t u32; + uint64_t u64; + uint8_t wback, wb1, wb2, wb; + int64_t fixedaddress; + + opcode = F8; + MAYUSE(eb1); + MAYUSE(eb2); + MAYUSE(j64); + MAYUSE(wb); + + switch(opcode) { + case 0x00: + INST_NAME("ADD Eb, Gb"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETEB(x1, 0); + GETGB(x2); + emit_add8(dyn, ninst, x1, x2, x4, x5); + EBBACK; + break; + case 0x01: + INST_NAME("ADD Ed, Gd"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGD; + GETED(0); + emit_add32(dyn, ninst, rex, ed, gd, x3, x4); + WBACK; + break; + case 0x02: + INST_NAME("ADD Gb, Eb"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETEB(x2, 0); + GETGB(x1); + emit_add8(dyn, ninst, x1, x2, x3, x4); + GBBACK; + break; + case 0x03: + INST_NAME("ADD Gd, Ed"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGD; + GETED(0); + emit_add32(dyn, ninst, rex, gd, ed, x3, x4); + break; + case 0x04: + INST_NAME("ADD AL, Ib"); + SETFLAGS(X_ALL, SF_SET_PENDING); + u8 = F8; + UXTBw(x1, xRAX); + emit_add8c(dyn, ninst, x1, u8, x3, x4); + BFIx(xRAX, x1, 0, 8); + break; + case 0x05: + INST_NAME("ADD EAX, Id"); + SETFLAGS(X_ALL, SF_SET_PENDING); + i64 = F32S; + emit_add32c(dyn, ninst, rex, xRAX, i64, x3, x4, x5); + break; + + case 0x08: + INST_NAME("OR Eb, Gb"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETEB(x1, 0); + GETGB(x2); + emit_or8(dyn, ninst, x1, x2, x4, x2); + EBBACK; + break; + case 0x09: + INST_NAME("OR Ed, 
Gd"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGD; + GETED(0); + emit_or32(dyn, ninst, rex, ed, gd, x3, x4); + WBACK; + break; + case 0x0A: + INST_NAME("OR Gb, Eb"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETEB(x2, 0); + GETGB(x1); + emit_or8(dyn, ninst, x1, x2, x3, x4); + GBBACK; + break; + case 0x0B: + INST_NAME("OR Gd, Ed"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGD; + GETED(0); + emit_or32(dyn, ninst, rex, gd, ed, x3, x4); + break; + case 0x0C: + INST_NAME("OR AL, Ib"); + SETFLAGS(X_ALL, SF_SET_PENDING); + u8 = F8; + UXTBw(x1, xRAX); + emit_or8c(dyn, ninst, x1, u8, x3, x4); + BFIx(xRAX, x1, 0, 8); + break; + case 0x0D: + INST_NAME("OR EAX, Id"); + SETFLAGS(X_ALL, SF_SET_PENDING); + i64 = F32S; + emit_or32c(dyn, ninst, rex, xRAX, i64, x3, x4); + break; + + case 0x0F: + switch(rep) { + case 1: + addr = dynarec64_F20F(dyn, addr, ip, ninst, rex, ok, need_epilog); + break; + case 2: + addr = dynarec64_F30F(dyn, addr, ip, ninst, rex, ok, need_epilog); + break; + default: + addr = dynarec64_0F(dyn, addr, ip, ninst, rex, rep, ok, need_epilog); + } + break; + case 0x10: + INST_NAME("ADC Eb, Gb"); + READFLAGS(X_CF); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETEB(x1, 0); + GETGB(x2); + emit_adc8(dyn, ninst, x1, x2, x4, x5); + EBBACK; + break; + case 0x11: + INST_NAME("ADC Ed, Gd"); + READFLAGS(X_CF); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGD; + GETED(0); + emit_adc32(dyn, ninst, rex, ed, gd, x3, x4); + WBACK; + break; + case 0x12: + INST_NAME("ADC Gb, Eb"); + READFLAGS(X_CF); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETEB(x2, 0); + GETGB(x1); + emit_adc8(dyn, ninst, x1, x2, x4, x3); + GBBACK; + break; + case 0x13: + INST_NAME("ADC Gd, Ed"); + READFLAGS(X_CF); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGD; + GETED(0); + emit_adc32(dyn, ninst, rex, gd, ed, x3, x4); + break; + case 0x14: + INST_NAME("ADC AL, Ib"); + READFLAGS(X_CF); + SETFLAGS(X_ALL, SF_SET_PENDING); + u8 = F8; + UXTBw(x1, xRAX); + emit_adc8c(dyn, ninst, x1, u8, x3, x4, x5); + BFIx(xRAX, x1, 0, 8); + break; + case 0x15: + INST_NAME("ADC EAX, Id"); + READFLAGS(X_CF); + SETFLAGS(X_ALL, SF_SET_PENDING); + i64 = F32S; + MOV64xw(x1, i64); + emit_adc32(dyn, ninst, rex, xRAX, x1, x3, x4); + break; + + case 0x18: + INST_NAME("SBB Eb, Gb"); + READFLAGS(X_CF); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETEB(x1, 0); + GETGB(x2); + emit_sbb8(dyn, ninst, x1, x2, x4, x5); + EBBACK; + break; + case 0x19: + INST_NAME("SBB Ed, Gd"); + READFLAGS(X_CF); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGD; + GETED(0); + emit_sbb32(dyn, ninst, rex, ed, gd, x3, x4); + WBACK; + break; + case 0x1A: + INST_NAME("SBB Gb, Eb"); + READFLAGS(X_CF); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETEB(x2, 0); + GETGB(x1); + emit_sbb8(dyn, ninst, x1, x2, x3, x4); + GBBACK; + break; + case 0x1B: + INST_NAME("SBB Gd, Ed"); + READFLAGS(X_CF); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGD; + GETED(0); + emit_sbb32(dyn, ninst, rex, gd, ed, x3, x4); + break; + case 0x1C: + INST_NAME("SBB AL, Ib"); + READFLAGS(X_CF); + SETFLAGS(X_ALL, SF_SET_PENDING); + u8 = F8; + UXTBw(x1, xRAX); + emit_sbb8c(dyn, ninst, x1, u8, x3, x4, x5); + BFIx(xRAX, x1, 0, 8); + break; + case 0x1D: + INST_NAME("SBB EAX, Id"); + READFLAGS(X_CF); + SETFLAGS(X_ALL, SF_SET_PENDING); + i64 = F32S; + MOV64xw(x2, i64); + emit_sbb32(dyn, ninst, rex, xRAX, x2, x3, x4); + break; + + case 0x20: + INST_NAME("AND Eb, Gb"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETEB(x1, 
0); + GETGB(x2); + emit_and8(dyn, ninst, x1, x2, x4, x5); + EBBACK; + break; + case 0x21: + INST_NAME("AND Ed, Gd"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGD; + GETED(0); + emit_and32(dyn, ninst, rex, ed, gd, x3, x4); + WBACK; + break; + case 0x22: + INST_NAME("AND Gb, Eb"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETEB(x2, 0); + GETGB(x1); + emit_and8(dyn, ninst, x1, x2, x3, x4); + GBBACK; + break; + case 0x23: + INST_NAME("AND Gd, Ed"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGD; + GETED(0); + emit_and32(dyn, ninst, rex, gd, ed, x3, x4); + break; + case 0x24: + INST_NAME("AND AL, Ib"); + SETFLAGS(X_ALL, SF_SET_PENDING); + u8 = F8; + UXTBw(x1, xRAX); + emit_and8c(dyn, ninst, x1, u8, x3, x4); + BFIx(xRAX, x1, 0, 8); + break; + case 0x25: + INST_NAME("AND EAX, Id"); + SETFLAGS(X_ALL, SF_SET_PENDING); + i64 = F32S; + emit_and32c(dyn, ninst, rex, xRAX, i64, x3, x4); + break; + + case 0x28: + INST_NAME("SUB Eb, Gb"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETEB(x1, 0); + GETGB(x2); + emit_sub8(dyn, ninst, x1, x2, x4, x5); + EBBACK; + break; + case 0x29: + INST_NAME("SUB Ed, Gd"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGD; + GETED(0); + emit_sub32(dyn, ninst, rex, ed, gd, x3, x4); + WBACK; + break; + case 0x2A: + INST_NAME("SUB Gb, Eb"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETEB(x2, 0); + GETGB(x1); + emit_sub8(dyn, ninst, x1, x2, x3, x4); + GBBACK; + break; + case 0x2B: + INST_NAME("SUB Gd, Ed"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGD; + GETED(0); + emit_sub32(dyn, ninst, rex, gd, ed, x3, x4); + break; + case 0x2C: + INST_NAME("SUB AL, Ib"); + SETFLAGS(X_ALL, SF_SET_PENDING); + u8 = F8; + UXTBw(x1, xRAX); + emit_sub8c(dyn, ninst, x1, u8, x3, x4, x5); + BFIx(xRAX, x1, 0, 8); + break; + case 0x2D: + INST_NAME("SUB EAX, Id"); + SETFLAGS(X_ALL, SF_SET_PENDING); + i64 = F32S; + emit_sub32c(dyn, ninst, rex, xRAX, i64, x3, x4, x5); + break; + case 0x2E: + INST_NAME("CS:"); + break; + + case 0x30: + INST_NAME("XOR Eb, Gb"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETEB(x1, 0); + GETGB(x2); + emit_xor8(dyn, ninst, x1, x2, x4, x5); + EBBACK; + break; + case 0x31: + INST_NAME("XOR Ed, Gd"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGD; + GETED(0); + emit_xor32(dyn, ninst, rex, ed, gd, x3, x4); + WBACK; + break; + case 0x32: + INST_NAME("XOR Gb, Eb"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETEB(x2, 0); + GETGB(x1); + emit_xor8(dyn, ninst, x1, x2, x3, x4); + GBBACK; + break; + case 0x33: + INST_NAME("XOR Gd, Ed"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGD; + GETED(0); + emit_xor32(dyn, ninst, rex, gd, ed, x3, x4); + break; + case 0x34: + INST_NAME("XOR AL, Ib"); + SETFLAGS(X_ALL, SF_SET_PENDING); + u8 = F8; + UXTBw(x1, xRAX); + emit_xor8c(dyn, ninst, x1, u8, x3, x4); + BFIx(xRAX, x1, 0, 8); + break; + case 0x35: + INST_NAME("XOR EAX, Id"); + SETFLAGS(X_ALL, SF_SET_PENDING); + i64 = F32S; + emit_xor32c(dyn, ninst, rex, xRAX, i64, x3, x4); + break; + case 0x36: + INST_NAME("SS:"); + break; + + case 0x38: + INST_NAME("CMP Eb, Gb"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETEB(x1, 0); + GETGB(x2); + emit_cmp8(dyn, ninst, x1, x2, x3, x4, x5); + break; + case 0x39: + INST_NAME("CMP Ed, Gd"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGD; + GETED(0); + emit_cmp32(dyn, ninst, rex, ed, gd, x3, x4, x5); + break; + case 0x3A: + INST_NAME("CMP Gb, Eb"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETEB(x2, 0); + GETGB(x1); + 
emit_cmp8(dyn, ninst, x1, x2, x3, x4, x5); + break; + case 0x3B: + INST_NAME("CMP Gd, Ed"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGD; + GETED(0); + emit_cmp32(dyn, ninst, rex, gd, ed, x3, x4, x5); + break; + case 0x3C: + INST_NAME("CMP AL, Ib"); + SETFLAGS(X_ALL, SF_SET_PENDING); + u8 = F8; + UXTBw(x1, xRAX); + if(u8) { + MOV32w(x2, u8); + emit_cmp8(dyn, ninst, x1, x2, x3, x4, x5); + } else { + emit_cmp8_0(dyn, ninst, x1, x3, x4); + } + break; + case 0x3D: + INST_NAME("CMP EAX, Id"); + SETFLAGS(X_ALL, SF_SET_PENDING); + i64 = F32S; + if(i64) { + MOV64xw(x2, i64); + emit_cmp32(dyn, ninst, rex, xRAX, x2, x3, x4, x5); + } else + emit_cmp32_0(dyn, ninst, rex, xRAX, x3, x4); + break; + + case 0x50: + case 0x51: + case 0x52: + case 0x53: + case 0x54: + case 0x55: + case 0x56: + case 0x57: + INST_NAME("PUSH reg"); + gd = xRAX+(opcode&0x07)+(rex.b<<3); + if(gd==xRSP) { + MOVx_REG(x1, gd); + gd = x1; + } + PUSH1(gd); + break; + case 0x58: + case 0x59: + case 0x5A: + case 0x5B: + case 0x5C: + case 0x5D: + case 0x5E: + case 0x5F: + INST_NAME("POP reg"); + gd = xRAX+(opcode&0x07)+(rex.b<<3); + if(gd == xRSP) { + POP1(x1); + MOVx_REG(gd, x1); + } else { + POP1(gd); + } + break; + + case 0x63: + INST_NAME("MOVSXD Gd, Ed"); + nextop = F8; + GETGD; + if(rex.w) { + if(MODREG) { // reg <= reg + SXTWx(gd, xRAX+(nextop&7)+(rex.b<<3)); + } else { // mem <= reg + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); + LDRSW_U12(gd, ed, fixedaddress); + } + } else { + if(MODREG) { // reg <= reg + MOVw_REG(gd, xRAX+(nextop&7)+(rex.b<<3)); + } else { // mem <= reg + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); + LDRw_U12(gd, ed, fixedaddress); + } + } + break; + case 0x64: + addr = dynarec64_64(dyn, addr, ip, ninst, rex, rep, _FS, ok, need_epilog); + break; + case 0x65: + addr = dynarec64_64(dyn, addr, ip, ninst, rex, rep, _GS, ok, need_epilog); + break; + case 0x66: + addr = dynarec64_66(dyn, addr, ip, ninst, rex, rep, ok, need_epilog); + break; + case 0x67: + addr = dynarec64_67(dyn, addr, ip, ninst, rex, rep, ok, need_epilog); + break; + case 0x68: + INST_NAME("PUSH Id"); + i64 = F32S; + if(PK(0)==0xC3) { + MESSAGE(LOG_DUMP, "PUSH then RET, using indirect\n"); + TABLE64(x3, ip+1); + LDRSW_U12(x1, x3, 0); + PUSH1(x1); + } else { + MOV64x(x3, i64); + PUSH1(x3); + } + break; + case 0x69: + INST_NAME("IMUL Gd, Ed, Id"); + SETFLAGS(X_ALL, SF_PENDING); + nextop = F8; + GETGD; + GETED(4); + i64 = F32S; + MOV64xw(x4, i64); + if(rex.w) { + // 64bits imul + UFLAG_IF { + SMULH(x3, ed, x4); + MULx(gd, ed, x4); + UFLAG_OP1(x3); + UFLAG_RES(gd); + UFLAG_DF(x3, d_imul64); + } else { + MULxw(gd, ed, x4); + } + } else { + // 32bits imul + UFLAG_IF { + SMULL(gd, ed, x4); + UFLAG_RES(gd); + LSRx(x3, gd, 32); + UFLAG_OP1(x3); + UFLAG_DF(x3, d_imul32); + MOVw_REG(gd, gd); + } else { + MULxw(gd, ed, x4); + } + } + break; + case 0x6A: + INST_NAME("PUSH Ib"); + i64 = F8S; + MOV64x(x3, i64); + PUSH1(x3); + break; + case 0x6B: + INST_NAME("IMUL Gd, Ed, Ib"); + SETFLAGS(X_ALL, SF_PENDING); + nextop = F8; + GETGD; + GETED(1); + i64 = F8S; + MOV64xw(x4, i64); + if(rex.w) { + // 64bits imul + UFLAG_IF { + SMULH(x3, ed, x4); + MULx(gd, ed, x4); + UFLAG_OP1(x3); + UFLAG_RES(gd); + UFLAG_DF(x3, d_imul64); + } else { + MULxw(gd, ed, x4); + } + } else { + // 32bits imul + UFLAG_IF { + SMULL(gd, ed, x4); + UFLAG_RES(gd); + LSRx(x3, gd, 32); + UFLAG_OP1(x3); + UFLAG_DF(x3, d_imul32); + MOVw_REG(gd, gd); + } else { + MULxw(gd, ed, x4); + } + } + break; + + 
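(The IMUL cases above do not compute EFLAGS inline: with SF_PENDING they only record the operands of a deferred update — SMULH's high half as op1, the MULx result, tagged d_imul64 — so the flags are materialized later only if something actually reads them. The x86 rule that deferred evaluation has to implement is simple, and a minimal C sketch of it follows. This is illustrative only: imul64_flags() is not a box64 helper, and the real deferred-flag code may be structured differently.)

    #include <stdint.h>
    #include <stdbool.h>

    /* x86 IMUL (two/three operand forms): CF and OF are set when the signed
     * product does not fit the destination, i.e. when the high half is not
     * just the sign extension of the low half. */
    static bool imul64_flags(int64_t a, int64_t b)
    {
        __int128 full = (__int128)a * (__int128)b;   /* exact product (GCC/Clang extension) */
        int64_t  lo = (int64_t)full;                 /* what MULx leaves in the destination */
        int64_t  hi = (int64_t)(full >> 64);         /* what SMULH computes */
        return hi != (lo < 0 ? -1 : 0);              /* truncation happened -> CF = OF = 1 */
    }

The 32-bit path above stores the analogous split: SMULL's full 64-bit product, with its upper 32 bits kept as op1, plays the role of hi/lo here.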
#define GO(GETFLAGS, NO, YES, F) \ + READFLAGS(F); \ + i8 = F8S; \ + BARRIER(2); \ + JUMP(addr+i8);\ + GETFLAGS; \ + if(dyn->insts[ninst].x64.jmp_insts==-1) { \ + /* out of the block */ \ + i32 = dyn->insts[ninst+1].address-(dyn->native_size); \ + Bcond(NO, i32); \ + jump_to_next(dyn, addr+i8, 0, ninst); \ + } else { \ + /* inside the block */ \ + i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->native_size); \ + Bcond(YES, i32); \ + } + + GOCOND(0x70, "J", "ib"); + + #undef GO + + case 0x80: + nextop = F8; + switch((nextop>>3)&7) { + case 0: //ADD + INST_NAME("ADD Eb, Ib"); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEB(x1, 1); + u8 = F8; + emit_add8c(dyn, ninst, x1, u8, x2, x4); + EBBACK; + break; + case 1: //OR + INST_NAME("OR Eb, Ib"); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEB(x1, 1); + u8 = F8; + emit_or8c(dyn, ninst, x1, u8, x2, x4); + EBBACK; + break; + case 2: //ADC + INST_NAME("ADC Eb, Ib"); + READFLAGS(X_CF); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEB(x1, 1); + u8 = F8; + emit_adc8c(dyn, ninst, x1, u8, x2, x4, x5); + EBBACK; + break; + case 3: //SBB + INST_NAME("SBB Eb, Ib"); + READFLAGS(X_CF); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEB(x1, 1); + u8 = F8; + emit_sbb8c(dyn, ninst, x1, u8, x2, x4, x5); + EBBACK; + break; + case 4: //AND + INST_NAME("AND Eb, Ib"); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEB(x1, 1); + u8 = F8; + emit_and8c(dyn, ninst, x1, u8, x2, x4); + EBBACK; + break; + case 5: //SUB + INST_NAME("SUB Eb, Ib"); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEB(x1, 1); + u8 = F8; + emit_sub8c(dyn, ninst, x1, u8, x2, x4, x5); + EBBACK; + break; + case 6: //XOR + INST_NAME("XOR Eb, Ib"); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEB(x1, 1); + u8 = F8; + emit_xor8c(dyn, ninst, x1, u8, x2, x4); + EBBACK; + break; + case 7: //CMP + INST_NAME("CMP Eb, Ib"); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEB(x1, 1); + u8 = F8; + if(u8) { + MOV32w(x2, u8); + emit_cmp8(dyn, ninst, x1, x2, x3, x4, x5); + } else { + emit_cmp8_0(dyn, ninst, x1, x3, x4); + } + break; + default: + DEFAULT; + } + break; + case 0x81: + case 0x83: + nextop = F8; + switch((nextop>>3)&7) { + case 0: //ADD + if(opcode==0x81) {INST_NAME("ADD Ed, Id");} else {INST_NAME("ADD Ed, Ib");} + SETFLAGS(X_ALL, SF_SET_PENDING); + GETED((opcode==0x81)?4:1); + if(opcode==0x81) i64 = F32S; else i64 = F8S; + emit_add32c(dyn, ninst, rex, ed, i64, x3, x4, x5); + WBACK; + break; + case 1: //OR + if(opcode==0x81) {INST_NAME("OR Ed, Id");} else {INST_NAME("OR Ed, Ib");} + SETFLAGS(X_ALL, SF_SET_PENDING); + GETED((opcode==0x81)?4:1); + if(opcode==0x81) i64 = F32S; else i64 = F8S; + emit_or32c(dyn, ninst, rex, ed, i64, x3, x4); + WBACK; + break; + case 2: //ADC + if(opcode==0x81) {INST_NAME("ADC Ed, Id");} else {INST_NAME("ADC Ed, Ib");} + READFLAGS(X_CF); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETED((opcode==0x81)?4:1); + if(opcode==0x81) i64 = F32S; else i64 = F8S; + MOV64xw(x5, i64); + emit_adc32(dyn, ninst, rex, ed, x5, x3, x4); + WBACK; + break; + case 3: //SBB + if(opcode==0x81) {INST_NAME("SBB Ed, Id");} else {INST_NAME("SBB Ed, Ib");} + READFLAGS(X_CF); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETED((opcode==0x81)?4:1); + if(opcode==0x81) i64 = F32S; else i64 = F8S; + MOV64xw(x5, i64); + emit_sbb32(dyn, ninst, rex, ed, x5, x3, x4); + WBACK; + break; + case 4: //AND + if(opcode==0x81) {INST_NAME("AND Ed, Id");} else {INST_NAME("AND Ed, Ib");} + SETFLAGS(X_ALL, SF_SET_PENDING); + GETED((opcode==0x81)?4:1); + if(opcode==0x81) i64 = F32S; else i64 = F8S; + emit_and32c(dyn, ninst, rex, ed, i64, x3, x4); + WBACK; + break; + case 5: 
//SUB + if(opcode==0x81) {INST_NAME("SUB Ed, Id");} else {INST_NAME("SUB Ed, Ib");} + SETFLAGS(X_ALL, SF_SET_PENDING); + GETED((opcode==0x81)?4:1); + if(opcode==0x81) i64 = F32S; else i64 = F8S; + emit_sub32c(dyn, ninst, rex, ed, i64, x3, x4, x5); + WBACK; + break; + case 6: //XOR + if(opcode==0x81) {INST_NAME("XOR Ed, Id");} else {INST_NAME("XOR Ed, Ib");} + SETFLAGS(X_ALL, SF_SET_PENDING); + GETED((opcode==0x81)?4:1); + if(opcode==0x81) i64 = F32S; else i64 = F8S; + emit_xor32c(dyn, ninst, rex, ed, i64, x3, x4); + WBACK; + break; + case 7: //CMP + if(opcode==0x81) {INST_NAME("CMP Ed, Id");} else {INST_NAME("CMP Ed, Ib");} + SETFLAGS(X_ALL, SF_SET_PENDING); + GETED((opcode==0x81)?4:1); + if(opcode==0x81) i64 = F32S; else i64 = F8S; + if(i64) { + MOV64xw(x2, i64); + emit_cmp32(dyn, ninst, rex, ed, x2, x3, x4, x5); + } else + emit_cmp32_0(dyn, ninst, rex, ed, x3, x4); + break; + } + break; + case 0x84: + INST_NAME("TEST Eb, Gb"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop=F8; + GETEB(x1, 0); + GETGB(x2); + emit_test8(dyn, ninst, x1, x2, x3, x4, x5); + break; + case 0x85: + INST_NAME("TEST Ed, Gd"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop=F8; + GETGD; + GETED(0); + emit_test32(dyn, ninst, rex, ed, gd, x3, x5); + break; + case 0x86: + INST_NAME("(LOCK)XCHG Eb, Gb"); + // Do the swap + nextop = F8; + if(MODREG) { + GETGB(x4); + if(rex.rex) { + ed = xRAX+(nextop&7)+(rex.b<<3); + eb1 = ed; + eb2 = 0; + } else { + ed = (nextop&7); + eb1 = xRAX+(ed&3); + eb2 = ((ed&4)>>2); + } + UBFXw(x1, eb1, eb2*8, 8); + // do the swap 14 -> ed, 1 -> gd + BFIx(gb1, x1, gb2*8, 8); + BFIx(eb1, x4, eb2*8, 8); + } else { + DMB_ISH(); + GETGB(x4); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0, rex, 0, 0); + MARKLOCK; + // do the swap with exclusive locking + LDAXRB(x1, ed); + // do the swap 14 -> strb(ed), 1 -> gd + STLXRB(x3, x4, ed); + CBNZx_MARKLOCK(x3); + DMB_ISH(); + BFIx(gb1, x1, gb2*8, 8); + } + break; + case 0x87: + INST_NAME("(LOCK)XCHG Ed, Gd"); + nextop = F8; + if(MODREG) { + GETGD; + GETED(0); + MOVxw_REG(x1, gd); + MOVxw_REG(gd, ed); + MOVxw_REG(ed, x1); + } else { + GETGD; + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0, rex, 0, 0); + DMB_ISH(); + TSTx_mask(ed, 1, 0, 1+rex.w); // mask=3 or 7 + B_MARK(cNE); + MARKLOCK; + LDAXRxw(x1, ed); + STLXRxw(x3, gd, ed); + CBNZx_MARKLOCK(x3); + B_MARK2_nocond; + MARK; + LDRxw_U12(x1, ed, 0); + STRxw_U12(gd, ed, 0); + MARK2; + DMB_ISH(); + MOVxw_REG(gd, x1); + } + break; + case 0x88: + INST_NAME("MOV Eb, Gb"); + nextop = F8; + gd = ((nextop&0x38)>>3)+(rex.r<<3); + if(rex.rex) { + gb2 = 0; + gb1 = xRAX + gd; + } else { + gb2 = ((gd&4)>>2); + gb1 = xRAX+(gd&3); + } + if(gb2) { + gd = x4; + UBFXw(gd, gb1, gb2*8, 8); + } else { + gd = gb1; // no need to extract + } + if(MODREG) { + ed = (nextop&7) + (rex.b<<3); + if(rex.rex) { + eb1 = xRAX+ed; + eb2 = 0; + } else { + eb1 = xRAX+(ed&3); // Ax, Cx, Dx or Bx + eb2 = ((ed&4)>>2); // L or H + } + BFIx(eb1, gd, eb2*8, 8); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff, 0, rex, 0, 0); + STRB_U12(gd, ed, fixedaddress); + } + break; + case 0x89: + INST_NAME("MOV Ed, Gd"); + nextop=F8; + GETGD; + if(MODREG) { // reg <= reg + MOVxw_REG(xRAX+(nextop&7)+(rex.b<<3), gd); + } else { // mem <= reg + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, 0, 0); + STRxw_U12(gd, ed, fixedaddress); + if(box64_dynarec_strongmem && + (dyn->insts[ninst].x64.barrier || box64_dynarec_strongmem>1 || 
(dyn->insts[ninst+1].x64.barrier || dyn->insts[ninst+1].x64.jmp))) { + DMB_ISH(); + } + } + break; + case 0x8A: + INST_NAME("MOV Gb, Eb"); + nextop = F8; + if(rex.rex) { + gb1 = gd = xRAX+((nextop&0x38)>>3)+(rex.r<<3); + gb2=0; + } else { + gd = (nextop&0x38)>>3; + gb1 = xRAX+(gd&3); + gb2 = ((gd&4)>>2); + } + if(MODREG) { + if(rex.rex) { + wback = xRAX+(nextop&7)+(rex.b<<3); + wb2 = 0; + } else { + wback = (nextop&7); + wb2 = (wback>>2); + wback = xRAX+(wback&3); + } + if(wb2) { + UBFXw(x4, wback, wb2*8, 8); + ed = x4; + } else { + ed = wback; + } + } else { + if(box64_dynarec_strongmem && + (dyn->insts[ninst].x64.barrier || !ninst || box64_dynarec_strongmem>1 || (ninst && dyn->insts[ninst-1].x64.barrier))) { + DMB_ISH(); + } + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff, 0, rex, 0, 0); + LDRB_U12(x4, wback, fixedaddress); + ed = x4; + } + BFIx(gb1, ed, gb2*8, 8); + break; + case 0x8B: + INST_NAME("MOV Gd, Ed"); + nextop=F8; + GETGD; + if(MODREG) { + MOVxw_REG(gd, xRAX+(nextop&7)+(rex.b<<3)); + } else { + if(box64_dynarec_strongmem && + (dyn->insts[ninst].x64.barrier || !ninst || box64_dynarec_strongmem>1 || (ninst && dyn->insts[ninst-1].x64.barrier))) { + DMB_ISH(); + } + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, 0, 0); + LDRxw_U12(gd, ed, fixedaddress); + } + break; + case 0x8C: + INST_NAME("MOV Ed, Seg"); + nextop=F8; + if((nextop&0xC0)==0xC0) { // reg <= seg + LDRH_U12(xRAX+(nextop&7)+(rex.b<<3), xEmu, offsetof(x64emu_t, segs[(nextop&0x38)>>3])); + } else { // mem <= seg + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0, rex, 0, 0); + LDRH_U12(x3, xEmu, offsetof(x64emu_t, segs[(nextop&0x38)>>3])); + STRH_U12(x3, ed, fixedaddress); + } + break; + case 0x8D: + INST_NAME("LEA Gd, Ed"); + nextop=F8; + GETGD; + if(MODREG) { // reg <= reg? 
that's an invalid operation + DEFAULT; + } else { // mem <= reg + addr = geted(dyn, addr, ninst, nextop, &ed, gd, &fixedaddress, 0, 0, rex, 0, 0); + if(gd!=ed) { // it's sometimes used as a 3 bytes NOP + MOVxw_REG(gd, ed); + } + else if(!rex.w) { + MOVw_REG(gd, gd); //truncate the higher 32bits as asked + } + } + break; + case 0x8E: + INST_NAME("MOV Seg,Ew"); + nextop = F8; + if((nextop&0xC0)==0xC0) { + ed = xRAX+(nextop&7)+(rex.b<<3); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 0, rex, 0, 0); + LDRH_U12(x1, ed, fixedaddress); + ed = x1; + } + STRw_U12(ed, xEmu, offsetof(x64emu_t, segs[(nextop&0x38)>>3])); + STRw_U12(wZR, xEmu, offsetof(x64emu_t, segs_serial[(nextop&0x38)>>3])); + break; + case 0x8F: + INST_NAME("POP Ed"); + nextop = F8; + if((nextop&0xC0)==0xC0) { + POP1(xRAX+(nextop&7)+(rex.b<<3)); + } else { + POP1(x2); // so this can handle POP [ESP] and maybe some variant too + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0); + if(ed==xRSP) { + STRx_U12(x2, ed, fixedaddress); + } else { + // complicated to just allow a segfault that can be recovered correctly + SUBx_U12(xRSP, xRSP, 8); + STRx_U12(x2, ed, fixedaddress); + ADDx_U12(xRSP, xRSP, 8); + } + } + break; + case 0x90: + case 0x91: + case 0x92: + case 0x93: + case 0x94: + case 0x95: + case 0x96: + case 0x97: + gd = xRAX+(opcode&0x07)+(rex.b<<3); + if(gd==xRAX) { + INST_NAME("NOP"); + } else { + INST_NAME("XCHG EAX, Reg"); + MOVxw_REG(x2, xRAX); + MOVxw_REG(xRAX, gd); + MOVxw_REG(gd, x2); + } + break; + + case 0x98: + INST_NAME("CWDE"); + if(rex.w) { + SXTWx(xRAX, xRAX); + } else { + SXTHw(xRAX, xRAX); + } + break; + case 0x99: + INST_NAME("CDQ"); + SBFXxw(xRDX, xRAX, rex.w?63:31, 1); + break; + + case 0x9B: + INST_NAME("FWAIT"); + break; + case 0x9C: + INST_NAME("PUSHF"); + READFLAGS(X_ALL); + PUSH1(xFlags); + break; + case 0x9D: + INST_NAME("POPF"); + SETFLAGS(X_ALL, SF_SET); + POP1(xFlags); + MOV32w(x1, 0x3F7FD7); + ANDw_REG(xFlags, xFlags, x1); + ORRw_mask(xFlags, xFlags, 0b011111, 0); //mask=0x00000002 + SET_DFNONE(x1); + break; + case 0x9E: + INST_NAME("SAHF"); + SETFLAGS(X_CF|X_PF|X_AF|X_ZF|X_SF, SF_SUBSET); + MOV32w(x2, 0b11010101); + BICw_REG(xFlags, xFlags, x2); + UBFXx(x1, xRAX, 8, 8); + ANDw_REG(x1, x1, x2); + ORRw_REG(xFlags, xFlags, x1); + SET_DFNONE(x1); + break; + case 0x9F: + INST_NAME("LAHF"); + READFLAGS(X_CF|X_PF|X_AF|X_ZF|X_SF); + BFIx(xRAX, xFlags, 8, 8); + break; + case 0xA0: + INST_NAME("MOV AL,Ob"); + u64 = F64; + MOV64x(x1, u64); + LDRB_U12(x2, x1, 0); + BFIx(xRAX, x2, 0, 8); + break; + case 0xA1: + INST_NAME("MOV EAX,Od"); + u64 = F64; + MOV64x(x1, u64); + LDRxw_U12(xRAX, x1, 0); + break; + case 0xA2: + INST_NAME("MOV Ob,AL"); + u64 = F64; + MOV64x(x1, u64); + STRB_U12(xRAX, x1, 0); + break; + case 0xA3: + INST_NAME("MOV Od,EAX"); + u64 = F64; + MOV64x(x1, u64); + STRxw_U12(xRAX, x1, 0); + break; + case 0xA4: + if(rep) { + INST_NAME("REP MOVSB"); + CBZx_NEXT(xRCX); + TBNZ_MARK2(xFlags, F_DF); + MARK; // Part with DF==0 + LDRB_S9_postindex(x1, xRSI, 1); + STRB_S9_postindex(x1, xRDI, 1); + SUBx_U12(xRCX, xRCX, 1); + CBNZx_MARK(xRCX); + B_NEXT_nocond; + MARK2; // Part with DF==1 + LDRB_S9_postindex(x1, xRSI, -1); + STRB_S9_postindex(x1, xRDI, -1); + SUBx_U12(xRCX, xRCX, 1); + CBNZx_MARK2(xRCX); + // done + } else { + INST_NAME("MOVSB"); + GETDIR(x3, 1); + LDRB_U12(x1, xRSI, 0); + STRB_U12(x1, xRDI, 0); + ADDx_REG(xRSI, xRSI, x3); + ADDx_REG(xRDI, xRDI, x3); + } + break; + case 0xA5: + if(rep) { + INST_NAME("REP MOVSD"); + 
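+            // rep form: nothing to do if RCX is already 0; otherwise branch to the DF==1 loop below when the direction flag is set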
CBZx_NEXT(xRCX); + TBNZ_MARK2(xFlags, F_DF); + MARK; // Part with DF==0 + LDRxw_S9_postindex(x1, xRSI, rex.w?8:4); + STRxw_S9_postindex(x1, xRDI, rex.w?8:4); + SUBx_U12(xRCX, xRCX, 1); + CBNZx_MARK(xRCX); + B_NEXT_nocond; + MARK2; // Part with DF==1 + LDRxw_S9_postindex(x1, xRSI, rex.w?-8:-4); + STRxw_S9_postindex(x1, xRDI, rex.w?-8:-4); + SUBx_U12(xRCX, xRCX, 1); + CBNZx_MARK2(xRCX); + // done + } else { + INST_NAME("MOVSD"); + GETDIR(x3, rex.w?8:4); + LDRxw_U12(x1, xRSI, 0); + STRxw_U12(x1, xRDI, 0); + ADDx_REG(xRSI, xRSI, x3); + ADDx_REG(xRDI, xRDI, x3); + } + break; + case 0xA6: + switch(rep) { + case 1: + case 2: + if(rep==1) {INST_NAME("REPNZ CMPSB");} else {INST_NAME("REPZ CMPSB");} + SETFLAGS(X_ALL, SF_MAYSET); + CBZx_NEXT(xRCX); + TBNZ_MARK2(xFlags, F_DF); + MARK; // Part with DF==0 + LDRB_S9_postindex(x1, xRSI, 1); + LDRB_S9_postindex(x2, xRDI, 1); + SUBx_U12(xRCX, xRCX, 1); + CMPSw_REG(x1, x2); + B_MARK3((rep==1)?cEQ:cNE); + CBNZx_MARK(xRCX); + B_MARK3_nocond; + MARK2; // Part with DF==1 + LDRB_S9_postindex(x1, xRSI, -1); + LDRB_S9_postindex(x2, xRDI, -1); + SUBx_U12(xRCX, xRCX, 1); + CMPSw_REG(x1, x2); + B_MARK3((rep==1)?cEQ:cNE); + CBNZx_MARK2(xRCX); + MARK3; // end + emit_cmp8(dyn, ninst, x1, x2, x3, x4, x5); + break; + default: + INST_NAME("CMPSB"); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETDIR(x3, 1); + LDRB_U12(x1, xRSI, 0); + LDRB_U12(x2, xRDI, 0); + ADDx_REG(xRSI, xRSI, x3); + ADDx_REG(xRDI, xRDI, x3); + emit_cmp8(dyn, ninst, x1, x2, x3, x4, x5); + break; + } + break; + + case 0xA8: + INST_NAME("TEST AL, Ib"); + SETFLAGS(X_ALL, SF_SET_PENDING); + UXTBx(x1, xRAX); + u8 = F8; + MOV32w(x2, u8); + emit_test8(dyn, ninst, x1, x2, x3, x4, x5); + break; + case 0xA9: + INST_NAME("TEST EAX, Id"); + SETFLAGS(X_ALL, SF_SET_PENDING); + i64 = F32S; + MOV64xw(x2, i64); + emit_test32(dyn, ninst, rex, xRAX, x2, x3, x4); + break; + case 0xAA: + if(rep) { + INST_NAME("REP STOSB"); + CBZx_NEXT(xRCX); + TBNZ_MARK2(xFlags, F_DF); + MARK; // Part with DF==0 + STRB_S9_postindex(xRAX, xRDI, 1); + SUBx_U12(xRCX, xRCX, 1); + CBNZx_MARK(xRCX); + B_NEXT_nocond; + MARK2; // Part with DF==1 + STRB_S9_postindex(xRAX, xRDI, -1); + SUBx_U12(xRCX, xRCX, 1); + CBNZx_MARK2(xRCX); + // done + } else { + INST_NAME("STOSB"); + GETDIR(x3, 1); + STRB_U12(xRAX, xRDI, 0); + ADDx_REG(xRDI, xRDI, x3); + } + break; + case 0xAB: + if(rep) { + INST_NAME("REP STOSD"); + CBZx_NEXT(xRCX); + TBNZ_MARK2(xFlags, F_DF); + MARK; // Part with DF==0 + STRxw_S9_postindex(xRAX, xRDI, rex.w?8:4); + SUBx_U12(xRCX, xRCX, 1); + CBNZx_MARK(xRCX); + B_NEXT_nocond; + MARK2; // Part with DF==1 + STRxw_S9_postindex(xRAX, xRDI, rex.w?-8:-4); + SUBx_U12(xRCX, xRCX, 1); + CBNZx_MARK2(xRCX); + // done + } else { + INST_NAME("STOSD"); + GETDIR(x3, rex.w?8:4); + STRxw_U12(xRAX, xRDI, 0); + ADDx_REG(xRDI, xRDI, x3); + } + break; + + case 0xAE: + switch(rep) { + case 1: + case 2: + if(rep==1) {INST_NAME("REPNZ SCASB");} else {INST_NAME("REPZ SCASB");} + SETFLAGS(X_ALL, SF_MAYSET); + CBZx_NEXT(xRCX); + UBFXw(x1, xRAX, 0, 8); + TBNZ_MARK2(xFlags, F_DF); + MARK; // Part with DF==0 + LDRB_S9_postindex(x2, xRDI, 1); + SUBx_U12(xRCX, xRCX, 1); + CMPSw_REG(x1, x2); + B_MARK3((rep==1)?cEQ:cNE); + CBNZx_MARK(xRCX); + B_MARK3_nocond; + MARK2; // Part with DF==1 + LDRB_S9_postindex(x2, xRDI, -1); + SUBx_U12(xRCX, xRCX, 1); + CMPSw_REG(x1, x2); + B_MARK3((rep==1)?cEQ:cNE); + CBNZx_MARK2(xRCX); + MARK3; // end + emit_cmp8(dyn, ninst, x1, x2, x3, x4, x5); + break; + default: + INST_NAME("SCASB"); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETDIR(x3, 1); + UBFXw(x1, 
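/*
 * Illustrative sketch: the reference semantics that the MARK/MARK2 pair above
 * implements for REP MOVS. The direction flag picks post-increment vs
 * post-decrement, RCX counts elements (not bytes), and a zero count skips the
 * whole loop (the CBZx_NEXT at the top). Helper name is mine.
 */
#include <stdint.h>
#include <string.h>

static void rep_movs(uint8_t **rsi, uint8_t **rdi, uint64_t *rcx,
                     int df, unsigned width /* 1, 2, 4 or 8 */)
{
    int64_t step = df ? -(int64_t)width : (int64_t)width;
    while (*rcx) {
        memcpy(*rdi, *rsi, width);   /* one element per iteration */
        *rsi += step;
        *rdi += step;
        (*rcx)--;
    }
}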
xRAX, 0, 8); + LDRB_U12(x2, xRDI, 0); + ADDx_REG(xRDI, xRDI, x3); + emit_cmp8(dyn, ninst, x1, x2, x3, x4, x5); + break; + } + break; + + + case 0xB0: + case 0xB1: + case 0xB2: + case 0xB3: + INST_NAME("MOV xL, Ib"); + u8 = F8; + MOV32w(x1, u8); + if(rex.rex) + gb1 = xRAX+(opcode&7)+(rex.b<<3); + else + gb1 = xRAX+(opcode&3); + BFIx(gb1, x1, 0, 8); + break; + case 0xB4: + case 0xB5: + case 0xB6: + case 0xB7: + INST_NAME("MOV xH, Ib"); + u8 = F8; + MOV32w(x1, u8); + if(rex.rex) { + gb1 = xRAX+(opcode&7)+(rex.b<<3); + BFIx(gb1, x1, 0, 8); + } else { + gb1 = xRAX+(opcode&3); + BFIx(gb1, x1, 8, 8); + } + break; + case 0xB8: + case 0xB9: + case 0xBA: + case 0xBB: + case 0xBC: + case 0xBD: + case 0xBE: + case 0xBF: + INST_NAME("MOV Reg, Id"); + gd = xRAX+(opcode&7)+(rex.b<<3); + if(rex.w) { + u64 = F64; + MOV64x(gd, u64); + } else { + u32 = F32; + MOV32w(gd, u32); + } + break; + case 0xC0: + nextop = F8; + switch((nextop>>3)&7) { + case 0: + INST_NAME("ROL Eb, Ib"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + SETFLAGS(X_OF|X_CF, SF_SET); + GETEB(x1, 1); + u8 = F8; + MOV32w(x2, u8); + CALL_(rol8, ed, x3); + EBBACK; + break; + case 1: + INST_NAME("ROR Eb, Ib"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + SETFLAGS(X_OF|X_CF, SF_SET); + GETEB(x1, 1); + u8 = F8; + MOV32w(x2, u8); + CALL_(ror8, ed, x3); + EBBACK; + break; + case 2: + INST_NAME("RCL Eb, Ib"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + READFLAGS(X_CF); + SETFLAGS(X_OF|X_CF, SF_SET); + GETEB(x1, 1); + u8 = F8; + MOV32w(x2, u8); + CALL_(rcl8, ed, x3); + EBBACK; + break; + case 3: + INST_NAME("RCR Eb, Ib"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + READFLAGS(X_CF); + SETFLAGS(X_OF|X_CF, SF_SET); + GETEB(x1, 1); + u8 = F8; + MOV32w(x2, u8); + CALL_(rcr8, ed, x3); + EBBACK; + break; + case 4: + case 6: + INST_NAME("SHL Eb, Ib"); + GETEB(x1, 1); + u8 = (F8)&0x1f; + if(u8) { + SETFLAGS(X_ALL, SF_PENDING); + UFLAG_IF{ + MOV32w(x4, u8); UFLAG_OP2(x4); + }; + UFLAG_OP1(ed); + LSLw(ed, ed, u8); + EBBACK; + UFLAG_RES(ed); + UFLAG_DF(x3, d_shl8); + } else { + NOP; + } + break; + case 5: + INST_NAME("SHR Eb, Ib"); + GETEB(x1, 1); + u8 = (F8)&0x1f; + if(u8) { + SETFLAGS(X_ALL, SF_PENDING); + UFLAG_IF{ + MOV32w(x4, u8); UFLAG_OP2(x4); + }; + UFLAG_OP1(ed); + if(u8) { + LSRw(ed, ed, u8); + EBBACK; + } + UFLAG_RES(ed); + UFLAG_DF(x3, d_shr8); + } else { + NOP; + } + break; + case 7: + INST_NAME("SAR Eb, Ib"); + GETSEB(x1, 1); + u8 = (F8)&0x1f; + if(u8) { + SETFLAGS(X_ALL, SF_PENDING); + UFLAG_IF{ + MOV32w(x4, u8); UFLAG_OP2(x4); + }; + UFLAG_OP1(ed); + if(u8) { + ASRw(ed, ed, u8); + EBBACK; + } + UFLAG_RES(ed); + UFLAG_DF(x3, d_sar8); + } else { + NOP; + } + break; + } + break; + case 0xC1: + nextop = F8; + switch((nextop>>3)&7) { + case 0: + INST_NAME("ROL Ed, Ib"); + SETFLAGS(X_OF|X_CF, SF_SUBSET); + GETED(1); + u8 = (F8)&(rex.w?0x3f:0x1f); + emit_rol32c(dyn, ninst, rex, ed, u8, x3, x4); + if(u8) { WBACK; } + break; + case 1: + INST_NAME("ROR Ed, Ib"); + SETFLAGS(X_OF|X_CF, SF_SUBSET); + GETED(1); + u8 = (F8)&(rex.w?0x3f:0x1f); + emit_ror32c(dyn, ninst, rex, ed, u8, x3, x4); + if(u8) { WBACK; } + break; + case 2: + INST_NAME("RCL Ed, Ib"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + READFLAGS(X_CF); + SETFLAGS(X_OF|X_CF, SF_SET); + GETEDW(x4, x1, 1); + u8 = F8; + MOV32w(x2, u8); + CALL_(rex.w?((void*)rcl64):((void*)rcl32), ed, x4); + WBACK; + break; + case 3: + INST_NAME("RCR Ed, Ib"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + READFLAGS(X_CF); + SETFLAGS(X_OF|X_CF, SF_SET); + GETEDW(x4, x1, 1); + u8 = F8; + MOV32w(x2, u8); + 
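/*
 * Illustrative sketch of the deferred-flags idea behind the UFLAG_OP1/OP2/RES
 * and UFLAG_DF macros used in the SHL Eb,Ib case above: instead of computing
 * EFLAGS after every instruction, the emitted code only records the operands,
 * the result and a "which operation" tag, and the real flags are materialised
 * later, only if a following instruction actually reads them. All names below
 * are hypothetical, not box64's internal layout.
 */
#include <stdint.h>

enum defer_op { DF_NONE_EX, DF_SHL8_EX /* ... one tag per flag-producing op */ };

struct lazy_flags {
    uint64_t op1, op2, res;
    enum defer_op df;
};

static int lazy_zf(const struct lazy_flags *lf)
{
    switch (lf->df) {
    case DF_SHL8_EX: return (uint8_t)lf->res == 0;  /* ZF for an 8-bit shift          */
    default:         return 0;                      /* flags already materialised     */
    }
}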
CALL_(rex.w?((void*)rcr64):((void*)rcr32), ed, x4); + WBACK; + break; + case 4: + case 6: + INST_NAME("SHL Ed, Ib"); + SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + GETED(1); + u8 = (F8)&(rex.w?0x3f:0x1f); + emit_shl32c(dyn, ninst, rex, ed, u8, x3, x4); + WBACK; + break; + case 5: + INST_NAME("SHR Ed, Ib"); + SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + GETED(1); + u8 = (F8)&(rex.w?0x3f:0x1f); + emit_shr32c(dyn, ninst, rex, ed, u8, x3, x4); + if(u8) { + WBACK; + } + break; + case 7: + INST_NAME("SAR Ed, Ib"); + SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + GETED(1); + u8 = (F8)&(rex.w?0x3f:0x1f); + emit_sar32c(dyn, ninst, rex, ed, u8, x3, x4); + if(u8) { + WBACK; + } + break; + } + break; + case 0xC2: + INST_NAME("RETN"); + //SETFLAGS(X_ALL, SF_SET); // Hack, set all flags (to an unknown state...) + READFLAGS(X_PEND); // lets play safe here too + BARRIER(2); + i32 = F16; + retn_to_epilog(dyn, ninst, i32); + *need_epilog = 0; + *ok = 0; + break; + case 0xC3: + INST_NAME("RET"); + // SETFLAGS(X_ALL, SF_SET); // Hack, set all flags (to an unknown state...) + READFLAGS(X_PEND); // so instead, force the defered flags, so it's not too slow, and flags are not lost + BARRIER(2); + ret_to_epilog(dyn, ninst); + *need_epilog = 0; + *ok = 0; + break; + + case 0xC6: + INST_NAME("MOV Eb, Ib"); + nextop=F8; + if(MODREG) { // reg <= u8 + u8 = F8; + if(!rex.rex) { + ed = (nextop&7); + eb1 = xRAX+(ed&3); // Ax, Cx, Dx or Bx + eb2 = (ed&4)>>2; // L or H + } else { + eb1 = xRAX+(nextop&7)+(rex.b<<3); + eb2 = 0; + } + MOV32w(x3, u8); + BFIx(eb1, x3, eb2*8, 8); + } else { // mem <= u8 + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff, 0, rex, 0, 1); + u8 = F8; + MOV32w(x3, u8); + STRB_U12(x3, ed, fixedaddress); + } + break; + case 0xC7: + INST_NAME("MOV Ed, Id"); + nextop=F8; + if(MODREG) { // reg <= i32 + i64 = F32S; + ed = xRAX+(nextop&7)+(rex.b<<3); + MOV64xw(ed, i64); + } else { // mem <= i32 + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, 0, 4); + i64 = F32S; + MOV64xw(x3, i64); + STRxw_U12(x3, ed, fixedaddress); + } + break; + + case 0xC9: + INST_NAME("LEAVE"); + MOVx_REG(xRSP, xRBP); + POP1(xRBP); + break; + + case 0xCC: + SETFLAGS(X_ALL, SF_SET); // Hack, set all flags (to an unknown state...) 
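/*
 * Illustrative sketch: why the immediate in the 0xC1 shift group above is
 * masked with 0x1f (0x3f with REX.W) and why a zero count skips the
 * write-back. x86 masks shift counts to operand-width-1 bits, and a masked
 * count of 0 leaves both the destination and EFLAGS untouched. Function name
 * is mine.
 */
#include <stdint.h>

static uint64_t shl_ed_ib(uint64_t ed, uint8_t imm, int rexw, int *flags_touched)
{
    unsigned count = imm & (rexw ? 0x3f : 0x1f);
    *flags_touched = (count != 0);
    if (!count)
        return ed;                      /* nothing happens, flags preserved        */
    if (rexw)
        return ed << count;
    return (uint32_t)(ed << count);     /* 32-bit result zero-extends into the GPR */
}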
+ if(PK(0)=='S' && PK(1)=='C') { + addr+=2; + BARRIER(2); + INST_NAME("Special Box64 instruction"); + if((PK64(0)==0)) + { + addr+=8; + MESSAGE(LOG_DEBUG, "Exit x64 Emu\n"); + //GETIP(ip+1+2); // no use + //STORE_XEMU_REGS(xRIP); // no need, done in epilog + MOV32w(x1, 1); + STRw_U12(x1, xEmu, offsetof(x64emu_t, quit)); + *ok = 0; + *need_epilog = 1; + } else { + MESSAGE(LOG_DUMP, "Native Call to %s\n", GetNativeName(GetNativeFnc(ip))); + x87_forget(dyn, ninst, x3, x4, 0); + sse_purge07cache(dyn, ninst, x3); + tmp = isSimpleWrapper(*(wrapper_t*)(addr)); + if(box64_log<2 && tmp) { + //GETIP(ip+3+8+8); // read the 0xCC + call_n(dyn, ninst, *(void**)(addr+8), tmp); + addr+=8+8; + } else { + GETIP(ip+1); // read the 0xCC + STORE_XEMU_CALL(xRIP); + CALL_S(x64Int3, -1); + LOAD_XEMU_CALL(xRIP); + addr+=8+8; + TABLE64(x3, addr); // expected return address + CMPSx_REG(xRIP, x3); + B_MARK(cNE); + LDRw_U12(w1, xEmu, offsetof(x64emu_t, quit)); + CBZw_NEXT(w1); + MARK; + LOAD_XEMU_REM(); + jump_to_epilog(dyn, 0, xRIP, ninst); + } + } + } else { + #if 1 + INST_NAME("INT 3"); + // check if TRAP signal is handled + LDRx_U12(x1, xEmu, offsetof(x64emu_t, context)); + MOV32w(x2, offsetof(box64context_t, signals[SIGTRAP])); + LDRx_REG(x3, x1, x2); + CMPSx_U12(x3, 0); + B_NEXT(cNE); + MOV32w(x1, SIGTRAP); + CALL_(raise, -1, 0); + break; + #else + DEFAULT; + #endif + } + break; + + case 0xCF: + INST_NAME("IRET"); + SETFLAGS(X_ALL, SF_SET); // Not a hack, EFLAGS are restored + BARRIER(2); + iret_to_epilog(dyn, ninst, rex.w); + *need_epilog = 0; + *ok = 0; + break; + case 0xD0: + case 0xD2: // TODO: Jump if CL is 0 + nextop = F8; + switch((nextop>>3)&7) { + case 0: + if(opcode==0xD0) { + INST_NAME("ROL Eb, 1"); + MOV32w(x2, 1); + } else { + INST_NAME("ROL Eb, CL"); + ANDSw_mask(x2, xRCX, 0, 0b00100); + } + MESSAGE(LOG_DUMP, "Need Optimization\n"); + SETFLAGS(X_OF|X_CF, SF_SET); + GETEB(x1, 0); + CALL_(rol8, x1, x3); + EBBACK; + break; + case 1: + if(opcode==0xD0) { + INST_NAME("ROR Eb, 1"); + MOV32w(x2, 1); + } else { + INST_NAME("ROR Eb, CL"); + ANDSw_mask(x2, xRCX, 0, 0b00100); + } + MESSAGE(LOG_DUMP, "Need Optimization\n"); + SETFLAGS(X_OF|X_CF, SF_SET); + GETEB(x1, 0); + CALL_(ror8, x1, x3); + EBBACK; + break; + case 2: + if(opcode==0xD0) {INST_NAME("RCL Eb, 1");} else {INST_NAME("RCL Eb, CL");} + MESSAGE(LOG_DUMP, "Need Optimization\n"); + READFLAGS(X_CF); + SETFLAGS(X_OF|X_CF, SF_SET); + if(opcode==0xD0) {MOV32w(x2, 1);} else {ANDSw_mask(x2, xRCX, 0, 0b00100);} + GETEB(x1, 0); + CALL_(rcl8, x1, x3); + EBBACK; + break; + case 3: + if(opcode==0xD0) {INST_NAME("RCR Eb, 1");} else {INST_NAME("RCR Eb, CL");} + MESSAGE(LOG_DUMP, "Need Optimization\n"); + READFLAGS(X_CF); + SETFLAGS(X_OF|X_CF, SF_SET); + if(opcode==0xD0) {MOV32w(x2, 1);} else {ANDSw_mask(x2, xRCX, 0, 0b00100);} + GETEB(x1, 0); + CALL_(rcr8, x1, x3); + EBBACK; + break; + case 4: + case 6: + if(opcode==0xD0) { + INST_NAME("SHL Eb, 1"); + MOV32w(x2, 1); + } else { + INST_NAME("SHL Eb, CL"); + ANDSw_mask(x2, xRCX, 0, 0b00100); + } + SETFLAGS(X_ALL, SF_PENDING); + GETEB(x1, 0); + UFLAG_OP12(ed, x2) + LSLw_REG(ed, ed, x2); + EBBACK; + UFLAG_RES(ed); + UFLAG_DF(x3, d_shl8); + break; + case 5: + if(opcode==0xD0) { + INST_NAME("SHR Eb, 1"); + MOV32w(x2, 1); + } else { + INST_NAME("SHR Eb, CL"); + ANDSw_mask(x2, xRCX, 0, 0b00100); + } + SETFLAGS(X_ALL, SF_PENDING); + GETEB(x1, 0); + UFLAG_OP12(ed, x2); + LSRw_REG(ed, ed, x2); + EBBACK; + UFLAG_RES(ed); + UFLAG_DF(x3, d_shr8); + break; + case 7: + if(opcode==0xD0) { + INST_NAME("SAR Eb, 1"); + MOV32w(x2, 1); 
+ } else { + INST_NAME("SAR Eb, CL"); + ANDSw_mask(x2, xRCX, 0, 0b00100); + } + SETFLAGS(X_ALL, SF_PENDING); + GETSEB(x1, 0); + UFLAG_OP12(ed, x2) + ASRw_REG(ed, ed, x2); + EBBACK; + UFLAG_RES(ed); + UFLAG_DF(x3, d_sar8); + break; + } + break; + case 0xD1: + nextop = F8; + switch((nextop>>3)&7) { + case 0: + INST_NAME("ROL Ed, 1"); + SETFLAGS(X_OF|X_CF, SF_SUBSET); + GETED(0); + emit_rol32c(dyn, ninst, rex, ed, 1, x3, x4); + WBACK; + break; + case 1: + INST_NAME("ROR Ed, 1"); + SETFLAGS(X_OF|X_CF, SF_SUBSET); + GETED(0); + emit_ror32c(dyn, ninst, rex, ed, 1, x3, x4); + WBACK; + break; + case 2: + INST_NAME("RCL Ed, 1"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + READFLAGS(X_CF); + SETFLAGS(X_OF|X_CF, SF_SET); + MOV32w(x2, 1); + GETEDW(x4, x1, 0); + CALL_(rcl32, ed, x4); + WBACK; + break; + case 3: + INST_NAME("RCR Ed, 1"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + READFLAGS(X_CF); + SETFLAGS(X_OF|X_CF, SF_SET); + MOV32w(x2, 1); + GETEDW(x4, x1, 0); + CALL_(rcr32, ed, x4); + WBACK; + break; + case 4: + case 6: + INST_NAME("SHL Ed, 1"); + SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + GETED(0); + emit_shl32c(dyn, ninst, rex, ed, 1, x3, x4); + WBACK; + break; + case 5: + INST_NAME("SHR Ed, 1"); + SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + GETED(0); + emit_shr32c(dyn, ninst, rex, ed, 1, x3, x4); + WBACK; + break; + case 7: + INST_NAME("SAR Ed, 1"); + SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + GETED(0); + emit_sar32c(dyn, ninst, rex, ed, 1, x3, x4); + WBACK; + break; + } + break; + case 0xD3: + nextop = F8; + switch((nextop>>3)&7) { + case 0: + INST_NAME("ROL Ed, CL"); + SETFLAGS(X_OF|X_CF, SF_SUBSET); + if(rex.w) { + ANDSx_mask(x3, xRCX, 1, 0, 0b00101); //mask=0x000000000000003f + } else { + ANDSw_mask(x3, xRCX, 0, 0b00100); //mask=0x00000001f + } + MOV64xw(x4, (rex.w?64:32)); + SUBx_REG(x3, x4, x3); + GETEDW(x4, x2, 0); + if(!rex.w && MODREG) {MOVw_REG(ed, ed);} + B_NEXT(cEQ); + RORxw_REG(ed, ed, x3); + WBACK; + UFLAG_IF { // calculate flags directly + CMPSw_U12(x3, rex.w?63:31); + B_MARK(cNE); + LSRxw(x1, ed, rex.w?63:31); + ADDxw_REG(x1, x1, ed); + BFIw(xFlags, x1, F_OF, 1); + MARK; + BFIw(xFlags, ed, F_CF, 1); + UFLAG_DF(x2, d_none); + } + break; + case 1: + INST_NAME("ROR Ed, CL"); + SETFLAGS(X_OF|X_CF, SF_SUBSET); + if(rex.w) { + ANDSx_mask(x3, xRCX, 1, 0, 0b00101); //mask=0x000000000000003f + } else { + ANDSw_mask(x3, xRCX, 0, 0b00100); //mask=0x00000001f + } + GETEDW(x4, x2, 0); + if(!rex.w && MODREG) {MOVw_REG(ed, ed);} + B_NEXT(cEQ); + RORxw_REG(ed, ed, x3); + WBACK; + UFLAG_IF { // calculate flags directly + CMPSw_U12(x3, 1); + B_MARK(cNE); + LSRxw(x2, ed, rex.w?62:30); // x2 = d>>30 + EORw_REG_LSR(x2, x2, x2, 1); // x2 = ((d>>30) ^ ((d>>30)>>1)) + BFIw(xFlags, x2, F_OF, 1); + MARK; + LSRxw(x2, ed, rex.w?63:31); + BFIw(xFlags, x2, F_CF, 1); + UFLAG_DF(x2, d_none); + } + break; + case 2: + INST_NAME("RCL Ed, CL"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + READFLAGS(X_CF); + SETFLAGS(X_OF|X_CF, SF_SET); + if(rex.w) { + ANDSx_mask(x2, xRCX, 1, 0, 0b00101); //mask=0x000000000000003f + } else { + ANDSw_mask(x2, xRCX, 0, 0b00100); //mask=0x00000001f + } + GETEDW(x4, x1, 0); + if(!rex.w && MODREG) {MOVw_REG(ed, ed);} + B_NEXT(cEQ); + CALL_(rex.w?((void*)rcl64):((void*)rcl32), ed, x4); + WBACK; + break; + case 3: + INST_NAME("RCR Ed, CL"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + READFLAGS(X_CF); + SETFLAGS(X_OF|X_CF, SF_SET); + if(rex.w) { + ANDSx_mask(x2, xRCX, 1, 0, 0b00101); //mask=0x000000000000003f 
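/*
 * Illustrative sketch of the "calculate flags directly" block in the
 * ROL Ed,CL case above: after the rotate, CF is the bit that wrapped around
 * into bit 0, and OF (architecturally defined only for a count of 1) is
 * MSB(result) XOR CF, which is exactly what the LSR/ADD/BFI sequence
 * computes. Function name is mine.
 */
#include <stdint.h>

static uint32_t rol32_flags(uint32_t v, unsigned count, int *cf, int *of)
{
    count &= 31;
    if (count) {
        v = (v << count) | (v >> (32 - count));
        *cf = v & 1;                    /* bit rotated into position 0            */
        *of = ((v >> 31) ^ *cf) & 1;    /* only architecturally meaningful if count==1 */
    }
    return v;
}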
+ } else { + ANDSw_mask(x2, xRCX, 0, 0b00100); //mask=0x00000001f + } + GETEDW(x4, x1, 0); + if(!rex.w && MODREG) {MOVw_REG(ed, ed);} + B_NEXT(cEQ); + CALL_(rex.w?((void*)rcr64):((void*)rcr32), ed, x4); + WBACK; + break; + case 4: + case 6: + INST_NAME("SHL Ed, CL"); + SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + if(rex.w) { + ANDSx_mask(x3, xRCX, 1, 0, 0b00101); //mask=0x000000000000003f + } else { + ANDSw_mask(x3, xRCX, 0, 0b00100); //mask=0x00000001f + } + GETED(0); + if(!rex.w && MODREG) {MOVw_REG(ed, ed);} + B_NEXT(cEQ); + emit_shl32(dyn, ninst, rex, ed, x3, x5, x4); + WBACK; + break; + case 5: + INST_NAME("SHR Ed, CL"); + SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + if(rex.w) { + ANDSx_mask(x3, xRCX, 1, 0, 0b00101); //mask=0x000000000000003f + } else { + ANDSw_mask(x3, xRCX, 0, 0b00100); //mask=0x00000001f + } + GETED(0); + if(!rex.w && MODREG) {MOVw_REG(ed, ed);} + B_NEXT(cEQ); + emit_shr32(dyn, ninst, rex, ed, x3, x5, x4); + WBACK; + break; + case 7: + INST_NAME("SAR Ed, CL"); + SETFLAGS(X_ALL, SF_PENDING); + if(rex.w) { + ANDSx_mask(x3, xRCX, 1, 0, 0b00101); //mask=0x000000000000003f + } else { + ANDSw_mask(x3, xRCX, 0, 0b00100); //mask=0x00000001f + } + GETED(0); + if(!rex.w && MODREG) {MOVw_REG(ed, ed);} + B_NEXT(cEQ); + UFLAG_OP12(ed, x3); + ASRxw_REG(ed, ed, x3); + WBACK; + UFLAG_RES(ed); + UFLAG_DF(x3, rex.w?d_sar64:d_sar32); + break; + } + break; + + case 0xD8: + addr = dynarec64_D8(dyn, addr, ip, ninst, rex, rep, ok, need_epilog); + break; + case 0xD9: + addr = dynarec64_D9(dyn, addr, ip, ninst, rex, rep, ok, need_epilog); + break; + + case 0xDB: + addr = dynarec64_DB(dyn, addr, ip, ninst, rex, rep, ok, need_epilog); + break; + case 0xDC: + addr = dynarec64_DC(dyn, addr, ip, ninst, rex, rep, ok, need_epilog); + break; + case 0xDD: + addr = dynarec64_DD(dyn, addr, ip, ninst, rex, rep, ok, need_epilog); + break; + + case 0xDF: + addr = dynarec64_DF(dyn, addr, ip, ninst, rex, rep, ok, need_epilog); + break; + #define GO(Z) \ + BARRIER(2); \ + JUMP(addr+i8); \ + if(dyn->insts[ninst].x64.jmp_insts==-1) { \ + /* out of the block */ \ + i32 = dyn->insts[ninst+1].address-(dyn->native_size); \ + if(Z) {CBNZx(xRCX, i32);} else {CBZx(xRCX, i32);}; \ + jump_to_next(dyn, addr+i8, 0, ninst); \ + } else { \ + /* inside the block */ \ + i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->native_size); \ + if(Z) {CBZx(xRCX, i32);} else {CBNZx(xRCX, i32);}; \ + } + case 0xE0: + INST_NAME("LOOPNZ"); + READFLAGS(X_ZF); + i8 = F8S; + SUBx_U12(xRCX, xRCX, 1); + TBNZ_NEXT(xFlags, 1<insts[ninst].natcall, &dyn->insts[ninst].retn)) + tmp = dyn->insts[ninst].pass2choice = 3; + else + tmp = dyn->insts[ninst].pass2choice = 0; + #else + tmp = dyn->insts[ninst].pass2choice; + #endif + switch(tmp) { + case 3: + SETFLAGS(X_ALL, SF_SET); // Hack to set flags to "dont'care" state + BARRIER(1); + BARRIER_NEXT(1); + TABLE64(x2, addr); + PUSH1(x2); + MESSAGE(LOG_DUMP, "Native Call to %s (retn=%d)\n", GetNativeName(GetNativeFnc(dyn->insts[ninst].natcall-1)), dyn->insts[ninst].retn); + // calling a native function + sse_purge07cache(dyn, ninst, x3); + if(box64_log<2 && dyn->insts[ninst].natcall && (tmp=isSimpleWrapper(*(wrapper_t*)(dyn->insts[ninst].natcall+2)))) { + //GETIP(ip+3+8+8); // read the 0xCC + call_n(dyn, ninst, *(void**)(dyn->insts[ninst].natcall+2+8), tmp); + POP1(xRIP); // pop the return address + } else { + GETIP_(dyn->insts[ninst].natcall); // read the 0xCC already + STORE_XEMU_CALL(xRIP); + CALL_S(x64Int3, -1); + LOAD_XEMU_CALL(xRIP); + 
TABLE64(x3, dyn->insts[ninst].natcall); + ADDx_U12(x3, x3, 2+8+8); + CMPSx_REG(xRIP, x3); + B_MARK(cNE); // Not the expected address, exit dynarec block + POP1(xRIP); // pop the return address + if(dyn->insts[ninst].retn) { + ADDx_U12(xRSP, xRSP, dyn->insts[ninst].retn); + } + TABLE64(x3, addr); + CMPSx_REG(xRIP, x3); + B_MARK(cNE); // Not the expected address again + LDRw_U12(w1, xEmu, offsetof(x64emu_t, quit)); + CBZw_NEXT(w1); // not quitting, so lets continue + MARK; + LOAD_XEMU_REM(); // load remaining register, has they have changed + jump_to_epilog(dyn, 0, xRIP, ninst); + } + break; + default: + if(ninst && dyn->insts[ninst-1].x64.set_flags) { + READFLAGS(X_PEND); // that's suspicious + } else { + SETFLAGS(X_ALL, SF_SET); // Hack to set flags to "dont'care" state + } + // regular call + BARRIER(1); + BARRIER_NEXT(1); + *need_epilog = 0; + *ok = 0; + TABLE64(x2, addr); + PUSH1(x2); + if(addr+i32==0) { // self modifying code maybe? so use indirect address fetching + TABLE64(x4, addr-4); + LDRx_U12(x4, x4, 0); + jump_to_next(dyn, 0, x4, ninst); + } else + jump_to_next(dyn, addr+i32, 0, ninst); + break; + } + break; + case 0xE9: + case 0xEB: + BARRIER(1); + if(opcode==0xE9) { + INST_NAME("JMP Id"); + i32 = F32S; + } else { + INST_NAME("JMP Ib"); + i32 = F8S; + } + JUMP(addr+i32); + PASS2IF(dyn->insts[ninst].x64.jmp_insts==-1, 1) { + // out of the block + jump_to_next(dyn, addr+i32, 0, ninst); + } else { + // inside the block + tmp = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->native_size); + if(tmp==4) { + NOP; + } else { + B(tmp); + } + } + *need_epilog = 0; + *ok = 0; + break; + + case 0xF0: + addr = dynarec64_F0(dyn, addr, ip, ninst, rex, rep, ok, need_epilog); + break; + + case 0xF5: + INST_NAME("CMC"); + READFLAGS(X_CF); + SETFLAGS(X_CF, SF_SUBSET); + EORw_mask(xFlags, xFlags, 0, 0); //mask=0x00000001 + break; + case 0xF6: + nextop = F8; + switch((nextop>>3)&7) { + case 0: + case 1: + INST_NAME("TEST Eb, Ib"); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEB(x1, 1); + u8 = F8; + MOV32w(x2, u8); + emit_test8(dyn, ninst, x1, x2, x3, x4, x5); + break; + case 2: + INST_NAME("NOT Eb"); + GETEB(x1, 0); + MVNw_REG(x1, x1); + EBBACK; + break; + case 3: + INST_NAME("NEG Eb"); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEB(x1, 0); + emit_neg8(dyn, ninst, x1, x2, x4); + EBBACK; + break; + case 4: + INST_NAME("MUL AL, Ed"); + SETFLAGS(X_ALL, SF_PENDING); + UFLAG_DF(x1, d_mul8); + GETEB(x1, 0); + UXTBw(x2, xRAX); + MULw(x1, x2, x1); + UFLAG_RES(x1); + BFIx(xRAX, x1, 0, 16); + break; + case 5: + INST_NAME("IMUL AL, Eb"); + SETFLAGS(X_ALL, SF_PENDING); + UFLAG_DF(x1, d_imul8); + GETSEB(x1, 0); + SXTBw(x2, xRAX); + MULw(x1, x2, x1); + UFLAG_RES(x1); + BFIx(xRAX, x1, 0, 16); + break; + case 6: + INST_NAME("DIV Eb"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + SETFLAGS(X_ALL, SF_SET); + GETEB(x1, 0); + CALL(div8, -1); + break; + case 7: + INST_NAME("IDIV Eb"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + SETFLAGS(X_ALL, SF_SET); + GETEB(x1, 0); + CALL(idiv8, -1); + break; + } + break; + case 0xF7: + nextop = F8; + switch((nextop>>3)&7) { + case 0: + case 1: + INST_NAME("TEST Ed, Id"); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEDH(x1, 4); + i64 = F32S; + MOV64xw(x2, i64); + emit_test32(dyn, ninst, rex, ed, x2, x3, x4); + break; + case 2: + INST_NAME("NOT Ed"); + GETED(4); + MVNxw_REG(ed, ed); + WBACK; + break; + case 3: + INST_NAME("NEG Ed"); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETED(0); + emit_neg32(dyn, ninst, rex, ed, x3, x4); + WBACK; + break; + case 4: + INST_NAME("MUL EAX, Ed"); + 
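/*
 * Illustrative sketch of the 8-bit MUL/IMUL cases of the 0xF6 group above:
 * the product of AL and the source widens into AX (the BFIx(xRAX, x1, 0, 16)
 * write-back), and CF/OF are later derived from whether the upper half of
 * the product carries real information. Function names are mine.
 */
#include <stdint.h>

static void mul8_al(uint64_t *rax, uint8_t src, int *cf_of)
{
    uint16_t res = (uint16_t)((uint8_t)*rax * src);
    *rax = (*rax & ~0xffffull) | res;          /* AX <- AL * src              */
    *cf_of = (res >> 8) != 0;                  /* high byte is non-zero       */
}

static void imul8_al(uint64_t *rax, int8_t src, int *cf_of)
{
    int16_t res = (int16_t)((int8_t)*rax * src);
    *rax = (*rax & ~0xffffull) | (uint16_t)res;
    *cf_of = res != (int8_t)res;               /* product does not fit in a signed byte */
}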
SETFLAGS(X_ALL, SF_PENDING); + UFLAG_DF(x2, rex.w?d_mul64:d_mul32); + GETED(0); + if(rex.w) { + if(ed==xRDX) gd=x3; else gd=xRDX; + UMULH(gd, xRAX, ed); + MULx(xRAX, xRAX, ed); + if(gd!=xRDX) {MOVx_REG(xRDX, gd);} + } else { + UMULL(xRDX, xRAX, ed); //64 <- 32x32 + MOVw_REG(xRAX, xRDX); + LSRx(xRDX, xRDX, 32); + } + UFLAG_RES(xRAX); + UFLAG_OP1(xRDX); + break; + case 5: + INST_NAME("IMUL EAX, Ed"); + SETFLAGS(X_ALL, SF_PENDING); + UFLAG_DF(x2, rex.w?d_imul64:d_imul32); + GETED(0); + if(rex.w) { + if(ed==xRDX) gd=x3; else gd=xRDX; + SMULH(gd, xRAX, ed); + MULx(xRAX, xRAX, ed); + if(gd!=xRDX) {MOVx_REG(xRDX, gd);} + } else { + SMULL(xRDX, xRAX, ed); //64 <- 32x32 + MOVw_REG(xRAX, xRDX); + LSRx(xRDX, xRDX, 32); + } + UFLAG_RES(xRAX); + UFLAG_OP1(xRDX); + break; + case 6: + INST_NAME("DIV Ed"); + SETFLAGS(X_ALL, SF_SET); + if(!rex.w) { + SET_DFNONE(x2); + GETED(0); + MOVw_REG(x3, xRAX); + ORRx_REG_LSL(x3, x3, xRDX, 32); + if(MODREG) { + MOVw_REG(x4, ed); + ed = x4; + } + UDIVx(x2, x3, ed); + MSUBx(x4, x2, ed, xRAX); + MOVw_REG(xRAX, x2); + MOVw_REG(xRDX, x4); + } else { + if(ninst + && dyn->insts[ninst-1].x64.addr + && *(uint8_t*)(dyn->insts[ninst-1].x64.addr)==0x31 + && *(uint8_t*)(dyn->insts[ninst-1].x64.addr+1)==0xD2) { + SET_DFNONE(x2); + GETED(0); + UDIVx(x2, xRAX, ed); + MSUBx(xRDX, x2, ed, xRAX); + MOVx_REG(xRAX, x2); + } else { + GETEDH(x1, 0); // get edd changed addr, so cannot be called 2 times for same op... + CBZxw_MARK(xRDX); + if(ed!=x1) {MOVx_REG(x1, ed);} + CALL(div64, -1); + B_NEXT_nocond; + MARK; + UDIVx(x2, xRAX, ed); + MSUBx(xRDX, x2, ed, xRAX); + MOVx_REG(xRAX, x2); + SET_DFNONE(x2); + } + } + break; + case 7: + INST_NAME("IDIV Ed"); + SETFLAGS(X_ALL, SF_SET); + if(!rex.w) { + SET_DFNONE(x2) + GETSEDw(0); + MOVw_REG(x3, xRAX); + ORRx_REG_LSL(x3, x3, xRDX, 32); + SDIVx(x2, x3, wb); + MSUBx(x4, x2, wb, x3); + MOVw_REG(xRAX, x2); + MOVw_REG(xRDX, x4); + } else { + if(ninst && dyn->insts + && dyn->insts[ninst-1].x64.addr + && *(uint8_t*)(dyn->insts[ninst-1].x64.addr)==0x48 + && *(uint8_t*)(dyn->insts[ninst-1].x64.addr+1)==0x99) { + SET_DFNONE(x2) + GETED(0); + SDIVx(x2, xRAX, ed); + MSUBx(xRDX, x2, ed, xRAX); + MOVx_REG(xRAX, x2); + } else { + GETEDH(x1, 0); // get edd changed addr, so cannot be called 2 times for same op... 
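/*
 * Illustrative sketch of the DIV Ed peephole above: DIV r/m64 divides the
 * 128-bit value RDX:RAX, but when the previous guest instruction was
 * "xor edx,edx" (bytes 0x31 0xD2) RDX is known to be zero, so a plain native
 * 64-bit divide is enough; otherwise the generated code tests RDX at runtime
 * and falls back to a full 128-by-64 helper. Function name is mine; the
 * __int128 branch stands in for that helper (GCC/Clang extension), and the
 * divisor is assumed non-zero.
 */
#include <stdint.h>

static void div64_guest(uint64_t *rax, uint64_t *rdx, uint64_t divisor)
{
    if (*rdx == 0) {                          /* fast path: dividend fits in 64 bits */
        uint64_t q = *rax / divisor;
        uint64_t r = *rax % divisor;
        *rax = q;
        *rdx = r;
    } else {                                  /* slow path: true 128/64 division     */
        unsigned __int128 n = ((unsigned __int128)*rdx << 64) | *rax;
        *rax = (uint64_t)(n / divisor);
        *rdx = (uint64_t)(n % divisor);
    }
}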
+ //Need to see if RDX==0 and RAX not signed + // or RDX==-1 and RAX signed + CBNZx_MARK2(xRDX); + TBZ_MARK(xRAX, 31); + MARK2; + MVNx_REG(x2, xRDX); + CBNZx_MARK3(x2); + TBNZ_MARK(xRAX, 31); + MARK3; + if(ed!=x1) {MOVx_REG(x1, ed);} + CALL((void*)idiv64, -1); + B_NEXT_nocond; + MARK; + SDIVx(x2, xRAX, ed); + MSUBx(xRDX, x2, ed, xRAX); + MOVx_REG(xRAX, x2); + SET_DFNONE(x2) + } + } + break; + } + break; + + case 0xFC: + INST_NAME("CLD"); + BFCw(xFlags, F_DF, 1); + break; + case 0xFD: + INST_NAME("STD"); + MOV32w(x1, 1); + BFIw(xFlags, x1, F_DF, 1); + break; + case 0xFE: + nextop = F8; + switch((nextop>>3)&7) { + case 0: + INST_NAME("INC Eb"); + SETFLAGS(X_ALL&~X_CF, SF_SUBSET); + GETEB(x1, 0); + emit_inc8(dyn, ninst, x1, x2, x4); + EBBACK; + break; + case 1: + INST_NAME("DEC Eb"); + SETFLAGS(X_ALL&~X_CF, SF_SUBSET); + GETEB(x1, 0); + emit_dec8(dyn, ninst, x1, x2, x4); + EBBACK; + break; + default: + DEFAULT; + } + break; + case 0xFF: + nextop = F8; + switch((nextop>>3)&7) { + case 0: // INC Ed + INST_NAME("INC Ed"); + SETFLAGS(X_ALL&~X_CF, SF_SUBSET); + GETED(0); + emit_inc32(dyn, ninst, rex, ed, x3, x4); + WBACK; + break; + case 1: //DEC Ed + INST_NAME("DEC Ed"); + SETFLAGS(X_ALL&~X_CF, SF_SUBSET); + GETED(0); + emit_dec32(dyn, ninst, rex, ed, x3, x4); + WBACK; + break; + case 2: // CALL Ed + INST_NAME("CALL Ed"); + PASS2IF(((ninst && dyn->insts[ninst-1].x64.set_flags) + || ((ninst>1) && dyn->insts[ninst-2].x64.set_flags)), 1) + { + READFLAGS(X_PEND); // that's suspicious + } else { + SETFLAGS(X_ALL, SF_SET); //Hack to put flag in "don't care" state + } + GETEDx(0); + BARRIER(1); + BARRIER_NEXT(1); + if(!dyn->insts || ninst==dyn->size-1) { + *need_epilog = 0; + *ok = 0; + } + GETIP(addr); + PUSH1(xRIP); + jump_to_next(dyn, 0, ed, ninst); + break; + case 4: // JMP Ed + INST_NAME("JMP Ed"); + BARRIER(1); + GETEDx(0); + jump_to_next(dyn, 0, ed, ninst); + *need_epilog = 0; + *ok = 0; + break; + case 6: // Push Ed + INST_NAME("PUSH Ed"); + GETEDx(0); + PUSH1(ed); + break; + + default: + DEFAULT; + } + break; + + default: + DEFAULT; + } + + return addr; +} diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c new file mode 100755 index 00000000..b726db9a --- /dev/null +++ b/src/dynarec/arm64/dynarec_arm64_0f.c @@ -0,0 +1,1911 @@ +#include +#include +#include +#include +#include + +#include "debug.h" +#include "box64context.h" +#include "dynarec.h" +#include "emu/x64emu_private.h" +#include "emu/x64run_private.h" +#include "x64run.h" +#include "x64emu.h" +#include "box64stack.h" +#include "callback.h" +#include "emu/x64run_private.h" +#include "x64trace.h" +#include "dynarec_native.h" +#include "my_cpuid.h" +#include "emu/x87emu_private.h" + +#include "arm64_printer.h" +#include "dynarec_arm64_private.h" +#include "dynarec_arm64_functions.h" +#include "dynarec_arm64_helper.h" + +#define GETG \ + gd = ((nextop&0x38)>>3)+(rex.r<<3) \ + +#define GETGX(a) \ + gd = ((nextop&0x38)>>3)+(rex.r<<3); \ + a = sse_get_reg(dyn, ninst, x1, gd) + +#define GETGX_empty(a) \ + gd = ((nextop&0x38)>>3)+(rex.r<<3); \ + a = sse_get_reg_empty(dyn, ninst, x1, gd) + +#define GETEX(a, D) \ + if(MODREG) { \ + a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); \ + } else { \ + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, 0, D); \ + a = fpu_get_scratch(dyn); \ + VLDR128_U12(a, ed, fixedaddress); \ + } + +#define GETGM(a) \ + gd = ((nextop&0x38)>>3); \ + a = mmx_get_reg(dyn, ninst, x1, gd) + +#define GETEM(a, D) \ + if(MODREG) { \ + a = 
mmx_get_reg(dyn, ninst, x1, (nextop&7));\ + } else { \ + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, D); \ + a = fpu_get_scratch(dyn); \ + VLDR64_U12(a, ed, fixedaddress); \ + } + +#define PUTEM(a) \ + if(!MODREG) { \ + VSTR64_U12(a, ed, fixedaddress); \ + } + +uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog) +{ + (void)ip; (void)rep; (void)need_epilog; + + uint8_t opcode = F8; + uint8_t nextop, u8; + uint8_t gd, ed; + uint8_t wback, wb2; + uint8_t eb1, eb2; + int32_t i32, i32_; + int v0, v1; + int q0, q1; + int d0, d1; + int s0; + uint64_t tmp64u; + int64_t j64; + int64_t fixedaddress; + MAYUSE(wb2); + MAYUSE(eb1); + MAYUSE(eb2); + MAYUSE(q0); + MAYUSE(q1); + MAYUSE(d0); + MAYUSE(d1); + MAYUSE(s0); + MAYUSE(j64); + #if STEP > 1 + static const int8_t mask_shift8[] = { -7, -6, -5, -4, -3, -2, -1, 0 }; + #endif + + switch(opcode) { + + case 0x01: + INST_NAME("FAKE xgetbv"); + nextop = F8; + addr = fakeed(dyn, addr, ninst, nextop); + SETFLAGS(X_ALL, SF_SET); // Hack to set flags in "don't care" state + GETIP(ip); + STORE_XEMU_CALL(xRIP); + CALL(arm_ud, -1); + break; + + case 0x05: + INST_NAME("SYSCALL"); + GETIP(addr); + STORE_XEMU_CALL(xRIP); + CALL_S(x64Syscall, -1); + LOAD_XEMU_CALL(xRIP); + TABLE64(x3, addr); // expected return address + CMPSx_REG(xRIP, x3); + B_MARK(cNE); + LDRw_U12(w1, xEmu, offsetof(x64emu_t, quit)); + CBZw_NEXT(w1); + MARK; + LOAD_XEMU_REM(); + jump_to_epilog(dyn, 0, xRIP, ninst); + break; + + case 0x09: + INST_NAME("WBINVD"); + break; + + case 0x0B: + INST_NAME("UD2"); + SETFLAGS(X_ALL, SF_SET); // Hack to set flags in "don't care" state + GETIP(ip); + STORE_XEMU_CALL(xRIP); + CALL(arm_ud, -1); + break; + + case 0x0D: + nextop = F8; + switch((nextop>>3)&7) { + case 1: + INST_NAME("PREFETCHW"); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff, 7, rex, 0, 0); + PST_L1_STREAM_U12(ed, fixedaddress); + break; + default: //??? 
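/*
 * Illustrative sketch: how the (nextop&0x38)>>3 and (nextop&7) expressions
 * used by GETG, GETGX, GETEX and friends split a ModRM byte, and how REX.R
 * and REX.B extend the two register fields to four bits. Names below are
 * mine, for illustration.
 */
#include <stdint.h>

struct modrm_fields {
    unsigned mod;   /* 3 = register operand, otherwise a memory form */
    unsigned reg;   /* "G" operand, extended by REX.R                */
    unsigned rm;    /* "E" operand, extended by REX.B                */
};

static struct modrm_fields decode_modrm(uint8_t nextop, int rex_r, int rex_b)
{
    struct modrm_fields m;
    m.mod = nextop >> 6;
    m.reg = ((nextop & 0x38) >> 3) + (rex_r << 3);
    m.rm  = (nextop & 0x07)        + (rex_b << 3);
    return m;
}
/* The MODREG test in the patch corresponds to m.mod == 3. */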
+ DEFAULT; + } + break; + + case 0x10: + INST_NAME("MOVUPS Gx,Ex"); + nextop = F8; + GETG; + if(MODREG) { + ed = (nextop&7)+(rex.b<<3); + v1 = sse_get_reg(dyn, ninst, x1, ed); + v0 = sse_get_reg_empty(dyn, ninst, x1, gd); + VMOVQ(v0, v1); + } else { + v0 = sse_get_reg_empty(dyn, ninst, x1, gd); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, 0, 0); + VLDR128_U12(v0, ed, fixedaddress); // no alignment issue with ARMv8 NEON :) + } + break; + case 0x11: + INST_NAME("MOVUPS Ex,Gx"); + nextop = F8; + GETG; + v0 = sse_get_reg(dyn, ninst, x1, gd); + if(MODREG) { + ed = (nextop&7)+(rex.b<<3); + v1 = sse_get_reg_empty(dyn, ninst, x1, ed); + VMOVQ(v1, v0); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, 0, 0); + VSTR128_U12(v0, ed, fixedaddress); + } + break; + case 0x12: + nextop = F8; + if(MODREG) { + INST_NAME("MOVHLPS Gx,Ex"); + GETGX(v0); + v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); + VMOVeD(v0, 0, v1, 1); + } else { + INST_NAME("MOVLPS Gx,Ex"); + GETGX(v0); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); + VLD1_64(v0, 0, ed); + } + break; + case 0x13: + nextop = F8; + INST_NAME("MOVLPS Ex,Gx"); + GETGX(v0); + if(MODREG) { + v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); + VMOVeD(v1, 0, v0, 0); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); + VST1_64(v0, 0, ed); // better to use VST1 than VSTR_64, to avoid NEON->VFPU transfert I assume + } + break; + case 0x14: + INST_NAME("UNPCKLPS Gx, Ex"); + nextop = F8; + GETEX(q0, 0); + GETGX(v0); + VZIP1Q_32(v0, v0, q0); + break; + case 0x15: + INST_NAME("UNPCKHPS Gx, Ex"); + nextop = F8; + GETEX(q0, 0); + GETGX(v0); + VZIP2Q_32(v0, v0, q0); + break; + case 0x16: + nextop = F8; + if(MODREG) { + INST_NAME("MOVLHPS Gx,Ex"); + GETGX(v0); + v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); + VMOVeD(v0, 1, v1, 0); + } else { + INST_NAME("MOVHPS Gx,Ex"); + GETGX(v0); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); + VLD1_64(v0, 1, ed); + } + break; + case 0x17: + nextop = F8; + INST_NAME("MOVHPS Ex,Gx"); + GETGX(v0); + if(MODREG) { + v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); + VMOVeD(v1, 0, v0, 1); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); + VST1_64(v0, 1, ed); + } + break; + case 0x18: + nextop = F8; + if((nextop&0xC0)==0xC0) { + INST_NAME("NOP (multibyte)"); + } else + switch((nextop>>3)&7) { + case 0: + INST_NAME("PREFETCHh Ed"); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff, 7, rex, 0, 0); + PLD_L1_STREAM_U12(ed, fixedaddress); + break; + case 1: + INST_NAME("PREFETCHh Ed"); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff, 7, rex, 0, 0); + PLD_L1_KEEP_U12(ed, fixedaddress); + break; + case 2: + INST_NAME("PREFETCHh Ed"); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff, 7, rex, 0, 0); + PLD_L2_KEEP_U12(ed, fixedaddress); + break; + case 3: + INST_NAME("PREFETCHh Ed"); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff, 7, rex, 0, 0); + PLD_L3_KEEP_U12(ed, fixedaddress); + break; + default: + INST_NAME("NOP (multibyte)"); + FAKEED; + } + break; + + case 0x1F: + INST_NAME("NOP (multibyte)"); + nextop = F8; + FAKEED; + break; + + case 0x28: + INST_NAME("MOVAPS Gx,Ex"); + nextop = F8; + GETG; + if(MODREG) { + ed = (nextop&7)+(rex.b<<3); + v1 = sse_get_reg(dyn, ninst, x1, ed); + v0 
= sse_get_reg_empty(dyn, ninst, x1, gd); + VMOVQ(v0, v1); + } else { + v0 = sse_get_reg_empty(dyn, ninst, x1, gd); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, 0, 0); + VLDR128_U12(v0, ed, fixedaddress); + } + break; + case 0x29: + INST_NAME("MOVAPS Ex,Gx"); + nextop = F8; + GETG; + v0 = sse_get_reg(dyn, ninst, x1, gd); + if(MODREG) { + ed = (nextop&7)+(rex.b<<3); + v1 = sse_get_reg_empty(dyn, ninst, x1, ed); + VMOVQ(v1, v0); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, 0, 0); + VSTR128_U12(v0, ed, fixedaddress); + } + break; + + case 0x2B: + INST_NAME("MOVNTPS Ex,Gx"); + nextop = F8; + GETG; + v0 = sse_get_reg(dyn, ninst, x1, gd); + if(MODREG) { + ed = (nextop&7)+(rex.b<<3); + v1 = sse_get_reg_empty(dyn, ninst, x1, ed); + VMOVQ(v1, v0); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, 0, 0); + VSTR128_U12(v0, ed, fixedaddress); + } + break; + + case 0x2E: + // no special check... + case 0x2F: + if(opcode==0x2F) {INST_NAME("COMISS Gx, Ex");} else {INST_NAME("UCOMISS Gx, Ex");} + SETFLAGS(X_ALL, SF_SET); + nextop = F8; + GETGX(v0); + if(MODREG) { + s0 = sse_get_reg(dyn, ninst, x1, (nextop&7) + (rex.b<<3)); + } else { + s0 = fpu_get_scratch(dyn); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); + VLDR32_U12(s0, ed, fixedaddress); + } + FCMPS(v0, s0); + FCOMI(x1, x2); + break; + + case 0x31: + INST_NAME("RDTSC"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + CALL(ReadTSC, xRAX); // will return the u64 in xEAX + LSRx(xRDX, xRAX, 32); + MOVw_REG(xRAX, xRAX); // wipe upper part + break; + + case 0x38: + //SSE3 + nextop=F8; + switch(nextop) { + case 0x00: + INST_NAME("PSHUFB Gm, Em"); + nextop = F8; + GETGM(q0); + GETEM(q1, 0); + d0 = fpu_get_scratch(dyn); + MOVI_8(d0, 0b10001111); + VAND(d0, d0, q1); // mask the index + VTBL1_8(q0, q0, d0); + break; + + case 0x04: + INST_NAME("PMADDUBSW Gm,Em"); + nextop = F8; + GETGM(q0); + GETEM(q1, 0); + v0 = fpu_get_scratch(dyn); + v1 = fpu_get_scratch(dyn); + UXTL_8(v0, q0); // this is unsigned, so 0 extended + SXTL_8(v1, q1); // this is signed + VMULQ_16(v0, v0, v1); + SADDLPQ_16(v1, v0); + SQXTN_16(q0, v1); + break; + + case 0x0B: + INST_NAME("PMULHRSW Gm,Em"); + nextop = F8; + GETGM(q0); + GETEM(q1, 0); + SQRDMULH_16(q0, q0, q1); + break; + + default: + DEFAULT; + } + break; + + case 0x3A: // these are some more SSSE3 opcodes + opcode = F8; + switch(opcode) { + case 0x0F: + INST_NAME("PALIGNR Gm, Em, Ib"); + nextop = F8; + GETGM(q0); + GETEM(q1, 1); + u8 = F8; + if(u8>15) { + VEOR(q0, q0, q0); + } else if(u8>7) { + d0 = fpu_get_scratch(dyn); + VEOR(d0, d0, d0); + VEXT_8(q0, q0, d0, u8-8); + } else { + VEXT_8(q0, q1, q0, u8); + } + break; + default: + DEFAULT; + } + break; + + #define GO(GETFLAGS, NO, YES, F) \ + READFLAGS(F); \ + GETFLAGS; \ + nextop=F8; \ + GETGD; \ + if(MODREG) { \ + ed = xRAX+(nextop&7)+(rex.b<<3); \ + CSELxw(gd, ed, gd, YES); \ + } else { \ + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, 0, 0); \ + Bcond(NO, +8); \ + LDRxw_U12(gd, ed, fixedaddress); \ + if(!rex.w) {MOVw_REG(gd, gd);} \ + } + + GOCOND(0x40, "CMOV", "Gd, Ed"); + #undef GO + case 0x50: + INST_NAME("MOVMSPKPS Gd, Ex"); + nextop = F8; + GETGD; + MOV32w(gd, 0); + if((nextop&0xC0)==0xC0) { + // EX is an xmm reg + GETEX(q0, 0); + VMOVQDto(x1, q0, 0); + LSRx(x1, x1, 31); + BFIx(gd, x1, 0, 1); + LSRx(x1, x1, 32); + BFIx(gd, x1, 1, 1); + 
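/*
 * Illustrative sketch of the MMX PSHUFB implemented above with VTBL1: each
 * destination byte is selected by the low 3 bits of the matching mask byte,
 * or forced to zero when the mask byte's top bit is set. Masking the indices
 * with 0b10001111 before VTBL1 works because out-of-range TBL indices already
 * return zero. Function name is mine.
 */
#include <stdint.h>

static void pshufb_mmx(uint8_t dst[8], const uint8_t src[8], const uint8_t mask[8])
{
    uint8_t tmp[8];                              /* allows dst and src to alias */
    for (int i = 0; i < 8; ++i)
        tmp[i] = (mask[i] & 0x80) ? 0 : src[mask[i] & 7];
    for (int i = 0; i < 8; ++i)
        dst[i] = tmp[i];
}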
VMOVQDto(x1, q0, 1); + LSRx(x1, x1, 31); + BFIx(gd, x1, 2, 1); + LSRx(x1, x1, 32); + BFIx(gd, x1, 3, 1); + } else { + // EX is memory + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, (0xfff<<3)-8, 7, rex, 0, 0); + LDRx_U12(x1, ed, fixedaddress+0); + LSRx(x1, x1, 31); + BFIx(gd, x1, 0, 1); + LSRx(x1, x1, 32); + BFIx(gd, x1, 1, 1); + LDRx_U12(x1, ed, fixedaddress+8); + LSRx(x1, x1, 31); + BFIx(gd, x1, 2, 1); + LSRx(x1, x1, 32); + BFIx(gd, x1, 3, 1); + } + break; + case 0x51: + INST_NAME("SQRTPS Gx, Ex"); + nextop = F8; + GETEX(q0, 0); + GETGX_empty(v0); + VFSQRTQS(v0, q0); + break; + case 0x52: + INST_NAME("RSQRTPS Gx, Ex"); + nextop = F8; + GETEX(q0, 0); + GETGX_empty(q1); + v0 = fpu_get_scratch(dyn); + // more precise + if(q1==q0) + v1 = fpu_get_scratch(dyn); + else + v1 = q1; + VFRSQRTEQS(v0, q0); + VFMULQS(v1, v0, q0); + VFRSQRTSQS(v1, v1, v0); + VFMULQS(q1, v1, v0); + break; + case 0x53: + INST_NAME("RCPPS Gx, Ex"); + nextop = F8; + GETEX(q0, 0); + GETGX_empty(q1); + if(q0 == q1) + v1 = fpu_get_scratch(dyn); + else + v1 = q1; + v0 = fpu_get_scratch(dyn); + VFRECPEQS(v0, q0); + VFRECPSQS(v1, v0, q0); + VFMULQS(q1, v0, v1); + break; + case 0x54: + INST_NAME("ANDPS Gx, Ex"); + nextop = F8; + GETEX(q0, 0); + GETGX(v0); + VANDQ(v0, v0, q0); + break; + case 0x55: + INST_NAME("ANDNPS Gx, Ex"); + nextop = F8; + GETEX(q0, 0); + GETGX(v0); + VBICQ(v0, q0, v0); + break; + case 0x56: + INST_NAME("ORPS Gx, Ex"); + nextop = F8; + GETEX(q0, 0); + GETGX(v0); + VORRQ(v0, v0, q0); + break; + case 0x57: + INST_NAME("XORPS Gx, Ex"); + nextop = F8; + GETG; + if(MODREG && ((nextop&7)+(rex.b<<3)==gd)) { + // special case for XORPS Gx, Gx + q0 = sse_get_reg_empty(dyn, ninst, x1, gd); + VEORQ(q0, q0, q0); + } else { + q0 = sse_get_reg(dyn, ninst, x1, gd); + GETEX(q1, 0); + VEORQ(q0, q0, q1); + } + break; + case 0x58: + INST_NAME("ADDPS Gx, Ex"); + nextop = F8; + GETEX(q0, 0); + GETGX(v0); + VFADDQS(v0, v0, q0); + break; + case 0x59: + INST_NAME("MULPS Gx, Ex"); + nextop = F8; + GETEX(q0, 0); + GETGX(v0); + VFMULQS(v0, v0, q0); + break; + case 0x5A: + INST_NAME("CVTPS2PD Gx, Ex"); + nextop = F8; + GETEX(q0, 0); + GETGX(q1); + FCVTL(q1, q0); + break; + case 0x5B: + INST_NAME("CVTDQ2PS Gx, Ex"); + nextop = F8; + GETEX(q0, 0); + GETGX_empty(q1); + SCVTQFS(q1, q0); + break; + case 0x5C: + INST_NAME("SUBPS Gx, Ex"); + nextop = F8; + GETEX(q0, 0); + GETGX(v0); + VFSUBQS(v0, v0, q0); + break; + case 0x5D: + INST_NAME("MINPS Gx, Ex"); + nextop = F8; + GETGX(v0); + GETEX(v1, 0); + VFMINQS(v0, v0, v1); + break; + case 0x5E: + INST_NAME("DIVPS Gx, Ex"); + nextop = F8; + GETEX(q0, 0); + GETGX(v0); + VFDIVQS(v0, v0, q0); + break; + case 0x5F: + INST_NAME("MAXPS Gx, Ex"); + nextop = F8; + GETGX(v0); + GETEX(v1, 0); + VFMAXQS(v0, v0, v1); + break; + case 0x60: + INST_NAME("PUNPCKLBW Gm,Em"); + nextop = F8; + GETGM(d0); + GETEM(d1, 0); + VZIP1_8(d0, d0, d1); + break; + case 0x61: + INST_NAME("PUNPCKLWD Gm,Em"); + nextop = F8; + GETGM(d0); + GETEM(d1, 0); + VZIP1_16(d0, d0, d1); + break; + case 0x62: + INST_NAME("PUNPCKLDQ Gm,Em"); + nextop = F8; + GETGM(d0); + GETEM(d1, 0); + VZIP1_32(d0, d0, d1); + break; + case 0x63: + INST_NAME("PACKSSWB Gm,Em"); + nextop = F8; + GETGM(d0); + GETEM(d1, 0); + q0 = fpu_get_scratch(dyn); + VMOVeD(q0, 0, d0, 0); + VMOVeD(q0, 1, d1, 0); + SQXTN_8(d0, q0); + break; + case 0x64: + INST_NAME("PCMPGTB Gx,Ex"); + nextop = F8; + GETGM(v0); + GETEM(v1, 0); + VCMGT_8(v0, v0, v1); + break; + case 0x65: + INST_NAME("PCMPGTW Gx,Ex"); + nextop = F8; + GETGM(v0); + GETEM(v1, 0); + VCMGT_16(v0, 
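/*
 * Illustrative sketch of the RSQRTPS refinement above: FRSQRTE only produces
 * a rough estimate of 1/sqrt(x), so the emitted code adds one Newton-Raphson
 * step using FRSQRTS(a, b) = (3 - a*b) / 2 before the final multiply. Scalar
 * equivalent (the estimate below is a stand-in for the hardware FRSQRTE):
 */
#include <math.h>

static float rsqrt_refined(float x)
{
    float e    = 1.0f / sqrtf(x);               /* stand-in for the FRSQRTE estimate */
    float step = (3.0f - (x * e) * e) / 2.0f;   /* FRSQRTS(x*e, e)                    */
    return e * step;                            /* refined 1/sqrt(x)                  */
}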
v0, v1); + break; + case 0x66: + INST_NAME("PCMPGTD Gx,Ex"); + nextop = F8; + GETGM(v0); + GETEM(v1, 0); + VCMGT_32(v0, v0, v1); + break; + case 0x67: + INST_NAME("PACKUSWB Gm, Em"); + nextop = F8; + GETGM(v0); + q0 = fpu_get_scratch(dyn); + VMOVeD(q0, 0, v0, 0); + if(MODREG) { + v1 = mmx_get_reg(dyn, ninst, x1, (nextop&7)); + VMOVeD(q0, 1, v1, 0); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); + VLD1_64(q0, 1, ed); + } + SQXTUN_8(v0, q0); + break; + case 0x68: + INST_NAME("PUNPCKHBW Gm,Em"); + nextop = F8; + GETGM(q0); + GETEM(q1, 1); + VZIP2_8(q0, q0, q1); + break; + case 0x69: + INST_NAME("PUNPCKHWD Gm,Em"); + nextop = F8; + GETGM(q0); + GETEM(q1, 1); + VZIP2_16(q0, q0, q1); + break; + case 0x6A: + INST_NAME("PUNPCKHDQ Gm,Em"); + nextop = F8; + GETGM(q0); + GETEM(q1, 1); + VZIP2_32(q0, q0, q1); + break; + case 0x6B: + INST_NAME("PACKSSDW Gm,Em"); + nextop = F8; + GETGM(v0); + if(MODREG) { + GETEM(v1, 0); + q0 = fpu_get_scratch(dyn); + VMOVeD(q0, 1, v1, 0); + } else { + q0 = fpu_get_scratch(dyn); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); + VLD1_64(q0, 1, ed); + } + VMOVeD(q0, 0, v0, 0); + SQXTN_16(v0, q0); + break; + + case 0x6E: + INST_NAME("MOVD Gm, Ed"); + nextop = F8; + gd = (nextop&0x38)>>3; + v0 = mmx_get_reg_empty(dyn, ninst, x3, gd); + if(MODREG) { + ed = xRAX + (nextop&7) + (rex.b<<3); + if(rex.w) { + FMOVDx(v0, ed); + } else { + FMOVSw(v0, ed); + } + } else { + v0 = mmx_get_reg_empty(dyn, ninst, x1, gd); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, 0, 0); + if(rex.w) { + VLDR64_U12(v0, ed, fixedaddress); + } else { + VLDR32_U12(v0, ed, fixedaddress); + } + } + break; + case 0x6F: + INST_NAME("MOVQ Gm, Em"); + nextop = F8; + GETG; + if(MODREG) { + v1 = mmx_get_reg(dyn, ninst, x1, nextop&7); // no rex.b on MMX + v0 = mmx_get_reg_empty(dyn, ninst, x1, gd); + VMOVeD(v0, 0, v1, 0); + } else { + v0 = mmx_get_reg_empty(dyn, ninst, x1, gd); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0); + VLDR64_U12(v0, ed, fixedaddress); + } + break; + case 0x70: + INST_NAME("PSHUFW Gm,Em,Ib"); + nextop = F8; + gd = (nextop&0x38)>>3; + if(MODREG) { + u8 = F8; + v1 = mmx_get_reg(dyn, ninst, x1, (nextop&7)); + v0 = mmx_get_reg_empty(dyn, ninst, x1, gd); + if(u8==0x4E) { + if(v0==v1) { + VEXT_8(v0, v0, v0, 4); // Swap Up/Lower 32bits parts + } else { + VMOVeS(v0, 0, v1, 1); + VMOVeS(v0, 1, v1, 0); + } + } else if(u8==0x00) { + // dumplicate lower 16bits to all spot + if(v0!=v1) { + VMOVeH(v0, 0, v1, 0); + } + VMOVeH(v0, 1, v1, 0); + VMOVeS(v0, 1, v1, 0); + } else if(u8==0x55) { + // dumplicate 16bits slot 1 to all spot + if(v0!=v1) { + VMOVeH(v0, 1, v1, 1); + } + VMOVeH(v0, 0, v1, 1); + VMOVeS(v0, 1, v1, 0); + } else if(u8==0xAA) { + // dumplicate 16bits slot 2 to all spot + if(v0!=v1) { + VMOVeH(v0, 2, v1, 2); + } + VMOVeH(v0, 3, v1, 2); + VMOVeS(v0, 0, v1, 1); + } else if(u8==0xFF) { + // dumplicate 16bits slot 3 to all spot + if(v0!=v1) { + VMOVeH(v0, 3, v1, 3); + } + VMOVeH(v0, 2, v1, 3); + VMOVeS(v0, 0, v1, 1); + } else if(v0!=v1) { + VMOVeH(v0, 0, v1, (u8>>(0*2))&3); + VMOVeH(v0, 1, v1, (u8>>(1*2))&3); + VMOVeH(v0, 2, v1, (u8>>(2*2))&3); + VMOVeH(v0, 3, v1, (u8>>(3*2))&3); + } else { + uint64_t swp[4] = { + (0)|(1<<8), + (2)|(3<<8), + (4)|(5<<8), + (6)|(7<<8) + }; + d0 = fpu_get_scratch(dyn); + tmp64u = swp[(u8>>(0*2))&3] | (swp[(u8>>(1*2))&3]<<16); + tmp64u |= (swp[(u8>>(2*2))&3]<<32) | 
(swp[(u8>>(3*2))&3]<<48); + MOV64x(x2, tmp64u); + VMOVQDfrom(d0, 0, x2); + VTBL1_8(v0, v1, d0); + } + } else { + v0 = mmx_get_reg_empty(dyn, ninst, x1, gd); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 1); + u8 = F8; + if (u8) { + i32 = -1; + for (int i=0; i<4; ++i) { + int32_t idx = (u8>>(i*2))&3; + if(idx!=i32) { + ADDx_U12(x2, ed, idx*2); + i32 = idx; + } + VLD1_16(v0, i, x2); + } + } else { + VLD1R_16(v0, ed); + } + } + break; + case 0x71: + nextop = F8; + switch((nextop>>3)&7) { + case 2: + INST_NAME("PSRLW Em, Ib"); + GETEM(q0, 1); + u8 = F8; + if(u8) { + if (u8>15) { + VEOR(q0, q0, q0); + } else if(u8) { + VSHR_16(q0, q0, u8); + } + if(!MODREG) { + VSTR64_U12(q0, ed, fixedaddress); + } + } + break; + case 4: + INST_NAME("PSRAW Ex, Ib"); + GETEM(q0, 1); + u8 = F8; + if(u8>15) u8=15; + if(u8) { + VSSHR_16(q0, q0, u8); + } + if(!MODREG) { + VSTR64_U12(q0, ed, fixedaddress); + } + break; + case 6: + INST_NAME("PSLLW Ex, Ib"); + GETEM(q0, 1); + u8 = F8; + if(u8) { + if (u8>15) { + VEOR(q0, q0, q0); + } else { + VSHL_16(q0, q0, u8); + } + if(!MODREG) { + VSTR64_U12(q0, ed, fixedaddress); + } + } + break; + default: + *ok = 0; + DEFAULT; + } + break; + case 0x72: + nextop = F8; + switch((nextop>>3)&7) { + case 2: + INST_NAME("PSRLD Em, Ib"); + GETEM(d0, 1); + u8 = F8; + if(u8) { + if (u8>31) { + VEOR(d0, d0, d0); + } else if(u8) { + VSHR_32(d0, d0, u8); + } + if(!MODREG) { + VSTR64_U12(d0, ed, fixedaddress); + } + } + break; + case 4: + INST_NAME("PSRAD Em, Ib"); + GETEM(d0, 1); + u8 = F8; + if(u8>31) u8=31; + if(u8) { + VSSHR_32(d0, d0, u8); + } + if(!MODREG) { + VSTR64_U12(d0, ed, fixedaddress); + } + break; + case 6: + INST_NAME("PSLLD Em, Ib"); + GETEM(d0, 1); + u8 = F8; + if(u8) { + if (u8>31) { + VEOR(d0, d0, d0); + } else { + VSHL_32(d0, d0, u8); + } + if(!MODREG) { + VSTR64_U12(d0, ed, fixedaddress); + } + } + break; + default: + DEFAULT; + } + break; + case 0x73: + nextop = F8; + switch((nextop>>3)&7) { + case 2: + INST_NAME("PSRLQ Em, Ib"); + GETEM(q0, 1); + u8 = F8; + if(u8) { + if (u8>63) { + VEOR(q0, q0, q0); + } else if(u8) { + USHR_64(q0, q0, u8); + } + PUTEM(q0); + } + break; + case 6: + INST_NAME("PSLLQ Em, Ib"); + GETEM(q0, 1); + u8 = F8; + if(u8) { + if (u8>63) { + VEOR(q0, q0, q0); + } else { + SHL_64(q0, q0, u8); + } + PUTEM(q0); + } + break; + default: + DEFAULT; + } + break; + case 0x74: + INST_NAME("PCMPEQB Gm,Em"); + nextop = F8; + GETGM(d0); + GETEM(d1, 0); + VCMEQ_8(d0, d0, d1); + break; + case 0x75: + INST_NAME("PCMPEQW Gm,Em"); + nextop = F8; + GETGM(v0); + GETEM(q0, 0); + VCMEQ_16(v0, v0, q0); + break; + case 0x76: + INST_NAME("PCMPEQD Gm,Em"); + nextop = F8; + GETGM(v0); + GETEM(v1, 0); + VCMEQ_32(v0, v0, v1); + break; + case 0x77: + INST_NAME("EMMS"); + // empty MMX, FPU now usable + mmx_purgecache(dyn, ninst, x1); + /*emu->top = 0; + emu->fpu_stack = 0;*/ //TODO: Check if something is needed here? 
+ break; + + case 0x7E: + INST_NAME("MOVD Ed, Gm"); + nextop = F8; + GETGM(v0); + if((nextop&0xC0)==0xC0) { + ed = xRAX + (nextop&7) + (rex.b<<3); + if(rex.w) { + VMOVQDto(ed, v0, 0); + } else { + VMOVSto(ed, v0, 0); + MOVxw_REG(ed, ed); + } + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, 0, 0); + if(rex.w) { + VSTR64_U12(v0, ed, fixedaddress); + } else { + VSTR32_U12(v0, ed, fixedaddress); + } + } + break; + case 0x7F: + INST_NAME("MOVQ Em, Gm"); + nextop = F8; + GETGM(v0); + if(MODREG) { + v1 = mmx_get_reg_empty(dyn, ninst, x1, nextop&7); + VMOV(v1, v0); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0); + VSTR64_U12(v0, ed, fixedaddress); + } + break; + + #define GO(GETFLAGS, NO, YES, F) \ + READFLAGS(F); \ + i32_ = F32S; \ + BARRIER(2); \ + JUMP(addr+i32_);\ + GETFLAGS; \ + if(dyn->insts[ninst].x64.jmp_insts==-1) { \ + /* out of the block */ \ + i32 = dyn->insts[ninst+1].address-(dyn->native_size); \ + Bcond(NO, i32); \ + jump_to_next(dyn, addr+i32_, 0, ninst); \ + } else { \ + /* inside the block */ \ + i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->native_size); \ + Bcond(YES, i32); \ + } \ + + GOCOND(0x80, "J", "Id"); + #undef GO + + #define GO(GETFLAGS, NO, YES, F) \ + READFLAGS(F); \ + GETFLAGS; \ + nextop=F8; \ + CSETw(x3, YES); \ + if(MODREG) { \ + if(rex.rex) { \ + eb1= xRAX+(nextop&7)+(rex.b<<3); \ + eb2 = 0; \ + } else { \ + ed = (nextop&7); \ + eb2 = (ed>>2)*8; \ + eb1 = xRAX+(ed&3); \ + } \ + BFIx(eb1, x3, eb2, 8); \ + } else { \ + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff, 0, rex, 0, 0); \ + STRB_U12(x3, ed, fixedaddress); \ + } + + GOCOND(0x90, "SET", "Eb"); + #undef GO + + case 0xA2: + INST_NAME("CPUID"); + MOVx_REG(x1, xRAX); + CALL_(my_cpuid, -1, 0); + break; + case 0xA3: + INST_NAME("BT Ed, Gd"); + SETFLAGS(X_CF, SF_SUBSET); + SET_DFNONE(x1); + nextop = F8; + GETGD; + if(MODREG) { + ed = xRAX+(nextop&7)+(rex.b<<3); + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, 0, 0); + ASRxw(x1, gd, 5+rex.w); // r1 = (gd>>5) + ADDx_REG_LSL(x3, wback, x1, 2+rex.w); //(&ed)+=r1*4; + LDRxw_U12(x1, x3, fixedaddress); + ed = x1; + } + if(rex.w) { + ANDx_mask(x2, gd, 1, 0, 0b00101); //mask=0x000000000000003f + } else { + ANDw_mask(x2, gd, 0, 0b00100); //mask=0x00000001f + } + LSRxw_REG(x4, ed, x2); + BFIw(xFlags, x4, F_CF, 1); + break; + case 0xA4: + nextop = F8; + INST_NAME("SHLD Ed, Gd, Ib"); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETED(1); + GETGD; + u8 = F8; + emit_shld32c(dyn, ninst, rex, ed, gd, u8, x3, x4); + WBACK; + break; + case 0xA5: + nextop = F8; + INST_NAME("SHLD Ed, Gd, CL"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + UXTBw(x3, xRCX); + SETFLAGS(X_ALL, SF_SET); + GETEDW(x4, x1, 0); + GETGD; + MOVxw_REG(x2, gd); + CALL_(rex.w?((void*)shld64):((void*)shld32), ed, x4); + WBACK; + break; + + case 0xAB: + INST_NAME("BTS Ed, Gd"); + SETFLAGS(X_CF, SF_SUBSET); + SET_DFNONE(x1); + nextop = F8; + GETGD; + if(MODREG) { + ed = xRAX+(nextop&7)+(rex.b<<3); + wback = 0; + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, 0, 0); + ASRxw(x1, gd, 5+rex.w); // r1 = (gd>>5) + ADDx_REG_LSL(x3, wback, x1, 2+rex.w); //(&ed)+=r1*4; + LDRxw_U12(x1, x3, fixedaddress); + ed = x1; + wback = x3; + } + if(rex.w) { + ANDx_mask(x2, gd, 1, 0, 0b00101); //mask=0x000000000000003f + } else { + ANDw_mask(x2, 
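/*
 * Illustrative sketch of the ASR/ADD address arithmetic in the BT Ed,Gd case
 * above: with a memory operand, BT treats memory as a long bit string, so the
 * effective address is advanced by (bit_index >> 5) dwords (>> 6 qwords with
 * REX.W) and only the low 5 (or 6) bits of the signed index pick the bit
 * inside that element. Function name is mine.
 */
#include <stdint.h>

static int bt_mem32(const uint32_t *base, int64_t bit_index)
{
    const uint32_t *elem = base + (bit_index >> 5);  /* signed: negative offsets go backwards */
    unsigned bit = (unsigned)bit_index & 31;
    return (*elem >> bit) & 1;                       /* this value becomes CF */
}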
gd, 0, 0b00100); //mask=0x00000001f + } + LSRxw_REG(x4, ed, x2); + if(rex.w) { + ANDSx_mask(x4, x4, 1, 0, 0); //mask=1 + } else { + ANDSw_mask(x4, x4, 0, 0); //mask=1 + } + BFIw(xFlags, x4, F_CF, 1); + MOV32w(x4, 1); + LSLxw_REG(x4, x4, x2); + EORxw_REG(x4, ed, x4); + CSELxw(ed, ed, x4, cNE); + if(wback) { + STRxw_U12(ed, wback, fixedaddress); + } + break; + case 0xAC: + nextop = F8; + INST_NAME("SHRD Ed, Gd, Ib"); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETED(1); + GETGD; + u8 = F8; + emit_shrd32c(dyn, ninst, rex, ed, gd, u8, x3, x4); + WBACK; + break; + case 0xAD: + nextop = F8; + INST_NAME("SHRD Ed, Gd, CL"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + SETFLAGS(X_ALL, SF_SET); + UXTBw(x3, xRCX); + GETEDW(x4, x1, 0); + GETGD; + MOVxw_REG(x2, gd); + CALL_(rex.w?((void*)shrd64):((void*)shrd32), ed, x4); + WBACK; + break; + + case 0xAE: + nextop = F8; + if((nextop&0xF8)==0xE8) { + INST_NAME("LFENCE"); + } else + if((nextop&0xF8)==0xF0) { + INST_NAME("MFENCE"); + } else + if((nextop&0xF8)==0xF8) { + INST_NAME("SFENCE"); + } else { + switch((nextop>>3)&7) { + case 0: + INST_NAME("FXSAVE Ed"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + fpu_purgecache(dyn, ninst, x1, x2, x3); + if(MODREG) { + DEFAULT; + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); + if(ed!=x1) {MOVx_REG(x1, ed);} + CALL(rex.w?((void*)fpu_fxsave64):((void*)fpu_fxsave32), -1); + } + break; + case 1: + INST_NAME("FXRSTOR Ed"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + fpu_purgecache(dyn, ninst, x1, x2, x3); + if(MODREG) { + DEFAULT; + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); + if(ed!=x1) {MOVx_REG(x1, ed);} + CALL(rex.w?((void*)fpu_fxrstor64):((void*)fpu_fxrstor32), -1); + } + break; + case 2: + INST_NAME("LDMXCSR Md"); + GETED(0); + STRw_U12(ed, xEmu, offsetof(x64emu_t, mxcsr)); + break; + case 3: + INST_NAME("STMXCSR Md"); + if(MODREG) { + ed = xRAX+(nextop&7)+(rex.b<<3); + LDRw_U12(ed, xEmu, offsetof(x64emu_t, mxcsr)); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); + LDRw_U12(x4, xEmu, offsetof(x64emu_t, mxcsr)); + STRw_U12(x4, ed, fixedaddress); + } + break; + default: + DEFAULT; + } + } + break; + case 0xAF: + INST_NAME("IMUL Gd, Ed"); + SETFLAGS(X_ALL, SF_PENDING); + nextop = F8; + GETGD; + GETED(0); + if(rex.w) { + // 64bits imul + UFLAG_IF { + SMULH(x3, gd, ed); + MULx(gd, gd, ed); + UFLAG_OP1(x3); + UFLAG_RES(gd); + UFLAG_DF(x3, d_imul64); + } else { + MULxw(gd, gd, ed); + } + } else { + // 32bits imul + UFLAG_IF { + SMULL(gd, gd, ed); + UFLAG_RES(gd); + LSRx(x3, gd, 32); + UFLAG_OP1(x3); + UFLAG_DF(x3, d_imul32); + MOVw_REG(gd, gd); + } else { + MULxw(gd, gd, ed); + } + } + break; + + case 0xB3: + INST_NAME("BTR Ed, Gd"); + SETFLAGS(X_CF, SF_SUBSET); + SET_DFNONE(x1); + nextop = F8; + GETGD; + if(MODREG) { + ed = xRAX+(nextop&7)+(rex.b<<3); + wback = 0; + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, 0, 0); + ASRxw(x1, gd, 5+rex.w); // r1 = (gd>>5) + ADDx_REG_LSL(x3, wback, x1, 2+rex.w); //(&ed)+=r1*4; + LDRxw_U12(x1, x3, fixedaddress); + ed = x1; + wback = x3; + } + if(rex.w) { + ANDx_mask(x2, gd, 1, 0, 0b00101); //mask=0x000000000000003f + } else { + ANDw_mask(x2, gd, 0, 0b00100); //mask=0x00000001f + } + LSRxw_REG(x4, ed, x2); + if(rex.w) { + ANDSx_mask(x4, x4, 1, 0, 0); //mask=1 + } else { + ANDSw_mask(x4, x4, 0, 0); //mask=1 + } + BFIw(xFlags, x4, F_CF, 1); + MOV32w(x4, 1); + LSLxw_REG(x4, 
x4, x2); + EORxw_REG(x4, ed, x4); + CSELxw(ed, ed, x4, cEQ); + if(wback) { + STRxw_U12(ed, wback, fixedaddress); + } + break; + + case 0xB6: + INST_NAME("MOVZX Gd, Eb"); + nextop = F8; + GETGD; + if(MODREG) { + if(rex.rex) { + eb1 = xRAX+(nextop&7)+(rex.b<<3); + eb2 = 0; \ + } else { + ed = (nextop&7); + eb1 = xRAX+(ed&3); // Ax, Cx, Dx or Bx + eb2 = (ed&4)>>2; // L or H + } + UBFXxw(gd, eb1, eb2*8, 8); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff, 0, rex, 0, 0); + LDRB_U12(gd, ed, fixedaddress); + } + break; + case 0xB7: + INST_NAME("MOVZX Gd, Ew"); + nextop = F8; + GETGD; + if(MODREG) { + ed = xRAX+(nextop&7)+(rex.b<<3); + UBFXxw(gd, ed, 0, 16); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<1, 1, rex, 0, 0); + LDRH_U12(gd, ed, fixedaddress); + } + break; + + case 0xBA: + nextop = F8; + switch((nextop>>3)&7) { + case 4: + INST_NAME("BT Ed, Ib"); + SETFLAGS(X_CF, SF_SUBSET); + SET_DFNONE(x1); + gd = x2; + if(MODREG) { + ed = xRAX+(nextop&7)+(rex.b<<3); + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xff0<<2, 3, rex, 0, 1); + LDRxw_U12(x1, wback, fixedaddress); + ed = x1; + } + u8 = F8; + u8&=rex.w?0x3f:0x1f; + if(u8) { + LSRxw(x1, ed, u8); + ed = x1; + } + BFIw(xFlags, ed, F_CF, 1); + break; + case 5: + INST_NAME("BTS Ed, Ib"); + SETFLAGS(X_CF, SF_SUBSET); + SET_DFNONE(x1); + if(MODREG) { + ed = xRAX+(nextop&7)+(rex.b<<3); + wback = 0; + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xff0<<2, 3, rex, 0, 1); + LDRxw_U12(x1, wback, fixedaddress); + ed = x1; + } + u8 = F8; + u8&=(rex.w?0x3f:0x1f); + if(u8) { + LSRxw(x4, ed, u8); + } else { + MOVw_REG(x4, ed); + } + BFIw(xFlags, x4, F_CF, 1); + TBNZ_MARK3(x4, 0); // bit already set, jump to next instruction + MOV32w(x4, 1); + EORxw_REG_LSL(ed, ed, x4, u8); + if(wback) { + STRxw_U12(ed, wback, fixedaddress); + } + MARK3; + break; + case 6: + INST_NAME("BTR Ed, Ib"); + SETFLAGS(X_CF, SF_SUBSET); + SET_DFNONE(x1); + if(MODREG) { + ed = xRAX+(nextop&7)+(rex.b<<3); + wback = 0; + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xff0<<2, 3, rex, 0, 1); + LDRxw_U12(x1, wback, fixedaddress); + ed = x1; + } + u8 = F8; + u8&=(rex.w?0x3f:0x1f); + if(u8) { + LSRxw(x4, ed, u8); + } else { + MOVw_REG(x4, ed); + } + BFIw(xFlags, x4, F_CF, 1); + TBZ_MARK3(x4, 0); // bit already clear, jump to next instruction + //MOVW(x14, 1); // already 0x01 + EORxw_REG_LSL(ed, ed, x4, u8); + if(wback) { + STRxw_U12(ed, wback, fixedaddress); + } + MARK3; + break; + case 7: + INST_NAME("BTC Ed, Ib"); + SETFLAGS(X_CF, SF_SUBSET); + SET_DFNONE(x1); + if(MODREG) { + ed = xRAX+(nextop&7)+(rex.b<<3); + wback = 0; + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xff0<<2, 3, rex, 0, 1); + LDRxw_U12(x1, wback, fixedaddress); + ed = x1; + } + u8 = F8; + u8&=(rex.w?0x3f:0x1f); + if(u8) { + LSRxw(x4, ed, u8); + } else { + MOVw_REG(x4, ed); + } + BFIw(xFlags, x4, F_CF, 1); + MOV32w(x4, 1); + EORxw_REG_LSL(ed, ed, x4, u8); + if(wback) { + STRxw_U12(ed, wback, fixedaddress); + } + MARK3; + break; + default: + DEFAULT; + } + break; + case 0xBB: + INST_NAME("BTC Ed, Gd"); + SETFLAGS(X_CF, SF_SUBSET); + SET_DFNONE(x1); + nextop = F8; + GETGD; + if(MODREG) { + ed = xRAX+(nextop&7)+(rex.b<<3); + wback = 0; + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, 0, 0); + ASRxw(x1, gd, 5+rex.w); // r1 = (gd>>5) + ADDx_REG_LSL(x3, wback, x1, 
2+rex.w); //(&ed)+=r1*4; + LDRxw_U12(x1, x3, fixedaddress); + ed = x1; + wback = x3; + } + if(rex.w) { + ANDx_mask(x2, gd, 1, 0, 0b00101); //mask=0x000000000000003f + } else { + ANDw_mask(x2, gd, 0, 0b00100); //mask=0x00000001f + } + LSRxw_REG(x4, ed, x2); + if(rex.w) { + ANDx_mask(x4, x4, 1, 0, 0); //mask=1 + } else { + ANDw_mask(x4, x4, 0, 0); //mask=1 + } + BFIw(xFlags, x4, F_CF, 1); + MOV32w(x4, 1); + LSLxw_REG(x4, x4, x2); + EORxw_REG(ed, ed, x4); + if(wback) { + STRxw_U12(ed, wback, fixedaddress); + } + break; + case 0xBC: + INST_NAME("BSF Gd, Ed"); + SETFLAGS(X_ZF, SF_SUBSET); + SET_DFNONE(x1); + nextop = F8; + GETED(0); + GETGD; + TSTxw_REG(ed, ed); + B_MARK(cEQ); + RBITxw(x1, ed); // reverse + CLZxw(gd, x1); // x2 gets leading 0 == BSF + MARK; + CSETw(x1, cEQ); //ZF not set + BFIw(xFlags, x1, F_ZF, 1); + break; + case 0xBD: + INST_NAME("BSR Gd, Ed"); + SETFLAGS(X_ZF, SF_SUBSET); + SET_DFNONE(x1); + nextop = F8; + GETED(0); + GETGD; + TSTxw_REG(ed, ed); + B_MARK(cEQ); + CLZxw(gd, ed); // x2 gets leading 0 + SUBxw_U12(gd, gd, rex.w?63:31); + NEGxw_REG(gd, gd); // complement + MARK; + CSETw(x1, cEQ); //ZF not set + BFIw(xFlags, x1, F_ZF, 1); + break; + case 0xBE: + INST_NAME("MOVSX Gd, Eb"); + nextop = F8; + GETGD; + if(MODREG) { + if(rex.rex) { + wback = xRAX+(nextop&7)+(rex.b<<3); + wb2 = 0; + } else { + wback = (nextop&7); + wb2 = (wback>>2)*8; + wback = xRAX+(wback&3); + } + SBFXxw(gd, wback, wb2, 8); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, 0xfff, 0, rex, 0, 0); + LDRSBxw_U12(gd, ed, fixedaddress); + } + break; + case 0xBF: + INST_NAME("MOVSX Gd, Ew"); + nextop = F8; + GETGD; + if(MODREG) { + ed = xRAX+(nextop&7)+(rex.b<<3); + SXTHxw(gd, ed); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, 0xfff<<1, 1, rex, 0, 0); + LDRSHxw_U12(gd, ed, fixedaddress); + } + break; + + case 0xC2: + INST_NAME("CMPPS Gx, Ex, Ib"); + nextop = F8; + GETGX(v0); + GETEX(v1, 1); + u8 = F8; + switch(u8&7) { + // the inversion of the params in the comparison is there to handle NaN the same way SSE does + case 0: FCMEQQS(v0, v0, v1); break; // Equal + case 1: FCMGTQS(v0, v1, v0); break; // Less than + case 2: FCMGEQS(v0, v1, v0); break; // Less or equal + case 3: FCMEQQS(v0, v0, v0); + if(v0!=v1) { + q0 = fpu_get_scratch(dyn); + FCMEQQS(q0, v1, v1); + VANDQ(v0, v0, q0); + } + VMVNQ(v0, v0); + break; // NaN (NaN is not equal to himself) + case 4: FCMEQQS(v0, v0, v1); VMVNQ(v0, v0); break; // Not Equal (or unordered on ARM, not on X86...) 
+ case 5: FCMGTQS(v0, v1, v0); VMVNQ(v0, v0); break; // Greater or equal or unordered + case 6: FCMGEQS(v0, v1, v0); VMVNQ(v0, v0); break; // Greater or unordered + case 7: FCMEQQS(v0, v0, v0); + if(v0!=v1) { + q0 = fpu_get_scratch(dyn); + FCMEQQS(q0, v1, v1); + VANDQ(v0, v0, q0); + } + break; // not NaN + } + break; + case 0xC3: + INST_NAME("MOVNTI Ed, Gd"); + nextop=F8; + GETGD; + if(MODREG) { // reg <= reg + MOVxw_REG(xRAX+(nextop&7)+(rex.b<<3), gd); + } else { // mem <= reg + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, 0, 0); + STRxw_U12(gd, ed, fixedaddress); + } + break; + case 0xC4: + INST_NAME("PINSRW Gm,Ed,Ib"); + nextop = F8; + GETGM(v0); + if(MODREG) { + u8 = (F8)&3; + ed = xRAX+(nextop&7)+(rex.b<<3); + VMOVQHfrom(v0, u8, ed); + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0, rex, 0, 1); + u8 = (F8)&3; + VLD1_16(v0, u8, wback); + } + break; + case 0xC5: + INST_NAME("PEXTRW Gd,Em,Ib"); + nextop = F8; + GETGD; + if(MODREG) { + GETEM(v0, 1); + u8 = (F8)&3; + VMOVHto(gd, v0, u8); + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0, rex, 0, 1); + u8 = (F8)&3; + LDRH_U12(gd, wback, u8*2); + } + break; + case 0xC6: + INST_NAME("SHUFPS Gx, Ex, Ib"); + nextop = F8; + GETGX(v0); + if(!MODREG) + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 1); + u8 = F8; + d0 = fpu_get_scratch(dyn); + // first two elements from Gx + for(int i=0; i<2; ++i) { + VMOVeS(d0, i, v0, (u8>>(i*2)&3)); + } + // second two from Ex + if(MODREG) { + v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); + for(int i=2; i<4; ++i) { + VMOVeS(d0, i, v1, (u8>>(i*2)&3)); + } + } else { + for(int i=2; i<4; ++i) { + ADDx_U12(x2, ed, (u8>>(i*2)&3)*4); + VLD1_32(d0, i, x2); + } + } + VMOVQ(v0, d0); + break; + + case 0xC8: + case 0xC9: + case 0xCA: + case 0xCB: + case 0xCC: + case 0xCD: + case 0xCE: + case 0xCF: /* BSWAP reg */ + INST_NAME("BSWAP Reg"); + gd = xRAX+(opcode&7)+(rex.b<<3); + REVxw(gd, gd); + break; + + case 0xD3: + INST_NAME("PSRLQ Gm,Em"); + nextop = F8; + GETGM(d0); + GETEM(d1, 0); + if(MODREG) + q0 = fpu_get_scratch(dyn); + else + q0 = d1; + NEG_64(q0, d1); + USHL_R_64(d0, d0, q0); + break; + + case 0xD5: + INST_NAME("PMULLW Gm, Em"); + nextop = F8; + GETGM(q0); + GETEM(q1, 0); + VMUL_16(q0, q0, q1); + break; + + case 0xD7: + nextop = F8; + INST_NAME("PMOVMSKB Gd, Em"); + v0 = fpu_get_scratch(dyn); + v1 = fpu_get_scratch(dyn); + q1 = fpu_get_scratch(dyn); + GETEM(q0, 0); + GETGD; + TABLE64(x1, (uintptr_t)&mask_shift8); + VLDR64_U12(v0, x1, 0); // load shift + MOVI_8(v1, 0x80); // load mask + VAND(q1, v1, q0); + USHL_8(q1, q1, v0); // shift + UADDLV_8(q1, q1); // accumalte + VMOVBto(gd, q1, 0); + break; + case 0xD8: + INST_NAME("PSUBUSB Gm, Em"); + nextop = F8; + GETGM(q0); + GETEM(q1, 0); + UQSUB_8(q0, q0, q1); + break; + case 0xD9: + INST_NAME("PSUBUSW Gm, Em"); + nextop = F8; + GETGM(q0); + GETEM(q1, 0); + UQSUB_16(q0, q0, q1); + break; + case 0xDA: + INST_NAME("PMINUB Gm, Em"); + nextop = F8; + GETGM(d0); + GETEM(d1, 0); + UMIN_8(d0, d0, d1); + break; + case 0xDB: + INST_NAME("PAND Gm, Em"); + nextop = F8; + GETGM(v0); + GETEM(v1, 0); + VAND(v0, v0, v1); + break; + case 0xDC: + INST_NAME("PADDUSB Gm,Em"); + nextop = F8; + GETGM(q0); + GETEM(q1, 0); + UQADD_8(q0, q0, q1); + break; + case 0xDD: + INST_NAME("PADDUSW Gm,Em"); + nextop = F8; + GETGM(q0); + GETEM(q1, 0); + UQADD_16(q0, q0, q1); + break; + case 0xDE: + INST_NAME("PMAXUB Gm, Em"); + nextop = 
F8; + GETGM(d0); + GETEM(d1, 0); + UMAX_8(d0, d0, d1); + break; + case 0xDF: + INST_NAME("PANDN Gm, Em"); + nextop = F8; + GETGM(v0); + GETEM(v1, 0); + VBIC(v0, v1, v0); + break; + case 0xE0: + INST_NAME("PAVGB Gm, Em"); + nextop = F8; + GETGM(v0); + GETEM(v1, 0); + URHADD_8(v0, v0, v1); + break; + + case 0xE3: + INST_NAME("PAVGW Gm,Em"); + nextop = F8; + GETGM(d0); + GETEM(d1, 0); + URHADD_16(d0, d0, d1); + break; + + case 0xE5: + INST_NAME("PMULHW Gm,Em"); + nextop = F8; + GETGM(v0); + GETEM(v1, 0); + q0 = fpu_get_scratch(dyn); + VSMULL_16(q0, v0, v1); + SQSHRN_16(v0, q0, 16); + break; + + case 0xE7: + INST_NAME("MOVNTQ Em, Gm"); // Non Temporal par not handled for now + nextop = F8; + gd = (nextop&0x38)>>3; + if((nextop&0xC0)==0xC0) { + DEFAULT; + } else { + v0 = mmx_get_reg(dyn, ninst, x1, gd); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0); + VSTR64_U12(v0, ed, fixedaddress); + } + break; + case 0xE8: + INST_NAME("PSUBSB Gm,Em"); + nextop = F8; + GETGM(v0); + GETEM(q0, 0); + SQSUB_8(v0, v0, q0); + break; + case 0xE9: + INST_NAME("PSUBSW Gm,Em"); + nextop = F8; + GETGM(v0); + GETEM(q0, 0); + SQSUB_16(v0, v0, q0); + break; + + case 0xEB: + INST_NAME("POR Gm, Em"); + nextop = F8; + GETGM(v0); + GETEM(v1, 0); + VORR(v0, v0, v1); + break; + case 0xEC: + INST_NAME("PADDSB Gm,Em"); + nextop = F8; + GETGM(d0); + GETEM(d1, 0); + SQADD_8(d0, d0, d1); + break; + case 0xED: + INST_NAME("PADDSW Gm,Em"); + nextop = F8; + GETGM(d0); + GETEM(d1, 0); + SQADD_16(d0, d0, d1); + break; + + case 0xEF: + INST_NAME("PXOR Gm,Em"); + nextop = F8; + gd = ((nextop&0x38)>>3); + if(MODREG && ((nextop&7))==gd) { + // special case for PXOR Gx, Gx + q0 = mmx_get_reg_empty(dyn, ninst, x1, gd); + VEOR(q0, q0, q0); + } else { + q0 = mmx_get_reg(dyn, ninst, x1, gd); + GETEM(q1, 0); + VEOR(q0, q0, q1); + } + break; + + case 0xF2: + INST_NAME("PSLLD Gm,Em"); + nextop = F8; + GETGM(d0); + GETEM(d1, 0); + v0 = fpu_get_scratch(dyn); + VMOVeD(v0, 0, d1, 0); + VMOVeD(v0, 1, d1, 0); + SQXTN_32(v0, v0); // 2*q1 in 32bits now + SSHL_32(d0, d0, v0); + break; + + case 0xF5: + INST_NAME("PMADDWD Gx, Ex"); + nextop = F8; + GETGM(v0); + GETEM(v1, 0); + q0 = fpu_get_scratch(dyn); + VSMULL_16(q0, v0, v1); + VADDPQ_32(q0, q0, q0); //ADDP from Q to non-Q? + VMOVQ(v0, q0); + break; + case 0xF6: + INST_NAME("PSADBW Gm, Em"); + nextop = F8; + GETGM(q0); + GETEM(q1, 0); + d0 = fpu_get_scratch(dyn); + d1 = fpu_get_scratch(dyn); + VEOR(d1, d1, d1); // is it necessary? 
+ UABDL_8(d0, q0, q1); + UADDLVQ_16(d1, d0); + VMOVeD(q0, 0, d1, 0); + break; + + case 0xF8: + INST_NAME("PSUBB Gm, Em"); + nextop = F8; + GETGM(v0); + GETEM(v1, 0); + VSUB_8(v0, v0, v1); + break; + case 0xF9: + INST_NAME("PSUBW Gm, Em"); + nextop = F8; + GETGM(v0); + GETEM(v1, 0); + VSUB_16(v0, v0, v1); + break; + case 0xFA: + INST_NAME("PSUBD Gm, Em"); + nextop = F8; + GETGM(v0); + GETEM(v1, 0); + VSUB_32(v0, v0, v1); + break; + + case 0xFC: + INST_NAME("PADDB Gm, Em"); + nextop = F8; + GETGM(v0); + GETEM(v1, 0); + VADD_8(v0, v0, v1); + break; + case 0xFD: + INST_NAME("PADDW Gm, Em"); + nextop = F8; + GETGM(v0); + GETEM(v1, 0); + VADD_16(v0, v0, v1); + break; + case 0xFE: + INST_NAME("PADDD Gm, Em"); + nextop = F8; + GETGM(v0); + GETEM(v1, 0); + VADD_32(v0, v0, v1); + break; + + default: + DEFAULT; + } + return addr; +} diff --git a/src/dynarec/arm64/dynarec_arm64_64.c b/src/dynarec/arm64/dynarec_arm64_64.c new file mode 100644 index 00000000..d604047c --- /dev/null +++ b/src/dynarec/arm64/dynarec_arm64_64.c @@ -0,0 +1,880 @@ +#include +#include +#include +#include +#include + +#include "debug.h" +#include "box64context.h" +#include "dynarec.h" +#include "emu/x64emu_private.h" +#include "emu/x64run_private.h" +#include "x64run.h" +#include "x64emu.h" +#include "box64stack.h" +#include "callback.h" +#include "emu/x64run_private.h" +#include "x64trace.h" +#include "dynarec_native.h" + +#include "arm64_printer.h" +#include "dynarec_arm64_private.h" +#include "dynarec_arm64_helper.h" +#include "dynarec_arm64_functions.h" + +#define GETG gd = ((nextop&0x38)>>3)+(rex.r<<3) + +uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int seg, int* ok, int* need_epilog) +{ + (void)ip; (void)rep; (void)need_epilog; + + uint8_t opcode = F8; + uint8_t nextop; + uint8_t u8; + uint8_t gd, ed, eb1, eb2, gb1, gb2; + uint8_t wback, wb1, wb2, wb; + int64_t i64, j64; + int v0; + int q0; + int d0; + int64_t fixedaddress; + MAYUSE(eb1); + MAYUSE(eb2); + MAYUSE(wb1); + MAYUSE(wb2); + MAYUSE(gb1); + MAYUSE(gb2); + MAYUSE(j64); + MAYUSE(d0); + MAYUSE(q0); + MAYUSE(v0); + + while((opcode==0xF2) || (opcode==0xF3)) { + rep = opcode-0xF1; + opcode = F8; + } + // REX prefix before the F0 are ignored + rex.rex = 0; + while(opcode>=0x40 && opcode<=0x4f) { + rex.rex = opcode; + opcode = F8; + } + + switch(opcode) { + + case 0x03: + INST_NAME("ADD Gd, Seg:Ed"); + SETFLAGS(X_ALL, SF_SET_PENDING); + grab_segdata(dyn, addr, ninst, x4, seg); + nextop = F8; + GETGD; + GETEDO(x4, 0); + emit_add32(dyn, ninst, rex, gd, ed, x3, x4); + break; + + case 0x0F: + opcode = F8; + switch(opcode) { + + case 0x10: + switch(rep) { + case 1: + INST_NAME("MOVSD Gx, Ex"); + nextop = F8; + GETG; + if(MODREG) { + ed = (nextop&7)+ (rex.b<<3); + v0 = sse_get_reg(dyn, ninst, x1, gd); + d0 = sse_get_reg(dyn, ninst, x1, ed); + VMOVeD(v0, 0, d0, 0); + } else { + grab_segdata(dyn, addr, ninst, x4, seg); + v0 = sse_get_reg_empty(dyn, ninst, x1, gd); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0); + ADDx_REG(x4, x4, ed); + VLDR64_U12(v0, x4, fixedaddress); // upper part reseted + } + break; + case 2: + INST_NAME("MOVSS Gx, Ex"); + nextop = F8; + GETG; + if(MODREG) { + v0 = sse_get_reg(dyn, ninst, x1, gd); + q0 = sse_get_reg(dyn, ninst, x1, (nextop&7) + (rex.b<<3)); + VMOVeS(v0, 0, q0, 0); + } else { + grab_segdata(dyn, addr, ninst, x4, seg); + v0 = sse_get_reg_empty(dyn, ninst, x1, gd); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, 
rex, 0, 0); + ADDx_REG(x4, x4, ed); + VLDR32_U12(v0, x4, fixedaddress); + } + break; + default: + DEFAULT; + } + break; + case 0x11: + switch(rep) { + case 1: + INST_NAME("MOVSD Ex, Gx"); + nextop = F8; + GETG; + v0 = sse_get_reg(dyn, ninst, x1, gd); + if(MODREG) { + ed = (nextop&7)+ (rex.b<<3); + d0 = sse_get_reg(dyn, ninst, x1, ed); + VMOVeD(d0, 0, v0, 0); + } else { + grab_segdata(dyn, addr, ninst, x4, seg); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0); + ADDx_REG(x4, x4, ed); + VSTR64_U12(v0, x4, fixedaddress); + } + break; + case 2: + INST_NAME("MOVSS Ex, Gx"); + nextop = F8; + GETG; + v0 = sse_get_reg(dyn, ninst, x1, gd); + if(MODREG) { + q0 = sse_get_reg(dyn, ninst, x1, (nextop&7) + (rex.b<<3)); + VMOVeS(q0, 0, v0, 0); + } else { + grab_segdata(dyn, addr, ninst, x4, seg); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); + ADDx_REG(x4, x4, ed); + VSTR32_U12(v0, x4, fixedaddress); + } + break; + default: + DEFAULT; + } + break; + + case 0xAF: + INST_NAME("IMUL Gd, Ed"); + SETFLAGS(X_ALL, SF_PENDING); + nextop = F8; + grab_segdata(dyn, addr, ninst, x4, seg); + GETGD; + GETEDO(x4, 0); + if(rex.w) { + // 64bits imul + UFLAG_IF { + SMULH(x3, gd, ed); + MULx(gd, gd, ed); + UFLAG_OP1(x3); + UFLAG_RES(gd); + UFLAG_DF(x3, d_imul64); + } else { + MULxw(gd, gd, ed); + } + } else { + // 32bits imul + UFLAG_IF { + SMULL(gd, gd, ed); + UFLAG_RES(gd); + LSRx(x3, gd, 32); + UFLAG_OP1(x3); + UFLAG_DF(x3, d_imul32); + MOVw_REG(gd, gd); + } else { + MULxw(gd, gd, ed); + } + } + break; + + default: + DEFAULT; + } + break; + + case 0x33: + INST_NAME("XOR Gd, Seg:Ed"); + SETFLAGS(X_ALL, SF_SET_PENDING); + grab_segdata(dyn, addr, ninst, x4, seg); + nextop = F8; + GETGD; + GETEDO(x4, 0); + emit_xor32(dyn, ninst, rex, gd, ed, x3, x4); + break; + + case 0x39: + INST_NAME("CMP Seg:Ed, Gd"); + SETFLAGS(X_ALL, SF_SET_PENDING); + grab_segdata(dyn, addr, ninst, x4, seg); + nextop = F8; + GETGD; + GETEDO(x4, 0); + emit_cmp32(dyn, ninst, rex, ed, gd, x3, x4, x5); + break; + + case 0x66: + addr = dynarec64_6664(dyn, addr, ip, ninst, rex, rep, ok, need_epilog); + break; + + case 0x80: + nextop = F8; + grab_segdata(dyn, addr, ninst, x1, seg); + switch((nextop>>3)&7) { + case 0: //ADD + INST_NAME("ADD Eb, Ib"); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEBO(x1, 1); + u8 = F8; + emit_add8c(dyn, ninst, x1, u8, x2, x4); + EBBACK; + break; + case 1: //OR + INST_NAME("OR Eb, Ib"); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEBO(x1, 1); + u8 = F8; + emit_or8c(dyn, ninst, x1, u8, x2, x4); + EBBACK; + break; + case 2: //ADC + INST_NAME("ADC Eb, Ib"); + READFLAGS(X_CF); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEBO(x1, 1); + u8 = F8; + emit_adc8c(dyn, ninst, x1, u8, x2, x4, x5); + EBBACK; + break; + case 3: //SBB + INST_NAME("SBB Eb, Ib"); + READFLAGS(X_CF); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEBO(x1, 1); + u8 = F8; + emit_sbb8c(dyn, ninst, x1, u8, x2, x4, x5); + EBBACK; + break; + case 4: //AND + INST_NAME("AND Eb, Ib"); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEBO(x1, 1); + u8 = F8; + emit_and8c(dyn, ninst, x1, u8, x2, x4); + EBBACK; + break; + case 5: //SUB + INST_NAME("SUB Eb, Ib"); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEBO(x1, 1); + u8 = F8; + emit_sub8c(dyn, ninst, x1, u8, x2, x4, x5); + EBBACK; + break; + case 6: //XOR + INST_NAME("XOR Eb, Ib"); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEBO(x1, 1); + u8 = F8; + emit_xor8c(dyn, ninst, x1, u8, x2, x4); + EBBACK; + break; + case 7: //CMP + INST_NAME("CMP Eb, Ib"); + SETFLAGS(X_ALL, 
SF_SET_PENDING); + GETEBO(x1, 1); + u8 = F8; + if(u8) { + MOV32w(x2, u8); + emit_cmp8(dyn, ninst, x1, x2, x3, x4, x5); + } else { + emit_cmp8_0(dyn, ninst, x1, x3, x4); + } + break; + default: + DEFAULT; + } + break; + case 0x81: + case 0x83: + nextop = F8; + grab_segdata(dyn, addr, ninst, x6, seg); + switch((nextop>>3)&7) { + case 0: //ADD + if(opcode==0x81) {INST_NAME("ADD Ed, Id");} else {INST_NAME("ADD Ed, Ib");} + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEDO(x6, (opcode==0x81)?4:1); + if(opcode==0x81) i64 = F32S; else i64 = F8S; + emit_add32c(dyn, ninst, rex, ed, i64, x3, x4, x5); + WBACKO(x6); + break; + case 1: //OR + if(opcode==0x81) {INST_NAME("OR Ed, Id");} else {INST_NAME("OR Ed, Ib");} + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEDO(x6, (opcode==0x81)?4:1); + if(opcode==0x81) i64 = F32S; else i64 = F8S; + emit_or32c(dyn, ninst, rex, ed, i64, x3, x4); + WBACKO(x6); + break; + case 2: //ADC + if(opcode==0x81) {INST_NAME("ADC Ed, Id");} else {INST_NAME("ADC Ed, Ib");} + READFLAGS(X_CF); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEDO(x6, (opcode==0x81)?4:1); + if(opcode==0x81) i64 = F32S; else i64 = F8S; + MOV64xw(x5, i64); + emit_adc32(dyn, ninst, rex, ed, x5, x3, x4); + WBACKO(x6); + break; + case 3: //SBB + if(opcode==0x81) {INST_NAME("SBB Ed, Id");} else {INST_NAME("SBB Ed, Ib");} + READFLAGS(X_CF); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEDO(x6, (opcode==0x81)?4:1); + if(opcode==0x81) i64 = F32S; else i64 = F8S; + MOV64xw(x5, i64); + emit_sbb32(dyn, ninst, rex, ed, x5, x3, x4); + WBACKO(x6); + break; + case 4: //AND + if(opcode==0x81) {INST_NAME("AND Ed, Id");} else {INST_NAME("AND Ed, Ib");} + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEDO(x6, (opcode==0x81)?4:1); + if(opcode==0x81) i64 = F32S; else i64 = F8S; + emit_and32c(dyn, ninst, rex, ed, i64, x3, x4); + WBACKO(x6); + break; + case 5: //SUB + if(opcode==0x81) {INST_NAME("SUB Ed, Id");} else {INST_NAME("SUB Ed, Ib");} + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEDO(x6, (opcode==0x81)?4:1); + if(opcode==0x81) i64 = F32S; else i64 = F8S; + emit_sub32c(dyn, ninst, rex, ed, i64, x3, x4, x5); + WBACKO(x6); + break; + case 6: //XOR + if(opcode==0x81) {INST_NAME("XOR Ed, Id");} else {INST_NAME("XOR Ed, Ib");} + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEDO(x6, (opcode==0x81)?4:1); + if(opcode==0x81) i64 = F32S; else i64 = F8S; + emit_xor32c(dyn, ninst, rex, ed, i64, x3, x4); + WBACKO(x6); + break; + case 7: //CMP + if(opcode==0x81) {INST_NAME("CMP Ed, Id");} else {INST_NAME("CMP Ed, Ib");} + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEDO(x6, (opcode==0x81)?4:1); + if(opcode==0x81) i64 = F32S; else i64 = F8S; + if(i64) { + MOV64xw(x2, i64); + emit_cmp32(dyn, ninst, rex, ed, x2, x3, x4, x5); + } else + emit_cmp32_0(dyn, ninst, rex, ed, x3, x4); + break; + } + break; + case 0x8A: + INST_NAME("MOV Gb, Eb"); + nextop = F8; + if(rex.rex) { + gb1 = gd = xRAX+((nextop&0x38)>>3)+(rex.r<<3); + gb2=0; + } else { + gd = (nextop&0x38)>>3; + gb1 = xRAX+(gd&3); + gb2 = ((gd&4)>>2); + } + if(MODREG) { + if(rex.rex) { + wback = xRAX+(nextop&7)+(rex.b<<3); + wb2 = 0; + } else { + wback = (nextop&7); + wb2 = (wback>>2); + wback = xRAX+(wback&3); + } + if(wb2) { + UBFXw(x4, wback, wb2*8, 8); + ed = x4; + } else { + ed = wback; + } + } else { + grab_segdata(dyn, addr, ninst, x4, seg); + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0, rex, 0, 0); + LDRB_REG(x4, wback, x4); + ed = x4; + } + BFIx(gb1, ed, gb2*8, 8); + break; + case 0x89: + INST_NAME("MOV Seg:Ed, Gd"); + grab_segdata(dyn, addr, ninst, x4, seg); + nextop=F8; + GETGD; + if(MODREG) { // reg 
<= reg + MOVxw_REG(xRAX+(nextop&7)+(rex.b<<3), gd); + } else { // mem <= reg + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0, rex, 0, 0); + STRxw_REG(gd, ed, x4); + } + break; + + case 0x8B: + INST_NAME("MOV Gd, Seg:Ed"); + grab_segdata(dyn, addr, ninst, x4, seg); + nextop=F8; + GETGD; + if(MODREG) { // reg <= reg + MOVxw_REG(gd, xRAX+(nextop&7)+(rex.b<<3)); + } else { // mem <= reg + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0, rex, 0, 0); + LDRxw_REG(gd, ed, x4); + } + break; + + case 0xC6: + INST_NAME("MOV Seg:Eb, Ib"); + grab_segdata(dyn, addr, ninst, x4, seg); + nextop=F8; + if(MODREG) { // reg <= u8 + u8 = F8; + if(!rex.rex) { + ed = (nextop&7); + eb1 = xRAX+(ed&3); // Ax, Cx, Dx or Bx + eb2 = (ed&4)>>2; // L or H + } else { + eb1 = xRAX+(nextop&7)+(rex.b<<3); + eb2 = 0; + } + MOV32w(x3, u8); + BFIx(eb1, x3, eb2*8, 8); + } else { // mem <= u8 + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 1); + u8 = F8; + MOV32w(x3, u8); + STRB_REG(x3, ed, x4); + } + break; + case 0xC7: + INST_NAME("MOV Seg:Ed, Id"); + grab_segdata(dyn, addr, ninst, x4, seg); + nextop=F8; + if(MODREG) { // reg <= i32 + i64 = F32S; + ed = xRAX+(nextop&7)+(rex.b<<3); + MOV64xw(ed, i64); + } else { // mem <= i32 + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0, rex, 0, 4); + i64 = F32S; + MOV64xw(x3, i64); + STRxw_REG(x3, ed, x4); + } + break; + + case 0xD1: + nextop = F8; + grab_segdata(dyn, addr, ninst, x6, seg); + switch((nextop>>3)&7) { + case 0: + INST_NAME("ROL Ed, 1"); + SETFLAGS(X_OF|X_CF, SF_SUBSET); + GETEDO(x6, 0); + emit_rol32c(dyn, ninst, rex, ed, 1, x3, x4); + WBACKO(x6); + break; + case 1: + INST_NAME("ROR Ed, 1"); + SETFLAGS(X_OF|X_CF, SF_SUBSET); + GETEDO(x6, 0); + emit_ror32c(dyn, ninst, rex, ed, 1, x3, x4); + WBACKO(x6); + break; + case 2: + INST_NAME("RCL Ed, 1"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + READFLAGS(X_CF); + SETFLAGS(X_OF|X_CF, SF_SET); + MOV32w(x2, 1); + GETEDO(x6, 0); + if(wback) {ADDx_REG(x6, x6, wback); wback=x6;} + if(ed!=x1) {MOVxw_REG(x1, ed);} + CALL_(rcl32, ed, x6); + WBACK; + break; + case 3: + INST_NAME("RCR Ed, 1"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + READFLAGS(X_CF); + SETFLAGS(X_OF|X_CF, SF_SET); + MOV32w(x2, 1); + GETEDO(x6, 0); + if(wback) {ADDx_REG(x6, x6, wback); wback=x6;} + if(ed!=x1) {MOVxw_REG(x1, ed);} + CALL_(rcr32, ed, x6); + WBACK; + break; + case 4: + case 6: + INST_NAME("SHL Ed, 1"); + SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + GETEDO(x6, 0); + emit_shl32c(dyn, ninst, rex, ed, 1, x3, x4); + WBACKO(x6); + break; + case 5: + INST_NAME("SHR Ed, 1"); + SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + GETEDO(x6, 0); + emit_shr32c(dyn, ninst, rex, ed, 1, x3, x4); + WBACKO(x6); + break; + case 7: + INST_NAME("SAR Ed, 1"); + SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + GETEDO(x6, 0); + emit_sar32c(dyn, ninst, rex, ed, 1, x3, x4); + WBACKO(x6); + break; + } + break; + case 0xD3: + nextop = F8; + grab_segdata(dyn, addr, ninst, x6, seg); + switch((nextop>>3)&7) { + case 0: + INST_NAME("ROL Ed, CL"); + SETFLAGS(X_OF|X_CF, SF_SUBSET); + if(rex.w) { + ANDSx_mask(x3, xRCX, 1, 0, 0b00101); //mask=0x000000000000003f + } else { + ANDSw_mask(x3, xRCX, 0, 0b00100); //mask=0x00000001f + } + MOV64xw(x4, (rex.w?64:32)); + SUBx_REG(x3, x4, x3); + GETEDO(x6, 0); + if(!rex.w && MODREG) {MOVw_REG(ed, ed);} + B_NEXT(cEQ); + RORxw_REG(ed, ed, x3); + WBACKO(x6); + UFLAG_IF { // calculate flags directly + 
CMPSw_U12(x3, rex.w?63:31); + B_MARK(cNE); + LSRxw(x4, ed, rex.w?63:31); + ADDxw_REG(x4, x4, ed); + BFIw(xFlags, x4, F_OF, 1); + MARK; + BFIw(xFlags, ed, F_CF, 1); + UFLAG_DF(x2, d_none); + } + break; + case 1: + INST_NAME("ROR Ed, CL"); + SETFLAGS(X_OF|X_CF, SF_SUBSET); + if(rex.w) { + ANDSx_mask(x3, xRCX, 1, 0, 0b00101); //mask=0x000000000000003f + } else { + ANDSw_mask(x3, xRCX, 0, 0b00100); //mask=0x00000001f + } + GETEDO(x6, 0); + if(!rex.w && MODREG) {MOVw_REG(ed, ed);} + B_NEXT(cEQ); + RORxw_REG(ed, ed, x3); + WBACKO(x6); + UFLAG_IF { // calculate flags directly + CMPSw_U12(x3, 1); + B_MARK(cNE); + LSRxw(x2, ed, rex.w?62:30); // x2 = d>>30 + EORw_REG_LSR(x2, x2, x2, 1); // x2 = ((d>>30) ^ ((d>>30)>>1)) + BFIw(xFlags, x2, F_OF, 1); + MARK; + LSRxw(x2, ed, rex.w?63:31); + BFIw(xFlags, x2, F_CF, 1); + UFLAG_DF(x2, d_none); + } + break; + case 2: + INST_NAME("RCL Ed, CL"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + READFLAGS(X_CF); + SETFLAGS(X_OF|X_CF, SF_SET); + if(rex.w) { + ANDSx_mask(x2, xRCX, 1, 0, 0b00101); //mask=0x000000000000003f + } else { + ANDSw_mask(x2, xRCX, 0, 0b00100); //mask=0x00000001f + } + GETEDO(x6, 0); + if(wback) {ADDx_REG(x6, x6, wback); wback=x6;} + if(!rex.w && MODREG) {MOVw_REG(ed, ed);} + B_NEXT(cEQ); + CALL_(rex.w?((void*)rcl64):((void*)rcl32), ed, x6); + WBACK; + break; + case 3: + INST_NAME("RCR Ed, CL"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + READFLAGS(X_CF); + SETFLAGS(X_OF|X_CF, SF_SET); + if(rex.w) { + ANDSx_mask(x2, xRCX, 1, 0, 0b00101); //mask=0x000000000000003f + } else { + ANDSw_mask(x2, xRCX, 0, 0b00100); //mask=0x00000001f + } + GETEDO(x6, 0); + if(wback) {ADDx_REG(x6, x6, wback); wback=x6;} + if(!rex.w && MODREG) {MOVw_REG(ed, ed);} + B_NEXT(cEQ); + CALL_(rex.w?((void*)rcr64):((void*)rcr32), ed, x6); + WBACK; + break; + case 4: + case 6: + INST_NAME("SHL Ed, CL"); + SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + if(rex.w) { + ANDSx_mask(x3, xRCX, 1, 0, 0b00101); //mask=0x000000000000003f + } else { + ANDSw_mask(x3, xRCX, 0, 0b00100); //mask=0x00000001f + } + GETEDO(x6, 0); + if(!rex.w && MODREG) {MOVw_REG(ed, ed);} + B_NEXT(cEQ); + emit_shl32(dyn, ninst, rex, ed, x3, x5, x4); + WBACKO(x6); + break; + case 5: + INST_NAME("SHR Ed, CL"); + SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + if(rex.w) { + ANDSx_mask(x3, xRCX, 1, 0, 0b00101); //mask=0x000000000000003f + } else { + ANDSw_mask(x3, xRCX, 0, 0b00100); //mask=0x00000001f + } + GETEDO(x6, 0); + if(!rex.w && MODREG) {MOVw_REG(ed, ed);} + B_NEXT(cEQ); + emit_shr32(dyn, ninst, rex, ed, x3, x5, x4); + WBACKO(x6); + break; + case 7: + INST_NAME("SAR Ed, CL"); + SETFLAGS(X_ALL, SF_PENDING); + if(rex.w) { + ANDSx_mask(x3, xRCX, 1, 0, 0b00101); //mask=0x000000000000003f + } else { + ANDSw_mask(x3, xRCX, 0, 0b00100); //mask=0x00000001f + } + GETEDO(x6, 0); + if(!rex.w && MODREG) {MOVw_REG(ed, ed);} + B_NEXT(cEQ); + UFLAG_OP12(ed, x3); + ASRxw_REG(ed, ed, x3); + WBACKO(x6); + UFLAG_RES(ed); + UFLAG_DF(x3, rex.w?d_sar64:d_sar32); + break; + } + break; + + case 0xF7: + nextop = F8; + switch((nextop>>3)&7) { + case 0: + case 1: + INST_NAME("TEST Ed, Id"); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEDO(x6, 4); + i64 = F32S; + MOV64xw(x2, i64); + emit_test32(dyn, ninst, rex, ed, x2, x3, x4); + break; + case 2: + INST_NAME("NOT Ed"); + GETEDO(x6, 4); + MVNxw_REG(ed, ed); + WBACKO(x6); + break; + case 3: + INST_NAME("NEG Ed"); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEDO(x6, 0); + emit_neg32(dyn, ninst, rex, ed, x3, x4); + WBACKO(x6); + break; + case 4: + 
INST_NAME("MUL EAX, Ed"); + SETFLAGS(X_ALL, SF_PENDING); + UFLAG_DF(x2, rex.w?d_mul64:d_mul32); + GETEDO(x6, 0); + if(rex.w) { + if(ed==xRDX) gd=x3; else gd=xRDX; + UMULH(gd, xRAX, ed); + MULx(xRAX, xRAX, ed); + if(gd!=xRDX) {MOVx_REG(xRDX, gd);} + } else { + UMULL(xRDX, xRAX, ed); //64 <- 32x32 + MOVw_REG(xRAX, xRDX); + LSRx(xRDX, xRDX, 32); + } + UFLAG_RES(xRAX); + UFLAG_OP1(xRDX); + break; + case 5: + INST_NAME("IMUL EAX, Ed"); + SETFLAGS(X_ALL, SF_PENDING); + UFLAG_DF(x2, rex.w?d_imul64:d_imul32); + GETEDO(x6, 0); + if(rex.w) { + if(ed==xRDX) gd=x3; else gd=xRDX; + SMULH(gd, xRAX, ed); + MULx(xRAX, xRAX, ed); + if(gd!=xRDX) {MOVx_REG(xRDX, gd);} + } else { + SMULL(xRDX, xRAX, ed); //64 <- 32x32 + MOVw_REG(xRAX, xRDX); + LSRx(xRDX, xRDX, 32); + } + UFLAG_RES(xRAX); + UFLAG_OP1(xRDX); + break; + case 6: + INST_NAME("DIV Ed"); + SETFLAGS(X_ALL, SF_SET); + if(!rex.w) { + SET_DFNONE(x2); + GETEDO(x6, 0); + MOVw_REG(x3, xRAX); + ORRx_REG_LSL(x3, x3, xRDX, 32); + if(MODREG) { + MOVw_REG(x4, ed); + ed = x4; + } + UDIVx(x2, x3, ed); + MSUBx(x4, x2, ed, xRAX); + MOVw_REG(xRAX, x2); + MOVw_REG(xRDX, x4); + } else { + if(ninst + && dyn->insts[ninst-1].x64.addr + && *(uint8_t*)(dyn->insts[ninst-1].x64.addr)==0x31 + && *(uint8_t*)(dyn->insts[ninst-1].x64.addr+1)==0xD2) { + SET_DFNONE(x2); + GETEDO(x6, 0); + UDIVx(x2, xRAX, ed); + MSUBx(xRDX, x2, ed, xRAX); + MOVx_REG(xRAX, x2); + } else { + GETEDO(x6, 0); + CBZxw_MARK(xRDX); + if(ed!=x1) {MOVx_REG(x1, ed);} + CALL(div64, -1); + B_NEXT_nocond; + MARK; + UDIVx(x2, xRAX, ed); + MSUBx(xRDX, x2, ed, xRAX); + MOVx_REG(xRAX, x2); + SET_DFNONE(x2); + } + } + break; + case 7: + INST_NAME("IDIV Ed"); + SETFLAGS(X_ALL, SF_SET); + if(!rex.w) { + SET_DFNONE(x2) + GETSEDOw(x6, 0); + MOVw_REG(x3, xRAX); + ORRx_REG_LSL(x3, x3, xRDX, 32); + SDIVx(x2, x3, wb); + MSUBx(x4, x2, wb, x3); + MOVw_REG(xRAX, x2); + MOVw_REG(xRDX, x4); + } else { + if(ninst && dyn->insts + && dyn->insts[ninst-1].x64.addr + && *(uint8_t*)(dyn->insts[ninst-1].x64.addr)==0x48 + && *(uint8_t*)(dyn->insts[ninst-1].x64.addr+1)==0x99) { + SET_DFNONE(x2) + GETEDO(x6, 0); + SDIVx(x2, xRAX, ed); + MSUBx(xRDX, x2, ed, xRAX); + MOVx_REG(xRAX, x2); + } else { + GETEDO(x6, 0); + CBZxw_MARK(xRDX); + MVNx_REG(x2, xRDX); + CBZxw_MARK(x2); + if(ed!=x1) {MOVx_REG(x1, ed);} + CALL((void*)idiv64, -1); + B_NEXT_nocond; + MARK; + SDIVx(x2, xRAX, ed); + MSUBx(xRDX, x2, ed, xRAX); + MOVx_REG(xRAX, x2); + SET_DFNONE(x2) + } + } + break; + } + break; + + case 0xFF: + nextop = F8; + grab_segdata(dyn, addr, ninst, x6, seg); + switch((nextop>>3)&7) { + case 0: // INC Ed + INST_NAME("INC Ed"); + SETFLAGS(X_ALL&~X_CF, SF_SUBSET); + GETEDO(x6, 0); + emit_inc32(dyn, ninst, rex, ed, x3, x4); + WBACKO(x6); + break; + case 1: //DEC Ed + INST_NAME("DEC Ed"); + SETFLAGS(X_ALL&~X_CF, SF_SUBSET); + GETEDO(x6, 0); + emit_dec32(dyn, ninst, rex, ed, x3, x4); + WBACKO(x6); + break; + case 2: // CALL Ed + INST_NAME("CALL Ed"); + PASS2IF(((ninst && dyn->insts[ninst-1].x64.set_flags) + || ((ninst>1) && dyn->insts[ninst-2].x64.set_flags)), 1) + { + READFLAGS(X_PEND); // that's suspicious + } else { + SETFLAGS(X_ALL, SF_SET); //Hack to put flag in "don't care" state + } + GETEDOx(x6, 0); + BARRIER(1); + BARRIER_NEXT(1); + if(!dyn->insts || ninst==dyn->size-1) { + *need_epilog = 0; + *ok = 0; + } + GETIP(addr); + PUSH1(xRIP); + jump_to_next(dyn, 0, ed, ninst); + break; + case 4: // JMP Ed + INST_NAME("JMP Ed"); + BARRIER(1); + GETEDOx(x6, 0); + jump_to_next(dyn, 0, ed, ninst); + *need_epilog = 0; + *ok = 0; + break; + case 6: // Push Ed + 
INST_NAME("PUSH Ed"); + GETEDOx(x6, 0); + PUSH1(ed); + break; + + default: + DEFAULT; + } + break; + + default: + DEFAULT; + } + return addr; +} diff --git a/src/dynarec/arm64/dynarec_arm64_66.c b/src/dynarec/arm64/dynarec_arm64_66.c new file mode 100755 index 00000000..6e8c3b65 --- /dev/null +++ b/src/dynarec/arm64/dynarec_arm64_66.c @@ -0,0 +1,871 @@ +#include +#include +#include +#include +#include + +#include "debug.h" +#include "box64context.h" +#include "dynarec.h" +#include "emu/x64emu_private.h" +#include "emu/x64run_private.h" +#include "x64run.h" +#include "x64emu.h" +#include "box64stack.h" +#include "callback.h" +#include "emu/x64run_private.h" +#include "x64trace.h" +#include "dynarec_native.h" + +#include "arm64_printer.h" +#include "dynarec_arm64_private.h" +#include "dynarec_arm64_helper.h" +#include "dynarec_arm64_functions.h" + + +uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog) +{ + uint8_t opcode = F8; + uint8_t nextop, u8; + int16_t i16; + uint16_t u16; + uint64_t u64; + int32_t i32; + int64_t j64; + uint8_t gd, ed; + uint8_t wback, wb1; + int64_t fixedaddress; + MAYUSE(u8); + MAYUSE(u16); + MAYUSE(u64); + MAYUSE(j64); + + while((opcode==0x2E) || (opcode==0x66)) // ignoring CS: or multiple 0x66 + opcode = F8; + + while((opcode==0xF2) || (opcode==0xF3)) { + rep = opcode-0xF1; + opcode = F8; + } + // REX prefix before the F0 are ignored + rex.rex = 0; + while(opcode>=0x40 && opcode<=0x4f) { + rex.rex = opcode; + opcode = F8; + } + + if(rex.w && opcode!=0x0f) // rex.w cancels "66", but not for 66 0f type of prefix + return dynarec64_00(dyn, addr-1, ip, ninst, rex, rep, ok, need_epilog); // addr-1, to "put back" opcode + + switch(opcode) { + case 0x01: + INST_NAME("ADD Ew, Gw"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGW(x2); + GETEW(x1, 0); + emit_add16(dyn, ninst, x1, x2, x4, x5); + EWBACK; + break; + case 0x03: + INST_NAME("ADD Gw, Ew"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGW(x1); + GETEW(x2, 0); + emit_add16(dyn, ninst, x1, x2, x3, x4); + GWBACK; + break; + case 0x05: + INST_NAME("ADD AX, Iw"); + SETFLAGS(X_ALL, SF_SET_PENDING); + i32 = F16; + UXTHw(x1, xRAX); + MOV32w(x2, i32); + emit_add16(dyn, ninst, x1, x2, x3, x4); + BFIx(xRAX, x1, 0, 16); + break; + + case 0x09: + INST_NAME("OR Ew, Gw"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGW(x2); + GETEW(x1, 0); + emit_or16(dyn, ninst, x1, x2, x4, x2); + EWBACK; + break; + case 0x0B: + INST_NAME("OR Gw, Ew"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGW(x1); + GETEW(x2, 0); + emit_or16(dyn, ninst, x1, x2, x4, x3); + GWBACK; + break; + case 0x0D: + INST_NAME("OR AX, Iw"); + SETFLAGS(X_ALL, SF_SET_PENDING); + i32 = F16; + UXTHw(x1, xRAX); + MOV32w(x2, i32); + emit_or16(dyn, ninst, x1, x2, x3, x4); + BFIx(xRAX, x1, 0, 16); + break; + + case 0x0F: + addr = dynarec64_660F(dyn, addr, ip, ninst, rex, rep, ok, need_epilog); + break; + case 0x11: + INST_NAME("ADC Ew, Gw"); + READFLAGS(X_CF); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGW(x2); + GETEW(x1, 0); + emit_adc16(dyn, ninst, x1, x2, x4, x5); + EWBACK; + break; + case 0x13: + INST_NAME("ADC Gw, Ew"); + READFLAGS(X_CF); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGW(x1); + GETEW(x2, 0); + emit_adc16(dyn, ninst, x1, x2, x4, x3); + GWBACK; + break; + case 0x15: + INST_NAME("ADC AX, Iw"); + READFLAGS(X_CF); + SETFLAGS(X_ALL, SF_SET_PENDING); + i32 = F16; + UXTHw(x1, xRAX); + MOV32w(x2, i32); + emit_adc16(dyn, ninst, 
x1, x2, x3, x4); + BFIx(xRAX, x1, 0, 16); + break; + + case 0x19: + INST_NAME("SBB Ew, Gw"); + READFLAGS(X_CF); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGW(x2); + GETEW(x1, 0); + emit_sbb16(dyn, ninst, x1, x2, x4, x5); + EWBACK; + break; + case 0x1B: + INST_NAME("SBB Gw, Ew"); + READFLAGS(X_CF); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGW(x1); + GETEW(x2, 0); + emit_sbb16(dyn, ninst, x1, x2, x4, x3); + GWBACK; + break; + case 0x1D: + INST_NAME("SBB AX, Iw"); + READFLAGS(X_CF); + SETFLAGS(X_ALL, SF_SET_PENDING); + i16 = F16S; + UXTHw(x1, xRAX); + MOVZw(x2, i16); + emit_sbb16(dyn, ninst, x1, x2, x3, x4); + BFIx(xRAX, x1, 0, 16); + break; + + case 0x21: + INST_NAME("AND Ew, Gw"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGW(x2); + GETEW(x1, 0); + emit_and16(dyn, ninst, x1, x2, x4, x5); + EWBACK; + break; + case 0x23: + INST_NAME("AND Gw, Ew"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGW(x1); + GETEW(x2, 0); + emit_and16(dyn, ninst, x1, x2, x3, x4); + GWBACK; + break; + case 0x25: + INST_NAME("AND AX, Iw"); + SETFLAGS(X_ALL, SF_SET_PENDING); + i32 = F16; + UXTHw(x1, xRAX); + MOV32w(x2, i32); + emit_and16(dyn, ninst, x1, x2, x3, x4); + BFIx(xRAX, x1, 0, 16); + break; + + case 0x29: + INST_NAME("SUB Ew, Gw"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGW(x2); + GETEW(x1, 0); + emit_sub16(dyn, ninst, x1, x2, x4, x5); + EWBACK; + break; + case 0x2B: + INST_NAME("SUB Gw, Ew"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGW(x1); + GETEW(x2, 0); + emit_sub16(dyn, ninst, x1, x2, x3, x4); + GWBACK; + break; + case 0x2D: + INST_NAME("SUB AX, Iw"); + SETFLAGS(X_ALL, SF_SET_PENDING); + i32 = F16; + UXTHw(x1, xRAX); + MOV32w(x2, i32); + emit_sub16(dyn, ninst, x1, x2, x3, x4); + BFIx(xRAX, x1, 0, 16); + break; + + case 0x31: + INST_NAME("XOR Ew, Gw"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGW(x2); + GETEW(x1, 0); + emit_xor16(dyn, ninst, x1, x2, x4, x5); + EWBACK; + break; + case 0x33: + INST_NAME("XOR Gw, Ew"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGW(x1); + GETEW(x2, 0); + emit_xor16(dyn, ninst, x1, x2, x3, x4); + GWBACK; + break; + case 0x35: + INST_NAME("XOR AX, Iw"); + SETFLAGS(X_ALL, SF_SET_PENDING); + i32 = F16; + UXTHw(x1, xRAX); + MOV32w(x2, i32); + emit_xor16(dyn, ninst, x1, x2, x3, x4); + BFIx(xRAX, x1, 0, 16); + break; + + case 0x39: + INST_NAME("CMP Ew, Gw"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGW(x2); + GETEW(x1, 0); + emit_cmp16(dyn, ninst, x1, x2, x3, x4, x5); + break; + case 0x3B: + INST_NAME("CMP Gw, Ew"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGW(x1); + GETEW(x2, 0); + emit_cmp16(dyn, ninst, x1, x2, x3, x4, x5); + break; + case 0x3D: + INST_NAME("CMP AX, Iw"); + SETFLAGS(X_ALL, SF_SET_PENDING); + i32 = F16; + UXTHw(x1, xRAX); + if(i32) { + MOV32w(x2, i32); + emit_cmp16(dyn, ninst, x1, x2, x3, x4, x5); + } else { + emit_cmp16_0(dyn, ninst, x1, x3, x4); + } + break; + + case 0x64: + addr = dynarec64_6664(dyn, addr, ip, ninst, rex, rep, ok, need_epilog); + break; + + case 0x66: + addr = dynarec64_66(dyn, addr, ip, ninst, rex, rep, ok, need_epilog); + break; + + case 0x69: + INST_NAME("IMUL Gw,Ew,Iw"); + SETFLAGS(X_ALL, SF_PENDING); + nextop = F8; + UFLAG_DF(x1, d_imul16); + GETSEW(x1, 2); + i32 = F16S; + MOV32w(x2, i32); + MULw(x2, x2, x1); + UFLAG_RES(x2); + gd=x2; + GWBACK; + break; + + case 0x6B: + INST_NAME("IMUL Gw,Ew,Ib"); + SETFLAGS(X_ALL, SF_PENDING); + nextop = F8; + UFLAG_DF(x1, d_imul16); + GETSEW(x1, 1); + i32 = F8S; + MOV32w(x2, i32); 
+ MULw(x2, x2, x1); + UFLAG_RES(x2); + gd=x2; + GWBACK; + break; + + case 0x81: + case 0x83: + nextop = F8; + switch((nextop>>3)&7) { + case 0: //ADD + if(opcode==0x81) { + INST_NAME("ADD Ew, Iw"); + } else { + INST_NAME("ADD Ew, Ib"); + } + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEW(x1, (opcode==0x81)?2:1); + if(opcode==0x81) i16 = F16S; else i16 = F8S; + MOVZw(x5, i16); + emit_add16(dyn, ninst, ed, x5, x2, x4); + EWBACK; + break; + case 1: //OR + if(opcode==0x81) {INST_NAME("OR Ew, Iw");} else {INST_NAME("OR Ew, Ib");} + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEW(x1, (opcode==0x81)?2:1); + if(opcode==0x81) i16 = F16S; else i16 = F8S; + MOVZw(x5, i16); + emit_or16(dyn, ninst, x1, x5, x2, x4); + EWBACK; + break; + case 2: //ADC + if(opcode==0x81) {INST_NAME("ADC Ew, Iw");} else {INST_NAME("ADC Ew, Ib");} + READFLAGS(X_CF); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEW(x1, (opcode==0x81)?2:1); + if(opcode==0x81) i16 = F16S; else i16 = F8S; + MOVZw(x5, i16); + emit_adc16(dyn, ninst, x1, x5, x2, x4); + EWBACK; + break; + case 3: //SBB + if(opcode==0x81) {INST_NAME("SBB Ew, Iw");} else {INST_NAME("SBB Ew, Ib");} + READFLAGS(X_CF); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEW(x1, (opcode==0x81)?2:1); + if(opcode==0x81) i16 = F16S; else i16 = F8S; + MOVZw(x5, i16); + emit_sbb16(dyn, ninst, x1, x5, x2, x4); + EWBACK; + break; + case 4: //AND + if(opcode==0x81) {INST_NAME("AND Ew, Iw");} else {INST_NAME("AND Ew, Ib");} + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEW(x1, (opcode==0x81)?2:1); + if(opcode==0x81) i16 = F16S; else i16 = F8S; + MOVZw(x5, i16); + emit_and16(dyn, ninst, x1, x5, x2, x4); + EWBACK; + break; + case 5: //SUB + if(opcode==0x81) {INST_NAME("SUB Ew, Iw");} else {INST_NAME("SUB Ew, Ib");} + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEW(x1, (opcode==0x81)?2:1); + if(opcode==0x81) i16 = F16S; else i16 = F8S; + MOVZw(x5, i16); + emit_sub16(dyn, ninst, x1, x5, x2, x4); + EWBACK; + break; + case 6: //XOR + if(opcode==0x81) {INST_NAME("XOR Ew, Iw");} else {INST_NAME("XOR Ew, Ib");} + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEW(x1, (opcode==0x81)?2:1); + if(opcode==0x81) i16 = F16S; else i16 = F8S; + MOVZw(x5, i16); + emit_xor16(dyn, ninst, x1, x5, x2, x4); + EWBACK; + break; + case 7: //CMP + if(opcode==0x81) {INST_NAME("CMP Ew, Iw");} else {INST_NAME("CMP Ew, Ib");} + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEW(x1, (opcode==0x81)?2:1); + if(opcode==0x81) i16 = F16S; else i16 = F8S; + if(i16) { + MOVZw(x2, i16); + emit_cmp16(dyn, ninst, x1, x2, x3, x4, x5); + } else + emit_cmp16_0(dyn, ninst, x1, x3, x4); + break; + } + break; + + case 0x85: + INST_NAME("TEST Ew, Gw"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETEW(x1, 0); + GETGW(x2); + emit_test16(dyn, ninst, x1, x2, x3, x4, x5); + break; + + case 0x89: + INST_NAME("MOV Ew, Gw"); + nextop = F8; + GETGD; // don't need GETGW here + if(MODREG) { + ed = xRAX+(nextop&7)+(rex.b<<3); + if(ed!=gd) { + BFIx(ed, gd, 0, 16); + } + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<1, 1, rex, 0, 0); + STRH_U12(gd, ed, fixedaddress); + } + break; + case 0x8B: + INST_NAME("MOV Gw, Ew"); + nextop = F8; + GETGD; // don't need GETGW neither + if(MODREG) { + ed = xRAX+(nextop&7)+(rex.b<<3); + if(ed!=gd) { + BFIx(gd, ed, 0, 16); + } + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<1, 1, rex, 0, 0); + LDRH_U12(x1, ed, fixedaddress); + BFIx(gd, x1, 0, 16); + } + break; + + case 0x90: + case 0x91: + case 0x92: + case 0x93: + case 0x94: + case 0x95: + case 0x96: + case 0x97: + gd = 
xRAX+(opcode&0x07)+(rex.b<<3); + if(gd==xRAX) { + INST_NAME("NOP"); + } else { + INST_NAME("XCHG AX, Reg"); + MOVw_REG(x2, xRAX); + BFIx(xRAX, gd, 0, 16); + BFIx(gd, x2, 0, 16); + } + break; + + case 0x98: + INST_NAME("CBW"); + SXTBw(x1, xRAX); + BFIw(xRAX, x1, 0, 16); + break; + + case 0xA1: + INST_NAME("MOV EAX,Od"); + u64 = F64; + MOV64x(x1, u64); + LDRH_U12(x2, x1, 0); + BFIx(xRAX, x2, 0, 16); + break; + + case 0xA3: + INST_NAME("MOV Od,EAX"); + u64 = F64; + MOV64x(x1, u64); + STRH_U12(xRAX, x1, 0); + break; + + case 0xA5: + if(rep) { + INST_NAME("REP MOVSW"); + CBZx_NEXT(xRCX); + TBNZ_MARK2(xFlags, F_DF); + MARK; // Part with DF==0 + LDRH_S9_postindex(x1, xRSI, 2); + STRH_S9_postindex(x1, xRDI, 2); + SUBx_U12(xRCX, xRCX, 1); + CBNZx_MARK(xRCX); + B_NEXT_nocond; + MARK2; // Part with DF==1 + LDRH_S9_postindex(x1, xRSI, -2); + STRH_S9_postindex(x1, xRDI, -2); + SUBx_U12(xRCX, xRCX, 1); + CBNZx_MARK2(xRCX); + // done + } else { + INST_NAME("MOVSW"); + GETDIR(x3, 2); + LDRH_U12(x1, xRSI, 0); + STRH_U12(x1, xRDI, 0); + ADDx_REG(xRSI, xRSI, x3); + ADDx_REG(xRDI, xRDI, x3); + } + break; + + case 0xA9: + INST_NAME("TEST AX,Iw"); + SETFLAGS(X_ALL, SF_SET_PENDING); + u16 = F16; + MOV32w(x2, u16); + UBFXx(x1, xRAX, 0, 16); + emit_test16(dyn, ninst, x1, x2, x3, x4, x5); + break; + + case 0xAB: + if(rep) { + INST_NAME("REP STOSW"); + CBZx_NEXT(xRCX); + TBNZ_MARK2(xFlags, F_DF); + MARK; // Part with DF==0 + STRH_S9_postindex(xRAX, xRDI, 2); + SUBx_U12(xRCX, xRCX, 1); + CBNZx_MARK(xRCX); + B_NEXT_nocond; + MARK2; // Part with DF==1 + STRH_S9_postindex(xRAX, xRDI, -2); + SUBx_U12(xRCX, xRCX, 1); + CBNZx_MARK2(xRCX); + // done + } else { + INST_NAME("STOSW"); + GETDIR(x3, 2); + STRH_U12(xRAX, xRDI, 0); + ADDx_REG(xRDI, xRDI, x3); + } + break; + + case 0xB8: + case 0xB9: + case 0xBA: + case 0xBB: + case 0xBC: + case 0xBD: + case 0xBE: + case 0xBF: + INST_NAME("MOV Reg16, Iw"); + u16 = F16; + MOV32w(x1, u16); + gd = xRAX+(opcode&7)+(rex.b<<3); + BFIx(gd, x1, 0, 16); + break; + + case 0xC1: + nextop = F8; + switch((nextop>>3)&7) { + case 0: + INST_NAME("ROL Ew, Ib"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + SETFLAGS(X_OF|X_CF, SF_SET); + GETEW(x1, 1); + u8 = F8; + MOV32w(x2, u8); + CALL_(rol16, x1, x3); + EWBACK; + break; + case 1: + INST_NAME("ROR Ew, Ib"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + SETFLAGS(X_OF|X_CF, SF_SET); + GETEW(x1, 1); + u8 = F8; + MOV32w(x2, u8); + CALL_(ror16, x1, x3); + EWBACK; + break; + case 2: + INST_NAME("RCL Ew, Ib"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + READFLAGS(X_CF); + SETFLAGS(X_OF|X_CF, SF_SET); + GETEW(x1, 1); + u8 = F8; + MOV32w(x2, u8); + CALL_(rcl16, x1, x3); + EWBACK; + break; + case 3: + INST_NAME("RCR Ew, Ib"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + READFLAGS(X_CF); + SETFLAGS(X_OF|X_CF, SF_SET); + GETEW(x1, 1); + u8 = F8; + MOV32w(x2, u8); + CALL_(rcr16, x1, x3); + EWBACK; + break; + case 4: + case 6: + INST_NAME("SHL Ew, Ib"); + UFLAG_IF {MESSAGE(LOG_DUMP, "Need Optimization for flags\n");} + SETFLAGS(X_ALL, SF_PENDING); + GETEW(x1, 1); + u8 = F8; + MOV32w(x2, (u8&0x1f)); + UFLAG_OP12(ed, x2) + LSLw_IMM(ed, ed, u8&0x1f); + EWBACK; + UFLAG_RES(ed); + UFLAG_DF(x3, d_shl16); + break; + case 5: + INST_NAME("SHR Ed, Ib"); + UFLAG_IF {MESSAGE(LOG_DUMP, "Need Optimization for flags\n");} + SETFLAGS(X_ALL, SF_PENDING); + GETEW(x1, 1); + u8 = F8; + MOV32w(x2, (u8&0x1f)); + UFLAG_OP12(ed, x2) + LSRw_IMM(ed, ed, u8&0x1f); + EWBACK; + UFLAG_RES(ed); + UFLAG_DF(x3, d_shr16); + break; + case 7: + INST_NAME("SAR Ed, Ib"); + SETFLAGS(X_ALL, 
SF_PENDING); + UFLAG_IF {MESSAGE(LOG_DUMP, "Need Optimization for flags\n");} + GETSEW(x1, 0); + u8 = F8; + MOV32w(x2, (u8&0x1f)); + UFLAG_OP12(ed, x2) + ASRw_REG(ed, ed, x2); + EWBACK; + UFLAG_RES(ed); + UFLAG_DF(x3, d_sar16); + break; + } + break; + + case 0xC7: + INST_NAME("MOV Ew, Iw"); + nextop = F8; + if(MODREG) { + ed = xRAX+(nextop&7)+(rex.b<<3); + u16 = F16; + MOV32w(x1, u16); + BFIx(ed, x1, 0, 16); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<1, 1, rex, 0, 2); + u16 = F16; + MOV32w(x1, u16); + STRH_U12(x1, ed, fixedaddress); + } + break; + + case 0xD1: + case 0xD3: + nextop = F8; + switch((nextop>>3)&7) { + case 0: + if(opcode==0xD1) { + INST_NAME("ROL Ew, 1"); + MOV32w(x2, 1); + } else { + INST_NAME("ROL Ew, CL"); + ANDSw_mask(x2, xRCX, 0, 0b00100); + } + MESSAGE(LOG_DUMP, "Need Optimization\n"); + SETFLAGS(X_OF|X_CF, SF_SET); + GETEW(x1, 0); + CALL_(rol16, x1, x3); + EWBACK; + break; + case 1: + if(opcode==0xD1) { + INST_NAME("ROR Ew, 1"); + MOV32w(x2, 1); + } else { + INST_NAME("ROR Ew, CL"); + ANDSw_mask(x2, xRCX, 0, 0b00100); + } + MESSAGE(LOG_DUMP, "Need Optimization\n"); + SETFLAGS(X_OF|X_CF, SF_SET); + GETEW(x1, 0); + CALL_(ror16, x1, x3); + EWBACK; + break; + case 2: + if(opcode==0xD1) {INST_NAME("RCL Ew, 1"); } else { INST_NAME("RCL Ew, CL");} + MESSAGE(LOG_DUMP, "Need Optimization\n"); + READFLAGS(X_CF); + SETFLAGS(X_OF|X_CF, SF_SET); + if(opcode==0xD1) {MOV32w(x2, 1);} else {ANDSw_mask(x2, xRCX, 0, 0b00100);} + GETEW(x1, 0); + CALL_(rcl16, x1, x3); + EWBACK; + break; + case 3: + if(opcode==0xD1) {INST_NAME("RCR Ew, 1");} else {INST_NAME("RCR Ew, CL");} + MESSAGE(LOG_DUMP, "Need Optimization\n"); + READFLAGS(X_CF); + SETFLAGS(X_OF|X_CF, SF_SET); + if(opcode==0xD1) {MOV32w(x2, 1);} else {ANDSw_mask(x2, xRCX, 0, 0b00100);} + GETEW(x1, 0); + CALL_(rcr16, x1, x3); + EWBACK; + break; + case 4: + case 6: + if(opcode==0xD1) { + INST_NAME("SHL Ew, 1"); + MOV32w(x4, 1); + } else { + INST_NAME("SHL Ew, CL"); + ANDSw_mask(x4, xRCX, 0, 0b00100); + } + UFLAG_IF {MESSAGE(LOG_DUMP, "Need Optimization for flags\n");} + SETFLAGS(X_ALL, SF_PENDING); + GETEW(x1, 0); + UFLAG_OP12(ed, x4) + LSLw_REG(ed, ed, x4); + EWBACK; + UFLAG_RES(ed); + UFLAG_DF(x3, d_shl16); + break; + case 5: + if(opcode==0xD1) { + INST_NAME("SHR Ew, 1"); + MOV32w(x4, 1); + } else { + INST_NAME("SHR Ew, CL"); + ANDSw_mask(x4, xRCX, 0, 0b00100); + } + UFLAG_IF {MESSAGE(LOG_DUMP, "Need Optimization for flags\n");} + SETFLAGS(X_ALL, SF_PENDING); + GETEW(x1, 0); + UFLAG_OP12(ed, x4) + LSRw_REG(ed, ed, x4); + EWBACK; + UFLAG_RES(ed); + UFLAG_DF(x3, d_shr16); + break; + case 7: + if(opcode==0xD1) { + INST_NAME("SAR Ew, 1"); + MOV32w(x4, 1); + } else { + INST_NAME("SAR Ew, CL"); + ANDSw_mask(x4, xRCX, 0, 0b00100); + } + UFLAG_IF {MESSAGE(LOG_DUMP, "Need Optimization for flags\n");} + SETFLAGS(X_ALL, SF_PENDING); + GETSEW(x1, 0); + UFLAG_OP12(ed, x4) + ASRw_REG(ed, ed, x4); + EWBACK; + UFLAG_RES(ed); + UFLAG_DF(x3, d_sar16); + break; + } + break; + + case 0xF7: + nextop = F8; + switch((nextop>>3)&7) { + case 0: + case 1: + INST_NAME("TEST Ew, Iw"); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEW(x1, 2); + u16 = F16; + MOV32w(x2, u16); + emit_test16(dyn, ninst, x1, x2, x3, x4, x5); + break; + case 2: + INST_NAME("NOT Ew"); + GETEW(x1, 0); + MVNw_REG(ed, ed); + EWBACK; + break; + case 3: + INST_NAME("NEG Ew"); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEW(x1, 0); + emit_neg16(dyn, ninst, ed, x2, x4); + EWBACK; + break; + case 4: + INST_NAME("MUL AX, Ew"); + SETFLAGS(X_ALL, SF_PENDING); + 
UFLAG_DF(x1, d_mul16); + GETEW(x1, 0); + UXTHw(x2, xRAX); + MULw(x1, x2, x1); + UFLAG_RES(x1); + BFIx(xRAX, x1, 0, 16); + BFXILx(xRDX, x1, 16, 16); + break; + case 5: + INST_NAME("IMUL AX, Ew"); + SETFLAGS(X_ALL, SF_PENDING); + UFLAG_DF(x1, d_imul16); + GETSEW(x1, 0); + SXTHw(x2, xRAX); + MULw(x1, x2, x1); + UFLAG_RES(x1); + BFIx(xRAX, x1, 0, 16); + BFXILx(xRDX, x1, 16, 16); + break; + case 6: + INST_NAME("DIV Ew"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + SETFLAGS(X_ALL, SF_SET); + GETEW(x1, 0); + CALL(div16, -1); + break; + case 7: + INST_NAME("IDIV Ew"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + SETFLAGS(X_ALL, SF_SET); + GETEW(x1, 0); + CALL(idiv16, -1); + break; + } + break; + + case 0xFF: + nextop = F8; + switch((nextop>>3)&7) { + case 0: + INST_NAME("INC Ew"); + SETFLAGS(X_ALL&~X_CF, SF_SUBSET); + GETEW(x1, 0); + emit_inc16(dyn, ninst, x1, x2, x4); + EWBACK; + break; + case 1: + INST_NAME("DEC Ew"); + SETFLAGS(X_ALL&~X_CF, SF_SUBSET); + GETEW(x1, 0); + emit_dec16(dyn, ninst, x1, x2, x4); + EWBACK; + break; + default: + DEFAULT; + } + break; + default: + DEFAULT; + } + return addr; +} diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c new file mode 100755 index 00000000..39143bf9 --- /dev/null +++ b/src/dynarec/arm64/dynarec_arm64_660f.c @@ -0,0 +1,1946 @@ +#include +#include +#include +#include +#include + +#include "debug.h" +#include "box64context.h" +#include "dynarec.h" +#include "emu/x64emu_private.h" +#include "emu/x64run_private.h" +#include "x64run.h" +#include "x64emu.h" +#include "box64stack.h" +#include "callback.h" +#include "emu/x64run_private.h" +#include "x64trace.h" +#include "dynarec_native.h" + +#include "arm64_printer.h" +#include "dynarec_arm64_private.h" +#include "dynarec_arm64_functions.h" +#include "dynarec_arm64_helper.h" + +// Get EX as a quad +#define GETEX(a, D) \ + if(MODREG) { \ + a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); \ + } else { \ + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, 0, D); \ + a = fpu_get_scratch(dyn); \ + VLDR128_U12(a, ed, fixedaddress); \ + } + +#define GETG gd = ((nextop&0x38)>>3)+(rex.r<<3) + +#define GETGX(a) \ + gd = ((nextop&0x38)>>3)+(rex.r<<3); \ + a = sse_get_reg(dyn, ninst, x1, gd) + +#define GETGX_empty(a) \ + gd = ((nextop&0x38)>>3)+(rex.r<<3); \ + a = sse_get_reg_empty(dyn, ninst, x1, gd) + +uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog) +{ + (void)ip; (void)rep; (void)need_epilog; + + uint8_t opcode = F8; + uint8_t nextop, u8; + int32_t i32; + uint8_t gd, ed; + uint8_t wback, wb1; + uint8_t eb1, eb2; + int64_t j64; + uint64_t tmp64u, tmp64u2; + int v0, v1; + int q0, q1; + int d0, d1; + int64_t fixedaddress; + + MAYUSE(d0); + MAYUSE(d1); + MAYUSE(q0); + MAYUSE(q1); + MAYUSE(eb1); + MAYUSE(eb2); + MAYUSE(j64); + #if STEP > 1 + static const int8_t mask_shift8[] = { -7, -6, -5, -4, -3, -2, -1, 0 }; + #endif + + switch(opcode) { + + case 0x10: + INST_NAME("MOVUPD Gx,Ex"); + nextop = F8; + GETG; + if(MODREG) { + v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); + v0 = sse_get_reg_empty(dyn, ninst, x1, gd); + VMOVQ(v0, v1); + } else { + v0 = sse_get_reg_empty(dyn, ninst, x1, gd); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, 0, 0); + VLDR128_U12(v0, ed, fixedaddress); + } + break; + case 0x11: + INST_NAME("MOVUPD Ex,Gx"); + nextop = F8; + GETG; + v0 = sse_get_reg(dyn, ninst, x1, gd); + if(MODREG) { 
+ v1 = sse_get_reg_empty(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); + VMOVQ(v1, v0); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, 0, 0); + VSTR128_U12(v0, ed, fixedaddress); + } + break; + case 0x12: + INST_NAME("MOVLPD Gx, Eq"); + nextop = F8; + GETGX(v0); + if(MODREG) { + // access register instead of memory is bad opcode! + DEFAULT; + return addr; + } + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); + VLD1_64(v0, 0, ed); + break; + case 0x13: + INST_NAME("MOVLPD Eq, Gx"); + nextop = F8; + GETGX(v0); + if(MODREG) { + // access register instead of memory is bad opcode! + DEFAULT; + return addr; + } + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); + VST1_64(v0, 0, ed); + break; + case 0x14: + INST_NAME("UNPCKLPD Gx, Ex"); + nextop = F8; + GETGX(v0); + if(MODREG) { + v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); + VMOVeD(v0, 1, v1, 0); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); + VLD1_64(v0, 1, ed); + } + break; + case 0x15: + INST_NAME("UNPCKHPD Gx, Ex"); + nextop = F8; + GETGX(v0); + VMOVeD(v0, 0, v0, 1); + if(MODREG) { + v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); + VMOVeD(v0, 1, v1, 1); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); + v1 = fpu_get_scratch(dyn); + ADDx_U12(ed, ed, 8); + VLD1_64(v0, 1, ed); + } + break; + case 0x16: + INST_NAME("MOVHPD Gx, Eq"); + nextop = F8; + GETGX(v0); + if(MODREG) { + // access register instead of memory is bad opcode! + DEFAULT; + return addr; + } + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); + VLD1_64(v0, 1, ed); + break; + case 0x17: + INST_NAME("MOVHPD Eq, Gx"); + nextop = F8; + GETGX(v0); + if(MODREG) { + // access register instead of memory is bad opcode! + DEFAULT; + return addr; + } + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); + VST1_64(v0, 1, ed); + break; + + case 0x1F: + INST_NAME("NOP (multibyte)"); + nextop = F8; + FAKEED; + break; + + case 0x28: + INST_NAME("MOVAPD Gx,Ex"); + nextop = F8; + GETG; + if(MODREG) { + ed = (nextop&7)+(rex.b<<3); + v1 = sse_get_reg(dyn, ninst, x1, ed); + v0 = sse_get_reg_empty(dyn, ninst, x1, gd); + VMOVQ(v0, v1); + } else { + v0 = sse_get_reg_empty(dyn, ninst, x1, gd); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, 0, 0); + VLDR128_U12(v0, ed, fixedaddress); + } + break; + case 0x29: + INST_NAME("MOVAPD Ex,Gx"); + nextop = F8; + GETG; + v0 = sse_get_reg(dyn, ninst, x1, gd); + if(MODREG) { + ed = (nextop&7)+(rex.b<<3); + v1 = sse_get_reg_empty(dyn, ninst, x1, ed); + VMOVQ(v1, v0); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, 0, 0); + VSTR128_U12(v0, ed, fixedaddress); + } + break; + + case 0x2E: + // no special check... 
+ case 0x2F: + if(opcode==0x2F) {INST_NAME("COMISD Gx, Ex");} else {INST_NAME("UCOMISD Gx, Ex");} + SETFLAGS(X_ALL, SF_SET); + nextop = F8; + GETGX(v0); + GETEX(q0, 0); + FCMPD(v0, q0); + FCOMI(x1, x2); + break; + + case 0x38: // SSSE3 opcodes + nextop = F8; + switch(nextop) { + case 0x00: + INST_NAME("PSHUFB Gx, Ex"); + nextop = F8; + GETGX(q0); + GETEX(q1, 0); + d0 = fpu_get_scratch(dyn); + MOVIQ_8(d0, 0b10001111); + VANDQ(d0, d0, q1); // mask the index + VTBLQ1_8(q0, q0, d0); + break; + case 0x01: + INST_NAME("PHADDW Gx, Ex"); + nextop = F8; + GETGX(q0); + GETEX(q1, 0); + VADDPQ_16(q0, q0, q1); + break; + case 0x02: + INST_NAME("PHADDD Gx, Ex"); + nextop = F8; + GETGX(q0); + GETEX(q1, 0); + VADDPQ_32(q0, q0, q1); + break; + + case 0x04: + INST_NAME("PMADDUBSW Gx,Ex"); + nextop = F8; + GETGX(q0); + GETEX(q1, 0); + v0 = fpu_get_scratch(dyn); + v1 = fpu_get_scratch(dyn); + UXTL_8(v0, q0); // this is unsigned, so 0 extended + SXTL_8(v1, q1); // this is signed + VMULQ_16(v0, v0, v1); + SADDLPQ_16(v1, v0); + UXTL2_8(v0, q0); // this is unsigned + SQXTN_16(q0, v1); // SQXTN reset the vector so need to grab the high part first + SXTL2_8(v1, q1); // this is signed + VMULQ_16(v0, v0, v1); + SADDLPQ_16(v0, v0); + SQXTN2_16(q0, v0); + break; + + case 0x08: + INST_NAME("PSIGNB Gx, Ex"); + nextop = F8; + GETGX(q0); + GETEX(q1, 0); + v1 = fpu_get_scratch(dyn); + v0 = fpu_get_scratch(dyn); + NEGQ_8(v0, q0); // get NEG + CMLTQ_0_8(v1, q1); // calculate mask + VBICQ(q0, q0, v1); // apply not mask on dest + VANDQ(v0, v0, v1); // apply mask on src + VORRQ(q0, q0, v0); // merge + CMEQQ_0_8(v1, q1); // handle case where Ex is 0 + VBICQ(q0, q0, v1); + break; + case 0x09: + INST_NAME("PSIGNW Gx, Ex"); + nextop = F8; + GETGX(q0); + GETEX(q1, 0); + v1 = fpu_get_scratch(dyn); + v0 = fpu_get_scratch(dyn); + NEGQ_16(v0, q0); // get NEG + CMLTQ_0_16(v1, q1); // calculate mask + VBICQ(q0, q0, v1); // apply not mask on dest + VANDQ(v0, v0, v1); // apply mask on src + VORRQ(q0, q0, v0); // merge + CMEQQ_0_16(v1, q1); // handle case where Ex is 0 + VBICQ(q0, q0, v1); + break; + case 0x0A: + INST_NAME("PSIGND Gx, Ex"); + nextop = F8; + GETGX(q0); + GETEX(q1, 0); + v1 = fpu_get_scratch(dyn); + v0 = fpu_get_scratch(dyn); + NEGQ_32(v0, q0); // get NEG + CMLTQ_0_32(v1, q1); // calculate mask + VBICQ(q0, q0, v1); // apply not mask on dest + VANDQ(v0, v0, v1); // apply mask on src + VORRQ(q0, q0, v0); // merge + CMEQQ_0_32(v1, q1); // handle case where Ex is 0 + VBICQ(q0, q0, v1); + break; + case 0x0B: + INST_NAME("PMULHRSW Gx,Ex"); + nextop = F8; + GETGX(q0); + GETEX(q1, 0); + SQRDMULHQ_16(q0, q0, q1); + break; + + case 0x1C: + INST_NAME("PABSB Gx,Ex"); + nextop = F8; + GETEX(q1, 0); + GETG; + q0 = sse_get_reg_empty(dyn, ninst, x1, gd); + ABSQ_8(q0, q1); + break; + case 0x1D: + INST_NAME("PABSW Gx,Ex"); + nextop = F8; + GETEX(q1, 0); + GETG; + q0 = sse_get_reg_empty(dyn, ninst, x1, gd); + ABSQ_16(q0, q1); + break; + case 0x1E: + INST_NAME("PABSD Gx,Ex"); + nextop = F8; + GETEX(q1, 0); + GETG; + q0 = sse_get_reg_empty(dyn, ninst, x1, gd); + ABSQ_32(q0, q1); + break; + + case 0x20: + INST_NAME("PMOVSXBW Gx, Ex"); // SSE4 opcode! + nextop = F8; + GETEX(q1, 0); + GETGX_empty(q0); + SXTL_8(q0, q1); // 8bits->16bits + break; + case 0x21: + INST_NAME("PMOVSXBD Gx, Ex"); // SSE4 opcode! + nextop = F8; + GETEX(q1, 0); + GETGX_empty(q0); + SXTL_8(q0, q1); // 8bits->16bits + SXTL_16(q0, q0); //16bits->32bits + break; + case 0x22: + INST_NAME("PMOVSXBQ Gx, Ex"); // SSE4 opcode! 
+ nextop = F8; + GETEX(q1, 0); + GETGX_empty(q0); + SXTL_8(q0, q1); // 8bits->16bits + SXTL_16(q0, q0); //16bits->32bits + SXTL_32(q0, q0); //32bits->64bits + break; + case 0x23: + INST_NAME("PMOVSXWD Gx, Ex"); // SSE4 opcode! + nextop = F8; + GETEX(q1, 0); + GETGX_empty(q0); + SXTL_16(q0, q1); // 16bits->32bits + break; + case 0x24: + INST_NAME("PMOVSXWQ Gx, Ex"); // SSE4 opcode! + nextop = F8; + GETEX(q1, 0); + GETGX_empty(q0); + SXTL_16(q0, q1); // 16bits->32bits + SXTL_32(q0, q1); // 32bits->64bits + break; + case 0x25: + INST_NAME("PMOVSXDQ Gx, Ex"); // SSE4 opcode! + nextop = F8; + GETEX(q1, 0); + GETGX_empty(q0); + SXTL_32(q0, q1); // 32bits->64bits + break; + + case 0x39: + INST_NAME("PMINSD Gx, Ex"); // SSE4 opcode! + nextop = F8; + GETEX(q1, 0); + GETGX(q0); + SMINQ_32(q0, q0, q1); + break; + + case 0x3D: + INST_NAME("PMINSD Gx, Ex"); // SSE4 opcode! + nextop = F8; + GETEX(q1, 0); + GETGX(q0); + SMAXQ_32(q0, q0, q1); + break; + + case 0xDB: + INST_NAME("AESIMC Gx, Ex"); // AES-NI + nextop = F8; + if(arm64_aes) { + GETEX(q1, 0); + GETGX_empty(q0); + AESIMC(q0, q1); + } else { + GETEX(q1, 0); + GETGX_empty(q0); + if(q0!=q1) { + VMOVQ(q0, q1); + } + sse_forget_reg(dyn, ninst, gd); + MOV32w(x1, gd); + CALL(arm_aesimc, -1); + } + break; + case 0xDC: + INST_NAME("AESENC Gx, Ex"); // AES-NI + nextop = F8; + if(arm64_aes) { + GETEX(q1, 0); + GETGX(q0); + v0 = fpu_get_scratch(dyn); // ARM64 internal operation differs a bit from x86_64 + VEORQ(v0, q0, q1); + AESE(v0, q1); + AESMC(v0, v0); + VEORQ(q0, v0, q1); + } else { + GETG; + sse_forget_reg(dyn, ninst, gd); + MOV32w(x1, gd); + CALL(arm_aese, -1); + GETGX(q0); + GETEX(q1, 0); + VEORQ(q0, q0, q1); + } + break; + case 0xDD: + INST_NAME("AESENCLAST Gx, Ex"); // AES-NI + nextop = F8; + if(arm64_aes) { + GETEX(q1, 0); + GETGX(q0); + v0 = fpu_get_scratch(dyn); // ARM64 internal operation differs a bit from x86_64 + VEORQ(v0, q0, q1); + AESE(v0, q1); + VEORQ(q0, v0, q1); + } else { + GETG; + sse_forget_reg(dyn, ninst, gd); + MOV32w(x1, gd); + CALL(arm_aeselast, -1); + GETGX(q0); + GETEX(q1, 0); + VEORQ(q0, q0, q1); + } + break; + case 0xDE: + INST_NAME("AESDEC Gx, Ex"); // AES-NI + nextop = F8; + if(arm64_aes) { + GETEX(q1, 0); + GETGX(q0); + v0 = fpu_get_scratch(dyn); // ARM64 internal operation differs a bit from x86_64 + VEORQ(v0, q0, q1); + AESD(v0, q1); + AESIMC(v0, v0); + VEORQ(q0, v0, q1); + } else { + GETG; + sse_forget_reg(dyn, ninst, gd); + MOV32w(x1, gd); + CALL(arm_aesd, -1); + GETGX(q0); + GETEX(q1, 0); + VEORQ(q0, q0, q1); + } + break; + case 0xDF: + INST_NAME("AESDECLAST Gx, Ex"); // AES-NI + nextop = F8; + if(arm64_aes) { + GETEX(q1, 0); + GETGX(q0); + v0 = fpu_get_scratch(dyn); // ARM64 internal operation differs a bit from x86_64 + VEORQ(v0, q0, q1); + AESD(v0, q1); + VEORQ(q0, v0, q1); + } else { + GETG; + sse_forget_reg(dyn, ninst, gd); + MOV32w(x1, gd); + CALL(arm_aesdlast, -1); + GETGX(q0); + GETEX(q1, 0); + VEORQ(q0, q0, q1); + } + break; + + default: + DEFAULT; + } + break; + + case 0x3A: // these are some more SSSE3 opcodes + opcode = F8; + switch(opcode) { + case 0x0B: + INST_NAME("ROUNDSD Gx, Ex, Ib"); + nextop = F8; + GETGX(q0); + GETEX(q1, 1); + u8 = F8; + v1 = fpu_get_scratch(dyn); + if(u8&4) { + u8 = sse_setround(dyn, ninst, x1, x2, x3); + FRINTXD(v1, q1); + x87_restoreround(dyn, ninst, u8); + } else { + const uint8_t rounds[] = {0, 2, 1, 3}; + MAYUSE(rounds); + FRINTRRD(v1, q1, rounds[u8&3]); + } + VMOVeD(q0, 0, v1, 0); + break; + + case 0x0F: + INST_NAME("PALIGNR Gx, Ex, Ib"); + nextop = F8; + GETGX(q0); + 
GETEX(q1, 1); + u8 = F8; + if(u8>31) { + VEORQ(q0, q0, q0); + } else if(u8>15) { + d0 = fpu_get_scratch(dyn); + VEORQ(d0, d0, d0); + VEXTQ_8(q0, q0, d0, u8-16); + } else { + VEXTQ_8(q0, q1, q0, u8); + } + break; + + case 0x22: + INST_NAME("PINSRD Gx, ED, Ib"); + nextop = F8; + GETGX(q0); + GETED(1); + u8 = F8; + if(rex.w) { + VMOVQDfrom(q0, (u8&1), ed); + } else { + VMOVQSfrom(q0, (u8&3), ed); + } + break; + + default: + DEFAULT; + } + break; + + #define GO(GETFLAGS, NO, YES, F) \ + READFLAGS(F); \ + GETFLAGS; \ + nextop=F8; \ + GETGD; \ + if(MODREG) { \ + ed = xRAX+(nextop&7)+(rex.b<<3); \ + } else { \ + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<1, 1, rex, 0, 0); \ + LDRH_U12(x1, ed, fixedaddress); \ + ed = x1; \ + } \ + Bcond(NO, +8); \ + BFIx(gd, ed, 0, 16); + + GOCOND(0x40, "CMOV", "Gw, Ew"); + #undef GO + + case 0x50: + nextop = F8; + INST_NAME("PMOVMSKD Gd, Ex"); + GETEX(q0, 0); + GETGD; + VMOVQDto(x1, q0, 1); + VMOVQDto(gd, q0, 0); + LSRx(x1, x1, 63); + LSRx(gd, gd, 63); + BFIx(gd, x1, 1, 1); + break; + + case 0x54: + INST_NAME("ANDPD Gx, Ex"); + nextop = F8; + GETEX(q0, 0); + GETGX(v0); + VANDQ(v0, v0, q0); + break; + case 0x55: + INST_NAME("ANDNPD Gx, Ex"); + nextop = F8; + GETEX(q0, 0); + GETGX(v0); + VBICQ(v0, q0, v0); + break; + case 0x56: + INST_NAME("ORPD Gx, Ex"); + nextop = F8; + GETEX(q0, 0); + GETGX(v0); + VORRQ(v0, v0, q0); + break; + case 0x57: + INST_NAME("XORPD Gx, Ex"); + nextop = F8; + GETEX(q0, 0); + GETGX(v0); + VEORQ(v0, v0, q0); + break; + case 0x58: + INST_NAME("ADDPD Gx, Ex"); + nextop = F8; + GETEX(q0, 0); + GETGX(v0); + VFADDQD(v0, v0, q0); + break; + case 0x59: + INST_NAME("MULPD Gx, Ex"); + nextop = F8; + GETEX(q0, 0); + GETGX(v0); + VFMULQD(v0, v0, q0); + break; + case 0x5A: + INST_NAME("CVTPD2PS Gx, Ex"); + nextop = F8; + GETEX(v1, 0); + GETGX_empty(v0); + FCVTXN(v0, v1); + break; + case 0x5B: + INST_NAME("CVTPS2DQ Gx, Ex"); + nextop = F8; + GETEX(v1, 0); + GETGX_empty(v0); + #ifdef PRECISE_CVT + LDRH_U12(x1, xEmu, offsetof(x64emu_t, mxcsr)); + UBFXx(x1, x1, 13, 2); // extract round requested + LSLx_REG(x1, x1, 3); + // Construct a "switch case", with each case 2 instructions, so 8 bytes + ADR(xLR, GETMARK); + ADDx_REG(xLR, xLR, x1); + B(xLR); + MARK; + VFCVTNSQS(v0, v1); // 0: Nearest (even) + B_NEXT_nocond; + VFCVTMSQS(v0, v1); // 1: Toward -inf + B_NEXT_nocond; + VFCVTPSQS(v0, v1); // 2: Toward +inf + B_NEXT_nocond; + VFCVTZSQS(v0, v1); // 3: Toward 0 + #else + VFCVTNSQS(v0, v1); + #endif + break; + case 0x5C: + INST_NAME("SUBPD Gx, Ex"); + nextop = F8; + GETEX(q0, 0); + GETGX(v0); + VFSUBQD(v0, v0, q0); + break; + case 0x5D: + INST_NAME("MINPD Gx, Ex"); + nextop = F8; + GETEX(q0, 0); + GETGX(v0); + VFMINQD(v0, v0, q0); + break; + case 0x5E: + INST_NAME("DIVPD Gx, Ex"); + nextop = F8; + GETEX(q0, 0); + GETGX(v0); + VFDIVQD(v0, v0, q0); + break; + case 0x5F: + INST_NAME("MAXPD Gx, Ex"); + nextop = F8; + GETEX(q0, 0); + GETGX(v0); + VFMAXQD(v0, v0, q0); + break; + case 0x60: + INST_NAME("PUNPCKLBW Gx,Ex"); + nextop = F8; + GETGX(v0); + GETEX(q0, 0); + VZIP1Q_8(v0, v0, q0); + break; + case 0x61: + INST_NAME("PUNPCKLWD Gx,Ex"); + nextop = F8; + GETGX(v0); + GETEX(q0, 0); + VZIP1Q_16(v0, v0, q0); + break; + case 0x62: + INST_NAME("PUNPCKLDQ Gx,Ex"); + nextop = F8; + GETGX(v0); + GETEX(q0, 0); + VZIP1Q_32(v0, v0, q0); + break; + case 0x63: + INST_NAME("PACKSSWB Gx,Ex"); + nextop = F8; + GETGX(q0); + GETEX(q1, 0); + SQXTN_8(q0, q0); + if(q0==q1) { + VMOVeD(q0, 1, q0, 0); + } else { + SQXTN2_8(q0, q1); + } + break; + case 0x64: + 
INST_NAME("PCMPGTB Gx,Ex"); + nextop = F8; + GETGX(v0); + GETEX(v1, 0); + VCMGTQ_8(v0, v0, v1); + break; + case 0x65: + INST_NAME("PCMPGTW Gx,Ex"); + nextop = F8; + GETGX(v0); + GETEX(v1, 0); + VCMGTQ_16(v0, v0, v1); + break; + case 0x66: + INST_NAME("PCMPGTD Gx,Ex"); + nextop = F8; + GETGX(v0); + GETEX(v1, 0); + VCMGTQ_32(v0, v0, v1); + break; + case 0x67: + INST_NAME("PACKUSWB Gx, Ex"); + nextop = F8; + GETGX(v0); + GETEX(v1, 0); + SQXTUN_8(v0, v0); + if(v0==v1) { + VMOVeD(v0, 1, v0, 0); + } else { + SQXTUN2_8(v0, v1); + } + break; + case 0x68: + INST_NAME("PUNPCKHBW Gx,Ex"); + nextop = F8; + GETGX(q0); + GETEX(q1, 1); + VZIP2Q_8(q0, q0, q1); + break; + case 0x69: + INST_NAME("PUNPCKHWD Gx,Ex"); + nextop = F8; + GETGX(q0); + GETEX(q1, 1); + VZIP2Q_16(q0, q0, q1); + break; + case 0x6A: + INST_NAME("PUNPCKHDQ Gx,Ex"); + nextop = F8; + GETGX(q0); + GETEX(q1, 1); + VZIP2Q_32(q0, q0, q1); + break; + case 0x6B: + INST_NAME("PACKSSDW Gx,Ex"); + nextop = F8; + GETGX(v0); + GETEX(v1, 0); + SQXTN_16(v0, v0); + if(v0==v1) { + VMOVeD(v0, 1, v0, 0); + } else { + SQXTN2_16(v0, v1); + } + break; + case 0x6C: + INST_NAME("PUNPCKLQDQ Gx,Ex"); + nextop = F8; + GETGX(v0); + if(MODREG) { + v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); + VMOVeD(v0, 1, v1, 0); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); + VLD1_64(v0, 1, ed); + } + break; + case 0x6D: + INST_NAME("PUNPCKHQDQ Gx,Ex"); + nextop = F8; + GETGX(v0); + VMOVeD(v0, 0, v0, 1); + if(MODREG) { + v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); + VMOVeD(v0, 1, v1, 1); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); + ADDSx_U12(x1, ed, 8); + VLD1_64(v0, 1, x1); + } + break; + case 0x6E: + INST_NAME("MOVD Gx, Ed"); + nextop = F8; + GETG; + GETED(0); + v0 = sse_get_reg_empty(dyn, ninst, x1, gd); + if(rex.w) { + FMOVDx(v0, ed); + } else { + VEORQ(v0, v0, v0); // RAZ vector + VMOVQSfrom(v0, 0, ed); + } + break; + case 0x6F: + INST_NAME("MOVDQA Gx,Ex"); + nextop = F8; + GETG; + v0 = sse_get_reg_empty(dyn, ninst, x1, gd); + if(MODREG) { + v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); + VMOVQ(v0, v1); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, 0, 0); + VLDR128_U12(v0, ed, fixedaddress); + } + break; + case 0x70: + INST_NAME("PSHUFD Gx,Ex,Ib"); + nextop = F8; + GETG; + i32 = -1; + if(MODREG) { + u8 = F8; + v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); + v0 = sse_get_reg_empty(dyn, ninst, x1, gd); + if(u8==0x4E) { + if(v0==v1) { + VEXTQ_8(v0, v0, v0, 8); // Swap Up/Lower 64bits parts + } else { + VMOVeD(v0, 0, v1, 1); + VMOVeD(v0, 1, v1, 0); + } + } else if(u8==0x00) { + // duplicate lower 32bits to all spot + if(v0!=v1) { + VMOVeS(v0, 0, v1, 0); + } + VMOVeS(v0, 1, v1, 0); + VMOVeD(v0, 1, v0, 0); + } else if(u8==0x55) { + // duplicate slot 1 to all spot + if(v0!=v1) { + VMOVeS(v0, 1, v1, 1); + } + VMOVeS(v0, 0, v1, 1); + VMOVeD(v0, 1, v0, 0); + } else if(u8==0xAA) { + // duplicate slot 2 to all spot + if(v0!=v1) { + VMOVeS(v0, 2, v1, 2); + } + VMOVeS(v0, 3, v1, 2); + VMOVeD(v0, 0, v0, 1); + } else if(u8==0xFF) { + // duplicate slot 3 to all spot + if(v0!=v1) { + VMOVeS(v0, 3, v1, 3); + } + VMOVeS(v0, 2, v1, 3); + VMOVeD(v0, 0, v0, 1); + } else if(v0!=v1) { + VMOVeS(v0, 0, v1, (u8>>(0*2))&3); + VMOVeS(v0, 1, v1, (u8>>(1*2))&3); + VMOVeS(v0, 2, v1, (u8>>(2*2))&3); + VMOVeS(v0, 3, v1, (u8>>(3*2))&3); + } else { + uint64_t swp[4] = { + (0)|(1<<8)|(2<<16)|(3<<24), + 
(4)|(5<<8)|(6<<16)|(7<<24), + (8)|(9<<8)|(10<<16)|(11<<24), + (12)|(13<<8)|(14<<16)|(15<<24) + }; + d0 = fpu_get_scratch(dyn); + tmp64u = swp[(u8>>(0*2))&3] | (swp[(u8>>(1*2))&3]<<32); + MOV64x(x2, tmp64u); + VMOVQDfrom(d0, 0, x2); + tmp64u2 = swp[(u8>>(2*2))&3] | (swp[(u8>>(3*2))&3]<<32); + if(tmp64u2==tmp64u) { + VMOVQDfrom(d0, 1, x2); + } else { + MOV64x(x3, tmp64u2); + VMOVQDfrom(d0, 1, x3); + } + VTBLQ1_8(v0, v1, d0); + } + } else { + v0 = sse_get_reg_empty(dyn, ninst, x1, gd); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 1); + u8 = F8; + if (u8) { + for (int i=0; i<4; ++i) { + int32_t idx = (u8>>(i*2))&3; + if(idx!=i32) { + ADDx_U12(x2, ed, idx*4); + i32 = idx; + } + VLD1_32(v0, i, x2); + } + } else { + VLDQ1R_32(v0, ed); + } + } + break; + case 0x71: + nextop = F8; + switch((nextop>>3)&7) { + case 2: + INST_NAME("PSRLW Ex, Ib"); + GETEX(q0, 1); + u8 = F8; + if(u8) { + if (u8>15) { + VEORQ(q0, q0, q0); + } else if(u8) { + VSHRQ_16(q0, q0, u8); + } + if(!MODREG) { + VSTR128_U12(q0, ed, fixedaddress); + } + } + break; + case 4: + INST_NAME("PSRAW Ex, Ib"); + GETEX(q0, 1); + u8 = F8; + if(u8>15) u8=15; + if(u8) { + VSSHRQ_16(q0, q0, u8); + } + if(!MODREG) { + VSTR128_U12(q0, ed, fixedaddress); + } + break; + case 6: + INST_NAME("PSLLW Ex, Ib"); + GETEX(q0, 1); + u8 = F8; + if(u8) { + if (u8>15) { + VEORQ(q0, q0, q0); + } else { + VSHLQ_16(q0, q0, u8); + } + if(!MODREG) { + VSTR128_U12(q0, ed, fixedaddress); + } + } + break; + default: + *ok = 0; + DEFAULT; + } + break; + case 0x72: + nextop = F8; + switch((nextop>>3)&7) { + case 2: + INST_NAME("PSRLD Ex, Ib"); + GETEX(q0, 1); + u8 = F8; + if(u8) { + if (u8>31) { + VEORQ(q0, q0, q0); + } else if(u8) { + VSHRQ_32(q0, q0, u8); + } + if(!MODREG) { + VSTR128_U12(q0, ed, fixedaddress); + } + } + break; + case 4: + INST_NAME("PSRAD Ex, Ib"); + GETEX(q0, 1); + u8 = F8; + if(u8>31) u8=31; + if(u8) { + VSSHRQ_32(q0, q0, u8); + } + if(!MODREG) { + VSTR128_U12(q0, ed, fixedaddress); + } + break; + case 6: + INST_NAME("PSLLD Ex, Ib"); + GETEX(q0, 1); + u8 = F8; + if(u8) { + if (u8>31) { + VEORQ(q0, q0, q0); + } else { + VSHLQ_32(q0, q0, u8); + } + if(!MODREG) { + VSTR128_U12(q0, ed, fixedaddress); + } + } + break; + default: + DEFAULT; + } + break; + case 0x73: + nextop = F8; + switch((nextop>>3)&7) { + case 2: + INST_NAME("PSRLQ Ex, Ib"); + GETEX(q0, 1); + u8 = F8; + if(u8) { + if (u8>63) { + VEORQ(q0, q0, q0); + } else if(u8) { + VSHRQ_64(q0, q0, u8); + } + if(!MODREG) { + VSTR128_U12(q0, ed, fixedaddress); + } + } + break; + case 3: + INST_NAME("PSRLDQ Ex, Ib"); + GETEX(q0, 1); + u8 = F8; + if(u8) { + if(u8>15) { + VEORQ(q0, q0, q0); + } else { + q1 = fpu_get_scratch(dyn); + VEORQ(q1, q1, q1); + VEXTQ_8(q0, q0, q1, u8); + } + if(!MODREG) { + VSTR128_U12(q0, ed, fixedaddress); + } + } + break; + case 6: + INST_NAME("PSLLQ Ex, Ib"); + GETEX(q0, 1); + u8 = F8; + if(u8) { + if (u8>63) { + VEORQ(q0, q0, q0); + } else { + VSHLQ_64(q0, q0, u8); + } + if(!MODREG) { + VSTR128_U12(q0, ed, fixedaddress); + } + } + break; + case 7: + INST_NAME("PSLLDQ Ex, Ib"); + GETEX(q0, 1); + u8 = F8; + if(u8) { + if(u8>15) { + VEORQ(q0, q0, q0); + } else if(u8>0) { + q1 = fpu_get_scratch(dyn); + VEORQ(q1, q1, q1); + VEXTQ_8(q0, q1, q0, 16-u8); + } + if(!MODREG) { + VSTR128_U12(q0, ed, fixedaddress); + } + } + break; + default: + DEFAULT; + } + break; + + case 0x74: + INST_NAME("PCMPEQB Gx,Ex"); + nextop = F8; + GETGX(v0); + GETEX(q0, 0); + VCMEQQ_8(v0, v0, q0); + break; + case 0x75: + INST_NAME("PCMPEQW Gx,Ex"); + nextop = F8; + 
GETGX(v0); + GETEX(q0, 0); + VCMEQQ_16(v0, v0, q0); + break; + case 0x76: + INST_NAME("PCMPEQD Gx,Ex"); + nextop = F8; + GETGX(v0); + GETEX(q0, 0); + VCMEQQ_32(v0, v0, q0); + break; + + case 0x7E: + INST_NAME("MOVD Ed,Gx"); + nextop = F8; + GETGX(v0); + if(rex.w) { + if(MODREG) { + ed = xRAX + (nextop&7) + (rex.b<<3); + VMOVQDto(ed, v0, 0); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0); + VSTR64_U12(v0, ed, fixedaddress); + } + } else { + if(MODREG) { + ed = xRAX + (nextop&7) + (rex.b<<3); + VMOVSto(ed, v0, 0); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); + VSTR32_U12(v0, ed, fixedaddress); + } + } + break; + case 0x7F: + INST_NAME("MOVDQA Ex,Gx"); + nextop = F8; + GETGX(v0); + if(MODREG) { + v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); + VMOVQ(v1, v0); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, 0, 0); + VSTR128_U12(v0, ed, fixedaddress); + } + break; + + case 0xA3: + INST_NAME("BT Ew, Gw"); + SETFLAGS(X_CF, SF_SUBSET); + SET_DFNONE(x1); + nextop = F8; + gd = xRAX+((nextop&0x38)>>3)+(rex.r<<3); // GETGD + if(MODREG) { + ed = xRAX+(nextop&7)+(rex.b<<3); + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<2, (1<<2)-1, rex, 0, 0); + SBFXw(x1, gd, 4, 12); // r1 = (gw>>4) + ADDx_REG_LSL(x3, wback, x1, 1); //(&ed)+=r1*2; + LDRH_U12(x1, x3, fixedaddress); + ed = x1; + } + ANDw_mask(x2, gd, 0, 0b000011); // mask=0x0f + LSRw_REG(x1, ed, x2); + BFIw(xFlags, x1, F_CF, 1); + break; + case 0xA4: + case 0xA5: + nextop = F8; + if(opcode==0xA4) { + INST_NAME("SHLD Ew, Gw, Ib"); + } else { + INST_NAME("SHLD Ew, Gw, CL"); + UXTBw(x3, xRCX); + } + MESSAGE(LOG_DUMP, "Need Optimization\n"); + SETFLAGS(X_ALL, SF_SET); + GETEWW(x4, x1, (opcode==0xA4)?1:0); + GETGW(x2); + if(opcode==0xA4) { + u8 = F8; + MOV32w(x3, u8); + } + CALL_(shld16, x1, wback); + EWBACKW(x1); + break; + + case 0xAB: + INST_NAME("BTS Ew, Gw"); + SETFLAGS(X_CF, SF_SUBSET); + SET_DFNONE(x1); + nextop = F8; + gd = xRAX+((nextop&0x38)>>3)+(rex.r<<3); // GETGD + if(MODREG) { + ed = xRAX+(nextop&7)+(rex.b<<3); + wback = 0; + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<2, (1<<2)-1, rex, 0, 0); + SBFXw(x4, gd, 4, 12); // r1 = (gw>>4) + ADDx_REG_LSL(x3, wback, x4, 1); //(&ed)+=r1*2; + LDRH_U12(x4, x3, fixedaddress); + ed = x4; + } + ANDw_mask(x2, gd, 0, 0b000011); // mask=0x0f + LSRw_REG(x1, ed, x2); + BFIw(xFlags, x1, F_CF, 1); + ANDSw_mask(x1, x1, 0, 0); //mask=1 + B_NEXT(cNE); + MOV32w(x1, 1); + LSLxw_REG(x1, x1, x2); + EORx_REG(ed, ed, x1); + if(wback) { + STRH_U12(ed, wback, fixedaddress); + } + break; + case 0xAC: + case 0xAD: + nextop = F8; + if(opcode==0xAC) { + INST_NAME("SHRD Ew, Gw, Ib"); + } else { + INST_NAME("SHRD Ew, Gw, CL"); + UXTBw(x3, xRCX); + } + MESSAGE(LOG_DUMP, "Need Optimization\n"); + SETFLAGS(X_ALL, SF_SET); + GETEWW(x4, x1, (opcode==0xAC)?1:0); + GETGW(x2); + if(opcode==0xAC) { + u8 = F8; + MOV32w(x3, u8); + } + CALL_(shrd16, x1, wback); + EWBACKW(x1); + break; + + case 0xAF: + INST_NAME("IMUL Gw,Ew"); + SETFLAGS(X_ALL, SF_PENDING); + nextop = F8; + UFLAG_DF(x1, d_imul16); + GETSEW(x1, 0); + GETSGW(x2); + MULw(x2, x2, x1); + UFLAG_RES(x2); + GWBACK; + break; + + case 0xB3: + INST_NAME("BTR Ew, Gw"); + SETFLAGS(X_CF, SF_SUBSET); + SET_DFNONE(x1); + nextop = F8; + gd = xRAX+((nextop&0x38)>>3)+(rex.r<<3); // GETGD + if(MODREG) { + ed = xRAX+(nextop&7)+(rex.b<<3); + wback = 0; + } 
else { + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<2, (1<<2)-1, rex, 0, 0); + SBFXw(x4, gd, 4, 12); // r1 = (gw>>4) + ADDx_REG_LSL(x3, wback, x4, 1); //(&ed)+=r1*2; + LDRH_U12(x4, x3, fixedaddress); + wback = x3; + ed = x4; + } + ANDw_mask(x2, gd, 0, 0b000011); // mask=0x0f + LSRw_REG(x1, ed, x2); + BFIw(xFlags, x1, F_CF, 1); + ANDSw_mask(x1, x1, 0, 0); //mask=1 + B_NEXT(cEQ); + MOV32w(x1, 1); + LSLxw_REG(x1, x1, x2); + EORx_REG(ed, ed, x1); + if(wback) { + STRH_U12(ed, wback, fixedaddress); + } + break; + + case 0xB6: + INST_NAME("MOVZX Gw, Eb"); + nextop = F8; + if(MODREG) { + if(rex.rex) { + eb1 = xRAX+(nextop&7)+(rex.b<<3); + eb2 = 0; \ + } else { + ed = (nextop&7); + eb1 = xRAX+(ed&3); // Ax, Cx, Dx or Bx + eb2 = (ed&4)>>2; // L or H + } + UBFXxw(x1, eb1, eb2*8, 8); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff, 0, rex, 0, 0); + LDRB_U12(x1, ed, fixedaddress); + } + gd = xRAX+((nextop&0x38)>>3)+(rex.r<<3); // GETGW + BFIx(gd, x1, 0, 16); // insert in Gw + break; + case 0xB7: + INST_NAME("MOVZX Gw, Ew"); + nextop = F8; + if(MODREG) { + eb1 = xRAX+(nextop&7)+(rex.b<<3); + UBFXxw(x1, eb1, 0, 16); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff>>1, 1, rex, 0, 0); + LDRH_U12(x1, ed, fixedaddress); + } + gd = xRAX+((nextop&0x38)>>3)+(rex.r<<3); // GETGW + BFIx(gd, x1, 0, 16); // insert in Gw + break; + + + case 0xBB: + INST_NAME("BTC Ew, Gw"); + SETFLAGS(X_CF, SF_SUBSET); + SET_DFNONE(x1); + nextop = F8; + gd = xRAX+((nextop&0x38)>>3)+(rex.r<<3); // GETGD + if(MODREG) { + ed = xRAX+(nextop&7)+(rex.b<<3); + wback = 0; + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<2, (1<<2)-1, rex, 0, 0); + SBFXw(x4, gd, 4, 12); // r1 = (gw>>4) + ADDx_REG_LSL(x3, wback, x4, 1); //(&ed)+=r1*2; + LDRH_U12(x4, x3, fixedaddress); + wback = x3; + ed = x4; + } + ANDw_mask(x2, gd, 0, 0b000011); // mask=0x0f + LSRw_REG(x1, ed, x2); + BFIw(xFlags, x1, F_CF, 1); + ANDw_mask(x1, x1, 0, 0); //mask=1 + MOV32w(x1, 1); + LSLxw_REG(x1, x1, x2); + EORx_REG(ed, ed, x1); + if(wback) { + STRH_U12(ed, wback, fixedaddress); + } + break; + case 0xBC: + INST_NAME("BSF Ew,Gw"); + SETFLAGS(X_ZF, SF_SUBSET); + SET_DFNONE(x1); + nextop = F8; + GETGD; + GETEW(x1, 0); // Get EW + TSTw_REG(x1, x1); + B_MARK(cEQ); + RBITw(x1, x1); // reverse + CLZw(x2, x1); // x2 gets leading 0 == BSF + BFIw(gd, x2, 0, 16); + MARK; + CSETw(x1, cEQ); //ZF not set + BFIw(xFlags, x1, F_ZF, 1); + break; + case 0xBD: + INST_NAME("BSR Ew,Gw"); + SETFLAGS(X_ZF, SF_SUBSET); + SET_DFNONE(x1); + nextop = F8; + GETGD; + GETEW(x1, 0); // Get EW + TSTw_REG(x1, x1); // Don't use CBZ here, as the flag is reused later + B_MARK(cEQ); + LSLw(x1, x1, 16); // put bits on top + CLZw(x2, x1); // x2 gets leading 0 + SUBw_U12(x2, x2, 15); + NEGw_REG(x2, x2); // complement + BFIx(gd, x2, 0, 16); + MARK; + CSETw(x1, cEQ); //ZF not set + BFIw(xFlags, x1, F_ZF, 1); + break; + case 0xBE: + INST_NAME("MOVSX Gw, Eb"); + nextop = F8; + GETGD; + if(MODREG) { + if(rex.rex) { + ed = xRAX+(nextop&7)+(rex.b<<3); + eb1=ed; + eb2=0; + } else { + ed = (nextop&7); + eb1 = xRAX+(ed&3); // Ax, Cx, Dx or Bx + eb2 = (ed&4)>>2; // L or H + } + SBFXw(x1, eb1, eb2, 8); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff, 0, rex, 0, 0); + LDRSBw_U12(x1, ed, fixedaddress); + } + BFIx(gd, x1, 0, 16); + break; + + case 0xC2: + INST_NAME("CMPPD Gx, Ex, Ib"); + nextop = F8; + GETGX(v0); + GETEX(v1, 1); + u8 = F8; + switch(u8&7) { + // the 
inversion of the params in the comparison is there to handle NaN the same way SSE does + case 0: FCMEQQD(v0, v0, v1); break; // Equal + case 1: FCMGTQD(v0, v1, v0); break; // Less than + case 2: FCMGEQD(v0, v1, v0); break; // Less or equal + case 3: FCMEQQD(v0, v0, v0); + if(v0!=v1) { + q0 = fpu_get_scratch(dyn); + FCMEQQD(q0, v1, v1); + VANDQ(v0, v0, q0); + } + VMVNQ(v0, v0); + break; // NaN (NaN is not equal to himself) + case 4: FCMEQQD(v0, v0, v1); VMVNQ(v0, v0); break; // Not Equal (or unordered on ARM, not on X86...) + case 5: FCMGTQD(v0, v1, v0); VMVNQ(v0, v0); break; // Greater or equal or unordered + case 6: FCMGEQD(v0, v1, v0); VMVNQ(v0, v0); break; // Greater or unordered + case 7: FCMEQQD(v0, v0, v0); + if(v0!=v1) { + q0 = fpu_get_scratch(dyn); + FCMEQQD(q0, v1, v1); + VANDQ(v0, v0, q0); + } + break; // not NaN + } + break; + + case 0xC4: + INST_NAME("PINSRW Gx,Ed,Ib"); + nextop = F8; + GETGX(v0); + if(MODREG) { + u8 = (F8)&7; + ed = xRAX+(nextop&7)+(rex.b<<3); + VMOVQHfrom(v0, u8, ed); + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0, rex, 0, 1); + u8 = (F8)&7; + VLD1_16(v0, u8, wback); + } + break; + case 0xC5: + INST_NAME("PEXTRW Gd,Ex,Ib"); + nextop = F8; + GETGD; + if(MODREG) { + GETEX(v0, 1); + u8 = (F8)&7; + VMOVHto(gd, v0, u8); + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0, rex, 0, 1); + u8 = (F8)&7; + LDRH_U12(gd, wback, u8*2); + } + break; + case 0xC6: + INST_NAME("SHUFPD Gx, Ex, Ib"); + nextop = F8; + GETGX(v0); + GETEX(v1, 1); + u8 = F8; + if(v0==v1 && u8==0) { + VMOVeD(v0, 1, v0, 0); + } else { + if(v0==v1) + q0 = fpu_get_scratch(dyn); + else + q0 = v0; + VMOVeD(q0, 0, v0, (u8&1)); + VMOVeD(q0, 1, v1, ((u8>>1)&1)); + if(v0==v1) { + VMOVQ(v0, q0); + } + } + break; + + case 0xC8: + case 0xC9: + case 0xCA: + case 0xCB: + case 0xCC: + case 0xCD: + case 0xCE: + case 0xCF: /* BSWAP reg */ + INST_NAME("BSWAP Reg"); + gd = xRAX+(opcode&7)+(rex.b<<3); + if(rex.w) { + REV64x(gd, gd); + } else { + REV16w(x1, gd); + BFIx(gd, x1, 0, 16); + } + break; + + case 0xD1: + INST_NAME("PSRLW Gx,Ex"); + nextop = F8; + GETGX(q0); + GETEX(q1, 0); + v0 = fpu_get_scratch(dyn); + VDUPQ_16(v0, q1, 0); + NEGQ_16(v0, v0); // neg, because SHR + USHLQ_16(q0, q0, v0); // SHR x8 + break; + case 0xD2: + INST_NAME("PSRLD Gx,Ex"); + nextop = F8; + GETGX(q0); + GETEX(q1, 0); + v0 = fpu_get_scratch(dyn); + VDUPQ_32(v0, q1, 0); + NEGQ_32(v0, v0); // neg, because SHR + USHLQ_32(q0, q0, v0); // SHR x4 + break; + case 0xD3: + INST_NAME("PSRLQ Gx,Ex"); + nextop = F8; + GETGX(q0); + GETEX(q1, 0); + v0 = fpu_get_scratch(dyn); + NEG_64(v0, q1); + VMOVeD(v0, 1, v0, 0); + USHLQ_64(q0, q0, v0); + break; + case 0xD4: + INST_NAME("PADDQ Gx,Ex"); + nextop = F8; + GETGX(v0); + GETEX(q0, 0); + VADDQ_64(v0, v0, q0); + break; + case 0xD5: + INST_NAME("PMULLW Gx,Ex"); + nextop = F8; + GETGX(q0); + GETEX(q1, 0); + VMULQ_16(q0, q0, q1); + break; + case 0xD6: + INST_NAME("MOVQ Ex, Gx"); + nextop = F8; + GETG; + v0 = sse_get_reg(dyn, ninst, x1, gd); + if(MODREG) { + v1 = sse_get_reg_empty(dyn, ninst, x1, (nextop&7) + (rex.b<<3)); + FMOVD(v1, v0); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0); + VSTR64_U12(v0, ed, fixedaddress); + } + break; + case 0xD7: + nextop = F8; + INST_NAME("PMOVMSKB Gd, Ex"); + v0 = fpu_get_scratch(dyn); + v1 = fpu_get_scratch(dyn); + q1 = fpu_get_scratch(dyn); + GETEX(q0, 0); + GETGD; + TABLE64(x1, (uintptr_t)&mask_shift8); + VLDR64_U12(v0, x1, 0); // load shift + MOVI_8(v1, 
0x80); // load mask + VAND(q1, v1, q0); + USHL_8(q1, q1, v0); // shift + UADDLV_8(q1, q1); // accumalte + VMOVBto(gd, q1, 0); + // and now the high part + VMOVeD(q1, 0, q0, 1); + VAND(q1, v1, q1); // keep highest bit + USHL_8(q1, q1, v0); // shift + UADDLV_8(q1, q1); // accumalte + VMOVBto(x1, q1, 0); + BFIx(gd, x1, 8, 8); + break; + case 0xD8: + INST_NAME("PSUBUSB Gx, Ex"); + nextop = F8; + GETGX(q0); + GETEX(q1, 0); + UQSUBQ_8(q0, q0, q1); + break; + case 0xD9: + INST_NAME("PSUBUSW Gx, Ex"); + nextop = F8; + GETGX(q0); + GETEX(q1, 0); + UQSUBQ_16(q0, q0, q1); + break; + case 0xDA: + INST_NAME("PMINUB Gx, Ex"); + nextop = F8; + GETGX(q0); + GETEX(q1,0); + UMINQ_8(q0, q0, q1); + break; + case 0xDB: + INST_NAME("PAND Gx,Ex"); + nextop = F8; + GETGX(v0); + GETEX(q0, 0); + VANDQ(v0, v0, q0); + break; + case 0xDC: + INST_NAME("PADDUSB Gx,Ex"); + nextop = F8; + GETGX(q0); + GETEX(q1, 0); + UQADDQ_8(q0, q0, q1); + break; + case 0xDD: + INST_NAME("PADDUSW Gx,Ex"); + nextop = F8; + GETGX(q0); + GETEX(q1, 0); + UQADDQ_16(q0, q0, q1); + break; + case 0xDE: + INST_NAME("PMAXUB Gx, Ex"); + nextop = F8; + GETGX(q0); + GETEX(q1, 0); + UMAXQ_8(q0, q0, q1); + break; + case 0xDF: + INST_NAME("PANDN Gx,Ex"); + nextop = F8; + GETGX(v0); + GETEX(q0, 0); + VBICQ(v0, q0, v0); + break; + + case 0xE0: + INST_NAME("PAVGB Gx, Ex"); + nextop = F8; + GETGX(v0); + GETEX(v1, 0); + URHADDQ_8(v0, v0, v1); + break; + + case 0xE1: + INST_NAME("PSRAW Gx,Ex"); + nextop = F8; + GETGX(q0); + GETEX(q1, 0); + v0 = fpu_get_scratch(dyn); + VMOVeD(v0, 0, q1, 0); + VMOVeD(v0, 1, q1, 0); + SQXTN_32(v0, v0); // 2*q1 in 32bits now + NEG_32(v0, v0); // because we want SHR and not SHL + VMOVeD(v0, 1, v0, 0); + SQXTN_16(v0, v0); // 4*q1 in 32bits now + VMOVeD(v0, 1, v0, 0); + SSHLQ_16(q0, q0, v0); + break; + case 0xE2: + INST_NAME("PSRAD Gx,Ex"); + nextop = F8; + GETGX(q0); + GETEX(q1, 0); + v0 = fpu_get_scratch(dyn); + VMOVeD(v0, 0, q1, 0); + VMOVeD(v0, 1, q1, 0); + SQXTN_32(v0, v0); // 2*q1 in 32bits now + NEG_32(v0, v0); // because we want SHR and not SHL + VMOVeD(v0, 1, v0, 0); + SSHLQ_32(q0, q0, v0); + break; + case 0xE3: + INST_NAME("PAVGW Gx,Ex"); + nextop = F8; + GETGX(v0); + GETEX(q0, 0); + URHADDQ_16(v0, v0, q0); + break; + case 0xE4: + INST_NAME("PMULHUW Gx,Ex"); + nextop = F8; + GETGX(v0); + GETEX(v1, 0); + q0 = fpu_get_scratch(dyn); + q1 = fpu_get_scratch(dyn); + VUMULL_16(q0, v0, v1); + VUMULL2_16(q1, v0, v1); + UQSHRN_16(v0, q0, 16); + UQSHRN2_16(v0, q1, 16); + break; + case 0xE5: + INST_NAME("PMULHW Gx,Ex"); + nextop = F8; + GETGX(v0); + GETEX(v1, 0); + q0 = fpu_get_scratch(dyn); + q1 = fpu_get_scratch(dyn); + VSMULL_16(q0, v0, v1); + VSMULL2_16(q1, v0, v1); + SQSHRN_16(v0, q0, 16); + SQSHRN2_16(v0, q1, 16); + break; + case 0xE6: + INST_NAME("CVTTPD2DQ Gx, Ex"); + nextop = F8; + GETEX(v1, 0); + GETGX_empty(v0); + VFCVTNSQD(v0, v1); // convert double -> int64 + SQXTN_32(v0, v0); // convert int64 -> int32 with saturation in lower part, RaZ high part + break; + case 0xE7: + INST_NAME("MOVNTDQ Ex, Gx"); + nextop = F8; + GETGX(v0); + if(MODREG) { + v1 = sse_get_reg_empty(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); + VMOVQ(v1, v0); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, 0, 0); + VSTR128_U12(v0, ed, fixedaddress); + } + break; + case 0xE8: + INST_NAME("PSUBSB Gx,Ex"); + nextop = F8; + GETGX(v0); + GETEX(q0, 0); + SQSUBQ_8(v0, v0, q0); + break; + case 0xE9: + INST_NAME("PSUBSW Gx,Ex"); + nextop = F8; + GETGX(v0); + GETEX(q0, 0); + SQSUBQ_16(v0, v0, q0); + break; + case 0xEA: + 
INST_NAME("PMINSW Gx,Ex"); + nextop = F8; + GETGX(v0); + GETEX(q0, 0); + SMINQ_16(v0, v0, q0); + break; + case 0xEB: + INST_NAME("POR Gx,Ex"); + nextop = F8; + GETGX(v0); + GETEX(q0, 0); + VORRQ(v0, v0, q0); + break; + case 0xEC: + INST_NAME("PADDSB Gx,Ex"); + nextop = F8; + GETGX(v0); + GETEX(q0, 0); + SQADDQ_8(v0, v0, q0); + break; + case 0xED: + INST_NAME("PADDSW Gx,Ex"); + nextop = F8; + GETGX(v0); + GETEX(q0, 0); + SQADDQ_16(v0, v0, q0); + break; + case 0xEE: + INST_NAME("PMAXSW Gx,Ex"); + nextop = F8; + GETGX(v0); + GETEX(q0, 0); + SMAXQ_16(v0, v0, q0); + break; + case 0xEF: + INST_NAME("PXOR Gx,Ex"); + nextop = F8; + GETG; + if(MODREG && ((nextop&7)+(rex.b<<3)==gd)) { + // special case for PXOR Gx, Gx + q0 = sse_get_reg_empty(dyn, ninst, x1, gd); + VEORQ(q0, q0, q0); + } else { + q0 = sse_get_reg(dyn, ninst, x1, gd); + GETEX(q1, 0); + VEORQ(q0, q0, q1); + } + break; + + case 0xF2: + INST_NAME("PSLLD Gx,Ex"); + nextop = F8; + GETGX(q0); + GETEX(q1, 0); + v0 = fpu_get_scratch(dyn); + VMOVeD(v0, 0, q1, 0); + VMOVeD(v0, 1, q1, 0); + SQXTN_32(v0, v0); // 2*q1 in 32bits now + VMOVeD(v0, 1, v0, 0); + SSHLQ_32(q0, q0, v0); + break; + case 0xF3: + INST_NAME("PSLLQ Gx,Ex"); + nextop = F8; + GETGX(q0); + GETEX(q1, 0); + v0 = fpu_get_scratch(dyn); + VMOVQ(v0, q1); + VMOVeD(v0, 1, v0, 0); + USHLQ_64(q0, q0, v0); + break; + case 0xF4: + INST_NAME("PMULUDQ Gx,Ex"); + nextop = F8; + GETGX(v0); + GETEX(v1, 0); + q0 = fpu_get_scratch(dyn); + VUZP1Q_32(q0, v0, v0); //A3 A2 A1 A0 -> A3 A1 A2 A0 + if(MODREG) { + q1 = fpu_get_scratch(dyn); + } else { + q1 = v1; + } + VUZP1Q_32(q1, v1, v1); + VUMULL_32(v0, q0, q1); + break; + case 0xF5: + INST_NAME("PMADDWD Gx, Ex"); + nextop = F8; + GETGX(v0); + GETEX(v1, 0); + q0 = fpu_get_scratch(dyn); + q1 = fpu_get_scratch(dyn); + VSMULL_16(q0, v0, v1); + VSMULL2_16(q1, v0, v1); + VADDPQ_32(v0, q0, q1); + break; + case 0xF6: + INST_NAME("PSADBW Gx, Ex"); + nextop = F8; + GETGX(q0); + GETEX(q1, 0); + d0 = fpu_get_scratch(dyn); + d1 = fpu_get_scratch(dyn); + VEOR(d1, d1, d1); // is it necessary? 
+ UABDL_8(d0, q0, q1); + UADDLVQ_16(d1, d0); + VMOVeD(q0, 0, d1, 0); + UABDL2_8(d0, q0, q1); + UADDLVQ_16(d1, d0); + VMOVeD(q0, 1, d1, 0); + break; + case 0xF7: + INST_NAME("MASKMOVDQU Gx, Ex") + nextop = F8; + GETGX(q0); + GETEX(q1, 0); + v0 = fpu_get_scratch(dyn); + VLDR128_U12(v0, xRDI, 0); + if(MODREG) + v1 = fpu_get_scratch(dyn); // need to preserve the register + else + v1 = q1; + VSSHRQ_8(v1, q1, 7); // get the mask + VBICQ(v0, v0, v1); // mask destination + VANDQ(v1, q0, v1); // mask source + VORRQ(v1, v1, v0); // combine + VSTR128_U12(v1, xRDI, 0); // put back + break; + case 0xF8: + INST_NAME("PSUBB Gx,Ex"); + nextop = F8; + GETGX(v0); + GETEX(q0, 0); + VSUBQ_8(v0, v0, q0); + break; + case 0xF9: + INST_NAME("PSUBW Gx,Ex"); + nextop = F8; + GETGX(v0); + GETEX(q0, 0); + VSUBQ_16(v0, v0, q0); + break; + case 0xFA: + INST_NAME("PSUBD Gx,Ex"); + nextop = F8; + GETGX(v0); + GETEX(q0, 0); + VSUBQ_32(v0, v0, q0); + break; + case 0xFB: + INST_NAME("PSUBQ Gx,Ex"); + nextop = F8; + GETGX(v0); + GETEX(q0, 0); + VSUBQ_64(v0, v0, q0); + break; + case 0xFC: + INST_NAME("PADDB Gx,Ex"); + nextop = F8; + GETGX(v0); + GETEX(q0, 0); + VADDQ_8(v0, v0, q0); + break; + case 0xFD: + INST_NAME("PADDW Gx,Ex"); + nextop = F8; + GETGX(v0); + GETEX(q0, 0); + VADDQ_16(v0, v0, q0); + break; + case 0xFE: + INST_NAME("PADDD Gx,Ex"); + nextop = F8; + GETGX(v0); + GETEX(q0, 0); + VADDQ_32(v0, v0, q0); + break; + + default: + DEFAULT; + } + return addr; +} diff --git a/src/dynarec/arm64/dynarec_arm64_6664.c b/src/dynarec/arm64/dynarec_arm64_6664.c new file mode 100644 index 00000000..422c673b --- /dev/null +++ b/src/dynarec/arm64/dynarec_arm64_6664.c @@ -0,0 +1,129 @@ +#include +#include +#include +#include +#include + +#include "debug.h" +#include "box64context.h" +#include "dynarec.h" +#include "emu/x64emu_private.h" +#include "emu/x64run_private.h" +#include "x64run.h" +#include "x64emu.h" +#include "box64stack.h" +#include "callback.h" +#include "emu/x64run_private.h" +#include "x64trace.h" +#include "dynarec_native.h" + +#include "arm64_printer.h" +#include "dynarec_arm64_private.h" +#include "dynarec_arm64_helper.h" +#include "dynarec_arm64_functions.h" + +#define GETG gd = ((nextop&0x38)>>3)+(rex.r<<3) + +uintptr_t dynarec64_6664(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog) +{ + (void)ip; (void)rep; (void)need_epilog; + + uint8_t opcode = F8; + uint8_t nextop; + uint8_t gd, ed; + int v0, v1; + int64_t fixedaddress; + + // REX prefix before the 66 are ignored + rex.rex = 0; + while(opcode>=0x40 && opcode<=0x4f) { + rex.rex = opcode; + opcode = F8; + } + + /*if(rex.w && opcode!=0x0f) { // rex.w cancels "66", but not for 66 0f type of prefix + MESSAGE(LOG_DUMP, "Here!\n"); + return dynarec64_64(dyn, addr-2, ip, ninst, rex, rep, ok, need_epilog); + }*/ + + switch(opcode) { + + case 0x0F: + opcode = F8; + switch(opcode) { + + case 0xD6: + INST_NAME("MOVQ Ex, Gx"); + nextop = F8; + GETG; + v0 = sse_get_reg(dyn, ninst, x1, gd); + if(MODREG) { + v1 = sse_get_reg_empty(dyn, ninst, x1, (nextop&7) + (rex.b<<3)); + FMOVD(v1, v0); + } else { + grab_segdata(dyn, addr, ninst, x4, _FS); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); + VSTR64_REG(v0, ed, x4); + } + break; + + default: + DEFAULT; + } + break; + + case 0x89: + INST_NAME("MOV FS:Ew, Gw"); + nextop = F8; + GETGD; // don't need GETGW here + if(MODREG) { + ed = xRAX+(nextop&7)+(rex.b<<3); + if(rex.w) { + MOVx_REG(ed, gd); + } else { + if(ed!=gd) { + BFIx(ed, gd, 0, 
16); + } + } + } else { + grab_segdata(dyn, addr, ninst, x4, _FS); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0, rex, 0, 0); + if(rex.w) { + STRx_REG(gd, ed, x4); + } else { + STRH_REG(gd, ed, x4); + } + } + break; + + case 0x8B: + INST_NAME("MOV Gd, FS:Ed"); + nextop=F8; + GETGD; + if(MODREG) { // reg <= reg + ed = xRAX+(nextop&7)+(rex.b<<3); + if(rex.w) { + MOVx_REG(gd, ed); + } else { + if(ed!=gd) { + BFIx(gd, ed, 0, 16); + } + } + } else { // mem <= reg + grab_segdata(dyn, addr, ninst, x4, _FS); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0, rex, 0, 0); + if(rex.w) { + LDRx_REG(gd, ed, x4); + } else { + LDRH_REG(x1, ed, x4); + BFIx(gd, x1, 0, 16); + } + } + break; + + + default: + DEFAULT; + } + return addr; +} diff --git a/src/dynarec/arm64/dynarec_arm64_67.c b/src/dynarec/arm64/dynarec_arm64_67.c new file mode 100755 index 00000000..0e846784 --- /dev/null +++ b/src/dynarec/arm64/dynarec_arm64_67.c @@ -0,0 +1,428 @@ +#include +#include +#include +#include +#include + +#include "debug.h" +#include "box64context.h" +#include "dynarec.h" +#include "emu/x64emu_private.h" +#include "emu/x64run_private.h" +#include "x64run.h" +#include "x64emu.h" +#include "box64stack.h" +#include "callback.h" +#include "emu/x64run_private.h" +#include "x64trace.h" +#include "dynarec_native.h" + +#include "arm64_printer.h" +#include "dynarec_arm64_private.h" +#include "dynarec_arm64_helper.h" +#include "dynarec_arm64_functions.h" + +#define GETGX(a) \ + gd = ((nextop&0x38)>>3)+(rex.r<<3); \ + a = sse_get_reg(dyn, ninst, x1, gd) + +#define GETGM(a) \ + gd = ((nextop&0x38)>>3); \ + a = mmx_get_reg(dyn, ninst, x1, gd) + +#define GETGm gd = ((nextop&0x38)>>3) + +uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog) +{ + (void)ip; (void)need_epilog; + + uint8_t opcode = F8; + uint8_t nextop; + uint8_t gd, ed, wback, wb; + int64_t fixedaddress; + int8_t i8; + uint8_t u8; + int32_t i32; + int64_t j64, i64; + int v0, v1, s0; + MAYUSE(i32); + MAYUSE(j64); + MAYUSE(v0); + MAYUSE(v1); + MAYUSE(s0); + + // REX prefix before the 67 are ignored + rex.rex = 0; + while(opcode>=0x40 && opcode<=0x4f) { + rex.rex = opcode; + opcode = F8; + } + rep = 0; + while((opcode==0xF2) || (opcode==0xF3)) { + rep = opcode-0xF1; + opcode = F8; + } + + switch(opcode) { + + case 0x0F: + opcode=F8; + switch(opcode) { + + case 0x2E: + // no special check... 
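+ // UCOMISS (0x2E) shares the COMISS code below; as with their double-precision
+ // counterparts, the two only differ in QNaN exception signalling, which isn't modeled here.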
+ case 0x2F: + if(rep) { + DEFAULT; + } else { + if(opcode==0x2F) {INST_NAME("COMISS Gx, Ex");} else {INST_NAME("UCOMISS Gx, Ex");} + SETFLAGS(X_ALL, SF_SET); + nextop = F8; + GETGX(v0); + if(MODREG) { + s0 = sse_get_reg(dyn, ninst, x1, (nextop&7) + (rex.b<<3)); + } else { + s0 = fpu_get_scratch(dyn); + addr = geted32(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); + VLDR32_U12(s0, ed, fixedaddress); + } + FCMPS(v0, s0); + FCOMI(x1, x2); + } + break; + + case 0x6F: + INST_NAME("MOVQ Gm, Em"); + nextop = F8; + GETGm; + if(MODREG) { + v1 = mmx_get_reg(dyn, ninst, x1, nextop&7); // no rex.b on MMX + v0 = mmx_get_reg_empty(dyn, ninst, x1, gd); + VMOVeD(v0, 0, v1, 0); + } else { + v0 = mmx_get_reg_empty(dyn, ninst, x1, gd); + addr = geted32(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0); + VLDR64_U12(v0, ed, fixedaddress); + } + break; + + case 0x7F: + INST_NAME("MOVQ Em, Gm"); + nextop = F8; + GETGM(v0); + if(MODREG) { + v1 = mmx_get_reg_empty(dyn, ninst, x1, nextop&7); + VMOV(v1, v0); + } else { + addr = geted32(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0); + VSTR64_U12(v0, ed, fixedaddress); + } + break; + + default: + DEFAULT; + } + break; + + case 0x89: + INST_NAME("MOV Ed, Gd"); + nextop=F8; + GETGD; + if(MODREG) { // reg <= reg + MOVxw_REG(xRAX+(nextop&7)+(rex.b<<3), gd); + } else { // mem <= reg + addr = geted32(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, 0, 0); + STRxw_U12(gd, ed, fixedaddress); + } + break; + + case 0x8D: + INST_NAME("LEA Gd, Ed"); + nextop=F8; + GETGD; + if(MODREG) { // reg <= reg? that's an invalid operation + DEFAULT; + } else { // mem <= reg + // should a geted32 be created, to use 32bits regs instead of 64bits? 
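+ // geted32, called just below, presumably does the address computation with the
+ // 32-bit semantics the 0x67 prefix requires; when it returns a temporary,
+ // MOVw_REG copies only the low 32 bits into Gd.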
+ addr = geted32(dyn, addr, ninst, nextop, &ed, gd, &fixedaddress, 0, 0, rex, 0, 0); + if(ed!=gd) { + MOVw_REG(gd, ed); + } + } + break; + + case 0xC1: + nextop = F8; + switch((nextop>>3)&7) { + case 0: + INST_NAME("ROL Ed, Ib"); + SETFLAGS(X_OF|X_CF, SF_SUBSET); + GETED32(1); + u8 = (F8)&(rex.w?0x3f:0x1f); + emit_rol32c(dyn, ninst, rex, ed, u8, x3, x4); + if(u8) { WBACK; } + break; + case 1: + INST_NAME("ROR Ed, Ib"); + SETFLAGS(X_OF|X_CF, SF_SUBSET); + GETED32(1); + u8 = (F8)&(rex.w?0x3f:0x1f); + emit_ror32c(dyn, ninst, rex, ed, u8, x3, x4); + if(u8) { WBACK; } + break; + case 2: + INST_NAME("RCL Ed, Ib"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + READFLAGS(X_CF); + SETFLAGS(X_OF|X_CF, SF_SET); + GETED32W(x4, x1, 1); + u8 = F8; + MOV32w(x2, u8); + CALL_(rex.w?((void*)rcl64):((void*)rcl32), ed, x4); + WBACK; + break; + case 3: + INST_NAME("RCR Ed, Ib"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + READFLAGS(X_CF); + SETFLAGS(X_OF|X_CF, SF_SET); + GETED32W(x4, x1, 1); + u8 = F8; + MOV32w(x2, u8); + CALL_(rex.w?((void*)rcr64):((void*)rcr32), ed, x4); + WBACK; + break; + case 4: + case 6: + INST_NAME("SHL Ed, Ib"); + SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + GETED32(1); + u8 = (F8)&(rex.w?0x3f:0x1f); + emit_shl32c(dyn, ninst, rex, ed, u8, x3, x4); + WBACK; + break; + case 5: + INST_NAME("SHR Ed, Ib"); + SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + GETED32(1); + u8 = (F8)&(rex.w?0x3f:0x1f); + emit_shr32c(dyn, ninst, rex, ed, u8, x3, x4); + if(u8) { + WBACK; + } + break; + case 7: + INST_NAME("SAR Ed, Ib"); + SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + GETED32(1); + u8 = (F8)&(rex.w?0x3f:0x1f); + emit_sar32c(dyn, ninst, rex, ed, u8, x3, x4); + if(u8) { + WBACK; + } + break; + } + break; + + #define GO(NO, YES) \ + BARRIER(2); \ + JUMP(addr+i8);\ + if(dyn->insts[ninst].x64.jmp_insts==-1) { \ + /* out of the block */ \ + i32 = dyn->insts[ninst+1].address-(dyn->native_size); \ + Bcond(NO, i32); \ + jump_to_next(dyn, addr+i8, 0, ninst); \ + } else { \ + /* inside the block */ \ + i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->native_size); \ + Bcond(YES, i32); \ + } + case 0xE0: + INST_NAME("LOOPNZ (32bits)"); + READFLAGS(X_ZF); + i8 = F8S; + MOVw_REG(x1, xRCX); + SUBSw_U12(x1, x1, 1); + BFIx(xRCX, x1, 0, 32); + B_NEXT(cEQ); // ECX is 0, no LOOP + TSTw_mask(xFlags, 0b011010, 0); //mask=0x40 + GO(cNE, cEQ); + break; + case 0xE1: + INST_NAME("LOOPZ (32bits)"); + READFLAGS(X_ZF); + i8 = F8S; + MOVw_REG(x1, xRCX); + SUBSw_U12(x1, x1, 1); + BFIx(xRCX, x1, 0, 32); + B_NEXT(cEQ); // ECX is 0, no LOOP + TSTw_mask(xFlags, 0b011010, 0); //mask=0x40 + GO(cEQ, cNE); + break; + case 0xE2: + INST_NAME("LOOP (32bits)"); + i8 = F8S; + MOVw_REG(x1, xRCX); + SUBSw_U12(x1, x1, 1); + BFIx(xRCX, x1, 0, 32); + GO(cEQ, cNE); + break; + case 0xE3: + INST_NAME("JECXZ"); + i8 = F8S; + MOVw_REG(x1, xRCX); + TSTw_REG(x1, x1); + GO(cNE, cEQ); + break; + #undef GO + + case 0xE8: + return dynarec64_00(dyn, addr-1, ip, ninst, rex, rep, ok, need_epilog); // addr-1, to "put back" opcode) + + case 0xF7: + nextop = F8; + switch((nextop>>3)&7) { + case 0: + case 1: + INST_NAME("TEST Ed, Id"); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETED32H(x1, 4); + i64 = F32S; + MOV64xw(x2, i64); + emit_test32(dyn, ninst, rex, ed, x2, x3, x4); + break; + case 2: + INST_NAME("NOT Ed"); + GETED32(4); + MVNxw_REG(ed, ed); + WBACK; + break; + case 3: + INST_NAME("NEG Ed"); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETED32(0); + emit_neg32(dyn, ninst, rex, ed, x3, 
x4); + WBACK; + break; + case 4: + INST_NAME("MUL EAX, Ed"); + SETFLAGS(X_ALL, SF_PENDING); + UFLAG_DF(x2, rex.w?d_mul64:d_mul32); + GETED32(0); + if(rex.w) { + if(ed==xRDX) gd=x3; else gd=xRDX; + UMULH(gd, xRAX, ed); + MULx(xRAX, xRAX, ed); + if(gd!=xRDX) {MOVx_REG(xRDX, gd);} + } else { + UMULL(xRDX, xRAX, ed); //64 <- 32x32 + MOVw_REG(xRAX, xRDX); + LSRx(xRDX, xRDX, 32); + } + UFLAG_RES(xRAX); + UFLAG_OP1(xRDX); + break; + case 5: + INST_NAME("IMUL EAX, Ed"); + SETFLAGS(X_ALL, SF_PENDING); + UFLAG_DF(x2, rex.w?d_imul64:d_imul32); + GETED32(0); + if(rex.w) { + if(ed==xRDX) gd=x3; else gd=xRDX; + SMULH(gd, xRAX, ed); + MULx(xRAX, xRAX, ed); + if(gd!=xRDX) {MOVx_REG(xRDX, gd);} + } else { + SMULL(xRDX, xRAX, ed); //64 <- 32x32 + MOVw_REG(xRAX, xRDX); + LSRx(xRDX, xRDX, 32); + } + UFLAG_RES(xRAX); + UFLAG_OP1(xRDX); + break; + case 6: + INST_NAME("DIV Ed"); + SETFLAGS(X_ALL, SF_SET); + if(!rex.w) { + SET_DFNONE(x2); + GETED32(0); + MOVw_REG(x3, xRAX); + ORRx_REG_LSL(x3, x3, xRDX, 32); + if(MODREG) { + MOVw_REG(x4, ed); + ed = x4; + } + UDIVx(x2, x3, ed); + MSUBx(x4, x2, ed, xRAX); + MOVw_REG(xRAX, x2); + MOVw_REG(xRDX, x4); + } else { + if(ninst && dyn->insts + && dyn->insts[ninst-1].x64.addr + && *(uint8_t*)(dyn->insts[ninst-1].x64.addr)==0x31 + && *(uint8_t*)(dyn->insts[ninst-1].x64.addr+1)==0xD2) { + SET_DFNONE(x2); + GETED32(0); + UDIVx(x2, xRAX, ed); + MSUBx(xRDX, x2, ed, xRAX); + MOVx_REG(xRAX, x2); + } else { + GETED32H(x1, 0); // get edd changed addr, so cannot be called 2 times for same op... + CBZxw_MARK(xRDX); + if(ed!=x1) {MOVx_REG(x1, ed);} + CALL(div64, -1); + B_NEXT_nocond; + MARK; + UDIVx(x2, xRAX, ed); + MSUBx(xRDX, x2, ed, xRAX); + MOVx_REG(xRAX, x2); + SET_DFNONE(x2); + } + } + break; + case 7: + INST_NAME("IDIV Ed"); + SETFLAGS(X_ALL, SF_SET); + if(!rex.w) { + SET_DFNONE(x2) + GETSED32w(0); + MOVw_REG(x3, xRAX); + ORRx_REG_LSL(x3, x3, xRDX, 32); + SDIVx(x2, x3, wb); + MSUBx(x4, x2, wb, x3); + MOVw_REG(xRAX, x2); + MOVw_REG(xRDX, x4); + } else { + if(ninst && dyn->insts + && dyn->insts[ninst-1].x64.addr + && *(uint8_t*)(dyn->insts[ninst-1].x64.addr)==0x48 + && *(uint8_t*)(dyn->insts[ninst-1].x64.addr+1)==0x99) { + SET_DFNONE(x2) + GETED32(0); + SDIVx(x2, xRAX, ed); + MSUBx(xRDX, x2, ed, xRAX); + MOVx_REG(xRAX, x2); + } else { + GETED32H(x1, 0); // get edd changed addr, so cannot be called 2 times for same op... 
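+ // When RDX is 0 or all-ones it is treated as a simple widening of RAX and a
+ // native 64-bit SDIV/MSUB pair is used; any other RDX means a genuine 128-by-64
+ // signed division, which is delegated to the idiv64 helper.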
+ CBZxw_MARK(xRDX); + MVNx_REG(x2, xRDX); + CBZxw_MARK(x2); + if(ed!=x1) {MOVx_REG(x1, ed);} + CALL((void*)idiv64, -1); + B_NEXT_nocond; + MARK; + SDIVx(x2, xRAX, ed); + MSUBx(xRDX, x2, ed, xRAX); + MOVx_REG(xRAX, x2); + SET_DFNONE(x2) + } + } + break; + } + break; + + default: + DEFAULT; + } + return addr; +} diff --git a/src/dynarec/arm64/dynarec_arm64_d8.c b/src/dynarec/arm64/dynarec_arm64_d8.c new file mode 100644 index 00000000..2a963bb4 --- /dev/null +++ b/src/dynarec/arm64/dynarec_arm64_d8.c @@ -0,0 +1,232 @@ +#include +#include +#include +#include +#include + +#include "debug.h" +#include "box64context.h" +#include "dynarec.h" +#include "emu/x64emu_private.h" +#include "emu/x64run_private.h" +#include "x64run.h" +#include "x64emu.h" +#include "box64stack.h" +#include "callback.h" +#include "emu/x64run_private.h" +#include "x64trace.h" +#include "emu/x87emu_private.h" +#include "dynarec_native.h" + +#include "arm64_printer.h" +#include "dynarec_arm64_private.h" +#include "dynarec_arm64_helper.h" +#include "dynarec_arm64_functions.h" + + +uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog) +{ + (void)ip; (void)rep; (void)need_epilog; + + uint8_t nextop = F8; + uint8_t ed; + int64_t fixedaddress; + int v1, v2; + int s0; + + MAYUSE(s0); + MAYUSE(v2); + MAYUSE(v1); + + switch(nextop) { + case 0xC0: + case 0xC1: + case 0xC2: + case 0xC3: + case 0xC4: + case 0xC5: + case 0xC6: + case 0xC7: + INST_NAME("FADD ST0, STx"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7); + FADDD(v1, v1, v2); + break; + case 0xC8: + case 0xC9: + case 0xCA: + case 0xCB: + case 0xCC: + case 0xCD: + case 0xCE: + case 0xCF: + INST_NAME("FMUL ST0, STx"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7); + FMULD(v1, v1, v2); + break; + case 0xD0: + case 0xD1: + case 0xD2: + case 0xD3: + case 0xD4: + case 0xD5: + case 0xD6: + case 0xD7: + INST_NAME("FCOM ST0, STx"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7); + FCMPD(v1, v2); + FCOM(x1, x2, x3); + break; + case 0xD8: + case 0xD9: + case 0xDA: + case 0xDB: + case 0xDC: + case 0xDD: + case 0xDE: + case 0xDF: + INST_NAME("FCOMP ST0, STx"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7); + FCMPD(v1, v2); + FCOM(x1, x2, x3); + x87_do_pop(dyn, ninst); + break; + case 0xE0: + case 0xE1: + case 0xE2: + case 0xE3: + case 0xE4: + case 0xE5: + case 0xE6: + case 0xE7: + INST_NAME("FSUB ST0, STx"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7); + FSUBD(v1, v1, v2); + break; + case 0xE8: + case 0xE9: + case 0xEA: + case 0xEB: + case 0xEC: + case 0xED: + case 0xEE: + case 0xEF: + INST_NAME("FSUBR ST0, STx"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7); + FSUBD(v1, v2, v1); + break; + case 0xF0: + case 0xF1: + case 0xF2: + case 0xF3: + case 0xF4: + case 0xF5: + case 0xF6: + case 0xF7: + INST_NAME("FDIV ST0, STx"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7); + FDIVD(v1, v1, v2); + break; + case 0xF8: + case 0xF9: + case 0xFA: + case 0xFB: + case 0xFC: + case 0xFD: + case 0xFE: + case 0xFF: + INST_NAME("FDIVR ST0, STx"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7); + FDIVD(v1, v2, v1); + break; + + default: + switch((nextop>>3)&7) { + case 0: + 
INST_NAME("FADD ST0, float[ED]"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + s0 = fpu_get_scratch(dyn); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); + VLDR32_U12(s0, ed, fixedaddress); + FCVT_D_S(s0, s0); + FADDD(v1, v1, s0); + break; + case 1: + INST_NAME("FMUL ST0, float[ED]"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + s0 = fpu_get_scratch(dyn); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); + VLDR32_U12(s0, ed, fixedaddress); + FCVT_D_S(s0, s0); + FMULD(v1, v1, s0); + break; + case 2: + INST_NAME("FCOM ST0, float[ED]"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + s0 = fpu_get_scratch(dyn); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); + VLDR32_U12(s0, ed, fixedaddress); + FCVT_D_S(s0, s0); + FCMPD(v1, s0); + FCOM(x1, x2, x3); + break; + case 3: + INST_NAME("FCOMP ST0, float[ED]"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + s0 = fpu_get_scratch(dyn); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); + VLDR32_U12(s0, ed, fixedaddress); + FCVT_D_S(s0, s0); + FCMPD(v1, s0); + FCOM(x1, x2, x3); + x87_do_pop(dyn, ninst); + break; + case 4: + INST_NAME("FSUB ST0, float[ED]"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + s0 = fpu_get_scratch(dyn); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); + VLDR32_U12(s0, ed, fixedaddress); + FCVT_D_S(s0, s0); + FSUBD(v1, v1, s0); + break; + case 5: + INST_NAME("FSUBR ST0, float[ED]"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + s0 = fpu_get_scratch(dyn); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); + VLDR32_U12(s0, ed, fixedaddress); + FCVT_D_S(s0, s0); + FSUBD(v1, s0, v1); + break; + case 6: + INST_NAME("FDIV ST0, float[ED]"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + s0 = fpu_get_scratch(dyn); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); + VLDR32_U12(s0, ed, fixedaddress); + FCVT_D_S(s0, s0); + FDIVD(v1, v1, s0); + break; + case 7: + INST_NAME("FDIVR ST0, float[ED]"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + s0 = fpu_get_scratch(dyn); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); + VLDR32_U12(s0, ed, fixedaddress); + FCVT_D_S(s0, s0); + FDIVD(v1, s0, v1); + break; + default: + DEFAULT; + } + } + return addr; +} diff --git a/src/dynarec/arm64/dynarec_arm64_d9.c b/src/dynarec/arm64/dynarec_arm64_d9.c new file mode 100644 index 00000000..cd11b6f2 --- /dev/null +++ b/src/dynarec/arm64/dynarec_arm64_d9.c @@ -0,0 +1,356 @@ +#include +#include +#include +#include +#include + +#include "debug.h" +#include "box64context.h" +#include "dynarec.h" +#include "emu/x64emu_private.h" +#include "emu/x64run_private.h" +#include "x64run.h" +#include "x64emu.h" +#include "box64stack.h" +#include "callback.h" +#include "emu/x64run_private.h" +#include "x64trace.h" +#include "emu/x87emu_private.h" +#include "dynarec_native.h" + +#include "arm64_printer.h" +#include "dynarec_arm64_private.h" +#include "dynarec_arm64_helper.h" +#include "dynarec_arm64_functions.h" + + +uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog) +{ + (void)ip; (void)rep; (void)need_epilog; + + uint8_t nextop = F8; + uint8_t ed; + uint8_t wback, wb1; + int64_t fixedaddress; + int v1, v2; + int s0; + int i1, i2, i3; + + MAYUSE(s0); + MAYUSE(v2); + MAYUSE(v1); + + 
switch(nextop) { + case 0xC0: + case 0xC1: + case 0xC2: + case 0xC3: + case 0xC4: + case 0xC5: + case 0xC6: + case 0xC7: + INST_NAME("FLD STx"); + v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7); + v2 = x87_do_push(dyn, ninst); + FMOVD(v2, v1); + break; + + case 0xC8: + case 0xC9: + case 0xCA: + case 0xCB: + case 0xCC: + case 0xCD: + case 0xCE: + case 0xCF: + INST_NAME("FXCH STx"); + // swap the cache value, not the double value itself :p + i1 = x87_get_cache(dyn, ninst, x1, x2, nextop&7); + i2 = x87_get_cache(dyn, ninst, x1, x2, 0); + i3 = dyn->x87cache[i1]; + dyn->x87cache[i1] = dyn->x87cache[i2]; + dyn->x87cache[i2] = i3; + break; + + case 0xD0: + INST_NAME("FNOP"); + break; + + case 0xE0: + INST_NAME("FCHS"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + FNEGD(v1, v1); + break; + case 0xE1: + INST_NAME("FABS"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + FABSD(v1, v1); + break; + + case 0xE4: + INST_NAME("FTST"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + FCMPD_0(v1); + FCOM(x1, x2, x3); // same flags... + break; + case 0xE5: + INST_NAME("FXAM"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + x87_refresh(dyn, ninst, x1, x2, 0); + CALL(fpu_fxam, -1); // should be possible inline, but is it worth it? + break; + + case 0xE8: + INST_NAME("FLD1"); + v1 = x87_do_push(dyn, ninst); + FTABLE64(v1, 1.0); + break; + case 0xE9: + INST_NAME("FLDL2T"); + v1 = x87_do_push(dyn, ninst); + FTABLE64(v1, L2T); + break; + case 0xEA: + INST_NAME("FLDL2E"); + v1 = x87_do_push(dyn, ninst); + FTABLE64(v1, L2E); + break; + case 0xEB: + INST_NAME("FLDPI"); + v1 = x87_do_push(dyn, ninst); + FTABLE64(v1, PI); + break; + case 0xEC: + INST_NAME("FLDLG2"); + v1 = x87_do_push(dyn, ninst); + FTABLE64(v1, LG2); + break; + case 0xED: + INST_NAME("FLDLN2"); + v1 = x87_do_push(dyn, ninst); + FTABLE64(v1, LN2); + break; + case 0xEE: + INST_NAME("FLDZ"); + v1 = x87_do_push(dyn, ninst); + FTABLE64(v1, 0.0); + break; + + case 0xFA: + INST_NAME("FSQRT"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + FSQRTD(v1, v1); + break; + + case 0xFC: + INST_NAME("FRNDINT"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + // use C helper for now, nothing staightforward is available + x87_forget(dyn, ninst, x1, x2, 0); + CALL(arm_frndint, -1); + /* + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + VCMP_F64_0(v1); + VMRS_APSR(); + B_NEXT(cVS); // Unordered, skip + B_NEXT(cEQ); // Zero, skip + u8 = x87_setround(dyn, ninst, x1, x2, x3); + VCVT_S32_F64(x1, v1); // limit to 32bits.... 
+ VCVT_F64_S32(v1, x1); + x87_restoreround(dyn, ninst, u8); + */ + break; + case 0xF0: + INST_NAME("F2XM1"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + x87_forget(dyn, ninst, x1, x2, 0); + CALL(arm_f2xm1, -1); + break; + case 0xF1: + INST_NAME("FYL2X"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + x87_forget(dyn, ninst, x1, x2, 0); + x87_forget(dyn, ninst, x1, x2, 1); + CALL(arm_fyl2x, -1); + x87_do_pop(dyn, ninst); + break; + case 0xF2: + INST_NAME("FTAN"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + x87_forget(dyn, ninst, x1, x2, 0); + CALL(arm_ftan, -1); + v1 = x87_do_push(dyn, ninst); + FTABLE64(v1, 1.0); + break; + case 0xF3: + INST_NAME("FPATAN"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + x87_forget(dyn, ninst, x1, x2, 0); + x87_forget(dyn, ninst, x1, x2, 1); + CALL(arm_fpatan, -1); + x87_do_pop(dyn, ninst); + break; + case 0xF4: + INST_NAME("FXTRACT"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + x87_do_push_empty(dyn, ninst, 0); + x87_forget(dyn, ninst, x1, x2, 1); + CALL(arm_fxtract, -1); + break; + case 0xF5: + INST_NAME("FPREM1"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + x87_forget(dyn, ninst, x1, x2, 0); + x87_forget(dyn, ninst, x1, x2, 1); + CALL(arm_fprem1, -1); + break; + case 0xF6: + INST_NAME("FDECSTP"); + fpu_purgecache(dyn, ninst, x1, x2, x3); + LDRw_U12(x2, xEmu, offsetof(x64emu_t, top)); + SUBw_U12(x2, x2, 1); + ANDw_mask(x2, x2, 0, 2); //mask=7 + STRw_U12(x2, xEmu, offsetof(x64emu_t, top)); + break; + case 0xF7: + INST_NAME("FINCSTP"); + fpu_purgecache(dyn, ninst, x1, x2, x3); + LDRw_U12(x2, xEmu, offsetof(x64emu_t, top)); + ADDw_U12(x2, x2, 1); + ANDw_mask(x2, x2, 0, 2); //mask=7 + STRw_U12(x2, xEmu, offsetof(x64emu_t, top)); + break; + case 0xF8: + INST_NAME("FPREM"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + x87_forget(dyn, ninst, x1, x2, 0); + x87_forget(dyn, ninst, x1, x2, 1); + CALL(arm_fprem, -1); + break; + case 0xF9: + INST_NAME("FYL2XP1"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + x87_forget(dyn, ninst, x1, x2, 0); + x87_forget(dyn, ninst, x1, x2, 1); + CALL(arm_fyl2xp1, -1); + x87_do_pop(dyn, ninst); + break; + case 0xFB: + INST_NAME("FSINCOS"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + x87_do_push_empty(dyn, ninst, 0); + x87_forget(dyn, ninst, x1, x2, 1); + CALL(arm_fsincos, -1); + break; + case 0xFD: + INST_NAME("FSCALE"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + x87_forget(dyn, ninst, x1, x2, 0); + x87_forget(dyn, ninst, x1, x2, 1); + CALL(arm_fscale, -1); + break; + case 0xFE: + INST_NAME("FSIN"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + x87_forget(dyn, ninst, x1, x2, 0); + CALL(arm_fsin, -1); + break; + case 0xFF: + INST_NAME("FCOS"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + x87_forget(dyn, ninst, x1, x2, 0); + CALL(arm_fcos, -1); + break; + + + case 0xD1: + case 0xD4: + case 0xD5: + case 0xD6: + case 0xD7: + case 0xD8: + case 0xD9: + case 0xDA: + case 0xDB: + case 0xDC: + case 0xDD: + case 0xDE: + case 0xDF: + case 0xE2: + case 0xE3: + case 0xE6: + case 0xE7: + case 0xEF: + DEFAULT; + break; + + default: + switch((nextop>>3)&7) { + case 0: + INST_NAME("FLD ST0, float[ED]"); + v1 = x87_do_push(dyn, ninst); + s0 = fpu_get_scratch(dyn); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); + VLDR32_U12(s0, ed, fixedaddress); + FCVT_D_S(v1, s0); + break; + case 2: + INST_NAME("FST float[ED], ST0"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + s0 = fpu_get_scratch(dyn); + FCVT_S_D(s0, v1); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); + 
VSTR32_U12(s0, ed, fixedaddress); + break; + case 3: + INST_NAME("FSTP float[ED], ST0"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + s0 = fpu_get_scratch(dyn); + FCVT_S_D(s0, v1); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); + VSTR32_U12(s0, ed, fixedaddress); + x87_do_pop(dyn, ninst); + break; + case 4: + INST_NAME("FLDENV Ed"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + fpu_purgecache(dyn, ninst, x1, x2, x3); // maybe only x87, not SSE? + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); + if(ed!=x1) { + MOVx_REG(x1, ed); + } + MOV32w(x2, 0); + CALL(fpu_loadenv, -1); + break; + case 5: + INST_NAME("FLDCW Ew"); + GETEW(x1, 0); + STRH_U12(x1, xEmu, offsetof(x64emu_t, cw)); // hopefully cw is not too far for an imm8 + UBFXw(x1, x1, 10, 2); // extract round + STRw_U12(x1, xEmu, offsetof(x64emu_t, round)); + break; + case 6: + INST_NAME("FNSTENV Ed"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + fpu_purgecache(dyn, ninst, x1, x2, x3); // maybe only x87, not SSE? + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); + if(ed!=x1) { + MOVx_REG(x1, ed); + } + MOV32w(x2, 0); + CALL(fpu_savenv, -1); + break; + case 7: + INST_NAME("FNSTCW Ew"); + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<1, 1, rex, 0, 0); + ed = x1; + wb1 = 1; + LDRH_U12(x1, xEmu, offsetof(x64emu_t, cw)); + EWBACK; + break; + default: + DEFAULT; + } + } + return addr; +} diff --git a/src/dynarec/arm64/dynarec_arm64_db.c b/src/dynarec/arm64/dynarec_arm64_db.c new file mode 100644 index 00000000..cbc2c0ef --- /dev/null +++ b/src/dynarec/arm64/dynarec_arm64_db.c @@ -0,0 +1,307 @@ +#include +#include +#include +#include +#include + +#include "debug.h" +#include "box64context.h" +#include "dynarec.h" +#include "emu/x64emu_private.h" +#include "emu/x64run_private.h" +#include "x64run.h" +#include "x64emu.h" +#include "box64stack.h" +#include "callback.h" +#include "emu/x64run_private.h" +#include "x64trace.h" +#include "emu/x87emu_private.h" +#include "dynarec_native.h" + +#include "arm64_printer.h" +#include "dynarec_arm64_private.h" +#include "dynarec_arm64_helper.h" +#include "dynarec_arm64_functions.h" + + +uintptr_t dynarec64_DB(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog) +{ + (void)ip; (void)rep; (void)need_epilog; + + uint8_t nextop = F8; + uint8_t ed; + uint8_t wback; + uint8_t u8; + int64_t fixedaddress; + int v1, v2; + int s0; + int64_t j64; + + MAYUSE(s0); + MAYUSE(v2); + MAYUSE(v1); + MAYUSE(j64); + + switch(nextop) { + case 0xC0: + case 0xC1: + case 0xC2: + case 0xC3: + case 0xC4: + case 0xC5: + case 0xC6: + case 0xC7: + INST_NAME("FCMOVNB ST0, STx"); + READFLAGS(X_CF); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7); + TSTw_mask(xFlags, 0, 0); //mask=1<>3)&7) { + case 0: + INST_NAME("FILD ST0, Ed"); + v1 = x87_do_push(dyn, ninst); + s0 = fpu_get_scratch(dyn); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); + VLDR32_U12(s0, ed, fixedaddress); + SXTL_32(v1, s0); + SCVTFDD(v1, v1); + break; + case 1: + INST_NAME("FISTTP Ed, ST0"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + if(MODREG) { + ed = xRAX+(nextop&7)+(rex.b<<3); + wback = 0; + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); + ed = x1; + } + s0 = fpu_get_scratch(dyn); + #if 0 + FRINT32ZD(s0, v1); + FCVTZSwD(ed, s0); + WBACK; + #else + 
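+ // x86 FIST-family stores return the "integer indefinite" 0x80000000 when the
+ // value cannot be represented. The sequence below clears FPSR.IOC, converts
+ // with saturating NEON ops, then re-reads FPSR: if the invalid-operation flag
+ // was raised, the stored result is overwritten with 0x80000000.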
MRS_fpsr(x5); + BFCw(x5, FPSR_IOC, 1); // reset IOC bit + MSR_fpsr(x5); + FRINTZD(s0, v1); + VFCVTZSd(s0, s0); + SQXTN_S_D(s0, s0); + VSTR32_U12(s0, wback, fixedaddress); + MRS_fpsr(x5); // get back FPSR to check the IOC bit + TBZ_MARK3(x5, FPSR_IOC); + MOV32w(x5, 0x80000000); + STRw_U12(x5, wback, fixedaddress); + MARK3; + #endif + x87_do_pop(dyn, ninst); + break; + case 2: + INST_NAME("FIST Ed, ST0"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + u8 = x87_setround(dyn, ninst, x1, x2, x4); // x1 have the modified RPSCR reg + if(MODREG) { + ed = xRAX+(nextop&7)+(rex.b<<3); + wback = 0; + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); + ed = x1; + } + s0 = fpu_get_scratch(dyn); + #if 0 + FRINT32XD(s0, v1); + FCVTZSwD(ed, s0); + WBACK; + #else + MRS_fpsr(x5); + BFCw(x5, FPSR_IOC, 1); // reset IOC bit + MSR_fpsr(x5); + FRINTXD(s0, v1); + VFCVTZSd(s0, s0); + SQXTN_S_D(s0, s0); + VSTR32_U12(s0, wback, fixedaddress); + MRS_fpsr(x5); // get back FPSR to check the IOC bit + TBZ_MARK3(x5, FPSR_IOC); + MOV32w(x5, 0x80000000); + STRw_U12(x5, wback, fixedaddress); + MARK3; + #endif + x87_restoreround(dyn, ninst, u8); + break; + case 3: + INST_NAME("FISTP Ed, ST0"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + u8 = x87_setround(dyn, ninst, x1, x2, x4); // x1 have the modified RPSCR reg + if(MODREG) { + ed = xRAX+(nextop&7)+(rex.b<<3); + wback = 0; + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); + ed = x1; + } + s0 = fpu_get_scratch(dyn); + #if 0 + FRINT32XD(s0, v1); + FCVTZSwD(ed, s0); + WBACK; + #else + MRS_fpsr(x5); + BFCw(x5, FPSR_IOC, 1); // reset IOC bit + MSR_fpsr(x5); + FRINTXD(s0, v1); + VFCVTZSd(s0, s0); + SQXTN_S_D(s0, s0); + VSTR32_U12(s0, wback, fixedaddress); + MRS_fpsr(x5); // get back FPSR to check the IOC bit + TBZ_MARK3(x5, FPSR_IOC); + MOV32w(x5, 0x80000000); + STRw_U12(x5, wback, fixedaddress); + MARK3; + #endif + x87_restoreround(dyn, ninst, u8); + x87_do_pop(dyn, ninst); + break; + case 5: + INST_NAME("FLD tbyte"); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); + if(PK(0)==0xDB && ((PK(1)>>3)&7)==7) { + // the FLD is immediatly followed by an FSTP + LDRx_U12(x5, ed, 0); + LDRH_U12(x6, ed, 8); + // no persistant scratch register, so unrool both instruction here... 
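+ // Peephole: an FLD tbyte immediately followed by an FSTP tbyte is just a
+ // 10-byte copy, so both instructions are handled here with a plain 8-byte and
+ // 2-byte load/store pair instead of calling the 80-bit soft-float helpers.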
+ MESSAGE(LOG_DUMP, "\tHack: FSTP tbyte\n"); + nextop = F8; //0xDB + nextop = F8; //modrm + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); + STRx_U12(x5, ed, 0); + STRH_U12(x6, ed, 8); + } else { + if(ed!=x1) { + MOVx_REG(x1, ed); + } + x87_do_push_empty(dyn, ninst, x3); + CALL(arm_fld, -1); + } + break; + case 7: + INST_NAME("FSTP tbyte"); + x87_forget(dyn, ninst, x1, x3, 0); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); + if(ed!=x1) { + MOVx_REG(x1, ed); + } + CALL(arm_fstp, -1); + x87_do_pop(dyn, ninst); + break; + default: + DEFAULT; + } + } + return addr; +} diff --git a/src/dynarec/arm64/dynarec_arm64_dc.c b/src/dynarec/arm64/dynarec_arm64_dc.c new file mode 100644 index 00000000..3877bcc9 --- /dev/null +++ b/src/dynarec/arm64/dynarec_arm64_dc.c @@ -0,0 +1,219 @@ +#include +#include +#include +#include +#include + +#include "debug.h" +#include "box64context.h" +#include "dynarec.h" +#include "emu/x64emu_private.h" +#include "emu/x64run_private.h" +#include "x64run.h" +#include "x64emu.h" +#include "box64stack.h" +#include "callback.h" +#include "emu/x64run_private.h" +#include "x64trace.h" +#include "emu/x87emu_private.h" +#include "dynarec_native.h" + +#include "arm64_printer.h" +#include "dynarec_arm64_private.h" +#include "dynarec_arm64_helper.h" +#include "dynarec_arm64_functions.h" + + +uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog) +{ + (void)ip; (void)rep; (void)need_epilog; + + uint8_t nextop = F8; + uint8_t wback; + int64_t fixedaddress; + int v1, v2; + + MAYUSE(v2); + MAYUSE(v1); + + switch(nextop) { + case 0xC0: + case 0xC1: + case 0xC2: + case 0xC3: + case 0xC4: + case 0xC5: + case 0xC6: + case 0xC7: + INST_NAME("FADD STx, ST0"); + v2 = x87_get_st(dyn, ninst, x1, x2, 0); + v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7); + FADDD(v1, v1, v2); + break; + case 0xC8: + case 0xC9: + case 0xCA: + case 0xCB: + case 0xCC: + case 0xCD: + case 0xCE: + case 0xCF: + INST_NAME("FMUL STx, ST0"); + v2 = x87_get_st(dyn, ninst, x1, x2, 0); + v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7); + FMULD(v1, v1, v2); + break; + case 0xD0: + case 0xD1: + case 0xD2: + case 0xD3: + case 0xD4: + case 0xD5: + case 0xD6: + case 0xD7: + INST_NAME("FCOM ST0, STx"); //yep + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7); + FCMPD(v1, v2); + FCOM(x1, x2, x3); + break; + case 0xD8: + case 0xD9: + case 0xDA: + case 0xDB: + case 0xDC: + case 0xDD: + case 0xDE: + case 0xDF: + INST_NAME("FCOMP ST0, STx"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7); + FCMPD(v1, v2); + FCOM(x1, x2, x3); + x87_do_pop(dyn, ninst); + break; + case 0xE0: + case 0xE1: + case 0xE2: + case 0xE3: + case 0xE4: + case 0xE5: + case 0xE6: + case 0xE7: + INST_NAME("FSUBR STx, ST0"); + v2 = x87_get_st(dyn, ninst, x1, x2, 0); + v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7); + FSUBD(v1, v2, v1); + break; + case 0xE8: + case 0xE9: + case 0xEA: + case 0xEB: + case 0xEC: + case 0xED: + case 0xEE: + case 0xEF: + INST_NAME("FSUB STx, ST0"); + v2 = x87_get_st(dyn, ninst, x1, x2, 0); + v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7); + FSUBD(v1, v1, v2); + break; + case 0xF0: + case 0xF1: + case 0xF2: + case 0xF3: + case 0xF4: + case 0xF5: + case 0xF6: + case 0xF7: + INST_NAME("FDIVR STx, ST0"); + v2 = x87_get_st(dyn, ninst, x1, x2, 0); + v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7); + FDIVD(v1, v2, v1); + break; + 
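+ // Note on the DC "reversed" forms: FSUBR/FDIVR STx, ST0 compute STx = ST0 - STx
+ // and STx = ST0 / STx, so the destination register appears as the second source
+ // of FSUBD/FDIVD above, while the plain FSUB/FDIV cases below keep it as the
+ // first source.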
case 0xF8: + case 0xF9: + case 0xFA: + case 0xFB: + case 0xFC: + case 0xFD: + case 0xFE: + case 0xFF: + INST_NAME("FDIV STx, ST0"); + v2 = x87_get_st(dyn, ninst, x1, x2, 0); + v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7); + FDIVD(v1, v1, v2); + break; + default: + switch((nextop>>3)&7) { + case 0: + INST_NAME("FADD ST0, double[ED]"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + v2 = fpu_get_scratch(dyn); + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<3, 3, rex, 0, 0); + VLDR64_U12(v2, wback, fixedaddress); + FADDD(v1, v1, v2); + break; + case 1: + INST_NAME("FMUL ST0, double[ED]"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + v2 = fpu_get_scratch(dyn); + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<3, 3, rex, 0, 0); + VLDR64_U12(v2, wback, fixedaddress); + FMULD(v1, v1, v2); + break; + case 2: + INST_NAME("FCOM ST0, double[ED]"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + v2 = fpu_get_scratch(dyn); + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<3, 3, rex, 0, 0); + VLDR64_U12(v2, wback, fixedaddress); + FCMPD(v1, v2); + FCOM(x1, x2, x3); + break; + case 3: + INST_NAME("FCOMP ST0, double[ED]"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + v2 = fpu_get_scratch(dyn); + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<3, 3, rex, 0, 0); + VLDR64_U12(v2, wback, fixedaddress); + FCMPD(v1, v2); + FCOM(x1, x2, x3); + x87_do_pop(dyn, ninst); + break; + case 4: + INST_NAME("FSUB ST0, double[ED]"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + v2 = fpu_get_scratch(dyn); + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<3, 3, rex, 0, 0); + VLDR64_U12(v2, wback, fixedaddress); + FSUBD(v1, v1, v2); + break; + case 5: + INST_NAME("FSUBR ST0, double[ED]"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + v2 = fpu_get_scratch(dyn); + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<3, 3, rex, 0, 0); + VLDR64_U12(v2, wback, fixedaddress); + FSUBD(v1, v2, v1); + break; + case 6: + INST_NAME("FDIV ST0, double[ED]"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + v2 = fpu_get_scratch(dyn); + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<3, 3, rex, 0, 0); + VLDR64_U12(v2, wback, fixedaddress); + FDIVD(v1, v1, v2); + break; + case 7: + INST_NAME("FDIVR ST0, double[ED]"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + v2 = fpu_get_scratch(dyn); + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<3, 3, rex, 0, 0); + VLDR64_U12(v2, wback, fixedaddress); + FDIVD(v1, v2, v1); + break; + } + } + return addr; +} diff --git a/src/dynarec/arm64/dynarec_arm64_dd.c b/src/dynarec/arm64/dynarec_arm64_dd.c new file mode 100644 index 00000000..4b73cc97 --- /dev/null +++ b/src/dynarec/arm64/dynarec_arm64_dd.c @@ -0,0 +1,205 @@ +#include +#include +#include +#include +#include + +#include "debug.h" +#include "box64context.h" +#include "dynarec.h" +#include "emu/x64emu_private.h" +#include "emu/x64run_private.h" +#include "x64run.h" +#include "x64emu.h" +#include "box64stack.h" +#include "callback.h" +#include "emu/x64run_private.h" +#include "x64trace.h" +#include "emu/x87emu_private.h" +#include "dynarec_native.h" + +#include "arm64_printer.h" +#include "dynarec_arm64_private.h" +#include "dynarec_arm64_helper.h" +#include "dynarec_arm64_functions.h" + + +uintptr_t dynarec64_DD(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog) +{ + (void)ip; (void)rep; (void)need_epilog; + + uint8_t 
nextop = F8; + uint8_t ed; + int64_t fixedaddress; + int v1, v2; + int s0; + + MAYUSE(s0); + MAYUSE(v2); + MAYUSE(v1); + + switch(nextop) { + case 0xC0: + case 0xC1: + case 0xC2: + case 0xC3: + case 0xC4: + case 0xC5: + case 0xC6: + case 0xC7: + INST_NAME("FFREE STx"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + x87_purgecache(dyn, ninst, x1, x2, x3); + MOV32w(x1, nextop-0xC0); + CALL(fpu_do_free, -1); + break; + case 0xD0: + case 0xD1: + case 0xD2: + case 0xD3: + case 0xD4: + case 0xD5: + case 0xD6: + case 0xD7: + INST_NAME("FST ST0, STx"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7); + FMOVD(v2, v1); + break; + case 0xD8: + INST_NAME("FSTP ST0, ST0"); + x87_do_pop(dyn, ninst); + break; + case 0xD9: + case 0xDA: + case 0xDB: + case 0xDC: + case 0xDD: + case 0xDE: + case 0xDF: + INST_NAME("FSTP ST0, STx"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7); + FMOVD(v2, v1); + x87_do_pop(dyn, ninst); + break; + + case 0xE0: + case 0xE1: + case 0xE2: + case 0xE3: + case 0xE4: + case 0xE5: + case 0xE6: + case 0xE7: + INST_NAME("FUCOM ST0, STx"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7); + FCMPD(v1, v2); + FCOM(x1, x2, x3); + break; + case 0xE8: + case 0xE9: + case 0xEA: + case 0xEB: + case 0xEC: + case 0xED: + case 0xEE: + case 0xEF: + INST_NAME("FUCOMP ST0, STx"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7); + FCMPD(v1, v2); + FCOM(x1, x2, x3); + x87_do_pop(dyn, ninst); + break; + + case 0xC8: + case 0xC9: + case 0xCA: + case 0xCB: + case 0xCC: + case 0xCD: + case 0xCE: + case 0xCF: + case 0xF0: + case 0xF1: + case 0xF2: + case 0xF3: + case 0xF4: + case 0xF5: + case 0xF6: + case 0xF7: + case 0xF8: + case 0xF9: + case 0xFA: + case 0xFB: + case 0xFC: + case 0xFD: + case 0xFE: + case 0xFF: + DEFAULT; + break; + + default: + switch((nextop>>3)&7) { + case 0: + INST_NAME("FLD double"); + v1 = x87_do_push(dyn, ninst); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0); + VLDR64_U12(v1, ed, fixedaddress); + break; + case 1: + INST_NAME("FISTTP i64, ST0"); + v1 = x87_do_push(dyn, ninst); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0); + s0 = fpu_get_scratch(dyn); + FRINT64ZD(s0, v1); + FCVTZSxD(x2, s0); + STRx_U12(x2, ed, fixedaddress); + x87_do_pop(dyn, ninst); + break; + case 2: + INST_NAME("FST double"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0); + VSTR64_U12(v1, ed, fixedaddress); + break; + case 3: + INST_NAME("FSTP double"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0); + VSTR64_U12(v1, ed, fixedaddress); + x87_do_pop(dyn, ninst); + break; + case 4: + INST_NAME("FRSTOR m108byte"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + fpu_purgecache(dyn, ninst, x1, x2, x3); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); + if(ed!=x1) {MOVx_REG(x1, ed);} + CALL(arm_frstor, -1); + break; + case 6: + INST_NAME("FSAVE m108byte"); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + fpu_purgecache(dyn, ninst, x1, x2, x3); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); + if(ed!=x1) {MOVx_REG(x1, ed);} + CALL(arm_fsave, -1); + break; + case 7: + INST_NAME("FNSTSW m2byte"); + fpu_purgecache(dyn, ninst, x1, x2, x3); + 
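+ // FNSTSW stores the x87 status word: the emulated top-of-stack index kept in
+ // x64emu_t.top is merged into bits 11..13 of sw (the BFI below) before the
+ // 16-bit store.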
addr = geted(dyn, addr, ninst, nextop, &ed, x4, &fixedaddress, 0xfff<<1, 1, rex, 0, 0); + LDRw_U12(x1, xEmu, offsetof(x64emu_t, top)); + LDRH_U12(x3, xEmu, offsetof(x64emu_t, sw)); + BFIw(x3, x1, 11, 3); // inject TOP at bit 11 (3 bits) + STRH_U12(x3, ed, fixedaddress); // store whole sw flags + break; + default: + DEFAULT; + } + } + return addr; +} diff --git a/src/dynarec/arm64/dynarec_arm64_df.c b/src/dynarec/arm64/dynarec_arm64_df.c new file mode 100644 index 00000000..a90f3331 --- /dev/null +++ b/src/dynarec/arm64/dynarec_arm64_df.c @@ -0,0 +1,295 @@ +#include +#include +#include +#include +#include + +#include "debug.h" +#include "box64context.h" +#include "dynarec.h" +#include "emu/x64emu_private.h" +#include "emu/x64run_private.h" +#include "x64run.h" +#include "x64emu.h" +#include "box64stack.h" +#include "callback.h" +#include "emu/x64run_private.h" +#include "x64trace.h" +#include "emu/x87emu_private.h" +#include "dynarec_native.h" + +#include "arm64_printer.h" +#include "dynarec_arm64_private.h" +#include "dynarec_arm64_helper.h" +#include "dynarec_arm64_functions.h" + + +uintptr_t dynarec64_DF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog) +{ + (void)ip; (void)rep; (void)need_epilog; + + uint8_t nextop = F8; + uint8_t ed, wback, u8; + int v1, v2; + int s0; + int64_t j64; + int64_t fixedaddress; + + MAYUSE(s0); + MAYUSE(v2); + MAYUSE(v1); + MAYUSE(j64); + + switch(nextop) { + case 0xC0: + case 0xC1: + case 0xC2: + case 0xC3: + case 0xC4: + case 0xC5: + case 0xC6: + case 0xC7: + INST_NAME("FFREEP STx"); + // not handling Tag... + x87_do_pop(dyn, ninst); + break; + + case 0xE0: + INST_NAME("FNSTSW AX"); + LDRw_U12(x2, xEmu, offsetof(x64emu_t, top)); + LDRH_U12(x1, xEmu, offsetof(x64emu_t, sw)); + BFIw(x1, x2, 11, 3); // inject top + BFIw(xRAX, x1, 0, 16); + break; + case 0xE8: + case 0xE9: + case 0xEA: + case 0xEB: + case 0xEC: + case 0xED: + case 0xEE: + case 0xEF: + INST_NAME("FUCOMIP ST0, STx"); + SETFLAGS(X_ALL, SF_SET); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7); + FCMPD(v1, v2); + FCOMI(x1, x2); + x87_do_pop(dyn, ninst); + break; + case 0xF0: + case 0xF1: + case 0xF2: + case 0xF3: + case 0xF4: + case 0xF5: + case 0xF6: + case 0xF7: + INST_NAME("FCOMIP ST0, STx"); + SETFLAGS(X_ALL, SF_SET); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7); + FCMPD(v1, v2); + FCOMI(x1, x2); + x87_do_pop(dyn, ninst); + break; + + case 0xC8: + case 0xC9: + case 0xCA: + case 0xCB: + case 0xCC: + case 0xCD: + case 0xCE: + case 0xCF: + case 0xD0: + case 0xD1: + case 0xD2: + case 0xD3: + case 0xD4: + case 0xD5: + case 0xD6: + case 0xD7: + case 0xD8: + case 0xD9: + case 0xDA: + case 0xDB: + case 0xDC: + case 0xDD: + case 0xDE: + case 0xDF: + case 0xE1: + case 0xE2: + case 0xE3: + case 0xE4: + case 0xE5: + case 0xE6: + case 0xE7: + case 0xF8: + case 0xF9: + case 0xFA: + case 0xFB: + case 0xFC: + case 0xFD: + case 0xFE: + case 0xFF: + DEFAULT; + break; + + default: + switch((nextop>>3)&7) { + case 0: + INST_NAME("FILD ST0, Ew"); + v1 = x87_do_push(dyn, ninst); + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<1, 1, rex, 0, 0); + LDRSHw_U12(x1, wback, fixedaddress); + SCVTFDw(v1, x1); + break; + case 1: + INST_NAME("FISTTP Ew, ST0"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<1, 1, rex, 0, 0); + ed = x1; + s0 = fpu_get_scratch(dyn); + #if 0 + // this version 
needs ARM v8.5, //TODO: add detection of this extensio to use it + FRINT32ZD(s0, v1); + // no saturation instruction on Arm, so using NEON + VFCVTZSd(s0, s0); + SQXTN_S_D(s0, s0); + SQXTN_H_S(s0, s0); + VSTR16_U12(s0, wback, fixedaddress); + #else + MRS_fpsr(x5); + BFCw(x5, FPSR_IOC, 1); // reset IOC bit + MSR_fpsr(x5); + VFCVTZSd(s0, v1); + SQXTN_S_D(s0, s0); + SQXTN_H_S(s0, s0); + VSTR16_U12(s0, wback, fixedaddress); + MRS_fpsr(x5); // get back FPSR to check the IOC bit + TBZ_MARK3(x5, FPSR_IOC); + MOV32w(x5, 0x8000); + STRH_U12(x5, wback, fixedaddress); + MARK3; + #endif + x87_do_pop(dyn, ninst); + break; + case 2: + INST_NAME("FIST Ew, ST0"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + u8 = x87_setround(dyn, ninst, x1, x2, x4); + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<1, 1, rex, 0, 0); + ed = x1; + s0 = fpu_get_scratch(dyn); + #if 0 + FRINT32XD(s0, v1); + // no saturation instruction on Arm, so using NEON + VFCVTZSd(s0, s0); + SQXTN_S_D(s0, s0); + SQXTN_H_S(s0, s0); + VSTR16_U12(s0, wback, fixedaddress); + #else + MRS_fpsr(x5); + BFCw(x5, FPSR_IOC, 1); // reset IOC bit + MSR_fpsr(x5); + FRINTXD(s0, v1); + VFCVTZSd(s0, s0); + SQXTN_S_D(s0, s0); + SQXTN_H_S(s0, s0); + VSTR16_U12(s0, wback, fixedaddress); + MRS_fpsr(x5); // get back FPSR to check the IOC bit + TBZ_MARK3(x5, FPSR_IOC); + MOV32w(x5, 0x8000); + STRH_U12(x5, wback, fixedaddress); + MARK3; + #endif + x87_restoreround(dyn, ninst, u8); + break; + case 3: + INST_NAME("FISTP Ew, ST0"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + u8 = x87_setround(dyn, ninst, x1, x2, x4); + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<1, 1, rex, 0, 0); + ed = x1; + s0 = fpu_get_scratch(dyn); + #if 0 + FRINT32XD(s0, v1); + // no saturation instruction on Arm, so using NEON + VFCVTZSd(s0, s0); + SQXTN_S_D(s0, s0); + SQXTN_H_S(s0, s0); + VSTR16_U12(s0, wback, fixedaddress); + #else + MRS_fpsr(x5); + BFCw(x5, FPSR_IOC, 1); // reset IOC bit + MSR_fpsr(x5); + FRINTXD(s0, v1); + VFCVTZSd(s0, s0); + SQXTN_S_D(s0, s0); + SQXTN_H_S(s0, s0); + VSTR16_U12(s0, wback, fixedaddress); + MRS_fpsr(x5); // get back FPSR to check the IOC bit + TBZ_MARK3(x5, FPSR_IOC); + MOV32w(x5, 0x8000); + STRH_U12(x5, wback, fixedaddress); + MARK3; + #endif + x87_do_pop(dyn, ninst); + x87_restoreround(dyn, ninst, u8); + break; + case 4: + INST_NAME("FBLD ST0, tbytes"); + x87_do_push_empty(dyn, ninst, x1); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); + if(ed!=x1) {MOVx_REG(x1, ed);} + CALL(fpu_fbld, -1); + break; + case 5: + INST_NAME("FILD ST0, i64"); + v1 = x87_do_push(dyn, ninst); + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<3, 7, rex, 0, 0); + LDRx_U12(x1, wback, fixedaddress); + SCVTFDx(v1, x1); + break; + case 6: + INST_NAME("FBSTP tbytes, ST0"); + x87_forget(dyn, ninst, x1, x2, 0); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); + if(ed!=x1) {MOVx_REG(x1, ed);} + CALL(fpu_fbst, -1); + x87_do_pop(dyn, ninst); + break; + case 7: + INST_NAME("FISTP i64, ST0"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + u8 = x87_setround(dyn, ninst, x1, x2, x4); + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<3, 7, rex, 0, 0); + ed = x1; + s0 = fpu_get_scratch(dyn); + #if 0 + FRINT64XD(s0, v1); + VFCVTZSd(s0, s0); + VSTR64_U12(s0, wback, fixedaddress); + #else + MRS_fpsr(x5); + BFCw(x5, FPSR_IOC, 1); // reset IOC bit + MSR_fpsr(x5); + FRINTXD(s0, v1); + VFCVTZSd(s0, s0); + VSTR64_U12(s0, wback, 
fixedaddress); + MRS_fpsr(x5); // get back FPSR to check the IOC bit + TBZ_MARK3(x5, FPSR_IOC); + MOV64x(x5, 0x8000000000000000LL); + STRx_U12(x5, wback, fixedaddress); + MARK3; + #endif + x87_restoreround(dyn, ninst, u8); + x87_do_pop(dyn, ninst); + break; + default: + DEFAULT; + } + } + return addr; +} diff --git a/src/dynarec/arm64/dynarec_arm64_emit_logic.c b/src/dynarec/arm64/dynarec_arm64_emit_logic.c new file mode 100755 index 00000000..24453b44 --- /dev/null +++ b/src/dynarec/arm64/dynarec_arm64_emit_logic.c @@ -0,0 +1,679 @@ +#include +#include +#include +#include +#include + +#include "debug.h" +#include "box64context.h" +#include "dynarec.h" +#include "emu/x64emu_private.h" +#include "emu/x64run_private.h" +#include "x64run.h" +#include "x64emu.h" +#include "box64stack.h" +#include "callback.h" +#include "emu/x64run_private.h" +#include "x64trace.h" +#include "dynarec_native.h" +#include "../tools/bridge_private.h" + +#include "arm64_printer.h" +#include "dynarec_arm64_private.h" +#include "dynarec_arm64_functions.h" +#include "dynarec_arm64_helper.h" + +// emit OR32 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch +void emit_or32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4) +{ + MAYUSE(s2); + IFX(X_PEND) { + STRxw_U12(s1, xEmu, offsetof(x64emu_t, op1)); + STRxw_U12(s2, xEmu, offsetof(x64emu_t, op2)); + SET_DF(s4, rex.w?d_or64:d_or32); + } else IFX(X_ALL) { + SET_DFNONE(s4); + } + ORRxw_REG(s1, s1, s2); + IFX(X_PEND) { + STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_CF | X_AF | X_OF) { + MOV32w(s3, (1<=0 && c<256) { +// IFX(X_ALL) { +// ORRS_IMM8(s1, s1, c, 0); +// } else { +// ORR_IMM8(s1, s1, c, 0); +// } +// } else { +// IFX(X_PEND) {} else {MOVW(s3, c);} +// IFX(X_ALL) { +// ORRS_REG_LSL_IMM5(s1, s1, s3, 0); +// } else { +// ORR_REG_LSL_IMM5(s1, s1, s3, 0); +// } +// } +// IFX(X_PEND) { +// STR_IMM9(s1, xEmu, offsetof(x64emu_t, res)); +// } +// IFX(X_CF | X_AF | X_ZF) { +// BIC_IMM8(xFlags, xFlags, (1<=0 && c<256) { +// IFX(X_ALL) { +// XORS_IMM8(s1, s1, c); +// } else { +// XOR_IMM8(s1, s1, c); +// } +// } else { +// IFX(X_PEND) {} else {MOVW(s3, c);} +// IFX(X_ALL) { +// XORS_REG_LSL_IMM5(s1, s1, s3, 0); +// } else { +// XOR_REG_LSL_IMM5(s1, s1, s3, 0); +// } +// } +// IFX(X_PEND) { +// STR_IMM9(s1, xEmu, offsetof(x64emu_t, res)); +// } +// IFX(X_CF | X_AF | X_ZF) { +// BIC_IMM8(xFlags, xFlags, (1<=0 && c<256) { +// IFX(X_ALL) { +// ANDS_IMM8(s1, s1, c); +// } else { +// AND_IMM8(s1, s1, c); +// } +// } else { +// IFX(X_PEND) {} else {MOVW(s3, c);} +// IFX(X_ALL) { +// ANDS_REG_LSL_IMM5(s1, s1, s3, 0); +// } else { +// AND_REG_LSL_IMM5(s1, s1, s3, 0); +// } +// } +// IFX(X_PEND) { +// STR_IMM9(s1, xEmu, offsetof(x64emu_t, res)); +// } +// IFX(X_CF | X_AF | X_ZF) { +// BIC_IMM8(xFlags, xFlags, (1< +#include +#include +#include +#include + +#include "debug.h" +#include "box64context.h" +#include "dynarec.h" +#include "emu/x64emu_private.h" +#include "emu/x64run_private.h" +#include "x64run.h" +#include "x64emu.h" +#include "box64stack.h" +#include "callback.h" +#include "emu/x64run_private.h" +#include "x64trace.h" +#include "dynarec_native.h" +#include "../tools/bridge_private.h" + +#include "arm64_printer.h" +#include "dynarec_arm64_private.h" +#include "dynarec_arm64_functions.h" +#include "dynarec_arm64_helper.h" + +// emit ADD32 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch +void emit_add32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4) +{ + MAYUSE(s2); + 
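+ // Common pattern of the emit_* helpers: when only pending flags are requested
+ // (X_PEND), op1/op2 and a deferred-flags id are stored in the emu context and
+ // the computation is left to the generic flag handler; otherwise every flag a
+ // later instruction may read is materialized inline. AF has no AArch64
+ // equivalent, so it is rebuilt from the carry chain
+ //     bc = (op1 & op2) | ((op1 | op2) & ~res)
+ // and taken from bit 3 of bc. The ADC/SBB variants further down first copy the
+ // x86 CF into bit 29 (C) of NZCV with MRS/MSR (inverted for SBB, since AArch64
+ // subtraction uses an inverted borrow) before emitting ADC/SBC.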
IFX(X_PEND) { + STRxw_U12(s1, xEmu, offsetof(x64emu_t, op1)); + STRxw_U12(s2, xEmu, offsetof(x64emu_t, op2)); + SET_DF(s3, rex.w?d_add64:d_add32b); + } else IFX(X_ALL) { + SET_DFNONE(s3); + } + IFX(X_AF) { + ORRxw_REG(s3, s1, s2); // s3 = op1 | op2 + ANDxw_REG(s4, s1, s2); // s4 = op1 & op2 + } + IFX(X_ALL) { + ADDSxw_REG(s1, s1, s2); + } else { + ADDxw_REG(s1, s1, s2); + } + IFX(X_PEND) { + STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_AF) { + BICxw_REG(s3, s3, s1); // s3 = (op1 | op2) & ~ res + ORRxw_REG(s3, s3, s4); // s3 = (op1 & op2) | ((op1 | op2) & ~ res) + LSRxw(s4, s3, 3); + BFIxw(xFlags, s4, F_AF, 1); // AF: bc & 0x08 + } + IFX(X_ZF) { + CSETw(s4, cEQ); + BFIw(xFlags, s4, F_ZF, 1); + } + IFX(X_CF) { + CSETw(s4, cCS); + BFIw(xFlags, s4, F_CF, 1); + } + IFX(X_OF) { + CSETw(s4, cVS); + BFIw(xFlags, s4, F_OF, 1); + } + IFX(X_SF) { + LSRxw(s3, s1, (rex.w)?63:31); + BFIx(xFlags, s3, F_SF, 1); + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} + +// emit ADD32 instruction, from s1, constant c, store result in s1 using s3 and s4 as scratch +void emit_add32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4, int s5) +{ + MAYUSE(s5); + if(s1==xRSP && (!dyn->insts || dyn->insts[ninst].x64.need_flags==X_PEND)) + { + // special case when doing math on ESP and only PEND is needed: ignoring it! + if(c>=0 && c<0x1000) { + ADDx_U12(s1, s1, c); + } else { + MOV64x(s3, c); + ADDx_REG(s1, s1, s3); + } + return; + } + IFX(X_PEND) { + MOV64xw(s5, c); + STRxw_U12(s1, xEmu, offsetof(x64emu_t, op1)); + STRxw_U12(s5, xEmu, offsetof(x64emu_t, op2)); + SET_DF(s4, rex.w?d_add64:d_add32b); + } else IFX(X_ALL) { + SET_DFNONE(s4); + } + IFX(X_AF) { + IFX(X_PEND) {} else {MOV64xw(s5, c);} + ORRxw_REG(s3, s1, s5); // s3 = op1 | op2 + ANDxw_REG(s4, s1, s5); // s4 = op1 & op2 + } + if(c>=0 && c<0x1000) { + IFX(X_ALL) { + ADDSxw_U12(s1, s1, c); + } else { + ADDxw_U12(s1, s1, c); + } + } else { + IFX(X_PEND|X_AF) {} else {MOV64xw(s5, c);} + IFX(X_ALL) { + ADDSxw_REG(s1, s1, s5); + } else { + ADDxw_REG(s1, s1, s5); + } + } + IFX(X_PEND) { + STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_AF) { + BICxw_REG(s3, s3, s1); // s3 = (op1 | op2) & ~ res + ORRxw_REG(s3, s3, s4); // s3 = (op1 & op2) | ((op1 | op2) & ~ res) + LSRxw(s4, s3, 3); + BFIxw(xFlags, s4, F_AF, 1); // AF: bc & 0x08 + } + IFX(X_ZF) { + CSETw(s4, cEQ); + BFIw(xFlags, s4, F_ZF, 1); + } + IFX(X_CF) { + CSETw(s4, cCS); + BFIw(xFlags, s4, F_CF, 1); + } + IFX(X_OF) { + CSETw(s4, cVS); + BFIw(xFlags, s4, F_OF, 1); + } + IFX(X_SF) { + LSRxw(s3, s1, (rex.w)?63:31); + BFIx(xFlags, s3, F_SF, 1); + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} + +// emit SUB32 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch +void emit_sub32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4) +{ + MAYUSE(s2); + IFX(X_PEND) { + STRxw_U12(s1, xEmu, offsetof(x64emu_t, op1)); + STRxw_U12(s2, xEmu, offsetof(x64emu_t, op2)); + SET_DF(s3, rex.w?d_sub64:d_sub32); + } else IFX(X_ALL) { + SET_DFNONE(s3); + } + IFX(X_AF) { + MVNxw_REG(s3, s1); + ORRxw_REG(s3, s3, s2); // s3 = ~op1 | op2 + BICxw(s4, s2, s1); // s4 = ~op1 & op2 + } + IFX(X_ALL) { + SUBSxw_REG(s1, s1, s2); + } else { + SUBxw_REG(s1, s1, s2); + } + IFX(X_PEND) { + STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_AF) { + ANDxw_REG(s3, s3, s1); // s3 = (~op1 | op2) & res + ORRxw_REG(s3, s3, s4); // s3 = (~op1 & op2) | ((~op1 | op2) & res) + LSRxw(s4, s3, 3); + BFIx(xFlags, s4, F_AF, 1); // AF: bc & 0x08 + } + 
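+ // For subtraction the borrow chain is bc = (~op1 & op2) | ((~op1 | op2) & res);
+ // AF is bit 3 of bc. x86 CF is a borrow, whereas AArch64 sets C when no borrow
+ // occurred, hence the cCC condition used below. A rough plain-C model of the
+ // same computation (illustrative only, ad-hoc names, not part of the emitter):
+ //     uint32_t res = op1 - op2;
+ //     uint32_t bc  = (~op1 & op2) | ((~op1 | op2) & res);
+ //     af = (bc >> 3) & 1;   /* borrow out of bit 3 */
+ //     cf = (op1 < op2);     /* borrow out of the top bit */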
IFX(X_ZF) { + CSETw(s4, cEQ); + BFIw(xFlags, s4, F_ZF, 1); + } + IFX(X_CF) { + // inverted carry + CSETw(s4, cCC); + BFIw(xFlags, s4, F_CF, 1); + } + IFX(X_OF) { + CSETw(s4, cVS); + BFIw(xFlags, s4, F_OF, 1); + } + IFX(X_SF) { + LSRxw(s3, s1, (rex.w)?63:31); + BFIx(xFlags, s3, F_SF, 1); + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} + +// emit SUB32 instruction, from s1, constant c, store result in s1 using s3 and s4 as scratch +void emit_sub32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4, int s5) +{ + MAYUSE(s5); + if(s1==xRSP && (!dyn->insts || dyn->insts[ninst].x64.need_flags==X_PEND)) + { + // special case when doing math on RSP and only PEND is needed: ignoring it! + if(c>=0 && c<0x1000) { + SUBxw_U12(s1, s1, c); + } else { + MOV64xw(s5, c); + SUBxw_REG(s1, s1, s5); + } + return; + } + IFX(X_PEND) { + STRxw_U12(s1, xEmu, offsetof(x64emu_t, op1)); + MOV64xw(s5, c); + STRxw_U12(s5, xEmu, offsetof(x64emu_t, op2)); + SET_DF(s4, rex.w?d_sub64:d_sub32); + } else IFX(X_ALL) { + SET_DFNONE(s4); + } + IFX(X_AF) { + IFX(X_PEND) {} else {MOV64xw(s5, c);} + ORNxw_REG(s3, s5, s1); // s3 = ~op1 | op2 + BICxw_REG(s4, s5, s1); // s4 = ~op1 & op2 + } + if(c>=0 && c<0x1000) { + IFX(X_ALL) { + SUBSxw_U12(s1, s1, c); + } else { + SUBxw_U12(s1, s1, c); + } + } else { + IFX(X_PEND|X_AF) {} else {MOV64xw(s5, c);} + IFX(X_ALL) { + SUBSxw_REG(s1, s1, s5); + } else { + SUBxw_REG(s1, s1, s5); + } + } + IFX(X_PEND) { + STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_AF) { + ANDxw_REG(s3, s3, s1); // s3 = (~op1 | op2) & res + ORRxw_REG(s3, s3, s4); // s3 = (~op1 & op2) | ((~op1 | op2) & res) + LSRxw(s4, s3, 3); + BFIw(xFlags, s4, F_AF, 1); // AF: bc & 0x08 + } + IFX(X_ZF) { + CSETw(s4, cEQ); + BFIw(xFlags, s4, F_ZF, 1); + } + IFX(X_CF) { + // inverted carry + CSETw(s4, cCC); + BFIw(xFlags, s4, F_CF, 1); + } + IFX(X_OF) { + CSETw(s4, cVS); + BFIw(xFlags, s4, F_OF, 1); + } + IFX(X_SF) { + LSRxw(s3, s1, (rex.w)?63:31); + BFIx(xFlags, s3, F_SF, 1); + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} + +// emit ADD8 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch +void emit_add8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4) +{ + MAYUSE(s2); + IFX(X_PEND) { + STRB_U12(s1, xEmu, offsetof(x64emu_t, op1)); + STRB_U12(s2, xEmu, offsetof(x64emu_t, op2)); + SET_DF(s3, d_add8); + } else IFX(X_ALL) { + SET_DFNONE(s3); + } + IFX(X_AF | X_OF) { + ORRw_REG(s3, s1, s2); // s3 = op1 | op2 + ANDw_REG(s4, s1, s2); // s4 = op1 & op2 + } + ADDw_REG(s1, s1, s2); + IFX(X_AF|X_OF) { + BICw_REG(s3, s3, s1); // s3 = (op1 | op2) & ~ res + ORRw_REG(s3, s3, s4); // s3 = (op1 & op2) | ((op1 | op2) & ~ res) + IFX(X_AF) { + LSRw(s4, s3, 3); + BFIw(xFlags, s4, F_AF, 1); // AF: bc & 0x08 + } + IFX(X_OF) { + LSRw(s4, s3, 6); + EORw_REG_LSR(s4, s4, s4, 1); + BFIw(xFlags, s4, F_OF, 1); // OF: ((bc >> 6) ^ ((bc>>6)>>1)) & 1 + } + } + IFX(X_CF) { + LSRw(s3, s1, 8); + BFIw(xFlags, s3, F_CF, 1); + } + IFX(X_PEND) { + STRH_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_ZF) { + ANDSw_mask(s1, s1, 0, 7); //mask=0xff + CSETw(s3, cEQ); + BFIw(xFlags, s3, F_ZF, 1); + } + IFX(X_SF) { + LSRw(s3, s1, 7); + BFIw(xFlags, s3, F_SF, 1); + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} + +// emit ADD8 instruction, from s1, const c, store result in s1 using s3 and s4 as scratch +void emit_add8c(dynarec_arm_t* dyn, int ninst, int s1, int c, int s3, int s4) +{ + IFX(X_PEND) { + MOV32w(s4, c&0xff); + STRB_U12(s1, xEmu, offsetof(x64emu_t, op1)); + 
STRB_U12(s4, xEmu, offsetof(x64emu_t, op2)); + SET_DF(s3, d_add8); + } else IFX(X_ALL) { + SET_DFNONE(s3); + } + IFX(X_AF | X_OF) { + if(X_PEND) {} else {MOV32w(s4, c&0xff);} + ORRw_REG(s3, s1, s4); // s3 = op1 | op2 + ANDw_REG(s4, s1, s4); // s4 = op1 & op2 + } + ADDw_U12(s1, s1, c); + + IFX(X_AF|X_OF) { + BICw_REG(s3, s3, s1); // s3 = (op1 | op2) & ~ res + ORRw_REG(s3, s3, s4); // s4 = (op1 & op2) | ((op1 | op2) & ~ res) + IFX(X_AF) { + LSRw(s4, s3, 3); + BFIw(xFlags, s4, F_AF, 1); // AF: bc & 0x08 + } + IFX(X_OF) { + LSRw(s4, s3, 6); + EORw_REG_LSR(s4, s4, s4, 1); + BFIw(xFlags, s4, F_OF, 1); // OF: ((bc >> 6) ^ ((bc>>6)>>1)) & 1 + } + } + IFX(X_CF) { + LSRw(s3, s1, 8); + BFIw(xFlags, s3, F_CF, 1); + } + IFX(X_PEND) { + STRH_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_ZF) { + ANDSw_mask(s1, s1, 0, 0b000111); //mask=000000ff + CSETw(s3, cEQ); + BFIw(xFlags, s3, F_ZF, 1); + } + IFX(X_SF) { + LSRw(s3, s1, 7); + BFIw(xFlags, s3, F_SF, 1); + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} + +// emit SUB8 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch +void emit_sub8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4) +{ + MAYUSE(s2); + IFX(X_PEND) { + STRB_U12(s1, xEmu, offsetof(x64emu_t, op1)); + STRB_U12(s2, xEmu, offsetof(x64emu_t, op2)); + SET_DF(s3, d_sub8); + } else IFX(X_ALL) { + SET_DFNONE(s3); + } + IFX(X_AF|X_OF|X_CF) { + MVNw_REG(s3, s1); + ORRw_REG(s3, s3, s2); // s3 = ~op1 | op2 + BICw_REG(s4, s2, s1); // s4 = ~op1 & op2 + } + + SUBw_REG(s1, s1, s2); + IFX(X_PEND) { + STRB_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_AF|X_OF|X_CF) { + ANDw_REG(s3, s3, s1); // s3 = (~op1 | op2) & res + ORRw_REG(s3, s3, s4); // s3 = (~op1 & op2) | ((~op1 | op2) & res) + IFX(X_CF) { + LSRw(s4, s3, 7); + BFIw(xFlags, s4, F_CF, 1); // CF : bc & 0x80 + } + IFX(X_AF) { + LSRw(s4, s3, 3); + BFIw(xFlags, s4, F_AF, 1); // AF: bc & 0x08 + } + IFX(X_OF) { + LSRw(s4, s3, 6); + EORw_REG_LSR(s4, s4, s4, 1); + BFIw(xFlags, s4, F_OF, 1); // OF: ((bc >> 6) ^ ((bc>>6)>>1)) & 1 + } + } + IFX(X_ZF) { + ANDSw_mask(s1, s1, 0, 7); //mask=0xff + CSETw(s3, cEQ); + BFIw(xFlags, s3, F_ZF, 1); + } + IFX(X_SF) { + LSRw(s3, s1, 7); + BFIw(xFlags, s3, F_SF, 1); + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} + +// emit SUB8 instruction, from s1, constant c, store result in s1 using s3 and s4 as scratch +void emit_sub8c(dynarec_arm_t* dyn, int ninst, int s1, int c, int s3, int s4, int s5) +{ + MAYUSE(s5); + IFX(X_ALL|X_PEND) { + MOV32w(s5, c&0xff); + } + IFX(X_PEND) { + STRB_U12(s1, xEmu, offsetof(x64emu_t, op1)); + STRB_U12(s3, xEmu, offsetof(x64emu_t, op2)); + SET_DF(s3, d_sub8); + } else IFX(X_ALL) { + SET_DFNONE(s3); + } + IFX(X_AF|X_OF|X_CF) { + MVNw_REG(s3, s1); + ORRw_REG(s3, s3, s5); // s3 = ~op1 | op2 + BICw_REG(s4, s5, s1); // s4 = ~op1 & op2 + } + IFX(X_ALL) { + SUBw_REG(s1, s1, s5); + } else { + SUBw_U12(s1, s1, c&0xff); + } + IFX(X_PEND) { + STRB_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_AF|X_OF|X_CF) { + ANDw_REG(s3, s3, s1); // s3 = (~op1 | op2) & res + ORRw_REG(s3, s3, s4); // s3 = (~op1 & op2) | ((~op1 | op2) & res) + IFX(X_CF) { + LSRw(s4, s3, 7); + BFIw(xFlags, s4, F_CF, 1); // CF : bc & 0x80 + } + IFX(X_AF) { + LSRw(s4, s3, 3); + BFIw(xFlags, s4, F_AF, 1); // AF: bc & 0x08 + } + IFX(X_OF) { + LSRw(s4, s3, 6); + EORw_REG_LSR(s4, s4, s4, 1); + BFIw(xFlags, s4, F_OF, 1); // OF: ((bc >> 6) ^ ((bc>>6)>>1)) & 1 + } + } + IFX(X_ZF) { + ANDSw_mask(s1, s1, 0, 0b000111); //mask=000000ff + CSETw(s3, cEQ); + BFIw(xFlags, s3, F_ZF, 1); + } 
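+ // 8/16-bit ops are computed in full 32-bit registers, so the x86 flags are
+ // extracted from the low part afterwards: ZF by AND-ing with the logical
+ // immediate mask (0xff or 0xffff, as the inline comments note) and testing for
+ // zero, SF from bit 7 (or 15), and CF/AF/OF from the borrow chain above.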
+ IFX(X_SF) { + LSRw(s3, s1, 7); + BFIw(xFlags, s3, F_SF, 1); + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} + +// emit ADD16 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch +void emit_add16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4) +{ + MAYUSE(s2); + IFX(X_PEND) { + STRH_U12(s1, xEmu, offsetof(x64emu_t, op1)); + STRH_U12(s2, xEmu, offsetof(x64emu_t, op2)); + SET_DF(s3, d_add16); + } else IFX(X_ALL) { + SET_DFNONE(s3); + } + IFX(X_AF | X_OF) { + ORRw_REG(s3, s1, s2); // s3 = op1 | op2 + ANDw_REG(s4, s1, s2); // s4 = op1 & op2 + } + ADDw_REG(s1, s1, s2); + + IFX(X_AF|X_OF) { + BICw_REG(s3, s3, s1); // s3 = (op1 | op2) & ~ res + ORRw_REG(s3, s3, s4); // s3 = (op1 & op2) | ((op1 | op2) & ~ res) + IFX(X_AF) { + LSRw(s4, s3, 3); + BFIw(xFlags, s4, F_AF, 1); // AF: bc & 0x08 + } + IFX(X_OF) { + LSRw(s4, s3, 14); + EORw_REG_LSR(s4, s4, s4, 1); + BFIw(xFlags, s4, F_OF, 1); // OF: ((bc >> 14) ^ ((bc>>14)>>1)) & 1 + } + } + IFX(X_CF) { + LSRw(s3, s1, 16); + BFIw(xFlags, s3, F_CF, 1); + } + IFX(X_PEND) { + STRw_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_ZF) { + ANDSw_mask(s1, s1, 0, 15); //mask=0xffff + CSETw(s3, cEQ); + BFIw(xFlags, s3, F_ZF, 1); + } + IFX(X_SF) { + LSRw(s3, s1, 15); + BFIw(xFlags, s3, F_SF, 1); + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} + +// emit ADD16 instruction, from s1, const c, store result in s1 using s3 and s4 as scratch +//void emit_add16c(dynarec_arm_t* dyn, int ninst, int s1, int c, int s3, int s4) +//{ +// IFX(X_PEND) { +// MOVW(s3, c); +// STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1)); +// STR_IMM9(s3, xEmu, offsetof(x64emu_t, op2)); +// SET_DF(s4, d_add16); +// } else IFX(X_ALL) { +// SET_DFNONE(s4); +// } +// IFX(X_AF | X_OF) { +// MOV_REG(s4, s1); +// } +// if(c>=0 && c<256) { +// ADD_IMM8(s1, s1, c); +// } else { +// IFX(X_PEND) {} else {MOVW(s3, c);} +// ADD_REG_LSL_IMM5(s1, s1, s3, 0); +// } +// +// IFX(X_AF|X_OF) { +// if(c>=0 && c<256) { +// ORR_IMM8(s3, s4, c, 0); // s3 = op1 | op2 +// AND_IMM8(s4, s4, c); // s4 = op1 & op2 +// } else { +// ORR_REG_LSL_IMM5(s3, s3, s4, 0); // s3 = op1 | op2 +// PUSH(xSP, 1<> 14) ^ ((bc>>14)>>1)) & 1 +// } +// } +// IFX(X_CF) { +// MOV_REG_LSR_IMM5(s3, s1, 16); +// BFI(xFlags, s3, F_CF, 1); +// } +// IFX(X_PEND) { +// STR_IMM9(s1, xEmu, offsetof(x64emu_t, res)); +// } +// IFX(X_ZF) { +// UXTH(s1, s1, 0); +// TSTS_REG_LSL_IMM5(s1, s1, 0); +// ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<> 14) ^ ((bc>>14)>>1)) & 1 + } + } + IFX(X_ZF) { + ANDSw_mask(s1, s1, 0, 15); //mask=0xffff + CSETw(s3, cEQ); + BFIw(xFlags, s3, F_ZF, 1); + } + IFX(X_SF) { + LSRw(s3, s1, 15); + BFIw(xFlags, s3, F_SF, 1); + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} + +// emit SUB16 instruction, from s1, constant c, store result in s1 using s3 and s4 as scratch +//void emit_sub16c(dynarec_arm_t* dyn, int ninst, int s1, int c, int s3, int s4) +//{ +// IFX(X_PEND) { +// MOVW(s3, c); +// STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1)); +// STR_IMM9(s3, xEmu, offsetof(x64emu_t, op2)); +// SET_DF(s4, d_sub16); +// } else IFX(X_ALL) { +// SET_DFNONE(s4); +// } +// IFX(X_AF|X_OF|X_CF) { +// MVN_REG_LSL_IMM5(s4, s1, 0); +// } +// if(c>=0 && c<255) { +// SUB_IMM8(s1, s1, c); +// } else { +// IFX(X_PEND) {} else {MOVW(s3, c);} +// SUB_REG_LSL_IMM5(s1, s1, s3, 0); +// } +// IFX(X_PEND) { +// STR_IMM9(s1, xEmu, offsetof(x64emu_t, res)); +// } +// IFX(X_AF|X_OF|X_CF) { +// if(c>=0 && c<256) { +// ORR_IMM8(s3, s4, c, 0); // s3 = ~op1 | op2 +// AND_IMM8(s4, s4, c); // s4 = ~op1 & op2 +// } 
else { +// ORR_REG_LSL_IMM5(s3, s3, s4, 0); // s3 = ~op1 | op2 +// PUSH(xSP, 1<> 14) ^ ((bc>>14)>>1)) & 1 +// } +// } +// IFX(X_ZF) { +// UXTH(s1, s1, 0); +// TSTS_REG_LSL_IMM5(s1, s1, 0); +// ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<> 6) ^ ((bc>>6)>>1)) & 1 + } + } + + IFX(X_ZF) { + ANDSw_mask(s1, s1, 0, 7); //mask=0xff + CSETw(s3, cEQ); + BFIw(xFlags, s3, F_ZF, 1); + } + IFX(X_SF) { + LSRw(s3, s1, 7); + BFIw(xFlags, s3, F_SF, 1); + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} + +// emit INC16 instruction, from s1, store result in s1 using s3 and s4 as scratch +void emit_inc16(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4) +{ + IFX(X_PEND) { + STRH_U12(s1, xEmu, offsetof(x64emu_t, op1)); + SET_DF(s3, d_inc16); + } else IFX(X_ZF|X_OF|X_AF|X_SF|X_PF) { + SET_DFNONE(s3); + } + IFX(X_AF | X_OF) { + MOVw_REG(s4, s1); + } + ADDw_U12(s1, s1, 1); + IFX(X_PEND) { + STRH_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_AF|X_OF) { + ORRw_mask(s3, s4, 0, 0); // s3 = op1 | op2 + ANDw_mask(s4, s4, 0, 0); // s4 = op1 & op2 + BICw_REG(s3, s3, s1); // s3 = (op1 | op2) & ~ res + ORRw_REG(s3, s3, s4); // s3 = (op1 & op2) | ((op1 | op2) & ~ res) + IFX(X_AF) { + LSRw(s4, s3, 3); + BFIw(xFlags, s4, F_AF, 1); // AF: bc & 0x08 + } + IFX(X_OF) { + LSRw(s4, s3, 14); + EORw_REG_LSR(s4, s4, s4, 1); + BFIw(xFlags, s4, F_OF, 1); // OF: ((bc >> 14) ^ ((bc>>14)>>1)) & 1 + } + } + IFX(X_ZF) { + TSTw_mask(s1, 0, 0b001111); // mask=0xffff + CSETw(s3, cEQ); + BFIw(xFlags, s3, F_ZF, 1); + } + IFX(X_SF) { + LSRw(s3, s1, 15); + BFIw(xFlags, s3, F_SF, 1); + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} + +// emit DEC32 instruction, from s1, store result in s1 using s3 and s4 as scratch +void emit_dec32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4) +{ + IFX(X_PEND) { + STRxw_U12(s1, xEmu, offsetof(x64emu_t, op1)); + SET_DF(s4, rex.w?d_dec64:d_dec32); + } else IFX(X_ZF|X_OF|X_AF|X_SF|X_PF) { + SET_DFNONE(s4); + } + IFX(X_AF) { + MVNxw_REG(s3, s1); + if(rex.w) { + ANDx_mask(s4, s3, 1, 0, 0); // s4 = ~op1 & op2 + ORRx_mask(s3, s3, 1, 0, 0); // s3 = ~op1 | op2 + } else { + ANDw_mask(s4, s3, 0, 0); // s4 = ~op1 & op2 + ORRw_mask(s3, s3, 0, 0); // s3 = ~op1 | op2 + } + } + IFX(X_ZF|X_OF) { + SUBSxw_U12(s1, s1, 1); + } else { + SUBxw_U12(s1, s1, 1); + } + IFX(X_PEND) { + STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_AF) { + ANDxw_REG(s3, s3, s1); // s3 = (~op1 | op2) & res + ORRxw_REG(s3, s3, s4); // s4 = (~op1 & op2) | ((~op1 | op2) & ~ res) + LSRxw(s4, s3, 3); + BFIw(xFlags, s4, F_AF, 1); // AF: bc & 0x08 + } + IFX(X_ZF) { + CSETw(s4, cEQ); + BFIw(xFlags, s4, F_ZF, 1); + } + IFX(X_OF) { + CSETw(s4, cVS); + BFIw(xFlags, s4, F_OF, 1); + } + IFX(X_SF) { + LSRxw(s3, s1, rex.w?63:31); + BFIxw(xFlags, s3, F_SF, 1); + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} + +// emit DEC8 instruction, from s1, store result in s1 using s3 and s4 as scratch +void emit_dec8(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4) +{ + IFX(X_PEND) { + STRB_U12(s3, xEmu, offsetof(x64emu_t, op2)); + SET_DF(s3, d_dec8); + } else IFX(X_ZF|X_OF|X_AF|X_SF|X_PF) { + SET_DFNONE(s3); + } + IFX(X_AF|X_OF) { + MVNw_REG(s3, s1); + ANDw_mask(s4, s3, 0, 0); // s4 = ~op1 & op2 + ORRw_mask(s3, s3, 0, 0); // s3 = ~op1 | op2 + } + SUBSw_U12(s1, s1, 1); + IFX(X_PEND) { + STRB_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_AF|X_OF) { + ANDw_REG(s3, s3, s1); // s3 = (~op1 | op2) & res + ORRw_REG(s3, s3, s4); // s3 = (~op1 & op2) | ((~op1 | op2) & res) + IFX(X_AF) { + LSRw(s4, s3, 3); + 
BFIw(xFlags, s4, F_AF, 1); // AF: bc & 0x08 + } + IFX(X_OF) { + LSRw(s4, s3, 6); + EORw_REG_LSR(s4, s4, s4, 1); + BFIw(xFlags, s4, F_OF, 1); // OF: ((bc >> 6) ^ ((bc>>6)>>1)) & 1 + } + } + IFX(X_ZF) { + CSETw(s3, cEQ); + BFIw(xFlags, s3, F_ZF, 1); + } + IFX(X_SF) { + LSRw(s3, s1, 7); + BFIw(xFlags, s3, F_SF, 1); + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} + +// emit DEC16 instruction, from s1, store result in s1 using s3 and s4 as scratch +void emit_dec16(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4) +{ + IFX(X_PEND) { + STRH_U12(s1, xEmu, offsetof(x64emu_t, op1)); + SET_DF(s3, d_dec16); + } else IFX(X_ZF|X_OF|X_AF|X_SF|X_PF) { + SET_DFNONE(s3); + } + IFX(X_AF|X_OF) { + MVNw_REG(s4, s1); + } + SUBSw_U12(s1, s1, 1); + IFX(X_PEND) { + STRH_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_AF|X_OF) { + ORRw_mask(s3, s4, 0, 0); // s3 = ~op1 | op2 + ANDw_mask(s4, s4, 0, 0); // s4 = ~op1 & op2 + ANDw_REG(s3, s3, s1); // s3 = (~op1 | op2) & res + ORRw_REG(s3, s3, s4); // s3 = (~op1 & op2) | ((~op1 | op2) & res) + IFX(X_AF) { + LSRw(s4, s3, 3); + BFIw(xFlags, s4, F_AF, 1); // AF: bc & 0x08 + } + IFX(X_OF) { + LSRw(s4, s3, 14); + EORw_REG_LSR(s4, s4, s4, 1); + BFIw(xFlags, s4, F_OF, 1); // OF: ((bc >> 14) ^ ((bc>>14)>>1)) & 1 + } + } + IFX(X_ZF) { + CSETw(s3, cEQ); + BFIw(xFlags, s3, F_ZF, 1); + } + IFX(X_SF) { + LSRw(s3, s1, 15); + BFIw(xFlags, s3, F_SF, 1); + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} + +// emit ADC32 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch +void emit_adc32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4) +{ + MAYUSE(s2); + IFX(X_PEND) { + STRxw_U12(s1, xEmu, offsetof(x64emu_t, op1)); + STRxw_U12(s2, xEmu, offsetof(x64emu_t, op2)); + SET_DF(s3, rex.w?d_adc64:d_adc32b); + } else IFX(X_ALL) { + SET_DFNONE(s3); + } + IFX(X_AF) { + MOVxw_REG(s4, s1); + } + MRS_nzvc(s3); + BFIx(s3, xFlags, 29, 1); // set C + MSR_nzvc(s3); // load CC into ARM CF + IFX(X_ZF|X_CF|X_OF) { + ADCSxw_REG(s1, s1, s2); + } else { + ADCxw_REG(s1, s1, s2); + } + IFX(X_PEND) { + STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_AF) { + ORRxw_REG(s3, s4, s2); // s3 = op1 | op2 + ANDxw_REG(s4, s4, s2); // s4 = op1 & op2 + BICxw_REG(s3, s3, s1); // s3 = (op1 | op2) & ~ res + ORRxw_REG(s3, s3, s4); // s4 = (op1 & op2) | ((op1 | op2) & ~ res) + LSRxw(s4, s3, 3); + BFIw(xFlags, s4, F_AF, 1); // AF: bc & 0x08 + } + IFX(X_ZF) { + CSETw(s3, cEQ); + BFIw(xFlags, s3, F_ZF, 1); + } + IFX(X_CF) { + CSETw(s3, cCS); + BFIw(xFlags, s3, F_CF, 1); + } + IFX(X_OF) { + CSETw(s3, cVS); + BFIw(xFlags, s3, F_OF, 1); + } + IFX(X_SF) { + LSRx(s3, s1, rex.w?63:31); + BFIw(xFlags, s3, F_SF, 1); + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} + +// emit ADC32 instruction, from s1, constant c, store result in s1 using s3 and s4 as scratch +//void emit_adc32c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4) +//{ +// IFX(X_PEND) { +// MOV32(s3, c); +// STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1)); +// STR_IMM9(s3, xEmu, offsetof(x64emu_t, op2)); +// SET_DF(s4, d_adc32); +// } else IFX(X_ALL) { +// SET_DFNONE(s4); +// } +// IFX(X_AF) { +// MOV_REG(s4, s1); +// } +// MOVS_REG_LSR_IMM5(s3, xFlags, 1); // load CC into ARM CF +// if(c>=0 && c<256) { +// IFX(X_ZF|X_CF|X_OF) { +// ADCS_IMM8(s1, s1, c); +// } else { +// ADC_IMM8(s1, s1, c); +// } +// } else { +// MOV32(s3, c); +// IFX(X_ZF|X_CF|X_OF) { +// ADCS_REG_LSL_IMM5(s1, s1, s3, 0); +// } else { +// ADC_REG_LSL_IMM5(s1, s1, s3, 0); +// } +// } +// IFX(X_PEND) { 
+// STR_IMM9(s1, xEmu, offsetof(x64emu_t, res)); +// } +// IFX(X_AF) { +// if(c>=0 && c<256) { +// ORR_IMM8(s3, s4, c, 0); // s3 = op1 | op2 +// AND_IMM8(s4, s4, c); // s4 = op1 & op2 +// } else { +// ORR_REG_LSL_IMM5(s3, s3, s4, 0); // s3 = op1 | op2 +// PUSH(xSP, 1<> 6) ^ ((bc>>6)>>1)) & 1 + } + } + IFX(X_CF) { + LSRw(s3, s1, 8); + BFIw(xFlags, s3, F_CF, 1); + } + IFX(X_ZF) { + ANDSw_mask(s1, s1, 0, 7); //mask=0xff + CSETw(s3, cEQ); + BFIw(xFlags, s3, F_ZF, 1); + } + IFX(X_SF) { + LSRw(s3, s1, 7); + BFIw(xFlags, s3, F_SF, 1); + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} + +// emit ADC8 instruction, from s1, const c, store result in s1 using s3 and s4 as scratch +void emit_adc8c(dynarec_arm_t* dyn, int ninst, int s1, int c, int s3, int s4, int s5) +{ + MAYUSE(s5); + MOV32w(s5, c&0xff); + IFX(X_PEND) { + STRB_U12(s1, xEmu, offsetof(x64emu_t, op1)); + STRB_U12(s5, xEmu, offsetof(x64emu_t, op2)); + SET_DF(s4, d_adc8); + } else IFX(X_ALL) { + SET_DFNONE(s4); + } + IFX(X_AF | X_OF) { + MOVw_REG(s4, s1); + } + MRS_nzvc(s3); + BFIx(s3, xFlags, 29, 1); // set C + MSR_nzvc(s3); // load CC into ARM CF + ADCw_REG(s1, s1, s5); + IFX(X_PEND) { + STRH_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_AF|X_OF) { + ORRw_REG(s3, s4, s5); // s3 = op1 | op2 + ANDw_REG(s4, s4, s5); // s4 = op1 & op2 + BICw_REG(s3, s3, s1); // s3 = (op1 | op2) & ~ res + ORRw_REG(s3, s3, s4); // s4 = (op1 & op2) | ((op1 | op2) & ~ res) + IFX(X_AF) { + LSRw(s4, s3, 3); + BFIw(xFlags, s4, F_AF, 1); // AF: bc & 0x08 + } + IFX(X_OF) { + LSRw(s4, s3, 6); + EORw_REG_LSR(s4, s4, s4, 1); + BFIw(xFlags, s4, F_OF, 1); // OF: ((bc >> 6) ^ ((bc>>6)>>1)) & 1 + } + } + IFX(X_CF) { + LSRw(s3, s1, 8); + BFIw(xFlags, s3, F_CF, 1); + } + IFX(X_ZF) { + ANDSw_mask(s1, s1, 0, 0b000111); //mask=000000ff + CSETw(s3, cEQ); + BFIw(xFlags, s3, F_ZF, 1); + } + IFX(X_SF) { + LSRw(s3, s1, 7); + BFIw(xFlags, s3, F_SF, 1); + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} + +// emit ADC16 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch +void emit_adc16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4) +{ + MAYUSE(s2); + IFX(X_PEND) { + STRH_U12(s1, xEmu, offsetof(x64emu_t, op1)); + STRH_U12(s2, xEmu, offsetof(x64emu_t, op2)); + SET_DF(s3, d_adc16); + } else IFX(X_ALL) { + SET_DFNONE(s3); + } + IFX(X_AF | X_OF) { + MOVw_REG(s4, s1); + } + MRS_nzvc(s3); + BFIx(s3, xFlags, 29, 1); // set C + MSR_nzvc(s3); // load CC into ARM CF + ADCw_REG(s1, s1, s2); + IFX(X_PEND) { + STRH_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_AF|X_OF) { + ORRw_REG(s3, s4, s2); // s3 = op1 | op2 + ANDw_REG(s4, s4, s2); // s4 = op1 & op2 + BICw_REG(s3, s3, s1); // s3 = (op1 | op2) & ~ res + ORRw_REG(s3, s3, s4); // s3 = (op1 & op2) | ((op1 | op2) & ~ res) + IFX(X_AF) { + LSRw(s4, s3, 3); + BFIw(xFlags, s4, F_AF, 1); // AF: bc & 0x08 + } + IFX(X_OF) { + LSRw(s4, s3, 14); + EORw_REG_LSR(s4, s4, s4, 1); + BFIw(xFlags, s4, F_OF, 1); // OF: ((bc >> 14) ^ ((bc>>14)>>1)) & 1 + } + } + IFX(X_CF) { + LSRw(s3, s1, 16); + BFIw(xFlags, s3, F_CF, 1); + } + IFX(X_ZF) { + ANDSw_mask(s1, s1, 0, 15); //mask=0xffff + CSETw(s3, cEQ); + BFIw(xFlags, s3, F_ZF, 1); + } + IFX(X_SF) { + LSRw(s3, s1, 15); + BFIw(xFlags, s3, F_SF, 1); + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} + +// emit ADC16 instruction, from s1, const c, store result in s1 using s3 and s4 as scratch +//void emit_adc16c(dynarec_arm_t* dyn, int ninst, int s1, int c, int s3, int s4) +//{ +// IFX(X_PEND) { +// MOVW(s3, c); +// STR_IMM9(s1, xEmu, 
offsetof(x64emu_t, op1)); +// STR_IMM9(s3, xEmu, offsetof(x64emu_t, op2)); +// SET_DF(s3, d_adc16); +// } else IFX(X_ALL) { +// SET_DFNONE(s3); +// } +// IFX(X_AF | X_OF) { +// MOV_REG(s4, s1); +// } +// MOVS_REG_LSR_IMM5(s3, xFlags, 1); // load CC into ARM CF +// if(c>=0 && c<256) { +// ADC_IMM8(s1, s1, c); +// } else { +// MOVW(s3, c); +// ADC_REG_LSL_IMM5(s1, s1, s3, 0); +// } +// IFX(X_PEND) { +// STR_IMM9(s1, xEmu, offsetof(x64emu_t, res)); +// } +// IFX(X_AF|X_OF) { +// if(c>=0 && c<256) { +// ORR_IMM8(s3, s4, c, 0); // s3 = op1 | op2 +// AND_IMM8(s4, s4, c); // s4 = op1 & op2 +// } else { +// ORR_REG_LSL_IMM5(s3, s3, s4, 0); // s3 = op1 | op2 +// PUSH(xSP, 1<> 14) ^ ((bc>>14)>>1)) & 1 +// } +// } +// IFX(X_CF) { +// MOV_REG_LSR_IMM5(s3, s1, 16); +// BFI(xFlags, s3, F_CF, 1); +// } +// IFX(X_ZF) { +// UXTH(s1, s1, 0); +// TSTS_REG_LSL_IMM5(s1, s1, 0); +// ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<=0 && c<256) { +// IFX(X_ZF|X_CF|X_OF) { +// SBCS_IMM8(s1, s1, c); +// } else { +// SBC_IMM8(s1, s1, c); +// } +// } else { +// MOV32(s3, c); +// IFX(X_ZF|X_CF|X_OF) { +// SBCS_REG_LSL_IMM5(s1, s1, s3, 0); +// } else { +// SBC_REG_LSL_IMM5(s1, s1, s3, 0); +// } +// } +// IFX(X_PEND) { +// STR_IMM9(s1, xEmu, offsetof(x64emu_t, res)); +// } +// IFX(X_AF) { +// if(c>=0 && c<256) { +// AND_IMM8(s4, s3, c); // s4 = ~op1 & op2 +// ORR_IMM8(s3, s3, c, 0); // s3 = ~op1 | op2 +// } else { +// ORR_REG_LSL_IMM5(s3, s4, s3, 0); +// PUSH(xSP, 1<> 6) ^ ((bc>>6)>>1)) & 1 + } + } + IFX(X_ZF) { + ANDSw_mask(s1, s1, 0, 7); //mask=0xff + CSETw(s3, cEQ); + BFIw(xFlags, s3, F_ZF, 1); + } + IFX(X_SF) { + LSRw(s3, s1, 7); + BFIw(xFlags, s3, F_SF, 1); + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} + +// emit SBB8 instruction, from s1, constant c, store result in s1 using s3 and s4 as scratch +void emit_sbb8c(dynarec_arm_t* dyn, int ninst, int s1, int c, int s3, int s4, int s5) +{ + MAYUSE(s5); + MOV32w(s5, c&0xff); + IFX(X_PEND) { + STRB_U12(s1, xEmu, offsetof(x64emu_t, op1)); + STRB_U12(s5, xEmu, offsetof(x64emu_t, op2)); + SET_DF(s3, d_sbb8); + } else IFX(X_ALL) { + SET_DFNONE(s3); + } + EORw_mask(s4, xFlags, 0, 0); // invert CC because it's reverted for SUB on ARM + MRS_nzvc(s3); + BFIx(s3, s4, 29, 1); // set C, bit 29 + MSR_nzvc(s3); // load CC into ARM CF + IFX(X_AF|X_OF|X_CF) { + MVNw_REG(s4, s1); + } + SBCw_REG(s1, s1, s5); + IFX(X_PEND) { + STRB_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_AF|X_OF|X_CF) { + ORRw_REG(s3, s4, s5); // s3 = ~op1 | op2 + ANDw_REG(s4, s4, s5); // s4 = ~op1 & op2 + ANDw_REG(s3, s3, s1); // s3 = (~op1 | op2) & res + ORRw_REG(s3, s3, s4); // s3 = (~op1 & op2) | ((~op1 | op2) & res) + IFX(X_CF) { + LSRw(s4, s3, 7); + BFIw(xFlags, s4, F_CF, 1); // CF : bc & 0x80 + } + IFX(X_AF) { + LSRw(s4, s3, 3); + BFIw(xFlags, s4, F_AF, 1); // AF: bc & 0x08 + } + IFX(X_OF) { + LSRw(s4, s3, 6); + EORw_REG_LSR(s4, s4, s4, 1); + BFIw(xFlags, s4, F_OF, 1); // OF: ((bc >> 6) ^ ((bc>>6)>>1)) & 1 + } + } + IFX(X_ZF) { + ANDSw_mask(s1, s1, 0, 0b000111); //mask=000000ff + CSETw(s3, cEQ); + BFIw(xFlags, s3, F_ZF, 1); + } + IFX(X_SF) { + LSRw(s3, s1, 7); + BFIw(xFlags, s3, F_SF, 1); + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} + +// emit SBB16 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch +void emit_sbb16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4) +{ + MAYUSE(s2); + IFX(X_PEND) { + STRH_U12(s1, xEmu, offsetof(x64emu_t, op1)); + STRH_U12(s2, xEmu, offsetof(x64emu_t, op2)); + SET_DF(s3, d_sbb16); + } else IFX(X_ALL) { + SET_DFNONE(s3); 
+ } + EORw_mask(s4, xFlags, 0, 0); // invert CC because it's reverted for SUB on ARM + MRS_nzvc(s3); + BFIx(s3, s4, 29, 1); // set C, bit 29 + MSR_nzvc(s3); // load CC into ARM CF + IFX(X_AF|X_OF|X_CF) { + MVNw_REG(s4, s1); + } + SBCw_REG(s1, s1, s2); + IFX(X_PEND) { + STRH_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_AF|X_OF|X_CF) { + ORRw_REG(s3, s4, s2); // s3 = ~op1 | op2 + ANDw_REG(s4, s2, s4); // s4 = ~op1 & op2 + ANDw_REG(s3, s3, s1); // s3 = (~op1 | op2) & res + ORRw_REG(s3, s3, s4); // s3 = (~op1 & op2) | ((~op1 | op2) & res) + IFX(X_CF) { + LSRw(s4, s3, 15); + BFIw(xFlags, s4, F_CF, 1); // CF : bc & 0x8000 + } + IFX(X_AF) { + LSRw(s4, s3, 3); + BFIw(xFlags, s4, F_AF, 1); // AF: bc & 0x08 + } + IFX(X_OF) { + LSRw(s4, s3, 14); + EORw_REG_LSR(s4, s4, s4, 1); + BFIw(xFlags, s4, F_OF, 1); // OF: ((bc >> 14) ^ ((bc>>14)>>1)) & 1 + } + } + IFX(X_ZF) { + ANDSw_mask(s1, s1, 0, 15); //mask=0xffff + CSETw(s3, cEQ); + BFIw(xFlags, s3, F_ZF, 1); + } + IFX(X_SF) { + LSRw(s3, s1, 15); + BFIw(xFlags, s3, F_SF, 1); + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} + +// emit SBB16 instruction, from s1, constant c, store result in s1 using s3 and s4 as scratch +//void emit_sbb16c(dynarec_arm_t* dyn, int ninst, int s1, int c, int s3, int s4) +//{ +// IFX(X_PEND) { +// MOVW(s3, c); +// STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1)); +// STR_IMM9(s3, xEmu, offsetof(x64emu_t, op2)); +// SET_DF(s3, d_sbb16); +// } else IFX(X_ALL) { +// SET_DFNONE(s3); +// } +// IFX(X_AF|X_OF|X_CF) { +// MVN_REG_LSL_IMM5(s4, s1, 0); +// } +// XOR_IMM8(s3, xFlags, 1); // invert CC because it's reverted for SUB on ARM +// MOVS_REG_LSR_IMM5(s3, s3, 1); // load into ARM CF +// if(c>=0 && c<255) { +// SBC_IMM8(s1, s1, c); +// } else { +// MOVW(s3, c); +// SBC_REG_LSL_IMM5(s1, s1, s3, 0); +// } +// IFX(X_PEND) { +// STR_IMM9(s1, xEmu, offsetof(x64emu_t, res)); +// } +// IFX(X_AF|X_OF|X_CF) { +// if(c>=0 && c<256) { +// ORR_IMM8(s3, s4, c, 0); // s3 = ~op1 | op2 +// AND_IMM8(s4, s4, c); // s4 = ~op1 & op2 +// } else { +// ORR_REG_LSL_IMM5(s3, s3, s4, 0); // s3 = ~op1 | op2 +// PUSH(xSP, 1<> 14) ^ ((bc>>14)>>1)) & 1 +// } +// } +// IFX(X_ZF) { +// UXTH(s1, s1, 0); +// TSTS_REG_LSL_IMM5(s1, s1, 0); +// ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<> 14) ^ ((bc>>14)>>1)) & 1 + } + } + IFX(X_ZF) { + CSETw(s4, cEQ); + BFIw(xFlags, s4, F_ZF, 1); + } + IFX(X_SF) { + LSRw(s3, s1, 15); + BFIw(xFlags, s3, F_SF, 1); + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} + +// emit NEG8 instruction, from s1, store result in s1 using s3 and s4 as scratch +void emit_neg8(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4) +{ + IFX(X_PEND) { + STRB_U12(s1, xEmu, offsetof(x64emu_t, op1)); + SET_DF(s3, d_neg8); + } else IFX(X_ALL) { + SET_DFNONE(s3); + } + IFX(X_CF) { + TSTw_REG(s1, s1); + CSETw(s4, cNE); + BFIw(xFlags, s4, F_CF, 1); + } + IFX(X_AF|X_OF) { + MOVw_REG(s3, s1); + } + NEGSw_REG(s1, s1); + IFX(X_PEND) { + STRB_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_AF|X_OF) { + ORRw_REG(s3, s3, s1); // bc = op1 | res + IFX(X_AF) { + LSRw(s4, s3, 3); + BFIw(xFlags, s4, F_AF, 1); // AF: bc & 0x08 + } + IFX(X_OF) { + LSRw(s4, s3, 6); + EORx_REG_LSR(s4, s4, s4, 1); + BFIw(xFlags, s4, F_OF, 1); // OF: ((bc >> 6) ^ ((bc>>6)>>1)) & 1 + } + } + IFX(X_ZF) { + CSETw(s4, cEQ); + BFIw(xFlags, s4, F_ZF, 1); + } + IFX(X_SF) { + LSRw(s3, s1, 7); + BFIw(xFlags, s3, F_SF, 1); + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} diff --git a/src/dynarec/arm64/dynarec_arm64_emit_shift.c b/src/dynarec/arm64/dynarec_arm64_emit_shift.c 
new file mode 100755 index 00000000..51903720 --- /dev/null +++ b/src/dynarec/arm64/dynarec_arm64_emit_shift.c @@ -0,0 +1,449 @@ +#include +#include +#include +#include +#include + +#include "debug.h" +#include "box64context.h" +#include "dynarec.h" +#include "emu/x64emu_private.h" +#include "emu/x64run_private.h" +#include "x64run.h" +#include "x64emu.h" +#include "box64stack.h" +#include "callback.h" +#include "emu/x64run_private.h" +#include "x64trace.h" +#include "dynarec_native.h" +#include "../tools/bridge_private.h" + +#include "arm64_printer.h" +#include "dynarec_arm64_private.h" +#include "dynarec_arm64_functions.h" +#include "dynarec_arm64_helper.h" + +// emit SHL32 instruction, from s1 , shift s2, store result in s1 using s3 and s4 as scratch. s3 can be same as s2 +void emit_shl32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4) +{ + MAYUSE(s2); + int64_t j64; + MAYUSE(j64); + + IFX(X_PEND) { + STRxw_U12(s1, xEmu, offsetof(x64emu_t, op1)); + STRxw_U12(s2, xEmu, offsetof(x64emu_t, op2)); + SET_DF(s4, rex.w?d_shl64:d_shl32); + } else IFX(X_ALL) { + SET_DFNONE(s4); + } + IFX(F_OF) { + CMPSxw_U12(s2, 0); + IFX(F_OF) { + Bcond(cNE, +8); + BFCx(xFlags, F_OF, 1); + } + IFX(X_PEND) { + Bcond(cNE, +8); + STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + B_NEXT(cEQ); + } + IFX(X_CF | X_OF) { + MOV32w(s4, rex.w?64:32); + SUBxw_REG(s4, s4, s2); + LSRxw_REG(s4, s1, s4); + BFIw(xFlags, s4, F_CF, 1); + } + LSLxw_REG(s1, s1, s2); + IFX(X_PEND) { + STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_ZF) { + TSTxw_REG(s1, s1); + CSETw(s4, cEQ); + BFIw(xFlags, s4, F_ZF, 1); + } + IFX(X_SF) { + LSRxw(s4, s1, (rex.w)?63:31); + BFIx(xFlags, s4, F_SF, 1); + } + IFX(X_OF) { + CMPSxw_U12(s2, 1); // if s2==1 + IFX(X_SF) {} else {LSRxw(s4, s1, (rex.w)?63:31);} + EORxw_REG(s4, s4, xFlags); // CF is set if OF is asked + CSELw(s4, s4, wZR, cEQ); + BFIw(xFlags, s4, F_OF, 1); + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} + +// emit SHL32 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch +void emit_shl32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, int s3, int s4) +{ + IFX(X_PEND) { + MOV32w(s3, c); + STRxw_U12(s1, xEmu, offsetof(x64emu_t, op1)); + STRxw_U12(s3, xEmu, offsetof(x64emu_t, op2)); + SET_DF(s4, rex.w?d_shl64:d_shl32); + } else IFX(X_ALL) { + SET_DFNONE(s4); + } + if(c==0) { + IFX(F_OF) { + BFCx(xFlags, F_OF, 1); + } + IFX(X_PEND) { + STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + return; + } + IFX(X_CF|X_OF) { + LSRxw(s3, s1, (rex.w?64:32)-c); + BFIxw(xFlags, s3, F_CF, 1); + } + LSLxw(s1, s1, c); + + IFX(X_PEND) { + STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_ZF) { + TSTxw_REG(s1, s1); + CSETw(s4, cEQ); + BFIw(xFlags, s4, F_ZF, 1); + } + IFX(X_SF) { + LSRxw(s4, s1, (rex.w)?63:31); + BFIx(xFlags, s4, F_SF, 1); + } + IFX(X_OF) { + if(c==1) { + IFX(X_SF) {} else {LSRxw(s4, s1, (rex.w)?63:31);} + EORxw_REG(s4, s4, xFlags); // CF is set if OF is asked + BFIw(xFlags, s4, F_OF, 1); + } else { + BFCw(xFlags, F_OF, 1); + } + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} + +// emit SHR32 instruction, from s1 , s2, store result in s1 using s3 and s4 as scratch, s2 can be same as s3 +void emit_shr32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4) +{ + MAYUSE(s2); + int64_t j64; + MAYUSE(j64); + + IFX(X_PEND) { + STRxw_U12(s1, xEmu, offsetof(x64emu_t, op1)); + STRxw_U12(s2, xEmu, offsetof(x64emu_t, op2)); + SET_DF(s4, rex.w?d_shr64:d_shr32); + } else IFX(X_ALL) { 
+ SET_DFNONE(s4); + } + IFX(X_ALL) { + CMPSxw_U12(s2, 0); //if(!c) + IFX(X_PEND) { + Bcond(cNE, +12); + STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + B_NEXT(cEQ); + } + IFX(X_CF) { + SUBxw_U12(s3, s2, 1); + LSRxw_REG(s3, s1, s3); + BFIw(xFlags, s3, 0, 1); + } + LSRxw_REG(s1, s1, s2); + IFX(X_PEND) { + STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_ZF) { + TSTxw_REG(s1, s1); + CSETw(s4, cEQ); + BFIw(xFlags, s4, F_ZF, 1); + } + IFX(X_SF) { + LSRxw(s4, s1, (rex.w)?63:31); + BFIx(xFlags, s4, F_SF, 1); + } + IFX(X_OF) { + CMPSxw_U12(s2, 1); // if s2==1 + Bcond(cNE, 4+3*4); + if(rex.w) { + LSRx(s4, s1, 62); + } else { + LSRw(s4, s1, 30); + } + EORw_REG_LSR(s4, s4, s4, 1); + BFIw(xFlags, s4, F_OF, 1); + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} + +// emit SHR32 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch +void emit_shr32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, int s3, int s4) +{ + IFX(X_PEND) { + MOV32w(s3, c); + STRxw_U12(s1, xEmu, offsetof(x64emu_t, op1)); + STRxw_U12(s3, xEmu, offsetof(x64emu_t, op2)); + SET_DF(s4, rex.w?d_shr64:d_shr32); + } else IFX(X_ALL) { + SET_DFNONE(s4); + } + if(!c) { + IFX(X_PEND) { + STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + return; + } + IFX(X_CF) { + if(c>1) { + LSRxw(s3, s1, c-1); + } + BFIw(xFlags, (c>1)?s3:s1, 0, 1); + } + LSRxw(s1, s1, c); + IFX(X_PEND) { + STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_ZF) { + TSTxw_REG(s1, s1); + CSETw(s4, cEQ); + BFIw(xFlags, s4, F_ZF, 1); + } + IFX(X_SF) { + LSRxw(s4, s1, (rex.w)?63:31); + BFIx(xFlags, s4, F_SF, 1); + } + IFX(X_OF) { + if(c==1) { + LSRxw(s4, s1, rex.w?62:30); + EORw_REG_LSR(s4, s4, s4, 1); + BFIw(xFlags, s4, F_OF, 1); + } + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} + +// emit SAR32 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch +void emit_sar32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, int s3, int s4) +{ + IFX(X_PEND) { + MOV32w(s3, c); + STRxw_U12(s1, xEmu, offsetof(x64emu_t, op1)); + STRxw_U12(s3, xEmu, offsetof(x64emu_t, op2)); + SET_DF(s4, rex.w?d_sar64:d_sar32); + } else IFX(X_ALL) { + SET_DFNONE(s4); + } + if(!c) { + IFX(X_PEND) { + STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + return; + } + IFX(X_CF) { + if(c>1) { + ASRxw(s3, s1, c-1); + } + BFIw(xFlags, (c>1)?s3:s1, 0, 1); + } + ASRxw(s1, s1, c); + IFX(X_PEND) { + STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_ZF) { + TSTw_REG(s1, s1); + CSETw(s4, cEQ); + BFIw(xFlags, s4, F_ZF, 1); + } + IFX(X_SF) { + LSRxw(s4, s1, (rex.w)?63:31); + BFIx(xFlags, s4, F_SF, 1); + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} + +// emit ROL32 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch +void emit_rol32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, int s3, int s4) +{ + MAYUSE(rex); MAYUSE(s1); MAYUSE(s3); MAYUSE(s4); + IFX(X_PEND) { + MOV32w(s3, c); + STRxw_U12(s3, xEmu, offsetof(x64emu_t, op2)); + SET_DF(s4, d_rol32); + } else IFX(X_ALL) { + SET_DFNONE(s4); + } + if(!c) { + IFX(X_PEND) { + STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + return; + } + RORxw(s1, s1, (rex.w?64:32)-c); + IFX(X_PEND) { + STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_CF) { + BFIw(xFlags, s1, F_CF, 1); + } + IFX(X_OF) { + if(c==1) { + ADDxw_REG_LSR(s3, s1, s1, rex.w?63:31); + BFIw(xFlags, s3, F_OF, 1); + } + } +} + +// emit ROR32 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch +void 
emit_ror32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, int s3, int s4)
+{
+ MAYUSE(s1); MAYUSE(s3); MAYUSE(s4);
+ IFX(X_PEND) {
+ MOV32w(s3, c);
+ STRxw_U12(s3, xEmu, offsetof(x64emu_t, op2));
+ SET_DF(s4, rex.w?d_ror64:d_ror32);
+ } else IFX(X_ALL) {
+ SET_DFNONE(s4);
+ }
+ if(!c) {
+ IFX(X_PEND) {
+ STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
+ }
+ return;
+ }
+ RORxw(s1, s1, c);
+ IFX(X_PEND) {
+ STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
+ }
+ IFX(X_CF) {
+ LSRxw(s3, s1, rex.w?63:31);
+ BFIw(xFlags, s3, F_CF, 1);
+ }
+ IFX(X_OF) {
+ if(c==1) {
+ LSRxw(s3, s1, rex.w?62:30);
+ EORxw_REG_LSR(s3, s3, s3, 1);
+ BFIw(xFlags, s3, F_OF, 1);
+ }
+ }
+}
+
+// emit SHRD32 instruction, from s1, fill s2, constant c, store result in s1 using s3 and s4 as scratch
+void emit_shrd32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int32_t c, int s3, int s4)
+{
+ c&=(rex.w?0x3f:0x1f);
+ IFX(X_PEND) {
+ MOV32w(s3, c);
+ STRxw_U12(s1, xEmu, offsetof(x64emu_t, op1));
+ STRxw_U12(s3, xEmu, offsetof(x64emu_t, op2));
+ // same flags computation as with shl64/shl32
+ SET_DF(s4, rex.w?d_shl64:d_shl32);
+ } else IFX(X_ALL) {
+ SET_DFNONE(s4);
+ }
+ if(!c) {
+ IFX(X_PEND) {
+ STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
+ }
+ return;
+ }
+ IFX(X_CF) {
+ if(c>1) {
+ LSRxw(s3, s1, c-1);
+ }
+ BFIw(xFlags, (c>1)?s3:s1, 0, 1);
+ }
+ LSRxw(s3, s1, c);
+ ORRxw_REG_LSL(s1, s3, s2, (rex.w?64:32)-c);
+ IFX(X_PEND) {
+ STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
+ }
+ IFX(X_ZF) {
+ TSTxw_REG(s1, s1);
+ CSETw(s4, cEQ);
+ BFIw(xFlags, s4, F_ZF, 1);
+ }
+ IFX(X_SF) {
+ LSRxw(s4, s1, (rex.w)?63:31);
+ BFIx(xFlags, s4, F_SF, 1);
+ }
+ IFX(X_OF) {
+ if(c==1) {
+ LSRxw(s4, s1, rex.w?62:30);
+ EORw_REG_LSR(s4, s4, s4, 1);
+ BFIw(xFlags, s4, F_OF, 1);
+ }
+ }
+ IFX(X_PF) {
+ emit_pf(dyn, ninst, s1, s3, s4);
+ }
+}
+
+void emit_shld32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int32_t c, int s3, int s4)
+{
+ c&=(rex.w?0x3f:0x1f);
+ IFX(X_PEND) {
+ MOV32w(s3, c);
+ STRxw_U12(s1, xEmu, offsetof(x64emu_t, op1));
+ STRxw_U12(s3, xEmu, offsetof(x64emu_t, op2));
+ // same flags computation as with shl64/shl32
+ SET_DF(s4, rex.w?d_shl64:d_shl32);
+ } else IFX(X_ALL) {
+ SET_DFNONE(s4);
+ }
+ if(c==0) {
+ IFX(F_OF) {
+ BFCx(xFlags, F_OF, 1);
+ }
+ IFX(X_PEND) {
+ STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
+ }
+ return;
+ }
+ IFX(X_CF|X_OF) {
+ LSRxw(s3, s1, (rex.w?64:32)-c);
+ BFIxw(xFlags, s3, F_CF, 1);
+ }
+ LSLxw(s3, s1, c);
+ ORRxw_REG_LSR(s1, s3, s2, (rex.w?64:32)-c);
+
+ IFX(X_PEND) {
+ STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
+ }
+ IFX(X_ZF) {
+ TSTxw_REG(s1, s1);
+ CSETw(s4, cEQ);
+ BFIw(xFlags, s4, F_ZF, 1);
+ }
+ IFX(X_SF) {
+ LSRxw(s4, s1, (rex.w)?63:31);
+ BFIx(xFlags, s4, F_SF, 1);
+ }
+ IFX(X_OF) {
+ if(c==1) {
+ UBFXxw(s3, s1, rex.w?63:31, 1);
+ EORxw_REG(s3, s3, xFlags); // CF is set if OF is asked
+ BFIw(xFlags, s3, F_OF, 1);
+ } else {
+ BFCw(xFlags, F_OF, 1);
+ }
+ }
+ IFX(X_PF) {
+ emit_pf(dyn, ninst, s1, s3, s4);
+ }
+}
diff --git a/src/dynarec/arm64/dynarec_arm64_emit_tests.c b/src/dynarec/arm64/dynarec_arm64_emit_tests.c
new file mode 100755
index 00000000..301ab2f2
--- /dev/null
+++ b/src/dynarec/arm64/dynarec_arm64_emit_tests.c
@@ -0,0 +1,374 @@
+#include
+#include
+#include
+#include
+#include
+
+#include "debug.h"
+#include "box64context.h"
+#include "dynarec.h"
+#include "emu/x64emu_private.h"
+#include "emu/x64run_private.h"
+#include "x64run.h"
+#include "x64emu.h"
+#include "box64stack.h"
+#include "callback.h"
+#include "emu/x64run_private.h"
+#include "x64trace.h" +#include "dynarec_native.h" +#include "../tools/bridge_private.h" + +#include "arm64_printer.h" +#include "dynarec_arm64_private.h" +#include "dynarec_arm64_functions.h" +#include "dynarec_arm64_helper.h" + +// emit CMP32 instruction, from cmp s1, s2, using s3 and s4 as scratch +void emit_cmp32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5) +{ + MAYUSE(s1); MAYUSE(s2); + IFX_PENDOR0 { + STRxw_U12(s1, xEmu, offsetof(x64emu_t, op1)); + STRxw_U12(s2, xEmu, offsetof(x64emu_t, op2)); + SET_DF(s4, rex.w?d_cmp64:d_cmp32); + } else { + SET_DFNONE(s4); + } + IFX(X_AF) { + ORNxw_REG(s3, s2, s1); // s3 = ~op1 | op2 + BICxw(s4, s2, s1); // s4 = ~op1 & op2 + } + SUBSxw_REG(s5, s1, s2); // res = s1 - s2 + IFX_PENDOR0 { + STRxw_U12(s5, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_AF) { + ANDxw_REG(s3, s3, s5); // s3 = (~op1 | op2) & res + ORRxw_REG(s3, s3, s4); // s3 = (~op1 & op2) | ((~op1 | op2) & res) + LSRxw(s4, s3, 3); + BFIx(xFlags, s4, F_AF, 1); // AF: bc & 0x08 + } + IFX(X_ZF) { + CSETw(s4, cEQ); + BFIw(xFlags, s4, F_ZF, 1); + } + IFX(X_CF) { + // inverted carry + CSETw(s4, cCC); + BFIw(xFlags, s4, F_CF, 1); + } + IFX(X_OF) { + CSETw(s4, cVS); + BFIw(xFlags, s4, F_OF, 1); + } + IFX(X_SF) { + LSRxw(s3, s5, (rex.w)?63:31); + BFIw(xFlags, s3, F_SF, 1); + } + IFX(X_PF) { + emit_pf(dyn, ninst, s5, s3, s4); + } +} + +// emit CMP32 instruction, from cmp s1 , 0, using s3 and s4 as scratch +void emit_cmp32_0(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4) +{ + IFX_PENDOR0 { + MOV64xw(s4, 0); + STRxw_U12(s1, xEmu, offsetof(x64emu_t, op1)); + STRxw_U12(s4, xEmu, offsetof(x64emu_t, op2)); + STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); + SET_DF(s4, rex.w?d_cmp64:d_cmp32); + } else { + SET_DFNONE(s4); + } + SUBSxw_U12(s3, s1, 0); // res = s1 - 0 + // and now the tricky ones (and mostly unused), PF and AF + // bc = (res & (~d | s)) | (~d & s) => is 0 here... 
+ IFX(X_OF|X_AF) { + MOV32w(s4, (1<> 14) ^ ((bc>>14)>>1)) & 1 + } + } + IFX(X_PF) { + emit_pf(dyn, ninst, s5, s3, s4); + } +} + +// emit CMP16 instruction, from cmp s1 , #0, using s3 and s4 as scratch +void emit_cmp16_0(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4) +{ + IFX_PENDOR0 { + MOV32w(s3, 0); + STRH_U12(s1, xEmu, offsetof(x64emu_t, op1)); + STRH_U12(s3, xEmu, offsetof(x64emu_t, op2)); + STRH_U12(s1, xEmu, offsetof(x64emu_t, res)); + SET_DF(s3, d_cmp16); + } else { + SET_DFNONE(s3); + } + // bc = (res & (~d | s)) | (~d & s) = 0 + IFX(X_CF | X_AF | X_OF) { + MOV32w(s3, (1<> 6) ^ ((bc>>6)>>1)) & 1 + } + } + IFX(X_PF) { + emit_pf(dyn, ninst, s5, s3, s4); + } +} +// emit CMP8 instruction, from cmp s1 , 0, using s3 and s4 as scratch +void emit_cmp8_0(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4) +{ + IFX_PENDOR0 { + STRB_U12(s1, xEmu, offsetof(x64emu_t, op1)); + MOV32w(s4, 0); + STRB_U12(s4, xEmu, offsetof(x64emu_t, op2)); + STRB_U12(s1, xEmu, offsetof(x64emu_t, res)); + SET_DF(s3, d_cmp8); + } else { + SET_DFNONE(s4); + } + // bc = (res & (~d | s)) | (~d & s) = 0 + IFX(X_CF | X_AF | X_OF) { + MOV32w(s3, (1<x64emu_parity_tab[(res) / 32] >> ((res) % 32)) & 1) == 0) + IFX(X_PF) { + ANDw_mask(s3, s3, 0b011011, 0b000010); // 0xE0 + LSRw(s3, s3, 5); + MOV64x(s4, (uintptr_t)GetParityTab()); + LDRw_REG_LSL2(s4, s4, s3); + ANDw_mask(s3, s1, 0, 0b000100); // 0x1f + LSRw_REG(s4, s4, s3); + MVNx_REG(s4, s4); + BFIw(xFlags, s4, F_PF, 1); + } +} + +// emit TEST16 instruction, from test s1, s2, using s3 and s4 as scratch +void emit_test16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5) +{ + MAYUSE(s1); MAYUSE(s2); + IFX_PENDOR0 { + SET_DF(s3, d_tst16); + } else { + SET_DFNONE(s4); + } + IFX(X_OF) { + BFCw(xFlags, F_OF, 1); + } + IFX(X_CF) { + BFCw(xFlags, F_CF, 1); + } + ANDSw_REG(s5, s1, s2); // res = s1 & s2 + IFX_PENDOR0 { + STRH_U12(s5, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_ZF) { + CSETw(s4, cEQ); + BFIw(xFlags, s4, F_ZF, 1); + } + IFX(X_SF) { + LSRw(s4, s5, 15); + BFIw(xFlags, s4, F_SF, 1); + } + // PF: (((emu->x64emu_parity_tab[(res) / 32] >> ((res) % 32)) & 1) == 0) + IFX(X_PF) { + emit_pf(dyn, ninst, s5, s3, s4); + } +} + +// emit TEST8 instruction, from test s1, s2, using s3 and s4 as scratch +void emit_test8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5) +{ + MAYUSE(s1); MAYUSE(s2); + IFX_PENDOR0 { + SET_DF(s3, d_tst8); + } else { + SET_DFNONE(s4); + } + IFX(X_OF) { + BFCw(xFlags, F_OF, 1); + } + IFX(X_CF) { + BFCw(xFlags, F_CF, 1); + } + ANDSw_REG(s5, s1, s2); // res = s1 & s2 + IFX_PENDOR0 { + STRB_U12(s5, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_ZF) { + CSETw(s4, cEQ); + BFIw(xFlags, s4, F_ZF, 1); + } + IFX(X_SF) { + LSRw(s4, s5, 7); + BFIw(xFlags, s4, F_SF, 1); + } + // PF: (((emu->x64emu_parity_tab[(res) / 32] >> ((res) % 32)) & 1) == 0) + IFX(X_PF) { + emit_pf(dyn, ninst, s5, s3, s4); + } +} diff --git a/src/dynarec/arm64/dynarec_arm64_f0.c b/src/dynarec/arm64/dynarec_arm64_f0.c new file mode 100644 index 00000000..b127b0b3 --- /dev/null +++ b/src/dynarec/arm64/dynarec_arm64_f0.c @@ -0,0 +1,939 @@ +#include +#include +#include +#include +#include + +#include "debug.h" +#include "box64context.h" +#include "dynarec.h" +#include "emu/x64emu_private.h" +#include "emu/x64run_private.h" +#include "x64run.h" +#include "x64emu.h" +#include "box64stack.h" +#include "callback.h" +#include "emu/x64run_private.h" +#include "x64trace.h" +#include "dynarec_native.h" + +#include "arm64_printer.h" +#include 
"dynarec_arm64_private.h" +#include "dynarec_arm64_helper.h" +#include "dynarec_arm64_functions.h" + + +uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog) +{ + (void)ip; (void)rep; (void)need_epilog; + + uint8_t opcode = F8; + uint8_t nextop; + uint8_t gd, ed, u8; + uint8_t wback, wb1, wb2, gb1, gb2; + int32_t i32; + int64_t i64, j64; + int64_t fixedaddress; + MAYUSE(gb1); + MAYUSE(gb2); + MAYUSE(wb1); + MAYUSE(wb2); + MAYUSE(j64); + + while((opcode==0xF2) || (opcode==0xF3)) { + rep = opcode-0xF1; + opcode = F8; + } + // REX prefix before the F0 are ignored + rex.rex = 0; + while(opcode>=0x40 && opcode<=0x4f) { + rex.rex = opcode; + opcode = F8; + } + + switch(opcode) { + case 0x00: + INST_NAME("LOCK ADD Eb, Gb"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + DMB_ISH(); + GETGB(x2); + if((nextop&0xC0)==0xC0) { + if(rex.rex) { + wback = xRAX + (nextop&7) + (rex.b<<3); + wb2 = 0; + } else { + wback = (nextop&7); + wb2 = (wback>>2); + wback = xRAX+(wback&3); + } + UBFXw(x1, wback, wb2*8, 8); + emit_add8(dyn, ninst, x1, x2, x4, x3); + BFIx(wback, x1, wb2*8, 8); + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0, rex, 0, 0); + MARKLOCK; + LDAXRB(x1, wback); + emit_add8(dyn, ninst, x1, x2, x4, x3); + STLXRB(x4, x1, wback); + CBNZx_MARKLOCK(x4); + } + DMB_ISH(); + break; + case 0x01: + INST_NAME("LOCK ADD Ed, Gd"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGD; + DMB_ISH(); + if((nextop&0xC0)==0xC0) { + ed = xRAX+(nextop&7)+(rex.b<<3); + emit_add32(dyn, ninst, rex, ed, gd, x3, x4); + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, 0, 0); + MARKLOCK; + LDAXRxw(x1, wback); + emit_add32(dyn, ninst, rex, x1, gd, x3, x4); + STLXRxw(x3, x1, wback); + CBNZx_MARKLOCK(x3); + } + DMB_ISH(); + break; + + case 0x09: + INST_NAME("LOCK OR Ed, Gd"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGD; + DMB_ISH(); + if(MODREG) { + ed = xRAX+(nextop&7)+(rex.b<<3); + emit_or32(dyn, ninst, rex, ed, gd, x3, x4); + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, 0, 0); + MARKLOCK; + LDAXRxw(x1, wback); + emit_or32(dyn, ninst, rex, x1, gd, x3, x4); + STLXRxw(x3, x1, wback); + CBNZx_MARKLOCK(x3); + } + DMB_ISH(); + break; + + case 0x0F: + nextop = F8; + switch(nextop) { + + case 0xB1: + INST_NAME("LOCK CMPXCHG Ed, Gd"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGD; + DMB_ISH(); + if(MODREG) { + ed = xRAX+(nextop&7)+(rex.b<<3); + wback = 0; + UFLAG_IF {emit_cmp32(dyn, ninst, rex, xRAX, ed, x3, x4, x5);} + MOVxw_REG(x1, ed); // save value + CMPSxw_REG(xRAX, x1); + B_MARK2(cNE); + MOVxw_REG(ed, gd); + MARK2; + MOVxw_REG(xRAX, x1); + B_NEXT_nocond; + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, 0, 0); + TSTx_mask(wback, 1, 0, 1+rex.w); // mask=3 or 7 + B_MARK3(cNE); + // Aligned version + MARKLOCK; + LDAXRxw(x1, wback); + CMPSxw_REG(xRAX, x1); + B_MARK(cNE); + // EAX == Ed + STLXRxw(x4, gd, wback); + CBNZx_MARKLOCK(x4); + // done + B_MARK_nocond; + // Unaligned version + MARK3; + LDRxw_U12(x1, wback, 0); + LDAXRB(x3, wback); // dummy read, to arm the write... 
+ CMPSxw_REG(xRAX, x1); + B_MARK(cNE); + // EAX == Ed + STLXRB(x4, gd, wback); + CBNZx_MARK3(x4); + STRxw_U12(gd, wback, 0); + MARK; + // Common part (and fallback for EAX != Ed) + UFLAG_IF {emit_cmp32(dyn, ninst, rex, xRAX, x1, x3, x4, x5);} + MOVxw_REG(xRAX, x1); // upper par of RAX will be erase on 32bits, no mater what + } + DMB_ISH(); + break; + + case 0xC1: + INST_NAME("LOCK XADD Gd, Ed"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGD; + DMB_ISH(); + if(MODREG) { + ed = xRAX+(nextop&7)+(rex.b<<3); + MOVxw_REG(x1, ed); + MOVxw_REG(ed, gd); + MOVxw_REG(gd, x1); + emit_add32(dyn, ninst, rex, ed, gd, x3, x4); + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, 0, 0); + TSTx_mask(wback, 1, 0, 1+rex.w); // mask=3 or 7 + B_MARK(cNE); // unaligned + MARKLOCK; + LDAXRxw(x1, wback); + ADDxw_REG(x4, x1, gd); + STLXRxw(x3, x4, wback); + CBNZx_MARKLOCK(x3); + B_MARK2_nocond; + MARK; + LDRxw_U12(x1, wback, 0); + LDAXRB(x4, wback); + BFIxw(x1, x4, 0, 8); + ADDxw_REG(x4, x1, gd); + STLXRB(x3, x4, wback); + CBNZx_MARK(x3); + STRxw_U12(x4, wback, 0); + MARK2; + IFX(X_ALL|X_PEND) { + MOVxw_REG(x2, x1); + emit_add32(dyn, ninst, rex, x2, gd, x3, x4); + } + MOVxw_REG(gd, x1); + } + DMB_ISH(); + break; + + case 0xC7: + INST_NAME("LOCK CMPXCHG8B Gq, Eq"); + SETFLAGS(X_ZF, SF_SUBSET); + nextop = F8; + addr = geted(dyn, addr, ninst, nextop, &wback, x1, &fixedaddress, 0, 0, rex, 0, 0); + DMB_ISH(); + MARKLOCK; + LDAXPxw(x2, x3, wback); + CMPSxw_REG(xRAX, x2); + B_MARK(cNE); // EAX != Ed[0] + CMPSxw_REG(xRDX, x3); + B_MARK(cNE); // EDX != Ed[1] + STLXPxw(x4, xRBX, xRCX, wback); + CBNZx_MARKLOCK(x4); + MOV32w(x1, 1); + B_MARK3_nocond; + MARK; + MOVxw_REG(xRAX, x2); + MOVxw_REG(xRDX, x3); + MOV32w(x1, 0); + MARK3; + DMB_ISH(); + BFIw(xFlags, x1, F_ZF, 1); + break; + + default: + DEFAULT; + } + break; + + case 0x29: + INST_NAME("LOCK SUB Ed, Gd"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGD; + DMB_ISH(); + if(MODREG) { + ed = xRAX+(nextop&7)+(rex.b<<3); + emit_sub32(dyn, ninst, rex, ed, gd, x3, x4); + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, 0, 0); + MARKLOCK; + LDAXRxw(x1, wback); + emit_sub32(dyn, ninst, rex, x1, gd, x3, x4); + STLXRxw(x3, x1, wback); + CBNZx_MARKLOCK(x3); + } + DMB_ISH(); + break; + + case 0x66: + opcode = F8; + switch(opcode) { + case 0x09: + INST_NAME("LOCK OR Ew, Gw"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGW(x5); + DMB_ISH(); + if(MODREG) { + ed = xRAX+(nextop&7)+(rex.b<<3); + UXTHw(x6, ed); + emit_or16(dyn, ninst, x6, x5, x3, x4); + BFIx(ed, x6, 0, 16); + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, 0, 0); + MARKLOCK; + LDAXRH(x1, wback); + emit_or16(dyn, ninst, x1, x5, x3, x4); + STLXRH(x3, x1, wback); + CBNZx_MARKLOCK(x3); + } + DMB_ISH(); + break; + + case 0x81: + case 0x83: + nextop = F8; + DMB_ISH(); + switch((nextop>>3)&7) { + case 0: //ADD + if(opcode==0x81) { + INST_NAME("LOCK ADD Ew, Iw"); + } else { + INST_NAME("LOCK ADD Ew, Iw"); + } + SETFLAGS(X_ALL, SF_SET_PENDING); + if(MODREG) { + if(opcode==0x81) i32 = F16S; else i32 = F8S; + ed = xRAX+(nextop&7)+(rex.b<<3); + MOV32w(x5, i32); + UXTHw(x6, ed); + emit_add16(dyn, ninst, x6, x5, x3, x4); + BFIx(ed, x6, 0, 16); + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, 0, (opcode==0x81)?2:1); + if(opcode==0x81) i32 = F32S; else i32 = F8S; + MOV32w(x5, i32); + TSTx_mask(wback, 1, 0, 0); // mask=1 + B_MARK(cNE); + MARKLOCK; + 
LDAXRH(x1, wback); + emit_add16(dyn, ninst, x1, x5, x3, x4); + STLXRH(x3, x1, wback); + CBNZx_MARKLOCK(x3); + B_NEXT_nocond; + MARK; // unaligned! also, not enough + LDRH_U12(x1, wback, 0); + LDAXRB(x4, wback); + BFIw(x1, x4, 0, 8); // re-inject + emit_add16(dyn, ninst, x1, x5, x3, x4); + STLXRB(x3, x1, wback); + CBNZx_MARK(x3); + STRH_U12(x1, wback, 0); // put the whole value + } + break; + case 1: //OR + if(opcode==0x81) {INST_NAME("LOCK OR Ew, Iw");} else {INST_NAME("LOCK OR Ew, Iw");} + SETFLAGS(X_ALL, SF_SET_PENDING); + if(MODREG) { + if(opcode==0x81) i32 = F16S; else i32 = F8S; + ed = xRAX+(nextop&7)+(rex.b<<3); + MOV32w(x5, i32); + UXTHw(x6, ed); + emit_or16(dyn, ninst, x6, x5, x3, x4); + BFIx(ed, x6, 0, 16); + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, 0, (opcode==0x81)?2:1); + if(opcode==0x81) i32 = F16S; else i32 = F8S; + MOV32w(x5, i32); + MARKLOCK; + LDAXRH(x1, wback); + emit_or16(dyn, ninst, x1, x5, x3, x4); + STLXRH(x3, x1, wback); + CBNZx_MARKLOCK(x3); + } + break; + case 2: //ADC + if(opcode==0x81) {INST_NAME("LOCK ADC Ew, Iw");} else {INST_NAME("LOCK ADC Ew, Ib");} + READFLAGS(X_CF); + SETFLAGS(X_ALL, SF_SET_PENDING); + if(MODREG) { + if(opcode==0x81) i32 = F16S; else i32 = F8S; + ed = xRAX+(nextop&7)+(rex.b<<3); + MOV32w(x5, i32); + UXTHw(x6, ed); + emit_adc16(dyn, ninst, x6, x5, x3, x4); + BFIx(ed, x6, 0, 16); + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, 0, (opcode==0x81)?2:1); + if(opcode==0x81) i32 = F16S; else i32 = F8S; + MOV32w(x5, i32); + MARKLOCK; + LDAXRH(x1, wback); + emit_adc16(dyn, ninst, x1, x5, x3, x4); + STLXRH(x3, x1, wback); + CBNZx_MARKLOCK(x3); + } + break; + case 3: //SBB + if(opcode==0x81) {INST_NAME("LOCK SBB Ew, Iw");} else {INST_NAME("LOCK SBB Ew, Ib");} + READFLAGS(X_CF); + SETFLAGS(X_ALL, SF_SET_PENDING); + if(MODREG) { + if(opcode==0x81) i32 = F16S; else i32 = F8S; + ed = xRAX+(nextop&7)+(rex.b<<3); + MOV32w(x5, i32); + UXTHw(x6, ed); + emit_sbb16(dyn, ninst, x6, x5, x3, x4); + BFIx(ed, x6, 0, 16); + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, 0, (opcode==0x81)?2:1); + if(opcode==0x81) i32 = F16S; else i32 = F8S; + MOV32w(x5, i32); + MARKLOCK; + LDAXRH(x1, wback); + emit_sbb16(dyn, ninst, x1, x5, x3, x4); + STLXRH(x3, x1, wback); + CBNZx_MARKLOCK(x3); + } + break; + case 4: //AND + if(opcode==0x81) {INST_NAME("LOCK AND Ew, Iw");} else {INST_NAME("LOCK AND Ew, Ib");} + SETFLAGS(X_ALL, SF_SET_PENDING); + if(MODREG) { + if(opcode==0x81) i32 = F16S; else i32 = F8S; + ed = xRAX+(nextop&7)+(rex.b<<3); + MOV32w(x5, i32); + UXTHw(x6, ed); + emit_and16(dyn, ninst, x6, x5, x3, x4); + BFIx(ed, x6, 0, 16); + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, 0, (opcode==0x81)?2:1); + if(opcode==0x81) i32 = F16S; else i32 = F8S; + MOV32w(x5, i32); + MARKLOCK; + LDAXRH(x1, wback); + emit_and16(dyn, ninst, x1, x5, x3, x4); + STLXRH(x3, x1, wback); + CBNZx_MARKLOCK(x3); + } + break; + case 5: //SUB + if(opcode==0x81) {INST_NAME("LOCK SUB Ew, Iw");} else {INST_NAME("LOCK SUB Ew, Ib");} + SETFLAGS(X_ALL, SF_SET_PENDING); + if(MODREG) { + if(opcode==0x81) i32 = F16S; else i32 = F8S; + ed = xRAX+(nextop&7)+(rex.b<<3); + MOV32w(x5, i32); + UXTHw(x6, ed); + emit_sub16(dyn, ninst, x6, x5, x3, x4); + BFIx(ed, x6, 0, 16); + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, 0, (opcode==0x81)?2:1); + if(opcode==0x81) i32 = F16S; else i32 = F8S; + MOV32w(x5, i32); + 
TSTx_mask(wback, 1, 0, 0); // mask=1 + B_MARK(cNE); + MARKLOCK; + LDAXRH(x1, wback); + emit_sub16(dyn, ninst, x1, x5, x3, x4); + STLXRH(x3, x1, wback); + CBNZx_MARKLOCK(x3); + B_NEXT_nocond; + MARK; // unaligned! also, not enough + LDRH_U12(x1, wback, 0); + LDAXRB(x4, wback); + BFIw(x1, x4, 0, 8); // re-inject + emit_sub16(dyn, ninst, x1, x5, x3, x4); + STLXRB(x3, x1, wback); + CBNZx_MARK(x3); + STRH_U12(x1, wback, 0); // put the whole value + } + break; + case 6: //XOR + if(opcode==0x81) {INST_NAME("LOCK XOR Ew, Iw");} else {INST_NAME("LOCK XOR Ew, Ib");} + SETFLAGS(X_ALL, SF_SET_PENDING); + if(MODREG) { + if(opcode==0x81) i32 = F16S; else i32 = F8S; + ed = xRAX+(nextop&7)+(rex.b<<3); + MOV32w(x5, i32); + UXTHw(x6, ed); + emit_xor16(dyn, ninst, x6, x5, x3, x4); + BFIx(ed, x6, 0, 16); + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, 0, (opcode==0x81)?2:1); + if(opcode==0x81) i32 = F16S; else i32 = F8S; + MOV32w(x5, i32); + MARKLOCK; + LDAXRH(x1, wback); + emit_xor16(dyn, ninst, x1, x5, x3, x4); + STLXRH(x3, x1, wback); + CBNZx_MARKLOCK(x3); + } + break; + case 7: //CMP + if(opcode==0x81) {INST_NAME("(LOCK) CMP Ew, Iw");} else {INST_NAME("(LOCK) CMP Ew, Ib");} + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEW(x6, (opcode==0x81)?2:1); + (void)wb1; + // No need to LOCK, this is readonly + if(opcode==0x81) i32 = F16S; else i32 = F8S; + if(i32) { + MOV32w(x5, i32); + UXTHw(x6, ed); + emit_cmp16(dyn, ninst, x6, x5, x3, x4, x6); + BFIx(ed, x6, 0, 16); + } else { + emit_cmp16_0(dyn, ninst, ed, x3, x4); + } + break; + } + DMB_ISH(); + break; + default: + DEFAULT; + } + break; + + case 0x80: + nextop = F8; + DMB_ISH(); + switch((nextop>>3)&7) { + case 0: //ADD + INST_NAME("ADD Eb, Ib"); + SETFLAGS(X_ALL, SF_SET_PENDING); + if(MODREG) { + GETEB(x1, 1); + u8 = F8; + emit_add8c(dyn, ninst, x1, u8, x2, x4); + wb1 = 0; + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x5, &fixedaddress, 0, 0, rex, 0, 1); + u8 = F8; + wb1 = 1; + MARKLOCK; + LDAXRB(x1, wback); + emit_add8c(dyn, ninst, x1, u8, x2, x4); + STLXRB(x3, x1, wback); + CBNZx_MARKLOCK(x3); + } + EBBACK; + break; + case 1: //OR + INST_NAME("OR Eb, Ib"); + SETFLAGS(X_ALL, SF_SET_PENDING); + if(MODREG) { + GETEB(x1, 1); + u8 = F8; + emit_or8c(dyn, ninst, x1, u8, x2, x4); + wb1 = 0; + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x5, &fixedaddress, 0, 0, rex, 0, 1); + u8 = F8; + wb1 = 1; + MARKLOCK; + LDAXRB(x1, wback); + emit_or8c(dyn, ninst, x1, u8, x2, x4); + STLXRB(x3, x1, wback); + CBNZx_MARKLOCK(x3); + } + EBBACK; + break; + case 2: //ADC + INST_NAME("ADC Eb, Ib"); + READFLAGS(X_CF); + SETFLAGS(X_ALL, SF_SET_PENDING); + if(MODREG) { + GETEB(x1, 1); + u8 = F8; + emit_adc8c(dyn, ninst, x1, u8, x2, x4, x5); + wb1 = 0; + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x5, &fixedaddress, 0, 0, rex, 0, 1); + u8 = F8; + wb1 = 1; + MARKLOCK; + LDAXRB(x1, wback); + emit_adc8c(dyn, ninst, x1, u8, x2, x4, x5); + STLXRB(x3, x1, wback); + CBNZx_MARKLOCK(x3); + } + EBBACK; + break; + case 3: //SBB + INST_NAME("SBB Eb, Ib"); + READFLAGS(X_CF); + SETFLAGS(X_ALL, SF_SET_PENDING); + if(MODREG) { + GETEB(x1, 1); + u8 = F8; + emit_sbb8c(dyn, ninst, x1, u8, x2, x4, x5); + wb1 = 0; + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x5, &fixedaddress, 0, 0, rex, 0, 1); + u8 = F8; + wb1 = 1; + MARKLOCK; + LDAXRB(x1, wback); + emit_sbb8c(dyn, ninst, x1, u8, x2, x4, x5); + STLXRB(x3, x1, wback); + CBNZx_MARKLOCK(x3); + } + EBBACK; + break; + case 4: //AND + INST_NAME("AND Eb, Ib"); + 
SETFLAGS(X_ALL, SF_SET_PENDING); + if(MODREG) { + GETEB(x1, 1); + u8 = F8; + emit_and8c(dyn, ninst, x1, u8, x2, x4); + wb1 = 0; + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x5, &fixedaddress, 0, 0, rex, 0, 1); + u8 = F8; + wb1 = 1; + MARKLOCK; + LDAXRB(x1, wback); + emit_and8c(dyn, ninst, x1, u8, x2, x4); + STLXRB(x3, x1, wback); + CBNZx_MARKLOCK(x3); + } + EBBACK; + break; + case 5: //SUB + INST_NAME("SUB Eb, Ib"); + SETFLAGS(X_ALL, SF_SET_PENDING); + if(MODREG) { + GETEB(x1, 1); + u8 = F8; + emit_sub8c(dyn, ninst, x1, u8, x2, x4, x5); + wb1 = 0; + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x5, &fixedaddress, 0, 0, rex, 0, 1); + u8 = F8; + wb1 = 1; + MARKLOCK; + LDAXRB(x1, wback); + emit_sub8c(dyn, ninst, x1, u8, x2, x4, x5); + STLXRB(x3, x1, wback); + CBNZx_MARKLOCK(x3); + } + EBBACK; + break; + case 6: //XOR + INST_NAME("XOR Eb, Ib"); + SETFLAGS(X_ALL, SF_SET_PENDING); + if(MODREG) { + GETEB(x1, 1); + u8 = F8; + emit_xor8c(dyn, ninst, x1, u8, x2, x4); + wb1 = 0; + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x5, &fixedaddress, 0, 0, rex, 0, 1); + u8 = F8; + wb1 = 1; + MARKLOCK; + LDAXRB(x1, wback); + emit_xor8c(dyn, ninst, x1, u8, x2, x4); + STLXRB(x3, x1, wback); + CBNZx_MARKLOCK(x3); + } + EBBACK; + break; + case 7: //CMP + INST_NAME("CMP Eb, Ib"); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEB(x1, 1); + u8 = F8; + if(u8) { + MOV32w(x2, u8); + emit_cmp8(dyn, ninst, x1, x2, x3, x4, x5); + } else { + emit_cmp8_0(dyn, ninst, x1, x3, x4); + } + break; + default: + DEFAULT; + } + DMB_ISH(); + break; + case 0x81: + case 0x83: + nextop = F8; + DMB_ISH(); + switch((nextop>>3)&7) { + case 0: //ADD + if(opcode==0x81) { + INST_NAME("LOCK ADD Ed, Id"); + } else { + INST_NAME("LOCK ADD Ed, Ib"); + } + SETFLAGS(X_ALL, SF_SET_PENDING); + if(MODREG) { + if(opcode==0x81) i64 = F32S; else i64 = F8S; + ed = xRAX+(nextop&7)+(rex.b<<3); + MOV64xw(x5, i64); + emit_add32(dyn, ninst, rex, ed, x5, x3, x4); + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, 0, (opcode==0x81)?4:1); + if(opcode==0x81) i64 = F32S; else i64 = F8S; + TSTx_mask(wback, 1, 0, 1+rex.w); // mask=3 or 7 + B_MARK(cNE); + MARKLOCK; + LDAXRxw(x1, wback); + emit_add32c(dyn, ninst, rex, x1, i64, x3, x4, x5); + STLXRxw(x3, x1, wback); + CBNZx_MARKLOCK(x3); + DMB_ISH(); + B_NEXT_nocond; + MARK; // unaligned! 
also, not enough + LDRxw_U12(x1, wback, 0); + LDAXRB(x4, wback); + BFIxw(x1, x4, 0, 8); // re-inject + emit_add32c(dyn, ninst, rex, x1, i64, x3, x4, x5); + STLXRB(x3, x1, wback); + CBNZx_MARK(x3); + STRxw_U12(x1, wback, 0); // put the whole value + } + break; + case 1: //OR + if(opcode==0x81) {INST_NAME("LOCK OR Ed, Id");} else {INST_NAME("LOCK OR Ed, Ib");} + SETFLAGS(X_ALL, SF_SET_PENDING); + if(MODREG) { + if(opcode==0x81) i64 = F32S; else i64 = F8S; + ed = xRAX+(nextop&7)+(rex.b<<3); + MOV64xw(x5, i64); + emit_or32(dyn, ninst, rex, ed, x5, x3, x4); + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, 0, (opcode==0x81)?4:1); + if(opcode==0x81) i64 = F32S; else i64 = F8S; + MOV64xw(x5, i64); + MARKLOCK; + LDAXRxw(x1, wback); + emit_or32(dyn, ninst, rex, x1, x5, x3, x4); + STLXRxw(x3, x1, wback); + CBNZx_MARKLOCK(x3); + } + break; + case 2: //ADC + if(opcode==0x81) {INST_NAME("LOCK ADC Ed, Id");} else {INST_NAME("LOCK ADC Ed, Ib");} + READFLAGS(X_CF); + SETFLAGS(X_ALL, SF_SET_PENDING); + if(MODREG) { + if(opcode==0x81) i64 = F32S; else i64 = F8S; + ed = xRAX+(nextop&7)+(rex.b<<3); + MOV64xw(x5, i64); + emit_adc32(dyn, ninst, rex, ed, x5, x3, x4); + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, 0, (opcode==0x81)?4:1); + if(opcode==0x81) i64 = F32S; else i64 = F8S; + MOV64xw(x5, i64); + MARKLOCK; + LDAXRxw(x1, wback); + emit_adc32(dyn, ninst, rex, x1, x5, x3, x4); + STLXRxw(x3, x1, wback); + CBNZx_MARKLOCK(x3); + } + break; + case 3: //SBB + if(opcode==0x81) {INST_NAME("LOCK SBB Ed, Id");} else {INST_NAME("LOCK SBB Ed, Ib");} + READFLAGS(X_CF); + SETFLAGS(X_ALL, SF_SET_PENDING); + if(MODREG) { + if(opcode==0x81) i64 = F32S; else i64 = F8S; + ed = xRAX+(nextop&7)+(rex.b<<3); + MOV64xw(x5, i64); + emit_sbb32(dyn, ninst, rex, ed, x5, x3, x4); + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, 0, (opcode==0x81)?4:1); + if(opcode==0x81) i64 = F32S; else i64 = F8S; + MOV64xw(x5, i64); + MARKLOCK; + LDAXRxw(x1, wback); + emit_sbb32(dyn, ninst, rex, x1, x5, x3, x4); + STLXRxw(x3, x1, wback); + CBNZx_MARKLOCK(x3); + } + break; + case 4: //AND + if(opcode==0x81) {INST_NAME("LOCK AND Ed, Id");} else {INST_NAME("LOCK AND Ed, Ib");} + SETFLAGS(X_ALL, SF_SET_PENDING); + if(MODREG) { + if(opcode==0x81) i64 = F32S; else i64 = F8S; + ed = xRAX+(nextop&7)+(rex.b<<3); + MOV64xw(x5, i64); + emit_and32(dyn, ninst, rex, ed, x5, x3, x4); + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, 0, (opcode==0x81)?4:1); + if(opcode==0x81) i64 = F32S; else i64 = F8S; + MOV64xw(x5, i64); + MARKLOCK; + LDAXRxw(x1, wback); + emit_and32(dyn, ninst, rex, x1, x5, x3, x4); + STLXRxw(x3, x1, wback); + CBNZx_MARKLOCK(x3); + } + break; + case 5: //SUB + if(opcode==0x81) {INST_NAME("LOCK SUB Ed, Id");} else {INST_NAME("LOCK SUB Ed, Ib");} + SETFLAGS(X_ALL, SF_SET_PENDING); + if(MODREG) { + if(opcode==0x81) i64 = F32S; else i64 = F8S; + ed = xRAX+(nextop&7)+(rex.b<<3); + MOV64xw(x5, i64); + emit_sub32(dyn, ninst, rex, ed, x5, x3, x4); + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, 0, (opcode==0x81)?4:1); + if(opcode==0x81) i64 = F32S; else i64 = F8S; + TSTx_mask(wback, 1, 0, 1+rex.w); // mask=3 or 7 + B_MARK(cNE); + MARKLOCK; + LDAXRxw(x1, wback); + emit_sub32c(dyn, ninst, rex, x1, i64, x3, x4, x5); + STLXRxw(x3, x1, wback); + CBNZx_MARKLOCK(x3); + DMB_ISH(); + B_NEXT_nocond; + MARK; // unaligned! 
also, not enough + LDRxw_U12(x1, wback, 0); + LDAXRB(x4, wback); + BFIxw(x1, x4, 0, 8); // re-inject + emit_sub32c(dyn, ninst, rex, x1, i64, x3, x4, x5); + STLXRB(x3, x1, wback); + CBNZx_MARK(x3); + STRxw_U12(x1, wback, 0); // put the whole value + } + break; + case 6: //XOR + if(opcode==0x81) {INST_NAME("LOCK XOR Ed, Id");} else {INST_NAME("LOCK XOR Ed, Ib");} + SETFLAGS(X_ALL, SF_SET_PENDING); + if(MODREG) { + if(opcode==0x81) i64 = F32S; else i64 = F8S; + ed = xRAX+(nextop&7)+(rex.b<<3); + MOV64xw(x5, i64); + emit_xor32(dyn, ninst, rex, ed, x5, x3, x4); + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, 0, (opcode==0x81)?4:1); + if(opcode==0x81) i64 = F32S; else i64 = F8S; + MOV64xw(x5, i64); + MARKLOCK; + LDAXRxw(x1, wback); + emit_xor32(dyn, ninst, rex, x1, x5, x3, x4); + STLXRxw(x3, x1, wback); + CBNZx_MARKLOCK(x3); + } + break; + case 7: //CMP + if(opcode==0x81) {INST_NAME("(LOCK) CMP Ed, Id");} else {INST_NAME("(LOCK) CMP Ed, Ib");} + SETFLAGS(X_ALL, SF_SET_PENDING); + GETED((opcode==0x81)?4:1); + // No need to LOCK, this is readonly + if(opcode==0x81) i64 = F32S; else i64 = F8S; + if(i64) { + MOV64xw(x5, i64); + emit_cmp32(dyn, ninst, rex, ed, x5, x3, x4, x6); + } else { + emit_cmp32_0(dyn, ninst, rex, ed, x3, x4); + } + break; + } + DMB_ISH(); + break; + + case 0x87: + INST_NAME("LOCK XCHG Ed, Gd"); + nextop = F8; + if(MODREG) { + GETGD; + GETED(0); + MOVxw_REG(x1, gd); + MOVxw_REG(gd, ed); + MOVxw_REG(ed, x1); + } else { + GETGD; + DMB_ISH(); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0, rex, 0, 0); + TSTx_mask(ed, 1, 0, 1+rex.w); // mask=3 or 7 + B_MARK(cNE); + MARKLOCK; + LDAXRxw(x1, ed); + STLXRxw(x3, gd, ed); + CBNZx_MARKLOCK(x3); + B_MARK2_nocond; + MARK; + LDRxw_U12(x1, ed, 0); + STRxw_U12(gd, ed, 0); + MARK2; + DMB_ISH(); + MOVxw_REG(gd, x1); + } + break; + + case 0xFF: + nextop = F8; + switch((nextop>>3)&7) + { + case 0: // INC Ed + INST_NAME("LOCK INC Ed"); + SETFLAGS(X_ALL&~X_CF, SF_SUBSET); + DMB_ISH(); + if(MODREG) { + ed = xRAX+(nextop&7)+(rex.b<<3); + emit_inc32(dyn, ninst, rex, ed, x3, x4); + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, 0, 0); + TSTx_mask(wback, 1, 0, 1+rex.w); // mask=3 or 7 + B_MARK(cNE); // unaligned + MARKLOCK; + LDAXRxw(x1, wback); + emit_inc32(dyn, ninst, rex, x1, x3, x4); + STLXRxw(x3, x1, wback); + CBNZx_MARKLOCK(x3); + B_NEXT_nocond; + MARK; + LDRxw_U12(x1, wback, 0); + LDAXRB(x4, wback); + BFIxw(x1, x4, 0, 8); // re-inject + emit_inc32(dyn, ninst, rex, x1, x3, x4); + STLXRB(x3, x1, wback); + CBNZw_MARK(x3); + STRxw_U12(x1, wback, 0); + } + DMB_ISH(); + break; + case 1: //DEC Ed + INST_NAME("LOCK DEC Ed"); + SETFLAGS(X_ALL&~X_CF, SF_SUBSET); + DMB_ISH(); + if(MODREG) { + ed = xRAX+(nextop&7)+(rex.b<<3); + emit_dec32(dyn, ninst, rex, ed, x3, x4); + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, 0, 0); + TSTx_mask(wback, 1, 0, 1+rex.w); // mask=3 or 7 + B_MARK(cNE); // unaligned + MARKLOCK; + LDAXRxw(x1, wback); + emit_dec32(dyn, ninst, rex, x1, x3, x4); + STLXRxw(x3, x1, wback); + CBNZx_MARKLOCK(x3); + B_NEXT_nocond; + MARK; + LDRxw_U12(x1, wback, 0); + LDAXRB(x4, wback); + BFIxw(x1, x4, 0, 8); // re-inject + emit_dec32(dyn, ninst, rex, x1, x3, x4); + STLXRB(x3, x1, wback); + CBNZw_MARK(x3); + STRxw_U12(x1, wback, 0); + } + DMB_ISH(); + break; + default: + DEFAULT; + } + break; + + default: + DEFAULT; + } + return addr; +} diff --git a/src/dynarec/arm64/dynarec_arm64_f20f.c 
b/src/dynarec/arm64/dynarec_arm64_f20f.c new file mode 100755 index 00000000..cf047a10 --- /dev/null +++ b/src/dynarec/arm64/dynarec_arm64_f20f.c @@ -0,0 +1,367 @@ +#include +#include +#include +#include +#include + +#include "debug.h" +#include "box64context.h" +#include "dynarec.h" +#include "emu/x64emu_private.h" +#include "emu/x64run_private.h" +#include "x64run.h" +#include "x64emu.h" +#include "box64stack.h" +#include "callback.h" +#include "emu/x64run_private.h" +#include "x64trace.h" +#include "dynarec_native.h" + +#include "arm64_printer.h" +#include "dynarec_arm64_private.h" +#include "dynarec_arm64_functions.h" +#include "dynarec_arm64_helper.h" + +// Get Ex as a double, not a quad (warning, x2 get used) +#define GETEX(a, D) \ + if(MODREG) { \ + a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); \ + } else { \ + a = fpu_get_scratch(dyn); \ + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, D); \ + VLDR64_U12(a, ed, fixedaddress); \ + } + +#define GETG gd = ((nextop&0x38)>>3)+(rex.r<<3) + +#define GETGX(a) gd = ((nextop&0x38)>>3)+(rex.r<<3); \ + a = sse_get_reg(dyn, ninst, x1, gd) + +#define GETGX_empty(a) gd = ((nextop&0x38)>>3)+(rex.r<<3); \ + a = sse_get_reg_empty(dyn, ninst, x1, gd) + +#define GETGM(a) \ + gd = ((nextop&0x38)>>3); \ + a = mmx_get_reg(dyn, ninst, x1, gd) + +uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog) +{ + (void)ip; (void)need_epilog; + + uint8_t opcode = F8; + uint8_t nextop; + uint8_t gd, ed; + uint8_t wback; + uint8_t u8; + uint64_t u64, j64; + int v0, v1; + int q0; + int d0, d1; + int64_t fixedaddress; + +#ifdef PRECISE_CVT + int j32; + MAYUSE(j32); +#endif + MAYUSE(d0); + MAYUSE(d1); + MAYUSE(q0); + MAYUSE(v0); + MAYUSE(v1); + + switch(opcode) { + + case 0x10: + INST_NAME("MOVSD Gx, Ex"); + nextop = F8; + GETG; + if(MODREG) { + ed = (nextop&7)+ (rex.b<<3); + v0 = sse_get_reg(dyn, ninst, x1, gd); + d0 = sse_get_reg(dyn, ninst, x1, ed); + VMOVeD(v0, 0, d0, 0); + } else { + v0 = sse_get_reg_empty(dyn, ninst, x1, gd); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0); + VLDR64_U12(v0, ed, fixedaddress); // upper part reseted + } + break; + case 0x11: + INST_NAME("MOVSD Ex, Gx"); + nextop = F8; + GETG; + v0 = sse_get_reg(dyn, ninst, x1, gd); + if(MODREG) { + ed = (nextop&7)+ (rex.b<<3); + d0 = sse_get_reg(dyn, ninst, x1, ed); + VMOVeD(d0, 0, v0, 0); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0); + VSTR64_U12(v0, ed, fixedaddress); + } + break; + case 0x12: + INST_NAME("MOVDDUP Gx, Ex"); + nextop = F8; + GETG; + if(MODREG) { + d0 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); + v0 = sse_get_reg_empty(dyn, ninst, x1, gd); + VMOVeD(v0, 0, d0, 0); + } else { + v0 = sse_get_reg_empty(dyn, ninst, x1, gd); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0); + VLDR64_U12(v0, ed, fixedaddress); + } + VMOVeD(v0, 1, v0, 0); + break; + + case 0x2A: + INST_NAME("CVTSI2SD Gx, Ed"); + nextop = F8; + GETGX(v0); + GETED(0); + d1 = fpu_get_scratch(dyn); + if(rex.w) { + SCVTFDx(d1, ed); + } else { + SCVTFDw(d1, ed); + } + VMOVeD(v0, 0, d1, 0); + break; + + case 0x2C: + INST_NAME("CVTTSD2SI Gd, Ex"); + nextop = F8; + GETGD; + GETEX(q0, 0); + FCVTZSxwD(gd, q0); + break; + case 0x2D: + INST_NAME("CVTSD2SI Gd, Ex"); + nextop = F8; + GETGD; + GETEX(q0, 0); + #ifdef PRECISE_CVT + LDRH_U12(x1, xEmu, offsetof(x64emu_t, mxcsr)); + 
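The UBFX just below pulls bits 13..14 out of the MXCSR value that was just loaded; that field is the SSE rounding control, and the four FCVT variants further down are laid out in the same order. A small sketch of the assumed field layout (not part of the patch):

#include <stdint.h>

/* SSE MXCSR rounding-control field (bits 13..14):
   0 = nearest (even), 1 = toward -inf, 2 = toward +inf, 3 = toward zero. */
static unsigned mxcsr_rounding_sketch(uint32_t mxcsr)
{
    return (mxcsr >> 13) & 3u;
}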
UBFXx(x1, x1, 13, 2); // extract round requested
+ LSLx_REG(x1, x1, 3);
+ // Construct a "switch case", with each case 2 instructions, so 8 bytes
+ ADR(xLR, GETMARK);
+ ADDx_REG(xLR, xLR, x1);
+ B(xLR);
+ MARK;
+ FCVTNSxwD(gd, q0); // 0: Nearest (even)
+ B_NEXT_nocond;
+ FCVTMSxwD(gd, q0); // 1: Toward -inf
+ B_NEXT_nocond;
+ FCVTPSxwD(gd, q0); // 2: Toward +inf
+ B_NEXT_nocond;
+ FCVTZSxwD(gd, q0); // 3: Toward 0
+ #else
+ FCVTNSxwD(gd, q0);
+ #endif
+ break;
+
+
+ case 0x51:
+ INST_NAME("SQRTSD Gx, Ex");
+ nextop = F8;
+ GETGX(v0);
+ d1 = fpu_get_scratch(dyn);
+ GETEX(d0, 0);
+ FSQRTD(d1, d0);
+ VMOVeD(v0, 0, d1, 0);
+ break;
+
+ case 0x58:
+ INST_NAME("ADDSD Gx, Ex");
+ nextop = F8;
+ GETGX(v0);
+ d1 = fpu_get_scratch(dyn);
+ GETEX(d0, 0);
+ FADDD(d1, v0, d0); // the high part of the vector is erased...
+ VMOVeD(v0, 0, d1, 0);
+ break;
+ case 0x59:
+ INST_NAME("MULSD Gx, Ex");
+ nextop = F8;
+ GETGX(v0);
+ d1 = fpu_get_scratch(dyn);
+ GETEX(d0, 0);
+ FMULD(d1, v0, d0);
+ VMOVeD(v0, 0, d1, 0);
+ break;
+ case 0x5A:
+ INST_NAME("CVTSD2SS Gx, Ex");
+ nextop = F8;
+ GETGX(v0);
+ GETEX(d0, 0);
+ d1 = fpu_get_scratch(dyn);
+ FCVT_S_D(d1, d0);
+ VMOVeS(v0, 0, d1, 0);
+ break;
+
+ case 0x5C:
+ INST_NAME("SUBSD Gx, Ex");
+ nextop = F8;
+ GETGX(v0);
+ d1 = fpu_get_scratch(dyn);
+ GETEX(d0, 0);
+ FSUBD(d1, v0, d0);
+ VMOVeD(v0, 0, d1, 0);
+ break;
+ case 0x5D:
+ INST_NAME("MINSD Gx, Ex");
+ nextop = F8;
+ GETG;
+ v0 = sse_get_reg(dyn, ninst, x1, gd);
+ GETEX(v1, 0);
+ // MINSD: if any input is NaN, or Ex[0]<Gx[0], copy Ex[0] -> Gx[0]
+ #if 0
+ d0 = fpu_get_scratch(dyn);
+ FMINNMD(d0, v0, v1); // NaN handling may be slightly different, is that a problem?
+ VMOVeD(v0, 0, d0, 0); // to not erase upper part
+ #else
+ FCMPD(v0, v1);
+ B_NEXT(cLS); // Less than or equal
+ VMOVeD(v0, 0, v1, 0); // to not erase upper part
+ #endif
+ break;
+ case 0x5E:
+ INST_NAME("DIVSD Gx, Ex");
+ nextop = F8;
+ GETGX(v0);
+ d1 = fpu_get_scratch(dyn);
+ GETEX(d0, 0);
+ FDIVD(d1, v0, d0);
+ VMOVeD(v0, 0, d1, 0);
+ break;
+ case 0x5F:
+ INST_NAME("MAXSD Gx, Ex");
+ nextop = F8;
+ GETG;
+ v0 = sse_get_reg(dyn, ninst, x1, gd);
+ GETEX(v1, 0);
+ // MAXSD: if any input is NaN, or Ex[0]>Gx[0], copy Ex[0] -> Gx[0]
+ #if 0
+ d0 = fpu_get_scratch(dyn);
+ FMAXNMD(d0, v0, v1); // NaN handling may be slightly different, is that a problem?
+ VMOVeD(v0, 0, d0, 0); // to not erase upper part
+ #else
+ FCMPD(v0, v1);
+ B_NEXT(cGE); // Greater than or equal
+ VMOVeD(v0, 0, v1, 0); // to not erase upper part
+ #endif
+ break;
+
+ case 0x70:
+ INST_NAME("PSHUFLW Gx, Ex, Ib");
+ nextop = F8;
+ GETEX(v1, 1);
+ GETGX(v0);
+
+ u8 = F8;
+ // only the low part needs to be shuffled.
VTBL only handle 8bits value, so the 16bits suffles need to be changed in 8bits + u64 = 0; + for (int i=0; i<4; ++i) { + u64 |= ((uint64_t)((u8>>(i*2))&3)*2+0)<<(i*16+0); + u64 |= ((uint64_t)((u8>>(i*2))&3)*2+1)<<(i*16+8); + } + MOV64x(x2, u64); + d0 = fpu_get_scratch(dyn); + VMOVQDfrom(d0, 0, x2); + VTBL1_8(d0, v1, d0); + VMOVeD(v0, 0, d0, 0); + if(v0!=v1) { + VMOVeD(v0, 1, v1, 1); + } + break; + + case 0x7C: + INST_NAME("HADDPS Gx, Ex"); + nextop = F8; + GETGX(v0); + if(MODREG) { + v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, 0, 0); + v1 = fpu_get_scratch(dyn); + VLDR128_U12(v1, ed, fixedaddress); + } + VFADDPQS(v0, v0, v1); + break; + + case 0xC2: + INST_NAME("CMPSD Gx, Ex, Ib"); + nextop = F8; + GETGX(v0); + GETEX(v1, 1); + u8 = F8; + FCMPD(v0, v1); + switch(u8&7) { + case 0: CSETMx(x2, cEQ); break; // Equal + case 1: CSETMx(x2, cCC); break; // Less than + case 2: CSETMx(x2, cLS); break; // Less or equal + case 3: CSETMx(x2, cVS); break; // NaN + case 4: CSETMx(x2, cNE); break; // Not Equal or unordered + case 5: CSETMx(x2, cCS); break; // Greater or equal or unordered + case 6: CSETMx(x2, cHI); break; // Greater or unordered, test inverted, N!=V so unordered or less than (inverted) + case 7: CSETMx(x2, cVC); break; // not NaN + } + VMOVQDfrom(v0, 0, x2); + break; + + case 0xD0: + INST_NAME("ADDSUBPS Gx, Ex"); + nextop = F8; + GETGX(v0); + GETEX(v1, 0); + q0 = fpu_get_scratch(dyn); + static float addsubps[4] = {-1.f, 1.f, -1.f, 1.f}; + MAYUSE(addsubps); + TABLE64(x2, (uintptr_t)&addsubps); + VLDR128_U12(q0, x2, 0); + VFMLAQS(v0, v1, q0); + break; + + case 0xD6: + INST_NAME("MOVDQ2Q Gm, Ex"); + nextop = F8; + GETGM(v0); + GETEX(v1, 0); + VMOV(v0, v1); + break; + + case 0xE6: + INST_NAME("CVTPD2DQ Gx, Ex"); + nextop = F8; + GETEX(v1, 0); + GETGX_empty(v0); + u8 = sse_setround(dyn, ninst, x1, x2, x3); + VFRINTIDQ(v0, v1); + x87_restoreround(dyn, ninst, u8); + VFCVTNSQD(v0, v0); // convert double -> int64 + SQXTN_32(v0, v0); // convert int64 -> int32 with saturation in lower part, RaZ high part + break; + + case 0xF0: + INST_NAME("LDDQU Gx,Ex"); + nextop = F8; + GETG; + if(MODREG) { + v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); + v0 = sse_get_reg_empty(dyn, ninst, x1, gd); + VMOVQ(v0, v1); + } else { + v0 = sse_get_reg_empty(dyn, ninst, x1, gd); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 7, rex, 0, 0); + VLDR128_U12(v0, ed, fixedaddress); + } + break; + + default: + DEFAULT; + } + return addr; +} diff --git a/src/dynarec/arm64/dynarec_arm64_f30f.c b/src/dynarec/arm64/dynarec_arm64_f30f.c new file mode 100755 index 00000000..5f489168 --- /dev/null +++ b/src/dynarec/arm64/dynarec_arm64_f30f.c @@ -0,0 +1,440 @@ +#include +#include +#include +#include +#include + +#include "debug.h" +#include "box64context.h" +#include "dynarec.h" +#include "emu/x64emu_private.h" +#include "emu/x64run_private.h" +#include "x64run.h" +#include "x64emu.h" +#include "box64stack.h" +#include "callback.h" +#include "emu/x64run_private.h" +#include "x64trace.h" +#include "dynarec_native.h" + +#include "arm64_printer.h" +#include "dynarec_arm64_private.h" +#include "dynarec_arm64_functions.h" +#include "dynarec_arm64_helper.h" + +// Get Ex as a single, not a quad (warning, x2 get used) +#define GETEX(a, D) \ + if(MODREG) { \ + a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); \ + } else { \ + a = fpu_get_scratch(dyn); \ + addr = geted(dyn, addr, ninst, nextop, 
&ed, x1, &fixedaddress, 0xfff<<2, 3, rex, 0, D); \ + VLDR32_U12(a, ed, fixedaddress); \ + } + +#define GETG gd = ((nextop&0x38)>>3)+(rex.r<<3) + +#define GETGX(a) gd = ((nextop&0x38)>>3)+(rex.r<<3); \ + a = sse_get_reg(dyn, ninst, x1, gd) + +#define GETGX_empty(a) gd = ((nextop&0x38)>>3)+(rex.r<<3); \ + a = sse_get_reg_empty(dyn, ninst, x1, gd) + +uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog) +{ + (void)ip; (void)need_epilog; + + uint8_t opcode = F8; + uint8_t nextop, u8; + uint8_t gd, ed; + uint8_t wback; + uint64_t u64; + int v0, v1; + int q0, q1; + int d0, d1; + int64_t fixedaddress; + int64_t j64; + + MAYUSE(d0); + MAYUSE(d1); + MAYUSE(q0); + MAYUSE(q1); + MAYUSE(v0); + MAYUSE(v1); + MAYUSE(j64); + + switch(opcode) { + + case 0x10: + INST_NAME("MOVSS Gx, Ex"); + nextop = F8; + GETG; + if(MODREG) { + v0 = sse_get_reg(dyn, ninst, x1, gd); + q0 = sse_get_reg(dyn, ninst, x1, (nextop&7) + (rex.b<<3)); + VMOVeS(v0, 0, q0, 0); + } else { + v0 = sse_get_reg_empty(dyn, ninst, x1, gd); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); + VLDR32_U12(v0, ed, fixedaddress); + } + break; + case 0x11: + INST_NAME("MOVSS Ex, Gx"); + nextop = F8; + GETG; + v0 = sse_get_reg(dyn, ninst, x1, gd); + if(MODREG) { + q0 = sse_get_reg(dyn, ninst, x1, (nextop&7) + (rex.b<<3)); + VMOVeS(q0, 0, v0, 0); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); + VSTR32_U12(v0, ed, fixedaddress); + } + break; + case 0x12: + INST_NAME("MOVSLDUP Gx, Ex"); + nextop = F8; + if(MODREG) { + q1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, 0, 0); + q1 = fpu_get_scratch(dyn); + VLDR128_U12(q1, ed, fixedaddress); + } + GETGX_empty(q0); + VTRNQ1_32(q0, q1, q1); + break; + + case 0x16: + INST_NAME("MOVSHDUP Gx, Ex"); + nextop = F8; + if(MODREG) { + q1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, 0, 0); + q1 = fpu_get_scratch(dyn); + VLDR128_U12(q1, ed, fixedaddress); + } + GETGX_empty(q0); + VTRNQ2_32(q0, q1, q1); + break; + + case 0x1E: + INST_NAME("NOP / ENDBR32 / ENDBR64"); + nextop = F8; + FAKEED; + break; + + case 0x2A: + INST_NAME("CVTSI2SS Gx, Ed"); + nextop = F8; + GETGX(v0); + GETED(0); + d1 = fpu_get_scratch(dyn); + if(rex.w) { + SCVTFSx(d1, ed); + } else { + SCVTFSw(d1, ed); + } + VMOVeS(v0, 0, d1, 0); + break; + + case 0x2C: + INST_NAME("CVTTSS2SI Gd, Ex"); + nextop = F8; + GETGD; + GETEX(d0, 0); + FCVTZSxwS(gd, d0); + break; + case 0x2D: + INST_NAME("CVTSS2SI Gd, Ex"); + nextop = F8; + GETGD; + GETEX(q0, 0); + #ifdef PRECISE_CVT + LDRH_U12(x1, xEmu, offsetof(x64emu_t, mxcsr)); + UBFXx(x1, x1, 13, 2); // extract round requested + LSLx_REG(x1, x1, 3); + // Construct a "switch case", with each case 2 instructions, so 8 bytes + ADR(xLR, GETMARK); + ADDx_REG(xLR, xLR, x1); + B(xLR); + FCVTNSxwS(gd, q0); // 0: Nearest (even) + B_NEXT_nocond; + FCVTMSxwS(gd, q0); // 1: Toward -inf + B_NEXT_nocond; + FCVTPSxwS(gd, q0); // 2: Toward +inf + B_NEXT_nocond; + FCVTZSxwS(gd, q0); // 3: Toward 0 + #else + FCVTNSxwS(gd, q0); + #endif + break; + case 0x51: + INST_NAME("SQRTSS Gx, Ex"); + nextop = F8; + GETGX(v0); + d1 = fpu_get_scratch(dyn); + GETEX(d0, 0); + FSQRTS(d1, d0); + VMOVeS(v0, 0, d1, 0); + break; + case 0x52: + INST_NAME("RSQRTSS Gx, Ex"); + nextop = F8; + 
GETEX(v1, 0); + GETGX_empty(v0); + d0 = fpu_get_scratch(dyn); + d1 = fpu_get_scratch(dyn); + // so here: F32: Imm8 = abcd efgh that gives => aBbbbbbc defgh000 00000000 00000000 + // and want 1.0f = 0x3f800000 + // so 00111111 10000000 00000000 00000000 + // a = 0, b = 1, c = 1, d = 1, efgh=0 + // 0b01110000 + FMOVS_8(d0, 0b01110000); + FSQRTS(d1, v1); + FDIVS(d0, d0, d1); + VMOVeS(v0, 0, d0, 0); + break; + case 0x53: + INST_NAME("RCPSS Gx, Ex"); + nextop = F8; + GETGX(v0); + GETEX(v1, 0); + d0 = fpu_get_scratch(dyn); + FMOVS_8(d0, 0b01110000); //1.0f + FDIVS(d0, d0, v1); + VMOVeS(v0, 0, d0, 0); + break; + + case 0x58: + INST_NAME("ADDSS Gx, Ex"); + nextop = F8; + GETGX(v0); + d1 = fpu_get_scratch(dyn); + GETEX(d0, 0); + FADDS(d1, v0, d0); // the high part of the vector is erased... + VMOVeS(v0, 0, d1, 0); + break; + case 0x59: + INST_NAME("MULSS Gx, Ex"); + nextop = F8; + GETGX(v0); + d1 = fpu_get_scratch(dyn); + GETEX(d0, 0); + FMULS(d1, v0, d0); + VMOVeS(v0, 0, d1, 0); + break; + case 0x5A: + INST_NAME("CVTSS2SD Gx, Ex"); + nextop = F8; + GETGX(v0); + GETEX(v1, 0); + d0 = fpu_get_scratch(dyn); + FCVT_D_S(d0, v1); + VMOVeD(v0, 0, d0, 0); + break; + case 0x5B: + INST_NAME("CVTPS2DQ Gx, Ex"); + nextop = F8; + GETEX(d0, 0); + GETGX_empty(v0); + VFCVTZSQS(v0, d0); + break; + + case 0x5C: + INST_NAME("SUBSS Gx, Ex"); + nextop = F8; + GETGX(v0); + d1 = fpu_get_scratch(dyn); + GETEX(d0, 0); + FSUBS(d1, v0, d0); + VMOVeS(v0, 0, d1, 0); + break; + case 0x5D: + INST_NAME("MINSS Gx, Ex"); + nextop = F8; + GETGX(v0); + GETEX(v1, 0); + // MINSS: if any input is NaN, or Ex[0] Gx[0] + #if 0 + d0 = fpu_get_scratch(dyn); + FMINNMS(d0, v0, v1); // NaN handling may be slightly different, is that a problem? + VMOVeS(v0, 0, d0, 0); // to not erase uper part + #else + FCMPS(v0, v1); + B_NEXT(cLS); //Less than or equal + VMOVeS(v0, 0, v1, 0); // to not erase uper part + #endif + break; + case 0x5E: + INST_NAME("DIVSS Gx, Ex"); + nextop = F8; + GETGX(v0); + d1 = fpu_get_scratch(dyn); + GETEX(d0, 0); + FDIVS(d1, v0, d0); + VMOVeS(v0, 0, d1, 0); + break; + case 0x5F: + INST_NAME("MAXSS Gx, Ex"); + nextop = F8; + GETGX(v0); + GETEX(v1, 0); + // MAXSS: if any input is NaN, or Ex[0]>Gx[0], copy Ex[0] -> Gx[0] + #if 0 + d0 = fpu_get_scratch(dyn); + FMAXNMS(d0, v0, v1); // NaN handling may be slightly different, is that a problem? + VMOVeS(v0, 0, d0, 0); // to not erase uper part + #else + FCMPS(v0, v1); + B_NEXT(cGE); //Greater than or equal + VMOVeS(v0, 0, v1, 0); // to not erase uper part + #endif + break; + + case 0x6F: + INST_NAME("MOVDQU Gx,Ex");// no alignment constraint on NEON here, so same as MOVDQA + nextop = F8; + GETG; + v0 = sse_get_reg_empty(dyn, ninst, x1, gd); + if(MODREG) { + v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); + VMOVQ(v0, v1); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, 0, 0); + VLDR128_U12(v0, ed, fixedaddress); + } + break; + case 0x70: + INST_NAME("PSHUFHW Gx, Ex, Ib"); + nextop = F8; + GETEX(v1, 1); + GETGX(v0); + + u8 = F8; + // only high part need to be suffled. 
VTBL only handle 8bits value, so the 16bits suffles need to be changed in 8bits + u64 = 0; + for (int i=0; i<4; ++i) { + u64 |= ((uint64_t)((u8>>(i*2))&3)*2+8)<<(i*16+0); + u64 |= ((uint64_t)((u8>>(i*2))&3)*2+9)<<(i*16+8); + } + MOV64x(x2, u64); + d0 = fpu_get_scratch(dyn); + VMOVQDfrom(d0, 0, x2); + VTBL1_8(d0, v1, d0); + VMOVeD(v0, 1, d0, 0); + if(v0!=v1) { + VMOVeD(v0, 0, v1, 0); + } + break; + + case 0x7E: + INST_NAME("MOVQ Gx, Ex"); + nextop = F8; + GETG; + if(MODREG) { + v1 = sse_get_reg(dyn, ninst, x1, (nextop&7) + (rex.b<<3)); + v0 = sse_get_reg_empty(dyn, ninst, x1, gd); + FMOVD(v0, v1); + } else { + v0 = sse_get_reg_empty(dyn, ninst, x1, gd); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0); + VLDR64_U12(v0, ed, fixedaddress); + } + break; + case 0x7F: + INST_NAME("MOVDQU Ex,Gx"); + nextop = F8; + GETG; + if(MODREG) { + v0 = sse_get_reg(dyn, ninst, x1, gd); + v1 = sse_get_reg_empty(dyn, ninst, x1, (nextop&7) + (rex.b<<3)); + VMOVQ(v1, v0); + } else { + v0 = sse_get_reg(dyn, ninst, x1, gd); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, 0, 0); + VSTR128_U12(v0, ed, fixedaddress); + } + break; + + case 0xBC: + INST_NAME("TZCNT Gd, Ed"); + SETFLAGS(X_CF|X_ZF, SF_SUBSET); + SET_DFNONE(x1); + nextop = F8; + GETED(0); + GETGD; + TSTxw_REG(ed, ed); + BFIw(xFlags, x1, F_CF, 1); // CF = is source 0? + RBITxw(x1, ed); // reverse + CLZxw(gd, x1); // x2 gets leading 0 == TZCNT + TSTxw_REG(gd, gd); + CSETw(x1, cEQ); + BFIw(xFlags, x1, F_ZF, 1); // ZF = is dest 0? + break; + case 0xBD: + INST_NAME("LZCNT Gd, Ed"); + SETFLAGS(X_CF|X_ZF, SF_SUBSET); + SET_DFNONE(x1); + nextop = F8; + GETED(0); + GETGD; + TSTxw_REG(ed, ed); + BFIw(xFlags, x1, F_CF, 1); // CF = is source 0? + CLZxw(gd, x1); // x2 gets leading 0 == LZCNT + TSTxw_REG(gd, gd); + CSETw(x1, cEQ); + BFIw(xFlags, x1, F_ZF, 1); // ZF = is dest 0? + break; + + case 0xC2: + INST_NAME("CMPSS Gx, Ex, Ib"); + nextop = F8; + GETGX(v0); + GETEX(v1, 1); + u8 = F8; + FCMPS(v0, v1); + switch(u8&7) { + case 0: CSETMw(x2, cEQ); break; // Equal + case 1: CSETMw(x2, cCC); break; // Less than + case 2: CSETMw(x2, cLS); break; // Less or equal + case 3: CSETMw(x2, cVS); break; // NaN + case 4: CSETMw(x2, cNE); break; // Not Equal or unordered + case 5: CSETMw(x2, cCS); break; // Greater or equal or unordered + case 6: CSETMw(x2, cHI); break; // Greater or unordered, test inverted, N!=V so unordered or less than (inverted) + case 7: CSETMw(x2, cVC); break; // not NaN + } + VMOVQSfrom(v0, 0, x2); + break; + + case 0xD6: + INST_NAME("MOVQ2DQ Gx, Em"); + nextop = F8; + GETGX_empty(v0); + if(MODREG) { + v1 = mmx_get_reg(dyn, ninst, x1, (nextop&7)); + VEORQ(v0, v0, v0); // usefull? 
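A note on the 0xBC/0xBD handlers above: AArch64 has CLZ but no direct count-trailing-zeros, so TZCNT is emitted as RBIT followed by CLZ. The x86 behaviour being targeted is, in plain C (illustrative sketch, not from the box64 sources):

#include <stdint.h>

/* TZCNT Gd, Ed: result is the number of trailing zero bits; for a zero source
   the result is the operand width and x86 sets CF; ZF is set when the result is 0. */
static unsigned tzcnt32_sketch(uint32_t x)
{
    unsigned n = 0;
    if (x == 0)
        return 32;
    while (!(x & 1u)) {            /* portable stand-in for RBIT + CLZ */
        x >>= 1;
        n++;
    }
    return n;
}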
+ VMOV(v0, v1); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0); + VLDR64_U12(v0, ed, fixedaddress); + } + break; + + case 0xE6: + INST_NAME("CVTDQ2PD Gx, Ex"); + nextop = F8; + GETEX(v1, 0); + GETGX_empty(v0); + d0 = fpu_get_scratch(dyn); + SXTL_32(v0, v1); + SCVTQFD(v0, v0); // there is only I64 -> Double vector conversion, not from i32 + break; + + default: + DEFAULT; + } + return addr; +} diff --git a/src/dynarec/arm64/dynarec_arm64_functions.c b/src/dynarec/arm64/dynarec_arm64_functions.c new file mode 100755 index 00000000..32bea7d6 --- /dev/null +++ b/src/dynarec/arm64/dynarec_arm64_functions.c @@ -0,0 +1,466 @@ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "debug.h" +#include "box64context.h" +#include "dynarec.h" +#include "emu/x64emu_private.h" +#include "tools/bridge_private.h" +#include "x64run.h" +#include "x64emu.h" +#include "box64stack.h" +#include "callback.h" +#include "emu/x64run_private.h" +#include "emu/x87emu_private.h" +#include "x64trace.h" +#include "signals.h" +#include "dynarec_native.h" +#include "dynarec_arm64_private.h" +#include "dynarec_arm64_functions.h" +#include "custommem.h" +#include "bridge.h" + +void arm_fstp(x64emu_t* emu, void* p) +{ + if(ST0.q!=STld(0).uref) + D2LD(&ST0.d, p); + else + memcpy(p, &STld(0).ld, 10); +} + +void arm_print_armreg(x64emu_t* emu, uintptr_t reg, uintptr_t n) +{ + (void)emu; + dynarec_log(LOG_DEBUG, "R%lu=0x%lx (%lu)\n", n, reg, reg); +} + +void arm_f2xm1(x64emu_t* emu) +{ + ST0.d = exp2(ST0.d) - 1.0; +} +void arm_fyl2x(x64emu_t* emu) +{ + ST(1).d = log2(ST0.d)*ST(1).d; +} +void arm_ftan(x64emu_t* emu) +{ + ST0.d = tan(ST0.d); + emu->sw.f.F87_C2 = 0; +} +void arm_fpatan(x64emu_t* emu) +{ + ST1.d = atan2(ST1.d, ST0.d); +} +void arm_fxtract(x64emu_t* emu) +{ + int32_t tmp32s = (ST1.q&0x7ff0000000000000LL)>>52; + tmp32s -= 1023; + ST1.d /= exp2(tmp32s); + ST0.d = tmp32s; +} +void arm_fprem(x64emu_t* emu) +{ + int32_t tmp32s = ST0.d / ST1.d; + ST0.d -= ST1.d * tmp32s; + emu->sw.f.F87_C2 = 0; + emu->sw.f.F87_C0 = (tmp32s&1); + emu->sw.f.F87_C3 = ((tmp32s>>1)&1); + emu->sw.f.F87_C1 = ((tmp32s>>2)&1); +} +void arm_fyl2xp1(x64emu_t* emu) +{ + ST(1).d = log2(ST0.d + 1.0)*ST(1).d; +} +void arm_fsincos(x64emu_t* emu) +{ + sincos(ST1.d, &ST1.d, &ST0.d); + emu->sw.f.F87_C2 = 0; +} +void arm_frndint(x64emu_t* emu) +{ + ST0.d = fpu_round(emu, ST0.d); +} +void arm_fscale(x64emu_t* emu) +{ + if(ST0.d!=0.0) + ST0.d *= exp2(trunc(ST1.d)); +} +void arm_fsin(x64emu_t* emu) +{ + ST0.d = sin(ST0.d); + emu->sw.f.F87_C2 = 0; +} +void arm_fcos(x64emu_t* emu) +{ + ST0.d = cos(ST0.d); + emu->sw.f.F87_C2 = 0; +} + +void arm_fbld(x64emu_t* emu, uint8_t* ed) +{ + fpu_fbld(emu, ed); +} + +void arm_fild64(x64emu_t* emu, int64_t* ed) +{ + int64_t tmp; + memcpy(&tmp, ed, sizeof(tmp)); + ST0.d = tmp; + STll(0).sq = tmp; + STll(0).sref = ST0.sq; +} + +void arm_fbstp(x64emu_t* emu, uint8_t* ed) +{ + fpu_fbst(emu, ed); +} + +void arm_fistp64(x64emu_t* emu, int64_t* ed) +{ + // used of memcpy to avoid aligments issues + if(STll(0).sref==ST(0).sq) { + memcpy(ed, &STll(0).sq, sizeof(int64_t)); + } else { + int64_t tmp; + if(isgreater(ST0.d, (double)(int64_t)0x7fffffffffffffffLL) || isless(ST0.d, (double)(int64_t)0x8000000000000000LL) || !isfinite(ST0.d)) + tmp = 0x8000000000000000LL; + else + tmp = fpu_round(emu, ST0.d); + memcpy(ed, &tmp, sizeof(tmp)); + } +} + +void arm_fistt64(x64emu_t* emu, int64_t* ed) +{ + // used of memcpy to avoid 
aligments issues + int64_t tmp = ST0.d; + memcpy(ed, &tmp, sizeof(tmp)); +} + +void arm_fld(x64emu_t* emu, uint8_t* ed) +{ + memcpy(&STld(0).ld, ed, 10); + LD2D(&STld(0), &ST(0).d); + STld(0).uref = ST0.q; +} + +void arm_ud(x64emu_t* emu) +{ + emit_signal(emu, SIGILL, (void*)R_RIP, 0); +} + +void arm_fsave(x64emu_t* emu, uint8_t* ed) +{ + fpu_savenv(emu, (char*)ed, 0); + + uint8_t* p = ed; + p += 28; + for (int i=0; i<8; ++i) { + LD2D(p, &ST(i).d); + p+=10; + } +} +void arm_frstor(x64emu_t* emu, uint8_t* ed) +{ + fpu_loadenv(emu, (char*)ed, 0); + + uint8_t* p = ed; + p += 28; + for (int i=0; i<8; ++i) { + D2LD(&ST(i).d, p); + p+=10; + } + +} + +void arm_fprem1(x64emu_t* emu) +{ + // simplified version + int32_t tmp32s = round(ST0.d / ST1.d); + ST0.d -= ST1.d*tmp32s; + emu->sw.f.F87_C2 = 0; + emu->sw.f.F87_C0 = (tmp32s&1); + emu->sw.f.F87_C3 = ((tmp32s>>1)&1); + emu->sw.f.F87_C1 = ((tmp32s>>2)&1); +} + +static uint8_t ff_mult(uint8_t a, uint8_t b) +{ + int retval = 0; + + for(int i = 0; i < 8; i++) { + if((b & 1) == 1) + retval ^= a; + + if((a & 0x80)) { + a <<= 1; + a ^= 0x1b; + } else { + a <<= 1; + } + + b >>= 1; + } + + return retval; +} + +void arm_aesimc(x64emu_t* emu, int xmm) +{ + sse_regs_t eax1 = emu->xmm[xmm]; + + for(int j=0; j<4; ++j) { + emu->xmm[xmm].ub[0+j*4] = ff_mult(0x0E, eax1.ub[0+j*4]) ^ ff_mult(0x0B, eax1.ub[1+j*4]) ^ ff_mult(0x0D, eax1.ub[2+j*4]) ^ ff_mult(0x09, eax1.ub[3+j*4]); + emu->xmm[xmm].ub[1+j*4] = ff_mult(0x09, eax1.ub[0+j*4]) ^ ff_mult(0x0E, eax1.ub[1+j*4]) ^ ff_mult(0x0B, eax1.ub[2+j*4]) ^ ff_mult(0x0D, eax1.ub[3+j*4]); + emu->xmm[xmm].ub[2+j*4] = ff_mult(0x0D, eax1.ub[0+j*4]) ^ ff_mult(0x09, eax1.ub[1+j*4]) ^ ff_mult(0x0E, eax1.ub[2+j*4]) ^ ff_mult(0x0B, eax1.ub[3+j*4]); + emu->xmm[xmm].ub[3+j*4] = ff_mult(0x0B, eax1.ub[0+j*4]) ^ ff_mult(0x0D, eax1.ub[1+j*4]) ^ ff_mult(0x09, eax1.ub[2+j*4]) ^ ff_mult(0x0E, eax1.ub[3+j*4]); + } +} +void arm_aesmc(x64emu_t* emu, int xmm) +{ + sse_regs_t eax1 = emu->xmm[xmm]; + + for(int j=0; j<4; ++j) { + emu->xmm[xmm].ub[0+j*4] = ff_mult(0x02, eax1.ub[0+j*4]) ^ ff_mult(0x03, eax1.ub[1+j*4]) ^ eax1.ub[2+j*4] ^ eax1.ub[3+j*4] ; + emu->xmm[xmm].ub[1+j*4] = eax1.ub[0+j*4] ^ ff_mult(0x02, eax1.ub[1+j*4]) ^ ff_mult(0x03, eax1.ub[2+j*4]) ^ eax1.ub[3+j*4] ; + emu->xmm[xmm].ub[2+j*4] = eax1.ub[0+j*4] ^ eax1.ub[1+j*4] ^ ff_mult(0x02, eax1.ub[2+j*4]) ^ ff_mult(0x03, eax1.ub[3+j*4]); + emu->xmm[xmm].ub[3+j*4] = ff_mult(0x03, eax1.ub[0+j*4]) ^ eax1.ub[1+j*4] ^ eax1.ub[2+j*4] ^ ff_mult(0x02, eax1.ub[3+j*4]); + } +} +void arm_aesdlast(x64emu_t* emu, int xmm) +{ + // A0 B1 C2 D3 E4 F5 G6 H7 I8 J9 Ka Lb Mc Nd Oe Pf + // A N K H E B O L I F C P M J G D + const uint8_t invshiftrows[] = {0,13,10, 7, 4, 1,14,11, 8, 5, 2,15,12, 9, 6, 3}; + const uint8_t invsubbytes[256] = { + 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, + 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, + 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, + 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, + 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, + 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, + 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, + 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, + 
0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, + 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, + 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, + 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, + 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, + 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, + 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, + 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d, + }; + + sse_regs_t eax1; + for(int i=0; i<16; ++i) + eax1.ub[i] = emu->xmm[xmm].ub[invshiftrows[i]]; + //STATE ← InvSubBytes( STATE ); + for(int i=0; i<16; ++i) + emu->xmm[xmm].ub[i] = invsubbytes[eax1.ub[i]]; + +} +void arm_aeselast(x64emu_t* emu, int xmm) +{ + // A0 B1 C2 D3 E4 F5 G6 H7 I8 J9 Ka Lb Mc Nd Oe Pf + // A F K P E J O D I N C H M B G L + const uint8_t shiftrows[] = {0, 5,10,15, 4, 9,14, 3, 8,13, 2, 7,12, 1, 6,11}; + const uint8_t subbytes[256] = { + 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, + 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, + 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, + 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, + 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, + 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, + 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, + 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, + 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, + 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, + 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, + 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, + 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, + 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, + 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, + 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16, + }; + + sse_regs_t eax1; + for(int i=0; i<16; ++i) + eax1.ub[i] = emu->xmm[xmm].ub[shiftrows[i]]; + //STATE ← SubBytes( STATE ); + for(int i=0; i<16; ++i) + emu->xmm[xmm].ub[i] = subbytes[eax1.ub[i]]; +} +void arm_aesd(x64emu_t* emu, int xmm) +{ + arm_aesdlast(emu, xmm); + arm_aesimc(emu, xmm); +} +void arm_aese(x64emu_t* emu, int xmm) +{ + arm_aeselast(emu, xmm); + arm_aesmc(emu, xmm); +} + + +#define XMM0 0 +#define XMM8 16 +#define X870 8 +#define EMM0 8 +#define SCRATCH0 24 + +// Get a FPU scratch reg +int fpu_get_scratch(dynarec_arm_t* dyn) +{ + return SCRATCH0 + dyn->fpu_scratch++; // return an Sx +} +// Reset scratch regs counter +void fpu_reset_scratch(dynarec_arm_t* dyn) +{ + dyn->fpu_scratch = 0; +} +// Get a x87 
double reg +int fpu_get_reg_x87(dynarec_arm_t* dyn) +{ + int i=X870; + while (dyn->fpuused[i]) ++i; + dyn->fpuused[i] = 1; + return i; // return a Dx +} +// Free a FPU double reg +void fpu_free_reg(dynarec_arm_t* dyn, int reg) +{ + // TODO: check upper limit? + dyn->fpuused[reg] = 0; +} +// Get an MMX double reg +int fpu_get_reg_emm(dynarec_arm_t* dyn, int emm) +{ + dyn->fpuused[EMM0 + emm] = 1; + return EMM0 + emm; +} +// Get an XMM quad reg +int fpu_get_reg_xmm(dynarec_arm_t* dyn, int xmm) +{ + if(xmm>7) { + dyn->fpuused[XMM8 + xmm - 8] = 1; + return XMM8 + xmm - 8; + } else { + dyn->fpuused[XMM0 + xmm] = 1; + return XMM0 + xmm; + } +} +// Reset fpu regs counter +void fpu_reset_reg(dynarec_arm_t* dyn) +{ + dyn->fpu_reg = 0; + for (int i=0; i<32; ++i) + dyn->fpuused[i]=0; +} + +#define F8 *(uint8_t*)(addr++) +#define F32 *(uint32_t*)(addr+=4, addr-4) +#define F32S64 (uint64_t)(int64_t)*(int32_t*)(addr+=4, addr-4) +// Get if ED will have the correct parity. Not emiting anything. Parity is 2 for DWORD or 3 for QWORD +int getedparity(dynarec_arm_t* dyn, int ninst, uintptr_t addr, uint8_t nextop, int parity, int delta) +{ + (void)dyn; (void)ninst; + + uint32_t tested = (1<>3)&7; + if((sib&0x7)==5) { + uint64_t tmp = F32S64; + if (sib_reg!=4) { + // if XXXXXX+reg<>6)>=parity)?1:0; + } else { + // just a constant... + return (tmp&tested)?0:1; + } + } else { + if(sib_reg==4 && parity<3) + return 0; // simple [reg] + // don't try [reg1 + reg2<>6)>=parity)?1:0; + } + } else if((nextop&7)==5) { + uint64_t tmp = F32S64; + tmp+=addr+delta; + return (tmp&tested)?0:1; + } else { + return 0; + } + } else { + return 0; //Form [reg1 + reg2<CC==0xCC && b->S=='S' && b->C=='C' && b->w!=(wrapper_t)0 && b->f!=(uintptr_t)PltResolver) { + // found ! + if(retn) *retn = (b->C3==0xC2)?b->N:0; + if(calladdress) *calladdress = addr+1; + return 1; + } + return 0; +#undef PK32 +#undef PK +} diff --git a/src/dynarec/arm64/dynarec_arm64_functions.h b/src/dynarec/arm64/dynarec_arm64_functions.h new file mode 100755 index 00000000..d4c861c9 --- /dev/null +++ b/src/dynarec/arm64/dynarec_arm64_functions.h @@ -0,0 +1,64 @@ +#ifndef __DYNAREC_ARM_FUNCTIONS_H__ +#define __DYNAREC_ARM_FUNCTIONS_H__ + +typedef struct x64emu_s x64emu_t; + +void arm_fstp(x64emu_t* emu, void* p); + +void arm_print_armreg(x64emu_t* emu, uintptr_t reg, uintptr_t n); + +void arm_f2xm1(x64emu_t* emu); +void arm_fyl2x(x64emu_t* emu); +void arm_ftan(x64emu_t* emu); +void arm_fpatan(x64emu_t* emu); +void arm_fxtract(x64emu_t* emu); +void arm_fprem(x64emu_t* emu); +void arm_fyl2xp1(x64emu_t* emu); +void arm_fsincos(x64emu_t* emu); +void arm_frndint(x64emu_t* emu); +void arm_fscale(x64emu_t* emu); +void arm_fsin(x64emu_t* emu); +void arm_fcos(x64emu_t* emu); +void arm_fbld(x64emu_t* emu, uint8_t* ed); +void arm_fild64(x64emu_t* emu, int64_t* ed); +void arm_fbstp(x64emu_t* emu, uint8_t* ed); +void arm_fistp64(x64emu_t* emu, int64_t* ed); +void arm_fistt64(x64emu_t* emu, int64_t* ed); +void arm_fld(x64emu_t* emu, uint8_t* ed); +void arm_fsave(x64emu_t* emu, uint8_t* ed); +void arm_frstor(x64emu_t* emu, uint8_t* ed); +void arm_fprem1(x64emu_t* emu); + +void arm_aesd(x64emu_t* emu, int xmm); +void arm_aese(x64emu_t* emu, int xmm); +void arm_aesdlast(x64emu_t* emu, int xmm); +void arm_aeselast(x64emu_t* emu, int xmm); +void arm_aesimc(x64emu_t* emu, int xmm); + + +void arm_ud(x64emu_t* emu); + +// Get an FPU scratch reg +int fpu_get_scratch(dynarec_arm_t* dyn); +// Reset scratch regs counter +void fpu_reset_scratch(dynarec_arm_t* dyn); +// Get an x87 double reg 
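The fpu_get_reg_* allocators defined above (and declared in this header) rely on the fixed NEON register map given by the XMM0/XMM8/X870/EMM0/SCRATCH0 defines. A condensed view of that map, with the helper below mirroring fpu_get_reg_xmm (informative sketch; only the base numbers come from the defines, the layout comments are inferred):

/*  xmm0..xmm7   -> v0..v7    (XMM0 = 0)
 *  x87 and MMX  -> v8..v15   (X870 = EMM0 = 8, shared bank, used as doubles)
 *  xmm8..xmm15  -> v16..v23  (XMM8 = 16)
 *  scratch      -> v24..     (SCRATCH0 = 24, freed with fpu_reset_scratch)
 */
static int xmm_to_neon(int xmm)
{
    return (xmm > 7) ? (16 + (xmm - 8)) : xmm;
}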
+int fpu_get_reg_x87(dynarec_arm_t* dyn); +// Get an MMX double reg +int fpu_get_reg_emm(dynarec_arm_t* dyn, int emm); +// Get an XMM quad reg +int fpu_get_reg_xmm(dynarec_arm_t* dyn, int xmm); +// Free a FPU/MMX/XMM reg +void fpu_free_reg(dynarec_arm_t* dyn, int reg); +// Reset fpu regs counter +void fpu_reset_reg(dynarec_arm_t* dyn); + +// Get if ED will have the correct parity. Not emiting anything. Parity is 2 for DWORD or 3 for QWORD +int getedparity(dynarec_arm_t* dyn, int ninst, uintptr_t addr, uint8_t nextop, int parity, int delta); +// Do the GETED, but don't emit anything... +uintptr_t fakeed(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop); + +// Is what pointed at addr a native call? And if yes, to what function? +int isNativeCall(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t* calladdress, int* retn); + +#endif //__DYNAREC_ARM_FUNCTIONS_H__ \ No newline at end of file diff --git a/src/dynarec/arm64/dynarec_arm64_helper.c b/src/dynarec/arm64/dynarec_arm64_helper.c new file mode 100755 index 00000000..fc28663c --- /dev/null +++ b/src/dynarec/arm64/dynarec_arm64_helper.c @@ -0,0 +1,1280 @@ +#include +#include +#include +#include +#include + +#include "debug.h" +#include "box64context.h" +#include "dynarec.h" +#include "emu/x64emu_private.h" +#include "emu/x64run_private.h" +#include "x64run.h" +#include "x64emu.h" +#include "box64stack.h" +#include "callback.h" +#include "emu/x64run_private.h" +#include "x64trace.h" +#include "dynarec_native.h" +#include "../dynablock_private.h" +#include "../tools/bridge_private.h" +#include "custommem.h" + +#include "arm64_printer.h" +#include "dynarec_arm64_private.h" +#include "dynarec_arm64_functions.h" +#include "dynarec_arm64_helper.h" + +/* setup r2 to address pointed by ED, also fixaddress is an optionnal delta in the range [-absmax, +absmax], with delta&mask==0 to be added to ed for LDR/STR */ +uintptr_t geted(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, int64_t* fixaddress, int absmax, uint32_t mask, rex_t rex, int s, int delta) +{ + MAYUSE(dyn); MAYUSE(ninst); MAYUSE(delta); + + uint8_t ret = x2; + uint8_t scratch = x2; + *fixaddress = 0; + if(hint>0) ret = hint; + if(hint>0 && hint>3)&7)+(rex.x<<3); + if((sib&0x7)==5) { + int64_t tmp = F32S; + if (sib_reg!=4) { + if(tmp && ((tmpabsmax) || (tmp&mask))) { + MOV64x(scratch, tmp); + ADDx_REG_LSL(ret, scratch, xRAX+sib_reg, (sib>>6)); + } else { + LSLx(ret, xRAX+sib_reg, (sib>>6)); + *fixaddress = tmp; + } + } else { + MOV64x(ret, tmp); + } + } else { + if (sib_reg!=4) { + ADDx_REG_LSL(ret, xRAX+(sib&0x7)+(rex.b<<3), xRAX+sib_reg, (sib>>6)); + } else { + ret = xRAX+(sib&0x7)+(rex.b<<3); + } + } + } else if((nextop&7)==5) { + uint64_t tmp = F32S64; + if((tmp>=absmin) && (tmp<=absmax) && !(tmp&mask)) { + GETIP(addr+delta); + ret = xRIP; + *fixaddress = tmp; + } else if(tmp<0x1000) { + GETIP(addr+delta); + ADDx_U12(ret, xRIP, tmp); + } else if(tmp+addr+delta<0x1000000000000LL) { // 3 opcodes to load immediate is cheap enough + tmp += addr+delta; + MOV64x(ret, tmp); + } else { + MOV64x(ret, tmp); + GETIP(addr+delta); + ADDx_REG(ret, ret, xRIP); + } + } else { + ret = xRAX+(nextop&7)+(rex.b<<3); + } + } else { + int64_t i64; + uint8_t sib = 0; + int sib_reg = 0; + if((nextop&7)==4) { + sib = F8; + sib_reg = ((sib>>3)&7)+(rex.x<<3); + } + if(nextop&0x80) + i64 = F32S; + else + i64 = F8S; + if(i64==0 || ((i64>=absmin) && (i64<=absmax) && !(i64&mask))) { + *fixaddress = i64; + if((nextop&7)==4) { + if (sib_reg!=4) { + ADDx_REG_LSL(ret, 
xRAX+(sib&0x07)+(rex.b<<3), xRAX+sib_reg, (sib>>6)); + } else { + ret = xRAX+(sib&0x07)+(rex.b<<3); + } + } else + ret = xRAX+(nextop&0x07)+(rex.b<<3); + } else { + int64_t sub = (i64<0)?1:0; + if(sub) i64 = -i64; + if(i64<0x1000) { + if((nextop&7)==4) { + if (sib_reg!=4) { + ADDx_REG_LSL(scratch, xRAX+(sib&0x07)+(rex.b<<3), xRAX+sib_reg, (sib>>6)); + } else { + scratch = xRAX+(sib&0x07)+(rex.b<<3); + } + } else + scratch = xRAX+(nextop&0x07)+(rex.b<<3); + if(sub) { + SUBx_U12(ret, scratch, i64); + } else { + ADDx_U12(ret, scratch, i64); + } + } else { + MOV64x(scratch, i64); + if((nextop&7)==4) { + if (sib_reg!=4) { + if(sub) { + SUBx_REG(scratch, xRAX+(sib&0x07)+(rex.b<<3), scratch); + } else { + ADDx_REG(scratch, scratch, xRAX+(sib&0x07)+(rex.b<<3)); + } + ADDx_REG_LSL(ret, scratch, xRAX+sib_reg, (sib>>6)); + } else { + PASS3(int tmp = xRAX+(sib&0x07)+(rex.b<<3)); + if(sub) { + SUBx_REG(ret, tmp, scratch); + } else { + ADDx_REG(ret, tmp, scratch); + } + } + } else { + PASS3(int tmp = xRAX+(nextop&0x07)+(rex.b<<3)); + if(sub) { + SUBx_REG(ret, tmp, scratch); + } else { + ADDx_REG(ret, tmp, scratch); + } + } + } + } + } + *ed = ret; + return addr; +} + +/* setup r2 to address pointed by ED, also fixaddress is an optionnal delta in the range [-absmax, +absmax], with delta&mask==0 to be added to ed for LDR/STR */ +uintptr_t geted32(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, int64_t* fixaddress, int absmax, uint32_t mask, rex_t rex, int s, int delta) +{ + MAYUSE(dyn); MAYUSE(ninst); MAYUSE(delta); + + uint8_t ret = x2; + uint8_t scratch = x2; + *fixaddress = 0; + if(hint>0) ret = hint; + if(hint>0 && hint>3)&7)+(rex.x<<3); + if((sib&0x7)==5) { + int64_t tmp = F32S; + if (sib_reg!=4) { + if(tmp && ((tmpabsmax) || (tmp&mask))) { + MOV64x(scratch, tmp); + ADDw_REG_LSL(ret, scratch, xRAX+sib_reg, (sib>>6)); + } else { + LSLw(ret, xRAX+sib_reg, (sib>>6)); + *fixaddress = tmp; + } + } else { + MOV64x(ret, tmp); + } + } else { + if (sib_reg!=4) { + ADDw_REG_LSL(ret, xRAX+(sib&0x7)+(rex.b<<3), xRAX+sib_reg, (sib>>6)); + } else { + ret = xRAX+(sib&0x7)+(rex.b<<3); + } + } + } else if((nextop&7)==5) { + uint32_t tmp = F32; + MOV32w(ret, tmp); + GETIP(addr+delta); + ADDw_REG(ret, ret, xRIP); + } else { + ret = xRAX+(nextop&7)+(rex.b<<3); + if(ret==hint) { + MOVw_REG(hint, ret); //to clear upper part + } + } + } else { + int64_t i64; + uint8_t sib = 0; + int sib_reg = 0; + if((nextop&7)==4) { + sib = F8; + sib_reg = ((sib>>3)&7)+(rex.x<<3); + } + if(nextop&0x80) + i64 = F32S; + else + i64 = F8S; + if(i64==0 || ((i64>=absmin) && (i64<=absmax) && !(i64&mask))) { + *fixaddress = i64; + if((nextop&7)==4) { + if (sib_reg!=4) { + ADDw_REG_LSL(ret, xRAX+(sib&0x07)+(rex.b<<3), xRAX+sib_reg, (sib>>6)); + } else { + ret = xRAX+(sib&0x07)+(rex.b<<3); + } + } else { + ret = xRAX+(nextop&0x07)+(rex.b<<3); + } + } else { + int64_t sub = (i64<0)?1:0; + if(sub) i64 = -i64; + if(i64<0x1000) { + if((nextop&7)==4) { + if (sib_reg!=4) { + ADDw_REG_LSL(scratch, xRAX+(sib&0x07)+(rex.b<<3), xRAX+sib_reg, (sib>>6)); + } else { + scratch = xRAX+(sib&0x07)+(rex.b<<3); + } + } else + scratch = xRAX+(nextop&0x07)+(rex.b<<3); + if(sub) { + SUBw_U12(ret, scratch, i64); + } else { + ADDw_U12(ret, scratch, i64); + } + } else { + MOV32w(scratch, i64); + if((nextop&7)==4) { + if (sib_reg!=4) { + if(sub) { + SUBw_REG(scratch, xRAX+(sib&0x07)+(rex.b<<3), scratch); + } else { + ADDw_REG(scratch, scratch, xRAX+(sib&0x07)+(rex.b<<3)); + } + ADDw_REG_LSL(ret, scratch, xRAX+sib_reg, (sib>>6)); + } 
else { + PASS3(int tmp = xRAX+(sib&0x07)+(rex.b<<3)); + if(sub) { + SUBw_REG(ret, tmp, scratch); + } else { + ADDw_REG(ret, tmp, scratch); + } + } + } else { + PASS3(int tmp = xRAX+(nextop&0x07)+(rex.b<<3)); + if(sub) { + SUBw_REG(ret, tmp, scratch); + } else { + ADDw_REG(ret, tmp, scratch); + } + } + } + } + } + *ed = ret; + return addr; +} + +/* setup r2 to address pointed by ED, r3 as scratch also fixaddress is an optionnal delta in the range [-absmax, +absmax], with delta&mask==0 to be added to ed for LDR/STR */ +uintptr_t geted16(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, int64_t* fixaddress, int absmax, uint32_t mask, int s) +{ + MAYUSE(dyn); MAYUSE(ninst); + + uint8_t ret = x2; + uint8_t scratch = x3; + *fixaddress = 0; + if(hint>0) ret = hint; + if(scratch==ret) scratch = x2; + MAYUSE(scratch); + uint32_t m = nextop&0xC7; + uint32_t n = (m>>6)&3; + int64_t offset = 0; + int absmin = 0; + if(s) absmin = -absmax; + if(!n && m==6) { + offset = F16; + MOVZw(ret, offset); + } else { + switch(n) { + case 0: offset = 0; break; + case 1: offset = F8S; break; + case 2: offset = F16S; break; + } + if(offset && (offset>absmax || offset-0x1000) { + SUBx_U12(ret, ret, -offset); + } else if(offset>0 && offset<0x1000) { + ADDx_U12(ret, ret, offset); + } else { + MOV64x(scratch, offset); + ADDx_REG(ret, ret, scratch); + } + } + } + + *ed = ret; + return addr; +} + +void jump_to_epilog(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst) +{ + MAYUSE(dyn); MAYUSE(ip); MAYUSE(ninst); + MESSAGE(LOG_DUMP, "Jump to epilog\n"); + + if(reg) { + if(reg!=xRIP) { + MOVx_REG(xRIP, reg); + } + } else { + GETIP_(ip); + } + TABLE64(x2, (uintptr_t)arm64_epilog); + BR(x2); +} + +void jump_to_next(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst) +{ + MAYUSE(dyn); MAYUSE(ninst); + MESSAGE(LOG_DUMP, "Jump to next\n"); + + if(reg) { + if(reg!=xRIP) { + MOVx_REG(xRIP, reg); + } + uintptr_t tbl = getJumpTable64(); + MAYUSE(tbl); + TABLE64(x2, tbl); + UBFXx(x3, xRIP, 48, JMPTABL_SHIFT); + LDRx_REG_LSL3(x2, x2, x3); + UBFXx(x3, xRIP, 32, JMPTABL_SHIFT); + LDRx_REG_LSL3(x2, x2, x3); + UBFXx(x3, xRIP, 16, JMPTABL_SHIFT); + LDRx_REG_LSL3(x2, x2, x3); + UBFXx(x3, xRIP, 0, JMPTABL_SHIFT); + LDRx_REG_LSL3(x3, x2, x3); + } else { + uintptr_t p = getJumpTableAddress64(ip); + MAYUSE(p); + TABLE64(x2, p); + GETIP_(ip); + LDRx_U12(x3, x2, 0); + } + if(reg!=x1) { + MOVx_REG(x1, xRIP); + } + #ifdef HAVE_TRACE + //MOVx(x2, 15); no access to PC reg + #endif + BLR(x3); // save LR... 
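The UBFX/LDR chain emitted just above walks the multi-level jump table to find the native entry point for the new RIP. A rough C equivalent of that walk (sketch only: it assumes JMPTABL_SHIFT is 16 and a 4-level radix layout, which is what the 48/32/16/0 bit extracts suggest):

#include <stdint.h>

static uintptr_t jumptable_lookup(uintptr_t table, uint64_t rip)
{
    const unsigned shift = 16;                  // assumption: JMPTABL_SHIFT == 16
    const uint64_t mask  = (1u << shift) - 1;
    uintptr_t p = table;
    p = ((uintptr_t*)p)[(rip >> 48) & mask];    // UBFXx + LDRx_REG_LSL3
    p = ((uintptr_t*)p)[(rip >> 32) & mask];
    p = ((uintptr_t*)p)[(rip >> 16) & mask];
    return ((uintptr_t*)p)[rip & mask];         // address then reached with BLR
}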
+} + +void ret_to_epilog(dynarec_arm_t* dyn, int ninst) +{ + MAYUSE(dyn); MAYUSE(ninst); + MESSAGE(LOG_DUMP, "Ret to epilog\n"); + POP1(xRIP); + uintptr_t tbl = getJumpTable64(); + MOV64x(x2, tbl); + UBFXx(x3, xRIP, 48, JMPTABL_SHIFT); + LDRx_REG_LSL3(x2, x2, x3); + UBFXx(x3, xRIP, 32, JMPTABL_SHIFT); + LDRx_REG_LSL3(x2, x2, x3); + UBFXx(x3, xRIP, 16, JMPTABL_SHIFT); + LDRx_REG_LSL3(x2, x2, x3); + UBFXx(x3, xRIP, 0, JMPTABL_SHIFT); + LDRx_REG_LSL3(x2, x2, x3); + MOVx_REG(x1, xRIP); + BLR(x2); // save LR +} + +void retn_to_epilog(dynarec_arm_t* dyn, int ninst, int n) +{ + MAYUSE(dyn); MAYUSE(ninst); + MESSAGE(LOG_DUMP, "Retn to epilog\n"); + POP1(xRIP); + if(n>0xfff) { + MOV32w(w1, n); + ADDx_REG(xRSP, xRSP, x1); + } else { + ADDx_U12(xRSP, xRSP, n); + } + uintptr_t tbl = getJumpTable64(); + MOV64x(x2, tbl); + UBFXx(x3, xRIP, 48, JMPTABL_SHIFT); + LDRx_REG_LSL3(x2, x2, x3); + UBFXx(x3, xRIP, 32, JMPTABL_SHIFT); + LDRx_REG_LSL3(x2, x2, x3); + UBFXx(x3, xRIP, 16, JMPTABL_SHIFT); + LDRx_REG_LSL3(x2, x2, x3); + UBFXx(x3, xRIP, 0, JMPTABL_SHIFT); + LDRx_REG_LSL3(x2, x2, x3); + MOVx_REG(x1, xRIP); + BLR(x2); // save LR +} + +void iret_to_epilog(dynarec_arm_t* dyn, int ninst, int is64bits) +{ + #warning TODO: is64bits + MAYUSE(ninst); + MESSAGE(LOG_DUMP, "IRet to epilog\n"); + // POP IP + POP1(xRIP); + // POP CS + POP1(x2); + STRH_U12(x2, xEmu, offsetof(x64emu_t, segs[_CS])); + MOVZw(x1, 0); + STRx_U12(x1, xEmu, offsetof(x64emu_t, segs_serial[_CS])); + STRx_U12(x1, xEmu, offsetof(x64emu_t, segs_serial[_SS])); + // POP EFLAGS + POP1(xFlags); + MOV32w(x1, 0x3F7FD7); + ANDx_REG(xFlags, xFlags, x1); + ORRx_mask(xFlags, xFlags, 1, 0b111111, 0); + SET_DFNONE(x1); + // POP RSP + POP1(x3); + // POP SS + POP1(x2); + STRH_U12(x2, xEmu, offsetof(x64emu_t, segs[_SS])); + // set new RSP + MOVx_REG(xRSP, x3); + // Ret.... + MOV64x(x2, (uintptr_t)arm64_epilog); // epilog on purpose, CS might have changed! + BR(x2); +} + +void call_c(dynarec_arm_t* dyn, int ninst, void* fnc, int reg, int ret, int saveflags, int savereg) +{ + MAYUSE(fnc); + if(savereg==0) + savereg = 7; + if(saveflags) { + STRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags)); + } + fpu_pushcache(dyn, ninst, reg, 0); + if(ret!=-2) { + STPx_S7_preindex(xEmu, savereg, xSP, -16); // ARM64 stack needs to be 16byte aligned + STPx_S7_offset(xRAX, xRCX, xEmu, offsetof(x64emu_t, regs[_AX])); // x9..x15, x16,x17,x18 those needs to be saved by caller + STPx_S7_offset(xRDX, xRBX, xEmu, offsetof(x64emu_t, regs[_DX])); // but x18 is R8 wich is lost, so it's fine to not save it? 
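The call_c helper spills the x86-mapped GPRs to emu->regs[] in pairs (the STP lines above) before the BLR, then reloads them afterwards while skipping whichever register was picked to carry the C return value; that is what the GO(A, B) pairs below encode. A condensed sketch of the reload rule (hypothetical helper, for illustration only):

#include <stdint.h>

static void restore_pair(uint64_t *ra, uint64_t *rb,
                         uint64_t saved_a, uint64_t saved_b,
                         int a_is_ret, int b_is_ret)
{
    if (a_is_ret)      *rb = saved_b;           // keep A, it now holds the return value
    else if (b_is_ret) *ra = saved_a;           // keep B
    else { *ra = saved_a; *rb = saved_b; }      // LDP both halves of the pair
}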
+ STPx_S7_offset(xRSP, xRBP, xEmu, offsetof(x64emu_t, regs[_SP])); + STPx_S7_offset(xRSI, xRDI, xEmu, offsetof(x64emu_t, regs[_SI])); + STPx_S7_offset(xR8, xR9, xEmu, offsetof(x64emu_t, regs[_R8])); + } + TABLE64(reg, (uintptr_t)fnc); + BLR(reg); + if(ret>=0) { + MOVx_REG(ret, xEmu); + } + if(ret!=-2) { + LDPx_S7_postindex(xEmu, savereg, xSP, 16); + #define GO(A, B) if(ret==x##A) { \ + LDRx_U12(x##B, xEmu, offsetof(x64emu_t, regs[_##B])); \ + } else if(ret==x##B) { \ + LDRx_U12(x##A, xEmu, offsetof(x64emu_t, regs[_##A])); \ + } else { \ + LDPx_S7_offset(x##A, x##B, xEmu, offsetof(x64emu_t, regs[_##A])); \ + } + GO(RAX, RCX); + GO(RDX, RBX); + GO(RSP, RBP); + GO(RSI, RDI); + GO(R8, R9); + #undef GO + } + fpu_popcache(dyn, ninst, reg, 0); + if(saveflags) { + LDRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags)); + } + SET_NODF(); +} + +void call_n(dynarec_arm_t* dyn, int ninst, void* fnc, int w) +{ + MAYUSE(fnc); + STRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags)); + fpu_pushcache(dyn, ninst, x3, 1); + // x9..x15, x16,x17,x18 those needs to be saved by caller + // RDI, RSI, RDX, RCX, R8, R9 are used for function call + STPx_S7_preindex(xEmu, xRBX, xSP, -16); // ARM64 stack needs to be 16byte aligned + STPx_S7_offset(xRSP, xRBP, xEmu, offsetof(x64emu_t, regs[_SP])); + // float and double args + if(abs(w)>1) { + MESSAGE(LOG_DUMP, "Getting %d XMM args\n", abs(w)-1); + for(int i=0; i0) { + MOVx_REG(xRAX, 0); + MOVx_REG(xRDX, x1); + } + // all done, restore all regs + LDPx_S7_postindex(xEmu, xRBX, xSP, 16); + #define GO(A, B) LDPx_S7_offset(x##A, x##B, xEmu, offsetof(x64emu_t, regs[_##A])) + GO(RSP, RBP); + #undef GO + + fpu_popcache(dyn, ninst, x3, 1); + LDRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags)); + SET_NODF(); +} + +void grab_segdata(dynarec_arm_t* dyn, uintptr_t addr, int ninst, int reg, int segment) +{ + (void)addr; + int64_t j64; + MAYUSE(j64); + MESSAGE(LOG_DUMP, "Get %s Offset\n", (segment==_FS)?"FS":"GS"); + int t1 = x1, t2 = x4; + if(reg==t1) ++t1; + if(reg==t2) ++t2; + LDRw_U12(t2, xEmu, offsetof(x64emu_t, segs_serial[segment])); + LDRx_U12(reg, xEmu, offsetof(x64emu_t, segs_offs[segment])); + if(segment==_GS) { + CBNZw_MARKSEG(t2); // fast check + } else { + LDRx_U12(t1, xEmu, offsetof(x64emu_t, context)); + LDRw_U12(t1, t1, offsetof(box64context_t, sel_serial)); + SUBw_REG(t1, t1, t2); + CBZw_MARKSEG(t1); + } + MOVZw(x1, segment); + call_c(dyn, ninst, GetSegmentBaseEmu, t2, reg, 1, 0); + MARKSEG; + MESSAGE(LOG_DUMP, "----%s Offset\n", (segment==_FS)?"FS":"GS"); +} + +// x87 stuffs +static void x87_reset(dynarec_arm_t* dyn, int ninst) +{ + (void)ninst; +#if STEP > 1 + for (int i=0; i<8; ++i) + dyn->x87cache[i] = -1; + dyn->x87stack = 0; +#else + (void)dyn; +#endif +} + +void x87_stackcount(dynarec_arm_t* dyn, int ninst, int scratch) +{ +#if STEP > 1 + MAYUSE(scratch); + if(!dyn->x87stack) + return; + MESSAGE(LOG_DUMP, "\tSynch x87 Stackcount (%d)\n", dyn->x87stack); + int a = dyn->x87stack; + // Add x87stack to emu fpu_stack + LDRw_U12(scratch, xEmu, offsetof(x64emu_t, fpu_stack)); + if(a>0) { + ADDw_U12(scratch, scratch, a); + } else { + SUBw_U12(scratch, scratch, -a); + } + STRw_U12(scratch, xEmu, offsetof(x64emu_t, fpu_stack)); + // Sub x87stack to top, with and 7 + LDRw_U12(scratch, xEmu, offsetof(x64emu_t, top)); + if(a>0) { + SUBw_U12(scratch, scratch, a); + } else { + ADDw_U12(scratch, scratch, -a); + } + ANDw_mask(scratch, scratch, 0, 2); //mask=7 + STRw_U12(scratch, xEmu, offsetof(x64emu_t, top)); + // reset x87stack + dyn->x87stack = 0; + MESSAGE(LOG_DUMP, "\t------x87 
Stackcount\n"); +#else + (void)dyn; (void)ninst; (void)scratch; +#endif +} + +int x87_do_push(dynarec_arm_t* dyn, int ninst) +{ + (void)ninst; +#if STEP > 1 + dyn->x87stack+=1; + // move all regs in cache, and find a free one + int ret = -1; + for(int i=0; i<8; ++i) + if(dyn->x87cache[i]!=-1) + ++dyn->x87cache[i]; + else if(ret==-1) { + dyn->x87cache[i] = 0; + ret=dyn->x87reg[i]=fpu_get_reg_x87(dyn); + } + return ret; +#else + (void)dyn; + return 0; +#endif +} +void x87_do_push_empty(dynarec_arm_t* dyn, int ninst, int s1) +{ +#if STEP > 1 + dyn->x87stack+=1; + // move all regs in cache + for(int i=0; i<8; ++i) + if(dyn->x87cache[i]!=-1) + ++dyn->x87cache[i]; + if(s1) + x87_stackcount(dyn, ninst, s1); +#else + (void)dyn; (void)ninst; (void)s1; +#endif +} +void x87_do_pop(dynarec_arm_t* dyn, int ninst) +{ + (void)ninst; +#if STEP > 1 + dyn->x87stack-=1; + // move all regs in cache, poping ST0 + for(int i=0; i<8; ++i) + if(dyn->x87cache[i]!=-1) { + --dyn->x87cache[i]; + if(dyn->x87cache[i]==-1) { + fpu_free_reg(dyn, dyn->x87reg[i]); + dyn->x87reg[i] = -1; + } + } +#else + (void)dyn; +#endif +} + +void x87_purgecache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3) +{ + (void)ninst; +#if STEP > 1 + MAYUSE(s1); MAYUSE(s2); MAYUSE(s3); + int ret = 0; + for (int i=0; i<8 && !ret; ++i) + if(dyn->x87cache[i] != -1) + ret = 1; + if(!ret && !dyn->x87stack) // nothing to do + return; + MESSAGE(LOG_DUMP, "\tPurge x87 Cache and Synch Stackcount (%+d)\n", dyn->x87stack); + int a = dyn->x87stack; + if(a!=0) { + // reset x87stack + dyn->x87stack = 0; + // Add x87stack to emu fpu_stack + LDRw_U12(s2, xEmu, offsetof(x64emu_t, fpu_stack)); + if(a>0) { + ADDw_U12(s2, s2, a); + } else { + SUBw_U12(s2, s2, -a); + } + STRw_U12(s2, xEmu, offsetof(x64emu_t, fpu_stack)); + // Sub x87stack to top, with and 7 + LDRw_U12(s2, xEmu, offsetof(x64emu_t, top)); + // update tags (and top at the same time) + if(a>0) { + // new tag to fulls + MOVZw(s3, 0); + ADDx_U12(s1, xEmu, offsetof(x64emu_t, p_regs)); + for (int i=0; itop + st)&7 + STRw_REG_LSL2(s3, s1, s2); + } + } else { + // empty tags + MOVZw(s3, 0b11); + ADDx_U12(s1, xEmu, offsetof(x64emu_t, p_regs)); + for (int i=0; i<-a; ++i) { + STRw_REG_LSL2(s3, s1, s2); + ADDw_U12(s2, s2, 1); + ANDw_mask(s2, s2, 0, 2); //mask=7 // (emu->top + st)&7 + } + } + STRw_U12(s2, xEmu, offsetof(x64emu_t, top)); + } else { + LDRw_U12(s2, xEmu, offsetof(x64emu_t, top)); + } + if(ret!=0) { + // --- set values + // prepare offset to fpu => s1 + ADDx_U12(s1, xEmu, offsetof(x64emu_t, x87)); + // Get top + // loop all cache entries + for (int i=0; i<8; ++i) + if(dyn->x87cache[i]!=-1) { + ADDw_U12(s3, s2, dyn->x87cache[i]); + ANDw_mask(s3, s3, 0, 2); //mask=7 // (emu->top + st)&7 + VSTR64_REG_LSL3(dyn->x87reg[i], s1, s3); + fpu_free_reg(dyn, dyn->x87reg[i]); + dyn->x87reg[i] = -1; + dyn->x87cache[i] = -1; + } + } +#else + (void)dyn; (void)s1; (void)s2; (void)s3; +#endif +} + +#ifdef HAVE_TRACE +static void x87_reflectcache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3) +{ +#if STEP > 1 + MAYUSE(s2); MAYUSE(s3); + x87_stackcount(dyn, ninst, s1); + int ret = 0; + for (int i=0; (i<8) && (!ret); ++i) + if(dyn->x87cache[i] != -1) + ret = 1; + if(!ret) // nothing to do + return; + // prepare offset to fpu => s1 + ADDx_U12(s1, xEmu, offsetof(x64emu_t, x87)); + // Get top + LDRw_U12(s2, xEmu, offsetof(x64emu_t, top)); + // loop all cache entries + for (int i=0; i<8; ++i) + if(dyn->x87cache[i]!=-1) { + ADDw_U12(s3, s2, dyn->x87cache[i]); + ANDw_mask(s3, s3, 0, 2); // mask=7 // (emu->top + 
i)&7 + VSTR64_REG_LSL3(dyn->x87reg[i], s1, s3); + } +#else + (void)dyn; (void)ninst; (void)s1; (void)s2; (void)s3; +#endif +} +#endif + +int x87_get_cache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st) +{ + (void)ninst; +#if STEP > 1 + MAYUSE(s1); MAYUSE(s2); + // search in cache first + for (int i=0; i<8; ++i) + if(dyn->x87cache[i]==st) + return i; + MESSAGE(LOG_DUMP, "\tCreate x87 Cache for ST%d\n", st); + // get a free spot + int ret = -1; + for (int i=0; (i<8) && (ret==-1); ++i) + if(dyn->x87cache[i]==-1) + ret = i; + // found, setup and grab the value + dyn->x87cache[ret] = st; + dyn->x87reg[ret] = fpu_get_reg_x87(dyn); + ADDx_U12(s1, xEmu, offsetof(x64emu_t, x87)); + LDRw_U12(s2, xEmu, offsetof(x64emu_t, top)); + int a = st - dyn->x87stack; + if(a) { + if(a<0) { + SUBw_U12(s2, s2, -a); + } else { + ADDw_U12(s2, s2, a); + } + ANDw_mask(s2, s2, 0, 2); //mask=7 + } + VLDR64_REG_LSL3(dyn->x87reg[ret], s1, s2); + MESSAGE(LOG_DUMP, "\t-------x87 Cache for ST%d\n", st); + + return ret; +#else + (void)dyn; (void)s1; (void)s2; (void)st; + return 0; +#endif +} + +int x87_get_st(dynarec_arm_t* dyn, int ninst, int s1, int s2, int a) +{ +#if STEP > 1 + return dyn->x87reg[x87_get_cache(dyn, ninst, s1, s2, a)]; +#else + (void)dyn; (void)ninst; (void)s1; (void)s2; (void)a; + return 0; +#endif +} + + +void x87_refresh(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st) +{ +#if STEP > 1 + MAYUSE(s2); + x87_stackcount(dyn, ninst, s1); + int ret = -1; + for (int i=0; (i<8) && (ret==-1); ++i) + if(dyn->x87cache[i] == st) + ret = i; + if(ret==-1) // nothing to do + return; + MESSAGE(LOG_DUMP, "\tRefresh x87 Cache for ST%d\n", st); + // prepare offset to fpu => s1 + ADDx_U12(s1, xEmu, offsetof(x64emu_t, x87)); + // Get top + LDRw_U12(s2, xEmu, offsetof(x64emu_t, top)); + // Update + if(st) { + ADDw_U12(s2, s2, st); + ANDw_mask(s2, s2, 0, 2); //mask=7 // (emu->top + i)&7 + } + VLDR64_REG_LSL3(dyn->x87reg[ret], s1, s2); + MESSAGE(LOG_DUMP, "\t--------x87 Cache for ST%d\n", st); +#else + (void)dyn; (void)ninst; (void)s1; (void)s2; (void)st; +#endif +} + +void x87_forget(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st) +{ +#if STEP > 1 + MAYUSE(s2); + x87_stackcount(dyn, ninst, s1); + int ret = -1; + for (int i=0; (i<8) && (ret==-1); ++i) + if(dyn->x87cache[i] == st) + ret = i; + if(ret==-1) // nothing to do + return; + MESSAGE(LOG_DUMP, "\tForget x87 Cache for ST%d\n", st); + // prepare offset to fpu => s1 + ADDx_U12(s1, xEmu, offsetof(x64emu_t, x87)); + // Get top + LDRw_U12(s2, xEmu, offsetof(x64emu_t, top)); + // Update + if(st) { + ADDw_U12(s2, s2, st); + ANDw_mask(s2, s2, 0, 2); //mask=7 // (emu->top + i)&7 + } + VSTR64_REG_LSL3(dyn->x87reg[ret], s1, s2); + MESSAGE(LOG_DUMP, "\t--------x87 Cache for ST%d\n", st); + // and forget that cache + fpu_free_reg(dyn, dyn->x87reg[ret]); + dyn->x87cache[ret] = -1; + dyn->x87reg[ret] = -1; +#else + (void)dyn; (void)ninst; (void)s1; (void)s2; (void)st; +#endif +} + +void x87_reget_st(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st) +{ + (void)ninst; +#if STEP > 1 + MAYUSE(s1); MAYUSE(s2); + // search in cache first + for (int i=0; i<8; ++i) + if(dyn->x87cache[i]==st) { + // refresh the value + MESSAGE(LOG_DUMP, "\tRefresh x87 Cache for ST%d\n", st); + ADDx_U12(s1, xEmu, offsetof(x64emu_t, x87)); + LDRw_U12(s2, xEmu, offsetof(x64emu_t, top)); + int a = st - dyn->x87stack; + if(a<0) { + SUBw_U12(s2, s2, -a); + } else { + ADDw_U12(s2, s2, a); + } + ANDw_mask(s2, s2, 0, 2); //mask=7 // (emu->top + i)&7 + VLDR64_REG_LSL3(dyn->x87reg[i], s1, s2); + 
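The x87 cache helpers above and below keep re-deriving the same physical slot index: the logical ST(st) name is added to emu->top modulo 8, after compensating for pushes and pops that are still only tracked in dyn->x87stack. A host-side summary (illustration only):

static int x87_slot(int top, int st, int pending_stack)
{
    // matches the ADDw/SUBw followed by ANDw_mask(..., 0, 2) /* mask = 7 */ above
    return (top + st - pending_stack) & 7;
}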
MESSAGE(LOG_DUMP, "\t-------x87 Cache for ST%d\n", st); + // ok + return; + } + // Was not in the cache? creating it.... + MESSAGE(LOG_DUMP, "\tCreate x87 Cache for ST%d\n", st); + // get a free spot + int ret = -1; + for (int i=0; (i<8) && (ret==-1); ++i) + if(dyn->x87cache[i]==-1) + ret = i; + // found, setup and grab the value + dyn->x87cache[ret] = st; + dyn->x87reg[ret] = fpu_get_reg_x87(dyn); + ADDx_U12(s1, xEmu, offsetof(x64emu_t, x87)); + LDRw_U12(s2, xEmu, offsetof(x64emu_t, top)); + int a = st - dyn->x87stack; + if(a<0) { + SUBw_U12(s2, s2, -a); + } else { + ADDw_U12(s2, s2, a); + } + ANDw_mask(s2, s2, 0, 2); //mask=7 // (emu->top + i)&7 + VLDR64_REG_LSL3(dyn->x87reg[ret], s1, s2); + MESSAGE(LOG_DUMP, "\t-------x87 Cache for ST%d\n", st); +#else + (void)dyn; (void)s1; (void)s2; (void)st; +#endif +} + +// Set rounding according to cw flags, return reg to restore flags +int x87_setround(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3) +{ + MAYUSE(dyn); MAYUSE(ninst); + MAYUSE(s1); MAYUSE(s2); + LDRw_U12(s1, xEmu, offsetof(x64emu_t, round)); + UBFXw(s2, s1, 1, 1); // bit 1 of round in bit 0 (zero extented) of s2 + BFIw(s2, s1, 1, 1); // bit 0 of round in bit 1 of s2 + MRS_fpcr(s1); // get fpscr + MOVx_REG(s3, s1); + BFIx(s1, s2, 22, 2); // inject new round + MSR_fpcr(s1); // put new fpscr + return s3; +} + +// Set rounding according to mxcsr flags, return reg to restore flags +int sse_setround(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3) +{ + MAYUSE(dyn); MAYUSE(ninst); + MAYUSE(s1); MAYUSE(s2); + LDRH_U12(s1, xEmu, offsetof(x64emu_t, mxcsr)); + RBITw(s2, s1); // round is on bits 13-14 on x86, + LSRw(s2, s2, 17); // but we want the reverse of that + MRS_fpcr(s1); // get fpscr + MOVx_REG(s3, s1); + BFIx(s1, s2, 22, 2); // inject new round + MSR_fpcr(s1); // put new fpscr + return s3; +} + +// Restore round flag +void x87_restoreround(dynarec_arm_t* dyn, int ninst, int s1) +{ + MAYUSE(dyn); MAYUSE(ninst); + MAYUSE(s1); + MSR_fpcr(s1); // put back fpscr +} + +// MMX helpers +static void mmx_reset(dynarec_arm_t* dyn, int ninst) +{ + (void)ninst; +#if STEP > 1 + MAYUSE(dyn); + for (int i=0; i<8; ++i) + dyn->mmxcache[i] = -1; +#else + (void)dyn; +#endif +} +// get neon register for a MMX reg, create the entry if needed +int mmx_get_reg(dynarec_arm_t* dyn, int ninst, int s1, int a) +{ + (void)ninst; (void)s1; +#if STEP > 1 + if(dyn->mmxcache[a]!=-1) + return dyn->mmxcache[a]; + int ret = dyn->mmxcache[a] = fpu_get_reg_emm(dyn, a); + VLDR64_U12(ret, xEmu, offsetof(x64emu_t, mmx[a])); + return ret; +#else + (void)dyn; (void)a; + return 0; +#endif +} +// get neon register for a MMX reg, but don't try to synch it if it needed to be created +int mmx_get_reg_empty(dynarec_arm_t* dyn, int ninst, int s1, int a) +{ + (void)ninst; (void)s1; +#if STEP > 1 + if(dyn->mmxcache[a]!=-1) + return dyn->mmxcache[a]; + int ret = dyn->mmxcache[a] = fpu_get_reg_emm(dyn, a); + return ret; +#else + (void)dyn; (void)a; + return 0; +#endif +} +// purge the MMX cache only(needs 3 scratch registers) +void mmx_purgecache(dynarec_arm_t* dyn, int ninst, int s1) +{ + (void)ninst; (void)s1; +#if STEP > 1 + int old = -1; + for (int i=0; i<8; ++i) + if(dyn->mmxcache[i]!=-1) { + if (old==-1) { + MESSAGE(LOG_DUMP, "\tPurge MMX Cache ------\n"); + ++old; + } + VSTR64_U12(dyn->mmxcache[i], xEmu, offsetof(x64emu_t, mmx[i])); + fpu_free_reg(dyn, dyn->mmxcache[i]); + dyn->mmxcache[i] = -1; + } + if(old!=-1) { + MESSAGE(LOG_DUMP, "\t------ Purge MMX Cache\n"); + } +#else + (void)dyn; +#endif +} +#ifdef HAVE_TRACE 
+static void mmx_reflectcache(dynarec_arm_t* dyn, int ninst, int s1) +{ + (void) ninst; (void)s1; +#if STEP > 1 + for (int i=0; i<8; ++i) + if(dyn->mmxcache[i]!=-1) { + VLDR64_U12(dyn->mmxcache[i], xEmu, offsetof(x64emu_t, mmx[i])); + } +#else + (void)dyn; +#endif +} +#endif + + +// SSE / SSE2 helpers +static void sse_reset(dynarec_arm_t* dyn, int ninst) +{ + (void)ninst; +#if STEP > 1 + for (int i=0; i<16; ++i) + dyn->ssecache[i] = -1; +#else + (void)dyn; +#endif +} +// get neon register for a SSE reg, create the entry if needed +int sse_get_reg(dynarec_arm_t* dyn, int ninst, int s1, int a) +{ + (void) ninst; (void)s1; +#if STEP > 1 + if(dyn->ssecache[a]!=-1) + return dyn->ssecache[a]; + int ret = dyn->ssecache[a] = fpu_get_reg_xmm(dyn, a); + VLDR128_U12(ret, xEmu, offsetof(x64emu_t, xmm[a])); + return ret; +#else + (void)dyn; (void)a; + return 0; +#endif +} +// get neon register for a SSE reg, but don't try to synch it if it needed to be created +int sse_get_reg_empty(dynarec_arm_t* dyn, int ninst, int s1, int a) +{ + (void) ninst; (void)s1; +#if STEP > 1 + if(dyn->ssecache[a]!=-1) + return dyn->ssecache[a]; + int ret = dyn->ssecache[a] = fpu_get_reg_xmm(dyn, a); + return ret; +#else + (void)dyn; (void)a; + return 0; +#endif +} +// forget neon register for a SSE reg, create the entry if needed +void sse_forget_reg(dynarec_arm_t* dyn, int ninst, int a) +{ + (void) ninst; +#if STEP > 1 + if(dyn->ssecache[a]==-1) + return; + VSTR128_U12(dyn->ssecache[a], xEmu, offsetof(x64emu_t, xmm[a])); + fpu_free_reg(dyn, dyn->ssecache[a]); + dyn->ssecache[a] = -1; +#else + (void)dyn; (void)a; +#endif + return; +} +// purge the SSE cache for XMM0..XMM7 (to use before function native call) +void sse_purge07cache(dynarec_arm_t* dyn, int ninst, int s1) +{ + (void) ninst; (void)s1; +#if STEP > 1 + int old = -1; + for (int i=0; i<8; ++i) + if(dyn->ssecache[i]!=-1) { + if (old==-1) { + MESSAGE(LOG_DUMP, "\tPurge XMM0..7 Cache ------\n"); + ++old; + } + VSTR128_U12(dyn->ssecache[i], xEmu, offsetof(x64emu_t, xmm[i])); + fpu_free_reg(dyn, dyn->ssecache[i]); + dyn->ssecache[i] = -1; + } + if(old!=-1) { + MESSAGE(LOG_DUMP, "\t------ Purge XMM0..7 Cache\n"); + } +#else + (void)dyn; +#endif +} + +// purge the SSE cache only +static void sse_purgecache(dynarec_arm_t* dyn, int ninst, int s1) +{ + (void) ninst; (void)s1; +#if STEP > 1 + int old = -1; + for (int i=0; i<16; ++i) + if(dyn->ssecache[i]!=-1) { + if (old==-1) { + MESSAGE(LOG_DUMP, "\tPurge SSE Cache ------\n"); + ++old; + } + VSTR128_U12(dyn->ssecache[i], xEmu, offsetof(x64emu_t, xmm[i])); + fpu_free_reg(dyn, dyn->ssecache[i]); + dyn->ssecache[i] = -1; + } + if(old!=-1) { + MESSAGE(LOG_DUMP, "\t------ Purge SSE Cache\n"); + } +#else + (void)dyn; +#endif +} +#ifdef HAVE_TRACE +static void sse_reflectcache(dynarec_arm_t* dyn, int ninst, int s1) +{ + (void) ninst; (void)s1; +#if STEP > 1 + for (int i=0; i<16; ++i) + if(dyn->ssecache[i]!=-1) { + VSTR128_U12(dyn->ssecache[i], xEmu, offsetof(x64emu_t, xmm[i])); + } +#else + (void)dyn; +#endif +} +#endif + +void fpu_pushcache(dynarec_arm_t* dyn, int ninst, int s1, int not07) +{ + (void) ninst; (void)s1; +#if STEP > 1 + int start = not07?8:0; + // only SSE regs needs to be push back to xEmu + int n=0; + for (int i=start; i<16; i++) + if(dyn->ssecache[i]!=-1) + ++n; + if(!n) + return; + MESSAGE(LOG_DUMP, "\tPush XMM Cache (%d)------\n", n); + for (int i=start; i<16; ++i) + if(dyn->ssecache[i]!=-1) { + VSTR128_U12(dyn->ssecache[i], xEmu, offsetof(x64emu_t, xmm[i])); + } + MESSAGE(LOG_DUMP, "\t------- Push XMM Cache 
(%d)\n", n); +#else + (void)dyn; +#endif +} + +void fpu_popcache(dynarec_arm_t* dyn, int ninst, int s1, int not07) +{ + (void) ninst; (void)s1; +#if STEP > 1 + int start = not07?8:0; + // only SSE regs needs to be pop back from xEmu + int n=0; + for (int i=start; i<16; i++) + if(dyn->ssecache[i]!=-1) + ++n; + if(!n) + return; + MESSAGE(LOG_DUMP, "\tPop XMM Cache (%d)------\n", n); + for (int i=start; i<16; ++i) + if(dyn->ssecache[i]!=-1) { + VLDR128_U12(dyn->ssecache[i], xEmu, offsetof(x64emu_t, xmm[i])); + } + MESSAGE(LOG_DUMP, "\t------- Pop XMM Cache (%d)\n", n); +#else + (void)dyn; +#endif +} + +void fpu_purgecache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3) +{ + x87_purgecache(dyn, ninst, s1, s2, s3); + mmx_purgecache(dyn, ninst, s1); + sse_purgecache(dyn, ninst, s1); + fpu_reset_reg(dyn); +} + +#ifdef HAVE_TRACE +void fpu_reflectcache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3) +{ + x87_reflectcache(dyn, ninst, s1, s2, s3); + if(trace_emm) + mmx_reflectcache(dyn, ninst, s1); + if(trace_xmm) + sse_reflectcache(dyn, ninst, s1); +} +#endif + +void fpu_reset(dynarec_arm_t* dyn, int ninst) +{ + x87_reset(dyn, ninst); + mmx_reset(dyn, ninst); + sse_reset(dyn, ninst); + fpu_reset_reg(dyn); +} + +void emit_pf(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4) +{ + MAYUSE(dyn); MAYUSE(ninst); + MAYUSE(s1); MAYUSE(s3); MAYUSE(s4); + // PF: (((emu->x64emu_parity_tab[(res) / 32] >> ((res) % 32)) & 1) == 0) + ANDw_mask(s3, s1, 0b011011, 0b000010); // mask=0xE0 + LSRw(s3, s3, 5); + MOV64x(s4, (uintptr_t)GetParityTab()); + LDRw_REG_LSL2(s4, s4, s3); + ANDw_mask(s3, s1, 0, 0b000100); //0x1f + LSRw_REG(s4, s4, s3); + MVNw_REG(s4, s4); + BFIw(xFlags, s4, F_PF, 1); +} diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h new file mode 100755 index 00000000..e1bbd5b9 --- /dev/null +++ b/src/dynarec/arm64/dynarec_arm64_helper.h @@ -0,0 +1,1087 @@ +#ifndef __DYNAREC_ARM64_HELPER_H__ +#define __DYNAREC_ARM64_HELPER_H__ + +// undef to get Close to SSE Float->int conversions +//#define PRECISE_CVT + +#if STEP == 0 +#include "dynarec_arm64_pass0.h" +#elif STEP == 1 +#include "dynarec_arm64_pass1.h" +#elif STEP == 2 +#include "dynarec_arm64_pass2.h" +#elif STEP == 3 +#include "dynarec_arm64_pass3.h" +#endif + +#include "debug.h" +#include "arm64_emitter.h" +#include "../emu/x64primop.h" + +#define F8 *(uint8_t*)(addr++) +#define F8S *(int8_t*)(addr++) +#define F16 *(uint16_t*)(addr+=2, addr-2) +#define F16S *(int16_t*)(addr+=2, addr-2) +#define F32 *(uint32_t*)(addr+=4, addr-4) +#define F32S *(int32_t*)(addr+=4, addr-4) +#define F32S64 (uint64_t)(int64_t)F32S +#define F64 *(uint64_t*)(addr+=8, addr-8) +#define PK(a) *(uint8_t*)(addr+a) +#define PK16(a) *(uint16_t*)(addr+a) +#define PK32(a) *(uint32_t*)(addr+a) +#define PK64(a) *(uint64_t*)(addr+a) +#define PKip(a) *(uint8_t*)(ip+a) + +// GETGD get x64 register in gd +#define GETGD gd = xRAX+((nextop&0x38)>>3)+(rex.r<<3) +//GETED can use r1 for ed, and r2 for wback. 
wback is 0 if ed is xEAX..xEDI +#define GETED(D) if(MODREG) { \ + ed = xRAX+(nextop&7)+(rex.b<<3); \ + wback = 0; \ + } else { \ + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, 0, D); \ + LDRxw_U12(x1, wback, fixedaddress); \ + ed = x1; \ + } +#define GETEDx(D) if(MODREG) { \ + ed = xRAX+(nextop&7)+(rex.b<<3); \ + wback = 0; \ + } else { \ + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<3, 7, rex, 0, D); \ + LDRx_U12(x1, wback, fixedaddress); \ + ed = x1; \ + } +#define GETEDw(D) if((nextop&0xC0)==0xC0) { \ + ed = xEAX+(nextop&7)+(rex.b<<3); \ + wback = 0; \ + } else { \ + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, D); \ + LDRw_U12(x1, wback, fixedaddress); \ + ed = x1; \ + } +#define GETSEDw(D) if((nextop&0xC0)==0xC0) { \ + ed = xRAX+(nextop&7)+(rex.b<<3); \ + SXTWx(x1, ed); \ + wb = x1; \ + wback = 0; \ + } else { \ + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, D); \ + LDRSW_U12(x1, wback, fixedaddress); \ + wb = ed = x1; \ + } +#define GETED32(D) if(MODREG) { \ + ed = xRAX+(nextop&7)+(rex.b<<3); \ + wback = 0; \ + } else { \ + addr = geted32(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, 0, D); \ + LDRxw_U12(x1, wback, fixedaddress); \ + ed = x1; \ + } +#define GETSED32w(D) if((nextop&0xC0)==0xC0) { \ + ed = xRAX+(nextop&7)+(rex.b<<3); \ + SXTWx(x1, ed); \ + wb = x1; \ + wback = 0; \ + } else { \ + addr = geted32(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, D); \ + LDRSW_U12(x1, wback, fixedaddress); \ + wb = ed = x1; \ + } +//GETEDH can use hint for ed, and r1 or r2 for wback (depending on hint). wback is 0 if ed is xEAX..xEDI +#define GETEDH(hint, D) if(MODREG) { \ + ed = xRAX+(nextop&7)+(rex.b<<3); \ + wback = 0; \ + } else { \ + addr = geted(dyn, addr, ninst, nextop, &wback, (hint==x2)?x1:x2, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, 0, D); \ + LDRxw_U12(hint, wback, fixedaddress); \ + ed = hint; \ + } +#define GETED32H(hint, D) if(MODREG) { \ + ed = xRAX+(nextop&7)+(rex.b<<3); \ + wback = 0; \ + } else { \ + addr = geted32(dyn, addr, ninst, nextop, &wback, (hint==x2)?x1:x2, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, 0, D); \ + LDRxw_U12(hint, wback, fixedaddress); \ + ed = hint; \ + } +//GETEDW can use hint for wback and ret for ed. 
wback is 0 if ed is xEAX..xEDI +#define GETEDW(hint, ret, D) if(MODREG) { \ + ed = xRAX+(nextop&7)+(rex.b<<3); \ + MOVxw_REG(ret, ed); \ + wback = 0; \ + } else { \ + addr = geted(dyn, addr, ninst, nextop, &wback, hint, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, 0, D); \ + ed = ret; \ + LDRxw_U12(ed, wback, fixedaddress); \ + } +#define GETED32W(hint, ret, D) if(MODREG) { \ + ed = xRAX+(nextop&7)+(rex.b<<3); \ + MOVxw_REG(ret, ed); \ + wback = 0; \ + } else { \ + addr = geted32(dyn, addr, ninst, nextop, &wback, hint, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, 0, D); \ + ed = ret; \ + LDRxw_U12(ed, wback, fixedaddress); \ + } +// Write back ed in wback (if wback not 0) +#define WBACK if(wback) {STRxw_U12(ed, wback, fixedaddress);} +// Write back ed in wback (if wback not 0) +#define WBACKx if(wback) {STRx_U12(ed, wback, fixedaddress);} +// Write back ed in wback (if wback not 0) +#define WBACKw if(wback) {STRw_U12(ed, wback, fixedaddress);} +// Send back wb to either ed or wback +#define SBACK(wb) if(wback) {STRxw(wb, wback, fixedaddress);} else {MOVxw_REG(ed, wb);} +//GETEDO can use r1 for ed, and r2 for wback. wback is 0 if ed is xEAX..xEDI +#define GETEDO(O, D) if(MODREG) { \ + ed = xRAX+(nextop&7)+(rex.b<<3); \ + wback = 0; \ + } else { \ + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, 0, D); \ + LDRxw_REG(x1, wback, O); \ + ed = x1; \ + } +#define WBACKO(O) if(wback) {STRxw_REG(ed, wback, O);} +//GETEDOx can use r1 for ed, and r2 for wback. wback is 0 if ed is xEAX..xEDI +#define GETEDOx(O, D) if(MODREG) { \ + ed = xRAX+(nextop&7)+(rex.b<<3); \ + wback = 0; \ + } else { \ + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, 0, D); \ + LDRx_REG(x1, wback, O); \ + ed = x1; \ + } +#define GETSEDOw(O, D) if((nextop&0xC0)==0xC0) { \ + ed = xRAX+(nextop&7)+(rex.b<<3); \ + SXTWx(x1, ed); \ + wb = x1; \ + wback = 0; \ + } else { \ + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, 0, D); \ + LDRSW_REG(x1, wback, O); \ + wb = ed = x1; \ + } +//FAKEELike GETED, but doesn't get anything +#define FAKEED if(!MODREG) { \ + addr = fakeed(dyn, addr, ninst, nextop); \ + } +// GETGW extract x64 register in gd, that is i +#define GETGW(i) gd = xRAX+((nextop&0x38)>>3)+(rex.r<<3); UXTHw(i, gd); gd = i; +// GETGW extract x64 register in gd, that is i, Signed extented +#define GETSGW(i) gd = xRAX+((nextop&0x38)>>3)+(rex.r<<3); SXTHw(i, gd); gd = i; +//GETEWW will use i for ed, and can use w for wback. +#define GETEWW(w, i, D) if(MODREG) { \ + wback = xRAX+(nextop&7)+(rex.b<<3);\ + UXTHw(i, wback); \ + ed = i; \ + wb1 = 0; \ + } else { \ + addr = geted(dyn, addr, ninst, nextop, &wback, w, &fixedaddress, 0xfff<<1, (1<<1)-1, rex, 0, D); \ + LDRH_U12(i, wback, fixedaddress); \ + ed = i; \ + wb1 = 1; \ + } +//GETEW will use i for ed, and can use r3 for wback. +#define GETEW(i, D) if(MODREG) { \ + wback = xRAX+(nextop&7)+(rex.b<<3);\ + UXTHw(i, wback); \ + ed = i; \ + wb1 = 0; \ + } else { \ + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<1, (1<<1)-1, rex, 0, D); \ + LDRH_U12(i, wback, fixedaddress); \ + ed = i; \ + wb1 = 1; \ + } +//GETSEW will use i for ed, and can use r3 for wback. 
This is the Signed version +#define GETSEW(i, D) if(MODREG) { \ + wback = xRAX+(nextop&7)+(rex.b<<3);\ + SXTHw(i, wback); \ + ed = i; \ + wb1 = 0; \ + } else { \ + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<1, (1<<1)-1, rex, 0, D); \ + LDRSHx_U12(i, wback, fixedaddress);\ + ed = i; \ + wb1 = 1; \ + } +// Write ed back to original register / memory +#define EWBACK if(wb1) {STRH_U12(ed, wback, fixedaddress);} else {BFIx(wback, ed, 0, 16);} +// Write w back to original register / memory +#define EWBACKW(w) if(wb1) {STRH_U12(w, wback, fixedaddress);} else {BFIx(wback, w, 0, 16);} +// Write back gd in correct register +#define GWBACK BFIx((xRAX+((nextop&0x38)>>3)+(rex.r<<3)), gd, 0, 16); +//GETEB will use i for ed, and can use r3 for wback. +#define GETEB(i, D) if(MODREG) { \ + if(rex.rex) { \ + wback = xRAX+(nextop&7)+(rex.b<<3); \ + wb2 = 0; \ + } else { \ + wback = (nextop&7); \ + wb2 = (wback>>2)*8; \ + wback = xRAX+(wback&3); \ + } \ + UBFXx(i, wback, wb2, 8); \ + wb1 = 0; \ + ed = i; \ + } else { \ + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff, 0, rex, 0, D); \ + LDRB_U12(i, wback, fixedaddress); \ + wb1 = 1; \ + ed = i; \ + } +//GETEBO will use i for ed, i is also Offset, and can use r3 for wback. +#define GETEBO(i, D) if(MODREG) { \ + if(rex.rex) { \ + wback = xRAX+(nextop&7)+(rex.b<<3); \ + wb2 = 0; \ + } else { \ + wback = (nextop&7); \ + wb2 = (wback>>2)*8; \ + wback = xRAX+(wback&3); \ + } \ + UBFXx(i, wback, wb2, 8); \ + wb1 = 0; \ + ed = i; \ + } else { \ + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0, rex, 0, D); \ + ADDx_REG(wback, wback, i); \ + LDRB_U12(i, wback, fixedaddress); \ + wb1 = 1; \ + ed = i; \ + } +//GETSEB sign extend EB, will use i for ed, and can use r3 for wback. +#define GETSEB(i, D) if(MODREG) { \ + if(rex.rex) { \ + wback = xRAX+(nextop&7)+(rex.b<<3); \ + wb2 = 0; \ + } else { \ + wback = (nextop&7); \ + wb2 = (wback>>2)*8; \ + wback = xRAX+(wback&3); \ + } \ + SBFXx(i, wback, wb2, 8); \ + wb1 = 0; \ + ed = i; \ + } else { \ + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff, 0, rex, 0, D); \ + LDRSBx_U12(i, wback, fixedaddress); \ + wb1 = 1; \ + ed = i; \ + } +// Write eb (ed) back to original register / memory +#define EBBACK if(wb1) {STRB_U12(ed, wback, fixedaddress);} else {BFIx(wback, ed, wb2, 8);} +//GETGB will use i for gd +#define GETGB(i) if(rex.rex) { \ + gb1 = xRAX+((nextop&0x38)>>3)+(rex.r<<3); \ + gb2 = 0; \ + } else { \ + gd = (nextop&0x38)>>3; \ + gb2 = ((gd&4)>>2); \ + gb1 = xRAX+(gd&3); \ + } \ + gd = i; \ + UBFXx(gd, gb1, gb2*8, 8); +//GETSGB signe extend GB, will use i for gd +#define GETSGB(i) if(rex.rex) { \ + gb1 = xRAX+((nextop&0x38)>>3)+(rex.r<<3); \ + gb2 = 0; \ + } else { \ + gd = (nextop&0x38)>>3; \ + gb2 = ((gd&4)>>2); \ + gb1 = xRAX+(gd&3); \ + } \ + gd = i; \ + SBFXx(gd, gb1, gb2, 8); +// Write gb (gd) back to original register / memory +#define GBBACK BFIx(gb1, gd, gb2, 8); + +// Get Direction with size Z and based of F_DF flag, on register r ready for LDR/STR fetching +// F_DF is 1<<10, so 1 ROR 11*2 (so F_OF) +#define GETDIR(r, A) \ + MOV32w(r, A); /* mask=1<<10 */ \ + TSTw_mask(xFlags, 0b010110, 0); \ + CNEGx(r, r, cNE) + +// CALL will use x7 for the call address. Return value can be put in ret (unless ret is -1) +// R0 will not be pushed/popd if ret is -2 +#define CALL(F, ret) call_c(dyn, ninst, F, x7, ret, 1, 0) +// CALL_ will use x7 for the call address. 
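GETEB/GETGB above encode the legacy x86 byte-register quirk: without a REX prefix, encodings 4..7 address the high byte (AH, CH, DH, BH) of the first four GPRs, while any REX prefix makes them select the low byte of RSP/RBP/RSI/RDI (or of R12..R15 with REX.B). A sketch of the selection rule the wback/wb2 computation implements (illustration, not a function from the tree):

static void x86_byte_reg(int rm, int has_rex, int rex_b, int *gpr, int *bit_offset)
{
    if (has_rex) { *gpr = rm | (rex_b << 3); *bit_offset = 0;             }   // low byte of any of the 16 GPRs
    else         { *gpr = rm & 3;            *bit_offset = (rm >> 2) * 8; }   // 4..7 -> byte 1 of rAX..rBX (AH..BH)
}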
Return value can be put in ret (unless ret is -1) +// R0 will not be pushed/popd if ret is -2 +#define CALL_(F, ret, reg) call_c(dyn, ninst, F, x7, ret, 1, reg) +// CALL_S will use x7 for the call address. Return value can be put in ret (unless ret is -1) +// R0 will not be pushed/popd if ret is -2. Flags are not save/restored +#define CALL_S(F, ret) call_c(dyn, ninst, F, x7, ret, 0, 0) + +#define MARK dyn->insts[ninst].mark = dyn->native_size +#define GETMARK dyn->insts[ninst].mark +#define MARK2 dyn->insts[ninst].mark2 = dyn->native_size +#define GETMARK2 dyn->insts[ninst].mark2 +#define MARK3 dyn->insts[ninst].mark3 = dyn->native_size +#define GETMARK3 dyn->insts[ninst].mark3 +#define MARKF dyn->insts[ninst].markf = dyn->native_size +#define GETMARKF dyn->insts[ninst].markf +#define MARKSEG dyn->insts[ninst].markseg = dyn->native_size +#define GETMARKSEG dyn->insts[ninst].markseg +#define MARKLOCK dyn->insts[ninst].marklock = dyn->native_size +#define GETMARKLOCK dyn->insts[ninst].marklock + +// Branch to MARK if cond (use j64) +#define B_MARK(cond) \ + j64 = GETMARK-(dyn->native_size); \ + Bcond(cond, j64) +// Branch to MARK unconditionnal (use j64) +#define B_MARK_nocond \ + j64 = GETMARK-(dyn->native_size); \ + B(j64) +// Branch to MARK if reg is 0 (use j64) +#define CBZxw_MARK(reg) \ + j64 = GETMARK-(dyn->native_size); \ + CBZxw(reg, j64) +// Branch to MARK if reg is not 0 (use j64) +#define CBNZx_MARK(reg) \ + j64 = GETMARK-(dyn->native_size); \ + CBNZx(reg, j64) +// Branch to MARK if reg is not 0 (use j64) +#define CBNZw_MARK(reg) \ + j64 = GETMARK-(dyn->native_size); \ + CBNZw(reg, j64) +// Test bit N of A and branch to MARK if not set +#define TBZ_MARK(A, N) \ + j64 = GETMARK-(dyn->native_size); \ + TBZ(A, N, j64) +// Test bit N of A and branch to MARK if set +#define TBNZ_MARK(A, N) \ + j64 = GETMARK-(dyn->native_size); \ + TBNZ(A, N, j64) +// Branch to MARK2 if cond (use j64) +#define B_MARK2(cond) \ + j64 = GETMARK2-(dyn->native_size); \ + Bcond(cond, j64) +// Branch to MARK2 unconditionnal (use j64) +#define B_MARK2_nocond \ + j64 = GETMARK2-(dyn->native_size); \ + B(j64) +// Branch to MARK2 if reg is not 0 (use j64) +#define CBNZx_MARK2(reg) \ + j64 = GETMARK2-(dyn->native_size); \ + CBNZx(reg, j64) +// Test bit N of A and branch to MARK2 if set +#define TBNZ_MARK2(A, N) \ + j64 = GETMARK2-(dyn->native_size); \ + TBNZ(A, N, j64) +// Branch to MARK3 if cond (use j64) +#define B_MARK3(cond) \ + j64 = GETMARK3-(dyn->native_size); \ + Bcond(cond, j64) +// Test bit N of A and branch to MARK3 if not set +#define TBZ_MARK2(A, N) \ + j64 = GETMARK2-(dyn->native_size); \ + TBZ(A, N, j64) +// Branch to MARK3 unconditionnal (use j64) +#define B_MARK3_nocond \ + j64 = GETMARK3-(dyn->native_size); \ + B(j64) +// Branch to MARK3 if reg is not 0 (use j64) +#define CBNZx_MARK3(reg) \ + j64 = GETMARK3-(dyn->native_size); \ + CBNZx(reg, j64) +// Branch to MARK3 if reg is 0 (use j64) +#define CBZx_MARK3(reg) \ + j64 = GETMARK3-(dyn->native_size); \ + CBZx(reg, j64) +// Test bit N of A and branch to MARK3 if not set +#define TBZ_MARK3(A, N) \ + j64 = GETMARK3-(dyn->native_size); \ + TBZ(A, N, j64) +// Test bit N of A and branch to MARK3 if set +#define TBNZ_MARK3(A, N) \ + j64 = GETMARK3-(dyn->native_size); \ + TBNZ(A, N, j64) +// Branch to next instruction if cond (use j64) +#define B_NEXT(cond) \ + j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0; \ + Bcond(cond, j64) +// Branch to next instruction unconditionnal (use j64) +#define B_NEXT_nocond \ + j64 = 
(dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0;\ + B(j64) +// Branch to next instruction if reg is 0 (use j64) +#define CBZw_NEXT(reg) \ + j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0; \ + CBZw(reg, j64) +// Branch to next instruction if reg is 0 (use j64) +#define CBZx_NEXT(reg) \ + j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0; \ + CBZx(reg, j64) +// Branch to next instruction if reg is not 0 (use j64) +#define CBNZx_NEXT(reg) \ + j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0; \ + CBNZx(reg, j64) +// Test bit N of A and branch to next instruction if not set +#define TBZ_NEXT(A, N) \ + j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0; \ + TBZ(A, N, j64) +// Test bit N of A and branch to next instruction if set +#define TBNZ_NEXT(A, N) \ + j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0; \ + TBNZ(A, N, j64) +// Branch to MARKSEG if cond (use j64) +#define B_MARKSEG(cond) \ + j64 = GETMARKSEG-(dyn->native_size); \ + Bcond(cond, j64) +// Branch to MARKSEG if reg is 0 (use j64) +#define CBZw_MARKSEG(reg) \ + j64 = GETMARKSEG-(dyn->native_size); \ + CBZw(reg, j64) +// Branch to MARKSEG if reg is not 0 (use j64) +#define CBNZw_MARKSEG(reg) \ + j64 = GETMARKSEG-(dyn->native_size); \ + CBNZw(reg, j64) +// Branch to MARKLOCK if cond (use j64) +#define B_MARKLOCK(cond) \ + j64 = GETMARKLOCK-(dyn->native_size); \ + Bcond(cond, j64) +// Branch to MARKLOCK if reg is not 0 (use j64) +#define CBNZx_MARKLOCK(reg) \ + j64 = GETMARKLOCK-(dyn->native_size); \ + CBNZx(reg, j64) + +#define IFX(A) if((dyn->insts[ninst].x64.need_flags&(A))) +#define IFX_PENDOR0 if((dyn->insts[ninst].x64.need_flags&(X_PEND) || !dyn->insts[ninst].x64.need_flags)) +#define IFXX(A) if((dyn->insts[ninst].x64.need_flags==(A))) +#define IFX2X(A, B) if((dyn->insts[ninst].x64.need_flags==(A) || dyn->insts[ninst].x64.need_flags==(B) || dyn->insts[ninst].x64.need_flags==((A)|(B)))) +#define IFXN(A, B) if((dyn->insts[ninst].x64.need_flags&(A) && !(dyn->insts[ninst].x64.need_flags&(B)))) + +// Generate FCOM with s1 and s2 scratch regs (the VCMP is already done) +#define FCOM(s1, s2, s3) \ + LDRH_U12(s3, xEmu, offsetof(x64emu_t, sw)); /*offset is 8bits right?*/\ + MOV32w(s1, 0b0100011100000000); \ + BICw_REG(s3, s3, s1); \ + CSETw(s1, cMI); /* 1 if less than, 0 else */ \ + MOV32w(s2, 0b01000101); /* unordered */ \ + CSELw(s1, s2, s1, cVS); \ + MOV32w(s2, 0b01000000); /* zero */ \ + CSELw(s1, s2, s1, cEQ); \ + /* greater than leave 0 */ \ + ORRw_REG_LSL(s3, s3, s1, 8); \ + STRH_U12(s3, xEmu, offsetof(x64emu_t, sw)) + +// Generate FCOMI with s1 and s2 scratch regs (the VCMP is already done) +#define FCOMI(s1, s2) \ + IFX(X_CF|X_PF|X_ZF|X_PEND) { \ + MOV32w(s2, 0b01000101); \ + BICw_REG(xFlags, xFlags, s2); \ + CSETw(s1, cMI); /* 1 if less than, 0 else */ \ + /*s2 already set */ /* unordered */ \ + CSELw(s1, s2, s1, cVS); \ + MOV32w(s2, 0b01000000); /* zero */ \ + CSELw(s1, s2, s1, cEQ); \ + /* greater than leave 0 */ \ + ORRw_REG(xFlags, xFlags, s1); \ + } \ + SET_DFNONE(s1); \ + IFX(X_OF|X_PEND) { \ + BFCw(xFlags, F_OF, 1); \ + } \ + IFX(X_AF|X_PEND) { \ + BFCw(xFlags, F_AF, 1); \ + } \ + IFX(X_SF|X_PEND) { \ + BFCw(xFlags, F_SF, 1); \ + } \ + + +#define STORE_REG(A) STRx_U12(x##A, xEmu, offsetof(x64emu_t, regs[_##A])) +#define STP_REGS(A, B) STPx_S7_offset(x##A, x##B, xEmu, offsetof(x64emu_t, regs[_##A])) +#define LDP_REGS(A, B) LDPx_S7_offset(x##A, x##B, xEmu, offsetof(x64emu_t, regs[_##A])) +#define STORE_XEMU_REGS(A) \ + 
STORE_REG(RAX); \ + STORE_REG(RCX); \ + STORE_REG(RDX); \ + STORE_REG(RBX); \ + STORE_REG(RSP); \ + STORE_REG(RBP); \ + STORE_REG(RSI); \ + STORE_REG(RDI); \ + STORE_REG(R8); \ + STORE_REG(R9); \ + STORE_REG(R10); \ + STORE_REG(R11); \ + STORE_REG(R12); \ + STORE_REG(R13); \ + STORE_REG(R14); \ + STORE_REG(R15); \ + STRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags)); \ + if(A) {STRx_U12(A, xEmu, offsetof(x64emu_t, ip));} + +#define LOAD_REG(A) LDRx_U12(x##A, xEmu, offsetof(x64emu_t, regs[_##A])) +#define LOAD_XEMU_REGS(A) \ + LOAD_REG(RAX); \ + LOAD_REG(RCX); \ + LOAD_REG(RDX); \ + LOAD_REG(RBX); \ + LOAD_REG(RSP); \ + LOAD_REG(RBP); \ + LOAD_REG(RSI); \ + LOAD_REG(RDI); \ + LOAD_REG(R8); \ + LOAD_REG(R9); \ + LOAD_REG(R10); \ + LOAD_REG(R11); \ + LOAD_REG(R12); \ + LOAD_REG(R13); \ + LOAD_REG(R14); \ + LOAD_REG(R15); \ + LDRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags)); \ + if(A) {LDRx_U12(A, xEmu, offsetof(x64emu_t, ip));} + +#define STORE_XEMU_MINIMUM(A) \ + STORE_REG(RAX); \ + STORE_REG(RCX); \ + STORE_REG(RDX); \ + STORE_REG(RBX); \ + STORE_REG(RSP); \ + STORE_REG(RBP); \ + STORE_REG(RSI); \ + STORE_REG(RDI); \ + STORE_REG(R8); \ + STORE_REG(R9); \ + STRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags)); \ + if(A) {STRx_U12(A, xEmu, offsetof(x64emu_t, ip));} + +// Need to also store current value of some register, as they may be used by functions like setjump +// so RBX, RSP, RBP, R12..R15 (other are scratch or parameters), R10-R11 not usefull, but why not +// RBX, RSP and RBP are already saved in call function +#define STORE_XEMU_CALL(A) \ + STP_REGS(R10, R11); \ + STP_REGS(R12, R13); \ + STP_REGS(R14, R15); \ + if(A) {STPx_S7_offset(xFlags, A, xEmu, offsetof(x64emu_t, eflags));} \ + else {STRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags));} + +#define LOAD_XEMU_CALL(A) \ + if(A) {LDPx_S7_offset(xFlags, A, xEmu, offsetof(x64emu_t, eflags));} \ + else {LDRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags));}; \ + if(A==xRIP) dyn->last_ip = 0 + +#define LOAD_XEMU_REM() \ + LDP_REGS(R10, R11); \ + LDP_REGS(R12, R13); \ + LDP_REGS(R14, R15) + +#define SET_DFNONE(S) if(!dyn->dfnone) {MOVZw(S, d_none); STRw_U12(S, xEmu, offsetof(x64emu_t, df)); dyn->dfnone=1;} +#define SET_DF(S, N) if((N)!=d_none) {MOVZw(S, (N)); STRw_U12(S, xEmu, offsetof(x64emu_t, df)); dyn->dfnone=0;} else SET_DFNONE(S) +#define SET_NODF() dyn->dfnone = 0 +#define SET_DFOK() dyn->dfnone = 1 + +#ifndef READFLAGS +#define READFLAGS(A) \ + if(((A)!=X_PEND) && dyn->state_flags!=SF_SET && dyn->state_flags!=SF_SET_PENDING) { \ + if(dyn->state_flags!=SF_PENDING) { \ + LDRw_U12(x3, xEmu, offsetof(x64emu_t, df)); \ + j64 = (GETMARKF)-(dyn->native_size); \ + CBZw(x3, j64); \ + } \ + CALL_(UpdateFlags, -1, 0); \ + MARKF; \ + dyn->state_flags = SF_SET; \ + SET_DFOK(); \ + } +#endif +#ifndef SETFLAGS +#define SETFLAGS(A, B) \ + if(dyn->state_flags!=SF_SET && B==SF_SUBSET && (dyn->insts[ninst].x64.need_flags&(~((A)/*|X_PEND*/)))) \ + READFLAGS(dyn->insts[ninst].x64.need_flags&(~(A)|X_PEND)); \ + dyn->state_flags = (B==SF_SUBSET)?SF_SET: \ + ((B==SF_SET_PENDING && !(dyn->insts[ninst].x64.need_flags&X_PEND)?SF_SET:B)) + +#endif +#ifndef JUMP +#define JUMP(A) +#endif +#ifndef BARRIER +#define BARRIER(A) +#endif +#ifndef BARRIER_NEXT +#define BARRIER_NEXT(A) +#endif +#define UFLAG_OP1(A) if(dyn->insts[ninst].x64.need_flags) {STRxw_U12(A, xEmu, offsetof(x64emu_t, op1));} +#define UFLAG_OP2(A) if(dyn->insts[ninst].x64.need_flags) {STRxw_U12(A, xEmu, offsetof(x64emu_t, op2));} +#define UFLAG_OP12(A1, A2) if(dyn->insts[ninst].x64.need_flags) 
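/* Editor's note (not part of the patch): conceptual sketch of the deferred-flags
 * scheme that SET_DF, UFLAG_OP1/OP2/RES and READFLAGS above rely on.  Instead of
 * computing EFLAGS after every instruction, the generated code stores the operands,
 * the result and an operation tag (df); only when a later instruction actually
 * consumes the flags (READFLAGS) are they materialised, here by a helper standing in
 * for UpdateFlags.  Field and enum names are simplified / hypothetical. */
#include <stdint.h>

enum { d_none = 0, d_add32, d_sub32 /* ... one tag per flag-producing operation */ };

typedef struct {
    uint64_t op1, op2, res;   /* last flag-producing operation, recorded by UFLAG_* macros */
    uint32_t df;              /* which operation is pending; d_none if flags are up to date */
    uint64_t eflags;
} mini_emu_t;

static void update_flags(mini_emu_t* emu)
{
    if (emu->df == d_none) return;            /* nothing pending */
    switch (emu->df) {
    case d_add32: {
        uint32_t r = (uint32_t)emu->res;
        /* example: just the zero flag (bit 6), to keep the sketch short */
        emu->eflags = (emu->eflags & ~(1u << 6)) | ((r == 0) ? (1u << 6) : 0);
        break;
    }
    default:
        break;
    }
    emu->df = d_none;                         /* flags are now concrete */
}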
{STRxw_U12(A1, xEmu, offsetof(x64emu_t, op1));STRxw_U12(A2, 0, offsetof(x64emu_t, op2));} +#define UFLAG_RES(A) if(dyn->insts[ninst].x64.need_flags) {STRxw_U12(A, xEmu, offsetof(x64emu_t, res));} +#define UFLAG_DF(r, A) if(dyn->insts[ninst].x64.need_flags) {SET_DF(r, A)} +#define UFLAG_IF if(dyn->insts[ninst].x64.need_flags) +#ifndef DEFAULT +#define DEFAULT *ok = -1; BARRIER(2) +#endif +#ifndef NEW_BARRIER_INST +#define NEW_BARRIER_INST +#endif +#ifndef TABLE64 +#define TABLE64(A, V) +#endif +#ifndef FTABLE64 +#define FTABLE64(A, V) +#endif + +#if STEP < 2 +#define GETIP(A) +#define GETIP_(A) +#else +// put value in the Table64 even if not using it for now to avoid difference between Step2 and Step3. Needs to be optimized later... +#define GETIP(A) \ + if(dyn->last_ip && ((A)-dyn->last_ip)<0x1000) { \ + uint64_t _delta_ip = (A)-dyn->last_ip; \ + dyn->last_ip += _delta_ip; \ + if(_delta_ip) { \ + ADDx_U12(xRIP, xRIP, _delta_ip); \ + } \ + } else { \ + dyn->last_ip = (A); \ + if(dyn->last_ip<0xffffffff) { \ + MOV64x(xRIP, dyn->last_ip); \ + } else \ + TABLE64(xRIP, dyn->last_ip); \ + } +#define GETIP_(A) \ + if(dyn->last_ip && ((A)-dyn->last_ip)<0x1000) { \ + uint64_t _delta_ip = (A)-dyn->last_ip; \ + if(_delta_ip) {ADDx_U12(xRIP, xRIP, _delta_ip);}\ + } else { \ + if((A)<0xffffffff) { \ + MOV64x(xRIP, (A)); \ + } else \ + TABLE64(xRIP, (A)); \ + } +#endif + +#if STEP < 2 +#define PASS2IF(A, B) if(A) +#elif STEP == 2 +#define PASS2IF(A, B) if(A) dyn->insts[ninst].pass2choice = B; if(dyn->insts[ninst].pass2choice == B) +#else +#define PASS2IF(A, B) if(dyn->insts[ninst].pass2choice == B) +#endif + +#define MODREG ((nextop&0xC0)==0xC0) + +void arm64_epilog(); +void* arm64_next(x64emu_t* emu, uintptr_t addr); + +#ifndef STEPNAME +#define STEPNAME3(N,M) N##M +#define STEPNAME2(N,M) STEPNAME3(N,M) +#define STEPNAME(N) STEPNAME2(N, STEP) +#endif + +#define arm_pass STEPNAME(arm_pass) + +#define dynarec64_00 STEPNAME(dynarec64_00) +#define dynarec64_0F STEPNAME(dynarec64_0F) +#define dynarec64_64 STEPNAME(dynarec64_64) +#define dynarec64_65 STEPNAME(dynarec64_65) +#define dynarec64_66 STEPNAME(dynarec64_66) +#define dynarec64_67 STEPNAME(dynarec64_67) +#define dynarec64_D8 STEPNAME(dynarec64_D8) +#define dynarec64_D9 STEPNAME(dynarec64_D9) +#define dynarec64_DA STEPNAME(dynarec64_DA) +#define dynarec64_DB STEPNAME(dynarec64_DB) +#define dynarec64_DC STEPNAME(dynarec64_DC) +#define dynarec64_DD STEPNAME(dynarec64_DD) +#define dynarec64_DE STEPNAME(dynarec64_DE) +#define dynarec64_DF STEPNAME(dynarec64_DF) +#define dynarec64_F0 STEPNAME(dynarec64_F0) +#define dynarec64_660F STEPNAME(dynarec64_660F) +#define dynarec64_6664 STEPNAME(dynarec64_6664) +#define dynarec64_F20F STEPNAME(dynarec64_F20F) +#define dynarec64_F30F STEPNAME(dynarec64_F30F) + +#define geted STEPNAME(geted) +#define geted32 STEPNAME(geted32) +#define geted16 STEPNAME(geted16) +#define jump_to_epilog STEPNAME(jump_to_epilog) +#define jump_to_next STEPNAME(jump_to_next) +#define ret_to_epilog STEPNAME(ret_to_epilog) +#define retn_to_epilog STEPNAME(retn_to_epilog) +#define iret_to_epilog STEPNAME(iret_to_epilog) +#define call_c STEPNAME(call_c) +#define call_n STEPNAME(call_n) +#define grab_segdata STEPNAME(grab_segdata) +#define emit_cmp8 STEPNAME(emit_cmp8) +#define emit_cmp16 STEPNAME(emit_cmp16) +#define emit_cmp32 STEPNAME(emit_cmp32) +#define emit_cmp8_0 STEPNAME(emit_cmp8_0) +#define emit_cmp16_0 STEPNAME(emit_cmp16_0) +#define emit_cmp32_0 STEPNAME(emit_cmp32_0) +#define emit_test8 STEPNAME(emit_test8) +#define emit_test16 
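/* Editor's note (not part of the patch): sketch of the STEPNAME trick used above.
 * The opcode translators are compiled several times, once per pass, with a different
 * STEP value each time; the two-level paste gives every pass its own symbol while the
 * source keeps referring to the unsuffixed name.  The indirection through a second
 * macro is what forces STEP to be expanded to its value before pasting.  The example
 * below is self-contained and uses made-up names. */
#define PASTE2(N, M) N##M
#define PASTE1(N, M) PASTE2(N, M)        /* expands M (here STEP) before concatenating */

#define STEP 2
#define my_func PASTE1(my_func_, STEP)   /* my_func now names my_func_2 */

int my_func(int x) { return x + STEP; }  /* actually defines int my_func_2(int) */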
STEPNAME(emit_test16) +#define emit_test32 STEPNAME(emit_test32) +#define emit_add32 STEPNAME(emit_add32) +#define emit_add32c STEPNAME(emit_add32c) +#define emit_add8 STEPNAME(emit_add8) +#define emit_add8c STEPNAME(emit_add8c) +#define emit_sub32 STEPNAME(emit_sub32) +#define emit_sub32c STEPNAME(emit_sub32c) +#define emit_sub8 STEPNAME(emit_sub8) +#define emit_sub8c STEPNAME(emit_sub8c) +#define emit_or32 STEPNAME(emit_or32) +#define emit_or32c STEPNAME(emit_or32c) +#define emit_xor32 STEPNAME(emit_xor32) +#define emit_xor32c STEPNAME(emit_xor32c) +#define emit_and32 STEPNAME(emit_and32) +#define emit_and32c STEPNAME(emit_and32c) +#define emit_or8 STEPNAME(emit_or8) +#define emit_or8c STEPNAME(emit_or8c) +#define emit_xor8 STEPNAME(emit_xor8) +#define emit_xor8c STEPNAME(emit_xor8c) +#define emit_and8 STEPNAME(emit_and8) +#define emit_and8c STEPNAME(emit_and8c) +#define emit_add16 STEPNAME(emit_add16) +#define emit_add16c STEPNAME(emit_add16c) +#define emit_sub16 STEPNAME(emit_sub16) +#define emit_sub16c STEPNAME(emit_sub16c) +#define emit_or16 STEPNAME(emit_or16) +#define emit_or16c STEPNAME(emit_or16c) +#define emit_xor16 STEPNAME(emit_xor16) +#define emit_xor16c STEPNAME(emit_xor16c) +#define emit_and16 STEPNAME(emit_and16) +#define emit_and16c STEPNAME(emit_and16c) +#define emit_inc32 STEPNAME(emit_inc32) +#define emit_inc16 STEPNAME(emit_inc16) +#define emit_inc8 STEPNAME(emit_inc8) +#define emit_dec32 STEPNAME(emit_dec32) +#define emit_dec16 STEPNAME(emit_dec16) +#define emit_dec8 STEPNAME(emit_dec8) +#define emit_adc32 STEPNAME(emit_adc32) +#define emit_adc32c STEPNAME(emit_adc32c) +#define emit_adc8 STEPNAME(emit_adc8) +#define emit_adc8c STEPNAME(emit_adc8c) +#define emit_adc16 STEPNAME(emit_adc16) +#define emit_adc16c STEPNAME(emit_adc16c) +#define emit_sbb32 STEPNAME(emit_sbb32) +#define emit_sbb32c STEPNAME(emit_sbb32c) +#define emit_sbb8 STEPNAME(emit_sbb8) +#define emit_sbb8c STEPNAME(emit_sbb8c) +#define emit_sbb16 STEPNAME(emit_sbb16) +#define emit_sbb16c STEPNAME(emit_sbb16c) +#define emit_neg32 STEPNAME(emit_neg32) +#define emit_neg16 STEPNAME(emit_neg16) +#define emit_neg8 STEPNAME(emit_neg8) +#define emit_shl32 STEPNAME(emit_shl32) +#define emit_shl32c STEPNAME(emit_shl32c) +#define emit_shr32 STEPNAME(emit_shr32) +#define emit_shr32c STEPNAME(emit_shr32c) +#define emit_sar32c STEPNAME(emit_sar32c) +#define emit_rol32c STEPNAME(emit_rol32c) +#define emit_ror32c STEPNAME(emit_ror32c) +#define emit_shrd32c STEPNAME(emit_shrd32c) +#define emit_shld32c STEPNAME(emit_shld32c) + +#define emit_pf STEPNAME(emit_pf) + +#define x87_do_push STEPNAME(x87_do_push) +#define x87_do_push_empty STEPNAME(x87_do_push_empty) +#define x87_do_pop STEPNAME(x87_do_pop) +#define x87_get_cache STEPNAME(x87_get_cache) +#define x87_get_st STEPNAME(x87_get_st) +#define x87_refresh STEPNAME(x87_refresh) +#define x87_forget STEPNAME(x87_forget) +#define x87_reget_st STEPNAME(x87_reget_st) +#define x87_stackcount STEPNAME(x87_stackcount) +#define x87_setround STEPNAME(x87_setround) +#define x87_restoreround STEPNAME(x87_restoreround) +#define sse_setround STEPNAME(sse_setround) +#define mmx_get_reg STEPNAME(mmx_get_reg) +#define mmx_get_reg_empty STEPNAME(mmx_get_reg_empty) +#define sse_get_reg STEPNAME(sse_get_reg) +#define sse_get_reg_empty STEPNAME(sse_get_reg_empty) +#define sse_forget_reg STEPNAME(sse_forget_reg) +#define sse_purge07cache STEPNAME(sse_purge07cache) + +#define fpu_pushcache STEPNAME(fpu_pushcache) +#define fpu_popcache STEPNAME(fpu_popcache) +#define fpu_reset 
STEPNAME(fpu_reset) +#define fpu_purgecache STEPNAME(fpu_purgecache) +#define mmx_purgecache STEPNAME(mmx_purgecache) +#define x87_purgecache STEPNAME(x87_purgecache) +#ifdef HAVE_TRACE +#define fpu_reflectcache STEPNAME(fpu_reflectcache) +#endif + +/* setup r2 to address pointed by */ +uintptr_t geted(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, int64_t* fixaddress, int absmax, uint32_t mask, rex_t rex, int s, int delta); + +/* setup r2 to address pointed by */ +uintptr_t geted32(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, int64_t* fixaddress, int absmax, uint32_t mask, rex_t rex, int s, int delta); + +/* setup r2 to address pointed by */ +uintptr_t geted16(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, int64_t* fixaddress, int absmax, uint32_t mask, int s); + + +// generic x64 helper +void jump_to_epilog(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst); +void jump_to_next(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst); +void ret_to_epilog(dynarec_arm_t* dyn, int ninst); +void retn_to_epilog(dynarec_arm_t* dyn, int ninst, int n); +void iret_to_epilog(dynarec_arm_t* dyn, int ninst, int is64bits); +void call_c(dynarec_arm_t* dyn, int ninst, void* fnc, int reg, int ret, int saveflags, int save_reg); +void call_n(dynarec_arm_t* dyn, int ninst, void* fnc, int w); +void grab_segdata(dynarec_arm_t* dyn, uintptr_t addr, int ninst, int reg, int segment); +void emit_cmp8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5); +void emit_cmp16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5); +void emit_cmp32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5); +void emit_cmp8_0(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4); +void emit_cmp16_0(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4); +void emit_cmp32_0(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4); +void emit_test8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5); +void emit_test16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5); +void emit_test32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); +void emit_add32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); +void emit_add32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4, int s5); +void emit_add8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4); +void emit_add8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); +void emit_sub32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); +void emit_sub32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4, int s5); +void emit_sub8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4); +void emit_sub8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4, int s5); +void emit_or32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); +void emit_or32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4); +void emit_xor32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); +void emit_xor32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4); +void emit_and32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); +void emit_and32c(dynarec_arm_t* dyn, int ninst, rex_t rex, 
int s1, int64_t c, int s3, int s4); +void emit_or8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4); +void emit_or8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); +void emit_xor8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4); +void emit_xor8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); +void emit_and8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4); +void emit_and8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); +void emit_add16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4); +//void emit_add16c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); +void emit_sub16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4); +//void emit_sub16c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); +void emit_or16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4); +//void emit_or16c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); +void emit_xor16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4); +//void emit_xor16c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); +void emit_and16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4); +//void emit_and16c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); +void emit_inc32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4); +void emit_inc16(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4); +void emit_inc8(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4); +void emit_dec32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4); +void emit_dec16(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4); +void emit_dec8(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4); +void emit_adc32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); +//void emit_adc32c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); +void emit_adc8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4); +void emit_adc8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4, int s5); +void emit_adc16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4); +//void emit_adc16c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); +void emit_sbb32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); +//void emit_sbb32c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); +void emit_sbb8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4); +void emit_sbb8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4, int s5); +void emit_sbb16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4); +//void emit_sbb16c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); +void emit_neg32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4); +void emit_neg16(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4); +void emit_neg8(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4); +void emit_shl32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); +void emit_shl32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, int s3, int s4); +void emit_shr32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); +void emit_shr32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, int s3, int s4); +void emit_sar32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, int s3, 
int s4); +void emit_rol32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, int s3, int s4); +void emit_ror32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, int s3, int s4); +void emit_shrd32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int32_t c, int s3, int s4); +void emit_shld32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int32_t c, int s3, int s4); + +void emit_pf(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4); + +// x87 helper +// cache of the local stack counter, to avoid upadte at every call +void x87_stackcount(dynarec_arm_t* dyn, int ninst, int scratch); +// fpu push. Return the Dd value to be used +int x87_do_push(dynarec_arm_t* dyn, int ninst); +// fpu push. Do not allocate a cache register. Needs a scratch register to do x87stack synch (or 0 to not do it) +void x87_do_push_empty(dynarec_arm_t* dyn, int ninst, int s1); +// fpu pop. All previous returned Dd should be considered invalid +void x87_do_pop(dynarec_arm_t* dyn, int ninst); +// get cache index for a x87 reg, create the entry if needed +int x87_get_cache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int a); +// get vfpu register for a x87 reg, create the entry if needed +int x87_get_st(dynarec_arm_t* dyn, int ninst, int s1, int s2, int a); +// refresh a value from the cache ->emu (nothing done if value is not cached) +void x87_refresh(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st); +// refresh a value from the cache ->emu and then forget the cache (nothing done if value is not cached) +void x87_forget(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st); +// refresh the cache value from emu +void x87_reget_st(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st); +// Set rounding according to cw flags, return reg to restore flags +int x87_setround(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3); +// Restore round flag +void x87_restoreround(dynarec_arm_t* dyn, int ninst, int s1); +// Set rounding according to mxcsr flags, return reg to restore flags +int sse_setround(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3); + +//MMX helpers +// get neon register for a MMX reg, create the entry if needed +int mmx_get_reg(dynarec_arm_t* dyn, int ninst, int s1, int a); +// get neon register for a MMX reg, but don't try to synch it if it needed to be created +int mmx_get_reg_empty(dynarec_arm_t* dyn, int ninst, int s1, int a); + +//SSE/SSE2 helpers +// get neon register for a SSE reg, create the entry if needed +int sse_get_reg(dynarec_arm_t* dyn, int ninst, int s1, int a); +// get neon register for a SSE reg, but don't try to synch it if it needed to be created +int sse_get_reg_empty(dynarec_arm_t* dyn, int ninst, int s1, int a); +// forget neon register for a SSE reg, create the entry if needed +void sse_forget_reg(dynarec_arm_t* dyn, int ninst, int a); +// purge the XMM0..XMM7 cache (before function call) +void sse_purge07cache(dynarec_arm_t* dyn, int ninst, int s1); + +// common coproc helpers +// reset the cache +void fpu_reset(dynarec_arm_t* dyn, int ninst); +// purge the FPU cache (needs 3 scratch registers) +void fpu_purgecache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3); +// purge MMX cache +void mmx_purgecache(dynarec_arm_t* dyn, int ninst, int s1); +// purge x87 cache +void x87_purgecache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3); +#ifdef HAVE_TRACE +void fpu_reflectcache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3); +#endif +void fpu_pushcache(dynarec_arm_t* dyn, int ninst, int s1, int not07); +void 
fpu_popcache(dynarec_arm_t* dyn, int ninst, int s1, int not07); + +uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); +uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); +uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int seg, int* ok, int* need_epilog); +//uintptr_t dynarec64_65(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep,int* ok, int* need_epilog); +uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); +uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); +uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); +uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); +//uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); +uintptr_t dynarec64_DB(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); +uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); +uintptr_t dynarec64_DD(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); +//uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); +uintptr_t dynarec64_DF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); +uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); +uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); +uintptr_t dynarec64_6664(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); +uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog); +uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog); + +#if STEP < 2 +#define PASS2(A) +#else +#define PASS2(A) A +#endif + +#if STEP < 3 +#define PASS3(A) +#else +#define PASS3(A) A +#endif + +#if STEP < 3 +#define MAYUSE(A) (void)A +#else +#define MAYUSE(A) +#endif + +#define GOCOND(B, T1, T2) \ + case B+0x0: \ + INST_NAME(T1 "O " T2); \ + GO( TSTw_mask(xFlags, 0b010101, 0) \ + , cEQ, cNE, X_OF) \ + break; \ + case B+0x1: \ + INST_NAME(T1 "NO " T2); \ + GO( TSTw_mask(xFlags, 0b010101, 0) \ + , cNE, cEQ, X_OF) \ + break; \ + case B+0x2: \ + INST_NAME(T1 "C " T2); \ + GO( TSTw_mask(xFlags, 0, 0) \ + , cEQ, cNE, X_CF) \ + break; \ + case B+0x3: \ + INST_NAME(T1 "NC " T2); \ + GO( TSTw_mask(xFlags, 0, 0) \ + , cNE, cEQ, X_CF) \ + break; \ + case B+0x4: \ + INST_NAME(T1 "Z " T2); \ + GO( TSTw_mask(xFlags, 0b011010, 0) \ + , cEQ, cNE, X_ZF) \ + break; \ + case B+0x5: \ + INST_NAME(T1 "NZ " T2); \ + GO( TSTw_mask(xFlags, 0b011010, 0) \ + , cNE, cEQ, X_ZF) \ + break; \ + case B+0x6: \ + INST_NAME(T1 
"BE " T2); \ + GO( MOV32w(x1, (1< +#include +#include +#include +#include +#include + +#include "debug.h" +#include "box64context.h" +#include "dynarec.h" +#include "emu/x64emu_private.h" +#include "emu/x64run_private.h" +#include "x64run.h" +#include "x64emu.h" +#include "box64stack.h" +#include "emu/x64run_private.h" +#include "x64trace.h" +#include "dynablock.h" +#include "dynarec_native.h" +#include "custommem.h" + +#include "arm64_printer.h" +#include "dynarec_arm64_private.h" +#include "dynarec_arm64_functions.h" +#include "dynarec_arm64_helper.h" + +#ifndef STEP +#error No STEP defined +#endif + +uintptr_t arm_pass(dynarec_arm_t* dyn, uintptr_t addr) +{ + int ok = 1; + int ninst = 0; + uintptr_t ip = addr; + uintptr_t init_addr = addr; + rex_t rex; + int rep; // 0 none, 1=F2 prefix, 2=F3 prefix + int need_epilog = 1; + dyn->sons_size = 0; + // Clean up (because there are multiple passes) + dyn->state_flags = 0; + dyn->dfnone = 0; + dyn->last_ip = ip; // RIP is always set at start of block! + MAYUSE(init_addr); + fpu_reset(dyn, ninst); + // ok, go now + INIT; + while(ok) { + ip = addr; + if((dyn->insts[ninst].x64.barrier==1)) { + dyn->last_ip = 0; + NEW_BARRIER_INST; + } + NEW_INST; + fpu_reset_scratch(dyn); +#ifdef HAVE_TRACE + if(my_context->dec && box64_dynarec_trace) { + if((trace_end == 0) + || ((ip >= trace_start) && (ip < trace_end))) { + MESSAGE(LOG_DUMP, "TRACE ----\n"); + fpu_reflectcache(dyn, ninst, x1, x2, x3); + GETIP(ip); + MOVx_REG(x1, xRIP); + STORE_XEMU_CALL(xRIP); + MOV32w(x2, 1); + CALL(PrintTrace, -1); + LOAD_XEMU_CALL(xRIP); + MESSAGE(LOG_DUMP, "----------\n"); + } + } +#endif + + rep = 0; + uint8_t pk = PK(0); + while((pk==0xF2) || (pk==0xF3)) { + rep = pk-0xF1; + ++addr; + pk = PK(0); + } + while(pk==0x3E) { //Branch Taken Hint ignored + ++addr; + pk = PK(0); + } + rex.rex = 0; + while(pk>=0x40 && pk<=0x4f) { + rex.rex = pk; + ++addr; + pk = PK(0); + } + + addr = dynarec64_00(dyn, addr, ip, ninst, rex, rep, &ok, &need_epilog); + + INST_EPILOG; + + if(dyn->insts[ninst+1].x64.barrier) { + fpu_purgecache(dyn, ninst, x1, x2, x3); + if(dyn->insts[ninst+1].x64.barrier!=2) { + dyn->state_flags = 0; + dyn->dfnone = 0; + } + } + #if STEP == 0 + if(!ok && !need_epilog && box64_dynarec_bigblock && getProtection(addr+3)&~PROT_CUSTOM && !IsInHotPage(addr+3)) + if(*(uint32_t*)addr!=0) { // check if need to continue (but is next 4 bytes are 0, stop) + uintptr_t next = get_closest_next(dyn, addr); + if(next && ( + (((next-addr)<15) && is_nops(dyn, addr, next-addr)) + ||(((next-addr)<30) && is_instructions(dyn, addr, next-addr)) )) + { + dynarec_log(LOG_DEBUG, "Extend block %p, %p -> %p (ninst=%d)\n", dyn, (void*)addr, (void*)next, ninst); + ok = 1; + } else if(next && (next-addr)<30) { + dynarec_log(LOG_DEBUG, "Cannot extend block %p -> %p (%02X %02X %02X %02X %02X %02X %02X %02x)\n", (void*)addr, (void*)next, PK(0), PK(1), PK(2), PK(3), PK(4), PK(5), PK(6), PK(7)); + } + } + #else + if(!ok && !need_epilog && (addr < (dyn->start+dyn->isize))) { + ok = 1; + } + #endif + if(ok<0) {ok = 0; need_epilog=1;} + ++ninst; + #if STEP == 0 + if(ok && !isJumpTableDefault64((void*)addr)) + #else + if(ok && (ninst==dyn->size)) + #endif + { + #if STEP == 3 + dynarec_log(LOG_DEBUG, "Stopping block %p (%d / %d)\n",(void*)init_addr, ninst, dyn->size); + #endif + BARRIER(2); + fpu_purgecache(dyn, ninst, x1, x2, x3); + jump_to_next(dyn, addr, 0, ninst); + ok=0; need_epilog=0; + } + } + if(need_epilog) { + fpu_purgecache(dyn, ninst, x1, x2, x3); + jump_to_epilog(dyn, ip, 0, ninst); // no linker 
here, it's an unknow instruction + } + FINI; + MESSAGE(LOG_DUMP, "---- END OF BLOCK ---- (%d, %d sons)\n", dyn->size, dyn->sons_size); + return addr; +} \ No newline at end of file diff --git a/src/dynarec/arm64/dynarec_arm64_pass0.h b/src/dynarec/arm64/dynarec_arm64_pass0.h new file mode 100755 index 00000000..d4818ac5 --- /dev/null +++ b/src/dynarec/arm64/dynarec_arm64_pass0.h @@ -0,0 +1,39 @@ + +#define INIT uintptr_t sav_addr=addr +#define FINI \ + dyn->isize = addr-sav_addr; \ + dyn->insts[ninst].x64.addr = addr; \ + if(ninst) dyn->insts[ninst-1].x64.size = dyn->insts[ninst].x64.addr - dyn->insts[ninst-1].x64.addr + +#define MESSAGE(A, ...) +#define SETFLAGS(A, B) +#define READFLAGS(A) +#define EMIT(A) +#define JUMP(A) add_next(dyn, (uintptr_t)A); dyn->insts[ninst].x64.jmp = A +#define BARRIER(A) dyn->insts[ninst].x64.barrier = A +#define BARRIER_NEXT(A) if(ninstsize) dyn->insts[ninst+1].x64.barrier = A +#define NEW_INST \ + if(dyn->size+3>=dyn->cap) { \ + dyn->insts = (instruction_arm64_t*)realloc(dyn->insts, sizeof(instruction_arm64_t)*dyn->cap*2); \ + memset(&dyn->insts[dyn->cap], 0, sizeof(instruction_arm64_t)*dyn->cap); \ + dyn->cap *= 2; \ + } \ + ++dyn->size; \ + dyn->insts[ninst].x64.addr = ip; \ + if(ninst) dyn->insts[ninst-1].x64.size = dyn->insts[ninst].x64.addr - dyn->insts[ninst-1].x64.addr +#define INST_EPILOG +#define INST_NAME(name) +#define DEFAULT \ + --dyn->size; \ + *ok = -1; \ + if(box64_dynarec_log>=LOG_INFO) {\ + dynarec_log(LOG_NONE, "%p: Dynarec stopped because of Opcode %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X", \ + (void*)ip, PKip(0), \ + PKip(1), PKip(2), PKip(3), \ + PKip(4), PKip(5), PKip(6), \ + PKip(7), PKip(8), PKip(9), \ + PKip(10),PKip(11),PKip(12), \ + PKip(13),PKip(14)); \ + printFunctionAddr(ip, " => "); \ + dynarec_log(LOG_NONE, "\n"); \ + } diff --git a/src/dynarec/arm64/dynarec_arm64_pass1.h b/src/dynarec/arm64/dynarec_arm64_pass1.h new file mode 100755 index 00000000..a4abcf19 --- /dev/null +++ b/src/dynarec/arm64/dynarec_arm64_pass1.h @@ -0,0 +1,10 @@ +#define INIT +#define FINI +#define MESSAGE(A, ...) +#define EMIT(A) +#define READFLAGS(A) dyn->insts[ninst].x64.use_flags = A +#define SETFLAGS(A,B) {dyn->insts[ninst].x64.set_flags = A; dyn->insts[ninst].x64.state_flags = B;} + +#define NEW_INST +#define INST_EPILOG +#define INST_NAME(name) diff --git a/src/dynarec/arm64/dynarec_arm64_pass2.h b/src/dynarec/arm64/dynarec_arm64_pass2.h new file mode 100755 index 00000000..3d4b6f03 --- /dev/null +++ b/src/dynarec/arm64/dynarec_arm64_pass2.h @@ -0,0 +1,11 @@ +#define INIT dyn->native_size = 0 +#define FINI if(ninst) {dyn->insts[ninst].address = (dyn->insts[ninst-1].address+dyn->insts[ninst-1].size);} + +#define MESSAGE(A, ...) 
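/* Editor's note (not part of the patch): the pass headers above redefine the same
 * macros so that one opcode translator does different work on each pass: pass 0 sizes
 * the x86 block and records barriers/jumps, pass 1 records flag usage, pass 2 only
 * advances counters to learn the native size and address of every instruction, and
 * pass 3 finally writes the 32-bit opcodes into dyn->block.  A minimal stand-alone
 * illustration of that pattern (names are made up): */
#include <stdint.h>
#include <stddef.h>

#ifndef STEP
#define STEP 0
#endif

typedef struct { uint32_t *block; size_t native_size; } ctx_t;

#if STEP == 2
  /* measuring pass: count bytes only, emit nothing */
  #define EMIT_WORD(ctx, w) ((ctx)->native_size += 4)
#elif STEP == 3
  /* emission pass: sizes are known, actually store the instruction word */
  #define EMIT_WORD(ctx, w) (*(ctx)->block++ = (w), (ctx)->native_size += 4)
#else
  /* scanning passes: native code is irrelevant */
  #define EMIT_WORD(ctx, w) ((void)(ctx), (void)(w))
#endif

static void emit_nop(ctx_t *ctx) { EMIT_WORD(ctx, 0xD503201Fu); }  /* ARM64 NOP */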
+#define EMIT(A) dyn->insts[ninst].size+=4; dyn->native_size+=4 +#define NEW_INST if(ninst) {dyn->insts[ninst].address = (dyn->insts[ninst-1].address+dyn->insts[ninst-1].size);} +#define INST_EPILOG dyn->insts[ninst].epilog = dyn->native_size; +#define INST_NAME(name) +#define NEW_BARRIER_INST if(ninst) ++dyn->sons_size +#define TABLE64(A, V) {Table64(dyn, (V)); EMIT(0);} +#define FTABLE64(A, V) {mmx87_regs_t v = {.d = V}; Table64(dyn, v.q); EMIT(0);} \ No newline at end of file diff --git a/src/dynarec/arm64/dynarec_arm64_pass3.h b/src/dynarec/arm64/dynarec_arm64_pass3.h new file mode 100755 index 00000000..00f39f74 --- /dev/null +++ b/src/dynarec/arm64/dynarec_arm64_pass3.h @@ -0,0 +1,36 @@ +#define INIT +#define FINI +#define EMIT(A) \ + if(box64_dynarec_dump) {dynarec_log(LOG_NONE, "\t%08x\t%s\n", (uint32_t)(A), arm64_print(A, (uintptr_t)dyn->block));} \ + *(uint32_t*)(dyn->block) = (uint32_t)(A); \ + dyn->block += 4; dyn->native_size += 4; \ + dyn->insts[ninst].size2 += 4 + +#define MESSAGE(A, ...) if(box64_dynarec_dump) dynarec_log(LOG_NONE, __VA_ARGS__) +#define NEW_INST +#define INST_EPILOG +#define INST_NAME(name) \ + if(box64_dynarec_dump) {\ + printf_x64_instruction(my_context->dec, &dyn->insts[ninst].x64, name); \ + dynarec_log(LOG_NONE, "%s%p: %d emited opcodes, state=%d/%d, set=%X, use=%X, need=%X%s\n", \ + (box64_dynarec_dump>1)?"\e[32m":"", \ + (void*)(dyn->native_start+dyn->insts[ninst].address), \ + dyn->insts[ninst].size/4, \ + dyn->insts[ninst].x64.state_flags, \ + dyn->state_flags, \ + dyn->insts[ninst].x64.set_flags, \ + dyn->insts[ninst].x64.use_flags, \ + dyn->insts[ninst].x64.need_flags, \ + (box64_dynarec_dump>1)?"\e[m":""); \ + } + +#define NEW_BARRIER_INST \ + if(ninst) { \ + dyn->sons_x64[dyn->sons_size] = (uintptr_t)ip; \ + dyn->sons_native[dyn->sons_size] = dyn->block; \ + MESSAGE(LOG_DUMP, "----> potential Son here\n");\ + ++dyn->sons_size; \ + } + +#define TABLE64(A, V) {int val64offset = Table64(dyn, (V)); MESSAGE(LOG_DUMP, " Table64: 0x%lx\n", (V)); LDRx_literal(A, val64offset);} +#define FTABLE64(A, V) {mmx87_regs_t v = {.d = V}; int val64offset = Table64(dyn, v.q); MESSAGE(LOG_DUMP, " FTable64: %g\n", v.d); VLDR64_literal(A, val64offset);} \ No newline at end of file diff --git a/src/dynarec/arm64/dynarec_arm64_private.h b/src/dynarec/arm64/dynarec_arm64_private.h new file mode 100755 index 00000000..252901a9 --- /dev/null +++ b/src/dynarec/arm64/dynarec_arm64_private.h @@ -0,0 +1,64 @@ +#ifndef __DYNAREC_ARM_PRIVATE_H_ +#define __DYNAREC_ARM_PRIVATE_H_ + +#include "../dynarec_private.h" + +typedef struct x64emu_s x64emu_t; +typedef struct dynablock_s dynablock_t; + +typedef struct instruction_arm64_s { + instruction_x64_t x64; + uintptr_t address; // (start) address of the arm emited instruction + uintptr_t epilog; // epilog of current instruction (can be start of next, of barrier stuff) + int size; // size of the arm emited instruction + int size2; // size of the arm emited instrucion after pass2 + uintptr_t mark, mark2, mark3; + uintptr_t markf; + uintptr_t markseg; + uintptr_t marklock; + int pass2choice;// value for choices that are fixed on pass2 for pass3 + uintptr_t natcall; + int retn; +} instruction_arm64_t; + +typedef struct dynarec_arm_s { + instruction_arm64_t *insts; + int32_t size; + int32_t cap; + uintptr_t start; // start of the block + uint32_t isize; // size in byte of x64 instructions included + void* block; // memory pointer where next instruction is emited + uintptr_t native_start; // start of the arm code + size_t native_size; // 
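/* Editor's note (not part of the patch): sketch of the 64-bit literal pool behind the
 * TABLE64/Table64 machinery above.  Constants that do not fit the immediate forms are
 * collected in a per-block table, emitted right after the generated code, and loaded
 * with a PC-relative LDR (literal); pass 2 only reserves the 4-byte LDR, pass 3
 * resolves the real offset.  The helper below is a simplified, hypothetical version
 * that just deduplicates values and returns the slot index. */
#include <stdint.h>
#include <stdlib.h>

typedef struct { uint64_t *vals; int size, cap; } table64_t;

static int table64_add(table64_t *t, uint64_t v)
{
    for (int i = 0; i < t->size; ++i)       /* reuse an existing slot if possible */
        if (t->vals[i] == v) return i;
    if (t->size == t->cap) {                /* grow the pool on demand */
        t->cap = t->cap ? t->cap * 2 : 8;
        t->vals = (uint64_t*)realloc(t->vals, t->cap * sizeof(uint64_t));
    }
    t->vals[t->size] = v;
    return t->size++;                       /* slot index; the byte offset is resolved
                                               once the end of the code block is known */
}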
size of emitted arm code + int state_flags;// actual state for on-demand flags + uintptr_t last_ip; // last set IP in RIP (or NULL if unclean state) + int8_t x87cache[8];// cache status for the 8 x87 register behind the fpu stack + int8_t x87reg[8]; // reg used for x87cache entry + int8_t mmxcache[8];// cache status for the 8 MMX registers + int8_t ssecache[16];// cache status for the 16 SSE(2) registers + int8_t fpuused[32];// all 8..31 Q reg from fpu, used by x87, sse and mmx + int x87stack; // cache stack counter + int fpu_scratch;// scratch counter + int fpu_reg; // x87/sse/mmx reg counter + int dfnone; // if defered flags is already set to df_none + uint64_t *table64; // table of 64bits value + int table64size;// size of table (will be appended at end of executable code) + int table64cap; + uintptr_t tablestart; + uintptr_t* next; // variable array of "next" jump address + int next_sz; + int next_cap; + uintptr_t* sons_x64; // the x64 address of potential dynablock sons + void** sons_native; // the arm address of potential dynablock sons + int sons_size; // number of potential dynablock sons + dynablock_t* dynablock; +} dynarec_arm_t; + +void add_next(dynarec_arm_t *dyn, uintptr_t addr); +uintptr_t get_closest_next(dynarec_arm_t *dyn, uintptr_t addr); +int is_nops(dynarec_arm_t *dyn, uintptr_t addr, int n); +int is_instructions(dynarec_arm_t *dyn, uintptr_t addr, int n); + +int Table64(dynarec_arm_t *dyn, uint64_t val); // add a value to etable64 (if needed) and gives back the imm19 to use in LDR_literal + +#endif //__DYNAREC_ARM_PRIVATE_H_ diff --git a/src/dynarec/arm64_emitter.h b/src/dynarec/arm64_emitter.h deleted file mode 100755 index b69ef087..00000000 --- a/src/dynarec/arm64_emitter.h +++ /dev/null @@ -1,1755 +0,0 @@ -#ifndef __ARM64_EMITTER_H__ -#define __ARM64_EMITTER_H__ -/* - ARM64 Emitter - -*/ - -// x86 Register mapping -#define xRAX 10 -#define xRCX 11 -#define xRDX 12 -#define xRBX 13 -#define xRSP 14 -#define xRBP 15 -#define xRSI 16 -#define xRDI 17 -#define xR8 18 -#define xR9 19 -#define xR10 20 -#define xR11 21 -#define xR12 22 -#define xR13 23 -#define xR14 24 -#define xR15 25 -#define xFlags 26 -#define xRIP 27 -// 32bits version -#define wEAX xRAX -#define wECX xRCX -#define wEDX xRDX -#define wEBX xRBX -#define wESP xRSP -#define wEBP xRBP -#define wESI xRSI -#define wEDI xRDI -#define wR8 xR8 -#define wR9 xR9 -#define wR10 xR10 -#define wR11 xR11 -#define wR12 xR12 -#define wR13 xR13 -#define wR14 xR14 -#define wR15 xR15 -#define wFlags xFlags -// scratch registers -#define x1 1 -#define x2 2 -#define x3 3 -#define x4 4 -#define x5 5 -#define x6 6 -#define x7 7 -// 32bits version of scratch -#define w1 x1 -#define w2 x2 -#define w3 x3 -#define w4 x4 -#define w5 x5 -#define w6 x6 -#define w7 x7 -// emu is r0 -#define xEmu 0 -// ARM64 LR -#define xLR 30 -// ARM64 SP is r31 but is a special register -#define xSP 31 -// xZR regs is 31 -#define xZR 31 -#define wZR xZR - -// conditions -#define cEQ 0b0000 -#define cNE 0b0001 -#define cCS 0b0010 -#define cHS cCS -#define cCC 0b0011 -#define cLO cCC -#define cMI 0b0100 -#define cPL 0b0101 -#define cVS 0b0110 -#define cVC 0b0111 -#define cHI 0b1000 -#define cLS 0b1001 -#define cGE 0b1010 -#define cLT 0b1011 -#define cGT 0b1100 -#define cLE 0b1101 -#define c__ 0b1110 - -#define invCond(cond) ((cond)^0b0001) - -// MOVZ -#define MOVZ_gen(sf, hw, imm16, Rd) ((sf)<<31 | 0b10<<29 | 0b100101<<23 | (hw)<<21 | (imm16)<<5 | (Rd)) -#define MOVZx(Rd, imm16) EMIT(MOVZ_gen(1, 0, ((uint16_t)imm16)&0xffff, Rd)) -#define MOVZx_LSL(Rd, 
imm16, shift) EMIT(MOVZ_gen(1, (shift)/16, ((uint16_t)imm16)&0xffff, Rd)) -#define MOVZw(Rd, imm16) EMIT(MOVZ_gen(0, 0, ((uint16_t)imm16)&0xffff, Rd)) -#define MOVZw_LSL(Rd, imm16, shift) EMIT(MOVZ_gen(0, (shift)/16, ((uint16_t)imm16)&0xffff, Rd)) - -// MOVN -#define MOVN_gen(sf, hw, imm16, Rd) ((sf)<<31 | 0b00<<29 | 0b100101<<23 | (hw)<<21 | (imm16)<<5 | (Rd)) -#define MOVNx(Rd, imm16) EMIT(MOVN_gen(1, 0, ((uint16_t)imm16)&0xffff, Rd)) -#define MOVNx_LSL(Rd, imm16, shift) EMIT(MOVN_gen(1, (shift)/16, ((uint16_t)imm16)&0xffff, Rd)) -#define MOVNw(Rd, imm16) EMIT(MOVN_gen(0, 0, ((uint16_t)imm16)&0xffff, Rd)) -#define MOVNw_LSL(Rd, imm16, shift) EMIT(MOVN_gen(0, (shift)/16, ((uint16_t)imm16)&0xffff, Rd)) - -// MOVK -#define MOVK_gen(sf, hw, imm16, Rd) ((sf)<<31 | 0b11<<29 | 0b100101<<23 | (hw)<<21 | (imm16)<<5 | (Rd)) -#define MOVKx(Rd, imm16) EMIT(MOVK_gen(1, 0, ((uint16_t)imm16)&0xffff, Rd)) -#define MOVKx_LSL(Rd, imm16, shift) EMIT(MOVK_gen(1, (shift)/16, ((uint16_t)imm16)&0xffff, Rd)) -#define MOVKw(Rd, imm16) EMIT(MOVK_gen(0, 0, ((uint16_t)imm16)&0xffff, Rd)) -#define MOVKw_LSL(Rd, imm16, shift) EMIT(MOVK_gen(0, (shift)/16, ((uint16_t)imm16)&0xffff, Rd)) - -// This macro will give a -Wsign-compare warning, probably bug #38341 -#define MOV32w(Rd, imm32) \ - if(~((uint32_t)(imm32))<0xffffu) { \ - MOVNw(Rd, (~(uint32_t)(imm32))&0xffff); \ - } else { \ - MOVZw(Rd, (imm32)&0xffff); \ - if((imm32)&0xffff0000) {MOVKw_LSL(Rd, ((imm32)>>16)&0xffff, 16);} \ - } -#define MOV64x(Rd, imm64) \ - if(~((uint64_t)(imm64))<0xffff) { \ - MOVNx(Rd, (~(uint64_t)(imm64))&0xffff); \ - } else { \ - MOVZx(Rd, ((uint64_t)(imm64))&0xffff); \ - if(((uint64_t)(imm64))&0xffff0000) {MOVKx_LSL(Rd, (((uint64_t)(imm64))>>16)&0xffff, 16);} \ - if(((uint64_t)(imm64))&0xffff00000000LL) {MOVKx_LSL(Rd, (((uint64_t)(imm64))>>32)&0xffff, 32);} \ - if(((uint64_t)(imm64))&0xffff000000000000LL) {MOVKx_LSL(Rd, (((uint64_t)(imm64))>>48)&0xffff, 48);} \ - } - -#define MOV64xw(Rd, imm64) if(rex.w) {MOV64x(Rd, imm64);} else {MOV32w(Rd, imm64);} - - -// ADD / SUB -#define ADDSUB_REG_gen(sf, op, S, shift, Rm, imm6, Rn, Rd) ((sf)<<31 | (op)<<30 | (S)<<29 | 0b01011<<24 | (shift)<<22 | (Rm)<<16 | (imm6)<<10 | (Rn)<<5 | (Rd)) -#define ADDx_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(1, 0, 0, 0b00, Rm, 0, Rn, Rd)) -#define ADDSx_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(1, 0, 1, 0b00, Rm, 0, Rn, Rd)) -#define ADDx_REG_LSL(Rd, Rn, Rm, lsl) EMIT(ADDSUB_REG_gen(1, 0, 0, 0b00, Rm, lsl, Rn, Rd)) -#define ADDw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(0, 0, 0, 0b00, Rm, 0, Rn, Rd)) -#define ADDSw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(0, 0, 1, 0b00, Rm, 0, Rn, Rd)) -#define ADDw_REG_LSL(Rd, Rn, Rm, lsl) EMIT(ADDSUB_REG_gen(0, 0, 0, 0b00, Rm, lsl, Rn, Rd)) -#define ADDxw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(rex.w, 0, 0, 0b00, Rm, 0, Rn, Rd)) -#define ADDSxw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(rex.w, 0, 1, 0b00, Rm, 0, Rn, Rd)) -#define ADDxw_REG_LSR(Rd, Rn, Rm, lsr) EMIT(ADDSUB_REG_gen(rex.w, 0, 0, 0b01, Rm, lsr, Rn, Rd)) - -#define ADDSUB_IMM_gen(sf, op, S, shift, imm12, Rn, Rd) ((sf)<<31 | (op)<<30 | (S)<<29 | 0b10001<<24 | (shift)<<22 | (imm12)<<10 | (Rn)<<5 | (Rd)) -#define ADDx_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(1, 0, 0, 0b00, (imm12)&0xfff, Rn, Rd)) -#define ADDSx_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(1, 0, 1, 0b00, (imm12)&0xfff, Rn, Rd)) -#define ADDw_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(0, 0, 0, 0b00, (imm12)&0xfff, Rn, Rd)) -#define ADDSw_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(0, 0, 1, 0b00, (imm12)&0xfff, Rn, Rd)) -#define ADDxw_U12(Rd, Rn, imm12) 
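/* Editor's note (not part of the patch): the MOV32w/MOV64x helpers above build an
 * arbitrary immediate from MOVZ or MOVN plus up to three MOVK instructions, one per
 * non-trivial 16-bit chunk (MOVN is preferred when the value is mostly ones, so a
 * single instruction covers small negative numbers).  A rough cost model of that
 * choice, as a plain C function (hypothetical, for illustration only): */
#include <stdint.h>

static int mov64_instruction_count(uint64_t imm)
{
    if (~imm < 0x10000u)                 /* mostly-ones value: one MOVN does it */
        return 1;
    int n = 1;                           /* MOVZ for chunk 0 is always emitted */
    for (int shift = 16; shift < 64; shift += 16)
        if ((imm >> shift) & 0xffff)     /* one MOVK per non-zero 16-bit chunk */
            n++;
    return n;
}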
EMIT(ADDSUB_IMM_gen(rex.w, 0, 0, 0b00, (imm12)&0xfff, Rn, Rd)) -#define ADDSxw_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(rex.w, 0, 1, 0b00, (imm12)&0xfff, Rn, Rd)) - -#define SUBx_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(1, 1, 0, 0b00, Rm, 0, Rn, Rd)) -#define SUBSx_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(1, 1, 1, 0b00, Rm, 0, Rn, Rd)) -#define SUBx_REG_LSL(Rd, Rn, Rm, lsl) EMIT(ADDSUB_REG_gen(1, 1, 0, 0b00, Rm, lsl, Rn, Rd)) -#define SUBw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(0, 1, 0, 0b00, Rm, 0, Rn, Rd)) -#define SUBw_REG_LSL(Rd, Rn, Rm, lsl) EMIT(ADDSUB_REG_gen(0, 1, 0, 0b00, Rm, lsl, Rn, Rd)) -#define SUBSw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(0, 1, 1, 0b00, Rm, 0, Rn, Rd)) -#define SUBSw_REG_LSL(Rd, Rn, Rm, lsl) EMIT(ADDSUB_REG_gen(0, 1, 1, 0b00, Rm, lsl, Rn, Rd)) -#define SUBxw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(rex.w, 1, 0, 0b00, Rm, 0, Rn, Rd)) -#define SUBSxw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(rex.w, 1, 1, 0b00, Rm, 0, Rn, Rd)) -#define CMPSx_REG(Rn, Rm) SUBSx_REG(xZR, Rn, Rm) -#define CMPSw_REG(Rn, Rm) SUBSw_REG(wZR, Rn, Rm) -#define CMPSxw_REG(Rn, Rm) SUBSxw_REG(xZR, Rn, Rm) -#define NEGx_REG(Rd, Rm) SUBx_REG(Rd, xZR, Rm); -#define NEGw_REG(Rd, Rm) SUBw_REG(Rd, wZR, Rm); -#define NEGxw_REG(Rd, Rm) SUBxw_REG(Rd, xZR, Rm); -#define NEGSx_REG(Rd, Rm) SUBSx_REG(Rd, xZR, Rm); -#define NEGSw_REG(Rd, Rm) SUBSw_REG(Rd, wZR, Rm); -#define NEGSxw_REG(Rd, Rm) SUBSxw_REG(Rd, xZR, Rm); - -#define SUBx_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(1, 1, 0, 0b00, (imm12)&0xfff, Rn, Rd)) -#define SUBSx_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(1, 1, 1, 0b00, (imm12)&0xfff, Rn, Rd)) -#define SUBw_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(0, 1, 0, 0b00, (imm12)&0xfff, Rn, Rd)) -#define SUBSw_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(0, 1, 1, 0b00, (imm12)&0xfff, Rn, Rd)) -#define SUBxw_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(rex.w, 1, 0, 0b00, (imm12)&0xfff, Rn, Rd)) -#define SUBSxw_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(rex.w, 1, 1, 0b00, (imm12)&0xfff, Rn, Rd)) -#define CMPSx_U12(Rn, imm12) SUBSx_U12(xZR, Rn, imm12) -#define CMPSw_U12(Rn, imm12) SUBSw_U12(wZR, Rn, imm12) -#define CMPSxw_U12(Rn, imm12) SUBSxw_U12(xZR, Rn, imm12) - -#define ADDSUBC_gen(sf, op, S, Rm, Rn, Rd) ((sf)<<31 | (op)<<30 | (S)<<29 | 0b11010000<<21 | (Rm)<<16 | (Rn)<<5 | (Rd)) -#define ADCx_REG(Rd, Rn, Rm) EMIT(ADDSUBC_gen(1, 0, 0, Rm, Rn, Rd)) -#define ADCw_REG(Rd, Rn, Rm) EMIT(ADDSUBC_gen(0, 0, 0, Rm, Rn, Rd)) -#define ADCxw_REG(Rd, Rn, Rm) EMIT(ADDSUBC_gen(rex.w, 0, 0, Rm, Rn, Rd)) -#define SBCx_REG(Rd, Rn, Rm) EMIT(ADDSUBC_gen(1, 1, 0, Rm, Rn, Rd)) -#define SBCw_REG(Rd, Rn, Rm) EMIT(ADDSUBC_gen(0, 1, 0, Rm, Rn, Rd)) -#define SBCxw_REG(Rd, Rn, Rm) EMIT(ADDSUBC_gen(rex.w, 1, 0, Rm, Rn, Rd)) -#define ADCSx_REG(Rd, Rn, Rm) EMIT(ADDSUBC_gen(1, 0, 1, Rm, Rn, Rd)) -#define ADCSw_REG(Rd, Rn, Rm) EMIT(ADDSUBC_gen(0, 0, 1, Rm, Rn, Rd)) -#define ADCSxw_REG(Rd, Rn, Rm) EMIT(ADDSUBC_gen(rex.w, 0, 1, Rm, Rn, Rd)) -#define SBCSx_REG(Rd, Rn, Rm) EMIT(ADDSUBC_gen(1, 1, 1, Rm, Rn, Rd)) -#define SBCSw_REG(Rd, Rn, Rm) EMIT(ADDSUBC_gen(0, 1, 1, Rm, Rn, Rd)) -#define SBCSxw_REG(Rd, Rn, Rm) EMIT(ADDSUBC_gen(rex.w, 1, 1, Rm, Rn, Rd)) - -// ADR -#define ADR_gen(immlo, immhi, Rd) ((immlo)<<29 | 0b10000<<24 | (immhi)<<5 | (Rd)) -#define ADR_S20(Rd, imm) EMIT(ADR_gen((imm)&3, ((imm)>>2)&0x7ffff, (Rd)) - -// LDR -#define LDR_gen(size, op1, imm9, op2, Rn, Rt) ((size)<<30 | 0b111<<27 | (op1)<<24 | 0b01<<22 | (imm9)<<12 | (op2)<<10 | (Rn)<<5 | (Rt)) -#define LDRx_S9_postindex(Rt, Rn, imm9) EMIT(LDR_gen(0b11, 0b00, (imm9)&0x1ff, 0b01, Rn, Rt)) -#define LDRx_S9_preindex(Rt, Rn, 
imm9) EMIT(LDR_gen(0b11, 0b00, (imm9)&0x1ff, 0b11, Rn, Rt)) -#define LDRw_S9_postindex(Rt, Rn, imm9) EMIT(LDR_gen(0b10, 0b00, (imm9)&0x1ff, 0b01, Rn, Rt)) -#define LDRw_S9_preindex(Rt, Rn, imm9) EMIT(LDR_gen(0b10, 0b00, (imm9)&0x1ff, 0b11, Rn, Rt)) -#define LDRB_S9_postindex(Rt, Rn, imm9) EMIT(LDR_gen(0b00, 0b00, (imm9)&0x1ff, 0b01, Rn, Rt)) -#define LDRB_S9_preindex(Rt, Rn, imm9) EMIT(LDR_gen(0b00, 0b00, (imm9)&0x1ff, 0b11, Rn, Rt)) -#define LDRH_S9_postindex(Rt, Rn, imm9) EMIT(LDR_gen(0b01, 0b00, (imm9)&0x1ff, 0b01, Rn, Rt)) -#define LDRH_S9_preindex(Rt, Rn, imm9) EMIT(LDR_gen(0b01, 0b00, (imm9)&0x1ff, 0b11, Rn, Rt)) -#define LDRxw_S9_postindex(Rt, Rn, imm9) EMIT(LDR_gen(rex.w?0b11:0b10, 0b00, (imm9)&0x1ff, 0b01, Rn, Rt)) - -#define LDRS_gen(size, op1, imm9, op2, Rn, Rt) ((size)<<30 | 0b111<<27 | (op1)<<24 | 0b10<<22 | (imm9)<<12 | (op2)<<10 | (Rn)<<5 | (Rt)) -#define LDRSW_S9_postindex(Rt, Rn, imm9) EMIT(LDRS_gen(0b10, 0b00, (imm9)&0x1ff, 0b01, Rn, Rt)) -#define LDRSW_S9_preindex(Rt, Rn, imm9) EMIT(LDRS_gen(0b10, 0b00, (imm9)&0x1ff, 0b11, Rn, Rt)) - -#define LD_gen(size, op1, imm12, Rn, Rt) ((size)<<30 | 0b111<<27 | (op1)<<24 | 0b01<<22 | (imm12)<<10 | (Rn)<<5 | (Rt)) -#define LDRx_U12(Rt, Rn, imm12) EMIT(LD_gen(0b11, 0b01, ((uint32_t)((imm12)>>3))&0xfff, Rn, Rt)) -#define LDRw_U12(Rt, Rn, imm12) EMIT(LD_gen(0b10, 0b01, ((uint32_t)((imm12)>>2))&0xfff, Rn, Rt)) -#define LDRB_U12(Rt, Rn, imm12) EMIT(LD_gen(0b00, 0b01, ((uint32_t)((imm12)))&0xfff, Rn, Rt)) -#define LDRH_U12(Rt, Rn, imm12) EMIT(LD_gen(0b01, 0b01, ((uint32_t)((imm12)>>1))&0xfff, Rn, Rt)) -#define LDRxw_U12(Rt, Rn, imm12) EMIT(LD_gen((rex.w)?0b11:0b10, 0b01, ((uint32_t)((imm12)>>(2+rex.w)))&0xfff, Rn, Rt)) - -#define LDS_gen(size, op1, imm12, Rn, Rt) ((size)<<30 | 0b111<<27 | (op1)<<24 | 0b10<<22 | (imm12)<<10 | (Rn)<<5 | (Rt)) -#define LDRSW_U12(Rt, Rn, imm12) EMIT(LDS_gen(0b10, 0b01, ((uint32_t)((imm12)>>2))&0xfff, Rn, Rt)) - -#define LDR_REG_gen(size, Rm, option, S, Rn, Rt) ((size)<<30 | 0b111<<27 | 0b01<<22 | 1<<21 | (Rm)<<16 | (option)<<13 | (S)<<12 | (0b10)<<10 | (Rn)<<5 | (Rt)) -#define LDRx_REG(Rt, Rn, Rm) EMIT(LDR_REG_gen(0b11, Rm, 0b011, 0, Rn, Rt)) -#define LDRx_REG_LSL3(Rt, Rn, Rm) EMIT(LDR_REG_gen(0b11, Rm, 0b011, 1, Rn, Rt)) -#define LDRx_REG_UXTW3(Rt, Rn, Rm) EMIT(LDR_REG_gen(0b11, Rm, 0b010, 1, Rn, Rt)) -#define LDRw_REG(Rt, Rn, Rm) EMIT(LDR_REG_gen(0b10, Rm, 0b011, 0, Rn, Rt)) -#define LDRw_REG_LSL2(Rt, Rn, Rm) EMIT(LDR_REG_gen(0b10, Rm, 0b011, 1, Rn, Rt)) -#define LDRxw_REG(Rt, Rn, Rm) EMIT(LDR_REG_gen(0b10+rex.w, Rm, 0b011, 0, Rn, Rt)) -#define LDRB_REG(Rt, Rn, Rm) EMIT(LDR_REG_gen(0b00, Rm, 0b011, 0, Rn, Rt)) -#define LDRH_REG(Rt, Rn, Rm) EMIT(LDR_REG_gen(0b01, Rm, 0b011, 0, Rn, Rt)) - -#define LDRS_U12_gen(size, op1, opc, imm12, Rn, Rt) ((size)<<30 | 0b111<<27 | (op1)<<24 | (opc)<<22 | (imm12)<<10 | (Rn)<<5 | (Rt)) -#define LDRSHx_U12(Rt, Rn, imm12) EMIT(LDRS_U12_gen(0b01, 0b01, 0b10, ((uint32_t)(imm12>>1))&0xfff, Rn, Rt)) -#define LDRSHw_U12(Rt, Rn, imm12) EMIT(LDRS_U12_gen(0b01, 0b01, 0b11, ((uint32_t)(imm12>>1))&0xfff, Rn, Rt)) -#define LDRSHxw_U12(Rt, Rn, imm12) EMIT(LDRS_U12_gen(0b01, 0b01, rex.w?0b10:0b11, ((uint32_t)(imm12>>1))&0xfff, Rn, Rt)) -#define LDRSBx_U12(Rt, Rn, imm12) EMIT(LDRS_U12_gen(0b00, 0b01, 0b10, ((uint32_t)(imm12>>0))&0xfff, Rn, Rt)) -#define LDRSBw_U12(Rt, Rn, imm12) EMIT(LDRS_U12_gen(0b00, 0b01, 0b11, ((uint32_t)(imm12>>0))&0xfff, Rn, Rt)) -#define LDRSBxw_U12(Rt, Rn, imm12) EMIT(LDRS_U12_gen(0b00, 0b01, rex.w?0b10:0b11, ((uint32_t)(imm12>>0))&0xfff, Rn, Rt)) - -#define 
LDRS_REG_gen(size, Rm, option, S, Rn, Rt) ((size)<<30 | 0b111<<27 | 0b10<<22 | 1<<21 | (Rm)<<16 | (option)<<13 | (S)<<12 | (0b10)<<10 | (Rn)<<5 | (Rt)) -#define LDRSW_REG(Rt, Rn, Rm) EMIT(LDRS_REG_gen(0b10, Rm, 0b011, 0, Rn, Rt)) - -#define LDR_PC_gen(opc, imm19, Rt) ((opc)<<30 | 0b011<<27 | (imm19)<<5 | (Rt)) -#define LDRx_literal(Rt, imm19) EMIT(LDR_PC_gen(0b01, ((imm19)>>2)&0x7FFFF, Rt)) - -// STR -#define STR_gen(size, op1, imm9, op2, Rn, Rt) ((size)<<30 | 0b111<<27 | (op1)<<24 | 0b00<<22 | (imm9)<<12 | (op2)<<10 | (Rn)<<5 | (Rt)) -#define STRx_S9_postindex(Rt, Rn, imm9) EMIT(STR_gen(0b11, 0b00, (imm9)&0x1ff, 0b01, Rn, Rt)) -#define STRx_S9_preindex(Rt, Rn, imm9) EMIT(STR_gen(0b11, 0b00, (imm9)&0x1ff, 0b11, Rn, Rt)) -#define STRw_S9_postindex(Rt, Rn, imm9) EMIT(STR_gen(0b10, 0b00, (imm9)&0x1ff, 0b01, Rn, Rt)) -#define STRw_S9_preindex(Rt, Rn, imm9) EMIT(STR_gen(0b10, 0b00, (imm9)&0x1ff, 0b11, Rn, Rt)) -#define STRxw_S9_postindex(Rt, Rn, imm9) EMIT(STR_gen(rex.w?0b11:0b10, 0b00, (imm9)&0x1ff, 0b01, Rn, Rt)) -#define STRB_S9_postindex(Rt, Rn, imm9) EMIT(STR_gen(0b00, 0b00, (imm9)&0x1ff, 0b01, Rn, Rt)) -#define STRH_S9_postindex(Rt, Rn, imm9) EMIT(STR_gen(0b01, 0b00, (imm9)&0x1ff, 0b01, Rn, Rt)) - -#define ST_gen(size, op1, imm12, Rn, Rt) ((size)<<30 | 0b111<<27 | (op1)<<24 | 0b00<<22 | (imm12)<<10 | (Rn)<<5 | (Rt)) -#define STRx_U12(Rt, Rn, imm12) EMIT(ST_gen(0b11, 0b01, ((uint32_t)((imm12)>>3))&0xfff, Rn, Rt)) -#define STRw_U12(Rt, Rn, imm12) EMIT(ST_gen(0b10, 0b01, ((uint32_t)((imm12)>>2))&0xfff, Rn, Rt)) -#define STRB_U12(Rt, Rn, imm12) EMIT(ST_gen(0b00, 0b01, ((uint32_t)((imm12)))&0xfff, Rn, Rt)) -#define STRH_U12(Rt, Rn, imm12) EMIT(ST_gen(0b01, 0b01, ((uint32_t)((imm12)>>1))&0xfff, Rn, Rt)) -#define STRxw_U12(Rt, Rn, imm12) EMIT(ST_gen((rex.w)?0b11:0b10, 0b01, ((uint32_t)((imm12)>>(2+rex.w)))&0xfff, Rn, Rt)) - -#define STR_REG_gen(size, Rm, option, S, Rn, Rt) ((size)<<30 | 0b111<<27 | 0b00<<22 | 1<<21 | (Rm)<<16 | (option)<<13 | (S)<<12 | (0b10)<<10 | (Rn)<<5 | (Rt)) -#define STRx_REG(Rt, Rn, Rm) EMIT(STR_REG_gen(0b11, Rm, 0b011, 0, Rn, Rt)) -#define STRx_REG_LSL3(Rt, Rn, Rm) EMIT(STR_REG_gen(0b11, Rm, 0b011, 1, Rn, Rt)) -#define STRx_REG_UXTW(Rt, Rn, Rm) EMIT(STR_REG_gen(0b11, Rm, 0b010, 0, Rn, Rt)) -#define STRw_REG(Rt, Rn, Rm) EMIT(STR_REG_gen(0b10, Rm, 0b011, 0, Rn, Rt)) -#define STRw_REG_LSL2(Rt, Rn, Rm) EMIT(STR_REG_gen(0b10, Rm, 0b011, 1, Rn, Rt)) -#define STRB_REG(Rt, Rn, Rm) EMIT(STR_REG_gen(0b00, Rm, 0b011, 0, Rn, Rt)) -#define STRH_REG(Rt, Rn, Rm) EMIT(STR_REG_gen(0b01, Rm, 0b011, 0, Rn, Rt)) -#define STRxw_REG(Rt, Rn, Rm) EMIT(STR_REG_gen(rex.w?0b11:0b10, Rm, 0b011, 0, Rn, Rt)) - -// LOAD/STORE PAIR -#define MEMPAIR_gen(size, L, op2, imm7, Rt2, Rn, Rt) ((size)<<31 | 0b101<<27 | (op2)<<23 | (L)<<22 | (imm7)<<15 | (Rt2)<<10 | (Rn)<<5 | (Rt)) - -#define LDPx_S7_postindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(1, 1, 0b01, (((uint32_t)(imm))>>3)&0x7f, Rt2, Rn, Rt)) -#define LDPw_S7_postindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(0, 1, 0b01, (((uint32_t)(imm))>>2)&0x7f, Rt2, Rn, Rt)) -#define LDPxw_S7_postindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(rex.w, 1, 0b01, (((uint32_t)(imm))>>(2+rex.w)), Rt2, Rn, Rt)) -#define LDPx_S7_preindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(1, 1, 0b11, (((uint32_t)(imm))>>3)&0x7f, Rt2, Rn, Rt)) -#define LDPw_S7_preindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(0, 1, 0b11, (((uint32_t)(imm))>>2)&0x7f, Rt2, Rn, Rt)) -#define LDPxw_S7_preindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(rex.w, 1, 0b11, (((uint32_t)(imm))>>(2+rex.w)), Rt2, Rn, Rt)) -#define LDPx_S7_offset(Rt, 
Rt2, Rn, imm) EMIT(MEMPAIR_gen(1, 1, 0b10, (((uint32_t)(imm))>>3)&0x7f, Rt2, Rn, Rt)) -#define LDPw_S7_offset(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(0, 1, 0b10, (((uint32_t)(imm))>>2)&0x7f, Rt2, Rn, Rt)) -#define LDPxw_S7_offset(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(rex.w, 1, 0b10, (((uint32_t)(imm))>>(2+rex.w)), Rt2, Rn, Rt)) - -#define STPx_S7_postindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(1, 0, 0b01, (((uint32_t)(imm))>>3)&0x7f, Rt2, Rn, Rt)) -#define STPw_S7_postindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(0, 0, 0b01, (((uint32_t)(imm))>>2)&0x7f, Rt2, Rn, Rt)) -#define STPxw_S7_postindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(rex.w, 0, 0b01, (((uint32_t)(imm))>>(2+rex.w)), Rt2, Rn, Rt)) -#define STPx_S7_preindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(1, 0, 0b11, (((uint32_t)(imm))>>3)&0x7f, Rt2, Rn, Rt)) -#define STPw_S7_preindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(0, 0, 0b11, (((uint32_t)(imm))>>2)&0x7f, Rt2, Rn, Rt)) -#define STPxw_S7_preindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(rex.w, 0, 0b11, (((uint32_t)(imm))>>(2+rex.w)), Rt2, Rn, Rt)) -#define STPx_S7_offset(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(1, 0, 0b10, (((uint32_t)(imm))>>3)&0x7f, Rt2, Rn, Rt)) -#define STPw_S7_offset(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(0, 0, 0b10, (((uint32_t)(imm))>>2)&0x7f, Rt2, Rn, Rt)) -#define STPxw_S7_offset(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(rex.w, 0, 0b10, (((uint32_t)(imm))>>(2+rex.w)), Rt2, Rn, Rt)) - -// PUSH / POP helper -#define POP1(reg) LDRx_S9_postindex(reg, xRSP, 8) -#define PUSH1(reg) STRx_S9_preindex(reg, xRSP, -8) - -// LOAD/STORE Acquire Exclusive -#define MEMAX_gen(size, L, Rs, Rn, Rt) ((size)<<30 | 0b001000<<24 | (L)<<22 | (Rs)<<16 | 1<<15 | 0b11111<<10 | (Rn)<<5 | (Rt)) -#define LDAXRB(Rt, Rn) EMIT(MEMAX_gen(0b00, 1, 31, Rn, Rt)) -#define STLXRB(Rs, Rt, Rn) EMIT(MEMAX_gen(0b00, 0, Rs, Rn, Rt)) -#define LDAXRH(Rt, Rn) EMIT(MEMAX_gen(0b01, 1, 31, Rn, Rt)) -#define STLXRH(Rs, Rt, Rn) EMIT(MEMAX_gen(0b01, 0, Rs, Rn, Rt)) -#define LDAXRw(Rt, Rn) EMIT(MEMAX_gen(0b10, 1, 31, Rn, Rt)) -#define STLXRw(Rs, Rt, Rn) EMIT(MEMAX_gen(0b10, 0, Rs, Rn, Rt)) -#define LDAXRx(Rt, Rn) EMIT(MEMAX_gen(0b11, 1, 31, Rn, Rt)) -#define STLXRx(Rs, Rt, Rn) EMIT(MEMAX_gen(0b11, 0, Rs, Rn, Rt)) -#define LDAXRxw(Rt, Rn) EMIT(MEMAX_gen(2+rex.w, 1, 31, Rn, Rt)) -#define STLXRxw(Rs, Rt, Rn) EMIT(MEMAX_gen(2+rex.w, 0, Rs, Rn, Rt)) - -#define MEMAX_pair(size, L, Rs, Rt2, Rn, Rt) (1<<31 | (size)<<30 | 0b001000<<24 | (L)<<22 | 1<<21 | (Rs)<<16 | 1<<15 | (Rt2)<<10 | (Rn)<<5 | (Rt)) -#define LDAXPx(Rt, Rt2, Rn) EMIT(MEMAX_pair(1, 1, 31, Rt2, Rn, Rt)) -#define LDAXPw(Rt, Rt2, Rn) EMIT(MEMAX_pair(0, 1, 31, Rt2, Rn, Rt)) -#define LDAXPxw(Rt, Rt2, Rn) EMIT(MEMAX_pair(rex.w, 1, 31, Rt2, Rn, Rt)) -#define STLXPx(Rs, Rt, Rt2, Rn) EMIT(MEMAX_pair(1, 0, Rs, Rt2, Rn, Rt)) -#define STLXPw(Rs, Rt, Rt2, Rn) EMIT(MEMAX_pair(0, 0, Rs, Rt2, Rn, Rt)) -#define STLXPxw(Rs, Rt, Rt2, Rn) EMIT(MEMAX_pair(rex.w, 0, Rs, Rt2, Rn, Rt)) - -// LOAD/STORE Exclusive -#define MEMX_gen(size, L, Rs, Rn, Rt) ((size)<<30 | 0b001000<<24 | (L)<<22 | (Rs)<<16 | 0<<15 | 0b11111<<10 | (Rn)<<5 | (Rt)) -#define LDXRB(Rt, Rn) EMIT(MEMX_gen(0b00, 1, 31, Rn, Rt)) -#define STXRB(Rs, Rt, Rn) EMIT(MEMX_gen(0b00, 0, Rs, Rn, Rt)) -#define LDXRH(Rt, Rn) EMIT(MEMX_gen(0b01, 1, 31, Rn, Rt)) -#define STXRH(Rs, Rt, Rn) EMIT(MEMX_gen(0b01, 0, Rs, Rn, Rt)) -#define LDXRw(Rt, Rn) EMIT(MEMX_gen(0b10, 1, 31, Rn, Rt)) -#define STXRw(Rs, Rt, Rn) EMIT(MEMX_gen(0b10, 0, Rs, Rn, Rt)) -#define LDXRx(Rt, Rn) EMIT(MEMX_gen(0b11, 1, 31, Rn, Rt)) -#define STXRx(Rs, Rt, Rn) EMIT(MEMX_gen(0b11, 0, Rs, Rn, Rt)) -#define 
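/* Editor's note (not part of the patch): the exclusive-access macros above (LDAXRx,
 * STLXRx and friends) are the building blocks typically used for x86 LOCK-prefixed
 * read-modify-write instructions: load-exclusive, compute, store-exclusive, retry
 * until the store succeeds (this is what the MARKLOCK / CBNZx_MARKLOCK helpers loop
 * on).  Equivalent semantics expressed in portable C, which a compiler lowers to the
 * same LDAXR/STLXR pattern on ARM64 without LSE: */
#include <stdint.h>

static uint64_t locked_add64(uint64_t *mem, uint64_t val)
{
    uint64_t old, new_;
    do {
        old  = __atomic_load_n(mem, __ATOMIC_ACQUIRE);        /* load-exclusive side */
        new_ = old + val;
    } while (!__atomic_compare_exchange_n(mem, &old, new_, 1, /* store-exclusive; retry on failure */
                                          __ATOMIC_RELEASE, __ATOMIC_RELAXED));
    return old;
}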
LDXRxw(Rt, Rn) EMIT(MEMX_gen(2+rex.w, 1, 31, Rn, Rt)) -#define STXRxw(Rs, Rt, Rn) EMIT(MEMX_gen(2+rex.w, 0, Rs, Rn, Rt)) - -// Prefetch -#define PRFM_register(Rm, option, S, Rn, Rt) (0b11<<30 | 0b111<<27 | 0b10<<22 | 1<<21 | (Rm)<<16 | (option)<<13 | (S)<<12 | 0b10<<10 | (Rn)<<5 | (Rt)) -#define PLD_L1_KEEP(Rn, Rm) EMIT(PRFM_register(Rm, 0b011, 0, Rn, 0b00000)) -#define PLD_L2_KEEP(Rn, Rm) EMIT(PRFM_register(Rm, 0b011, 0, Rn, 0b00010)) -#define PLD_L3_KEEP(Rn, Rm) EMIT(PRFM_register(Rm, 0b011, 0, Rn, 0b00100)) -#define PLD_L1_STREAM(Rn, Rm) EMIT(PRFM_register(Rm, 0b011, 0, Rn, 0b00001)) -#define PLD_L2_STREAM(Rn, Rm) EMIT(PRFM_register(Rm, 0b011, 0, Rn, 0b00011)) -#define PLD_L3_STREAM(Rn, Rm) EMIT(PRFM_register(Rm, 0b011, 0, Rn, 0b00101)) - -#define PRFM_imm(imm12, Rn, Rt) (0b11<<30 | 0b111<<27 | 0b01<<24 | 0b10<<22 | (imm12)<<10 | (Rn)<<5 | (Rt)) -#define PLD_L1_KEEP_U12(Rn, imm12) EMIT(PRFM_imm(((imm12)>>3)&0xfff, Rn, 0b00000)) -#define PLD_L2_KEEP_U12(Rn, imm12) EMIT(PRFM_imm(((imm12)>>3)&0xfff, Rn, 0b00010)) -#define PLD_L3_KEEP_U12(Rn, imm12) EMIT(PRFM_imm(((imm12)>>3)&0xfff, Rn, 0b00100)) -#define PLD_L1_STREAM_U12(Rn, imm12) EMIT(PRFM_imm(((imm12)>>3)&0xfff, Rn, 0b00001)) -#define PLD_L2_STREAM_U12(Rn, imm12) EMIT(PRFM_imm(((imm12)>>3)&0xfff, Rn, 0b00011)) -#define PLD_L3_STREAM_U12(Rn, imm12) EMIT(PRFM_imm(((imm12)>>3)&0xfff, Rn, 0b00101)) - -#define PST_L1_STREAM_U12(Rn, imm12) EMIT(PRFM_imm(((imm12)>>3)&0xfff, Rn, 0b01001)) - -// Data Memory Barrier -#define DMB_gen(CRm) (0b1101010100<<22 | 0b011<<16 | 0b0011<<12 | (CRm)<<8 | 1<<7 | 0b01<<5 | 0b11111) -#define DMB_ISH() EMIT(DMB_gen(0b1011)) - -// BR and Branches -#define BR_gen(Z, op, A, M, Rn, Rm) (0b1101011<<25 | (Z)<<24 | (op)<<21 | 0b11111<<16 | (A)<<11 | (M)<<10 | (Rn)<<5 | (Rm)) -#define BR(Rn) EMIT(BR_gen(0, 0b00, 0, 0, Rn, 0)) -#define BLR(Rn) EMIT(BR_gen(0, 0b01, 0, 0, Rn, 0)) - -#define CB_gen(sf, op, imm19, Rt) ((sf)<<31 | 0b011010<<25 | (op)<<24 | (imm19)<<5 | (Rt)) -#define CBNZx(Rt, imm19) EMIT(CB_gen(1, 1, ((imm19)>>2)&0x7FFFF, Rt)) -#define CBNZw(Rt, imm19) EMIT(CB_gen(0, 1, ((imm19)>>2)&0x7FFFF, Rt)) -#define CBNZxw(Rt, imm19) EMIT(CB_gen(rex.w, 1, ((imm19)>>2)&0x7FFFF, Rt)) -#define CBZx(Rt, imm19) EMIT(CB_gen(1, 0, ((imm19)>>2)&0x7FFFF, Rt)) -#define CBZw(Rt, imm19) EMIT(CB_gen(0, 0, ((imm19)>>2)&0x7FFFF, Rt)) -#define CBZxw(Rt, imm19) EMIT(CB_gen(rex.w, 0, ((imm19)>>2)&0x7FFFF, Rt)) - -#define TB_gen(b5, op, b40, imm14, Rt) ((b5)<<31 | 0b011011<<25 | (op)<<24 | (b40)<<19 | (imm14)<<5 | (Rt)) -#define TBZ(Rt, bit, imm16) EMIT(TB_gen(((bit)>>5)&1, 0, (bit)&0x1f, ((imm16)>>2)&0x3FFF, Rt)) -#define TBNZ(Rt, bit, imm16) EMIT(TB_gen(((bit)>>5)&1, 1, (bit)&0x1f, ((imm16)>>2)&0x3FFF, Rt)) - -#define Bcond_gen(imm19, cond) (0b0101010<<25 | (imm19)<<5 | (cond)) -#define Bcond(cond, imm19) EMIT(Bcond_gen(((imm19)>>2)&0x7FFFF, cond)) - -#define B_gen(imm26) (0b000101<<26 | (imm26)) -#define B(imm26) EMIT(B_gen(((imm26)>>2)&0x3ffffff)) - -#define BL_gen(imm26) (0b100101<<26 | (imm26)) -#define BL(imm26) EMIT(BL_gen(((imm26)>>2)&0x3ffffff)) - -#define NOP EMIT(0b11010101000000110010000000011111) - -#define CSINC_gen(sf, Rm, cond, Rn, Rd) ((sf)<<31 | 0b11010100<<21 | (Rm)<<16 | (cond)<<12 | 1<<10 | (Rn)<<5 | (Rd)) -#define CSINCx(Rd, Rn, Rm, cond) EMIT(CSINC_gen(1, Rm, cond, Rn, Rd)) -#define CSINCw(Rd, Rn, Rm, cond) EMIT(CSINC_gen(0, Rm, cond, Rn, Rd)) -#define CSINCxw(Rd, Rn, Rm, cond) EMIT(CSINC_gen(rex.w, Rm, cond, Rn, Rd)) -#define CSETx(Rd, cond) CSINCx(Rd, xZR, xZR, invCond(cond)) -#define CSETw(Rd, cond) CSINCw(Rd, 
xZR, xZR, invCond(cond)) -#define CSETxw(Rd, cond) CSINCxw(Rd, xZR, xZR, invCond(cond)) - -#define CSINV_gen(sf, Rm, cond, Rn, Rd) ((sf)<<31 | 1<<30 | 0b11010100<<21 | (Rm)<<16 | (cond)<<12 | (Rn)<<5 | (Rd)) -#define CSINVx(Rd, Rn, Rm, cond) EMIT(CSINV_gen(1, Rm, cond, Rn, Rd)) -#define CSINVw(Rd, Rn, Rm, cond) EMIT(CSINV_gen(0, Rm, cond, Rn, Rd)) -#define CSINVxw(Rd, Rn, Rm, cond) EMIT(CSINV_gen(rex.w, Rm, cond, Rn, Rd)) -#define CINVx(Rd, Rn, cond) CSINVx(Rd, Rn, Rn, invertCond(cond)) -#define CINVw(Rd, Rn, cond) CSINVw(Rd, Rn, Rn, invertCond(cond)) -#define CINVxw(Rd, Rn, cond) CSINVxw(Rd, Rn, Rn, invertCond(cond)) -#define CSETMx(Rd, cond) CSINVx(Rd, xZR, xZR, invCond(cond)) -#define CSETMw(Rd, cond) CSINVw(Rd, xZR, xZR, invCond(cond)) -#define CSETMxw(Rd, cond) CSINVxw(Rd, xZR, xZR, invCond(cond)) - -#define CSEL_gen(sf, Rm, cond, Rn, Rd) ((sf<<31) | 0b11010100<<21 | (Rm)<<16 | (cond)<<12 | (Rn)<<5 | Rd) -#define CSELx(Rd, Rn, Rm, cond) EMIT(CSEL_gen(1, Rm, cond, Rn, Rd)) -#define CSELw(Rd, Rn, Rm, cond) EMIT(CSEL_gen(0, Rm, cond, Rn, Rd)) -#define CSELxw(Rd, Rn, Rm, cond) EMIT(CSEL_gen(rex.w, Rm, cond, Rn, Rd)) - -#define CSNEG_gen(sf, Rm, cond, Rn, Rd) ((sf)<<31 | 1<<30 | 0b11010100<<21 | (Rm)<<16 | (cond)<<12 | 1<<10 | (Rn)<<5 | (Rd)) -#define CSNEGx(Rd, Rn, Rm, cond) EMIT(CSNEG_gen(1, Rm, cond, Rn, Rd)) -#define CSNEGw(Rd, Rn, Rm, cond) EMIT(CSNEG_gen(0, Rm, cond, Rn, Rd)) -#define CSNEGxw(Rd, Rn, Rm, cond) EMIT(CSNEG_gen(rex.w, Rm, cond, Rn, Rd)) -#define CNEGx(Rd, Rn, cond) CSNEGx(Rd, Rn, Rn, invCond(cond)) -#define CNEGw(Rd, Rn, cond) CSNEGw(Rd, Rn, Rn, invCond(cond)) -#define CNEGxw(Rd, Rn, cond) CSNEGxw(Rd, Rn, Rn, invCond(cond)) - -// AND / ORR -#define LOGIC_gen(sf, opc, N, immr, imms, Rn, Rd) ((sf)<<31 | (opc)<<29 | 0b100100<<23 | (N)<<22 | (immr)<<16 | (imms)<<10 | (Rn)<<5 | Rd) -// logic to get the mask is ... convoluted... 
list of possible value there: https://gist.github.com/dinfuehr/51a01ac58c0b23e4de9aac313ed6a06a -#define ANDx_mask(Rd, Rn, N, immr, imms) EMIT(LOGIC_gen(1, 0b00, N, immr, imms, Rn, Rd)) -#define ANDw_mask(Rd, Rn, immr, imms) EMIT(LOGIC_gen(0, 0b00, 0, immr, imms, Rn, Rd)) -#define ANDSx_mask(Rd, Rn, N, immr, imms) EMIT(LOGIC_gen(1, 0b11, N, immr, imms, Rn, Rd)) -#define ANDSw_mask(Rd, Rn, immr, imms) EMIT(LOGIC_gen(0, 0b11, 0, immr, imms, Rn, Rd)) -#define ORRx_mask(Rd, Rn, N, immr, imms) EMIT(LOGIC_gen(1, 0b01, N, immr, imms, Rn, Rd)) -#define ORRw_mask(Rd, Rn, immr, imms) EMIT(LOGIC_gen(0, 0b01, 0, immr, imms, Rn, Rd)) -#define EORx_mask(Rd, Rn, N, immr, imms) EMIT(LOGIC_gen(1, 0b10, N, immr, imms, Rn, Rd)) -#define EORw_mask(Rd, Rn, immr, imms) EMIT(LOGIC_gen(0, 0b10, 0, immr, imms, Rn, Rd)) -#define TSTx_mask(Rn, N, immr, imms) ANDSx_mask(xZR, Rn, N, immr, imms) -#define TSTw_mask(Rn, immr, imms) ANDSw_mask(wZR, Rn, immr, imms) - -#define LOGIC_REG_gen(sf, opc, shift, N, Rm, imm6, Rn, Rd) ((sf)<<31 | (opc)<<29 | 0b01010<<24 | (shift)<<22 | (N)<<21 | (Rm)<<16 | (imm6)<<10 | (Rn)<<5 | (Rd)) -#define ANDx_REG(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(1, 0b00, 0b00, 0, Rm, 0, Rn, Rd)) -#define ANDw_REG(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(0, 0b00, 0b00, 0, Rm, 0, Rn, Rd)) -#define ANDxw_REG(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(rex.w, 0b00, 0b00, 0, Rm, 0, Rn, Rd)) -#define ANDSx_REG(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(1, 0b11, 0b00, 0, Rm, 0, Rn, Rd)) -#define ANDSw_REG(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(0, 0b11, 0b00, 0, Rm, 0, Rn, Rd)) -#define ANDSxw_REG(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(rex.w, 0b11, 0b00, 0, Rm, 0, Rn, Rd)) -#define ORRx_REG(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(1, 0b01, 0b00, 0, Rm, 0, Rn, Rd)) -#define ORRx_REG_LSL(Rd, Rn, Rm, lsl) EMIT(LOGIC_REG_gen(1, 0b01, 0b00, 0, Rm, lsl, Rn, Rd)) -#define ORRw_REG_LSL(Rd, Rn, Rm, lsl) EMIT(LOGIC_REG_gen(0, 0b01, 0b00, 0, Rm, lsl, Rn, Rd)) -#define ORRxw_REG_LSL(Rd, Rn, Rm, lsl) EMIT(LOGIC_REG_gen(rex.w, 0b01, 0b00, 0, Rm, lsl, Rn, Rd)) -#define ORRx_REG_LSR(Rd, Rn, Rm, lsr) EMIT(LOGIC_REG_gen(1, 0b01, 0b01, 0, Rm, lsr, Rn, Rd)) -#define ORRw_REG_LSR(Rd, Rn, Rm, lsr) EMIT(LOGIC_REG_gen(0, 0b01, 0b01, 0, Rm, lsr, Rn, Rd)) -#define ORRxw_REG_LSR(Rd, Rn, Rm, lsr) EMIT(LOGIC_REG_gen(rex.w, 0b01, 0b01, 0, Rm, lsr, Rn, Rd)) -#define ORRxw_REG(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(rex.w, 0b01, 0b00, 0, Rm, 0, Rn, Rd)) -#define ORRw_REG(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(0, 0b01, 0b00, 0, Rm, 0, Rn, Rd)) -#define ORNx_REG(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(1, 0b01, 0b00, 1, Rm, 0, Rn, Rd)) -#define ORNw_REG(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(0, 0b01, 0b00, 1, Rm, 0, Rn, Rd)) -#define ORNxw_REG(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(rex.w, 0b01, 0b00, 1, Rm, 0, Rn, Rd)) -#define ORNx_REG_LSL(Rd, Rn, Rm, lsl) EMIT(LOGIC_REG_gen(1, 0b01, 0b00, 1, Rm, lsl, Rn, Rd)) -#define EORx_REG(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(1, 0b10, 0b00, 0, Rm, 0, Rn, Rd)) -#define EORw_REG(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(0, 0b10, 0b00, 0, Rm, 0, Rn, Rd)) -#define EORxw_REG(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(rex.w, 0b10, 0b00, 0, Rm, 0, Rn, Rd)) -#define EORx_REG_LSL(Rd, Rn, Rm, lsl) EMIT(LOGIC_REG_gen(1, 0b10, 0b00, 0, Rm, lsl, Rn, Rd)) -#define EORw_REG_LSL(Rd, Rn, Rm, lsl) EMIT(LOGIC_REG_gen(0, 0b10, 0b00, 0, Rm, lsl, Rn, Rd)) -#define EORxw_REG_LSL(Rd, Rn, Rm, lsl) EMIT(LOGIC_REG_gen(rex.w, 0b10, 0b00, 0, Rm, lsl, Rn, Rd)) -#define EORx_REG_LSR(Rd, Rn, Rm, lsr) EMIT(LOGIC_REG_gen(1, 0b10, 0b01, 0, Rm, lsr, Rn, Rd)) -#define EORw_REG_LSR(Rd, Rn, Rm, lsr) EMIT(LOGIC_REG_gen(0, 0b10, 0b01, 0, Rm, lsr, Rn, Rd)) -#define EORxw_REG_LSR(Rd, 
Rn, Rm, lsr) EMIT(LOGIC_REG_gen(rex.w, 0b10, 0b01, 0, Rm, lsr, Rn, Rd)) -#define MOVx_REG(Rd, Rm) ORRx_REG(Rd, xZR, Rm) -#define MOVw_REG(Rd, Rm) ORRw_REG(Rd, xZR, Rm) -#define MOVxw_REG(Rd, Rm) ORRxw_REG(Rd, xZR, Rm) -#define LSLw_IMM(Rd, Rm, lsl) ORRw_REG_LSL(Rd, xZR, Rm, lsl) -#define LSLx_IMM(Rd, Rm, lsl) ORRx_REG_LSL(Rd, xZR, Rm, lsl) -#define LSLxw_IMM(Rd, Rm, lsl) ORRxw_REG_LSL(Rd, xZR, Rm, lsl) -#define LSRw_IMM(Rd, Rm, lsr) ORRw_REG_LSR(Rd, xZR, Rm, lsr) -#define LSRx_IMM(Rd, Rm, lsr) ORRx_REG_LSR(Rd, xZR, Rm, lsr) -#define LSRxw_IMM(Rd, Rm, lsr) ORRxw_REG_LSR(Rd, xZR, Rm, lsr) -#define MVNx_REG(Rd, Rm) ORNx_REG(Rd, xZR, Rm) -#define MVNx_REG_LSL(Rd, Rm, lsl) ORNx_REG_LSL(Rd, xZR, Rm, lsl) -#define MVNw_REG(Rd, Rm) ORNw_REG(Rd, xZR, Rm) -#define MVNxw_REG(Rd, Rm) ORNxw_REG(Rd, xZR, Rm) -#define MOV_frmSP(Rd) ADDx_U12(Rd, xSP, 0) -#define MOV_toSP(Rm) ADDx_U12(xSP, Rm, 0) -#define BICx(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(1, 0b00, 0b00, 1, Rm, 0, Rn, Rd)) -#define BICw(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(0, 0b00, 0b00, 1, Rm, 0, Rn, Rd)) -#define BICw_LSL(Rd, Rn, Rm, lsl) EMIT(LOGIC_REG_gen(0, 0b00, 0b00, 1, Rm, lsl, Rn, Rd)) -#define BICSx(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(1, 0b00, 0b00, 1, Rm, 0, Rn, Rd)) -#define BICSw(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(0, 0b00, 0b00, 1, Rm, 0, Rn, Rd)) -#define BICxw(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(rex.w, 0b00, 0b00, 1, Rm, 0, Rn, Rd)) -#define BICSxw(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(rex.w, 0b00, 0b00, 1, Rm, 0, Rn, Rd)) -#define BICx_REG BICx -#define BICw_REG BICw -#define BICxw_REG BICxw -#define TSTx_REG(Rn, Rm) ANDSx_REG(xZR, Rn, Rm) -#define TSTw_REG(Rn, Rm) ANDSw_REG(wZR, Rn, Rm) -#define TSTxw_REG(Rn, Rm) ANDSxw_REG(xZR, Rn, Rm) - -// ASRV -#define ASRV_gen(sf, Rm, Rn, Rd) ((sf)<<31 | 0b11010110<<21 | (Rm)<<16 | 0b0010<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) -#define ASRx_REG(Rd, Rn, Rm) EMIT(ASRV_gen(1, Rm, Rn, Rd)) -#define ASRw_REG(Rd, Rn, Rm) EMIT(ASRV_gen(0, Rm, Rn, Rd)) -#define ASRxw_REG(Rd, Rn, Rm) EMIT(ASRV_gen(rex.w, Rm, Rn, Rd)) - -// BFI -#define BFM_gen(sf, opc, N, immr, imms, Rn, Rd) ((sf)<<31 | (opc)<<29 | 0b100110<<23 | (N)<<22 | (immr)<<16 | (imms)<<10 | (Rn)<<5 | (Rd)) -#define BFMx(Rd, Rn, immr, imms) EMIT(BFM_gen(1, 0b01, 1, immr, imms, Rn, Rd)) -#define BFMw(Rd, Rn, immr, imms) EMIT(BFM_gen(0, 0b01, 0, immr, imms, Rn, Rd)) -#define BFMxw(Rd, Rn, immr, imms) EMIT(BFM_gen(rex.w, 0b01, rex.w, immr, imms, Rn, Rd)) -#define BFIx(Rd, Rn, lsb, width) BFMx(Rd, Rn, ((-lsb)%64)&0x3f, (width)-1) -#define BFIw(Rd, Rn, lsb, width) BFMw(Rd, Rn, ((-lsb)%32)&0x1f, (width)-1) -#define BFIxw(Rd, Rn, lsb, width) if(rex.w) {BFIx(Rd, Rn, lsb, width);} else {BFIw(Rd, Rn, lsb, width);} -#define BFCx(Rd, lsb, width) BFMx(Rd, xZR, ((-lsb)%64)&0x3f, (width)-1) -#define BFCw(Rd, lsb, width) BFMw(Rd, xZR, ((-lsb)%32)&0x1f, (width)-1) -#define BFCxw(Rd, lsb, width) BFMxw(Rd, xZR, rex.w?(((-lsb)%64)&0x3f):(((-lsb)%32)&0x1f), (width)-1) -// Insert lsb:width part of Rn into low part of Rd (leaving rest of Rd untouched) -#define BFXILx(Rd, Rn, lsb, width) EMIT(BFM_gen(1, 0b01, 1, (lsb), (lsb)+(width)-1, Rn, Rd)) -// Insert lsb:width part of Rn into low part of Rd (leaving rest of Rd untouched) -#define BFXILw(Rd, Rn, lsb, width) EMIT(BFM_gen(0, 0b01, 0, (lsb), (lsb)+(width)-1, Rn, Rd)) -// Insert lsb:width part of Rn into low part of Rd (leaving rest of Rd untouched) -#define BFXILxw(Rd, Rn, lsb, width) EMIT(BFM_gen(rex.w, 0b01, rex.w, (lsb), (lsb)+(width)-1, Rn, Rd)) - -// UBFX -#define UBFM_gen(sf, N, immr, imms, Rn, Rd) ((sf)<<31 | 0b10<<29 | 0b100110<<23 | (N)<<22 | 
(immr)<<16 | (imms)<<10 | (Rn)<<5 | (Rd)) -#define UBFMx(Rd, Rn, immr, imms) EMIT(UBFM_gen(1, 1, immr, imms, Rn, Rd)) -#define UBFMw(Rd, Rn, immr, imms) EMIT(UBFM_gen(0, 0, immr, imms, Rn, Rd)) -#define UBFMxw(Rd, Rn, immr, imms) EMIT(UBFM_gen(rex.w, rex.w, immr, imms, Rn, Rd)) -#define UBFXx(Rd, Rn, lsb, width) EMIT(UBFM_gen(1, 1, (lsb), (lsb)+(width)-1, Rn, Rd)) -#define UBFXw(Rd, Rn, lsb, width) EMIT(UBFM_gen(0, 0, (lsb), (lsb)+(width)-1, Rn, Rd)) -#define UBFXxw(Rd, Rn, lsb, width) EMIT(UBFM_gen(rex.w, rex.w, (lsb), (lsb)+(width)-1, Rn, Rd)) -#define UXTBx(Rd, Rn) EMIT(UBFM_gen(1, 1, 0, 7, Rn, Rd)) -#define UXTBw(Rd, Rn) EMIT(UBFM_gen(0, 0, 0, 7, Rn, Rd)) -#define UXTBxw(Rd, Rn) EMIT(UBFM_gen(rex.w, rex.w, 0, 7, Rn, Rd)) -#define UXTHx(Rd, Rn) EMIT(UBFM_gen(1, 1, 0, 15, Rn, Rd)) -#define UXTHw(Rd, Rn) EMIT(UBFM_gen(0, 0, 0, 15, Rn, Rd)) -#define LSRx(Rd, Rn, shift) EMIT(UBFM_gen(1, 1, shift, 63, Rn, Rd)) -#define LSRw(Rd, Rn, shift) EMIT(UBFM_gen(0, 0, shift, 31, Rn, Rd)) -#define LSRxw(Rd, Rn, shift) EMIT(UBFM_gen(rex.w, rex.w, shift, (rex.w)?63:31, Rn, Rd)) -#define LSLx(Rd, Rn, lsl) UBFMx(Rd, Rn, ((-(lsl))%64)&63, 63-(lsl)) -#define LSLw(Rd, Rn, lsl) UBFMw(Rd, Rn, ((-(lsl))%32)&31, 31-(lsl)) -#define LSLxw(Rd, Rn, lsl) UBFMxw(Rd, Rn, rex.w?(((-(lsl))%64)&63):(((-(lsl))%32)&31), (rex.w?63:31)-(lsl)) -// Take width first bits from Rn, LSL lsb and create Rd -#define UBFIZx(Rd, Rn, lsb, width) UBFMx(Rd, Rn, ((-(lsb))%64)&63, width-1) -// Take width first bits from Rn, LSL lsb and create Rd -#define UBFIZw(Rd, Rn, lsb, width) UBFMw(Rd, Rn, ((-(lsb))%32)&31, width-1) -// Take width first bits from Rn, LSL lsb and create Rd -#define UBFIZxw(Rd, Rn, lsb, width) UBFMxw(Rd, Rn, rex.w?(((-(lsb))%64)&63):(((-(lsb))%32)&31), width-1) - -// SBFM -#define SBFM_gen(sf, N, immr, imms, Rn, Rd) ((sf)<<31 | 0b00<<29 | 0b100110<<23 | (N)<<22 | (immr)<<16 | (imms)<<10 | (Rn)<<5 | (Rd)) -#define SBFMx(Rd, Rn, immr, imms) EMIT(SBFM_gen(1, 1, immr, imms, Rn, Rd)) -#define SBFMw(Rd, Rn, immr, imms) EMIT(SBFM_gen(0, 0, immr, imms, Rn, Rd)) -#define SBFMxw(Rd, Rn, immr, imms) EMIT(SBFM_gen(rex.w, rex.w, immr, imms, Rn, Rd)) -#define SBFXx(Rd, Rn, lsb, width) SBFMx(Rd, Rn, lsb, lsb+width-1) -#define SBFXw(Rd, Rn, lsb, width) SBFMw(Rd, Rn, lsb, lsb+width-1) -#define SBFXxw(Rd, Rn, lsb, width) SBFMxw(Rd, Rn, lsb, lsb+width-1) -#define SXTBx(Rd, Rn) SBFMx(Rd, Rn, 0, 7) -#define SXTBw(Rd, Rn) SBFMw(Rd, Rn, 0, 7) -#define SXTHx(Rd, Rn) SBFMx(Rd, Rn, 0, 15) -#define SXTHw(Rd, Rn) SBFMw(Rd, Rn, 0, 15) -#define SXTHxw(Rd, Rn) SBFMxw(Rd, Rn, 0, 15) -#define SXTWx(Rd, Rn) SBFMx(Rd, Rn, 0, 31) -#define ASRx(Rd, Rn, shift) SBFMx(Rd, Rn, shift, 63) -#define ASRw(Rd, Rn, shift) SBFMw(Rd, Rn, shift, 31) -#define ASRxw(Rd, Rn, shift) SBFMxw(Rd, Rn, shift, rex.w?63:31) -#define SBFIZx(Rd, Rn, lsb, width) SFBFMx(Rd, Rn, ((-(lsb))%64), (width)-1) -#define SBFIZw(Rd, Rn, lsb, width) SFBFMw(Rd, Rn, ((-(lsb))%32), (width)-1) -#define SBFIZxw(Rd, Rn, lsb, width) SFBFMxw(Rd, Rn, ((-(lsb))%(rex.w?64:32)), (width)-1) - -// EXTR -#define EXTR_gen(sf, N, Rm, imms, Rn, Rd) ((sf)<<31 | 0b00<<29 | 0b100111<<23 | (N)<<22 | (Rm)<<16 | (imms)<<10 | (Rn)<<5 | (Rd)) -#define EXTRx(Rd, Rn, Rm, lsb) EMIT(EXTR_gen(1, 1, Rm, lsb, Rn, Rd)) -#define EXTRw(Rd, Rn, Rm, lsb) EMIT(EXTR_gen(0, 0, Rm, lsb, Rn, Rd)) -#define EXTRxw(Rd, Rn, Rm, lsb) EMIT(EXTR_gen(rex.w, rex.w, Rm, lsb, Rn, Rd)) -#define RORx(Rd, Rn, lsb) EMIT(EXTR_gen(1, 1, Rn, lsb, Rn, Rd)) -#define RORw(Rd, Rn, lsb) EMIT(EXTR_gen(0, 0, Rn, lsb, Rn, Rd)) -#define RORxw(Rd, Rn, lsb) 
EMIT(EXTR_gen(rex.w, rex.w, Rn, lsb, Rn, Rd)) - -// RORV -#define RORV_gen(sf, Rm, Rn, Rd) ((sf)<<31 | 0b11010110<<21 | (Rm)<<16 | 0b0010<<12 | 0b11<<10 | (Rn)<<5 | (Rd)) -#define RORx_REG(Rd, Rn, Rm) EMIT(RORV_gen(1, Rm, Rn, Rd)) -#define RORw_REG(Rd, Rn, Rm) EMIT(RORV_gen(0, Rm, Rn, Rd)) -#define RORxw_REG(Rd, Rn, Rm) EMIT(RORV_gen(rex.w, Rm, Rn, Rd)) - - -// LSRV / LSLV -#define LS_V_gen(sf, Rm, op2, Rn, Rd) ((sf)<<31 | 0b11010110<<21 | (Rm)<<16 | 0b0010<<12 | (op2)<<10 | (Rn)<<5 | (Rd)) -#define LSRx_REG(Rd, Rn, Rm) EMIT(LS_V_gen(1, Rm, 0b01, Rn, Rd)) -#define LSRw_REG(Rd, Rn, Rm) EMIT(LS_V_gen(0, Rm, 0b01, Rn, Rd)) -#define LSRxw_REG(Rd, Rn, Rm) EMIT(LS_V_gen(rex.w, Rm, 0b01, Rn, Rd)) - -#define LSLx_REG(Rd, Rn, Rm) EMIT(LS_V_gen(1, Rm, 0b00, Rn, Rd)) -#define LSLw_REG(Rd, Rn, Rm) EMIT(LS_V_gen(0, Rm, 0b00, Rn, Rd)) -#define LSLxw_REG(Rd, Rn, Rm) EMIT(LS_V_gen(rex.w, Rm, 0b00, Rn, Rd)) - -// UMULL / SMULL -#define MADDL_gen(U, Rm, o0, Ra, Rn, Rd) (1<<31 | 0b11011<<24 | (U)<<23 | 0b01<<21 | (Rm)<<16 | (o0)<<15 | (Ra)<<10 | (Rn)<<5 | (Rd)) -#define UMADDL(Xd, Wn, Wm, Xa) EMIT(MADDL_gen(1, Wm, 0, Xa, Wn, Xd)) -#define UMULL(Xd, Wn, Wm) UMADDL(Xd, Wn, Wm, xZR) -#define SMADDL(Xd, Wn, Wm, Xa) EMIT(MADDL_gen(0, Wm, 0, Xa, Wn, Xd)) -#define SMULL(Xd, Wn, Wm) SMADDL(Xd, Wn, Wm, xZR) - -#define MULH_gen(U, Rm, Rn, Rd) (1<<31 | 0b11011<<24 | (U)<<23 | 0b10<<21 | (Rm)<<16 | 0b11111<<10 | (Rn)<<5 | (Rd)) -#define UMULH(Xd, Xn, Xm) EMIT(MULH_gen(1, Xm, Xn, Xd)) -#define SMULH(Xd, Xn, Xm) EMIT(MULH_gen(0, Xm, Xn, Xd)) - -#define MADD_gen(sf, Rm, o0, Ra, Rn, Rd) ((sf)<<31 | 0b11011<<24 | (Rm)<<16 | (o0)<<15 | (Ra)<<10 | (Rn)<<5 | (Rd)) -#define MADDx(Rd, Rn, Rm, Ra) EMIT(MADD_gen(1, Rm, 0, Ra, Rn, Rd)) -#define MADDw(Rd, Rn, Rm, Ra) EMIT(MADD_gen(0, Rm, 0, Ra, Rn, Rd)) -#define MADDxw(Rd, Rn, Rm, Ra) EMIT(MADD_gen(rex.w, Rm, 0, Ra, Rn, Rd)) -#define MULx(Rd, Rn, Rm) MADDx(Rd, Rn, Rm, xZR) -#define MULw(Rd, Rn, Rm) MADDw(Rd, Rn, Rm, xZR) -#define MULxw(Rd, Rn, Rm) MADDxw(Rd, Rn, Rm, xZR) -#define MSUBx(Rd, Rn, Rm, Ra) EMIT(MADD_gen(1, Rm, 1, Ra, Rn, Rd)) -#define MSUBw(Rd, Rn, Rm, Ra) EMIT(MADD_gen(0, Rm, 1, Ra, Rn, Rd)) -#define MSUBxw(Rd, Rn, Rm, Ra) EMIT(MADD_gen(rex.w, Rm, 1, Ra, Rn, Rd)) -#define MNEGx(Rd, Rn, Rm) EMIT(MADD_gen(1, Rm, 1, xZR, Rn, Rd)) -#define MNEGw(Rd, Rn, Rm) EMIT(MADD_gen(0, Rm, 1, xZR, Rn, Rd)) -#define MNEGxw(Rd, Rn, Rm) EMIT(MADD_gen(rex.w, Rm, 1, xZR, Rn, Rd)) - - -// DIV -#define DIV_gen(sf, Rm, o1, Rn, Rd) ((sf)<<31 | 0b11010110<<21 | (Rm)<<16 | 0b00001<<11 | (o1)<<10 | (Rn)<<5 | (Rd)) -#define UDIVw(Wd, Wn, Wm) EMIT(DIV_gen(0, Wm, 0, Wn, Wd)) -#define UDIVx(Xd, Xn, Xm) EMIT(DIV_gen(1, Xm, 0, Xn, Xd)) -#define SDIVw(Wd, Wn, Wm) EMIT(DIV_gen(0, Wm, 1, Wn, Wd)) -#define SDIVx(Xd, Xn, Xm) EMIT(DIV_gen(1, Xm, 1, Xn, Xd)) - -// CLZ -#define CL_gen(sf, op, Rn, Rd) ((sf)<<31 | 1<<30 | 0b11010110<<21 | 0b00010<<11 | (op)<<10 | (Rn)<<5 | (Rd)) -#define CLZx(Rd, Rn) EMIT(CL_gen(1, 0, Rn, Rd)) -#define CLZw(Rd, Rn) EMIT(CL_gen(0, 0, Rn, Rd)) -#define CLZxw(Rd, Rn) EMIT(CL_gen(rex.w, 0, Rn, Rd)) -#define CLSx(Rd, Rn) EMIT(CL_gen(1, 1, Rn, Rd)) -#define CLSw(Rd, Rn) EMIT(CL_gen(0, 1, Rn, Rd)) -#define CLSxw(Rd, Rn) EMIT(CL_gen(rex.w, 1, Rn, Rd)) - -// RBIT -#define RBIT_gen(sf, Rn, Rd) ((sf)<<31 | 1<<30 | 0b11010110<<21 | (Rn)<<5 | (Rd)) -#define RBITx(Rd, Rn) EMIT(RBIT_gen(1, Rn, Rd)) -#define RBITw(Rd, Rn) EMIT(RBIT_gen(0, Rn, Rd)) -#define RBITxw(Rd, Rn) EMIT(RBIT_gen(rex.w, Rn, Rd)) - -// REV -#define REV_gen(sf, opc, Rn, Rd) ((sf)<<31 | 1<<30 | 0b11010110<<21 | (opc)<<10 | 
(Rn)<<5 | (Rd)) -#define REV64x(Rd, Rn) EMIT(REV_gen(1, 0b11, Rn, Rd)) -#define REV32w(Rd, Rn) EMIT(REV_gen(0, 0b10, Rn, Rd)) -#define REVxw(Rd, Rn) EMIT(REV_gen(rex.w, 0b10|rex.w, Rn, Rd)) -#define REV16w(Rd, Rn) EMIT(REV_gen(0, 0b01, Rn, Rd)) -#define REV16x(Rd, Rn) EMIT(REV_gen(1, 0b01, Rn, Rd)) - -// MRS -#define MRS_gen(L, o0, op1, CRn, CRm, op2, Rt) (0b1101010100<<22 | (L)<<21 | 1<<20 | (o0)<<19 | (op1)<<16 | (CRn)<<12 | (CRm)<<8 | (op2)<<5 | (Rt)) -// mrs x0, nzcv : 1101010100 1 1 1 011 0100 0010 000 00000 o0=1(op0=3), op1=0b011(3) CRn=0b0100(4) CRm=0b0010(2) op2=0 -// MRS : from System register -#define MRS_nzvc(Rt) EMIT(MRS_gen(1, 1, 3, 4, 2, 0, Rt)) -// MSR : to System register -#define MSR_nzvc(Rt) EMIT(MRS_gen(0, 1, 3, 4, 2, 0, Rt)) -// mrs x0, fpcr : 1101010100 1 1 1 011 0100 0100 000 00000 o0=1(op0=3), op1=0b011(3) CRn=0b0100(4) CRm=0b0100(4) op2=0 -#define MRS_fpcr(Rt) EMIT(MRS_gen(1, 1, 3, 4, 4, 0, Rt)) -#define MSR_fpcr(Rt) EMIT(MRS_gen(0, 1, 3, 4, 4, 0, Rt)) -// mrs x0, fpsr : 1101010100 1 1 1 011 0100 0100 001 00000 o0=1(op0=3), op1=0b011(3) CRn=0b0100(4) CRm=0b0100(4) op2=1 -#define MRS_fpsr(Rt) EMIT(MRS_gen(1, 1, 3, 4, 4, 1, Rt)) -#define MSR_fpsr(Rt) EMIT(MRS_gen(0, 1, 3, 4, 4, 1, Rt)) -// NEON Saturation Bit -#define FPSR_QC 27 -// NEON Input Denormal Cumulative -#define FPSR_IDC 7 -// NEON IneXact Cumulative -#define FPSR_IXC 4 -// NEON Underflow Cumulative -#define FPSR_UFC 3 -// NEON Overflow Cumulative -#define FPSR_OFC 2 -// NEON Divide by 0 Cumulative -#define FPSR_DZC 1 -// NEON Invalid Operation Cumulative -#define FPSR_IOC 0 - -// FCSEL -#define FCSEL_scalar(type, Rm, cond, Rn, Rd) (0b11110<<24 | (type)<<22 | 1<<21 | (Rm)<<16 | (cond)<<12 | 0b11<<10 | (Rn)<<5 | (Rd)) -#define FCSELS(Sd, Sn, Sm, cond) EMIT(FCSEL_scalar(0b00, Sm, cond, Sn, Sd)) -#define FCSELD(Dd, Dn, Dm, cond) EMIT(FCSEL_scalar(0b01, Dm, cond, Dn, Dd)) - -// VLDR -#define VMEM_gen(size, opc, imm12, Rn, Rt) ((size)<<30 | 0b111<<27 | 1<<26 | 0b01<<24 | (opc)<<22 | (imm12)<<10 | (Rn)<<5 | (Rt)) -// imm14 must be 3-aligned -#define VLDR32_U12(Dt, Rn, imm14) EMIT(VMEM_gen(0b10, 0b01, ((uint32_t)((imm14)>>2))&0xfff, Rn, Dt)) -// imm15 must be 3-aligned -#define VLDR64_U12(Dt, Rn, imm15) EMIT(VMEM_gen(0b11, 0b01, ((uint32_t)((imm15)>>3))&0xfff, Rn, Dt)) -// imm16 must be 4-aligned -#define VLDR128_U12(Qt, Rn, imm16) EMIT(VMEM_gen(0b00, 0b11, ((uint32_t)((imm16)>>4))&0xfff, Rn, Qt)) -// (imm14) must be 3-aligned -#define VSTR32_U12(Dt, Rn, imm14) EMIT(VMEM_gen(0b10, 0b00, ((uint32_t)(imm14>>2))&0xfff, Rn, Dt)) -// (imm15) must be 3-aligned -#define VSTR64_U12(Dt, Rn, imm15) EMIT(VMEM_gen(0b11, 0b00, ((uint32_t)(imm15>>3))&0xfff, Rn, Dt)) -// imm16 must be 4-aligned -#define VSTR128_U12(Qt, Rn, imm16) EMIT(VMEM_gen(0b00, 0b10, ((uint32_t)((imm16)>>4))&0xfff, Rn, Qt)) -// (imm14) must be 1-aligned -#define VSTR16_U12(Ht, Rn, imm14) EMIT(VMEM_gen(0b01, 0b00, ((uint32_t)(imm14>>1))&0xfff, Rn, Ht)) - -#define VMEMUR_vector(size, opc, imm9, Rn, Rt) ((size)<<30 | 0b111<<27 | 1<<26 | (opc)<<22 | (imm9)<<12 | (Rn)<<5 | (Rt)) -// signed offset, no alignement! 
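The _U12 loads/stores above take an unsigned 12-bit immediate that is pre-scaled by the access size (hence the alignment notes), giving a (0..4095)*size byte range; the _I9 forms that follow take a raw signed 9-bit byte offset (-256..255) with no alignment requirement. A minimal sketch of how a byte offset could be matched to one of the two encodings for a 64-bit access (illustrative only; choose_vldr64_form is not a helper from this patch):

#include <stdio.h>

/* Illustrative helper: pick the VLDR64 addressing form that can encode a
   given byte offset, mirroring the constraints of the macros around it. */
static const char* choose_vldr64_form(long off)
{
    if (off >= 0 && (off & 7) == 0 && (off >> 3) <= 0xfff)
        return "VLDR64_U12 (unsigned 12-bit, scaled by 8)";
    if (off >= -256 && off <= 255)
        return "VLDR64_I9 (signed 9-bit, unscaled)";
    return "out of range: materialize the offset in a register";
}

int main(void)
{
    long tests[] = { 0, 8, 12, -16, 255, 256, 32760, 32768 };
    for (unsigned i = 0; i < sizeof(tests)/sizeof(tests[0]); ++i)
        printf("%6ld -> %s\n", tests[i], choose_vldr64_form(tests[i]));
    return 0;
}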
-#define VLDR8_I9(Vt, Rn, imm9) EMIT(VMEMUR(0b00, 0b01, (imm9)&0b111111111, Rn, Vt)) -#define VLDR16_I9(Vt, Rn, imm9) EMIT(VMEMUR(0b01, 0b01, (imm9)&0b111111111, Rn, Vt)) -#define VLDR32_I9(Vt, Rn, imm9) EMIT(VMEMUR(0b10, 0b01, (imm9)&0b111111111, Rn, Vt)) -#define VLDR64_I9(Vt, Rn, imm9) EMIT(VMEMUR(0b11, 0b01, (imm9)&0b111111111, Rn, Vt)) -#define VLDR128_I9(Vt, Rn, imm9) EMIT(VMEMUR(0b00, 0b11, (imm9)&0b111111111, Rn, Vt)) -// signed offset, no alignement! -#define VSTR8_I9(Vt, Rn, imm9) EMIT(VMEMUR(0b00, 0b00, (imm9)&0b111111111, Rn, Vt)) -#define VSTR16_I9(Vt, Rn, imm9) EMIT(VMEMUR(0b01, 0b00, (imm9)&0b111111111, Rn, Vt)) -#define VSTR32_I9(Vt, Rn, imm9) EMIT(VMEMUR(0b10, 0b00, (imm9)&0b111111111, Rn, Vt)) -#define VSTR64_I9(Vt, Rn, imm9) EMIT(VMEMUR(0b11, 0b00, (imm9)&0b111111111, Rn, Vt)) -#define VSTR128_I9(Vt, Rn, imm9) EMIT(VMEMUR(0b00, 0b10, (imm9)&0b111111111, Rn, Vt)) - -#define VMEMW_gen(size, opc, imm9, op2, Rn, Rt) ((size)<<30 | 0b111<<27 | 1<<26 | (opc)<<22 | (imm9)<<12 | (op2)<<10 | 0b01<<10 | (Rn)<<5 | (Rt)) -#define VLDR64_S9_postindex(Rt, Rn, imm9) EMIT(VMEMW_gen(0b11, 0b01, (imm9)&0x1ff, 0b01, Rn, Rt)) -#define VLDR64_S9_preindex(Rt, Rn, imm9) EMIT(VMEMW_gen(0b11, 0b01, (imm9)&0x1ff, 0b11, Rn, Rt)) -#define VLDR128_S9_postindex(Rt, Rn, imm9) EMIT(VMEMW_gen(0b11, 0b11, (imm9)&0x1ff, 0b01, Rn, Rt)) -#define VLDR128_S9_preindex(Rt, Rn, imm9) EMIT(VMEMW_gen(0b11, 0b11, (imm9)&0x1ff, 0b11, Rn, Rt)) -#define VSTR64_S9_postindex(Rt, Rn, imm9) EMIT(VMEMW_gen(0b11, 0b00, (imm9)&0x1ff, 0b01, Rn, Rt)) -#define VSTR64_S9_preindex(Rt, Rn, imm9) EMIT(VMEMW_gen(0b11, 0b00, (imm9)&0x1ff, 0b11, Rn, Rt)) -#define VSTR128_S9_postindex(Rt, Rn, imm9) EMIT(VMEMW_gen(0b11, 0b10, (imm9)&0x1ff, 0b01, Rn, Rt)) -#define VSTR128_S9_preindex(Rt, Rn, imm9) EMIT(VMEMW_gen(0b11, 0b10, (imm9)&0x1ff, 0b11, Rn, Rt)) - -#define VMEM_REG_gen(size, opc, Rm, option, S, Rn, Rt) ((size)<<30 | 0b111<<27 | 1<<26 | (opc)<<22 | 1<<21 | (Rm)<<16 | (option)<<13 | (S)<<12 | 0b10<<10 | (Rn)<<5 | (Rt)) - -#define VLDR32_REG(Dt, Rn, Rm) EMIT(VMEM_REG_gen(0b10, 0b01, Rm, 0b011, 0, Rn, Dt)) -#define VLDR32_REG_LSL3(Dt, Rn, Rm) EMIT(VMEM_REG_gen(0b10, 0b01, Rm, 0b011, 1, Rn, Dt)) -#define VLDR64_REG(Dt, Rn, Rm) EMIT(VMEM_REG_gen(0b11, 0b01, Rm, 0b011, 0, Rn, Dt)) -#define VLDR64_REG_LSL3(Dt, Rn, Rm) EMIT(VMEM_REG_gen(0b11, 0b01, Rm, 0b011, 1, Rn, Dt)) -#define VLDR128_REG(Qt, Rn, Rm) EMIT(VMEM_REG_gen(0b00, 0b11, Rm, 0b011, 0, Rn, Dt)) -#define VLDR128_REG_LSL4(Qt, Rn, Rm) EMIT(VMEM_REG_gen(0b00, 0b11, Rm, 0b011, 1, Rn, Dt)) - -#define VSTR32_REG(Dt, Rn, Rm) EMIT(VMEM_REG_gen(0b10, 0b00, Rm, 0b011, 0, Rn, Dt)) -#define VSTR32_REG_LSL3(Dt, Rn, Rm) EMIT(VMEM_REG_gen(0b10, 0b00, Rm, 0b011, 1, Rn, Dt)) -#define VSTR64_REG(Dt, Rn, Rm) EMIT(VMEM_REG_gen(0b11, 0b00, Rm, 0b011, 0, Rn, Dt)) -#define VSTR64_REG_LSL3(Dt, Rn, Rm) EMIT(VMEM_REG_gen(0b11, 0b00, Rm, 0b011, 1, Rn, Dt)) -#define VSTR128_REG(Qt, Rn, Rm) EMIT(VMEM_REG_gen(0b00, 0b10, Rm, 0b011, 0, Rn, Dt)) -#define VSTR128_REG_LSL4(Qt, Rn, Rm) EMIT(VMEM_REG_gen(0b00, 0b10, Rm, 0b011, 1, Rn, Dt)) - -#define VLDR_PC_gen(opc, imm19, Rt) ((opc)<<30 | 0b011<<27 | 1<<26 | (imm19)<<5 | (Rt)) -#define VLDR32_literal(Vt, imm19) EMIT(VLDR_PC_gen(0b00, ((imm19)>>2)&0x7FFFF, Vt)) -#define VLDR64_literal(Vt, imm19) EMIT(VLDR_PC_gen(0b01, ((imm19)>>2)&0x7FFFF, Vt)) -#define VLDR128_literal(Vt, imm19) EMIT(VLDR_PC_gen(0b10, ((imm19)>>2)&0x7FFFF, Vt)) - - -#define LD1R_gen(Q, size, Rn, Rt) ((Q)<<30 | 0b0011010<<23 | 1<<22 | 0<<21 | 0b110<<13 | (size)<<10 | (Rn)<<5 | (Rt)) -#define 
VLDQ1R_8(Vt, Rn) EMIT(LD1R_gen(1, 0b00, Rn, Vt)) -#define VLDQ1R_16(Vt, Rn) EMIT(LD1R_gen(1, 0b01, Rn, Vt)) -#define VLDQ1R_32(Vt, Rn) EMIT(LD1R_gen(1, 0b10, Rn, Vt)) -#define VLDQ1R_64(Vt, Rn) EMIT(LD1R_gen(1, 0b11, Rn, Vt)) -#define VLD1R_8(Vt, Rn) EMIT(LD1R_gen(0, 0b00, Rn, Vt)) -#define VLD1R_16(Vt, Rn) EMIT(LD1R_gen(0, 0b01, Rn, Vt)) -#define VLD1R_32(Vt, Rn) EMIT(LD1R_gen(0, 0b10, Rn, Vt)) - -#define LD1_single(Q, opcode, S, size, Rn, Rt) ((Q)<<30 | 0b0011010<<23 | 1<<22 | 0<<21 | (opcode)<<13 | (S)<<12 | (size)<<10 | (Rn)<<5 | (Rt)) -#define VLD1_8(Vt, index, Rn) EMIT(LD1_single(((index)>>3)&1, 0b000, ((index)>>2)&1, (index)&3, Rn, Vt)) -#define VLD1_16(Vt, index, Rn) EMIT(LD1_single(((index)>>2)&1, 0b010, ((index)>>1)&1, ((index)&1)<<1, Rn, Vt)) -#define VLD1_32(Vt, index, Rn) EMIT(LD1_single(((index)>>1)&1, 0b100, ((index))&1, 0b00, Rn, Vt)) -#define VLD1_64(Vt, index, Rn) EMIT(LD1_single(((index))&1, 0b100, 0, 0b01, Rn, Vt)) - -#define ST1_single(Q, opcode, S, size, Rn, Rt) ((Q)<<30 | 0b0011010<<23 | 0<<22 | 0<<21 | (opcode)<<13 | (S)<<12 | (size)<<10 | (Rn)<<5 | (Rt)) -#define VST1_8(Vt, index, Rn) EMIT(ST1_single(((index)>>3)&1, 0b000, ((index)>>2)&1, (index)&3, Rn, Vt)) -#define VST1_16(Vt, index, Rn) EMIT(ST1_single(((index)>>2)&1, 0b010, ((index)>>1)&1, ((index)&1)<<1, Rn, Vt)) -#define VST1_32(Vt, index, Rn) EMIT(ST1_single(((index)>>1)&1, 0b100, ((index))&1, 0b00, Rn, Vt)) -#define VST1_64(Vt, index, Rn) EMIT(ST1_single(((index))&1, 0b100, 0, 0b01, Rn, Vt)) - -// LOGIC -#define VLOGIC_gen(Q, opc2, Rm, Rn, Rd) ((Q)<<30 | 1<<29 | 0b01110<<24 | (opc2)<<22 | 1<<21 | (Rm)<<16 | 0b00011<<11 | 1<<10 | (Rn)<<5 | (Rd)) -#define VEORQ(Vd, Vn, Vm) EMIT(VLOGIC_gen(1, 0b00, Vm, Vn, Vd)) -#define VEOR(Vd, Vn, Vm) EMIT(VLOGIC_gen(0, 0b00, Vm, Vn, Vd)) - -#define VLOGIC_immediate(Q, op, abc, cmade, defgh, Rd) ((Q)<<30 | (op)<<29 | 0b0111100000<<19 | (abc)<<16 | (cmode)<<12 | 1<<10 | (defgh)<<5 | (Rd)) -//#define V - -#define SHL_vector(Q, immh, immb, Rn, Rd) ((Q)<<30 | 0b011110<<23 | (immh)<<19 | (immb)<<16 | 0b01010<<11 | 1<<10 | (Rn)<<5 | (Rd)) -#define VSHLQ_8(Vd, Vn, shift) EMIT(SHL_vector(1, 0b0001, (shift)&7, Vn, Vd)) -#define VSHLQ_16(Vd, Vn, shift) EMIT(SHL_vector(1, 0b0010 | (((shift)>>3)&1), (shift)&7, Vn, Vd)) -#define VSHLQ_32(Vd, Vn, shift) EMIT(SHL_vector(1, 0b0100 | (((shift)>>3)&3), (shift)&7, Vn, Vd)) -#define VSHLQ_64(Vd, Vn, shift) EMIT(SHL_vector(1, 0b1000 | (((shift)>>3)&7), (shift)&7, Vn, Vd)) -#define VSHL_8(Vd, Vn, shift) EMIT(SHL_vector(0, 0b0001, (shift)&7, Vn, Vd)) -#define VSHL_16(Vd, Vn, shift) EMIT(SHL_vector(0, 0b0010 | (((shift)>>3)&1), (shift)&7, Vn, Vd)) -#define VSHL_32(Vd, Vn, shift) EMIT(SHL_vector(0, 0b0100 | (((shift)>>3)&3), (shift)&7, Vn, Vd)) - -#define SHL_scalar(U, size, Rm, R, S, Rn, Rd) (0b01<<30 | (U)<<29 | 0b11110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 0b010<<13 | (R)<<12 | (S)<<11 | 1<<10 | (Rn)<<5 | (Rd)) -#define SSHL_R_64(Vd, Vn, Vm) EMIT(SHL_scalar(0, 0b11, Vm, 0, 0, Vn, Vd)) -#define USHL_R_64(Vd, Vn, Vm) EMIT(SHL_scalar(1, 0b11, Vm, 0, 0, Vn, Vd)) - -#define SHL_scalar_imm(U, immh, immb, Rn, Rd) (0b01<<30 | 0b111110<<23 | (immh)<<19 | (immb)<<16 | 0b01010<<11 | 1<<10 | (Rn)<<5 | (Rd)) -#define SHL_64(Vd, Vn, shift) EMIT(SHL_scalar_imm(0, 0b1000 | (((shift)>>3)&7), (shift)&7, Vn, Vd)) - -#define SHL_vector_vector(Q, U, size, Rm, R, S, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 0b010<<13 | (R)<<12 | (S)<<11 | 1<<10 | (Rn)<<5 | (Rd)) -#define SSHL_8(Vd, Vn, Vm) EMIT(SHL_vector_vector(0, 0, 0b00, Vm, 0, 0, 
Vn, Vd)) -#define SSHL_16(Vd, Vn, Vm) EMIT(SHL_vector_vector(0, 0, 0b01, Vm, 0, 0, Vn, Vd)) -#define SSHL_32(Vd, Vn, Vm) EMIT(SHL_vector_vector(0, 0, 0b10, Vm, 0, 0, Vn, Vd)) -#define SSHLQ_8(Vd, Vn, Vm) EMIT(SHL_vector_vector(1, 0, 0b00, Vm, 0, 0, Vn, Vd)) -#define SSHLQ_16(Vd, Vn, Vm) EMIT(SHL_vector_vector(1, 0, 0b01, Vm, 0, 0, Vn, Vd)) -#define SSHLQ_32(Vd, Vn, Vm) EMIT(SHL_vector_vector(1, 0, 0b10, Vm, 0, 0, Vn, Vd)) -#define SSHLQ_64(Vd, Vn, Vm) EMIT(SHL_vector_vector(1, 0, 0b11, Vm, 0, 0, Vn, Vd)) -#define USHL_8(Vd, Vn, Vm) EMIT(SHL_vector_vector(0, 1, 0b00, Vm, 0, 0, Vn, Vd)) -#define USHL_16(Vd, Vn, Vm) EMIT(SHL_vector_vector(0, 1, 0b01, Vm, 0, 0, Vn, Vd)) -#define USHL_32(Vd, Vn, Vm) EMIT(SHL_vector_vector(0, 1, 0b10, Vm, 0, 0, Vn, Vd)) -#define USHLQ_8(Vd, Vn, Vm) EMIT(SHL_vector_vector(1, 1, 0b00, Vm, 0, 0, Vn, Vd)) -#define USHLQ_16(Vd, Vn, Vm) EMIT(SHL_vector_vector(1, 1, 0b01, Vm, 0, 0, Vn, Vd)) -#define USHLQ_32(Vd, Vn, Vm) EMIT(SHL_vector_vector(1, 1, 0b10, Vm, 0, 0, Vn, Vd)) -#define USHLQ_64(Vd, Vn, Vm) EMIT(SHL_vector_vector(1, 1, 0b11, Vm, 0, 0, Vn, Vd)) - -#define SHR_vector(Q, U, immh, immb, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b011110<<23 | (immh)<<19 | (immb)<<16 | 0b00000<<11 | 1<<10 | (Rn)<<5 | (Rd)) -#define VSHRQ_8(Vd, Vn, shift) EMIT(SHR_vector(1, 1, 0b0001, (8-(shift))&7, Vn, Vd)) -#define VSHRQ_16(Vd, Vn, shift) EMIT(SHR_vector(1, 1, 0b0010 | (((16-(shift))>>3)&1), (16-(shift))&7, Vn, Vd)) -#define VSHRQ_32(Vd, Vn, shift) EMIT(SHR_vector(1, 1, 0b0100 | (((32-(shift))>>3)&3), (32-(shift))&7, Vn, Vd)) -#define VSHRQ_64(Vd, Vn, shift) EMIT(SHR_vector(1, 1, 0b1000 | (((64-(shift))>>3)&7), (64-(shift))&7, Vn, Vd)) -#define VSHR_8(Vd, Vn, shift) EMIT(SHR_vector(0, 1, 0b0001, (8-(shift))&7, Vn, Vd)) -#define VSHR_16(Vd, Vn, shift) EMIT(SHR_vector(0, 1, 0b0010 | (((16-(shift))>>3)&1), (16-(shift))&7, Vn, Vd)) -#define VSHR_32(Vd, Vn, shift) EMIT(SHR_vector(0, 1, 0b0100 | (((32-(shift))>>3)&3), (32-(shift))&7, Vn, Vd)) -#define VSSHRQ_8(Vd, Vn, shift) EMIT(SHR_vector(1, 0, 0b0001, (8-(shift))&7, Vn, Vd)) -#define VSSHRQ_16(Vd, Vn, shift) EMIT(SHR_vector(1, 0, 0b0010 | (((16-(shift))>>3)&1), (16-(shift))&7, Vn, Vd)) -#define VSSHRQ_32(Vd, Vn, shift) EMIT(SHR_vector(1, 0, 0b0100 | (((32-(shift))>>3)&3), (32-(shift))&7, Vn, Vd)) -#define VSSHRQ_64(Vd, Vn, shift) EMIT(SHR_vector(1, 0, 0b1000 | (((64-(shift))>>3)&7), (64-(shift))&7, Vn, Vd)) -#define VSSHR_8(Vd, Vn, shift) EMIT(SHR_vector(0, 0, 0b0001, (8-(shift))&7, Vn, Vd)) -#define VSSHR_16(Vd, Vn, shift) EMIT(SHR_vector(0, 0, 0b0010 | (((16-(shift))>>3)&1), (16-(shift))&7, Vn, Vd)) -#define VSSHR_32(Vd, Vn, shift) EMIT(SHR_vector(0, 0, 0b0100 | (((32-(shift))>>3)&3), (32-(shift))&7, Vn, Vd)) - -#define SHR_scalar_imm(U, immh, immb, o1, o0, Rn, Rd) (0b01<<30 | (U)<<29 | 0b111110<<23 | (immh)<<19 | (immb)<<16 | (o1)<<13 | (o0)<<12 | 1<<10 | (Rn)<<5 | (Rd)) -#define SSHR_64(Vd, Vn, shift) EMIT(SHR_scalar_imm(0, 0b1000 | (((64-(shift))>>3)&7), (64-(shift))&7, 0, 0, Vn, Vd)) -#define USHR_64(Vd, Vn, shift) EMIT(SHR_scalar_imm(1, 0b1000 | (((64-(shift))>>3)&7), (64-(shift))&7, 0, 0, Vn, Vd)) - -#define EXT_vector(Q, Rm, imm4, Rn, Rd) ((Q)<<30 | 0b101110<<24 | (Rm)<<16 | (imm4)<<11 | (Rn)<<5 | (Rd)) -#define VEXT_8(Rd, Rn, Rm, index) EMIT(EXT_vector(0, Rm, index, Rn, Rd)) -#define VEXTQ_8(Rd, Rn, Rm, index) EMIT(EXT_vector(1, Rm, index, Rn, Rd)) - -// Shift Left and Insert (not touching lower part of dest) -#define SLI_vector(Q, immh, immb, Rn, Rd) ((Q)<<30 | 1<<29 | 0b011110<<23 | (immh)<<19 | (immb)<<16 | 0b01010<<1 | 1<<10 | 
(Rn)<<5 | (Rd)) -#define VSLIQ_8(Vd, Vn, shift) EMIT(VSLI_vector(1, 0b0001, (shift)&7, Vn, Vd)) -#define VSLIQ_16(Vd, Vn, shift) EMIT(VSLI_vector(1, 0b0010 | ((shift)>>3)&1, (shift)&7, Vn, Vd)) -#define VSLIQ_32(Vd, Vn, shift) EMIT(VSLI_vector(1, 0b0100 | (((shift)>>3)&3), (shift)&7, Vn, Vd)) -#define VSLIQ_64(Vd, Vn, shift) EMIT(VSLI_vector(1, 0b1000 | (((shift)>>3)&7), (shift)&7, Vn, Vd)) -#define VSLI_8(Vd, Vn, shift) EMIT(VSLI_vector(0, 0b0001, (shift)&7, Vn, Vd)) -#define VSLI_16(Vd, Vn, shift) EMIT(VSLI_vector(0, 0b0010 | ((shift)>>3)&1, (shift)&7, Vn, Vd)) -#define VSLI_32(Vd, Vn, shift) EMIT(VSLI_vector(0, 0b0100 | (((shift)>>3)&3), (shift)&7, Vn, Vd)) - -// Shift Right and Insert (not touching higher part of dest) -#define SRI_vector(Q, immh, immb, Rn, Rd) ((Q)<<30 | 1<<29 | 0b011110<<23 | (immh)<<19 | (immb)<<16 | 0b01000<<1 | 1<<10 | (Rn)<<5 | (Rd)) -#define VSRIQ_8(Vd, Vn, shift) EMIT(VSRI_vector(1, 0b0001, (shift)&7, Vn, Vd)) -#define VSRIQ_16(Vd, Vn, shift) EMIT(VSRI_vector(1, 0b0010 | ((shift)>>3)&1, (shift)&7, Vn, Vd)) -#define VSRIQ_32(Vd, Vn, shift) EMIT(VSRI_vector(1, 0b0100 | (((shift)>>3)&3), (shift)&7, Vn, Vd)) -#define VSRIQ_64(Vd, Vn, shift) EMIT(VSRI_vector(1, 0b1000 | (((shift)>>3)&7), (shift)&7, Vn, Vd)) -#define VSRI_8(Vd, Vn, shift) EMIT(VSRI_vector(0, 0b0001, (shift)&7, Vn, Vd)) -#define VSRI_16(Vd, Vn, shift) EMIT(VSRI_vector(0, 0b0010 | ((shift)>>3)&1, (shift)&7, Vn, Vd)) -#define VSRI_32(Vd, Vn, shift) EMIT(VSRI_vector(0, 0b0100 | (((shift)>>3)&3), (shift)&7, Vn, Vd)) - -// Integer MATH -#define ADDSUB_vector(Q, U, size, Rm, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 0b10000<<11 | 1<<10 | (Rn)<<5 | (Rd)) -#define VADDQ_8(Vd, Vn, Vm) EMIT(ADDSUB_vector(1, 0, 0b00, Vm, Vn, Vd)) -#define VADDQ_16(Vd, Vn, Vm) EMIT(ADDSUB_vector(1, 0, 0b01, Vm, Vn, Vd)) -#define VADDQ_32(Vd, Vn, Vm) EMIT(ADDSUB_vector(1, 0, 0b10, Vm, Vn, Vd)) -#define VADDQ_64(Vd, Vn, Vm) EMIT(ADDSUB_vector(1, 0, 0b11, Vm, Vn, Vd)) -#define VADD_8(Vd, Vn, Vm) EMIT(ADDSUB_vector(0, 0, 0b00, Vm, Vn, Vd)) -#define VADD_16(Vd, Vn, Vm) EMIT(ADDSUB_vector(0, 0, 0b01, Vm, Vn, Vd)) -#define VADD_32(Vd, Vn, Vm) EMIT(ADDSUB_vector(0, 0, 0b10, Vm, Vn, Vd)) -#define VSUBQ_8(Vd, Vn, Vm) EMIT(ADDSUB_vector(1, 1, 0b00, Vm, Vn, Vd)) -#define VSUBQ_16(Vd, Vn, Vm) EMIT(ADDSUB_vector(1, 1, 0b01, Vm, Vn, Vd)) -#define VSUBQ_32(Vd, Vn, Vm) EMIT(ADDSUB_vector(1, 1, 0b10, Vm, Vn, Vd)) -#define VSUBQ_64(Vd, Vn, Vm) EMIT(ADDSUB_vector(1, 1, 0b11, Vm, Vn, Vd)) -#define VSUB_8(Vd, Vn, Vm) EMIT(ADDSUB_vector(0, 1, 0b00, Vm, Vn, Vd)) -#define VSUB_16(Vd, Vn, Vm) EMIT(ADDSUB_vector(0, 1, 0b01, Vm, Vn, Vd)) -#define VSUB_32(Vd, Vn, Vm) EMIT(ADDSUB_vector(0, 1, 0b10, Vm, Vn, Vd)) - -#define NEGABS_vector(Q, U, size, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 0b10000<<17 | 0b01011<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) -#define NEG_8(Vd, Vn) EMIT(NEGABS_vector(0, 1, 0b00, Vn, Vd)) -#define NEG_16(Vd, Vn) EMIT(NEGABS_vector(0, 1, 0b01, Vn, Vd)) -#define NEG_32(Vd, Vn) EMIT(NEGABS_vector(0, 1, 0b10, Vn, Vd)) -#define NEGQ_8(Vd, Vn) EMIT(NEGABS_vector(1, 1, 0b00, Vn, Vd)) -#define NEGQ_16(Vd, Vn) EMIT(NEGABS_vector(1, 1, 0b01, Vn, Vd)) -#define NEGQ_32(Vd, Vn) EMIT(NEGABS_vector(1, 1, 0b10, Vn, Vd)) -#define NEGQ_64(Vd, Vn) EMIT(NEGABS_vector(1, 1, 0b11, Vn, Vd)) -#define ABS_8(Vd, Vn) EMIT(NEGABS_vector(0, 0, 0b00, Vn, Vd)) -#define ABS_16(Vd, Vn) EMIT(NEGABS_vector(0, 0, 0b01, Vn, Vd)) -#define ABS_32(Vd, Vn) EMIT(NEGABS_vector(0, 0, 0b10, Vn, Vd)) -#define ABSQ_8(Vd, Vn) 
EMIT(NEGABS_vector(1, 0, 0b00, Vn, Vd)) -#define ABSQ_16(Vd, Vn) EMIT(NEGABS_vector(1, 0, 0b01, Vn, Vd)) -#define ABSQ_32(Vd, Vn) EMIT(NEGABS_vector(1, 0, 0b10, Vn, Vd)) -#define ABSQ_64(Vd, Vn) EMIT(NEGABS_vector(1, 0, 0b11, Vn, Vd)) - -#define NEGABS_vector_scalar(U, size, Rn, Rd) (0b01<<30 | (U)<<29 | 0b11110<<24 | (size)<<22 | 0b10000<<17 | 0b01011<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) -#define NEG_64(Vd, Vn) EMIT(NEGABS_vector_scalar(1, 0b11, Vn, Vd)) -#define ABS_64(Vd, Vn) EMIT(NEGABS_vector_scalar(0, 0b11, Vn, Vd)) - -// FMOV -#define FMOV_general(sf, type, mode, opcode, Rn, Rd) ((sf)<<31 | 0b11110<<24 | (type)<<22 | 1<<21 | (mode)<<19 | (opcode)<<16 | (Rn)<<5 | (Rd)) -// 32-bit to single-precision -#define FMOVSw(Sd, Wn) EMIT(FMOV_general(0, 0b00, 0b00, 0b111, Wn, Sd)) -// Single-precision to 32-bit -#define FMOVwS(Wd, Sn) EMIT(FMOV_general(0, 0b00, 0b00, 0b110, Sn, Wd)) -// 64-bit to double-precision -#define FMOVDx(Dd, Xn) EMIT(FMOV_general(1, 0b01, 0b00, 0b111, Xn, Dd)) -// 64-bit to top half of 128-bit -#define FMOVD1x(Vd, Xn) EMIT(FMOV_general(1, 0b10, 0b01, 0b111, Xn, Vd)) -// Double-precision to 64-bit -#define FMOVxD(Xd, Dn) EMIT(FMOV_general(1, 0b01, 0b00, 0b110, Dn, Xd)) -// Top half of 128-bit to 64-bit -#define FMOVxD1(Xd, Vn) EMIT(FMOV_general(1, 0b10, 0b01, 0b110, Vn, Xd)) - -#define FMOV_register(type, Rn, Rd) (0b11110<<24 | (type)<<22 | 1<<21 | 0b10000<<10 | (Rn)<<5 | (Rd)) -#define FMOVS(Sd, Sn) EMIT(FMOV_register(0b00, Sn, Sd)) -#define FMOVD(Dd, Dn) EMIT(FMOV_register(0b01, Dn, Dd)) - -#define FMOV_vector_imm(Q, op, abc, defgh, Rd) ((Q)<<30 | (op)<<29 | 0b0111100000<<19 | (abc)<<16 | 0b1111<<12 | 1<<10 | (defgh)<<5 | (Rd)) -#define VFMOVS_8(Vd, u8) EMIT(FMOV_vector_imm(0, 0, ((u8)>>5)&0b111, (u8)&0b11111, Vd)) -#define VFMOVSQ_8(Vd, u8) EMIT(FMOV_vector_imm(1, 0, ((u8)>>5)&0b111, (u8)&0b11111, Vd)) -#define VFMOVDQ_8(Vd, u8) EMIT(FMOV_vector_imm(1, 1, ((u8)>>5)&0b111, (u8)&0b11111, Vd)) - -#define FMOV_scalar_imm(type, imm8, Rd) (0b11110<<24 | (type)<<22 | 1<<21 | (imm8)<<13 | 0b100<<10 | (Rd)) -#define FMOVS_8(Sd, u8) EMIT(FMOV_scalar_imm(0b00, u8, Sd)) -#define FMOVD_8(Dd, u8) EMIT(FMOV_scalar_imm(0b01, u8, Dd)) - -// VMOV -#define VMOV_element(imm5, imm4, Rn, Rd) (1<<30 | 1<<29 | 0b01110000<<21 | (imm5)<<16 | (imm4)<<11 | 1<<10 | (Rn)<<5 | (Rd)) -#define VMOVeB(Vd, i1, Vn, i2) EMIT(VMOV_element(((i1)<<1) | 1, (i2), Vn, Vd)) -#define VMOVeH(Vd, i1, Vn, i2) EMIT(VMOV_element(((i1)<<2) | 2, (i2)<<1, Vn, Vd)) -#define VMOVeS(Vd, i1, Vn, i2) EMIT(VMOV_element(((i1)<<3) | 4, (i2)<<2, Vn, Vd)) -#define VMOVeD(Vd, i1, Vn, i2) EMIT(VMOV_element(((i1)<<4) | 8, (i2)<<3, Vn, Vd)) - -#define VMOV_from(imm5, Rn, Rd) (1<<30 | 0<<29 | 0b01110000<<21 | (imm5)<<16 | 0b0011<<11 | 1<<10 | (Rn)<<5 | (Rd)) -#define VMOVQBfrom(Vd, index, Wn) EMIT(VMOV_from(((index)<<1) | 1, Wn, Vd)) -#define VMOVQHfrom(Vd, index, Wn) EMIT(VMOV_from(((index)<<2) | 2, Wn, Vd)) -#define VMOVQSfrom(Vd, index, Wn) EMIT(VMOV_from(((index)<<3) | 4, Wn, Vd)) -#define VMOVQDfrom(Vd, index, Xn) EMIT(VMOV_from(((index)<<4) | 8, Xn, Vd)) - -#define UMOV_gen(Q, imm5, Rn, Rd) ((Q)<<30 | 0b01110000<<21 | (imm5)<<16 | 0b01<<13 | 1<<12 | 1<<11 | 1<<10 | (Rn)<<5 | (Rd)) -#define VMOVQDto(Xd, Vn, index) EMIT(UMOV_gen(1, ((index)<<4) | 8, Vn, Xd)) -#define VMOVBto(Wd, Vn, index) EMIT(UMOV_gen(0, ((index)<<1) | 1, Vn, Wd)) -#define VMOVHto(Wd, Vn, index) EMIT(UMOV_gen(0, ((index)<<2) | 2, Vn, Wd)) -#define VMOVSto(Wd, Vn, index) EMIT(UMOV_gen(0, ((index)<<3) | 4, Vn, Wd)) - -#define MVN_vector(Q, Rn, Rd) ((Q)<<30 | 1<<29 | 
0b01110<<24 | 0b10000<<17 | 0b00101<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) -#define VMVNQ(Rd, Rn) EMIT(MVN_vector(1, Rn, Rd)) - -// VORR -#define ORR_vector(Q, Rm, Rn, Rd) ((Q)<<30 | 0b01110<<24 | 0b10<<22 | 1<<21 | (Rm)<<16 | 0b00011<<11 | 1<<10 | (Rn)<<5 | (Rd)) -#define VORRQ(Vd, Vn, Vm) EMIT(ORR_vector(1, Vm, Vn, Vd)) -#define VORR(Dd, Dn, Dm) EMIT(ORR_vector(0, Dm, Dn, Dd)) -#define VMOVQ(Vd, Vn) EMIT(ORR_vector(1, Vn, Vn, Vd)) -#define VMOV(Dd, Dn) EMIT(ORR_vector(0, Dn, Dn, Dd)) - -// VAND -#define AND_vector(Q, Rm, Rn, Rd) ((Q)<<30 | 0b01110<<24 | 0b00<<22 | 1<<21 | (Rm)<<16 | 0b00011<<11 | 1<<10 | (Rn)<<5 | (Rd)) -#define VANDQ(Vd, Vn, Vm) EMIT(AND_vector(1, Vm, Vn, Vd)) -#define VAND(Dd, Dn, Dm) EMIT(AND_vector(0, Dm, Dn, Dd)) - -// VBIC -#define BIC_vector(Q, Rm, Rn, Rd) ((Q)<<30 | 0b01110<<24 | 0b01<<22 | 1<<21 | (Rm)<<16 | 0b00011<<11 | 1<<10 | (Rn)<<5 | (Rd)) -#define VBICQ(Vd, Vn, Vm) EMIT(BIC_vector(1, Vm, Vn, Vd)) -#define VBIC(Dd, Dn, Dm) EMIT(BIC_vector(0, Dm, Dn, Dd)) - -// VORN -#define ORN_vector(Q, Rm, Rn, Rd) ((Q)<<30 | 0b01110<<24 | 0b11<<22 | 1<<21 | (Rm)<<16 | 0b00011<<11 | 1<<10 | (Rn)<<5 | (Rd)) -#define VORNQ(Vd, Vn, Vm) EMIT(ORN_vector(1, Vm, Vn, Vd)) -#define VORN(Dd, Dn, Dm) EMIT(ORN_vector(0, Dm, Dn, Dd)) - -// ADD / SUB -#define FADDSUB_vector(Q, U, op, sz, Rm, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (op)<<23 | (sz)<<22 | 1<<21 | (Rm)<<16 | 0b11010<<11 | 1<<10 | (Rn)<<5 | (Rd)) -#define VFADDQS(Vd, Vn, Vm) EMIT(FADDSUB_vector(1, 0, 0, 0, Vm, Vn, Vd)) -#define VFADDQD(Vd, Vn, Vm) EMIT(FADDSUB_vector(1, 0, 0, 1, Vm, Vn, Vd)) -#define VFADDS(Dd, Dn, Dm) EMIT(FADDSUB_vector(0, 0, 0, 0, Dm, Dn, Dd)) - -#define VFSUBQS(Vd, Vn, Vm) EMIT(FADDSUB_vector(1, 0, 1, 0, Vm, Vn, Vd)) -#define VFSUBQD(Vd, Vn, Vm) EMIT(FADDSUB_vector(1, 0, 1, 1, Vm, Vn, Vd)) -#define VFSUBS(Dd, Dn, Dm) EMIT(FADDSUB_vector(0, 0, 1, 0, Dm, Dn, Dd)) - -#define FADDSUB_scalar(type, Rm, op, Rn, Rd) (0b11110<<24 | (type)<<22 | 1<<21 | (Rm)<<16 | 0b001<<13 | (op)<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) -#define FADDS(Sd, Sn, Sm) EMIT(FADDSUB_scalar(0b00, Sm, 0, Sn, Sd)) -#define FADDD(Dd, Dn, Dm) EMIT(FADDSUB_scalar(0b01, Dm, 0, Dn, Dd)) - -#define FSUBS(Sd, Sn, Sm) EMIT(FADDSUB_scalar(0b00, Sm, 1, Sn, Sd)) -#define FSUBD(Dd, Dn, Dm) EMIT(FADDSUB_scalar(0b01, Dm, 1, Dn, Dd)) - -// ADD Pair -#define ADDP_vector(Q, size, Rm, Rn, Rd) ((Q)<<30 | 0b01110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 0b10111<<11 | 1<<10 | (Rn)<<5 | (Rd)) -#define VADDPQ_8(Vd, Vn, Vm) EMIT(ADDP_vector(1, 0b00, Vm, Vn, Vd)) -#define VADDPQ_16(Vd, Vn, Vm) EMIT(ADDP_vector(1, 0b01, Vm, Vn, Vd)) -#define VADDPQ_32(Vd, Vn, Vm) EMIT(ADDP_vector(1, 0b10, Vm, Vn, Vd)) -#define VADDPQ_64(Vd, Vn, Vm) EMIT(ADDP_vector(1, 0b11, Vm, Vn, Vd)) -#define VADDP_8(Vd, Vn, Vm) EMIT(ADDP_vector(0, 0b00, Vm, Vn, Vd)) -#define VADDP_16(Vd, Vn, Vm) EMIT(ADDP_vector(0, 0b01, Vm, Vn, Vd)) -#define VADDP_32(Vd, Vn, Vm) EMIT(ADDP_vector(0, 0b10, Vm, Vn, Vd)) - -#define FADDP_vector(Q, sz, Rm, Rn, Rd) ((Q)<<30 | 1<<29 | 0b01110<<24 | (sz)<<22 | 1<<21 | (Rm)<<16 | 0b11010<<11 | 1<<10 | (Rn)<<5 | (Rd)) -#define VFADDPQS(Vd, Vn, Vm) EMIT(FADDP_vector(1, 0, Vm, Vn, Vd)) -#define VFADDPQD(Vd, Vn, Vm) EMIT(FADDP_vector(1, 1, Vm, Vn, Vd)) - -// NEG / ABS -#define FNEGABS_scalar(type, opc, Rn, Rd) (0b11110<<24 | (type)<<22 | 1<<21 | (opc)<<15 | 0b10000<<10 | (Rn)<<5 | (Rd)) -#define FNEGS(Sd, Sn) EMIT(FNEGABS_scalar(0b00, 0b10, Sn, Sd)) -#define FNEGD(Dd, Dn) EMIT(FNEGABS_scalar(0b01, 0b10, Dn, Dd)) - -#define FABSS(Sd, Sn) EMIT(FNEGABS_scalar(0b00, 0b01, Sn, Sd)) -#define 
FABSD(Dd, Dn) EMIT(FNEGABS_scalar(0b01, 0b01, Dn, Dd)) - - -// MUL -#define FMUL_vector(Q, sz, Rm, Rn, Rd) ((Q)<<30 | 1<<29 | 0b01110<<24 | (sz)<<22 | 1<<21 | (Rm)<<16 | 0b11011<<11 | 1<<10 | (Rn)<<5 | (Rd)) -#define VFMULS(Sd, Sn, Sm) EMIT(FMUL_vector(0, 0, Sm, Sn, Sd)) -#define VFMULQS(Sd, Sn, Sm) EMIT(FMUL_vector(1, 0, Sm, Sn, Sd)) -#define VFMULQD(Sd, Sn, Sm) EMIT(FMUL_vector(1, 1, Sm, Sn, Sd)) - -#define FMUL_scalar(type, Rm, Rn, Rd) (0b11110<<24 | (type)<<22 | 1<<21 | (Rm)<<16 | 0b10<<10 | (Rn)<<5 | Rd) -#define FMULS(Sd, Sn, Sm) EMIT(FMUL_scalar(0b00, Sm, Sn, Sd)) -#define FMULD(Dd, Dn, Dm) EMIT(FMUL_scalar(0b01, Dm, Dn, Dd)) - -#define FMLA_vector(Q, op, sz, Rm, Rn, Rd) ((Q)<<30 | 0b01110<<24 | (op)<<23 | (sz)<<22 | 1<<21 | (Rm)<<16 | 0b11001<<11 | 1<<10 | (Rn)<<5 | (Rd)) -#define VFMLAS(Sd, Sn, Sm) EMIT(FMLA_vector(0, 0, 0, Sm, Sn, Sd)) -#define VFMLAQS(Sd, Sn, Sm) EMIT(FMLA_vector(1, 0, 0, Sm, Sn, Sd)) -#define CFMLAQD(Dd, Dn, Dm) EMIT(FMLA_vector(1, 0, 1, Dm, Dn, Dd)) - -// DIV -#define FDIV_vector(Q, sz, Rm, Rn, Rd) ((Q)<<30 | 1<<29 | 0b01110<<24 | (sz)<<22 | 1<<21 | (Rm)<<16 | 0b11111<<11 | 1<<10 | (Rn)<<5 | (Rd)) -#define VFDIVS(Sd, Sn, Sm) EMIT(FDIV_vector(0, 0, Sm, Sn, Sd)) -#define VFDIVQS(Sd, Sn, Sm) EMIT(FDIV_vector(1, 0, Sm, Sn, Sd)) -#define VFDIVQD(Sd, Sn, Sm) EMIT(FDIV_vector(1, 1, Sm, Sn, Sd)) - -#define FDIV_scalar(type, Rm, Rn, Rd) (0b11110<<24 | (type)<<22 | 1<<21 | (Rm)<<16 | 0b0001<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) -#define FDIVS(Sd, Sn, Sm) EMIT(FDIV_scalar(0b00, Sm, Sn, Sd)) -#define FDIVD(Dd, Dn, Dm) EMIT(FDIV_scalar(0b01, Dm, Dn, Dd)) - -#define FRECPE_vector(Q, sz, Rn, Rd) ((Q)<<30 | 0<<29 | 0b01110<<24 | 1<<23 | (sz)<<22 | 0b10000<<17 | 0b11101<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) -#define VFRECPES(Vd, Vn) EMIT(FRECPE_vector(0, 0, Vn, Vd)) -#define VFRECPEQS(Vd, Vn) EMIT(FRECPE_vector(1, 0, Vn, Vd)) -#define VFRECPEQD(Vd, Vn) EMIT(FRECPE_vector(1, 0, Vn, Vd)) - -#define FRECPS_vector(Q, sz, Rm, Rn, Rd) ((Q)<<30 | 0<<29 | 0b01110<<24 | 0<<23 | (sz)<<22 | 1<<21 | (Rm)<<16 | 0b11111<<11 | 1<<10 | (Rn)<<5 | (Rd)) -#define VFRECPSS(Vd, Vn, Vm) EMIT(FRECPS_vector(0, 0, Vm, Vn, Vd)) -#define VFRECPSQS(Vd, Vn, Vm) EMIT(FRECPS_vector(1, 0, Vm, Vn, Vd)) -#define VFRECPSQD(Vd, Vn, Vm) EMIT(FRECPS_vector(1, 0, Vm, Vn, Vd)) - -// SQRT -#define FSQRT_vector(Q, sz, Rn, Rd) ((Q)<<30 | 1<<29 | 0b01110<<24 | 1<<23 | (sz)<<22 | 0b10000<<17 | 0b11111<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) -#define VFSQRTS(Sd, Sn) EMIT(FSQRT_vector(0, 0, Sn, Sd)) -#define VFSQRTQS(Sd, Sn) EMIT(FSQRT_vector(1, 0, Sn, Sd)) -#define VFSQRTQD(Sd, Sn) EMIT(FSQRT_vector(1, 1, Sn, Sd)) - -#define FSQRT_scalar(type, Rn, Rd) (0b11110<<24 | (type)<<22 | 1<<21 | 0b11<<15 | 0b10000<<10 | (Rn)<<5 | (Rd)) -#define FSQRTS(Sd, Sn) EMIT(FSQRT_scalar(0b00, Sn, Sd)) -#define FSQRTD(Dd, Dn) EMIT(FSQRT_scalar(0b01, Dn, Dd)) - -#define FRSQRTE_vector(Q, sz, Rn, Rd) ((Q)<<30 | 1<<29 | 0b01110<<24 | 1<<23 | (sz)<<22 | 0b10000<<17 | 0b11101<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) -#define VFRSQRTES(Vd, Vn) EMIT(FRSQRTE_vector(0, 0, Vn, Vd)) -#define VFRSQRTEQS(Vd, Vn) EMIT(FRSQRTE_vector(1, 0, Vn, Vd)) -#define VFRSQRTEQD(Vd, Vn) EMIT(FRSQRTE_vector(1, 0, Vn, Vd)) - -#define FRSQRTS_vector(Q, sz, Rm, Rn, Rd) ((Q)<<30 | 0<<29 | 0b01110<<24 | 1<<23 | (sz)<<22 | 1<<21 | (Rm)<<16 | 0b11111<<11 | 1<<10 | (Rn)<<5 | (Rd)) -#define VFRSQRTSS(Vd, Vn, Vm) EMIT(FRSQRTS_vector(0, 0, Vm, Vn, Vd)) -#define VFRSQRTSQS(Vd, Vn, Vm) EMIT(FRSQRTS_vector(1, 0, Vm, Vn, Vd)) -#define VFRSQRTSQD(Vd, Vn, Vm) EMIT(FRSQRTS_vector(1, 0, Vm, Vn, Vd)) - -// CMP 
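The scalar compares defined just below only set NZCV (FCMP gives N=1 for "less than", Z=1 and C=1 for "equal", C=1 alone for "greater than", and C=1,V=1 for unordered); the flags can then be read back into a GPR with the MRS_nzvc macro defined earlier in this file. A hedged sketch, not the dynarec's own flag code, of how that NZCV word maps onto the ZF/PF/CF triple an x86 COMISD is expected to produce:

#include <stdio.h>
#include <stdint.h>

/* Illustrative only: map the NZCV value produced by FCMPD (as read back with
   MRS_nzvc, flags in bits 31..28) to the ZF/PF/CF triple of x86 COMISD. */
static void nzcv_to_comisd(uint32_t nzcv, int *zf, int *pf, int *cf)
{
    int n = (nzcv >> 31) & 1;
    int z = (nzcv >> 30) & 1;
    int c = (nzcv >> 29) & 1;
    int v = (nzcv >> 28) & 1;

    if (c && v)  { *zf = 1; *pf = 1; *cf = 1; }  /* unordered (NaN) */
    else if (n)  { *zf = 0; *pf = 0; *cf = 1; }  /* less than       */
    else if (z)  { *zf = 1; *pf = 0; *cf = 0; }  /* equal           */
    else         { *zf = 0; *pf = 0; *cf = 0; }  /* greater than    */
}

int main(void)
{
    /* FCMP results: equal=0110, less=1000, greater=0010, unordered=0011 */
    uint32_t cases[] = { 0x60000000u, 0x80000000u, 0x20000000u, 0x30000000u };
    for (unsigned i = 0; i < 4; ++i) {
        int zf, pf, cf;
        nzcv_to_comisd(cases[i], &zf, &pf, &cf);
        printf("NZCV=%08X -> ZF=%d PF=%d CF=%d\n", (unsigned)cases[i], zf, pf, cf);
    }
    return 0;
}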
-#define FCMP_scalar(type, Rn, Rm, opc) (0b11110<<24 | (type)<<22 | 1<<21 | (Rm)<<16 | 0b1000<<10 | (Rn)<<5 | (opc)<<3) -#define FCMPS(Sn, Sm) EMIT(FCMP_scalar(0b00, Sn, Sm, 0b00)) -#define FCMPD(Dn, Dm) EMIT(FCMP_scalar(0b01, Dn, Dm, 0b00)) -#define FCMPS_0(Sn) EMIT(FCMP_scalar(0b00, 0, Sn, 0b01)) -#define FCMPD_0(Dn) EMIT(FCMP_scalar(0b01, 0, Dn, 0b01)) - -// CVT -#define FCVT_scalar(sf, type, rmode, opcode, Rn, Rd) ((sf)<<31 | 0b11110<<24 | (type)<<22 | 1<<21 | (rmode)<<19 | (opcode)<<16 | (Rn)<<5 | (Rd)) -// Floating-point Convert to Signed integer, rounding to nearest with ties to Away -#define FCVTASwS(Wd, Sn) EMIT(FCVT_scalar(0, 0b00, 0b00, 0b100, Sn, Wd)) -#define FCVTASxS(Xd, Sn) EMIT(FCVT_scalar(1, 0b00, 0b00, 0b100, Sn, Xd)) -#define FCVTASwD(Wd, Dn) EMIT(FCVT_scalar(0, 0b01, 0b00, 0b100, Dn, Wd)) -#define FCVTASxD(Xd, Dn) EMIT(FCVT_scalar(1, 0b01, 0b00, 0b100, Dn, Xd)) -// Floating-point Convert to Unsigned integer, rounding to nearest with ties to Away -#define FCVTAUwS(Wd, Sn) EMIT(FCVT_scalar(0, 0b00, 0b00, 0b101, Sn, Wd)) -#define FCVTAUxS(Xd, Sn) EMIT(FCVT_scalar(1, 0b00, 0b00, 0b101, Sn, Xd)) -#define FCVTAUwD(Wd, Dn) EMIT(FCVT_scalar(0, 0b01, 0b00, 0b101, Dn, Wd)) -#define FCVTAUxD(Xd, Dn) EMIT(FCVT_scalar(1, 0b01, 0b00, 0b101, Dn, Xd)) -// Floating-point Convert to Signed integer, rounding toward Minus infinity -#define FCVTMSwS(Wd, Sn) EMIT(FCVT_scalar(0, 0b00, 0b10, 0b100, Sn, Wd)) -#define FCVTMSxS(Xd, Sn) EMIT(FCVT_scalar(1, 0b00, 0b10, 0b100, Sn, Xd)) -#define FCVTMSxwS(Xd, Sn) EMIT(FCVT_scalar(rex.w, 0b00, 0b10, 0b100, Sn, Xd)) -#define FCVTMSwD(Wd, Dn) EMIT(FCVT_scalar(0, 0b01, 0b10, 0b100, Dn, Wd)) -#define FCVTMSxD(Xd, Dn) EMIT(FCVT_scalar(1, 0b01, 0b10, 0b100, Dn, Xd)) -#define FCVTMSxwD(Xd, Dn) EMIT(FCVT_scalar(rex.w, 0b01, 0b10, 0b100, Dn, Xd)) -// Floating-point Convert to Unsigned integer, rounding toward Minus infinity -#define FCVTMUwS(Wd, Sn) EMIT(FCVT_scalar(0, 0b00, 0b10, 0b101, Sn, Wd)) -#define FCVTMUxS(Xd, Sn) EMIT(FCVT_scalar(1, 0b00, 0b10, 0b101, Sn, Xd)) -#define FCVTMUxwS(Xd, Sn) EMIT(FCVT_scalar(rex.w, 0b00, 0b10, 0b101, Sn, Xd)) -#define FCVTMUwD(Wd, Dn) EMIT(FCVT_scalar(0, 0b01, 0b10, 0b101, Dn, Wd)) -#define FCVTMUxD(Xd, Dn) EMIT(FCVT_scalar(1, 0b01, 0b10, 0b101, Dn, Xd)) -#define FCVTMUxwD(Xd, Dn) EMIT(FCVT_scalar(rex.w, 0b01, 0b10, 0b101, Dn, Xd)) -// Floating-point Convert to Signed integer, rounding to nearest with ties to even -#define FCVTNSwS(Wd, Sn) EMIT(FCVT_scalar(0, 0b00, 0b00, 0b000, Sn, Wd)) -#define FCVTNSxS(Xd, Sn) EMIT(FCVT_scalar(1, 0b00, 0b00, 0b000, Sn, Xd)) -#define FCVTNSxwS(Xd, Sn) EMIT(FCVT_scalar(rex.w, 0b00, 0b00, 0b000, Sn, Xd)) -#define FCVTNSwD(Wd, Dn) EMIT(FCVT_scalar(0, 0b01, 0b00, 0b000, Dn, Wd)) -#define FCVTNSxD(Xd, Dn) EMIT(FCVT_scalar(1, 0b01, 0b00, 0b000, Dn, Xd)) -#define FCVTNSxwD(Xd, Dn) EMIT(FCVT_scalar(rex.w, 0b01, 0b00, 0b000, Dn, Xd)) -// Floating-point Convert to Unsigned integer, rounding to nearest with ties to even -#define FCVTNUwS(Wd, Sn) EMIT(FCVT_scalar(0, 0b00, 0b00, 0b001, Sn, Wd)) -#define FCVTNUxS(Xd, Sn) EMIT(FCVT_scalar(1, 0b00, 0b00, 0b001, Sn, Xd)) -#define FCVTNUxwS(Xd, Sn) EMIT(FCVT_scalar(rex.w, 0b00, 0b00, 0b001, Sn, Xd)) -#define FCVTNUwD(Wd, Dn) EMIT(FCVT_scalar(0, 0b01, 0b00, 0b001, Dn, Wd)) -#define FCVTNUxD(Xd, Dn) EMIT(FCVT_scalar(1, 0b01, 0b00, 0b001, Dn, Xd)) -#define FCVTNUxwD(Xd, Dn) EMIT(FCVT_scalar(rex.w, 0b01, 0b00, 0b001, Dn, Xd)) -// Floating-point Convert to Signed integer, rounding toward Plus infinity -#define FCVTPSwS(Wd, Sn) EMIT(FCVT_scalar(0, 0b00, 0b01, 
0b000, Sn, Wd)) -#define FCVTPSxS(Xd, Sn) EMIT(FCVT_scalar(1, 0b00, 0b01, 0b000, Sn, Xd)) -#define FCVTPSxwS(Xd, Sn) EMIT(FCVT_scalar(rex.w, 0b00, 0b01, 0b000, Sn, Xd)) -#define FCVTPSwD(Wd, Dn) EMIT(FCVT_scalar(0, 0b01, 0b01, 0b000, Dn, Wd)) -#define FCVTPSxD(Xd, Dn) EMIT(FCVT_scalar(1, 0b01, 0b01, 0b000, Dn, Xd)) -#define FCVTPSxwD(Xd, Dn) EMIT(FCVT_scalar(rex.w, 0b01, 0b01, 0b000, Dn, Xd)) -// Floating-point Convert to Unsigned integer, rounding toward Plus infinity -#define FCVTPUwS(Wd, Sn) EMIT(FCVT_scalar(0, 0b00, 0b01, 0b001, Sn, Wd)) -#define FCVTPUxS(Xd, Sn) EMIT(FCVT_scalar(1, 0b00, 0b01, 0b001, Sn, Xd)) -#define FCVTPUwD(Wd, Dn) EMIT(FCVT_scalar(0, 0b01, 0b01, 0b001, Dn, Wd)) -#define FCVTPUxD(Xd, Dn) EMIT(FCVT_scalar(1, 0b01, 0b01, 0b001, Dn, Xd)) -// Floating-point Convert to Signed integer, rounding toward Zero -#define FCVTZSwS(Wd, Sn) EMIT(FCVT_scalar(0, 0b00, 0b11, 0b000, Sn, Wd)) -#define FCVTZSxS(Xd, Sn) EMIT(FCVT_scalar(1, 0b00, 0b11, 0b000, Sn, Xd)) -#define FCVTZSxwS(Xd, Sn) EMIT(FCVT_scalar(rex.w, 0b00, 0b11, 0b000, Sn, Xd)) -#define FCVTZSwD(Wd, Dn) EMIT(FCVT_scalar(0, 0b01, 0b11, 0b000, Dn, Wd)) -#define FCVTZSxD(Xd, Dn) EMIT(FCVT_scalar(1, 0b01, 0b11, 0b000, Dn, Xd)) -#define FCVTZSxwD(Xd, Dn) EMIT(FCVT_scalar(rex.w, 0b01, 0b11, 0b000, Dn, Xd)) -// Floating-point Convert to Unsigned integer, rounding toward Zero -#define FCVTZUwS(Wd, Sn) EMIT(FCVT_scalar(0, 0b00, 0b11, 0b001, Sn, Wd)) -#define FCVTZUxS(Xd, Sn) EMIT(FCVT_scalar(1, 0b00, 0b11, 0b001, Sn, Xd)) -#define FCVTZUxwS(Xd, Sn) EMIT(FCVT_scalar(rex.w, 0b00, 0b11, 0b001, Sn, Xd)) -#define FCVTZUwD(Wd, Dn) EMIT(FCVT_scalar(0, 0b01, 0b11, 0b001, Dn, Wd)) -#define FCVTZUxD(Xd, Dn) EMIT(FCVT_scalar(1, 0b01, 0b11, 0b001, Dn, Xd)) -#define FCVTZUxwD(Xd, Dn) EMIT(FCVT_scalar(rex.w, 0b01, 0b11, 0b001, Dn, Xd)) - -#define FCVT_vector_scalar(U, o2, sz, o1, Rn, Rd) (0b01<<30 | (U)<<29 | 0b11110<<24 | (o2)<<23 | (sz)<<22 | 0b10000<<17 | 0b1110<<13 | (o1)<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) -// Floating-point Convert to (Un)signed integer, rounding to nearest with ties to Away -#define VFCVTASs(Vd, Vn) EMIT(FCVT_vector_scalar(0, 0, 0, 0, Vn, Vd)) -#define VFCVTASd(Vd, Vn) EMIT(FCVT_vector_scalar(0, 0, 1, 0, Vn, Vd)) -#define VFCVTAUs(Vd, Vn) EMIT(FCVT_vector_scalar(1, 0, 0, 0, Vn, Vd)) -#define VFCVTAUd(Vd, Vn) EMIT(FCVT_vector_scalar(1, 0, 1, 0, Vn, Vd)) -// Floating-point Convert to (Un)signed integer, rounding toward Minus infinity -#define VFCVTMSs(Vd, Vn) EMIT(FCVT_vector_scalar(0, 0, 0, 1, Vn, Vd)) -#define VFCVTMSd(Vd, Vn) EMIT(FCVT_vector_scalar(0, 0, 1, 1, Vn, Vd)) -#define VFCVTMUs(Vd, Vn) EMIT(FCVT_vector_scalar(1, 0, 0, 1, Vn, Vd)) -#define VFCVTMUd(Vd, Vn) EMIT(FCVT_vector_scalar(1, 0, 1, 1, Vn, Vd)) - -#define FCVT2_vector_scalar(U, o2, sz, o1, Rn, Rd) (0b01<<30 | (U)<<29 | 0b11110<<24 | (o2)<<23 | (sz)<<22 | 0b10000<<17 | 0b1101<<13 | (o1)<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) -// Floating-point Convert to (Un)signed integer, rounding to nearest with ties to even -#define VFCVTNSs(Vd, Vn) EMIT(FCVT2_vector_scalar(0, 0, 0, 0, Vn, Vd)) -#define VFCVTNSd(Vd, Vn) EMIT(FCVT2_vector_scalar(0, 0, 1, 0, Vn, Vd)) -#define VFCVTNUs(Vd, Vn) EMIT(FCVT2_vector_scalar(1, 0, 0, 0, Vn, Vd)) -#define VFCVTNUd(Vd, Vn) EMIT(FCVT2_vector_scalar(1, 0, 1, 0, Vn, Vd)) -// Floating-point Convert to (Un)signed integer, rounding toward Plus infinity -#define VFCVTPSs(Vd, Vn) EMIT(FCVT2_vector_scalar(0, 1, 0, 0, Vn, Vd)) -#define VFCVTPSd(Vd, Vn) EMIT(FCVT2_vector_scalar(0, 1, 1, 0, Vn, Vd)) -#define VFCVTPUs(Vd, Vn) 
EMIT(FCVT2_vector_scalar(1, 1, 0, 0, Vn, Vd)) -#define VFCVTPUd(Vd, Vn) EMIT(FCVT2_vector_scalar(1, 1, 1, 0, Vn, Vd)) -// Floating-point Convert to (Un)signed integer, rounding toward Zero -#define VFCVTZSs(Vd, Vn) EMIT(FCVT2_vector_scalar(0, 1, 0, 1, Vn, Vd)) -#define VFCVTZSd(Vd, Vn) EMIT(FCVT2_vector_scalar(0, 1, 1, 1, Vn, Vd)) -#define VFCVTZUs(Vd, Vn) EMIT(FCVT2_vector_scalar(1, 1, 0, 1, Vn, Vd)) -#define VFCVTZUd(Vd, Vn) EMIT(FCVT2_vector_scalar(1, 1, 1, 1, Vn, Vd)) - -#define FCVT_vector(Q, U, o2, sz, o1, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (o2)<<23 | (sz)<<22 | 0b10000<<17 | 0b1110<<13 | (o1)<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) -// Floating-point Convert to (Un)signed integer, rounding to nearest with ties to Away -#define VFCVTASS(Vd, Vn) EMIT(FCVT_vector(0, 0, 0, 0, 0, Vn, Vd)) -#define VFCVTASD(Vd, Vn) EMIT(FCVT_vector(0, 0, 0, 1, 0, Vn, Vd)) -#define VFCVTASQS(Vd, Vn) EMIT(FCVT_vector(1, 0, 0, 0, 0, Vn, Vd)) -#define VFCVTASQD(Vd, Vn) EMIT(FCVT_vector(1, 0, 0, 1, 0, Vn, Vd)) -#define VFCVTAUS(Vd, Vn) EMIT(FCVT_vector(0, 1, 0, 0, 0, Vn, Vd)) -#define VFCVTAUD(Vd, Vn) EMIT(FCVT_vector(0, 1, 0, 1, 0, Vn, Vd)) -#define VFCVTAUQS(Vd, Vn) EMIT(FCVT_vector(1, 1, 0, 0, 0, Vn, Vd)) -#define VFCVTAUQD(Vd, Vn) EMIT(FCVT_vector(1, 1, 0, 1, 0, Vn, Vd)) -// Floating-point Convert to (Un)signed integer, rounding toward Minus infinity -#define VFCVTMSS(Vd, Vn) EMIT(FCVT_vector(0, 0, 0, 0, 1, Vn, Vd)) -#define VFCVTMSD(Vd, Vn) EMIT(FCVT_vector(0, 0, 0, 1, 1, Vn, Vd)) -#define VFCVTMSQS(Vd, Vn) EMIT(FCVT_vector(1, 0, 0, 0, 1, Vn, Vd)) -#define VFCVTMSQD(Vd, Vn) EMIT(FCVT_vector(1, 0, 0, 1, 1, Vn, Vd)) -#define VFCVTMUS(Vd, Vn) EMIT(FCVT_vector(0, 1, 0, 0, 1, Vn, Vd)) -#define VFCVTMUD(Vd, Vn) EMIT(FCVT_vector(0, 1, 0, 1, 1, Vn, Vd)) -#define VFCVTMUQS(Vd, Vn) EMIT(FCVT_vector(1, 1, 0, 0, 1, Vn, Vd)) -#define VFCVTMUQD(Vd, Vn) EMIT(FCVT_vector(1, 1, 0, 1, 1, Vn, Vd)) - -#define FCVT2_vector(Q, U, o2, sz, o1, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (o2)<<23 | (sz)<<22 | 0b10000<<17 | 0b1101<<13 | (o1)<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) -// Floating-point Convert to (Un)signed integer, rounding to nearest with ties to even -#define VFCVTNSS(Vd, Vn) EMIT(FCVT2_vector(0, 0, 0, 0, 0, Vn, Vd)) -#define VFCVTNSD(Vd, Vn) EMIT(FCVT2_vector(0, 0, 0, 1, 0, Vn, Vd)) -#define VFCVTNSQS(Vd, Vn) EMIT(FCVT2_vector(1, 0, 0, 0, 0, Vn, Vd)) -#define VFCVTNSQD(Vd, Vn) EMIT(FCVT2_vector(1, 0, 0, 1, 0, Vn, Vd)) -#define VFCVTNUS(Vd, Vn) EMIT(FCVT2_vector(0, 1, 0, 0, 0, Vn, Vd)) -#define VFCVTNUD(Vd, Vn) EMIT(FCVT2_vector(0, 1, 0, 1, 0, Vn, Vd)) -#define VFCVTNUQS(Vd, Vn) EMIT(FCVT2_vector(1, 1, 0, 0, 0, Vn, Vd)) -#define VFCVTNUQD(Vd, Vn) EMIT(FCVT2_vector(1, 1, 0, 1, 0, Vn, Vd)) -// Floating-point Convert to (Un)signed integer, rounding toward Plus infinity -#define VFCVTPSS(Vd, Vn) EMIT(FCVT2_vector(0, 0, 1, 0, 0, Vn, Vd)) -#define VFCVTPSD(Vd, Vn) EMIT(FCVT2_vector(0, 0, 1, 1, 0, Vn, Vd)) -#define VFCVTPSQS(Vd, Vn) EMIT(FCVT2_vector(1, 0, 1, 0, 0, Vn, Vd)) -#define VFCVTPSQD(Vd, Vn) EMIT(FCVT2_vector(1, 0, 1, 1, 0, Vn, Vd)) -#define VFCVTPUS(Vd, Vn) EMIT(FCVT2_vector(0, 1, 1, 0, 0, Vn, Vd)) -#define VFCVTPUD(Vd, Vn) EMIT(FCVT2_vector(0, 1, 1, 1, 0, Vn, Vd)) -#define VFCVTPUQS(Vd, Vn) EMIT(FCVT2_vector(1, 1, 1, 0, 0, Vn, Vd)) -#define VFCVTPUQD(Vd, Vn) EMIT(FCVT2_vector(1, 1, 1, 1, 0, Vn, Vd)) -// Floating-point Convert to (Un)signed integer, rounding toward Zero -#define VFCVTZSS(Vd, Vn) EMIT(FCVT2_vector(0, 0, 1, 0, 1, Vn, Vd)) -#define VFCVTZSD(Vd, Vn) EMIT(FCVT2_vector(0, 0, 1, 1, 1, Vn, Vd)) -#define 
VFCVTZSQS(Vd, Vn) EMIT(FCVT2_vector(1, 0, 1, 0, 1, Vn, Vd)) -#define VFCVTZSQD(Vd, Vn) EMIT(FCVT2_vector(1, 0, 1, 1, 1, Vn, Vd)) -#define VFCVTZUS(Vd, Vn) EMIT(FCVT2_vector(0, 1, 1, 0, 1, Vn, Vd)) -#define VFCVTZUD(Vd, Vn) EMIT(FCVT2_vector(0, 1, 1, 1, 1, Vn, Vd)) -#define VFCVTZUQS(Vd, Vn) EMIT(FCVT2_vector(1, 1, 1, 0, 1, Vn, Vd)) -#define VFCVTZUQD(Vd, Vn) EMIT(FCVT2_vector(1, 1, 1, 1, 1, Vn, Vd)) - -#define FCVT_precision(type, opc, Rn, Rd) (0b11110<<24 | (type)<<22 | 1<<21 | 0b0001<<17 | (opc)<<15 | 0b10000<<10 | (Rn)<<5 | (Rd)) -#define FCVT_D_S(Dd, Sn) EMIT(FCVT_precision(0b00, 0b01, Sn, Dd)) -#define FCVT_S_D(Sd, Dn) EMIT(FCVT_precision(0b01, 0b00, Dn, Sd)) - -#define FCVTXN_vector(Q, sz, Rn, Rd) ((Q)<<30 | 1<<29 | 0b01110<<24 | (sz)<<22 | 0b10000<<17 | 0b10110<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) -// Convert Vn from 2*Double to lower Vd as 2*float and clears the upper half -#define FCVTXN(Vd, Vn) EMIT(FCVTXN_vector(0, 1, Vn, Vd)) -// Convert Vn from 2*Double to higher Vd as 2*float -#define FCVTXN2(Vd, Vn) EMIT(FCVTXN_vector(1, 1, Vn, Vd)) - -#define FCVTL_vector(Q, sz, Rn, Rd) ((Q)<<30 | 0<<29 | 0b01110<<24 | (sz)<<22 | 0b10000<<17 | 0b10111<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) -// Convert lower Vn from 2*float to Vd as 2*double -#define FCVTL(Vd, Vn) EMIT(FCVTL_vector(0, 1, Vn, Vd)) -// Convert higher Vn from 2*float to Vd as 2*double -#define FCVTL2(Vd, Vn) EMIT(FCVTL_vector(1, 1, Vn, Vd)) - -#define SCVTF_scalar(sf, type, rmode, opcode, Rn, Rd) ((sf)<<31 | 0b11110<<24 | (type)<<22 | 1<<21 | (rmode)<<19 | (opcode)<<16 | (Rn)<<5 | (Rd)) -#define SCVTFSw(Sd, Wn) EMIT(SCVTF_scalar(0, 0b00, 0b00, 0b010, Wn, Sd)) -#define SCVTFDw(Dd, Wn) EMIT(SCVTF_scalar(0, 0b01, 0b00, 0b010, Wn, Dd)) -#define SCVTFSx(Sd, Xn) EMIT(SCVTF_scalar(1, 0b00, 0b00, 0b010, Xn, Sd)) -#define SCVTFDx(Dd, Xn) EMIT(SCVTF_scalar(1, 0b01, 0b00, 0b010, Xn, Dd)) - -#define SCVTF_vector_scalar(U, sz, Rn, Rd) (1<<30 | (U)<<29 | 0b11110<<24 | (sz)<<22 | 0b10000<<17 | 0b11101<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) -#define SCVTFSS(Vd, Vn) EMIT(SCVTF_vector_scalar(0, 0, Vn, Vd)) -#define SCVTFDD(Vd, Vn) EMIT(SCVTF_vector_scalar(0, 1, Vn, Vd)) - -#define SCVTF_vector(Q, U, sz, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (sz)<<22 | 0b10000<<17 | 0b11101<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) -#define SCVTFS(Vd, Vn) EMIT(SCVTF_vector(0, 0, 0, Vn, Vd)) -#define SCVTFD(Vd, Vn) EMIT(SCVTF_vector(0, 0, 1, Vn, Vd)) -#define SCVTQFS(Vd, Vn) EMIT(SCVTF_vector(1, 0, 0, Vn, Vd)) -#define SCVTQFD(Vd, Vn) EMIT(SCVTF_vector(1, 0, 1, Vn, Vd)) - -// FRINTI Floating-point Round to Integral, using current rounding mode from FPCR (vector). 
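The conversion helpers above are plain bit-packing: each macro ORs the fixed opcode bits with its variable fields at the documented positions and hands the finished 32-bit word to EMIT(). As a minimal standalone sketch (the helper name and register numbers are illustrative only), the SCVTF_scalar layout shown above can be reproduced and checked like this:

    #include <stdint.h>
    #include <stdio.h>

    /* Same field layout as SCVTF_scalar above: sf selects 32/64-bit integer source,
       type selects single/double destination, rmode/opcode pick the variant. */
    static uint32_t scvtf_scalar(int sf, int type, int rmode, int opcode, int rn, int rd)
    {
        return (uint32_t)sf << 31 | 0b11110 << 24 | (uint32_t)type << 22 | 1u << 21
             | (uint32_t)rmode << 19 | (uint32_t)opcode << 16 | (uint32_t)rn << 5 | (uint32_t)rd;
    }

    int main(void)
    {
        /* SCVTFDx(D0, X1): sf=1, type=0b01 (double), rmode=0b00, opcode=0b010 */
        printf("SCVTF D0, X1 -> 0x%08X\n", (unsigned)scvtf_scalar(1, 0b01, 0b00, 0b010, 1, 0));
        return 0;   /* prints 0x9E620020, i.e. scvtf d0, x1 */
    }

The FRINT* rounding helpers defined next follow exactly the same pattern, only with different fixed bits.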
-#define FRINT_vector(Q, U, o2, sz, o1, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (o2)<<23 | (sz)<<22 | 0b10000<<17 | 0b1100<<13 | (o1)<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) -#define VFRINTIS(Vd,Vn) EMIT(FRINT_vector(0, 1, 1, 0, 1, Vn, Vd)) -#define VFRINTISQ(Vd,Vn) EMIT(FRINT_vector(1, 1, 1, 0, 1, Vn, Vd)) -#define VFRINTIDQ(Vd,Vn) EMIT(FRINT_vector(1, 1, 1, 1, 1, Vn, Vd)) - -#define FRINTxx_scalar(type, op, Rn, Rd) (0b11110<<24 | (type)<<22 | 1<<21 | 0b0100<<17 | (op)<<15 | 0b10000<<10 | (Rn)<<5 | (Rd)) -#define FRINT32ZS(Sd, Sn) EMIT(FRINTxx_scalar(0b00, 0b00, Sn, Sd)) -#define FRINT32ZD(Dd, Dn) EMIT(FRINTxx_scalar(0b01, 0b00, Dn, Dd)) -#define FRINT32XS(Sd, Sn) EMIT(FRINTxx_scalar(0b00, 0b01, Sn, Sd)) -#define FRINT32XD(Dd, Dn) EMIT(FRINTxx_scalar(0b01, 0b01, Dn, Dd)) -#define FRINT64ZS(Sd, Sn) EMIT(FRINTxx_scalar(0b00, 0b10, Sn, Sd)) -#define FRINT64ZD(Dd, Dn) EMIT(FRINTxx_scalar(0b01, 0b10, Dn, Dd)) -#define FRINT64XS(Sd, Sn) EMIT(FRINTxx_scalar(0b00, 0b11, Sn, Sd)) -#define FRINT64XD(Dd, Dn) EMIT(FRINTxx_scalar(0b01, 0b11, Dn, Dd)) - -#define FRINT_scalar(type, rmode, Rn, Rd) (0b11110<<24 | (type)<<22 | 1<<21 | 0b001<<18 | (rmode)<<15 | 0b10000<<10 | (Rn)<<5 | (Rd)) -// round toward 0 (truncate) -#define FRINTZS(Sd, Sn) EMIT(FRINT_scalar(0b00, 0b011, Sn, Sd)) -// round toward 0 (truncate) -#define FRINTZD(Sd, Sn) EMIT(FRINT_scalar(0b01, 0b011, Sn, Sd)) -// round with current FPCR mode -#define FRINTXS(Sd, Sn) EMIT(FRINT_scalar(0b00, 0b110, Sn, Sd)) -// round with current FPCR mode -#define FRINTXD(Sd, Sn) EMIT(FRINT_scalar(0b01, 0b110, Sn, Sd)) -// round with mode, mode is 0 = TieEven, 1=+inf, 2=-inf, 3=zero -#define FRINTRRS(Sd, Sn, mode) EMIT(FRINT_scalar(0b00, ((mode)&3), Sn, Sd)) -// round with mode, mode is 0 = TieEven, 1=+inf, 2=-inf, 3=zero -#define FRINTRRD(Dd, Dn, mode) EMIT(FRINT_scalar(0b01, ((mode)&3), Dn, Dd)) - -// FMAX / FMIN -#define FMINMAX_vector(Q, U, o1, sz, Rm, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (o1)<<23 | (sz)<<22 | 0b1<<21 | (Rm)<<16 | 0b11110<<11 | 1<<10 | (Rn)<<5 | (Rd)) -#define VFMINS(Vd, Vn, Vm) EMIT(FMINMAX_vector(0, 0, 1, 0, Vm, Vn, Vd)) -#define VFMAXS(Vd, Vn, Vm) EMIT(FMINMAX_vector(0, 0, 0, 0, Vm, Vn, Vd)) -#define VFMINQS(Vd, Vn, Vm) EMIT(FMINMAX_vector(1, 0, 1, 0, Vm, Vn, Vd)) -#define VFMAXQS(Vd, Vn, Vm) EMIT(FMINMAX_vector(1, 0, 0, 0, Vm, Vn, Vd)) -#define VFMINQD(Vd, Vn, Vm) EMIT(FMINMAX_vector(1, 0, 1, 1, Vm, Vn, Vd)) -#define VFMAXQD(Vd, Vn, Vm) EMIT(FMINMAX_vector(1, 0, 0, 1, Vm, Vn, Vd)) - -#define FMINMAX_scalar(type, Rm, op, Rn, Rd) (0b11110<<24 | (type)<<22 | 1<<21 | (Rm)<<16 | 0b01<<14 | (op)<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) -#define FMINS(Sd, Sn, Sm) EMIT(FMINMAX_scalar(0b00, Sm, 0b01, Sn, Sd)) -#define FMIND(Dd, Dn, Dm) EMIT(FMINMAX_scalar(0b01, Dm, 0b01, Dn, Dd)) -#define FMAXS(Sd, Sn, Sm) EMIT(FMINMAX_scalar(0b00, Sm, 0b00, Sn, Sd)) -#define FMAXD(Dd, Dn, Dm) EMIT(FMINMAX_scalar(0b01, Dm, 0b00, Dn, Dd)) -// FMINNM NaN vs Number: number is picked -#define FMINNMS(Sd, Sn, Sm) EMIT(FMINMAX_scalar(0b00, Sm, 0b11, Sn, Sd)) -// FMINNM NaN vs Number: number is picked -#define FMINNMD(Dd, Dn, Dm) EMIT(FMINMAX_scalar(0b01, Dm, 0b11, Dn, Dd)) -// FMAXNM NaN vs Number: number is picked -#define FMAXNMS(Sd, Sn, Sm) EMIT(FMINMAX_scalar(0b00, Sm, 0b10, Sn, Sd)) -// FMAXNM NaN vs Number: number is picked -#define FMAXNMD(Dd, Dn, Dm) EMIT(FMINMAX_scalar(0b01, Dm, 0b10, Dn, Dd)) - -// ZIP / UZP -#define ZIP_gen(Q, size, Rm, op, Rn, Rd) ((Q)<<30 | 0b001110<<24 | (size)<<22 | (Rm)<<16 | (op)<<14 | 0b11<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) -#define 
VZIP1Q_8(Rt, Rn, Rm) EMIT(ZIP_gen(1, 0b00, Rm, 0, Rn, Rt)) -#define VZIP2Q_8(Rt, Rn, Rm) EMIT(ZIP_gen(1, 0b00, Rm, 1, Rn, Rt)) -#define VZIP1_8(Rt, Rn, Rm) EMIT(ZIP_gen(0, 0b00, Rm, 0, Rn, Rt)) -#define VZIP2_8(Rt, Rn, Rm) EMIT(ZIP_gen(0, 0b00, Rm, 1, Rn, Rt)) -#define VZIP1Q_16(Rt, Rn, Rm) EMIT(ZIP_gen(1, 0b01, Rm, 0, Rn, Rt)) -#define VZIP2Q_16(Rt, Rn, Rm) EMIT(ZIP_gen(1, 0b01, Rm, 1, Rn, Rt)) -#define VZIP1_16(Rt, Rn, Rm) EMIT(ZIP_gen(0, 0b01, Rm, 0, Rn, Rt)) -#define VZIP2_16(Rt, Rn, Rm) EMIT(ZIP_gen(0, 0b01, Rm, 1, Rn, Rt)) -#define VZIP1Q_32(Rt, Rn, Rm) EMIT(ZIP_gen(1, 0b10, Rm, 0, Rn, Rt)) -#define VZIP2Q_32(Rt, Rn, Rm) EMIT(ZIP_gen(1, 0b10, Rm, 1, Rn, Rt)) -#define VZIP1_32(Rt, Rn, Rm) EMIT(ZIP_gen(0, 0b10, Rm, 0, Rn, Rt)) -#define VZIP2_32(Rt, Rn, Rm) EMIT(ZIP_gen(0, 0b10, Rm, 1, Rn, Rt)) -#define VZIP1Q_64(Rt, Rn, Rm) EMIT(ZIP_gen(1, 0b11, Rm, 0, Rn, Rt)) -#define VZIP2Q_64(Rt, Rn, Rm) EMIT(ZIP_gen(1, 0b11, Rm, 1, Rn, Rt)) - -#define UZP_gen(Q, size, Rm, op, Rn, Rd) ((Q)<<30 | 0b001110<<24 | (size)<<22 | (Rm)<<16 | (op)<<14 | 0b01<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) -#define VUZP1Q_8(Rt, Rn, Rm) EMIT(UZP_gen(1, 0b00, Rm, 0, Rn, Rt)) -#define VUZP2Q_8(Rt, Rn, Rm) EMIT(UZP_gen(1, 0b00, Rm, 1, Rn, Rt)) -#define VUZP1_8(Rt, Rn, Rm) EMIT(UZP_gen(0, 0b00, Rm, 0, Rn, Rt)) -#define VUZP2_8(Rt, Rn, Rm) EMIT(UZP_gen(0, 0b00, Rm, 1, Rn, Rt)) -#define VUZP1Q_16(Rt, Rn, Rm) EMIT(UZP_gen(1, 0b01, Rm, 0, Rn, Rt)) -#define VUZP2Q_16(Rt, Rn, Rm) EMIT(UZP_gen(1, 0b01, Rm, 1, Rn, Rt)) -#define VUZP1_16(Rt, Rn, Rm) EMIT(UZP_gen(0, 0b01, Rm, 0, Rn, Rt)) -#define VUZP2_16(Rt, Rn, Rm) EMIT(UZP_gen(0, 0b01, Rm, 1, Rn, Rt)) -#define VUZP1Q_32(Rt, Rn, Rm) EMIT(UZP_gen(1, 0b10, Rm, 0, Rn, Rt)) -#define VUZP2Q_32(Rt, Rn, Rm) EMIT(UZP_gen(1, 0b10, Rm, 1, Rn, Rt)) -#define VUZP1_32(Rt, Rn, Rm) EMIT(UZP_gen(0, 0b10, Rm, 0, Rn, Rt)) -#define VUZP2_32(Rt, Rn, Rm) EMIT(UZP_gen(0, 0b10, Rm, 1, Rn, Rt)) -#define VUZP1Q_64(Rt, Rn, Rm) EMIT(UZP_gen(1, 0b11, Rm, 0, Rn, Rt)) -#define VUZP2Q_64(Rt, Rn, Rm) EMIT(UZP_gen(1, 0b11, Rm, 1, Rn, Rt)) - -#define DUP_gen(Q, imm5, Rn, Rd) ((Q)<<30 | 0b01110000<<21 | (imm5)<<16 | 1<<10 | (Rn)<<5 | (Rd)) -#define VDUP_8(Vd, Vn, idx) EMIT(DUP_gen(0, ((idx)<<1|1), Vn, Vd)) -#define VDUPQ_8(Vd, Vn, idx) EMIT(DUP_gen(1, ((idx)<<1|1), Vn, Vd)) -#define VDUP_16(Vd, Vn, idx) EMIT(DUP_gen(0, ((idx)<<2|0b10), Vn, Vd)) -#define VDUPQ_16(Vd, Vn, idx) EMIT(DUP_gen(1, ((idx)<<2|0b10), Vn, Vd)) -#define VDUP_32(Vd, Vn, idx) EMIT(DUP_gen(0, ((idx)<<3|0b100), Vn, Vd)) -#define VDUPQ_32(Vd, Vn, idx) EMIT(DUP_gen(1, ((idx)<<3|0b100), Vn, Vd)) -#define VDUPQ_64(Vd, Vn, idx) EMIT(DUP_gen(1, ((idx)<<4|0b1000), Vn, Vd)) - -// TBL -#define TBL_gen(Q, Rm, len, op, Rn, Rd) ((Q)<<30 | 0b001110<<24 | (Rm)<<16 | (len)<<13 | (op)<<12 | (Rn)<<5 | (Rd)) -//Use Rm[] to pick from Rn element and store in Rd. Out-of-range element gets 0 -#define VTBLQ1_8(Rd, Rn, Rm) EMIT(TBL_gen(1, Rm, 0b00, 0, Rn, Rd)) -#define VTBL1_8(Rd, Rn, Rm) EMIT(TBL_gen(0, Rm, 0b00, 0, Rn, Rd)) -//Use Rm[] to pick from Rn, Rn+1 element and store in Rd. Out-of-range element gets 0 -#define VTBLQ2_8(Rd, Rn, Rm) EMIT(TBL_gen(1, Rm, 0b01, 0, Rn, Rd)) -//Use Rm[] to pick from Rn, Rn+1, Rn+2 element and store in Rd. Out-of-range element gets 0 -#define VTBLQ3_8(Rd, Rn, Rm) EMIT(TBL_gen(1, Rm, 0b10, 0, Rn, Rd)) -//Use Rm[] to pick from Rn, Rn+1, Rn+2, Rn+3 element and store in Rd. Out-of-range element gets 0 -#define VTBLQ4_8(Rd, Rn, Rm) EMIT(TBL_gen(1, Rm, 0b11, 0, Rn, Rd)) -//Use Rm[] to pick from Rn element and store in Rd. 
Out-of-range element stay untouched -#define VTBXQ1_8(Rd, Rn, Rm) EMIT(TBL_gen(1, Rm, 0b00, 0, Rn, Rd)) -//Use Rm[] to pick from Rn, Rn+1 element and store in Rd. Out-of-range element stay untouched -#define VTBXQ2_8(Rd, Rn, Rm) EMIT(TBL_gen(1, Rm, 0b01, 0, Rn, Rd)) -//Use Rm[] to pick from Rn, Rn+1, Rn+2 element and store in Rd. Out-of-range element stay untouched -#define VTBXQ3_8(Rd, Rn, Rm) EMIT(TBL_gen(1, Rm, 0b10, 0, Rn, Rd)) -//Use Rm[] to pick from Rn, Rn+1, Rn+2, Rn+3 element and store in Rd. Out-of-range element stay untouched -#define VTBXQ4_8(Rd, Rn, Rm) EMIT(TBL_gen(1, Rm, 0b11, 0, Rn, Rd)) - -// TRN -#define TRN_gen(Q, size, Rm, op, Rn, Rd) ((Q)<<30 | 0b001110<<24 | (size)<<22 | (Rm)<<16 | (op)<<14 | 0b10<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) -#define VTRNQ1_64(Vd, Vn, Vm) EMIT(TRN_gen(1, 0b11, Vm, 0, Vn, Vd)) -#define VTRNQ1_32(Vd, Vn, Vm) EMIT(TRN_gen(1, 0b10, Vm, 0, Vn, Vd)) -#define VTRNQ1_16(Vd, Vn, Vm) EMIT(TRN_gen(1, 0b01, Vm, 0, Vn, Vd)) -#define VTRNQ1_8(Vd, Vn, Vm) EMIT(TRN_gen(1, 0b00, Vm, 0, Vn, Vd)) -#define VSWP(Vd, Vn) VTRNQ1_64(Vd, Vn, Vn) -#define VTRNQ2_64(Vd, Vn, Vm) EMIT(TRN_gen(1, 0b11, Vm, 1, Vn, Vd)) -#define VTRNQ2_32(Vd, Vn, Vm) EMIT(TRN_gen(1, 0b10, Vm, 1, Vn, Vd)) -#define VTRNQ2_16(Vd, Vn, Vm) EMIT(TRN_gen(1, 0b01, Vm, 1, Vn, Vd)) -#define VTRNQ2_8(Vd, Vn, Vm) EMIT(TRN_gen(1, 0b00, Vm, 1, Vn, Vd)) - -// QXTN / QXTN2 -#define QXTN_scalar(U, size, Rn, Rd) (0b01<<30 | (U)<<29 | 0b11110<<24 | (size)<<22 | 0b10000<<17 | 0b10100<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) -// Signed saturating extract Narrow, from D to S -#define SQXTN_S_D(Sd, Dn) EMIT(QXTN_scalar(0, 0b10, Dn, Sd)) -// Signed saturating extract Narrow, from S to H -#define SQXTN_H_S(Hd, Sn) EMIT(QXTN_scalar(0, 0b01, Sn, Hd)) -// Signed saturating extract Narrow, from H to B -#define SQXTN_B_H(Bd, Hn) EMIT(QXTN_scalar(0, 0b00, Hn, Bd)) - -#define QXTN_vector(Q, U, size, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 0b10000<<17 | 0b10100<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) -// Signed saturating extract Narrow, takes Rn element and reduce 64->32 with Signed saturation and fit lower part of Rd -#define SQXTN_32(Rd, Rn) EMIT(QXTN_vector(0, 0, 0b10, Rn, Rd)) -// Signed saturating extract Narrow, takes Rn element and reduce 64->32 with Signed saturation and fit higher part of Rd -#define SQXTN2_32(Rd, Rn) EMIT(QXTN_vector(1, 0, 0b10, Rn, Rd)) -// Signed saturating extract Narrow, takes Rn element and reduce 32->16 with Signed saturation and fit lower part of Rd -#define SQXTN_16(Rd, Rn) EMIT(QXTN_vector(0, 0, 0b01, Rn, Rd)) -// Signed saturating extract Narrow, takes Rn element and reduce 32->16 with Signed saturation and fit higher part of Rd -#define SQXTN2_16(Rd, Rn) EMIT(QXTN_vector(1, 0, 0b01, Rn, Rd)) -// Signed saturating extract Narrow, takes Rn element and reduce 16->8 with Signed saturation and fit lower part of Rd -#define SQXTN_8(Rd, Rn) EMIT(QXTN_vector(0, 0, 0b00, Rn, Rd)) -// Signed saturating extract Narrow, takes Rn element and reduce 16->8 with Signed saturation and fit higher part of Rd -#define SQXTN2_8(Rd, Rn) EMIT(QXTN_vector(1, 0, 0b00, Rn, Rd)) -// Unsigned saturating Extract Narrow, takes Rn element and reduce 64->32 with Unsigned saturation and fit lower part of Rd -#define UQXTN_32(Rd, Rn) EMIT(QXTN_vector(0, 1, 0b10, Rn, Rd)) -// Unsigned saturating Extract Narrow, takes Rn element and reduce 64->32 with Unsigned saturation and fit higher part of Rd -#define UQXTN2_32(Rd, Rn) EMIT(QXTN_vector(1, 1, 0b10, Rn, Rd)) -// Unsigned saturating extract Narrow, takes Rn element and 
reduce 32->16 with Unsigned saturation and fit lower part of Rd -#define UQXTN_16(Rd, Rn) EMIT(QXTN_vector(0, 1, 0b01, Rn, Rd)) -// Unsigned saturating extract Narrow, takes Rn element and reduce 32->16 with Unsigned saturation and fit higher part of Rd -#define UQXTN2_16(Rd, Rn) EMIT(QXTN_vector(1, 1, 0b01, Rn, Rd)) -// Unsigned saturating extract Narrow, takes Rn element and reduce 16->8 with Unsigned saturation and fit lower part of Rd -#define UQXTN_8(Rd, Rn) EMIT(QXTN_vector(0, 1, 0b00, Rn, Rd)) -// Unsigned saturating extract Narrow, takes Rn element and reduce 16->8 with Unsigned saturation and fit higher part of Rd -#define UQXTN2_8(Rd, Rn) EMIT(QXTN_vector(1, 1, 0b00, Rn, Rd)) - -#define QXTUN_vector(Q, U, size, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 0b10000<<17 | 0b10010<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) -// Signed saturating extract Unsigned Narrow, takes Rn element and reduce 64->32 with Unsigned saturation and fit lower part of Rd -#define SQXTUN_32(Rd, Rn) EMIT(QXTUN_vector(0, 1, 0b10, Rn, Rd)) -// Signed saturating extract Unsigned Narrow, takes Rn element and reduce 64->32 with Unsigned saturation and fit higher part of Rd -#define SQXTUN2_32(Rd, Rn) EMIT(QXTUN_vector(1, 1, 0b10, Rn, Rd)) -// Signed saturating extract Unsigned Narrow, takes Rn element and reduce 32->16 with Unsigned saturation and fit lower part of Rd -#define SQXTUN_16(Rd, Rn) EMIT(QXTUN_vector(0, 1, 0b01, Rn, Rd)) -// Signed saturating extract Unsigned Narrow, takes Rn element and reduce 32->16 with Unsigned saturation and fit higher part of Rd -#define SQXTUN2_16(Rd, Rn) EMIT(QXTUN_vector(1, 1, 0b01, Rn, Rd)) -// Signed saturating extract Unsigned Narrow, takes Rn element and reduce 16->8 with Unsigned saturation and fit lower part of Rd -#define SQXTUN_8(Rd, Rn) EMIT(QXTUN_vector(0, 1, 0b00, Rn, Rd)) -// Signed saturating extract Unsigned Narrow, takes Rn element and reduce 16->8 with Unsigned saturation and fit higher part of Rd -#define SQXTUN2_8(Rd, Rn) EMIT(QXTUN_vector(1, 1, 0b00, Rn, Rd)) - -// Integer CMP -// EQual -#define CMEQ_vector(Q, U, size, Rm, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 0b10001<<11 | 1<<10 | (Rn)<<5 | (Rd)) -#define VCMEQ_8(Rd, Rn, Rm) EMIT(CMEQ_vector(0, 1, 0b00, Rm, Rn, Rd)) -#define VCMEQ_16(Rd, Rn, Rm) EMIT(CMEQ_vector(0, 1, 0b01, Rm, Rn, Rd)) -#define VCMEQ_32(Rd, Rn, Rm) EMIT(CMEQ_vector(0, 1, 0b10, Rm, Rn, Rd)) -#define VCMEQQ_8(Rd, Rn, Rm) EMIT(CMEQ_vector(1, 1, 0b00, Rm, Rn, Rd)) -#define VCMEQQ_16(Rd, Rn, Rm) EMIT(CMEQ_vector(1, 1, 0b01, Rm, Rn, Rd)) -#define VCMEQQ_32(Rd, Rn, Rm) EMIT(CMEQ_vector(1, 1, 0b10, Rm, Rn, Rd)) -#define VCMEQQ_64(Rd, Rn, Rm) EMIT(CMEQ_vector(1, 1, 0b11, Rm, Rn, Rd)) -// Greater test -#define CMG_vector(Q, U, size, eq, Rm, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 0b0011<<12 | (eq)<<11 | 1<<10 | (Rn)<<5 | (Rd)) -// Signed Greater or Equal -#define VCMGEQ_8(Rd, Rn, Rm) EMIT(CMG_vector(1, 0, 0b00, 1, Rm, Rn, Rd)) -#define VCMGEQ_16(Rd, Rn, Rm) EMIT(CMG_vector(1, 0, 0b01, 1, Rm, Rn, Rd)) -#define VCMGEQ_32(Rd, Rn, Rm) EMIT(CMG_vector(1, 0, 0b10, 1, Rm, Rn, Rd)) -#define VCMGEQ_64(Rd, Rn, Rm) EMIT(CMG_vector(1, 0, 0b11, 1, Rm, Rn, Rd)) -// Unsigned Higher or Same -#define VCMHSQ_8(Rd, Rn, Rm) EMIT(CMG_vector(1, 1, 0b00, 1, Rm, Rn, Rd)) -#define VCMHSQ_16(Rd, Rn, Rm) EMIT(CMG_vector(1, 1, 0b01, 1, Rm, Rn, Rd)) -#define VCMHSQ_32(Rd, Rn, Rm) EMIT(CMG_vector(1, 1, 0b10, 1, Rm, Rn, Rd)) -#define VCMHSQ_64(Rd, Rn, Rm) EMIT(CMG_vector(1, 1, 0b11, 1, Rm, Rn, Rd)) -// 
Signed Greater Than -#define VCMGTQ_8(Rd, Rn, Rm) EMIT(CMG_vector(1, 0, 0b00, 0, Rm, Rn, Rd)) -#define VCMGTQ_16(Rd, Rn, Rm) EMIT(CMG_vector(1, 0, 0b01, 0, Rm, Rn, Rd)) -#define VCMGTQ_32(Rd, Rn, Rm) EMIT(CMG_vector(1, 0, 0b10, 0, Rm, Rn, Rd)) -#define VCMGTQ_64(Rd, Rn, Rm) EMIT(CMG_vector(1, 0, 0b11, 0, Rm, Rn, Rd)) -#define VCMGT_8(Rd, Rn, Rm) EMIT(CMG_vector(0, 0, 0b00, 0, Rm, Rn, Rd)) -#define VCMGT_16(Rd, Rn, Rm) EMIT(CMG_vector(0, 0, 0b01, 0, Rm, Rn, Rd)) -#define VCMGT_32(Rd, Rn, Rm) EMIT(CMG_vector(0, 0, 0b10, 0, Rm, Rn, Rd)) -// Unsigned Higher -#define VCHIQQ_8(Rd, Rn, Rm) EMIT(CMG_vector(1, 1, 0b00, 0, Rm, Rn, Rd)) -#define VCHIQQ_16(Rd, Rn, Rm) EMIT(CMG_vector(1, 1, 0b01, 0, Rm, Rn, Rd)) -#define VCHIQQ_32(Rd, Rn, Rm) EMIT(CMG_vector(1, 1, 0b10, 0, Rm, Rn, Rd)) -#define VCHIQQ_64(Rd, Rn, Rm) EMIT(CMG_vector(1, 1, 0b11, 0, Rm, Rn, Rd)) - -// Less Than 0 -#define CMLT_0_vector(Q, size, Rn, Rd) ((Q)<<30 | 0b01110<<24 | (size)<<22 | 0b10000<<17 | 0b01010<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) -#define CMLT_0_8(Rd, Rn) EMIT(CMLT_0_vector(0, 0b00, Rn, Rd)) -#define CMLT_0_16(Rd, Rn) EMIT(CMLT_0_vector(0, 0b01, Rn, Rd)) -#define CMLT_0_32(Rd, Rn) EMIT(CMLT_0_vector(0, 0b10, Rn, Rd)) -#define CMLTQ_0_8(Rd, Rn) EMIT(CMLT_0_vector(1, 0b00, Rn, Rd)) -#define CMLTQ_0_16(Rd, Rn) EMIT(CMLT_0_vector(1, 0b01, Rn, Rd)) -#define CMLTQ_0_32(Rd, Rn) EMIT(CMLT_0_vector(1, 0b10, Rn, Rd)) -#define CMLTQ_0_64(Rd, Rn) EMIT(CMLT_0_vector(1, 0b11, Rn, Rd)) -// Equal 0 -#define CMEQ_0_vector(Q, size, Rn, Rd) ((Q)<<30 | 0b01110<<24 | (size)<<22 | 0b10000<<17 | 0b0100<<13 | 1<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) -#define CMEQ_0_8(Rd, Rn) EMIT(CMEQ_0_vector(0, 0b00, Rn, Rd)) -#define CMEQ_0_16(Rd, Rn) EMIT(CMEQ_0_vector(0, 0b01, Rn, Rd)) -#define CMEQ_0_32(Rd, Rn) EMIT(CMEQ_0_vector(0, 0b10, Rn, Rd)) -#define CMEQQ_0_8(Rd, Rn) EMIT(CMEQ_0_vector(1, 0b00, Rn, Rd)) -#define CMEQQ_0_16(Rd, Rn) EMIT(CMEQ_0_vector(1, 0b01, Rn, Rd)) -#define CMEQQ_0_32(Rd, Rn) EMIT(CMEQ_0_vector(1, 0b10, Rn, Rd)) -#define CMEQQ_0_64(Rd, Rn) EMIT(CMEQ_0_vector(1, 0b11, Rn, Rd)) - -// Vector Float CMP -// EQual -#define FCMP_vector(Q, U, E, sz, Rm, ac, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (E)<<23 | (sz)<<22 | 1<<21 | (Rm)<<16 | 0b1110<<12 | (ac)<<11 | 1<<10 | (Rn)<<5 | (Rd)) -#define FCMEQQD(Rd, Rn, Rm) EMIT(FCMP_vector(1, 0, 0, 1, Rm, 0, Rn, Rd)) -#define FCMEQQS(Rd, Rn, Rm) EMIT(FCMP_vector(1, 0, 0, 0, Rm, 0, Rn, Rd)) -// Greater or Equal -#define FCMGEQD(Rd, Rn, Rm) EMIT(FCMP_vector(1, 1, 0, 1, Rm, 0, Rn, Rd)) -#define FCMGEQS(Rd, Rn, Rm) EMIT(FCMP_vector(1, 1, 0, 0, Rm, 0, Rn, Rd)) -#define FCMGEQD_ABS(Rd, Rn, Rm) EMIT(FCMP_vector(1, 1, 0, 1, Rm, 1, Rn, Rd)) -#define FCMGEQS_ABS(Rd, Rn, Rm) EMIT(FCMP_vector(1, 1, 0, 0, Rm, 1, Rn, Rd)) -// Greater Than -#define FCMGTQD(Rd, Rn, Rm) EMIT(FCMP_vector(1, 1, 1, 1, Rm, 0, Rn, Rd)) -#define FCMGTQS(Rd, Rn, Rm) EMIT(FCMP_vector(1, 1, 1, 0, Rm, 0, Rn, Rd)) -#define FCMGTQD_ABS(Rd, Rn, Rm) EMIT(FCMP_vector(1, 1, 1, 1, Rm, 1, Rn, Rd)) -#define FCMGTQS_ABS(Rd, Rn, Rm) EMIT(FCMP_vector(1, 1, 1, 0, Rm, 1, Rn, Rd)) - -// UMULL / SMULL -#define MULL_vector(Q, U, size, Rm, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 0b1100<<12 |(Rn)<<5 |(Rd)) -#define VUMULL_8(Rd, Rn, Rm) EMIT(MULL_vector(0, 1, 0b00, Rm, Rn, Rd)) -#define VUMULL_16(Rd, Rn, Rm) EMIT(MULL_vector(0, 1, 0b01, Rm, Rn, Rd)) -#define VUMULL_32(Rd, Rn, Rm) EMIT(MULL_vector(0, 1, 0b10, Rm, Rn, Rd)) -#define VUMULL2_8(Rd, Rn, Rm) EMIT(MULL_vector(1, 1, 0b00, Rm, Rn, Rd)) -#define VUMULL2_16(Rd, Rn, Rm) 
EMIT(MULL_vector(1, 1, 0b01, Rm, Rn, Rd)) -#define VUMULL2_32(Rd, Rn, Rm) EMIT(MULL_vector(1, 1, 0b10, Rm, Rn, Rd)) -#define VSMULL_8(Rd, Rn, Rm) EMIT(MULL_vector(0, 0, 0b00, Rm, Rn, Rd)) -#define VSMULL_16(Rd, Rn, Rm) EMIT(MULL_vector(0, 0, 0b01, Rm, Rn, Rd)) -#define VSMULL_32(Rd, Rn, Rm) EMIT(MULL_vector(0, 0, 0b10, Rm, Rn, Rd)) -#define VSMULL2_8(Rd, Rn, Rm) EMIT(MULL_vector(1, 0, 0b00, Rm, Rn, Rd)) -#define VSMULL2_16(Rd, Rn, Rm) EMIT(MULL_vector(1, 0, 0b01, Rm, Rn, Rd)) -#define VSMULL2_32(Rd, Rn, Rm) EMIT(MULL_vector(1, 0, 0b10, Rm, Rn, Rd)) - -// MUL -#define MUL_vector(Q, size, Rm, Rn, Rd) ((Q)<<30 | 0b01110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 0b10011<<11 | 1<<10 | (Rn)<<5 | (Rd)) -#define VMUL_8(Vd, Vn, Vm) EMIT(MUL_vector(0, 0b00, Vm, Vn, Vd)) -#define VMUL_16(Vd, Vn, Vm) EMIT(MUL_vector(0, 0b01, Vm, Vn, Vd)) -#define VMUL_32(Vd, Vn, Vm) EMIT(MUL_vector(0, 0b10, Vm, Vn, Vd)) -#define VMULQ_8(Vd, Vn, Vm) EMIT(MUL_vector(1, 0b00, Vm, Vn, Vd)) -#define VMULQ_16(Vd, Vn, Vm) EMIT(MUL_vector(1, 0b01, Vm, Vn, Vd)) -#define VMULQ_32(Vd, Vn, Vm) EMIT(MUL_vector(1, 0b10, Vm, Vn, Vd)) - -// (S/Q)ADD -#define QADD_vector(Q, U, size, Rm, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 0b00001<<11 | 1<<10 | (Rn)<<5 | (Rd)) -#define SQADDQ_8(Vd, Vn, Vm) EMIT(QADD_vector(1, 0, 0b00, Vm, Vn, Vd)) -#define SQADDQ_16(Vd, Vn, Vm) EMIT(QADD_vector(1, 0, 0b01, Vm, Vn, Vd)) -#define SQADDQ_32(Vd, Vn, Vm) EMIT(QADD_vector(1, 0, 0b10, Vm, Vn, Vd)) -#define SQADDQ_64(Vd, Vn, Vm) EMIT(QADD_vector(1, 0, 0b11, Vm, Vn, Vd)) -#define UQADDQ_8(Vd, Vn, Vm) EMIT(QADD_vector(1, 1, 0b00, Vm, Vn, Vd)) -#define UQADDQ_16(Vd, Vn, Vm) EMIT(QADD_vector(1, 1, 0b01, Vm, Vn, Vd)) -#define UQADDQ_32(Vd, Vn, Vm) EMIT(QADD_vector(1, 1, 0b10, Vm, Vn, Vd)) -#define UQADDQ_64(Vd, Vn, Vm) EMIT(QADD_vector(1, 1, 0b11, Vm, Vn, Vd)) -#define SQADD_8(Vd, Vn, Vm) EMIT(QADD_vector(0, 0, 0b00, Vm, Vn, Vd)) -#define SQADD_16(Vd, Vn, Vm) EMIT(QADD_vector(0, 0, 0b01, Vm, Vn, Vd)) -#define SQADD_32(Vd, Vn, Vm) EMIT(QADD_vector(0, 0, 0b10, Vm, Vn, Vd)) -#define SQADD_64(Vd, Vn, Vm) EMIT(QADD_vector(0, 0, 0b11, Vm, Vn, Vd)) -#define UQADD_8(Vd, Vn, Vm) EMIT(QADD_vector(0, 1, 0b00, Vm, Vn, Vd)) -#define UQADD_16(Vd, Vn, Vm) EMIT(QADD_vector(0, 1, 0b01, Vm, Vn, Vd)) -#define UQADD_32(Vd, Vn, Vm) EMIT(QADD_vector(0, 1, 0b10, Vm, Vn, Vd)) -#define UQADD_64(Vd, Vn, Vm) EMIT(QADD_vector(0, 1, 0b11, Vm, Vn, Vd)) - -// Absolute Difference -#define AD_vector(Q, U, size, Rm, ac, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 0b0111<<12 | (ac)<<11 | 1<<10 | (Rn)<<5 | (Rd)) -// Signed Absolute Difference and accumulate -#define SABAQ_8(Rd, Rn, Rm) EMIT(AD_vector(1, 0, 0b00, Rm, 1, Rn, Rd)) -#define SABAQ_16(Rd, Rn, Rm) EMIT(AD_vector(1, 0, 0b01, Rm, 1, Rn, Rd)) -#define SABAQ_32(Rd, Rn, Rm) EMIT(AD_vector(1, 0, 0b10, Rm, 1, Rn, Rd)) -#define SABA_8(Rd, Rn, Rm) EMIT(AD_vector(0, 0, 0b00, Rm, 1, Rn, Rd)) -#define SABA_16(Rd, Rn, Rm) EMIT(AD_vector(0, 0, 0b01, Rm, 1, Rn, Rd)) -#define SABA_32(Rd, Rn, Rm) EMIT(AD_vector(0, 0, 0b10, Rm, 1, Rn, Rd)) -// Signed Absolute Difference -#define SABDQ_8(Rd, Rn, Rm) EMIT(AD_vector(1, 0, 0b00, Rm, 0, Rn, Rd)) -#define SABDQ_16(Rd, Rn, Rm) EMIT(AD_vector(1, 0, 0b01, Rm, 0, Rn, Rd)) -#define SABDQ_32(Rd, Rn, Rm) EMIT(AD_vector(1, 0, 0b10, Rm, 0, Rn, Rd)) -#define SABD_8(Rd, Rn, Rm) EMIT(AD_vector(0, 0, 0b00, Rm, 0, Rn, Rd)) -#define SABD_16(Rd, Rn, Rm) EMIT(AD_vector(0, 0, 0b01, Rm, 0, Rn, Rd)) -#define SABD_32(Rd, Rn, Rm) EMIT(AD_vector(0, 0, 0b10, Rm, 0, Rn, 
Rd)) - -#define ADL_vector(Q, U, size, Rm, op, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 0b01<<14 | (op)<<13 | 1<<12 | (Rn)<<5 | (Rd)) -#define SABAL_8(Rd, Rn, Rm) EMIT(ADL_vector(0, 0, 0b00, Rm, 0, Rn, Rd)) -#define SABAL2_8(Rd, Rn, Rm) EMIT(ADL_vector(1, 0, 0b00, Rm, 0, Rn, Rd)) -#define SABAL_16(Rd, Rn, Rm) EMIT(ADL_vector(0, 0, 0b01, Rm, 0, Rn, Rd)) -#define SABAL2_16(Rd, Rn, Rm) EMIT(ADL_vector(1, 0, 0b01, Rm, 0, Rn, Rd)) -#define SABAL_32(Rd, Rn, Rm) EMIT(ADL_vector(0, 0, 0b10, Rm, 0, Rn, Rd)) -#define SABAL2_32(Rd, Rn, Rm) EMIT(ADL_vector(1, 0, 0b10, Rm, 0, Rn, Rd)) -#define UABAL_8(Rd, Rn, Rm) EMIT(ADL_vector(0, 1, 0b00, Rm, 0, Rn, Rd)) -#define UABAL2_8(Rd, Rn, Rm) EMIT(ADL_vector(1, 1, 0b00, Rm, 0, Rn, Rd)) -#define UABAL_16(Rd, Rn, Rm) EMIT(ADL_vector(0, 1, 0b01, Rm, 0, Rn, Rd)) -#define UABAL2_16(Rd, Rn, Rm) EMIT(ADL_vector(1, 1, 0b01, Rm, 0, Rn, Rd)) -#define UABAL_32(Rd, Rn, Rm) EMIT(ADL_vector(0, 1, 0b10, Rm, 0, Rn, Rd)) -#define UABAL2_32(Rd, Rn, Rm) EMIT(ADL_vector(1, 1, 0b10, Rm, 0, Rn, Rd)) -#define SABDL_8(Rd, Rn, Rm) EMIT(ADL_vector(0, 0, 0b00, Rm, 1, Rn, Rd)) -#define SABDL2_8(Rd, Rn, Rm) EMIT(ADL_vector(1, 0, 0b00, Rm, 1, Rn, Rd)) -#define SABDL_16(Rd, Rn, Rm) EMIT(ADL_vector(0, 0, 0b01, Rm, 1, Rn, Rd)) -#define SABDL2_16(Rd, Rn, Rm) EMIT(ADL_vector(1, 0, 0b01, Rm, 1, Rn, Rd)) -#define SABDL_32(Rd, Rn, Rm) EMIT(ADL_vector(0, 0, 0b10, Rm, 1, Rn, Rd)) -#define SABDL2_32(Rd, Rn, Rm) EMIT(ADL_vector(1, 0, 0b10, Rm, 1, Rn, Rd)) -#define UABDL_8(Rd, Rn, Rm) EMIT(ADL_vector(0, 1, 0b00, Rm, 1, Rn, Rd)) -#define UABDL2_8(Rd, Rn, Rm) EMIT(ADL_vector(1, 1, 0b00, Rm, 1, Rn, Rd)) -#define UABDL_16(Rd, Rn, Rm) EMIT(ADL_vector(0, 1, 0b01, Rm, 1, Rn, Rd)) -#define UABDL2_16(Rd, Rn, Rm) EMIT(ADL_vector(1, 1, 0b01, Rm, 1, Rn, Rd)) -#define UABDL_32(Rd, Rn, Rm) EMIT(ADL_vector(0, 1, 0b10, Rm, 1, Rn, Rd)) -#define UABDL2_32(Rd, Rn, Rm) EMIT(ADL_vector(1, 1, 0b10, Rm, 1, Rn, Rd)) - -// Add Pairwise -#define ADDLP_vector(Q, U, size, op, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 1<<21 | (op)<<14 | 0b10<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) -#define SADDLPQ_8(Rd, Rn) EMIT(ADDLP_vector(1, 0, 0b00, 0, Rn, Rd)) -#define SADDLPQ_16(Rd, Rn) EMIT(ADDLP_vector(1, 0, 0b01, 0, Rn, Rd)) -#define SADDLPQ_32(Rd, Rn) EMIT(ADDLP_vector(1, 0, 0b10, 0, Rn, Rd)) -#define SADDLP_8(Rd, Rn) EMIT(ADDLP_vector(0, 0, 0b00, 0, Rn, Rd)) -#define SADDLP_16(Rd, Rn) EMIT(ADDLP_vector(0, 0, 0b01, 0, Rn, Rd)) -#define SADDLP_32(Rd, Rn) EMIT(ADDLP_vector(0, 0, 0b10, 0, Rn, Rd)) -#define UADDLPQ_8(Rd, Rn) EMIT(ADDLP_vector(1, 1, 0b00, 0, Rn, Rd)) -#define UADDLPQ_16(Rd, Rn) EMIT(ADDLP_vector(1, 1, 0b01, 0, Rn, Rd)) -#define UADDLPQ_32(Rd, Rn) EMIT(ADDLP_vector(1, 1, 0b10, 0, Rn, Rd)) -#define UADDLP_8(Rd, Rn) EMIT(ADDLP_vector(0, 1, 0b00, 0, Rn, Rd)) -#define UADDLP_16(Rd, Rn) EMIT(ADDLP_vector(0, 1, 0b01, 0, Rn, Rd)) -#define UADDLP_32(Rd, Rn) EMIT(ADDLP_vector(0, 1, 0b10, 0, Rn, Rd)) - -// Add accros vector -#define ADDLV_vector(Q, U, size, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 0b11000<<17 | 0b00011<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) -#define SADDLVQ_8(Rd, Rn) EMIT(ADDLV_vector(1, 0, 0b00, Rn, Rd)) -#define SADDLVQ_16(Rd, Rn) EMIT(ADDLV_vector(1, 0, 0b01, Rn, Rd)) -#define SADDLVQ_32(Rd, Rn) EMIT(ADDLV_vector(1, 0, 0b10, Rn, Rd)) -#define SADDLV_8(Rd, Rn) EMIT(ADDLV_vector(0, 0, 0b00, Rn, Rd)) -#define SADDLV_16(Rd, Rn) EMIT(ADDLV_vector(0, 0, 0b01, Rn, Rd)) -#define SADDLV_32(Rd, Rn) EMIT(ADDLV_vector(0, 0, 0b10, Rn, Rd)) -#define UADDLVQ_8(Rd, Rn) 
EMIT(ADDLV_vector(1, 1, 0b00, Rn, Rd)) -#define UADDLVQ_16(Rd, Rn) EMIT(ADDLV_vector(1, 1, 0b01, Rn, Rd)) -#define UADDLVQ_32(Rd, Rn) EMIT(ADDLV_vector(1, 1, 0b10, Rn, Rd)) -#define UADDLV_8(Rd, Rn) EMIT(ADDLV_vector(0, 1, 0b00, Rn, Rd)) -#define UADDLV_16(Rd, Rn) EMIT(ADDLV_vector(0, 1, 0b01, Rn, Rd)) -#define UADDLV_32(Rd, Rn) EMIT(ADDLV_vector(0, 1, 0b10, Rn, Rd)) - -// MOV Immediate -#define MOVI_vector(Q, op, abc, cmode, defgh, Rd) ((Q)<<30 | (op)<<29 | 0b0111100000<<19 | (abc)<<16 | (cmode)<<12 | 1<<10 | (defgh)<<5 | (Rd)) -#define MOVIQ_8(Rd, imm8) EMIT(MOVI_vector(1, 0, (((imm8)>>5)&0b111), 0b1110, ((imm8)&0b11111), Rd)) -#define MOVI_8(Rd, imm8) EMIT(MOVI_vector(0, 0, (((imm8)>>5)&0b111), 0b1110, ((imm8)&0b11111), Rd)) - -// SHLL and eXtend Long -#define SHLL_vector(Q, U, immh, immb, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b011110<<23 | (immh)<<19 | (immb)<<16 | 0b10100<<11 | 1<<10 | (Rn)<<5 | (Rd)) -#define USHLL2_8(Vd, Vn, imm) EMIT(SHLL_vector(1, 1, 0b0001, (imm)&0x7, Vn, Vd)) -#define USHLL_8(Vd, Vn, imm) EMIT(SHLL_vector(0, 1, 0b0001, (imm)&0x7, Vn, Vd)) -#define SSHLL2_8(Vd, Vn, imm) EMIT(SHLL_vector(1, 0, 0b0001, (imm)&0x7, Vn, Vd)) -#define SSHLL_8(Vd, Vn, imm) EMIT(SHLL_vector(0, 0, 0b0001, (imm)&0x7, Vn, Vd)) -#define USHLL2_16(Vd, Vn, imm) EMIT(SHLL_vector(1, 1, 0b0010|(((imm)>>3)&1), (imm)&0x7, Vn, Vd)) -#define USHLL_16(Vd, Vn, imm) EMIT(SHLL_vector(0, 1, 0b0010|(((imm)>>3)&1), (imm)&0x7, Vn, Vd)) -#define SSHLL2_16(Vd, Vn, imm) EMIT(SHLL_vector(1, 0, 0b0010|(((imm)>>3)&1), (imm)&0x7, Vn, Vd)) -#define SSHLL_16(Vd, Vn, imm) EMIT(SHLL_vector(0, 0, 0b0010|(((imm)>>3)&1), (imm)&0x7, Vn, Vd)) -#define USHLL2_32(Vd, Vn, imm) EMIT(SHLL_vector(1, 1, 0b0100|(((imm)>>3)&3), (imm)&0x7, Vn, Vd)) -#define USHLL_32(Vd, Vn, imm) EMIT(SHLL_vector(0, 1, 0b0100|(((imm)>>3)&3), (imm)&0x7, Vn, Vd)) -#define SSHLL2_32(Vd, Vn, imm) EMIT(SHLL_vector(1, 0, 0b0100|(((imm)>>3)&3), (imm)&0x7, Vn, Vd)) -#define SSHLL_32(Vd, Vn, imm) EMIT(SHLL_vector(0, 0, 0b0100|(((imm)>>3)&3), (imm)&0x7, Vn, Vd)) - -#define UXTL_8(Vd, Vn) USHLL_8(Vd, Vn, 0) -#define UXTL2_8(Vd, Vn) USHLL2_8(Vd, Vn, 0) -#define UXTL_16(Vd, Vn) USHLL_16(Vd, Vn, 0) -#define UXTL2_16(Vd, Vn) USHLL2_16(Vd, Vn, 0) -#define UXTL_32(Vd, Vn) USHLL_32(Vd, Vn, 0) -#define UXTL2_32(Vd, Vn) USHLL2_32(Vd, Vn, 0) - -#define SXTL_8(Vd, Vn) SSHLL_8(Vd, Vn, 0) -#define SXTL2_8(Vd, Vn) SSHLL2_8(Vd, Vn, 0) -#define SXTL_16(Vd, Vn) SSHLL_16(Vd, Vn, 0) -#define SXTL2_16(Vd, Vn) SSHLL2_16(Vd, Vn, 0) -#define SXTL_32(Vd, Vn) SSHLL_32(Vd, Vn, 0) -#define SXTL2_32(Vd, Vn) SSHLL2_32(Vd, Vn, 0) - -// SHRN -#define QSHRN_vector(Q, U, immh, immb, op, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b011110<<23 | (immh)<<19 | (immb)<<16 | 0b1001<<12 | (op)<<11 | 1<<10 | (Rn)<<5 | (Rd)) -#define UQSHRN_8(Vd, Vn, imm) EMIT(QSHRN_vector(0, 1, 0b0001, (8-(imm))&0x7, 0, Vn, Vd)) -#define UQSHRN2_8(Vd, Vn, imm) EMIT(QSHRN_vector(1, 1, 0b0001, (8-(imm))&0x7, 0, Vn, Vd)) -#define SQSHRN_8(Vd, Vn, imm) EMIT(QSHRN_vector(0, 0, 0b0001, (8-(imm))&0x7, 0, Vn, Vd)) -#define SQSHRN2_8(Vd, Vn, imm) EMIT(QSHRN_vector(1, 0, 0b0001, (8-(imm))&0x7, 0, Vn, Vd)) -#define UQSHRN_16(Vd, Vn, imm) EMIT(QSHRN_vector(0, 1, 0b0010|(((16-(imm))>>3)&1), (16-(imm))&0x7, 0, Vn, Vd)) -#define UQSHRN2_16(Vd, Vn, imm) EMIT(QSHRN_vector(1, 1, 0b0010|(((16-(imm))>>3)&1), (16-(imm))&0x7, 0, Vn, Vd)) -#define SQSHRN_16(Vd, Vn, imm) EMIT(QSHRN_vector(0, 0, 0b0010|(((16-(imm))>>3)&1), (16-(imm))&0x7, 0, Vn, Vd)) -#define SQSHRN2_16(Vd, Vn, imm) EMIT(QSHRN_vector(1, 0, 0b0010|(((16-(imm))>>3)&1), (16-(imm))&0x7, 0, Vn, Vd)) 
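The shift-immediate helpers above fold the visible shift count into the split immh:immb field: for the widening USHLL_N/SSHLL_N forms the 7-bit field holds N + shift, and for the narrowing *QSHRN_N forms it holds 2*N - shift, which is why the macros compute 16-(imm) and 32-(imm). A small self-contained check of that arithmetic for the 16-bit variants (the function names are purely illustrative):

    #include <assert.h>
    #include <stdio.h>

    /* Mirror of the immh/immb arithmetic in USHLL_16 and UQSHRN_16 above. */
    static unsigned ushll16_immhimmb(unsigned shift)   /* widen 16->32, shift 0..15 */
    {
        unsigned immh = 0b0010 | ((shift >> 3) & 1);
        unsigned immb = shift & 0x7;
        return immh << 3 | immb;
    }

    static unsigned uqshrn16_immhimmb(unsigned shift)  /* narrow 32->16, shift 1..16 */
    {
        unsigned immh = 0b0010 | (((16 - shift) >> 3) & 1);
        unsigned immb = (16 - shift) & 0x7;
        return immh << 3 | immb;
    }

    int main(void)
    {
        for (unsigned s = 0; s < 16; ++s)
            assert(ushll16_immhimmb(s) == 16 + s);      /* immh:immb = N + shift   */
        for (unsigned s = 1; s <= 16; ++s)
            assert(uqshrn16_immhimmb(s) == 32 - s);     /* immh:immb = 2*N - shift */
        puts("immh:immb arithmetic matches the macro definitions");
        return 0;
    }

The 32-bit narrowing variants that continue below use the same scheme with an 0b0100 immh base and 32-(imm).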
-#define UQSHRN_32(Vd, Vn, imm) EMIT(QSHRN_vector(0, 1, 0b0100|(((32-(imm))>>3)&3), (32-(imm))&0x7, 0, Vn, Vd)) -#define UQSHRN2_32(Vd, Vn, imm) EMIT(QSHRN_vector(1, 1, 0b0100|(((32-(imm))>>3)&3), (32-(imm))&0x7, 0, Vn, Vd)) -#define SQSHRN_32(Vd, Vn, imm) EMIT(QSHRN_vector(0, 0, 0b0100|(((32-(imm))>>3)&3), (32-(imm))&0x7, 0, Vn, Vd)) -#define SQSHRN2_32(Vd, Vn, imm) EMIT(QSHRN_vector(1, 0, 0b0100|(((32-(imm))>>3)&3), (32-(imm))&0x7, 0, Vn, Vd)) - -// UQSUB -#define QSUB_vector(Q, U, size, Rm, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 0b00101<<11 | 1<<10 | (Rn)<<5 | (Rd)) -#define UQSUB_8(Vd, Vn, Vm) EMIT(QSUB_vector(0, 1, 0b00, Vm, Vn, Vd)) -#define UQSUB_16(Vd, Vn, Vm) EMIT(QSUB_vector(0, 1, 0b01, Vm, Vn, Vd)) -#define UQSUB_32(Vd, Vn, Vm) EMIT(QSUB_vector(0, 1, 0b10, Vm, Vn, Vd)) -#define UQSUB_64(Vd, Vn, Vm) EMIT(QSUB_vector(0, 1, 0b11, Vm, Vn, Vd)) -#define SQSUB_8(Vd, Vn, Vm) EMIT(QSUB_vector(0, 0, 0b00, Vm, Vn, Vd)) -#define SQSUB_16(Vd, Vn, Vm) EMIT(QSUB_vector(0, 0, 0b01, Vm, Vn, Vd)) -#define SQSUB_32(Vd, Vn, Vm) EMIT(QSUB_vector(0, 0, 0b10, Vm, Vn, Vd)) -#define SQSUB_64(Vd, Vn, Vm) EMIT(QSUB_vector(0, 0, 0b11, Vm, Vn, Vd)) -#define UQSUBQ_8(Vd, Vn, Vm) EMIT(QSUB_vector(1, 1, 0b00, Vm, Vn, Vd)) -#define UQSUBQ_16(Vd, Vn, Vm) EMIT(QSUB_vector(1, 1, 0b01, Vm, Vn, Vd)) -#define UQSUBQ_32(Vd, Vn, Vm) EMIT(QSUB_vector(1, 1, 0b10, Vm, Vn, Vd)) -#define UQSUBQ_64(Vd, Vn, Vm) EMIT(QSUB_vector(1, 1, 0b11, Vm, Vn, Vd)) -#define SQSUBQ_8(Vd, Vn, Vm) EMIT(QSUB_vector(1, 0, 0b00, Vm, Vn, Vd)) -#define SQSUBQ_16(Vd, Vn, Vm) EMIT(QSUB_vector(1, 0, 0b01, Vm, Vn, Vd)) -#define SQSUBQ_32(Vd, Vn, Vm) EMIT(QSUB_vector(1, 0, 0b10, Vm, Vn, Vd)) -#define SQSUBQ_64(Vd, Vn, Vm) EMIT(QSUB_vector(1, 0, 0b11, Vm, Vn, Vd)) - -// MAX/MIN vector -#define MINMAX_vector(Q, U, size, Rm, op, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 0b0110<<12 | (op)<<11 | 1<<10 | (Rn)<<5 | (Rd)) -#define SMAX_8(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 0, 0b00, Vm, 0, Vn, Vd)) -#define SMAX_16(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 0, 0b01, Vm, 0, Vn, Vd)) -#define SMAX_32(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 0, 0b10, Vm, 0, Vn, Vd)) -#define SMAX_64(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 0, 0b11, Vm, 0, Vn, Vd)) -#define UMAX_8(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 1, 0b00, Vm, 0, Vn, Vd)) -#define UMAX_16(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 1, 0b01, Vm, 0, Vn, Vd)) -#define UMAX_32(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 1, 0b10, Vm, 0, Vn, Vd)) -#define UMAX_64(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 1, 0b11, Vm, 0, Vn, Vd)) -#define SMIN_8(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 0, 0b00, Vm, 1, Vn, Vd)) -#define SMIN_16(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 0, 0b01, Vm, 1, Vn, Vd)) -#define SMIN_32(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 0, 0b10, Vm, 1, Vn, Vd)) -#define SMIN_64(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 0, 0b11, Vm, 1, Vn, Vd)) -#define UMIN_8(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 1, 0b00, Vm, 1, Vn, Vd)) -#define UMIN_16(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 1, 0b01, Vm, 1, Vn, Vd)) -#define UMIN_32(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 1, 0b10, Vm, 1, Vn, Vd)) -#define UMIN_64(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 1, 0b11, Vm, 1, Vn, Vd)) -#define SMAXQ_8(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 0, 0b00, Vm, 0, Vn, Vd)) -#define SMAXQ_16(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 0, 0b01, Vm, 0, Vn, Vd)) -#define SMAXQ_32(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 0, 0b10, Vm, 0, Vn, Vd)) -#define SMAXQ_64(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 0, 0b11, Vm, 0, Vn, Vd)) -#define UMAXQ_8(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 1, 0b00, Vm, 0, Vn, Vd)) 
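For reference, the UQSUB/SQSUB encodings above are per-lane saturating subtracts: results clamp at the type's limits instead of wrapping, matching the saturating x86 SIMD subtracts (PSUBUSB and friends). A scalar C model of one 8-bit lane, only to illustrate the semantics the emitted instruction provides (not code from the emitter):

    #include <stdint.h>
    #include <stdio.h>

    /* One 8-bit lane of UQSUB (unsigned) and SQSUB (signed): clamp, don't wrap. */
    static uint8_t uqsub8(uint8_t a, uint8_t b)
    {
        return (a > b) ? (uint8_t)(a - b) : 0;            /* clamp at 0 */
    }

    static int8_t sqsub8(int8_t a, int8_t b)
    {
        int r = (int)a - (int)b;
        if (r > 127)  r = 127;                            /* clamp at INT8_MAX */
        if (r < -128) r = -128;                           /* clamp at INT8_MIN */
        return (int8_t)r;
    }

    int main(void)
    {
        printf("uqsub8(10, 200)   = %d\n", uqsub8(10, 200));     /* 0, not the wrapped 66    */
        printf("sqsub8(-100, 100) = %d\n", sqsub8(-100, 100));   /* -128, not the wrapped 56 */
        return 0;
    }

Having the hardware do the clamping avoids per-lane compares and branches in the generated code.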
-#define UMAXQ_16(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 1, 0b01, Vm, 0, Vn, Vd)) -#define UMAXQ_32(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 1, 0b10, Vm, 0, Vn, Vd)) -#define UMAXQ_64(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 1, 0b11, Vm, 0, Vn, Vd)) -#define SMINQ_8(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 0, 0b00, Vm, 1, Vn, Vd)) -#define SMINQ_16(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 0, 0b01, Vm, 1, Vn, Vd)) -#define SMINQ_32(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 0, 0b10, Vm, 1, Vn, Vd)) -#define SMINQ_64(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 0, 0b11, Vm, 1, Vn, Vd)) -#define UMINQ_8(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 1, 0b00, Vm, 1, Vn, Vd)) -#define UMINQ_16(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 1, 0b01, Vm, 1, Vn, Vd)) -#define UMINQ_32(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 1, 0b10, Vm, 1, Vn, Vd)) -#define UMINQ_64(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 1, 0b11, Vm, 1, Vn, Vd)) - -// HADD vector -#define HADD_vector(Q, U, size, Rm, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 1<<10 | (Rn)<<5 | (Rd)) -#define SHADD_8(Vd, Vn, Vm) EMIT(HADD_vector(0, 0, 0b00, Vm, Vn, Vd)) -#define SHADD_16(Vd, Vn, Vm) EMIT(HADD_vector(0, 0, 0b01, Vm, Vn, Vd)) -#define SHADD_32(Vd, Vn, Vm) EMIT(HADD_vector(0, 0, 0b10, Vm, Vn, Vd)) -#define SHADDQ_8(Vd, Vn, Vm) EMIT(HADD_vector(1, 0, 0b00, Vm, Vn, Vd)) -#define SHADDQ_16(Vd, Vn, Vm) EMIT(HADD_vector(1, 0, 0b01, Vm, Vn, Vd)) -#define SHADDQ_32(Vd, Vn, Vm) EMIT(HADD_vector(1, 0, 0b10, Vm, Vn, Vd)) -#define UHADD_8(Vd, Vn, Vm) EMIT(HADD_vector(0, 1, 0b00, Vm, Vn, Vd)) -#define UHADD_16(Vd, Vn, Vm) EMIT(HADD_vector(0, 1, 0b01, Vm, Vn, Vd)) -#define UHADD_32(Vd, Vn, Vm) EMIT(HADD_vector(0, 1, 0b10, Vm, Vn, Vd)) -#define UHADDQ_8(Vd, Vn, Vm) EMIT(HADD_vector(1, 1, 0b00, Vm, Vn, Vd)) -#define UHADDQ_16(Vd, Vn, Vm) EMIT(HADD_vector(1, 1, 0b01, Vm, Vn, Vd)) -#define UHADDQ_32(Vd, Vn, Vm) EMIT(HADD_vector(1, 1, 0b10, Vm, Vn, Vd)) - -#define RHADD_vector(Q, U, size, Rm, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 0b00010<<11 | 1<<10 | (Rn)<<5 | (Rd)) -#define SRHADD_8(Vd, Vn, Vm) EMIT(RHADD_vector(0, 0, 0b00, Vm, Vn, Vd)) -#define SRHADD_16(Vd, Vn, Vm) EMIT(RHADD_vector(0, 0, 0b01, Vm, Vn, Vd)) -#define SRHADD_32(Vd, Vn, Vm) EMIT(RHADD_vector(0, 0, 0b10, Vm, Vn, Vd)) -#define SRHADDQ_8(Vd, Vn, Vm) EMIT(RHADD_vector(1, 0, 0b00, Vm, Vn, Vd)) -#define SRHADDQ_16(Vd, Vn, Vm) EMIT(RHADD_vector(1, 0, 0b01, Vm, Vn, Vd)) -#define SRHADDQ_32(Vd, Vn, Vm) EMIT(RHADD_vector(1, 0, 0b10, Vm, Vn, Vd)) -#define URHADD_8(Vd, Vn, Vm) EMIT(RHADD_vector(0, 1, 0b00, Vm, Vn, Vd)) -#define URHADD_16(Vd, Vn, Vm) EMIT(RHADD_vector(0, 1, 0b01, Vm, Vn, Vd)) -#define URHADD_32(Vd, Vn, Vm) EMIT(RHADD_vector(0, 1, 0b10, Vm, Vn, Vd)) -#define URHADDQ_8(Vd, Vn, Vm) EMIT(RHADD_vector(1, 1, 0b00, Vm, Vn, Vd)) -#define URHADDQ_16(Vd, Vn, Vm) EMIT(RHADD_vector(1, 1, 0b01, Vm, Vn, Vd)) -#define URHADDQ_32(Vd, Vn, Vm) EMIT(RHADD_vector(1, 1, 0b10, Vm, Vn, Vd)) - -// QRDMULH Signed saturating (Rounding) Doubling Multiply returning High half -#define QDMULH_vector(Q, U, size, Rm, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 0b10110<<11 | 1<<10 | (Rn)<<5 | (Rd)) -#define SQRDMULH_8(Vd, Vn, Vm) EMIT(QDMULH_vector(0, 1, 0b00, Vm, Vn, Vd)) -#define SQRDMULH_16(Vd, Vn, Vm) EMIT(QDMULH_vector(0, 1, 0b01, Vm, Vn, Vd)) -#define SQRDMULH_32(Vd, Vn, Vm) EMIT(QDMULH_vector(0, 1, 0b10, Vm, Vn, Vd)) -#define SQRDMULHQ_8(Vd, Vn, Vm) EMIT(QDMULH_vector(1, 1, 0b00, Vm, Vn, Vd)) -#define SQRDMULHQ_16(Vd, Vn, Vm) EMIT(QDMULH_vector(1, 1, 0b01, Vm, Vn, Vd)) -#define SQRDMULHQ_32(Vd, 
Vn, Vm) EMIT(QDMULH_vector(1, 1, 0b10, Vm, Vn, Vd)) -#define SQDMULH_8(Vd, Vn, Vm) EMIT(QDMULH_vector(0, 0, 0b00, Vm, Vn, Vd)) -#define SQDMULH_16(Vd, Vn, Vm) EMIT(QDMULH_vector(0, 0, 0b01, Vm, Vn, Vd)) -#define SQDMULH_32(Vd, Vn, Vm) EMIT(QDMULH_vector(0, 0, 0b10, Vm, Vn, Vd)) -#define SQDMULHQ_8(Vd, Vn, Vm) EMIT(QDMULH_vector(1, 0, 0b00, Vm, Vn, Vd)) -#define SQDMULHQ_16(Vd, Vn, Vm) EMIT(QDMULH_vector(1, 0, 0b01, Vm, Vn, Vd)) -#define SQDMULHQ_32(Vd, Vn, Vm) EMIT(QDMULH_vector(1, 0, 0b10, Vm, Vn, Vd)) - -// AES extensions -#define AES_gen(D, Rn, Rd) (0b01001110<<24 | 0b00<<22 | 0b10100<<17 | 0b0010<<13 | (D)<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) -#define AESD(Vd, Vn) EMIT(AES_gen(1, Vn, Vd)) -#define AESE(Vd, Vn) EMIT(AES_gen(0, Vn, Vd)) - -#define AESMC_gen(D, Rn, Rd) (0b01001110<<24 | 0b00<<22 | 0b10100<<17 | 0b0011<<13 | (D)<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) -#define AESIMC(Vd, Vn) EMIT(AESMC_gen(1, Vn, Vd)) -#define AESMC(Vd, Vn) EMIT(AESMC_gen(0, Vn, Vd)) - -#endif //__ARM64_EMITTER_H__ diff --git a/src/dynarec/arm64_epilog.S b/src/dynarec/arm64_epilog.S deleted file mode 100755 index 4b73803a..00000000 --- a/src/dynarec/arm64_epilog.S +++ /dev/null @@ -1,54 +0,0 @@ -//arm epilog for dynarec -//Save stuff, prepare stack and register -//called with pointer to emu as 1st parameter -//and address to jump to as 2nd parameter - -.text -.align 4 - -.global arm64_epilog -arm64_epilog: - //update register -> emu - stp x10, x11, [x0, (8 * 0)] - stp x12, x13, [x0, (8 * 2)] - stp x14, x15, [x0, (8 * 4)] - stp x16, x17, [x0, (8 * 6)] - stp x18, x19, [x0, (8 * 8)] - stp x20, x21, [x0, (8 * 10)] - stp x22, x23, [x0, (8 * 12)] - stp x24, x25, [x0, (8 * 14)] - stp x26, x27, [x0, (8 * 16)] // put back reg value in emu, including EIP (so x27 must be EIP now) - //restore all used register - //vpop {d8-d15} - ldp x19, x20, [sp, (8 * 0)] - ldp x21, x22, [sp, (8 * 2)] - ldp x23, x24, [sp, (8 * 4)] - ldp x25, x26, [sp, (8 * 6)] - ldr x27, [sp, (8 * 8)] - ldp d8, d9, [sp, (8 *10)] - ldp d10, d11, [sp, (8 *12)] - ldp d12, d13, [sp, (8 *14)] - ldp d14, d15, [sp, (8 *16)] - add sp, sp, (8 * 18) - ldp lr, fp, [sp], 16 // saved lr - //end, return... - ret - - -.global arm64_epilog_fast -arm64_epilog_fast: - //restore all used register - //vpop {d8-d15} - ldp x19, x20, [sp, (8 * 0)] - ldp x21, x22, [sp, (8 * 2)] - ldp x23, x24, [sp, (8 * 4)] - ldp x25, x26, [sp, (8 * 6)] - ldr x27, [sp, (8 * 8)] - ldp d8, d9, [sp, (8 *10)] - ldp d10, d11, [sp, (8 *12)] - ldp d12, d13, [sp, (8 *14)] - ldp d14, d15, [sp, (8 *16)] - add sp, sp, (8 * 18) - ldp lr, fp, [sp], 16 // saved lr - //end, return... 
- ret diff --git a/src/dynarec/arm64_lock.S b/src/dynarec/arm64_lock.S deleted file mode 100755 index 9d097ff5..00000000 --- a/src/dynarec/arm64_lock.S +++ /dev/null @@ -1,131 +0,0 @@ -//arm lock helper -//there is 2 part: read and write -// write return 0 on success, 1 on fail (value has been changed) - -.text -.align 4 - -.global arm64_lock_read_b -.global arm64_lock_write_b -.global arm64_lock_read_h -.global arm64_lock_write_h -.global arm64_lock_read_d -.global arm64_lock_write_d -.global arm64_lock_read_dd -.global arm64_lock_write_dd -.global arm64_lock_read_dq -.global arm64_lock_write_dq -.global arm64_lock_xchg -.global arm64_lock_storeifnull -.global arm64_lock_storeifref - - -arm64_lock_read_b: - dmb ish - // address is x0, return is x0 - ldaxrb w0, [x0] - ret - -arm64_lock_write_b: - // address is x0, value is x1, return is x0 - mov x2, x0 - stlxrb w0, w1, [x2] - dmb ish - ret - -arm64_lock_read_h: - dmb ish - // address is x0, return is x0 - ldaxrh w0, [x0] - ret - -arm64_lock_write_h: - // address is x0, value is x1, return is x0 - mov x2, x0 - stlxrh w0, w1, [x2] - dmb ish - ret - -arm64_lock_read_d: - dmb ish - // address is x0, return is x0 - ldaxr w0, [x0] - ret - -arm64_lock_write_d: - // address is x0, value is w1, return is x0 - mov x2, x0 - stlxr w0, w1, [x2] - dmb ish - ret - -arm64_lock_read_dd: - dmb ish - // address is x0, return is x0 - ldaxr x0, [x0] - ret - -arm64_lock_write_dd: - // address is x0, value is x1, return is x0 - mov x2, x0 - stlxr w0, x1, [x2] - dmb ish - ret - -arm64_lock_read_dq: - dmb ish - // address is r2, return is r0, r1 - ldaxp x4, x3, [x2] - str x4, [x0] - str x3, [x1] - ret - -arm64_lock_write_dq: - // address is r2, value is r0, r1, return is r0 - // r0 needs to be aligned - stlxp w3, x0, x1, [x2] - mov w0, w3 - dmb ish - ret - - -arm64_lock_xchg: - dmb ish -arm64_lock_xchg_0: - // address is x0, value is x1, return old value in x0 - ldaxr x2, [x0] - stlxr w3, x1, [x0] - cbnz w3, arm64_lock_xchg_0 - dmb ish - mov x0, x2 - ret - -arm64_lock_storeifnull: - dmb ish -arm64_lock_storeifnull_0: - // address is x0, value is x1, x1 store to x0 only if [x0] is 0. return new [x0] value (so x1 or old value) - ldaxr x2, [x0] - cbnz x2, arm64_lock_storeifnull_exit - mov x2, x1 - stlxr w3, x2, [x0] - cbnz w3, arm64_lock_storeifnull_0 -arm64_lock_storeifnull_exit: - dmb ish - mov x0, x2 - ret - -arm64_lock_storeifref: - dmb ish -arm64_lock_storeifref_0: - // address is x0, value is x1, x1 store to x0 only if [x0] is x3. 
return new [x0] value (so x1 or old value) - ldaxr x3, [x0] - cmp x2, x3 - bne arm64_lock_storeifref_exit - stlxr w4, x1, [x0] - cbnz w4, arm64_lock_storeifref_0 - mov x0, x1 - ret -arm64_lock_storeifref_exit: - dmb ish - mov x0, x3 - ret diff --git a/src/dynarec/arm64_lock.h b/src/dynarec/arm64_lock.h deleted file mode 100755 index 8f6bd14d..00000000 --- a/src/dynarec/arm64_lock.h +++ /dev/null @@ -1,39 +0,0 @@ -#ifndef __ARM64_LOCK__H__ -#define __ARM64_LOCK__H__ -#include - -// LDAXRB of ADDR -extern uint8_t arm64_lock_read_b(void* addr); -// STLXRB of ADDR, return 0 if ok, 1 if not -extern int arm64_lock_write_b(void* addr, uint8_t val); - -// LDAXRH of ADDR -extern uint16_t arm64_lock_read_h(void* addr); -// STLXRH of ADDR, return 0 if ok, 1 if not -extern int arm64_lock_write_h(void* addr, uint16_t val); - -// LDAXR of ADDR -extern uint32_t arm64_lock_read_d(void* addr); -// STLXR of ADDR, return 0 if ok, 1 if not -extern int arm64_lock_write_d(void* addr, uint32_t val); - -// LDAXR of ADDR -extern uint64_t arm64_lock_read_dd(void* addr); -// STLXR of ADDR, return 0 if ok, 1 if not -extern int arm64_lock_write_dd(void* addr, uint64_t val); - -// LDAXRD of ADDR -extern void arm64_lock_read_dq(uint64_t * a, uint64_t* b, void* addr); -// STLXRD of ADDR, return 0 if ok, 1 if not -extern int arm64_lock_write_dq(uint64_t a, uint64_t b, void* addr); - -// Atomicaly exchange value at [p] with val, return old p -extern uintptr_t arm64_lock_xchg(void* p, uintptr_t val); - -// Atomicaly store value to [p] only if [p] is NULL. Return new [p] value (so val or old) -extern void* arm64_lock_storeifnull(void*p, void* val); - -// Atomicaly store value to [p] only if [p] is ref. Return new [p] value (so val or old) -extern void* arm64_lock_storeifref(void*p, void* val, void* ref); - -#endif //__ARM64_LOCK__H__ \ No newline at end of file diff --git a/src/dynarec/arm64_next.S b/src/dynarec/arm64_next.S deleted file mode 100755 index 5e890137..00000000 --- a/src/dynarec/arm64_next.S +++ /dev/null @@ -1,39 +0,0 @@ -//arm update linker table for dynarec -//called with pointer to emu as 1st parameter -//and address of table to as 2nd parameter -//ip is at r12 - -.text -.align 4 - -.extern LinkNext - -.global arm64_next -arm64_next: - // emu is r0 - // IP address is r1 - sub sp, sp, (8 * 12) - stp x0, x1, [sp, (8 * 0)] - stp x10, x11, [sp, (8 * 2)] - stp x12, x13, [sp, (8 * 4)] - stp x14, x15, [sp, (8 * 6)] - stp x16, x17, [sp, (8 * 8)] - stp x18, x27, [sp, (8 * 10)] // also save x27(rip) to allow change in LinkNext - - mov x2, lr // "from" is in lr, so put in x2 - add x3, sp, 8*11 // x3 is address to change rip - // call the function - bl LinkNext - // preserve return value - mov x3, x0 - // pop regs - ldp x0, x1, [sp, (8 * 0)] - ldp x10, x11, [sp, (8 * 2)] - ldp x12, x13, [sp, (8 * 4)] - ldp x14, x15, [sp, (8 * 6)] - ldp x16, x17, [sp, (8 * 8)] - ldp x18, x27, [sp, (8 * 10)] - add sp, sp, (8 * 12) - // return offset is jump address - br x3 - diff --git a/src/dynarec/arm64_printer.c b/src/dynarec/arm64_printer.c deleted file mode 100755 index b07d40a4..00000000 --- a/src/dynarec/arm64_printer.c +++ /dev/null @@ -1,1353 +0,0 @@ -#include -#include -#include -#include - -#include "arm64_printer.h" -#include "debug.h" - -static const char* Xt[] = {"xEmu", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "xRAX", "xRCX", "xRDX", "xRBX", "xRSP", "xRBP", "xRSI", "xRDI", "xR8", "xR9", "xR10", "xR11", "xR12", "xR13", "xR14", "xR15", "xFlags", "xRIP", "x28", "FP", "LR", "xZR"}; -static const char* XtSp[] = 
{"xEmu", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "xRAX", "xRCX", "xRDX", "xRBX", "xRSP", "xRBP", "xRSI", "xRDI", "xR8", "xR9", "xR10", "xR11", "xR12", "xR13", "xR14", "xR15", "xFlags", "xRIP", "x28", "FP", "LR", "SP"}; -static const char* Wt[] = {"w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7", "w8", "w9", "wEAX", "wECX", "wEDX", "wEBX", "wESP", "wEBP", "wESI", "wEDI", "wR8", "wR9", "wR10", "wR11", "wR12", "wR13", "wR14", "wR15", "wFlags", "w27", "w28", "w29", "w30", "wZR"}; -static const char* WtSp[] = {"w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7", "w8", "w9", "wEAX", "wECX", "wEDX", "wEBX", "wESP", "wEBP", "wESI", "wEDI", "wR8", "wR9", "wR10", "wR11", "wR12", "wR13", "wR14", "wR15", "wFlags", "w27", "w28", "w29", "w30", "wSP"}; - -static const char* conds[] = {"cEQ", "cNE", "cCS", "cCC", "cMI", "cPL", "cVS", "cVC", "cHI", "cLS", "cGE", "cLT", "cGT", "cLE", "c__", "inv"}; - -#define abs(A) (((A)<0)?(-(A)):(A)) - -typedef struct arm64_print_s { - int N, S, U, L, Q; - int t, n, m, d, t2, a; - int f, c, o, h, p; - int i, r, s; - int x, w; -} arm64_print_t; - -uint64_t DecodeBitMasks(int N, int imms, int immr) -{ - int len = 31-__builtin_clz(N<<6 | ((~imms)&0b111111)); - if(len<1) return 0; - int levels = (1<>r)|(mask<<(e-r)); - mask&=((1LL<>i)&1; - switch(*mask) { - case '0': if(v!=0) return 0; break; - case '1': if(v!=1) return 0; break; - case 'N': a->N = (a->N<<1) | v; break; - case 'S': a->S = (a->S<<1) | v; break; - case 'U': a->U = (a->U<<1) | v; break; - case 'L': a->L = (a->L<<1) | v; break; - case 'Q': a->Q = (a->Q<<1) | v; break; - case 't': a->t = (a->t<<1) | v; break; - case '2': a->t2 = (a->t2<<1) | v; break; - case 'n': a->n = (a->n<<1) | v; break; - case 'p': a->p = (a->p<<1) | v; break; - case 'm': a->m = (a->m<<1) | v; break; - case 'a': a->a = (a->a<<1) | v; break; - case 'd': a->d = (a->d<<1) | v; break; - case 'f': a->f = (a->f<<1) | v; break; - case 'c': a->c = (a->c<<1) | v; break; - case 'i': a->i = (a->i<<1) | v; break; - case 'r': a->r = (a->r<<1) | v; break; - case 's': a->s = (a->s<<1) | v; break; - case 'o': a->o = (a->o<<1) | v; break; - case 'h': a->h = (a->h<<1) | v; break; - case 'w': a->w = (a->w<<1) | v; break; - case 'x': a->x = (a->x<<1) | v; break; - default: - printf_log(LOG_NONE, "Warning, printer mask use unhandled '%c'\n", *mask); - } - mask++; - --i; - } - - return 1; -} - -int64_t signExtend(uint32_t val, int sz) -{ - int64_t ret = val; - if((val>>(sz-1))&1) - ret |= (0xffffffffffffffffll<>30)&3; - int offset = signExtend(imm, 9); - snprintf(buff, sizeof(buff), "LDR %s, [%s], %s0x%x", (size==0b10)?Wt[Rt]:Xt[Rt], XtSp[Rn], (offset<0)?"-":"", abs(offset)); - return buff; - } - if(isMask(opcode, "1x111000010iiiiiiiii11nnnnnttttt", &a)) { - int size = (opcode>>30)&3; - int offset = signExtend(imm, 9); - snprintf(buff, sizeof(buff), "LDR %s, [%s, %s0x%x]!", (size==0b10)?Wt[Rt]:Xt[Rt], XtSp[Rn], (offset<0)?"-":"", abs(offset)); - return buff; - } - if(isMask(opcode, "1x11100101iiiiiiiiiiiinnnnnttttt", &a)) { - int size = (opcode>>30)&3; - int offset = (imm)<>30)&1)?3:2; - int offset = signExtend(imm, 19)<<2; - snprintf(buff, sizeof(buff), "LDR %s, [#%+d]\t;%p", (size==2)?Wt[Rt]:Xt[Rt], offset, (void*)(addr+offset)); - return buff; - } - if(isMask(opcode, "10011000iiiiiiiiiiiiiiiiiiittttt", &a)) { - int offset = signExtend(imm, 19)<<2; - snprintf(buff, sizeof(buff), "LDRSW %s, [#%+d]\t;%p", Xt[Rt], offset, (void*)(addr+offset)); - return buff; - } - if(isMask(opcode, "ff011100iiiiiiiiiiiiiiiiiiittttt", &a)) { - int offset = signExtend(imm, 
19)<<2; - const char* Y[] = {"S", "D", "Q", "?"}; - snprintf(buff, sizeof(buff), "LDR %s%d, [#%+d]\t;%p", Y[sf], Rt, offset, (void*)(addr+offset)); - return buff; - } - if(isMask(opcode, "1x111000011mmmmmoooS10nnnnnttttt", &a)) { - int size = (opcode>>30)&3; - const char* extend[] = {"?0", "?1", "UXTW", "LSL", "?4", "?5", "SXTW", "SXTX"}; - int amount = size*a.S; - if(option==3 && !amount) - snprintf(buff, sizeof(buff), "LDR %s, [%s, %s]", (size==2)?Wt[Rt]:Xt[Rt], XtSp[Rn], ((option&1)==0)?Wt[Rm]:Xt[Rm]); - else - snprintf(buff, sizeof(buff), "LDR %s, [%s, %s, %s %d]", (size==2)?Wt[Rt]:Xt[Rt], XtSp[Rn], ((option&1)==0)?Wt[Rm]:Xt[Rm], extend[option], amount); - return buff; - } - if(isMask(opcode, "1x111000000iiiiiiiii01nnnnnttttt", &a)) { - int size = (opcode>>30)&3; - int offset = signExtend(imm, 9); - snprintf(buff, sizeof(buff), "STR %s, [%s], %s0x%x", (size==0b10)?Wt[Rt]:Xt[Rt], XtSp[Rn], (offset<0)?"-":"", abs(offset)); - return buff; - } - if(isMask(opcode, "1x111000000iiiiiiiii11nnnnnttttt", &a)) { - int size = (opcode>>30)&3; - int offset = signExtend(imm, 9); - snprintf(buff, sizeof(buff), "STR %s, [%s, %s0x%x]!", (size==0b10)?Wt[Rt]:Xt[Rt], XtSp[Rn], (offset<0)?"-":"", abs(offset)); - return buff; - } - if(isMask(opcode, "1x11100100iiiiiiiiiiiinnnnnttttt", &a)) { - int size = (opcode>>30)&3; - int offset = (imm)<>30)&3; - const char* extend[] = {"?0", "?1", "UXTW", "LSL", "?4", "?5", "SXTW", "SXTX"}; - int amount = size*a.S; - if(option==3 && !amount) - snprintf(buff, sizeof(buff), "STR %s, [%s, %s]", (size==2)?Wt[Rt]:Xt[Rt], XtSp[Rn], ((option&1)==0)?Wt[Rm]:Xt[Rm]); - else - snprintf(buff, sizeof(buff), "STR %s, [%s, %s, %s %d]", (size==2)?Wt[Rt]:Xt[Rt], XtSp[Rn], ((option&1)==0)?Wt[Rm]:Xt[Rm], extend[option], amount); - return buff; - } - if(isMask(opcode, "0x111000010iiiiiiiii01nnnnnttttt", &a)) { - int size = a.x; - int offset = signExtend(imm, 9); - snprintf(buff, sizeof(buff), "LDR%c %s, [%s], %s0x%x", size?'H':'B', Xt[Rt], XtSp[Rn], (offset<0)?"-":"", abs(offset)); - return buff; - } - if(isMask(opcode, "0x111000010iiiiiiiii11nnnnnttttt", &a)) { - int size = a.x; - int offset = signExtend(imm, 9); - snprintf(buff, sizeof(buff), "LDR%c %s, [%s, %s0x%x]!", size?'H':'B', Xt[Rt], XtSp[Rn], (offset<0)?"-":"", abs(offset)); - return buff; - } - if(isMask(opcode, "0x11100101iiiiiiiiiiiinnnnnttttt", &a)) { - int size = a.x; - int offset = (imm)<=immr) - snprintf(buff, sizeof(buff), "UBFX %s, %s, %d, %d", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], immr, imms-immr+1); - else - snprintf(buff, sizeof(buff), "UBFM %s, %s, %d, %d", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], immr, imms); - - return buff; - } - - if(isMask(opcode, "f0011010110mmmmm001010nnnnnddddd", &a)) { - snprintf(buff, sizeof(buff), "ASR %s, %s, %s", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], sf?Xt[Rm]:Wt[Rm]); - return buff; - } - - if(isMask(opcode, "f00100110Nrrrrrrssssssnnnnnddddd", &a)) { - if(sf && imms==0b111111) - snprintf(buff, sizeof(buff), "ASR %s, %s, %d", Xt[Rd], Xt[Rn], immr); - else if(!sf && imms==0b011111) - snprintf(buff, sizeof(buff), "ASR %s, %s, %d", Wt[Rd], Wt[Rn], immr); - else if(immr==0 && imms==0b000111) - snprintf(buff, sizeof(buff), "SXTB %s, %s", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn]); - else if(immr==0 && imms==0b001111) - snprintf(buff, sizeof(buff), "SXTH %s, %s", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn]); - else if(sf && immr==0 && imms==0b011111) - snprintf(buff, sizeof(buff), "SXTW %s, %s", Xt[Rd], Wt[Rn]); - else if(imms>=immr) - snprintf(buff, sizeof(buff), "SBFX %s, %s, %d, %d", sf?Xt[Rd]:Wt[Rd], 
sf?Xt[Rn]:Wt[Rn], immr, imms-immr+1); - else - snprintf(buff, sizeof(buff), "SBFM %s, %s, %d, %d", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], immr, imms); - return buff; - } - - if(isMask(opcode, "f00100111N0mmmmmssssssnnnnnddddd", &a)) { - if(Rn==Rm) - snprintf(buff, sizeof(buff), "ROR %s, %s, %d", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], imms); - else - snprintf(buff, sizeof(buff), "EXTR %s, %s, %s, %d", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], sf?Xt[Rm]:Wt[Rm], imms); - return buff; - } - - if(isMask(opcode, "f0011010110mmmmm001011nnnnnddddd", &a)) { - snprintf(buff, sizeof(buff), "ROR %s, %s, %s", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], sf?Xt[Rm]:Wt[Rm]); - return buff; - } - - if(isMask(opcode, "f0011010110mmmmm001001nnnnnddddd", &a)) { - snprintf(buff, sizeof(buff), "LSR %s, %s, %s", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], sf?Xt[Rm]:Wt[Rm]); - return buff; - } - - if(isMask(opcode, "f0011010110mmmmm001000nnnnnddddd", &a)) { - snprintf(buff, sizeof(buff), "LSL %s, %s, %s", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], sf?Xt[Rm]:Wt[Rm]); - return buff; - } - - if(isMask(opcode, "f01100110Nrrrrrrssssssnnnnnddddd", &a)) { - if(imms>2, (void*)(addr + offset)); - return buff; - } - if(isMask(opcode, "000101iiiiiiiiiiiiiiiiiiiiiiiiii", &a)) { - int offset = signExtend(imm, 26)<<2; - snprintf(buff, sizeof(buff), "B #+%di\t; %p", offset>>2, (void*)(addr + offset)); - return buff; - } - if(isMask(opcode, "f0110100iiiiiiiiiiiiiiiiiiittttt", &a)) { - int offset = signExtend(imm, 19)<<2; - snprintf(buff, sizeof(buff), "CBZ %s, #%+di\t; %p", Xt[Rt], offset>>2, (void*)(addr + offset)); - return buff; - } - if(isMask(opcode, "f0110101iiiiiiiiiiiiiiiiiiittttt", &a)) { - int offset = signExtend(imm, 19)<<2; - snprintf(buff, sizeof(buff), "CBNZ %s, #%+di\t; %p", Xt[Rt], offset>>2, (void*)(addr + offset)); - return buff; - } - if(isMask(opcode, "f0110100iiiiiiiiiiiiiiiiiiittttt", &a)) { - int offset = signExtend(imm, 19)<<2; - snprintf(buff, sizeof(buff), "CBZ %s, #%+di\t; %p", Xt[Rt], offset>>2, (void*)(addr + offset)); - return buff; - } - if(isMask(opcode, "s0110110sssssiiiiiiiiiiiiiittttt", &a)) { - int offset = signExtend(imm, 14)<<2; - snprintf(buff, sizeof(buff), "TBZ %s, 0x%x, #%+di\t; %p", (imms<31)?Xt[Rt]:Wt[Rt], imms, offset>>2, (void*)(addr + offset)); - return buff; - } - if(isMask(opcode, "s0110111sssssiiiiiiiiiiiiiittttt", &a)) { - int offset = signExtend(imm, 14)<<2; - snprintf(buff, sizeof(buff), "TBNZ %s, 0x%x, #%+di\t; %p", (imms<31)?Xt[Rt]:Wt[Rt], imms, offset>>2, (void*)(addr + offset)); - return buff; - } - - if(isMask(opcode, "f0011010100mmmmmcccc01nnnnnddddd", &a)) { - if(Rm!=31 && (cond&0b1110)!=0b1110 && Rn!=31 && Rn==Rm) - snprintf(buff, sizeof(buff), "CINC %s, %s, %s, %s", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], sf?Xt[Rm]:Wt[Rm], conds[cond^1]); - else if(Rm==31 && (cond&0b1110)!=0b1110 && Rn==31) - snprintf(buff, sizeof(buff), "CSET %s,%s", sf?Xt[Rd]:Wt[Rd], conds[cond^1]); - else - snprintf(buff, sizeof(buff), "CSINC %s, %s, %s, %s", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], sf?Xt[Rm]:Wt[Rm], conds[cond]); - return buff; - } - - if(isMask(opcode, "f1011010100mmmmmcccc00nnnnnddddd", &a)) { - if(Rm!=31 && (cond&0b1110)!=0b1110 && Rn!=31 && Rn==Rm) - snprintf(buff, sizeof(buff), "CINV %s, %s, %s, %s", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], sf?Xt[Rm]:Wt[Rm], conds[cond^1]); - else if(Rm==31 && (cond&0b1110)!=0b1110 && Rn==31) - snprintf(buff, sizeof(buff), "CSETM %s,%s", sf?Xt[Rd]:Wt[Rd], conds[cond^1]); - else - snprintf(buff, sizeof(buff), "CSINV %s, %s, %s, %s", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], sf?Xt[Rm]:Wt[Rm], 
conds[cond]); - return buff; - } - - if(isMask(opcode, "f1011010100mmmmmcccc01nnnnnddddd", &a)) { - if((cond&0b1110)!=0b1110 && Rn==Rm) - snprintf(buff, sizeof(buff), "CNEG %s, %s, %s", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], conds[cond^1]); - else - snprintf(buff, sizeof(buff), "CSNEG %s, %s, %s, %s", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], sf?Xt[Rm]:Wt[Rm], conds[cond]); - return buff; - } - if(isMask(opcode, "f0011010100mmmmmcccc00nnnnnddddd", &a)) { - snprintf(buff, sizeof(buff), "CSEL %s, %s, %s, %s", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], sf?Xt[Rm]:Wt[Rm], conds[cond]); - return buff; - } - // MISC Bits - if(isMask(opcode, "f10110101100000000010onnnnnddddd", &a)) { - snprintf(buff, sizeof(buff), "CL%c %s, %s", option?'S':'Z', sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn]); - return buff; - } - if(isMask(opcode, "f101101011000000000000nnnnnddddd", &a)) { - snprintf(buff, sizeof(buff), "RBIT %s, %s", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn]); - return buff; - } - if(isMask(opcode, "f1011010110000000000oonnnnnddddd", &a)) { - if(!sf && option==2) - snprintf(buff, sizeof(buff), "REV %s, %s", Wt[Rd], Wt[Rn]); - else if (sf && option==3) - snprintf(buff, sizeof(buff), "REV %s, %s", Xt[Rd], Xt[Rn]); - else - snprintf(buff, sizeof(buff), "REV%d %s, %s", 8< nzcv - //o0=1(op0=3), op1=0b011(3) CRn=0b0100(4) CRm=0b0100(4) op2=2 => fpcr - if(a.o==1 && a.p==3 && a.n==4 && a.m==2 && a.t2==0) - reg="nzcv"; - else if(a.o==1 && a.p==3 && a.n==4 && a.m==4 && a.t2==2) - reg="fpcr"; - - if(!reg) - snprintf(buff, sizeof(buff), "MSR S%d_%d_%d_%d_%d, %s", 2+a.o, a.p, a.n, a.m, a.t2, Xt[Rt]); - else - snprintf(buff, sizeof(buff), "MSR %s, %s", reg, Xt[Rt]); - return buff; - } - if(isMask(opcode, "110101010011opppnnnnmmmm222ttttt", &a)) { - const char* reg=NULL; - //o0=1(op0=3), op1=0b011(3) CRn=0b0100(4) CRm=0b0010(2) op2=0 => nzcv - //o0=1(op0=3), op1=0b011(3) CRn=0b0100(4) CRm=0b0100(4) op2=2 => fpcr - if(a.o==1 && a.p==3 && a.n==4 && a.m==2 && a.t2==0) - reg="nzcv"; - else if(a.o==1 && a.p==3 && a.n==4 && a.m==4 && a.t2==2) - reg="fpcr"; - - if(!reg) - snprintf(buff, sizeof(buff), "MRS %s, S%d_%d_%d_%d_%d", Xt[Rt], 2+a.o, a.p, a.n, a.m, a.t2); - else - snprintf(buff, sizeof(buff), "MRS %s, %s", Xt[Rt], reg); - return buff; - } - - // ----------- NEON / FPU - - // VORR/VAND/VBIC/VORN - if(isMask(opcode, "0Q001110101mmmmm000111nnnnnddddd", &a)) { - char q = a.Q?'Q':'D'; - if(Rn==Rm) - snprintf(buff, sizeof(buff), "VMOV %c%d, %c%d", q, Rd, q, Rn); - else - snprintf(buff, sizeof(buff), "VORR %c%d, %c%d, %c%d", q, Rd, q, Rn, q, Rm); - return buff; - } - if(isMask(opcode, "0Q001110111mmmmm000111nnnnnddddd", &a)) { - char q = a.Q?'Q':'D'; - snprintf(buff, sizeof(buff), "VORN %c%d, %c%d, %c%d", q, Rd, q, Rn, q, Rm); - return buff; - } - if(isMask(opcode, "0Q001110001mmmmm000111nnnnnddddd", &a)) { - char q = a.Q?'Q':'D'; - snprintf(buff, sizeof(buff), "VAND %c%d, %c%d, %c%d", q, Rd, q, Rn, q, Rm); - return buff; - } - if(isMask(opcode, "0Q001110011mmmmm000111nnnnnddddd", &a)) { - char q = a.Q?'Q':'D'; - snprintf(buff, sizeof(buff), "VBIC %c%d, %c%d, %c%d", q, Rd, q, Rn, q, Rm); - return buff; - } - // UMOV - if(isMask(opcode, "0Q001110000rrrrr001111nnnnnddddd", &a)) { - char q = a.Q?'Q':'D'; - char s = '?'; - int sz=0; - if(a.Q==0 && immr&1) {s='B'; sz=0; } - else if(a.Q==0 && (immr&3)==2) {s='H'; sz=1; } - else if(a.Q==0 && (immr&7)==4) {s='S'; sz=2; } - else if(a.Q==1 && (immr&15)==8) {s='D'; sz=3; } - int index = (immr)>>(sz+1); - if(sz>2) - snprintf(buff, sizeof(buff), "MOV %s, %c%d.%c[%d]", a.Q?Xt[Rd]:Wt[Rd], q, Rn, s, index); - else - 
snprintf(buff, sizeof(buff), "UMOV %s, %c%d.%c[%d]", a.Q?Xt[Rd]:Wt[Rd], q, Rn, s, index); - return buff; - } - // VEOR - if(isMask(opcode, "0Q101110001mmmmm000111nnnnnddddd", &a)) { - char q = a.Q?'Q':'D'; - snprintf(buff, sizeof(buff), "VEOR %c%d, %c%d, %c%d", q, Rd, q, Rn, q, Rm); - return buff; - } - - // VADD / VSUB - if(isMask(opcode, "0QU01110ff1mmmmm100001nnnnnddddd", &a)) { - const char* Y[] = {"8B", "16B", "4H", "8H", "2S", "4S", "??", "2D"}; - const char* Vd = Y[((sf)<<1) | a.Q]; - snprintf(buff, sizeof(buff), "V%s V%d.%s, V%d.%s, V%d.%s", a.U?"SUB":"ADD", Rd, Vd, Rn, Vd, Rm, Vd); - return buff; - } - - // VMUL - if(isMask(opcode, "0Q001110ff1mmmmm100111nnnnnddddd", &a)) { - const char* Y[] = {"8B", "16B", "4H", "8H", "2S", "4S", "??", "2D"}; - const char* Vd = Y[((sf)<<1) | a.Q]; - snprintf(buff, sizeof(buff), "VMUL V%d.%s, V%d.%s, V%d.%s", Rd, Vd, Rn, Vd, Rm, Vd); - return buff; - } - // CMP - if(isMask(opcode, "0Q101110ff1mmmmm100011nnnnnddddd", &a)) { - const char* Y[] = {"8B", "16B", "4H", "8H", "2S", "4S", "??", "2D"}; - const char* Vd = Y[((sf)<<1) | a.Q]; - snprintf(buff, sizeof(buff), "VCMEQ V%d.%s, V%d.%s, V%d.%s", Rd, Vd, Rn, Vd, Rm, Vd); - return buff; - } - - // Shift - if(isMask(opcode, "0QU011110hhhhrrr000001nnnnnddddd", &a)) { - const char* Y[] = {"8B", "16B", "4H", "8H", "2S", "4S", "??", "2D"}; - const char* Vd ="??"; - int s = 0; - if(shift==0b0001) {Vd = Y[a.Q]; s=16-((shift)<<3 | immr);} - else if((shift&0b1110)==0b0010) {Vd = Y[2+a.Q]; s=32-((shift)<<3 | immr);} - else if((shift&0b1100)==0b0100) {Vd = Y[4+a.Q]; s=64-((shift)<<3 | immr);} - else if((shift&0b1000)==0b1000) {Vd = Y[6+a.Q]; s=128-((shift)<<3 | immr);} - snprintf(buff, sizeof(buff), "%cSHR V%d.%s, V%d.%s, #%d", a.U?'U':'S', Rd, Vd, Rn, Vd, s); - return buff; - } - - // INS - if(isMask(opcode, "01101110000rrrrr0ssss1nnnnnddddd", &a)) { - char s = '?'; - int idx1=0, idx2=0; - if(immr&1) {s='B'; idx1=(immr)>>1; idx2 = imms; } - else if((immr&3)==2) {s='H'; idx1=(immr)>>2; idx2=(imms)>>1;} - else if((immr&7)==4) {s='S'; idx1=(immr)>>3; idx2=(imms)>>2;} - else if((immr&15)==8) {s='D'; idx1=(immr)>>4; idx2=(imms)>>3;} - snprintf(buff, sizeof(buff), "INS V%d.%c[%d], V%d.%c[%d]", Rd, s, idx1, Rn, s, idx2); - return buff; - } - if(isMask(opcode, "01001110000rrrrr000111nnnnnddddd", &a)) { - char s = '?', R = 0; - int idx1=0; - if(immr&1) {s='B'; idx1=(immr)>>1; } - else if((immr&3)==2) {s='H'; idx1=(immr)>>2;} - else if((immr&7)==4) {s='S'; idx1=(immr)>>3;} - else if((immr&15)==8) {s='D'; idx1=(immr)>>4; R=1;} - snprintf(buff, sizeof(buff), "INS V%d.%c[%d], %s", Rd, s, idx1, R?Xt[Rn]:Wt[Rn]); - return buff; - } - - // ADR - if(isMask(opcode, "0ss10000iiiiiiiiiiiiiiiiiiiddddd", &a)) { - snprintf(buff, sizeof(buff), "ADR, %s, %ld", Xt[Rd], signExtend((imm)<<2|(imms), 20)); - return buff; - } - - // LDR / STR - if(isMask(opcode, "ss111101cciiiiiiiiiiiinnnnnttttt", &a)) { - char s = '?'; - int size=imms; - int op=0; - if(size==0 && opc==1) {s='B';} - else if(size==1 && opc==1) {s='H';} - else if(size==2 && opc==1) {s='S';} - else if(size==3 && opc==1) {s='D';} - else if(size==0 && opc==3) {s='Q'; size = 4;} - else if(size==0 && opc==0) {s='B'; op=1;} - else if(size==1 && opc==0) {s='H'; op=1;} - else if(size==2 && opc==0) {s='S'; op=1;} - else if(size==3 && opc==0) {s='D'; op=1;} - else if(size==0 && opc==2) {s='Q'; op=1; size = 4;} - - int offset = imm<>1); break; - case 2: if(!(sf&1)) - idx = (a.Q<<1) | a.S; - else { - scale = 3; - idx = a.Q; - } - break; - } - snprintf(buff, sizeof(buff), "%s1 {V%d.%s}[%d], 
[%s]", a.L?"LD":"ST", Rt, Y[scale], idx, XtSp[Rn]); - return buff; - } - // LDUR/STUR - if(isMask(opcode, "ff111100cL0iiiiiiiii00nnnnnttttt", &a)) { - const char* Y[] = {"B", "H", "S", "D", "Q"}; - int sz = sf; - if(sz==0 && a.c) - sz = 4; - int offset = signExtend(imm, 9); - if(!offset) - snprintf(buff, sizeof(buff), "%sUR %s%d, [%s]", a.L?"LD":"ST", Y[sz], Rd, XtSp[Rn]); - else - snprintf(buff, sizeof(buff), "%sUR %s%d, [%s, %+d]", a.L?"LD":"ST", Y[sz], Rd, XtSp[Rn], imm); - return buff; - } - // LDR/STR vector immediate - if(isMask(opcode, "ff111101cLiiiiiiiiiiiinnnnnttttt", &a)) { - const char* Y[] = {"B", "H", "S", "D", "Q"}; - int sz = sf; - if(sz==0 && a.c) - sz = 4; - int offset = imm< register - ldp x10, x11, [x0, (8 * 0)] - ldp x12, x13, [x0, (8 * 2)] - ldp x14, x15, [x0, (8 * 4)] - ldp x16, x17, [x0, (8 * 6)] - ldp x18, x19, [x0, (8 * 8)] - ldp x20, x21, [x0, (8 * 10)] - ldp x22, x23, [x0, (8 * 12)] - ldp x24, x25, [x0, (8 * 14)] - ldp x26, x27, [x0, (8 * 16)] - //jump to function - br x1 diff --git a/src/dynarec/dynablock.c b/src/dynarec/dynablock.c index 8906db51..6bc61bf3 100755 --- a/src/dynarec/dynablock.c +++ b/src/dynarec/dynablock.c @@ -19,12 +19,10 @@ #include "dynablock_private.h" #include "dynarec_private.h" #include "elfloader.h" -#ifdef ARM64 -#include "dynarec_arm64.h" -#include "arm64_lock.h" -#else -#error Unsupported architecture! -#endif + +#include "dynarec_native.h" +#include "native_lock.h" + #include "custommem.h" #include "khash.h" @@ -74,13 +72,13 @@ void FreeDynablock(dynablock_t* db, int need_lock) if(db->parent->direct) { uintptr_t addr = (uintptr_t)db->x64_addr; if(addr>=startdb && addrparent->direct[addr-startdb], 0); // secured write + native_lock_xchg(&db->parent->direct[addr-startdb], 0); // secured write } // remove jumptable setJumpTableDefault64(db->x64_addr); // remove and free the sons for (int i=0; isons_size; ++i) { - dynablock_t *son = (dynablock_t*)arm64_lock_xchg(&db->sons[i], 0); + dynablock_t *son = (dynablock_t*)native_lock_xchg(&db->sons[i], 0); FreeDynablock(son, 0); } // only the father free the DynarecMap @@ -189,7 +187,7 @@ int FreeRangeDynablock(dynablocklist_t* dynablocks, uintptr_t addr, uintptr_t si end = enddb; if(end>startdb && startdirect[i-startdb], 0); + db = (dynablock_t*)native_lock_xchg(&dynablocks->direct[i-startdb], 0); if(db) { if(db->father) db = db->father; @@ -274,7 +272,7 @@ dynablock_t *AddNewDynablock(dynablocklist_t* dynablocks, uintptr_t addr, int* c pthread_mutex_lock(&my_context->mutex_dyndump); if(!dynablocks->direct) { dynablock_t** p = (dynablock_t**)calloc(dynablocks->textsz, sizeof(dynablock_t*)); - if(arm64_lock_storeifnull(&dynablocks->direct, p)!=p) + if(native_lock_storeifnull(&dynablocks->direct, p)!=p) free(p); // someone already create the direct array, too late... } @@ -283,7 +281,7 @@ dynablock_t *AddNewDynablock(dynablocklist_t* dynablocks, uintptr_t addr, int* c block = (dynablock_t*)calloc(1, sizeof(dynablock_t)); block->parent = dynablocks; - dynablock_t* tmp = (dynablock_t*)arm64_lock_storeifnull(&dynablocks->direct[addr-dynablocks->text], block); + dynablock_t* tmp = (dynablock_t*)native_lock_storeifnull(&dynablocks->direct[addr-dynablocks->text], block); if(tmp != block) { // a block appeard! 
pthread_mutex_unlock(&my_context->mutex_dyndump); @@ -353,7 +351,7 @@ static dynablock_t* internalDBGetBlock(x64emu_t* emu, uintptr_t addr, uintptr_t pthread_mutex_unlock(&my_context->mutex_dyndump); if(!ret) { dynarec_log(LOG_DEBUG, "Fillblock of block %p for %p returned an error\n", block, (void*)addr); - void* old = (void*)arm64_lock_storeifref(&dynablocks->direct[addr-dynablocks->text], 0, block); + void* old = (void*)native_lock_storeifref(&dynablocks->direct[addr-dynablocks->text], 0, block); if(old!=block && old) {// put it back in place, strange things are happening here! dynarec_log(LOG_INFO, "Warning, a wild block appeared at %p: %p\n", (void*)addr, old); // doing nothing else, the block has not be writen diff --git a/src/dynarec/dynarec_arch.h b/src/dynarec/dynarec_arch.h new file mode 100755 index 00000000..ba059a90 --- /dev/null +++ b/src/dynarec/dynarec_arch.h @@ -0,0 +1,24 @@ +#ifndef __DYNAREC_ARCH__H_ +#define __DYNAREC_ARCH__H_ + +#ifdef ARM64 +#include "arm64/dynarec_arm64_private.h" +#include "arm64/dynarec_arm64_functions.h" + +#define instruction_native_t instruction_arm64_t + +uintptr_t arm_pass0(dynarec_arm_t* dyn, uintptr_t addr); +uintptr_t arm_pass1(dynarec_arm_t* dyn, uintptr_t addr); +uintptr_t arm_pass2(dynarec_arm_t* dyn, uintptr_t addr); +uintptr_t arm_pass3(dynarec_arm_t* dyn, uintptr_t addr); + +#define native_pass0 arm_pass0 +#define native_pass1 arm_pass1 +#define native_pass2 arm_pass2 +#define native_pass3 arm_pass3 + +#else +#error Unsupported platform +#endif + +#endif //__DYNAREC_ARCH__H_ \ No newline at end of file diff --git a/src/dynarec/dynarec_arm64.c b/src/dynarec/dynarec_arm64.c deleted file mode 100755 index 18e05bce..00000000 --- a/src/dynarec/dynarec_arm64.c +++ /dev/null @@ -1,541 +0,0 @@ -#include -#include -#include -#include -#include - -#include "debug.h" -#include "box64context.h" -#include "custommem.h" -#include "dynarec.h" -#include "emu/x64emu_private.h" -#include "tools/bridge_private.h" -#include "x64run.h" -#include "x64emu.h" -#include "box64stack.h" -#include "callback.h" -#include "emu/x64run_private.h" -#include "x64trace.h" -#include "dynablock.h" -#include "dynablock_private.h" -#include "dynarec_arm64.h" -#include "dynarec_arm64_private.h" -#include "dynarec_arm64_functions.h" -#include "elfloader.h" - -void printf_x64_instruction(zydis_dec_t* dec, instruction_x64_t* inst, const char* name) { - uint8_t *ip = (uint8_t*)inst->addr; - if(ip[0]==0xcc && ip[1]=='S' && ip[2]=='C') { - uintptr_t a = *(uintptr_t*)(ip+3); - if(a==0) { - dynarec_log(LOG_NONE, "%s%p: Exit x64emu%s\n", (box64_dynarec_dump>1)?"\e[1m":"", (void*)ip, (box64_dynarec_dump>1)?"\e[m":""); - } else { - dynarec_log(LOG_NONE, "%s%p: Native call to %p%s\n", (box64_dynarec_dump>1)?"\e[1m":"", (void*)ip, (void*)a, (box64_dynarec_dump>1)?"\e[m":""); - } - } else { - if(dec) { - dynarec_log(LOG_NONE, "%s%p: %s", (box64_dynarec_dump>1)?"\e[1m":"", ip, DecodeX64Trace(dec, inst->addr)); - } else { - dynarec_log(LOG_NONE, "%s%p: ", (box64_dynarec_dump>1)?"\e[1m":"", ip); - for(int i=0; isize; ++i) { - dynarec_log(LOG_NONE, "%02X ", ip[i]); - } - dynarec_log(LOG_NONE, " %s", name); - } - // print Call function name if possible - if(ip[0]==0xE8 || ip[0]==0xE9) { // Call / Jmp - uintptr_t nextaddr = (uintptr_t)ip + 5 + *((int32_t*)(ip+1)); - printFunctionAddr(nextaddr, "=> "); - } else if(ip[0]==0xFF) { - if(ip[1]==0x25) { - uintptr_t nextaddr = (uintptr_t)ip + 6 + *((int32_t*)(ip+2)); - printFunctionAddr(nextaddr, "=> "); - } - } - // end of line and colors - 
dynarec_log(LOG_NONE, "%s\n", (box64_dynarec_dump>1)?"\e[m":""); - } -} - -void add_next(dynarec_arm_t *dyn, uintptr_t addr) { - if(!box64_dynarec_bigblock) - return; - for(int i=0; inext_sz; ++i) - if(dyn->next[i]==addr) - return; - if(dyn->next_sz == dyn->next_cap) { - dyn->next_cap += 16; - dyn->next = (uintptr_t*)realloc(dyn->next, dyn->next_cap*sizeof(uintptr_t)); - } - dyn->next[dyn->next_sz++] = addr; -} -uintptr_t get_closest_next(dynarec_arm_t *dyn, uintptr_t addr) { - // get closest, but no addresses befores - uintptr_t best = 0; - int i = 0; - while((inext_sz) && (best!=addr)) { - if(dyn->next[i]next+i, dyn->next+i+1, (dyn->next_sz-i-1)*sizeof(uintptr_t)); - --dyn->next_sz; - } else { - if((dyn->next[i]next[i]; - ++i; - } - } - return best; -} -#define PK(A) (*((uint8_t*)(addr+(A)))) -int is_nops(dynarec_arm_t *dyn, uintptr_t addr, int n) -{ - if(!n) - return 1; - if(PK(0)==0x90) - return is_nops(dyn, addr+1, n-1); - if(n>1 && PK(0)==0x66) // if opcode start with 0x66, and there is more after, than is *can* be a NOP - return is_nops(dyn, addr+1, n-1); - if(n>2 && PK(0)==0x0f && PK(1)==0x1f && PK(2)==0x00) - return is_nops(dyn, addr+3, n-3); - if(n>2 && PK(0)==0x8d && PK(1)==0x76 && PK(2)==0x00) // lea esi, [esi] - return is_nops(dyn, addr+3, n-3); - if(n>3 && PK(0)==0x0f && PK(1)==0x1f && PK(2)==0x40 && PK(3)==0x00) - return is_nops(dyn, addr+4, n-4); - if(n>3 && PK(0)==0x8d && PK(1)==0x74 && PK(2)==0x26 && PK(3)==0x00) - return is_nops(dyn, addr+4, n-4); - if(n>4 && PK(0)==0x0f && PK(1)==0x1f && PK(2)==0x44 && PK(3)==0x00 && PK(4)==0x00) - return is_nops(dyn, addr+5, n-5); - if(n>5 && PK(0)==0x8d && PK(1)==0xb6 && PK(2)==0x00 && PK(3)==0x00 && PK(4)==0x00 && PK(5)==0x00) - return is_nops(dyn, addr+6, n-6); - if(n>6 && PK(0)==0x0f && PK(1)==0x1f && PK(2)==0x80 && PK(3)==0x00 && PK(4)==0x00 && PK(5)==0x00 && PK(6)==0x00) - return is_nops(dyn, addr+7, n-7); - if(n>6 && PK(0)==0x8d && PK(1)==0xb4 && PK(2)==0x26 && PK(3)==0x00 && PK(4)==0x00 && PK(5)==0x00 && PK(6)==0x00) // lea esi, [esi+0] - return is_nops(dyn, addr+7, n-7); - if(n>7 && PK(0)==0x0f && PK(1)==0x1f && PK(2)==0x84 && PK(3)==0x00 && PK(4)==0x00 && PK(5)==0x00 && PK(6)==0x00 && PK(7)==0x00) - return is_nops(dyn, addr+8, n-8); - return 0; -} - -// return size of next instuciton, -1 is unknown -// not all instrction are setup -int next_instruction(dynarec_arm_t *dyn, uintptr_t addr) -{ - uint8_t opcode = PK(0); - uint8_t nextop; - switch (opcode) { - case 0x66: - opcode = PK(1); - switch(opcode) { - case 0x90: - return 2; - } - break; - case 0x81: - nextop = PK(1); - return fakeed(dyn, addr+2, 0, nextop)-addr + 4; - case 0x83: - nextop = PK(1); - return fakeed(dyn, addr+2, 0, nextop)-addr + 1; - case 0x84: - case 0x85: - case 0x88: - case 0x89: - case 0x8A: - case 0x8B: - case 0x8C: - case 0x8D: - case 0x8E: - case 0x8F: - nextop = PK(1); - return fakeed(dyn, addr+2, 0, nextop)-addr; - case 0x50: - case 0x51: - case 0x52: - case 0x53: - case 0x54: - case 0x55: - case 0x56: - case 0x57: - case 0x58: - case 0x59: - case 0x5A: - case 0x5B: - case 0x5C: - case 0x5D: - case 0x5E: - case 0x5F: - case 0x90: - case 0x91: - case 0x92: - case 0x93: - case 0x94: - case 0x95: - case 0x96: - case 0x97: - case 0x98: - case 0x99: - case 0x9B: - case 0x9C: - case 0x9D: - case 0x9E: - case 0x9F: - return 1; - case 0xA0: - case 0xA1: - case 0xA2: - case 0xA3: - return 5; - case 0xB0: - case 0xB1: - case 0xB2: - case 0xB3: - case 0xB4: - case 0xB5: - case 0xB6: - case 0xB7: - return 2; - case 0xB8: - case 0xB9: - case 0xBA: - case 0xBB: - 
case 0xBC: - case 0xBD: - case 0xBE: - case 0xBF: - return 5; - case 0xFF: - nextop = PK(1); - switch((nextop>>3)&7) { - case 0: // INC Ed - case 1: //DEC Ed - case 2: // CALL Ed - case 4: // JMP Ed - case 6: // Push Ed - return fakeed(dyn, addr+2, 0, nextop)-addr; - } - break; - default: - break; - } - return -1; -} -#undef PK - -int is_instructions(dynarec_arm_t *dyn, uintptr_t addr, int n) -{ - int i = 0; - while(isize) - return X_PEND; // no more instructions, or too many jmp loop, stop - - uint32_t needed = dyn->insts[ninst].x64.use_flags; - if(needed) { - setf &= ~needed; - if(!setf) // all flags already used, no need to continue - return needed; - } - - if(!needed && !dyn->insts[ninst].x64.set_flags && !dyn->insts[ninst].x64.jmp_insts) { - int start = ninst; - int end = ninst; - while(endsize && !dyn->insts[end].x64.use_flags && !dyn->insts[end].x64.set_flags && !dyn->insts[end].x64.jmp_insts) - ++end; - needed = needed_flags(dyn, end, setf, recurse); - for(int i=start; iinsts[i].x64.need_flags = needed; - return needed; - } - - if(dyn->insts[ninst].x64.set_flags && (dyn->insts[ninst].x64.state_flags!=SF_MAYSET)) { - if((setf & ~dyn->insts[ninst].x64.set_flags) == 0) - return needed; // all done, gives all the flags needed - setf |= dyn->insts[ninst].x64.set_flags; // add new flags to continue - } - - int jinst = dyn->insts[ninst].x64.jmp_insts; - if(dyn->insts[ninst].x64.jmp) { - dyn->insts[ninst].x64.need_flags = (jinst==-1)?X_PEND:needed_flags(dyn, jinst, setf, recurse+1); - if(dyn->insts[ninst].x64.use_flags) // conditionnal jump - dyn->insts[ninst].x64.need_flags |= needed_flags(dyn, ninst+1, setf, recurse); - } else - dyn->insts[ninst].x64.need_flags = needed_flags(dyn, ninst+1, setf, recurse); - if(dyn->insts[ninst].x64.state_flags==SF_MAYSET) - needed |= dyn->insts[ninst].x64.need_flags; - else - needed |= (dyn->insts[ninst].x64.need_flags & ~dyn->insts[ninst].x64.set_flags); - if(needed == (X_PEND|X_ALL)) - needed = X_ALL; - return needed; -} - -instsize_t* addInst(instsize_t* insts, size_t* size, size_t* cap, int x64_size, int arm_size) -{ - // x64 instruction is <16 bytes - int toadd; - if(x64_size>arm_size) - toadd = 1 + x64_size/15; - else - toadd = 1 + arm_size/15; - if((*size)+toadd>(*cap)) { - *cap = (*size)+toadd; - insts = (instsize_t*)realloc(insts, (*cap)*sizeof(instsize_t)); - } - while(toadd) { - if(x64_size>15) - insts[*size].x64 = 15; - else - insts[*size].x64 = x64_size; - x64_size -= insts[*size].x64; - if(arm_size>15) - insts[*size].nat = 15; - else - insts[*size].nat = arm_size; - arm_size -= insts[*size].nat; - ++(*size); - --toadd; - } - return insts; -} - -// add a value to table64 (if needed) and gives back the imm19 to use in LDR_literal -int Table64(dynarec_arm_t *dyn, uint64_t val) -{ - // find the value if already present - int idx = -1; - for(int i=0; itable64size && (idx==-1); ++i) - if(dyn->table64[i] == val) - idx = i; - // not found, add it - if(idx==-1) { - if(dyn->table64size == dyn->table64cap) { - dyn->table64cap+=4; - dyn->table64 = (uint64_t*)realloc(dyn->table64, dyn->table64cap * sizeof(uint64_t)); - } - idx = dyn->table64size++; - dyn->table64[idx] = val; - } - // calculate offset - int delta = dyn->tablestart + idx*sizeof(uint64_t) - (uintptr_t)dyn->block; - return delta; -} - - -uintptr_t arm_pass0(dynarec_arm_t* dyn, uintptr_t addr); -uintptr_t arm_pass1(dynarec_arm_t* dyn, uintptr_t addr); -uintptr_t arm_pass2(dynarec_arm_t* dyn, uintptr_t addr); -uintptr_t arm_pass3(dynarec_arm_t* dyn, uintptr_t addr); - -__thread void* 
current_helper = NULL; - -void CancelBlock64() -{ - dynarec_arm_t* helper = (dynarec_arm_t*)current_helper; - current_helper = NULL; - if(!helper) - return; - free(helper->next); - free(helper->insts); - free(helper->table64); - free(helper->sons_x64); - free(helper->sons_arm); - if(helper->dynablock && helper->dynablock->block) - FreeDynarecMap(helper->dynablock, (uintptr_t)helper->dynablock->block, helper->dynablock->size); -} - -void* FillBlock64(dynablock_t* block, uintptr_t addr) { - if(IsInHotPage(addr)) { - dynarec_log(LOG_DEBUG, "Cancelling dynarec FillBlock on hotpage for %p\n", (void*)addr); - return NULL; - } - if(addr>=box64_nodynarec_start && addrdone = 1; - return (void*)block; - } - // protect the 1st page - protectDB(addr, 1); - // init the helper - dynarec_arm_t helper = {0}; - current_helper = &helper; - helper.dynablock = block; - helper.start = addr; - uintptr_t start = addr; - helper.cap = 64; // needs epilog handling - helper.insts = (instruction_arm64_t*)calloc(helper.cap, sizeof(instruction_arm64_t)); - // pass 0, addresses, x86 jump addresses, overall size of the block - uintptr_t end = arm_pass0(&helper, addr); - // no need for next anymore - free(helper.next); - helper.next_sz = helper.next_cap = 0; - helper.next = NULL; - // basic checks - if(!helper.size) { - dynarec_log(LOG_INFO, "Warning, null-sized dynarec block (%p)\n", (void*)addr); - CancelBlock64(); - return (void*)block; - } - if(!isprotectedDB(addr, 1)) { - dynarec_log(LOG_INFO, "Warning, write on current page on pass0, aborting dynablock creation (%p)\n", (void*)addr); - CancelBlock64(); - return NULL; - } - // protect the block of it goes over the 1st page - if((addr&~0xfff)!=(end&~0xfff)) // need to protect some other pages too - protectDB(addr, end-addr); //end is 1byte after actual end - // compute hash signature - uint32_t hash = X31_hash_code((void*)addr, end-addr); - // calculate barriers - for(int i=0; i=end) - helper.insts[i].x64.jmp_insts = -1; - else { - // find jump address instruction - int k=-1; - for(int i2=0; (i21)?"\e[01;36m":"", GetTID(), helper.arm_size, helper.isize); - printFunctionAddr(helper.start, " => "); - dynarec_log(LOG_NONE, "%s\n", (box64_dynarec_dump>1)?"\e[m":""); - } - int oldtable64size = helper.table64size; - size_t oldarmsize = helper.arm_size; - helper.arm_size = 0; - helper.table64size = 0; // reset table64 (but not the cap) - arm_pass3(&helper, addr); - if((oldarmsize!=helper.arm_size) || (oldtable64size %d\n", helper.insts[i].size2, helper.insts[i].size); - } - printf_log(LOG_NONE, "Table64 \t%d -> %d\n", oldtable64size*8, helper.table64size*8); - printf_log(LOG_NONE, " ------------\n"); - //TODO: Cancel block and return empty one - } - // add table64 if needed - if(helper.table64size) { - memcpy((void*)helper.tablestart, helper.table64, helper.table64size*8); - } - // all done... - __clear_cache(p, p+sz); // need to clear the cache before execution... 
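/*
 * Two of the helpers above are easier to follow with concrete numbers.
 *
 * Table64() stores a 64-bit constant once per block and returns the byte
 * delta from the start of the emitted code to that literal, which the
 * emitter then turns into a PC-relative LDR (literal).  If, say,
 * dyn->tablestart == (uintptr_t)dyn->block + 0x1a00 and the value lands at
 * index 2, the returned delta is 0x1a00 + 2*8 = 0x1a10.
 *
 * addInst() packs the x86 and native sizes of each translated instruction
 * into per-entry counters capped at 15, spilling into extra entries when
 * either side is longer.  A usage sketch (a hypothetical caller; the values
 * follow directly from the loop above):
 */
#include <stdlib.h>

static void instsize_example(void)
{
    size_t size = 0, cap = 0;
    // a 7-byte x86 instruction translated to 20 ARM64 instructions
    instsize_t* tab = addInst(NULL, &size, &cap, 7, 20);
    // tab now holds two entries: {x64=7, nat=15} then {x64=0, nat=5}, and size == 2
    free(tab);
}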
- // keep size of instructions for signal handling - { - size_t cap = 1; - for(int i=0; ihelper.insts[i].size)?helper.insts[i].x64.size:helper.insts[i].size)/15; - size_t size = 0; - block->instsize = (instsize_t*)calloc(cap, sizeof(instsize_t)); - for(int i=0; iinstsize = addInst(block->instsize, &size, &cap, helper.insts[i].x64.size, helper.insts[i].size/4); - block->instsize = addInst(block->instsize, &size, &cap, 0, 0); // add a "end of block" mark, just in case - } - // ok, free the helper now - free(helper.insts); - helper.insts = NULL; - free(helper.table64); - helper.table64 = NULL; - block->size = sz; - block->isize = helper.size; - block->block = p; - block->need_test = 0; - //block->x64_addr = (void*)start; - block->x64_size = end-start; - block->hash = X31_hash_code(block->x64_addr, block->x64_size); - // Check if something changed, to abbort if it as - if((block->hash != hash)) { - dynarec_log(LOG_INFO, "Warning, a block changed while beeing processed hash(%p:%ld)=%x/%x\n", block->x64_addr, block->x64_size, block->hash, hash); - CancelBlock64(); - return NULL; - } // fill sons if any - if(!isprotectedDB(addr, end-addr)) { - dynarec_log(LOG_INFO, "Warning, block unprotected while beeing processed %p:%ld, cancelling\n", block->x64_addr, block->x64_size); - CancelBlock64(); - return NULL; - //protectDB(addr, end-addr); - } - dynablock_t** sons = NULL; - int sons_size = 0; - if(helper.sons_size) { - sons = (dynablock_t**)calloc(helper.sons_size, sizeof(dynablock_t*)); - for (int i=0; iparent, helper.sons_x64[i], &created); - if(created) { // avoid breaking a working block! - son->block = helper.sons_arm[i]; - son->x64_addr = (void*)helper.sons_x64[i]; - son->x64_size = end-helper.sons_x64[i]; - if(!son->x64_size) {printf_log(LOG_NONE, "Warning, son with null x64 size! 
(@%p / ARM=%p)", son->x64_addr, son->block);} - son->father = block; - son->size = sz + son->block - block->block; // update size count, for debugging - //son->done = 1; - if(!son->parent) - son->parent = block->parent; - sons[sons_size] = son; - ++sons_size; - } - } - if(sons_size) { - block->sons = sons; - block->sons_size = sons_size; - } else - free(sons); - } - free(helper.sons_x64); - helper.sons_x64 = NULL; - free(helper.sons_arm); - helper.sons_arm = NULL; - current_helper = NULL; - //block->done = 1; - return (void*)block; -} diff --git a/src/dynarec/dynarec_arm64_00.c b/src/dynarec/dynarec_arm64_00.c deleted file mode 100755 index 28720e31..00000000 --- a/src/dynarec/dynarec_arm64_00.c +++ /dev/null @@ -1,2453 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#include "debug.h" -#include "box64context.h" -#include "dynarec.h" -#include "emu/x64emu_private.h" -#include "emu/x64run_private.h" -#include "x64run.h" -#include "x64emu.h" -#include "box64stack.h" -#include "callback.h" -#include "bridge.h" -#include "emu/x64run_private.h" -#include "x64trace.h" -#include "dynarec_arm64.h" -#include "dynarec_arm64_private.h" -#include "arm64_printer.h" - -#include "dynarec_arm64_functions.h" -#include "dynarec_arm64_helper.h" - -int isSimpleWrapper(wrapper_t fun); - -uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog) -{ - uint8_t nextop, opcode; - uint8_t gd, ed; - int8_t i8; - int32_t i32, tmp; - int64_t i64, j64; - uint8_t u8; - uint8_t gb1, gb2, eb1, eb2; - uint32_t u32; - uint64_t u64; - uint8_t wback, wb1, wb2, wb; - int64_t fixedaddress; - - opcode = F8; - MAYUSE(eb1); - MAYUSE(eb2); - MAYUSE(j64); - MAYUSE(wb); - - switch(opcode) { - case 0x00: - INST_NAME("ADD Eb, Gb"); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETEB(x1, 0); - GETGB(x2); - emit_add8(dyn, ninst, x1, x2, x4, x5); - EBBACK; - break; - case 0x01: - INST_NAME("ADD Ed, Gd"); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETGD; - GETED(0); - emit_add32(dyn, ninst, rex, ed, gd, x3, x4); - WBACK; - break; - case 0x02: - INST_NAME("ADD Gb, Eb"); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETEB(x2, 0); - GETGB(x1); - emit_add8(dyn, ninst, x1, x2, x3, x4); - GBBACK; - break; - case 0x03: - INST_NAME("ADD Gd, Ed"); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETGD; - GETED(0); - emit_add32(dyn, ninst, rex, gd, ed, x3, x4); - break; - case 0x04: - INST_NAME("ADD AL, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); - u8 = F8; - UXTBw(x1, xRAX); - emit_add8c(dyn, ninst, x1, u8, x3, x4); - BFIx(xRAX, x1, 0, 8); - break; - case 0x05: - INST_NAME("ADD EAX, Id"); - SETFLAGS(X_ALL, SF_SET_PENDING); - i64 = F32S; - emit_add32c(dyn, ninst, rex, xRAX, i64, x3, x4, x5); - break; - - case 0x08: - INST_NAME("OR Eb, Gb"); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETEB(x1, 0); - GETGB(x2); - emit_or8(dyn, ninst, x1, x2, x4, x2); - EBBACK; - break; - case 0x09: - INST_NAME("OR Ed, Gd"); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETGD; - GETED(0); - emit_or32(dyn, ninst, rex, ed, gd, x3, x4); - WBACK; - break; - case 0x0A: - INST_NAME("OR Gb, Eb"); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETEB(x2, 0); - GETGB(x1); - emit_or8(dyn, ninst, x1, x2, x3, x4); - GBBACK; - break; - case 0x0B: - INST_NAME("OR Gd, Ed"); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETGD; - GETED(0); - emit_or32(dyn, ninst, rex, gd, ed, x3, x4); - break; - case 0x0C: - INST_NAME("OR AL, Ib"); - 
SETFLAGS(X_ALL, SF_SET_PENDING); - u8 = F8; - UXTBw(x1, xRAX); - emit_or8c(dyn, ninst, x1, u8, x3, x4); - BFIx(xRAX, x1, 0, 8); - break; - case 0x0D: - INST_NAME("OR EAX, Id"); - SETFLAGS(X_ALL, SF_SET_PENDING); - i64 = F32S; - emit_or32c(dyn, ninst, rex, xRAX, i64, x3, x4); - break; - - case 0x0F: - switch(rep) { - case 1: - addr = dynarec64_F20F(dyn, addr, ip, ninst, rex, ok, need_epilog); - break; - case 2: - addr = dynarec64_F30F(dyn, addr, ip, ninst, rex, ok, need_epilog); - break; - default: - addr = dynarec64_0F(dyn, addr, ip, ninst, rex, rep, ok, need_epilog); - } - break; - case 0x10: - INST_NAME("ADC Eb, Gb"); - READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETEB(x1, 0); - GETGB(x2); - emit_adc8(dyn, ninst, x1, x2, x4, x5); - EBBACK; - break; - case 0x11: - INST_NAME("ADC Ed, Gd"); - READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETGD; - GETED(0); - emit_adc32(dyn, ninst, rex, ed, gd, x3, x4); - WBACK; - break; - case 0x12: - INST_NAME("ADC Gb, Eb"); - READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETEB(x2, 0); - GETGB(x1); - emit_adc8(dyn, ninst, x1, x2, x4, x3); - GBBACK; - break; - case 0x13: - INST_NAME("ADC Gd, Ed"); - READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETGD; - GETED(0); - emit_adc32(dyn, ninst, rex, gd, ed, x3, x4); - break; - case 0x14: - INST_NAME("ADC AL, Ib"); - READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); - u8 = F8; - UXTBw(x1, xRAX); - emit_adc8c(dyn, ninst, x1, u8, x3, x4, x5); - BFIx(xRAX, x1, 0, 8); - break; - case 0x15: - INST_NAME("ADC EAX, Id"); - READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); - i64 = F32S; - MOV64xw(x1, i64); - emit_adc32(dyn, ninst, rex, xRAX, x1, x3, x4); - break; - - case 0x18: - INST_NAME("SBB Eb, Gb"); - READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETEB(x1, 0); - GETGB(x2); - emit_sbb8(dyn, ninst, x1, x2, x4, x5); - EBBACK; - break; - case 0x19: - INST_NAME("SBB Ed, Gd"); - READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETGD; - GETED(0); - emit_sbb32(dyn, ninst, rex, ed, gd, x3, x4); - WBACK; - break; - case 0x1A: - INST_NAME("SBB Gb, Eb"); - READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETEB(x2, 0); - GETGB(x1); - emit_sbb8(dyn, ninst, x1, x2, x3, x4); - GBBACK; - break; - case 0x1B: - INST_NAME("SBB Gd, Ed"); - READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETGD; - GETED(0); - emit_sbb32(dyn, ninst, rex, gd, ed, x3, x4); - break; - case 0x1C: - INST_NAME("SBB AL, Ib"); - READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); - u8 = F8; - UXTBw(x1, xRAX); - emit_sbb8c(dyn, ninst, x1, u8, x3, x4, x5); - BFIx(xRAX, x1, 0, 8); - break; - case 0x1D: - INST_NAME("SBB EAX, Id"); - READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); - i64 = F32S; - MOV64xw(x2, i64); - emit_sbb32(dyn, ninst, rex, xRAX, x2, x3, x4); - break; - - case 0x20: - INST_NAME("AND Eb, Gb"); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETEB(x1, 0); - GETGB(x2); - emit_and8(dyn, ninst, x1, x2, x4, x5); - EBBACK; - break; - case 0x21: - INST_NAME("AND Ed, Gd"); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETGD; - GETED(0); - emit_and32(dyn, ninst, rex, ed, gd, x3, x4); - WBACK; - break; - case 0x22: - INST_NAME("AND Gb, Eb"); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETEB(x2, 0); - GETGB(x1); - emit_and8(dyn, ninst, x1, x2, x3, x4); - GBBACK; - break; - case 0x23: - INST_NAME("AND Gd, Ed"); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - 
GETGD; - GETED(0); - emit_and32(dyn, ninst, rex, gd, ed, x3, x4); - break; - case 0x24: - INST_NAME("AND AL, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); - u8 = F8; - UXTBw(x1, xRAX); - emit_and8c(dyn, ninst, x1, u8, x3, x4); - BFIx(xRAX, x1, 0, 8); - break; - case 0x25: - INST_NAME("AND EAX, Id"); - SETFLAGS(X_ALL, SF_SET_PENDING); - i64 = F32S; - emit_and32c(dyn, ninst, rex, xRAX, i64, x3, x4); - break; - - case 0x28: - INST_NAME("SUB Eb, Gb"); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETEB(x1, 0); - GETGB(x2); - emit_sub8(dyn, ninst, x1, x2, x4, x5); - EBBACK; - break; - case 0x29: - INST_NAME("SUB Ed, Gd"); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETGD; - GETED(0); - emit_sub32(dyn, ninst, rex, ed, gd, x3, x4); - WBACK; - break; - case 0x2A: - INST_NAME("SUB Gb, Eb"); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETEB(x2, 0); - GETGB(x1); - emit_sub8(dyn, ninst, x1, x2, x3, x4); - GBBACK; - break; - case 0x2B: - INST_NAME("SUB Gd, Ed"); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETGD; - GETED(0); - emit_sub32(dyn, ninst, rex, gd, ed, x3, x4); - break; - case 0x2C: - INST_NAME("SUB AL, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); - u8 = F8; - UXTBw(x1, xRAX); - emit_sub8c(dyn, ninst, x1, u8, x3, x4, x5); - BFIx(xRAX, x1, 0, 8); - break; - case 0x2D: - INST_NAME("SUB EAX, Id"); - SETFLAGS(X_ALL, SF_SET_PENDING); - i64 = F32S; - emit_sub32c(dyn, ninst, rex, xRAX, i64, x3, x4, x5); - break; - case 0x2E: - INST_NAME("CS:"); - break; - - case 0x30: - INST_NAME("XOR Eb, Gb"); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETEB(x1, 0); - GETGB(x2); - emit_xor8(dyn, ninst, x1, x2, x4, x5); - EBBACK; - break; - case 0x31: - INST_NAME("XOR Ed, Gd"); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETGD; - GETED(0); - emit_xor32(dyn, ninst, rex, ed, gd, x3, x4); - WBACK; - break; - case 0x32: - INST_NAME("XOR Gb, Eb"); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETEB(x2, 0); - GETGB(x1); - emit_xor8(dyn, ninst, x1, x2, x3, x4); - GBBACK; - break; - case 0x33: - INST_NAME("XOR Gd, Ed"); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETGD; - GETED(0); - emit_xor32(dyn, ninst, rex, gd, ed, x3, x4); - break; - case 0x34: - INST_NAME("XOR AL, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); - u8 = F8; - UXTBw(x1, xRAX); - emit_xor8c(dyn, ninst, x1, u8, x3, x4); - BFIx(xRAX, x1, 0, 8); - break; - case 0x35: - INST_NAME("XOR EAX, Id"); - SETFLAGS(X_ALL, SF_SET_PENDING); - i64 = F32S; - emit_xor32c(dyn, ninst, rex, xRAX, i64, x3, x4); - break; - case 0x36: - INST_NAME("SS:"); - break; - - case 0x38: - INST_NAME("CMP Eb, Gb"); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETEB(x1, 0); - GETGB(x2); - emit_cmp8(dyn, ninst, x1, x2, x3, x4, x5); - break; - case 0x39: - INST_NAME("CMP Ed, Gd"); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETGD; - GETED(0); - emit_cmp32(dyn, ninst, rex, ed, gd, x3, x4, x5); - break; - case 0x3A: - INST_NAME("CMP Gb, Eb"); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETEB(x2, 0); - GETGB(x1); - emit_cmp8(dyn, ninst, x1, x2, x3, x4, x5); - break; - case 0x3B: - INST_NAME("CMP Gd, Ed"); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETGD; - GETED(0); - emit_cmp32(dyn, ninst, rex, gd, ed, x3, x4, x5); - break; - case 0x3C: - INST_NAME("CMP AL, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); - u8 = F8; - UXTBw(x1, xRAX); - if(u8) { - MOV32w(x2, u8); - emit_cmp8(dyn, ninst, x1, x2, x3, x4, x5); - } else { - emit_cmp8_0(dyn, ninst, x1, x3, x4); - } - break; - case 0x3D: - INST_NAME("CMP EAX, Id"); - SETFLAGS(X_ALL, 
SF_SET_PENDING); - i64 = F32S; - if(i64) { - MOV64xw(x2, i64); - emit_cmp32(dyn, ninst, rex, xRAX, x2, x3, x4, x5); - } else - emit_cmp32_0(dyn, ninst, rex, xRAX, x3, x4); - break; - - case 0x50: - case 0x51: - case 0x52: - case 0x53: - case 0x54: - case 0x55: - case 0x56: - case 0x57: - INST_NAME("PUSH reg"); - gd = xRAX+(opcode&0x07)+(rex.b<<3); - if(gd==xRSP) { - MOVx_REG(x1, gd); - gd = x1; - } - PUSH1(gd); - break; - case 0x58: - case 0x59: - case 0x5A: - case 0x5B: - case 0x5C: - case 0x5D: - case 0x5E: - case 0x5F: - INST_NAME("POP reg"); - gd = xRAX+(opcode&0x07)+(rex.b<<3); - if(gd == xRSP) { - POP1(x1); - MOVx_REG(gd, x1); - } else { - POP1(gd); - } - break; - - case 0x63: - INST_NAME("MOVSXD Gd, Ed"); - nextop = F8; - GETGD; - if(rex.w) { - if(MODREG) { // reg <= reg - SXTWx(gd, xRAX+(nextop&7)+(rex.b<<3)); - } else { // mem <= reg - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); - LDRSW_U12(gd, ed, fixedaddress); - } - } else { - if(MODREG) { // reg <= reg - MOVw_REG(gd, xRAX+(nextop&7)+(rex.b<<3)); - } else { // mem <= reg - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); - LDRw_U12(gd, ed, fixedaddress); - } - } - break; - case 0x64: - addr = dynarec64_64(dyn, addr, ip, ninst, rex, rep, _FS, ok, need_epilog); - break; - case 0x65: - addr = dynarec64_64(dyn, addr, ip, ninst, rex, rep, _GS, ok, need_epilog); - break; - case 0x66: - addr = dynarec64_66(dyn, addr, ip, ninst, rex, rep, ok, need_epilog); - break; - case 0x67: - addr = dynarec64_67(dyn, addr, ip, ninst, rex, rep, ok, need_epilog); - break; - case 0x68: - INST_NAME("PUSH Id"); - i64 = F32S; - if(PK(0)==0xC3) { - MESSAGE(LOG_DUMP, "PUSH then RET, using indirect\n"); - TABLE64(x3, ip+1); - LDRSW_U12(x1, x3, 0); - PUSH1(x1); - } else { - MOV64x(x3, i64); - PUSH1(x3); - } - break; - case 0x69: - INST_NAME("IMUL Gd, Ed, Id"); - SETFLAGS(X_ALL, SF_PENDING); - nextop = F8; - GETGD; - GETED(4); - i64 = F32S; - MOV64xw(x4, i64); - if(rex.w) { - // 64bits imul - UFLAG_IF { - SMULH(x3, ed, x4); - MULx(gd, ed, x4); - UFLAG_OP1(x3); - UFLAG_RES(gd); - UFLAG_DF(x3, d_imul64); - } else { - MULxw(gd, ed, x4); - } - } else { - // 32bits imul - UFLAG_IF { - SMULL(gd, ed, x4); - UFLAG_RES(gd); - LSRx(x3, gd, 32); - UFLAG_OP1(x3); - UFLAG_DF(x3, d_imul32); - MOVw_REG(gd, gd); - } else { - MULxw(gd, ed, x4); - } - } - break; - case 0x6A: - INST_NAME("PUSH Ib"); - i64 = F8S; - MOV64x(x3, i64); - PUSH1(x3); - break; - case 0x6B: - INST_NAME("IMUL Gd, Ed, Ib"); - SETFLAGS(X_ALL, SF_PENDING); - nextop = F8; - GETGD; - GETED(1); - i64 = F8S; - MOV64xw(x4, i64); - if(rex.w) { - // 64bits imul - UFLAG_IF { - SMULH(x3, ed, x4); - MULx(gd, ed, x4); - UFLAG_OP1(x3); - UFLAG_RES(gd); - UFLAG_DF(x3, d_imul64); - } else { - MULxw(gd, ed, x4); - } - } else { - // 32bits imul - UFLAG_IF { - SMULL(gd, ed, x4); - UFLAG_RES(gd); - LSRx(x3, gd, 32); - UFLAG_OP1(x3); - UFLAG_DF(x3, d_imul32); - MOVw_REG(gd, gd); - } else { - MULxw(gd, ed, x4); - } - } - break; - - #define GO(GETFLAGS, NO, YES, F) \ - READFLAGS(F); \ - i8 = F8S; \ - BARRIER(2); \ - JUMP(addr+i8);\ - GETFLAGS; \ - if(dyn->insts[ninst].x64.jmp_insts==-1) { \ - /* out of the block */ \ - i32 = dyn->insts[ninst+1].address-(dyn->arm_size); \ - Bcond(NO, i32); \ - jump_to_next(dyn, addr+i8, 0, ninst); \ - } else { \ - /* inside the block */ \ - i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->arm_size); \ - Bcond(YES, i32); \ - } - - GOCOND(0x70, "J", "ib"); - - #undef GO - - case 0x80: - nextop = F8; - 
switch((nextop>>3)&7) { - case 0: //ADD - INST_NAME("ADD Eb, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); - GETEB(x1, 1); - u8 = F8; - emit_add8c(dyn, ninst, x1, u8, x2, x4); - EBBACK; - break; - case 1: //OR - INST_NAME("OR Eb, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); - GETEB(x1, 1); - u8 = F8; - emit_or8c(dyn, ninst, x1, u8, x2, x4); - EBBACK; - break; - case 2: //ADC - INST_NAME("ADC Eb, Ib"); - READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); - GETEB(x1, 1); - u8 = F8; - emit_adc8c(dyn, ninst, x1, u8, x2, x4, x5); - EBBACK; - break; - case 3: //SBB - INST_NAME("SBB Eb, Ib"); - READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); - GETEB(x1, 1); - u8 = F8; - emit_sbb8c(dyn, ninst, x1, u8, x2, x4, x5); - EBBACK; - break; - case 4: //AND - INST_NAME("AND Eb, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); - GETEB(x1, 1); - u8 = F8; - emit_and8c(dyn, ninst, x1, u8, x2, x4); - EBBACK; - break; - case 5: //SUB - INST_NAME("SUB Eb, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); - GETEB(x1, 1); - u8 = F8; - emit_sub8c(dyn, ninst, x1, u8, x2, x4, x5); - EBBACK; - break; - case 6: //XOR - INST_NAME("XOR Eb, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); - GETEB(x1, 1); - u8 = F8; - emit_xor8c(dyn, ninst, x1, u8, x2, x4); - EBBACK; - break; - case 7: //CMP - INST_NAME("CMP Eb, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); - GETEB(x1, 1); - u8 = F8; - if(u8) { - MOV32w(x2, u8); - emit_cmp8(dyn, ninst, x1, x2, x3, x4, x5); - } else { - emit_cmp8_0(dyn, ninst, x1, x3, x4); - } - break; - default: - DEFAULT; - } - break; - case 0x81: - case 0x83: - nextop = F8; - switch((nextop>>3)&7) { - case 0: //ADD - if(opcode==0x81) {INST_NAME("ADD Ed, Id");} else {INST_NAME("ADD Ed, Ib");} - SETFLAGS(X_ALL, SF_SET_PENDING); - GETED((opcode==0x81)?4:1); - if(opcode==0x81) i64 = F32S; else i64 = F8S; - emit_add32c(dyn, ninst, rex, ed, i64, x3, x4, x5); - WBACK; - break; - case 1: //OR - if(opcode==0x81) {INST_NAME("OR Ed, Id");} else {INST_NAME("OR Ed, Ib");} - SETFLAGS(X_ALL, SF_SET_PENDING); - GETED((opcode==0x81)?4:1); - if(opcode==0x81) i64 = F32S; else i64 = F8S; - emit_or32c(dyn, ninst, rex, ed, i64, x3, x4); - WBACK; - break; - case 2: //ADC - if(opcode==0x81) {INST_NAME("ADC Ed, Id");} else {INST_NAME("ADC Ed, Ib");} - READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); - GETED((opcode==0x81)?4:1); - if(opcode==0x81) i64 = F32S; else i64 = F8S; - MOV64xw(x5, i64); - emit_adc32(dyn, ninst, rex, ed, x5, x3, x4); - WBACK; - break; - case 3: //SBB - if(opcode==0x81) {INST_NAME("SBB Ed, Id");} else {INST_NAME("SBB Ed, Ib");} - READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); - GETED((opcode==0x81)?4:1); - if(opcode==0x81) i64 = F32S; else i64 = F8S; - MOV64xw(x5, i64); - emit_sbb32(dyn, ninst, rex, ed, x5, x3, x4); - WBACK; - break; - case 4: //AND - if(opcode==0x81) {INST_NAME("AND Ed, Id");} else {INST_NAME("AND Ed, Ib");} - SETFLAGS(X_ALL, SF_SET_PENDING); - GETED((opcode==0x81)?4:1); - if(opcode==0x81) i64 = F32S; else i64 = F8S; - emit_and32c(dyn, ninst, rex, ed, i64, x3, x4); - WBACK; - break; - case 5: //SUB - if(opcode==0x81) {INST_NAME("SUB Ed, Id");} else {INST_NAME("SUB Ed, Ib");} - SETFLAGS(X_ALL, SF_SET_PENDING); - GETED((opcode==0x81)?4:1); - if(opcode==0x81) i64 = F32S; else i64 = F8S; - emit_sub32c(dyn, ninst, rex, ed, i64, x3, x4, x5); - WBACK; - break; - case 6: //XOR - if(opcode==0x81) {INST_NAME("XOR Ed, Id");} else {INST_NAME("XOR Ed, Ib");} - SETFLAGS(X_ALL, SF_SET_PENDING); - GETED((opcode==0x81)?4:1); - if(opcode==0x81) i64 = F32S; else i64 = F8S; - emit_xor32c(dyn, ninst, rex, ed, i64, x3, x4); - WBACK; - 
break; - case 7: //CMP - if(opcode==0x81) {INST_NAME("CMP Ed, Id");} else {INST_NAME("CMP Ed, Ib");} - SETFLAGS(X_ALL, SF_SET_PENDING); - GETED((opcode==0x81)?4:1); - if(opcode==0x81) i64 = F32S; else i64 = F8S; - if(i64) { - MOV64xw(x2, i64); - emit_cmp32(dyn, ninst, rex, ed, x2, x3, x4, x5); - } else - emit_cmp32_0(dyn, ninst, rex, ed, x3, x4); - break; - } - break; - case 0x84: - INST_NAME("TEST Eb, Gb"); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop=F8; - GETEB(x1, 0); - GETGB(x2); - emit_test8(dyn, ninst, x1, x2, x3, x4, x5); - break; - case 0x85: - INST_NAME("TEST Ed, Gd"); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop=F8; - GETGD; - GETED(0); - emit_test32(dyn, ninst, rex, ed, gd, x3, x5); - break; - case 0x86: - INST_NAME("(LOCK)XCHG Eb, Gb"); - // Do the swap - nextop = F8; - if(MODREG) { - GETGB(x4); - if(rex.rex) { - ed = xRAX+(nextop&7)+(rex.b<<3); - eb1 = ed; - eb2 = 0; - } else { - ed = (nextop&7); - eb1 = xRAX+(ed&3); - eb2 = ((ed&4)>>2); - } - UBFXw(x1, eb1, eb2*8, 8); - // do the swap 14 -> ed, 1 -> gd - BFIx(gb1, x1, gb2*8, 8); - BFIx(eb1, x4, eb2*8, 8); - } else { - DMB_ISH(); - GETGB(x4); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0, rex, 0, 0); - MARKLOCK; - // do the swap with exclusive locking - LDAXRB(x1, ed); - // do the swap 14 -> strb(ed), 1 -> gd - STLXRB(x3, x4, ed); - CBNZx_MARKLOCK(x3); - DMB_ISH(); - BFIx(gb1, x1, gb2*8, 8); - } - break; - case 0x87: - INST_NAME("(LOCK)XCHG Ed, Gd"); - nextop = F8; - if(MODREG) { - GETGD; - GETED(0); - MOVxw_REG(x1, gd); - MOVxw_REG(gd, ed); - MOVxw_REG(ed, x1); - } else { - GETGD; - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0, rex, 0, 0); - DMB_ISH(); - TSTx_mask(ed, 1, 0, 1+rex.w); // mask=3 or 7 - B_MARK(cNE); - MARKLOCK; - LDAXRxw(x1, ed); - STLXRxw(x3, gd, ed); - CBNZx_MARKLOCK(x3); - B_MARK2_nocond; - MARK; - LDRxw_U12(x1, ed, 0); - STRxw_U12(gd, ed, 0); - MARK2; - DMB_ISH(); - MOVxw_REG(gd, x1); - } - break; - case 0x88: - INST_NAME("MOV Eb, Gb"); - nextop = F8; - gd = ((nextop&0x38)>>3)+(rex.r<<3); - if(rex.rex) { - gb2 = 0; - gb1 = xRAX + gd; - } else { - gb2 = ((gd&4)>>2); - gb1 = xRAX+(gd&3); - } - if(gb2) { - gd = x4; - UBFXw(gd, gb1, gb2*8, 8); - } else { - gd = gb1; // no need to extract - } - if(MODREG) { - ed = (nextop&7) + (rex.b<<3); - if(rex.rex) { - eb1 = xRAX+ed; - eb2 = 0; - } else { - eb1 = xRAX+(ed&3); // Ax, Cx, Dx or Bx - eb2 = ((ed&4)>>2); // L or H - } - BFIx(eb1, gd, eb2*8, 8); - } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff, 0, rex, 0, 0); - STRB_U12(gd, ed, fixedaddress); - } - break; - case 0x89: - INST_NAME("MOV Ed, Gd"); - nextop=F8; - GETGD; - if(MODREG) { // reg <= reg - MOVxw_REG(xRAX+(nextop&7)+(rex.b<<3), gd); - } else { // mem <= reg - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, 0, 0); - STRxw_U12(gd, ed, fixedaddress); - if(box64_dynarec_strongmem && - (dyn->insts[ninst].x64.barrier || box64_dynarec_strongmem>1 || (dyn->insts[ninst+1].x64.barrier || dyn->insts[ninst+1].x64.jmp))) { - DMB_ISH(); - } - } - break; - case 0x8A: - INST_NAME("MOV Gb, Eb"); - nextop = F8; - if(rex.rex) { - gb1 = gd = xRAX+((nextop&0x38)>>3)+(rex.r<<3); - gb2=0; - } else { - gd = (nextop&0x38)>>3; - gb1 = xRAX+(gd&3); - gb2 = ((gd&4)>>2); - } - if(MODREG) { - if(rex.rex) { - wback = xRAX+(nextop&7)+(rex.b<<3); - wb2 = 0; - } else { - wback = (nextop&7); - wb2 = (wback>>2); - wback = xRAX+(wback&3); - } - if(wb2) { - UBFXw(x4, wback, wb2*8, 8); - ed = x4; - } else { - ed = 
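/*
 * The MARKLOCK / LDAXRB / STLXRB / CBNZ sequence emitted above for the locked
 * XCHG is the standard ARMv8 exclusive-monitor retry loop, bracketed by
 * DMB_ISH barriers to honour the x86 memory ordering.  As a plain-C
 * illustration of the semantics only (not the emitted code; ptr/newval are
 * hypothetical names):
 */
#include <stdint.h>

static uint8_t xchg8_sketch(uint8_t* ptr, uint8_t newval)
{
    /* returns the previous byte, like the LDAXRB value once STLXRB succeeds */
    return __atomic_exchange_n(ptr, newval, __ATOMIC_SEQ_CST);
}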
wback; - } - } else { - if(box64_dynarec_strongmem && - (dyn->insts[ninst].x64.barrier || !ninst || box64_dynarec_strongmem>1 || (ninst && dyn->insts[ninst-1].x64.barrier))) { - DMB_ISH(); - } - addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff, 0, rex, 0, 0); - LDRB_U12(x4, wback, fixedaddress); - ed = x4; - } - BFIx(gb1, ed, gb2*8, 8); - break; - case 0x8B: - INST_NAME("MOV Gd, Ed"); - nextop=F8; - GETGD; - if(MODREG) { - MOVxw_REG(gd, xRAX+(nextop&7)+(rex.b<<3)); - } else { - if(box64_dynarec_strongmem && - (dyn->insts[ninst].x64.barrier || !ninst || box64_dynarec_strongmem>1 || (ninst && dyn->insts[ninst-1].x64.barrier))) { - DMB_ISH(); - } - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, 0, 0); - LDRxw_U12(gd, ed, fixedaddress); - } - break; - case 0x8C: - INST_NAME("MOV Ed, Seg"); - nextop=F8; - if((nextop&0xC0)==0xC0) { // reg <= seg - LDRH_U12(xRAX+(nextop&7)+(rex.b<<3), xEmu, offsetof(x64emu_t, segs[(nextop&0x38)>>3])); - } else { // mem <= seg - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0, rex, 0, 0); - LDRH_U12(x3, xEmu, offsetof(x64emu_t, segs[(nextop&0x38)>>3])); - STRH_U12(x3, ed, fixedaddress); - } - break; - case 0x8D: - INST_NAME("LEA Gd, Ed"); - nextop=F8; - GETGD; - if(MODREG) { // reg <= reg? that's an invalid operation - DEFAULT; - } else { // mem <= reg - addr = geted(dyn, addr, ninst, nextop, &ed, gd, &fixedaddress, 0, 0, rex, 0, 0); - if(gd!=ed) { // it's sometimes used as a 3 bytes NOP - MOVxw_REG(gd, ed); - } - else if(!rex.w) { - MOVw_REG(gd, gd); //truncate the higher 32bits as asked - } - } - break; - case 0x8E: - INST_NAME("MOV Seg,Ew"); - nextop = F8; - if((nextop&0xC0)==0xC0) { - ed = xRAX+(nextop&7)+(rex.b<<3); - } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 0, rex, 0, 0); - LDRH_U12(x1, ed, fixedaddress); - ed = x1; - } - STRw_U12(ed, xEmu, offsetof(x64emu_t, segs[(nextop&0x38)>>3])); - STRw_U12(wZR, xEmu, offsetof(x64emu_t, segs_serial[(nextop&0x38)>>3])); - break; - case 0x8F: - INST_NAME("POP Ed"); - nextop = F8; - if((nextop&0xC0)==0xC0) { - POP1(xRAX+(nextop&7)+(rex.b<<3)); - } else { - POP1(x2); // so this can handle POP [ESP] and maybe some variant too - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0); - if(ed==xRSP) { - STRx_U12(x2, ed, fixedaddress); - } else { - // complicated to just allow a segfault that can be recovered correctly - SUBx_U12(xRSP, xRSP, 8); - STRx_U12(x2, ed, fixedaddress); - ADDx_U12(xRSP, xRSP, 8); - } - } - break; - case 0x90: - case 0x91: - case 0x92: - case 0x93: - case 0x94: - case 0x95: - case 0x96: - case 0x97: - gd = xRAX+(opcode&0x07)+(rex.b<<3); - if(gd==xRAX) { - INST_NAME("NOP"); - } else { - INST_NAME("XCHG EAX, Reg"); - MOVxw_REG(x2, xRAX); - MOVxw_REG(xRAX, gd); - MOVxw_REG(gd, x2); - } - break; - - case 0x98: - INST_NAME("CWDE"); - if(rex.w) { - SXTWx(xRAX, xRAX); - } else { - SXTHw(xRAX, xRAX); - } - break; - case 0x99: - INST_NAME("CDQ"); - SBFXxw(xRDX, xRAX, rex.w?63:31, 1); - break; - - case 0x9B: - INST_NAME("FWAIT"); - break; - case 0x9C: - INST_NAME("PUSHF"); - READFLAGS(X_ALL); - PUSH1(xFlags); - break; - case 0x9D: - INST_NAME("POPF"); - SETFLAGS(X_ALL, SF_SET); - POP1(xFlags); - MOV32w(x1, 0x3F7FD7); - ANDw_REG(xFlags, xFlags, x1); - ORRw_mask(xFlags, xFlags, 0b011111, 0); //mask=0x00000002 - SET_DFNONE(x1); - break; - case 0x9E: - INST_NAME("SAHF"); - SETFLAGS(X_CF|X_PF|X_AF|X_ZF|X_SF, SF_SUBSET); - MOV32w(x2, 0b11010101); 
- BICw_REG(xFlags, xFlags, x2); - UBFXx(x1, xRAX, 8, 8); - ANDw_REG(x1, x1, x2); - ORRw_REG(xFlags, xFlags, x1); - SET_DFNONE(x1); - break; - case 0x9F: - INST_NAME("LAHF"); - READFLAGS(X_CF|X_PF|X_AF|X_ZF|X_SF); - BFIx(xRAX, xFlags, 8, 8); - break; - case 0xA0: - INST_NAME("MOV AL,Ob"); - u64 = F64; - MOV64x(x1, u64); - LDRB_U12(x2, x1, 0); - BFIx(xRAX, x2, 0, 8); - break; - case 0xA1: - INST_NAME("MOV EAX,Od"); - u64 = F64; - MOV64x(x1, u64); - LDRxw_U12(xRAX, x1, 0); - break; - case 0xA2: - INST_NAME("MOV Ob,AL"); - u64 = F64; - MOV64x(x1, u64); - STRB_U12(xRAX, x1, 0); - break; - case 0xA3: - INST_NAME("MOV Od,EAX"); - u64 = F64; - MOV64x(x1, u64); - STRxw_U12(xRAX, x1, 0); - break; - case 0xA4: - if(rep) { - INST_NAME("REP MOVSB"); - CBZx_NEXT(xRCX); - TBNZ_MARK2(xFlags, F_DF); - MARK; // Part with DF==0 - LDRB_S9_postindex(x1, xRSI, 1); - STRB_S9_postindex(x1, xRDI, 1); - SUBx_U12(xRCX, xRCX, 1); - CBNZx_MARK(xRCX); - B_NEXT_nocond; - MARK2; // Part with DF==1 - LDRB_S9_postindex(x1, xRSI, -1); - STRB_S9_postindex(x1, xRDI, -1); - SUBx_U12(xRCX, xRCX, 1); - CBNZx_MARK2(xRCX); - // done - } else { - INST_NAME("MOVSB"); - GETDIR(x3, 1); - LDRB_U12(x1, xRSI, 0); - STRB_U12(x1, xRDI, 0); - ADDx_REG(xRSI, xRSI, x3); - ADDx_REG(xRDI, xRDI, x3); - } - break; - case 0xA5: - if(rep) { - INST_NAME("REP MOVSD"); - CBZx_NEXT(xRCX); - TBNZ_MARK2(xFlags, F_DF); - MARK; // Part with DF==0 - LDRxw_S9_postindex(x1, xRSI, rex.w?8:4); - STRxw_S9_postindex(x1, xRDI, rex.w?8:4); - SUBx_U12(xRCX, xRCX, 1); - CBNZx_MARK(xRCX); - B_NEXT_nocond; - MARK2; // Part with DF==1 - LDRxw_S9_postindex(x1, xRSI, rex.w?-8:-4); - STRxw_S9_postindex(x1, xRDI, rex.w?-8:-4); - SUBx_U12(xRCX, xRCX, 1); - CBNZx_MARK2(xRCX); - // done - } else { - INST_NAME("MOVSD"); - GETDIR(x3, rex.w?8:4); - LDRxw_U12(x1, xRSI, 0); - STRxw_U12(x1, xRDI, 0); - ADDx_REG(xRSI, xRSI, x3); - ADDx_REG(xRDI, xRDI, x3); - } - break; - case 0xA6: - switch(rep) { - case 1: - case 2: - if(rep==1) {INST_NAME("REPNZ CMPSB");} else {INST_NAME("REPZ CMPSB");} - SETFLAGS(X_ALL, SF_MAYSET); - CBZx_NEXT(xRCX); - TBNZ_MARK2(xFlags, F_DF); - MARK; // Part with DF==0 - LDRB_S9_postindex(x1, xRSI, 1); - LDRB_S9_postindex(x2, xRDI, 1); - SUBx_U12(xRCX, xRCX, 1); - CMPSw_REG(x1, x2); - B_MARK3((rep==1)?cEQ:cNE); - CBNZx_MARK(xRCX); - B_MARK3_nocond; - MARK2; // Part with DF==1 - LDRB_S9_postindex(x1, xRSI, -1); - LDRB_S9_postindex(x2, xRDI, -1); - SUBx_U12(xRCX, xRCX, 1); - CMPSw_REG(x1, x2); - B_MARK3((rep==1)?cEQ:cNE); - CBNZx_MARK2(xRCX); - MARK3; // end - emit_cmp8(dyn, ninst, x1, x2, x3, x4, x5); - break; - default: - INST_NAME("CMPSB"); - SETFLAGS(X_ALL, SF_SET_PENDING); - GETDIR(x3, 1); - LDRB_U12(x1, xRSI, 0); - LDRB_U12(x2, xRDI, 0); - ADDx_REG(xRSI, xRSI, x3); - ADDx_REG(xRDI, xRDI, x3); - emit_cmp8(dyn, ninst, x1, x2, x3, x4, x5); - break; - } - break; - - case 0xA8: - INST_NAME("TEST AL, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); - UXTBx(x1, xRAX); - u8 = F8; - MOV32w(x2, u8); - emit_test8(dyn, ninst, x1, x2, x3, x4, x5); - break; - case 0xA9: - INST_NAME("TEST EAX, Id"); - SETFLAGS(X_ALL, SF_SET_PENDING); - i64 = F32S; - MOV64xw(x2, i64); - emit_test32(dyn, ninst, rex, xRAX, x2, x3, x4); - break; - case 0xAA: - if(rep) { - INST_NAME("REP STOSB"); - CBZx_NEXT(xRCX); - TBNZ_MARK2(xFlags, F_DF); - MARK; // Part with DF==0 - STRB_S9_postindex(xRAX, xRDI, 1); - SUBx_U12(xRCX, xRCX, 1); - CBNZx_MARK(xRCX); - B_NEXT_nocond; - MARK2; // Part with DF==1 - STRB_S9_postindex(xRAX, xRDI, -1); - SUBx_U12(xRCX, xRCX, 1); - CBNZx_MARK2(xRCX); - // done - } else { - 
INST_NAME("STOSB"); - GETDIR(x3, 1); - STRB_U12(xRAX, xRDI, 0); - ADDx_REG(xRDI, xRDI, x3); - } - break; - case 0xAB: - if(rep) { - INST_NAME("REP STOSD"); - CBZx_NEXT(xRCX); - TBNZ_MARK2(xFlags, F_DF); - MARK; // Part with DF==0 - STRxw_S9_postindex(xRAX, xRDI, rex.w?8:4); - SUBx_U12(xRCX, xRCX, 1); - CBNZx_MARK(xRCX); - B_NEXT_nocond; - MARK2; // Part with DF==1 - STRxw_S9_postindex(xRAX, xRDI, rex.w?-8:-4); - SUBx_U12(xRCX, xRCX, 1); - CBNZx_MARK2(xRCX); - // done - } else { - INST_NAME("STOSD"); - GETDIR(x3, rex.w?8:4); - STRxw_U12(xRAX, xRDI, 0); - ADDx_REG(xRDI, xRDI, x3); - } - break; - - case 0xAE: - switch(rep) { - case 1: - case 2: - if(rep==1) {INST_NAME("REPNZ SCASB");} else {INST_NAME("REPZ SCASB");} - SETFLAGS(X_ALL, SF_MAYSET); - CBZx_NEXT(xRCX); - UBFXw(x1, xRAX, 0, 8); - TBNZ_MARK2(xFlags, F_DF); - MARK; // Part with DF==0 - LDRB_S9_postindex(x2, xRDI, 1); - SUBx_U12(xRCX, xRCX, 1); - CMPSw_REG(x1, x2); - B_MARK3((rep==1)?cEQ:cNE); - CBNZx_MARK(xRCX); - B_MARK3_nocond; - MARK2; // Part with DF==1 - LDRB_S9_postindex(x2, xRDI, -1); - SUBx_U12(xRCX, xRCX, 1); - CMPSw_REG(x1, x2); - B_MARK3((rep==1)?cEQ:cNE); - CBNZx_MARK2(xRCX); - MARK3; // end - emit_cmp8(dyn, ninst, x1, x2, x3, x4, x5); - break; - default: - INST_NAME("SCASB"); - SETFLAGS(X_ALL, SF_SET_PENDING); - GETDIR(x3, 1); - UBFXw(x1, xRAX, 0, 8); - LDRB_U12(x2, xRDI, 0); - ADDx_REG(xRDI, xRDI, x3); - emit_cmp8(dyn, ninst, x1, x2, x3, x4, x5); - break; - } - break; - - - case 0xB0: - case 0xB1: - case 0xB2: - case 0xB3: - INST_NAME("MOV xL, Ib"); - u8 = F8; - MOV32w(x1, u8); - if(rex.rex) - gb1 = xRAX+(opcode&7)+(rex.b<<3); - else - gb1 = xRAX+(opcode&3); - BFIx(gb1, x1, 0, 8); - break; - case 0xB4: - case 0xB5: - case 0xB6: - case 0xB7: - INST_NAME("MOV xH, Ib"); - u8 = F8; - MOV32w(x1, u8); - if(rex.rex) { - gb1 = xRAX+(opcode&7)+(rex.b<<3); - BFIx(gb1, x1, 0, 8); - } else { - gb1 = xRAX+(opcode&3); - BFIx(gb1, x1, 8, 8); - } - break; - case 0xB8: - case 0xB9: - case 0xBA: - case 0xBB: - case 0xBC: - case 0xBD: - case 0xBE: - case 0xBF: - INST_NAME("MOV Reg, Id"); - gd = xRAX+(opcode&7)+(rex.b<<3); - if(rex.w) { - u64 = F64; - MOV64x(gd, u64); - } else { - u32 = F32; - MOV32w(gd, u32); - } - break; - case 0xC0: - nextop = F8; - switch((nextop>>3)&7) { - case 0: - INST_NAME("ROL Eb, Ib"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - SETFLAGS(X_OF|X_CF, SF_SET); - GETEB(x1, 1); - u8 = F8; - MOV32w(x2, u8); - CALL_(rol8, ed, x3); - EBBACK; - break; - case 1: - INST_NAME("ROR Eb, Ib"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - SETFLAGS(X_OF|X_CF, SF_SET); - GETEB(x1, 1); - u8 = F8; - MOV32w(x2, u8); - CALL_(ror8, ed, x3); - EBBACK; - break; - case 2: - INST_NAME("RCL Eb, Ib"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - READFLAGS(X_CF); - SETFLAGS(X_OF|X_CF, SF_SET); - GETEB(x1, 1); - u8 = F8; - MOV32w(x2, u8); - CALL_(rcl8, ed, x3); - EBBACK; - break; - case 3: - INST_NAME("RCR Eb, Ib"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - READFLAGS(X_CF); - SETFLAGS(X_OF|X_CF, SF_SET); - GETEB(x1, 1); - u8 = F8; - MOV32w(x2, u8); - CALL_(rcr8, ed, x3); - EBBACK; - break; - case 4: - case 6: - INST_NAME("SHL Eb, Ib"); - GETEB(x1, 1); - u8 = (F8)&0x1f; - if(u8) { - SETFLAGS(X_ALL, SF_PENDING); - UFLAG_IF{ - MOV32w(x4, u8); UFLAG_OP2(x4); - }; - UFLAG_OP1(ed); - LSLw(ed, ed, u8); - EBBACK; - UFLAG_RES(ed); - UFLAG_DF(x3, d_shl8); - } else { - NOP; - } - break; - case 5: - INST_NAME("SHR Eb, Ib"); - GETEB(x1, 1); - u8 = (F8)&0x1f; - if(u8) { - SETFLAGS(X_ALL, SF_PENDING); - UFLAG_IF{ - MOV32w(x4, u8); UFLAG_OP2(x4); - }; 
- UFLAG_OP1(ed); - if(u8) { - LSRw(ed, ed, u8); - EBBACK; - } - UFLAG_RES(ed); - UFLAG_DF(x3, d_shr8); - } else { - NOP; - } - break; - case 7: - INST_NAME("SAR Eb, Ib"); - GETSEB(x1, 1); - u8 = (F8)&0x1f; - if(u8) { - SETFLAGS(X_ALL, SF_PENDING); - UFLAG_IF{ - MOV32w(x4, u8); UFLAG_OP2(x4); - }; - UFLAG_OP1(ed); - if(u8) { - ASRw(ed, ed, u8); - EBBACK; - } - UFLAG_RES(ed); - UFLAG_DF(x3, d_sar8); - } else { - NOP; - } - break; - } - break; - case 0xC1: - nextop = F8; - switch((nextop>>3)&7) { - case 0: - INST_NAME("ROL Ed, Ib"); - SETFLAGS(X_OF|X_CF, SF_SUBSET); - GETED(1); - u8 = (F8)&(rex.w?0x3f:0x1f); - emit_rol32c(dyn, ninst, rex, ed, u8, x3, x4); - if(u8) { WBACK; } - break; - case 1: - INST_NAME("ROR Ed, Ib"); - SETFLAGS(X_OF|X_CF, SF_SUBSET); - GETED(1); - u8 = (F8)&(rex.w?0x3f:0x1f); - emit_ror32c(dyn, ninst, rex, ed, u8, x3, x4); - if(u8) { WBACK; } - break; - case 2: - INST_NAME("RCL Ed, Ib"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - READFLAGS(X_CF); - SETFLAGS(X_OF|X_CF, SF_SET); - GETEDW(x4, x1, 1); - u8 = F8; - MOV32w(x2, u8); - CALL_(rex.w?((void*)rcl64):((void*)rcl32), ed, x4); - WBACK; - break; - case 3: - INST_NAME("RCR Ed, Ib"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - READFLAGS(X_CF); - SETFLAGS(X_OF|X_CF, SF_SET); - GETEDW(x4, x1, 1); - u8 = F8; - MOV32w(x2, u8); - CALL_(rex.w?((void*)rcr64):((void*)rcr32), ed, x4); - WBACK; - break; - case 4: - case 6: - INST_NAME("SHL Ed, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined - GETED(1); - u8 = (F8)&(rex.w?0x3f:0x1f); - emit_shl32c(dyn, ninst, rex, ed, u8, x3, x4); - WBACK; - break; - case 5: - INST_NAME("SHR Ed, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined - GETED(1); - u8 = (F8)&(rex.w?0x3f:0x1f); - emit_shr32c(dyn, ninst, rex, ed, u8, x3, x4); - if(u8) { - WBACK; - } - break; - case 7: - INST_NAME("SAR Ed, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined - GETED(1); - u8 = (F8)&(rex.w?0x3f:0x1f); - emit_sar32c(dyn, ninst, rex, ed, u8, x3, x4); - if(u8) { - WBACK; - } - break; - } - break; - case 0xC2: - INST_NAME("RETN"); - //SETFLAGS(X_ALL, SF_SET); // Hack, set all flags (to an unknown state...) - READFLAGS(X_PEND); // lets play safe here too - BARRIER(2); - i32 = F16; - retn_to_epilog(dyn, ninst, i32); - *need_epilog = 0; - *ok = 0; - break; - case 0xC3: - INST_NAME("RET"); - // SETFLAGS(X_ALL, SF_SET); // Hack, set all flags (to an unknown state...) 
- READFLAGS(X_PEND); // so instead, force the defered flags, so it's not too slow, and flags are not lost - BARRIER(2); - ret_to_epilog(dyn, ninst); - *need_epilog = 0; - *ok = 0; - break; - - case 0xC6: - INST_NAME("MOV Eb, Ib"); - nextop=F8; - if(MODREG) { // reg <= u8 - u8 = F8; - if(!rex.rex) { - ed = (nextop&7); - eb1 = xRAX+(ed&3); // Ax, Cx, Dx or Bx - eb2 = (ed&4)>>2; // L or H - } else { - eb1 = xRAX+(nextop&7)+(rex.b<<3); - eb2 = 0; - } - MOV32w(x3, u8); - BFIx(eb1, x3, eb2*8, 8); - } else { // mem <= u8 - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff, 0, rex, 0, 1); - u8 = F8; - MOV32w(x3, u8); - STRB_U12(x3, ed, fixedaddress); - } - break; - case 0xC7: - INST_NAME("MOV Ed, Id"); - nextop=F8; - if(MODREG) { // reg <= i32 - i64 = F32S; - ed = xRAX+(nextop&7)+(rex.b<<3); - MOV64xw(ed, i64); - } else { // mem <= i32 - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, 0, 4); - i64 = F32S; - MOV64xw(x3, i64); - STRxw_U12(x3, ed, fixedaddress); - } - break; - - case 0xC9: - INST_NAME("LEAVE"); - MOVx_REG(xRSP, xRBP); - POP1(xRBP); - break; - - case 0xCC: - SETFLAGS(X_ALL, SF_SET); // Hack, set all flags (to an unknown state...) - if(PK(0)=='S' && PK(1)=='C') { - addr+=2; - BARRIER(2); - INST_NAME("Special Box64 instruction"); - if((PK64(0)==0)) - { - addr+=8; - MESSAGE(LOG_DEBUG, "Exit x64 Emu\n"); - //GETIP(ip+1+2); // no use - //STORE_XEMU_REGS(xRIP); // no need, done in epilog - MOV32w(x1, 1); - STRw_U12(x1, xEmu, offsetof(x64emu_t, quit)); - *ok = 0; - *need_epilog = 1; - } else { - MESSAGE(LOG_DUMP, "Native Call to %s\n", GetNativeName(GetNativeFnc(ip))); - x87_forget(dyn, ninst, x3, x4, 0); - sse_purge07cache(dyn, ninst, x3); - tmp = isSimpleWrapper(*(wrapper_t*)(addr)); - if(box64_log<2 && tmp) { - //GETIP(ip+3+8+8); // read the 0xCC - call_n(dyn, ninst, *(void**)(addr+8), tmp); - addr+=8+8; - } else { - GETIP(ip+1); // read the 0xCC - STORE_XEMU_CALL(xRIP); - CALL_S(x64Int3, -1); - LOAD_XEMU_CALL(xRIP); - addr+=8+8; - TABLE64(x3, addr); // expected return address - CMPSx_REG(xRIP, x3); - B_MARK(cNE); - LDRw_U12(w1, xEmu, offsetof(x64emu_t, quit)); - CBZw_NEXT(w1); - MARK; - LOAD_XEMU_REM(); - jump_to_epilog(dyn, 0, xRIP, ninst); - } - } - } else { - #if 1 - INST_NAME("INT 3"); - // check if TRAP signal is handled - LDRx_U12(x1, xEmu, offsetof(x64emu_t, context)); - MOV32w(x2, offsetof(box64context_t, signals[SIGTRAP])); - LDRx_REG(x3, x1, x2); - CMPSx_U12(x3, 0); - B_NEXT(cNE); - MOV32w(x1, SIGTRAP); - CALL_(raise, -1, 0); - break; - #else - DEFAULT; - #endif - } - break; - - case 0xCF: - INST_NAME("IRET"); - SETFLAGS(X_ALL, SF_SET); // Not a hack, EFLAGS are restored - BARRIER(2); - iret_to_epilog(dyn, ninst, rex.w); - *need_epilog = 0; - *ok = 0; - break; - case 0xD0: - case 0xD2: // TODO: Jump if CL is 0 - nextop = F8; - switch((nextop>>3)&7) { - case 0: - if(opcode==0xD0) { - INST_NAME("ROL Eb, 1"); - MOV32w(x2, 1); - } else { - INST_NAME("ROL Eb, CL"); - ANDSw_mask(x2, xRCX, 0, 0b00100); - } - MESSAGE(LOG_DUMP, "Need Optimization\n"); - SETFLAGS(X_OF|X_CF, SF_SET); - GETEB(x1, 0); - CALL_(rol8, x1, x3); - EBBACK; - break; - case 1: - if(opcode==0xD0) { - INST_NAME("ROR Eb, 1"); - MOV32w(x2, 1); - } else { - INST_NAME("ROR Eb, CL"); - ANDSw_mask(x2, xRCX, 0, 0b00100); - } - MESSAGE(LOG_DUMP, "Need Optimization\n"); - SETFLAGS(X_OF|X_CF, SF_SET); - GETEB(x1, 0); - CALL_(ror8, x1, x3); - EBBACK; - break; - case 2: - if(opcode==0xD0) {INST_NAME("RCL Eb, 1");} else {INST_NAME("RCL Eb, CL");} - 
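/* Aside (editorial, not part of the original patch): the RCL/RCR cases below fall
 * back to the C helpers rcl8/rcr8 because AArch64 has no single rotate-through-carry
 * instruction. For reference, this is the semantics such a helper has to reproduce —
 * an illustrative model only, not box64's rcl8: */
#include <stdint.h>

/* 8-bit rotate-through-carry-left: CF acts as a 9th bit of the operand. */
static uint8_t rcl8_model(uint8_t v, unsigned count, int* cf) {
    count %= 9;                               /* 8 data bits + CF -> period of 9 */
    while (count--) {
        int new_cf = (v >> 7) & 1;            /* bit rotated out on the left */
        v = (uint8_t)((v << 1) | (*cf & 1));  /* old CF rotated in on the right */
        *cf = new_cf;
    }
    return v;
}
/* End of aside. */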
MESSAGE(LOG_DUMP, "Need Optimization\n"); - READFLAGS(X_CF); - SETFLAGS(X_OF|X_CF, SF_SET); - if(opcode==0xD0) {MOV32w(x2, 1);} else {ANDSw_mask(x2, xRCX, 0, 0b00100);} - GETEB(x1, 0); - CALL_(rcl8, x1, x3); - EBBACK; - break; - case 3: - if(opcode==0xD0) {INST_NAME("RCR Eb, 1");} else {INST_NAME("RCR Eb, CL");} - MESSAGE(LOG_DUMP, "Need Optimization\n"); - READFLAGS(X_CF); - SETFLAGS(X_OF|X_CF, SF_SET); - if(opcode==0xD0) {MOV32w(x2, 1);} else {ANDSw_mask(x2, xRCX, 0, 0b00100);} - GETEB(x1, 0); - CALL_(rcr8, x1, x3); - EBBACK; - break; - case 4: - case 6: - if(opcode==0xD0) { - INST_NAME("SHL Eb, 1"); - MOV32w(x2, 1); - } else { - INST_NAME("SHL Eb, CL"); - ANDSw_mask(x2, xRCX, 0, 0b00100); - } - SETFLAGS(X_ALL, SF_PENDING); - GETEB(x1, 0); - UFLAG_OP12(ed, x2) - LSLw_REG(ed, ed, x2); - EBBACK; - UFLAG_RES(ed); - UFLAG_DF(x3, d_shl8); - break; - case 5: - if(opcode==0xD0) { - INST_NAME("SHR Eb, 1"); - MOV32w(x2, 1); - } else { - INST_NAME("SHR Eb, CL"); - ANDSw_mask(x2, xRCX, 0, 0b00100); - } - SETFLAGS(X_ALL, SF_PENDING); - GETEB(x1, 0); - UFLAG_OP12(ed, x2); - LSRw_REG(ed, ed, x2); - EBBACK; - UFLAG_RES(ed); - UFLAG_DF(x3, d_shr8); - break; - case 7: - if(opcode==0xD0) { - INST_NAME("SAR Eb, 1"); - MOV32w(x2, 1); - } else { - INST_NAME("SAR Eb, CL"); - ANDSw_mask(x2, xRCX, 0, 0b00100); - } - SETFLAGS(X_ALL, SF_PENDING); - GETSEB(x1, 0); - UFLAG_OP12(ed, x2) - ASRw_REG(ed, ed, x2); - EBBACK; - UFLAG_RES(ed); - UFLAG_DF(x3, d_sar8); - break; - } - break; - case 0xD1: - nextop = F8; - switch((nextop>>3)&7) { - case 0: - INST_NAME("ROL Ed, 1"); - SETFLAGS(X_OF|X_CF, SF_SUBSET); - GETED(0); - emit_rol32c(dyn, ninst, rex, ed, 1, x3, x4); - WBACK; - break; - case 1: - INST_NAME("ROR Ed, 1"); - SETFLAGS(X_OF|X_CF, SF_SUBSET); - GETED(0); - emit_ror32c(dyn, ninst, rex, ed, 1, x3, x4); - WBACK; - break; - case 2: - INST_NAME("RCL Ed, 1"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - READFLAGS(X_CF); - SETFLAGS(X_OF|X_CF, SF_SET); - MOV32w(x2, 1); - GETEDW(x4, x1, 0); - CALL_(rcl32, ed, x4); - WBACK; - break; - case 3: - INST_NAME("RCR Ed, 1"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - READFLAGS(X_CF); - SETFLAGS(X_OF|X_CF, SF_SET); - MOV32w(x2, 1); - GETEDW(x4, x1, 0); - CALL_(rcr32, ed, x4); - WBACK; - break; - case 4: - case 6: - INST_NAME("SHL Ed, 1"); - SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined - GETED(0); - emit_shl32c(dyn, ninst, rex, ed, 1, x3, x4); - WBACK; - break; - case 5: - INST_NAME("SHR Ed, 1"); - SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined - GETED(0); - emit_shr32c(dyn, ninst, rex, ed, 1, x3, x4); - WBACK; - break; - case 7: - INST_NAME("SAR Ed, 1"); - SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined - GETED(0); - emit_sar32c(dyn, ninst, rex, ed, 1, x3, x4); - WBACK; - break; - } - break; - case 0xD3: - nextop = F8; - switch((nextop>>3)&7) { - case 0: - INST_NAME("ROL Ed, CL"); - SETFLAGS(X_OF|X_CF, SF_SUBSET); - if(rex.w) { - ANDSx_mask(x3, xRCX, 1, 0, 0b00101); //mask=0x000000000000003f - } else { - ANDSw_mask(x3, xRCX, 0, 0b00100); //mask=0x00000001f - } - MOV64xw(x4, (rex.w?64:32)); - SUBx_REG(x3, x4, x3); - GETEDW(x4, x2, 0); - if(!rex.w && MODREG) {MOVw_REG(ed, ed);} - B_NEXT(cEQ); - RORxw_REG(ed, ed, x3); - WBACK; - UFLAG_IF { // calculate flags directly - CMPSw_U12(x3, rex.w?63:31); - B_MARK(cNE); - LSRxw(x1, ed, rex.w?63:31); - ADDxw_REG(x1, x1, ed); - BFIw(xFlags, x1, F_OF, 1); - MARK; - BFIw(xFlags, ed, F_CF, 1); - UFLAG_DF(x2, d_none); - } - break; - case 1: - INST_NAME("ROR Ed, CL"); - 
SETFLAGS(X_OF|X_CF, SF_SUBSET); - if(rex.w) { - ANDSx_mask(x3, xRCX, 1, 0, 0b00101); //mask=0x000000000000003f - } else { - ANDSw_mask(x3, xRCX, 0, 0b00100); //mask=0x00000001f - } - GETEDW(x4, x2, 0); - if(!rex.w && MODREG) {MOVw_REG(ed, ed);} - B_NEXT(cEQ); - RORxw_REG(ed, ed, x3); - WBACK; - UFLAG_IF { // calculate flags directly - CMPSw_U12(x3, 1); - B_MARK(cNE); - LSRxw(x2, ed, rex.w?62:30); // x2 = d>>30 - EORw_REG_LSR(x2, x2, x2, 1); // x2 = ((d>>30) ^ ((d>>30)>>1)) - BFIw(xFlags, x2, F_OF, 1); - MARK; - LSRxw(x2, ed, rex.w?63:31); - BFIw(xFlags, x2, F_CF, 1); - UFLAG_DF(x2, d_none); - } - break; - case 2: - INST_NAME("RCL Ed, CL"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - READFLAGS(X_CF); - SETFLAGS(X_OF|X_CF, SF_SET); - if(rex.w) { - ANDSx_mask(x2, xRCX, 1, 0, 0b00101); //mask=0x000000000000003f - } else { - ANDSw_mask(x2, xRCX, 0, 0b00100); //mask=0x00000001f - } - GETEDW(x4, x1, 0); - if(!rex.w && MODREG) {MOVw_REG(ed, ed);} - B_NEXT(cEQ); - CALL_(rex.w?((void*)rcl64):((void*)rcl32), ed, x4); - WBACK; - break; - case 3: - INST_NAME("RCR Ed, CL"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - READFLAGS(X_CF); - SETFLAGS(X_OF|X_CF, SF_SET); - if(rex.w) { - ANDSx_mask(x2, xRCX, 1, 0, 0b00101); //mask=0x000000000000003f - } else { - ANDSw_mask(x2, xRCX, 0, 0b00100); //mask=0x00000001f - } - GETEDW(x4, x1, 0); - if(!rex.w && MODREG) {MOVw_REG(ed, ed);} - B_NEXT(cEQ); - CALL_(rex.w?((void*)rcr64):((void*)rcr32), ed, x4); - WBACK; - break; - case 4: - case 6: - INST_NAME("SHL Ed, CL"); - SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined - if(rex.w) { - ANDSx_mask(x3, xRCX, 1, 0, 0b00101); //mask=0x000000000000003f - } else { - ANDSw_mask(x3, xRCX, 0, 0b00100); //mask=0x00000001f - } - GETED(0); - if(!rex.w && MODREG) {MOVw_REG(ed, ed);} - B_NEXT(cEQ); - emit_shl32(dyn, ninst, rex, ed, x3, x5, x4); - WBACK; - break; - case 5: - INST_NAME("SHR Ed, CL"); - SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined - if(rex.w) { - ANDSx_mask(x3, xRCX, 1, 0, 0b00101); //mask=0x000000000000003f - } else { - ANDSw_mask(x3, xRCX, 0, 0b00100); //mask=0x00000001f - } - GETED(0); - if(!rex.w && MODREG) {MOVw_REG(ed, ed);} - B_NEXT(cEQ); - emit_shr32(dyn, ninst, rex, ed, x3, x5, x4); - WBACK; - break; - case 7: - INST_NAME("SAR Ed, CL"); - SETFLAGS(X_ALL, SF_PENDING); - if(rex.w) { - ANDSx_mask(x3, xRCX, 1, 0, 0b00101); //mask=0x000000000000003f - } else { - ANDSw_mask(x3, xRCX, 0, 0b00100); //mask=0x00000001f - } - GETED(0); - if(!rex.w && MODREG) {MOVw_REG(ed, ed);} - B_NEXT(cEQ); - UFLAG_OP12(ed, x3); - ASRxw_REG(ed, ed, x3); - WBACK; - UFLAG_RES(ed); - UFLAG_DF(x3, rex.w?d_sar64:d_sar32); - break; - } - break; - - case 0xD8: - addr = dynarec64_D8(dyn, addr, ip, ninst, rex, rep, ok, need_epilog); - break; - case 0xD9: - addr = dynarec64_D9(dyn, addr, ip, ninst, rex, rep, ok, need_epilog); - break; - - case 0xDB: - addr = dynarec64_DB(dyn, addr, ip, ninst, rex, rep, ok, need_epilog); - break; - case 0xDC: - addr = dynarec64_DC(dyn, addr, ip, ninst, rex, rep, ok, need_epilog); - break; - case 0xDD: - addr = dynarec64_DD(dyn, addr, ip, ninst, rex, rep, ok, need_epilog); - break; - - case 0xDF: - addr = dynarec64_DF(dyn, addr, ip, ninst, rex, rep, ok, need_epilog); - break; - #define GO(Z) \ - BARRIER(2); \ - JUMP(addr+i8); \ - if(dyn->insts[ninst].x64.jmp_insts==-1) { \ - /* out of the block */ \ - i32 = dyn->insts[ninst+1].address-(dyn->arm_size); \ - if(Z) {CBNZx(xRCX, i32);} else {CBZx(xRCX, i32);}; \ - jump_to_next(dyn, addr+i8, 0, ninst); \ - } else { \ - /* inside 
the block */ \ - i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->arm_size); \ - if(Z) {CBZx(xRCX, i32);} else {CBNZx(xRCX, i32);}; \ - } - case 0xE0: - INST_NAME("LOOPNZ"); - READFLAGS(X_ZF); - i8 = F8S; - SUBx_U12(xRCX, xRCX, 1); - TBNZ_NEXT(xFlags, 1<insts[ninst].natcall, &dyn->insts[ninst].retn)) - tmp = dyn->insts[ninst].pass2choice = 3; - else - tmp = dyn->insts[ninst].pass2choice = 0; - #else - tmp = dyn->insts[ninst].pass2choice; - #endif - switch(tmp) { - case 3: - SETFLAGS(X_ALL, SF_SET); // Hack to set flags to "dont'care" state - BARRIER(1); - BARRIER_NEXT(1); - TABLE64(x2, addr); - PUSH1(x2); - MESSAGE(LOG_DUMP, "Native Call to %s (retn=%d)\n", GetNativeName(GetNativeFnc(dyn->insts[ninst].natcall-1)), dyn->insts[ninst].retn); - // calling a native function - sse_purge07cache(dyn, ninst, x3); - if(box64_log<2 && dyn->insts[ninst].natcall && (tmp=isSimpleWrapper(*(wrapper_t*)(dyn->insts[ninst].natcall+2)))) { - //GETIP(ip+3+8+8); // read the 0xCC - call_n(dyn, ninst, *(void**)(dyn->insts[ninst].natcall+2+8), tmp); - POP1(xRIP); // pop the return address - } else { - GETIP_(dyn->insts[ninst].natcall); // read the 0xCC already - STORE_XEMU_CALL(xRIP); - CALL_S(x64Int3, -1); - LOAD_XEMU_CALL(xRIP); - TABLE64(x3, dyn->insts[ninst].natcall); - ADDx_U12(x3, x3, 2+8+8); - CMPSx_REG(xRIP, x3); - B_MARK(cNE); // Not the expected address, exit dynarec block - POP1(xRIP); // pop the return address - if(dyn->insts[ninst].retn) { - ADDx_U12(xRSP, xRSP, dyn->insts[ninst].retn); - } - TABLE64(x3, addr); - CMPSx_REG(xRIP, x3); - B_MARK(cNE); // Not the expected address again - LDRw_U12(w1, xEmu, offsetof(x64emu_t, quit)); - CBZw_NEXT(w1); // not quitting, so lets continue - MARK; - LOAD_XEMU_REM(); // load remaining register, has they have changed - jump_to_epilog(dyn, 0, xRIP, ninst); - } - break; - default: - if(ninst && dyn->insts[ninst-1].x64.set_flags) { - READFLAGS(X_PEND); // that's suspicious - } else { - SETFLAGS(X_ALL, SF_SET); // Hack to set flags to "dont'care" state - } - // regular call - BARRIER(1); - BARRIER_NEXT(1); - *need_epilog = 0; - *ok = 0; - TABLE64(x2, addr); - PUSH1(x2); - if(addr+i32==0) { // self modifying code maybe? 
so use indirect address fetching - TABLE64(x4, addr-4); - LDRx_U12(x4, x4, 0); - jump_to_next(dyn, 0, x4, ninst); - } else - jump_to_next(dyn, addr+i32, 0, ninst); - break; - } - break; - case 0xE9: - case 0xEB: - BARRIER(1); - if(opcode==0xE9) { - INST_NAME("JMP Id"); - i32 = F32S; - } else { - INST_NAME("JMP Ib"); - i32 = F8S; - } - JUMP(addr+i32); - PASS2IF(dyn->insts[ninst].x64.jmp_insts==-1, 1) { - // out of the block - jump_to_next(dyn, addr+i32, 0, ninst); - } else { - // inside the block - tmp = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->arm_size); - if(tmp==4) { - NOP; - } else { - B(tmp); - } - } - *need_epilog = 0; - *ok = 0; - break; - - case 0xF0: - addr = dynarec64_F0(dyn, addr, ip, ninst, rex, rep, ok, need_epilog); - break; - - case 0xF5: - INST_NAME("CMC"); - READFLAGS(X_CF); - SETFLAGS(X_CF, SF_SUBSET); - EORw_mask(xFlags, xFlags, 0, 0); //mask=0x00000001 - break; - case 0xF6: - nextop = F8; - switch((nextop>>3)&7) { - case 0: - case 1: - INST_NAME("TEST Eb, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); - GETEB(x1, 1); - u8 = F8; - MOV32w(x2, u8); - emit_test8(dyn, ninst, x1, x2, x3, x4, x5); - break; - case 2: - INST_NAME("NOT Eb"); - GETEB(x1, 0); - MVNw_REG(x1, x1); - EBBACK; - break; - case 3: - INST_NAME("NEG Eb"); - SETFLAGS(X_ALL, SF_SET_PENDING); - GETEB(x1, 0); - emit_neg8(dyn, ninst, x1, x2, x4); - EBBACK; - break; - case 4: - INST_NAME("MUL AL, Ed"); - SETFLAGS(X_ALL, SF_PENDING); - UFLAG_DF(x1, d_mul8); - GETEB(x1, 0); - UXTBw(x2, xRAX); - MULw(x1, x2, x1); - UFLAG_RES(x1); - BFIx(xRAX, x1, 0, 16); - break; - case 5: - INST_NAME("IMUL AL, Eb"); - SETFLAGS(X_ALL, SF_PENDING); - UFLAG_DF(x1, d_imul8); - GETSEB(x1, 0); - SXTBw(x2, xRAX); - MULw(x1, x2, x1); - UFLAG_RES(x1); - BFIx(xRAX, x1, 0, 16); - break; - case 6: - INST_NAME("DIV Eb"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - SETFLAGS(X_ALL, SF_SET); - GETEB(x1, 0); - CALL(div8, -1); - break; - case 7: - INST_NAME("IDIV Eb"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - SETFLAGS(X_ALL, SF_SET); - GETEB(x1, 0); - CALL(idiv8, -1); - break; - } - break; - case 0xF7: - nextop = F8; - switch((nextop>>3)&7) { - case 0: - case 1: - INST_NAME("TEST Ed, Id"); - SETFLAGS(X_ALL, SF_SET_PENDING); - GETEDH(x1, 4); - i64 = F32S; - MOV64xw(x2, i64); - emit_test32(dyn, ninst, rex, ed, x2, x3, x4); - break; - case 2: - INST_NAME("NOT Ed"); - GETED(4); - MVNxw_REG(ed, ed); - WBACK; - break; - case 3: - INST_NAME("NEG Ed"); - SETFLAGS(X_ALL, SF_SET_PENDING); - GETED(0); - emit_neg32(dyn, ninst, rex, ed, x3, x4); - WBACK; - break; - case 4: - INST_NAME("MUL EAX, Ed"); - SETFLAGS(X_ALL, SF_PENDING); - UFLAG_DF(x2, rex.w?d_mul64:d_mul32); - GETED(0); - if(rex.w) { - if(ed==xRDX) gd=x3; else gd=xRDX; - UMULH(gd, xRAX, ed); - MULx(xRAX, xRAX, ed); - if(gd!=xRDX) {MOVx_REG(xRDX, gd);} - } else { - UMULL(xRDX, xRAX, ed); //64 <- 32x32 - MOVw_REG(xRAX, xRDX); - LSRx(xRDX, xRDX, 32); - } - UFLAG_RES(xRAX); - UFLAG_OP1(xRDX); - break; - case 5: - INST_NAME("IMUL EAX, Ed"); - SETFLAGS(X_ALL, SF_PENDING); - UFLAG_DF(x2, rex.w?d_imul64:d_imul32); - GETED(0); - if(rex.w) { - if(ed==xRDX) gd=x3; else gd=xRDX; - SMULH(gd, xRAX, ed); - MULx(xRAX, xRAX, ed); - if(gd!=xRDX) {MOVx_REG(xRDX, gd);} - } else { - SMULL(xRDX, xRAX, ed); //64 <- 32x32 - MOVw_REG(xRAX, xRDX); - LSRx(xRDX, xRDX, 32); - } - UFLAG_RES(xRAX); - UFLAG_OP1(xRDX); - break; - case 6: - INST_NAME("DIV Ed"); - SETFLAGS(X_ALL, SF_SET); - if(!rex.w) { - SET_DFNONE(x2); - GETED(0); - MOVw_REG(x3, xRAX); - ORRx_REG_LSL(x3, x3, xRDX, 32); - if(MODREG) { - MOVw_REG(x4, 
ed); - ed = x4; - } - UDIVx(x2, x3, ed); - MSUBx(x4, x2, ed, xRAX); - MOVw_REG(xRAX, x2); - MOVw_REG(xRDX, x4); - } else { - if(ninst - && dyn->insts[ninst-1].x64.addr - && *(uint8_t*)(dyn->insts[ninst-1].x64.addr)==0x31 - && *(uint8_t*)(dyn->insts[ninst-1].x64.addr+1)==0xD2) { - SET_DFNONE(x2); - GETED(0); - UDIVx(x2, xRAX, ed); - MSUBx(xRDX, x2, ed, xRAX); - MOVx_REG(xRAX, x2); - } else { - GETEDH(x1, 0); // get edd changed addr, so cannot be called 2 times for same op... - CBZxw_MARK(xRDX); - if(ed!=x1) {MOVx_REG(x1, ed);} - CALL(div64, -1); - B_NEXT_nocond; - MARK; - UDIVx(x2, xRAX, ed); - MSUBx(xRDX, x2, ed, xRAX); - MOVx_REG(xRAX, x2); - SET_DFNONE(x2); - } - } - break; - case 7: - INST_NAME("IDIV Ed"); - SETFLAGS(X_ALL, SF_SET); - if(!rex.w) { - SET_DFNONE(x2) - GETSEDw(0); - MOVw_REG(x3, xRAX); - ORRx_REG_LSL(x3, x3, xRDX, 32); - SDIVx(x2, x3, wb); - MSUBx(x4, x2, wb, x3); - MOVw_REG(xRAX, x2); - MOVw_REG(xRDX, x4); - } else { - if(ninst && dyn->insts - && dyn->insts[ninst-1].x64.addr - && *(uint8_t*)(dyn->insts[ninst-1].x64.addr)==0x48 - && *(uint8_t*)(dyn->insts[ninst-1].x64.addr+1)==0x99) { - SET_DFNONE(x2) - GETED(0); - SDIVx(x2, xRAX, ed); - MSUBx(xRDX, x2, ed, xRAX); - MOVx_REG(xRAX, x2); - } else { - GETEDH(x1, 0); // get edd changed addr, so cannot be called 2 times for same op... - //Need to see if RDX==0 and RAX not signed - // or RDX==-1 and RAX signed - CBNZx_MARK2(xRDX); - TBZ_MARK(xRAX, 31); - MARK2; - MVNx_REG(x2, xRDX); - CBNZx_MARK3(x2); - TBNZ_MARK(xRAX, 31); - MARK3; - if(ed!=x1) {MOVx_REG(x1, ed);} - CALL((void*)idiv64, -1); - B_NEXT_nocond; - MARK; - SDIVx(x2, xRAX, ed); - MSUBx(xRDX, x2, ed, xRAX); - MOVx_REG(xRAX, x2); - SET_DFNONE(x2) - } - } - break; - } - break; - - case 0xFC: - INST_NAME("CLD"); - BFCw(xFlags, F_DF, 1); - break; - case 0xFD: - INST_NAME("STD"); - MOV32w(x1, 1); - BFIw(xFlags, x1, F_DF, 1); - break; - case 0xFE: - nextop = F8; - switch((nextop>>3)&7) { - case 0: - INST_NAME("INC Eb"); - SETFLAGS(X_ALL&~X_CF, SF_SUBSET); - GETEB(x1, 0); - emit_inc8(dyn, ninst, x1, x2, x4); - EBBACK; - break; - case 1: - INST_NAME("DEC Eb"); - SETFLAGS(X_ALL&~X_CF, SF_SUBSET); - GETEB(x1, 0); - emit_dec8(dyn, ninst, x1, x2, x4); - EBBACK; - break; - default: - DEFAULT; - } - break; - case 0xFF: - nextop = F8; - switch((nextop>>3)&7) { - case 0: // INC Ed - INST_NAME("INC Ed"); - SETFLAGS(X_ALL&~X_CF, SF_SUBSET); - GETED(0); - emit_inc32(dyn, ninst, rex, ed, x3, x4); - WBACK; - break; - case 1: //DEC Ed - INST_NAME("DEC Ed"); - SETFLAGS(X_ALL&~X_CF, SF_SUBSET); - GETED(0); - emit_dec32(dyn, ninst, rex, ed, x3, x4); - WBACK; - break; - case 2: // CALL Ed - INST_NAME("CALL Ed"); - PASS2IF(((ninst && dyn->insts[ninst-1].x64.set_flags) - || ((ninst>1) && dyn->insts[ninst-2].x64.set_flags)), 1) - { - READFLAGS(X_PEND); // that's suspicious - } else { - SETFLAGS(X_ALL, SF_SET); //Hack to put flag in "don't care" state - } - GETEDx(0); - BARRIER(1); - BARRIER_NEXT(1); - if(!dyn->insts || ninst==dyn->size-1) { - *need_epilog = 0; - *ok = 0; - } - GETIP(addr); - PUSH1(xRIP); - jump_to_next(dyn, 0, ed, ninst); - break; - case 4: // JMP Ed - INST_NAME("JMP Ed"); - BARRIER(1); - GETEDx(0); - jump_to_next(dyn, 0, ed, ninst); - *need_epilog = 0; - *ok = 0; - break; - case 6: // Push Ed - INST_NAME("PUSH Ed"); - GETEDx(0); - PUSH1(ed); - break; - - default: - DEFAULT; - } - break; - - default: - DEFAULT; - } - - return addr; -} diff --git a/src/dynarec/dynarec_arm64_0f.c b/src/dynarec/dynarec_arm64_0f.c deleted file mode 100755 index 0a204193..00000000 --- 
a/src/dynarec/dynarec_arm64_0f.c +++ /dev/null @@ -1,1911 +0,0 @@ -#include -#include -#include -#include -#include - -#include "debug.h" -#include "box64context.h" -#include "dynarec.h" -#include "emu/x64emu_private.h" -#include "emu/x64run_private.h" -#include "x64run.h" -#include "x64emu.h" -#include "box64stack.h" -#include "callback.h" -#include "emu/x64run_private.h" -#include "x64trace.h" -#include "dynarec_arm64.h" -#include "dynarec_arm64_private.h" -#include "arm64_printer.h" -#include "my_cpuid.h" -#include "emu/x87emu_private.h" - -#include "dynarec_arm64_functions.h" -#include "dynarec_arm64_helper.h" - -#define GETG \ - gd = ((nextop&0x38)>>3)+(rex.r<<3) \ - -#define GETGX(a) \ - gd = ((nextop&0x38)>>3)+(rex.r<<3); \ - a = sse_get_reg(dyn, ninst, x1, gd) - -#define GETGX_empty(a) \ - gd = ((nextop&0x38)>>3)+(rex.r<<3); \ - a = sse_get_reg_empty(dyn, ninst, x1, gd) - -#define GETEX(a, D) \ - if(MODREG) { \ - a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); \ - } else { \ - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, 0, D); \ - a = fpu_get_scratch(dyn); \ - VLDR128_U12(a, ed, fixedaddress); \ - } - -#define GETGM(a) \ - gd = ((nextop&0x38)>>3); \ - a = mmx_get_reg(dyn, ninst, x1, gd) - -#define GETEM(a, D) \ - if(MODREG) { \ - a = mmx_get_reg(dyn, ninst, x1, (nextop&7));\ - } else { \ - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, D); \ - a = fpu_get_scratch(dyn); \ - VLDR64_U12(a, ed, fixedaddress); \ - } - -#define PUTEM(a) \ - if(!MODREG) { \ - VSTR64_U12(a, ed, fixedaddress); \ - } - -uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog) -{ - (void)ip; (void)rep; (void)need_epilog; - - uint8_t opcode = F8; - uint8_t nextop, u8; - uint8_t gd, ed; - uint8_t wback, wb2; - uint8_t eb1, eb2; - int32_t i32, i32_; - int v0, v1; - int q0, q1; - int d0, d1; - int s0; - uint64_t tmp64u; - int64_t j64; - int64_t fixedaddress; - MAYUSE(wb2); - MAYUSE(eb1); - MAYUSE(eb2); - MAYUSE(q0); - MAYUSE(q1); - MAYUSE(d0); - MAYUSE(d1); - MAYUSE(s0); - MAYUSE(j64); - #if STEP > 1 - static const int8_t mask_shift8[] = { -7, -6, -5, -4, -3, -2, -1, 0 }; - #endif - - switch(opcode) { - - case 0x01: - INST_NAME("FAKE xgetbv"); - nextop = F8; - addr = fakeed(dyn, addr, ninst, nextop); - SETFLAGS(X_ALL, SF_SET); // Hack to set flags in "don't care" state - GETIP(ip); - STORE_XEMU_CALL(xRIP); - CALL(arm_ud, -1); - break; - - case 0x05: - INST_NAME("SYSCALL"); - GETIP(addr); - STORE_XEMU_CALL(xRIP); - CALL_S(x64Syscall, -1); - LOAD_XEMU_CALL(xRIP); - TABLE64(x3, addr); // expected return address - CMPSx_REG(xRIP, x3); - B_MARK(cNE); - LDRw_U12(w1, xEmu, offsetof(x64emu_t, quit)); - CBZw_NEXT(w1); - MARK; - LOAD_XEMU_REM(); - jump_to_epilog(dyn, 0, xRIP, ninst); - break; - - case 0x09: - INST_NAME("WBINVD"); - break; - - case 0x0B: - INST_NAME("UD2"); - SETFLAGS(X_ALL, SF_SET); // Hack to set flags in "don't care" state - GETIP(ip); - STORE_XEMU_CALL(xRIP); - CALL(arm_ud, -1); - break; - - case 0x0D: - nextop = F8; - switch((nextop>>3)&7) { - case 1: - INST_NAME("PREFETCHW"); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff, 7, rex, 0, 0); - PST_L1_STREAM_U12(ed, fixedaddress); - break; - default: //??? 
- DEFAULT; - } - break; - - case 0x10: - INST_NAME("MOVUPS Gx,Ex"); - nextop = F8; - GETG; - if(MODREG) { - ed = (nextop&7)+(rex.b<<3); - v1 = sse_get_reg(dyn, ninst, x1, ed); - v0 = sse_get_reg_empty(dyn, ninst, x1, gd); - VMOVQ(v0, v1); - } else { - v0 = sse_get_reg_empty(dyn, ninst, x1, gd); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, 0, 0); - VLDR128_U12(v0, ed, fixedaddress); // no alignment issue with ARMv8 NEON :) - } - break; - case 0x11: - INST_NAME("MOVUPS Ex,Gx"); - nextop = F8; - GETG; - v0 = sse_get_reg(dyn, ninst, x1, gd); - if(MODREG) { - ed = (nextop&7)+(rex.b<<3); - v1 = sse_get_reg_empty(dyn, ninst, x1, ed); - VMOVQ(v1, v0); - } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, 0, 0); - VSTR128_U12(v0, ed, fixedaddress); - } - break; - case 0x12: - nextop = F8; - if(MODREG) { - INST_NAME("MOVHLPS Gx,Ex"); - GETGX(v0); - v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); - VMOVeD(v0, 0, v1, 1); - } else { - INST_NAME("MOVLPS Gx,Ex"); - GETGX(v0); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); - VLD1_64(v0, 0, ed); - } - break; - case 0x13: - nextop = F8; - INST_NAME("MOVLPS Ex,Gx"); - GETGX(v0); - if(MODREG) { - v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); - VMOVeD(v1, 0, v0, 0); - } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); - VST1_64(v0, 0, ed); // better to use VST1 than VSTR_64, to avoid NEON->VFPU transfert I assume - } - break; - case 0x14: - INST_NAME("UNPCKLPS Gx, Ex"); - nextop = F8; - GETEX(q0, 0); - GETGX(v0); - VZIP1Q_32(v0, v0, q0); - break; - case 0x15: - INST_NAME("UNPCKHPS Gx, Ex"); - nextop = F8; - GETEX(q0, 0); - GETGX(v0); - VZIP2Q_32(v0, v0, q0); - break; - case 0x16: - nextop = F8; - if(MODREG) { - INST_NAME("MOVLHPS Gx,Ex"); - GETGX(v0); - v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); - VMOVeD(v0, 1, v1, 0); - } else { - INST_NAME("MOVHPS Gx,Ex"); - GETGX(v0); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); - VLD1_64(v0, 1, ed); - } - break; - case 0x17: - nextop = F8; - INST_NAME("MOVHPS Ex,Gx"); - GETGX(v0); - if(MODREG) { - v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); - VMOVeD(v1, 0, v0, 1); - } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); - VST1_64(v0, 1, ed); - } - break; - case 0x18: - nextop = F8; - if((nextop&0xC0)==0xC0) { - INST_NAME("NOP (multibyte)"); - } else - switch((nextop>>3)&7) { - case 0: - INST_NAME("PREFETCHh Ed"); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff, 7, rex, 0, 0); - PLD_L1_STREAM_U12(ed, fixedaddress); - break; - case 1: - INST_NAME("PREFETCHh Ed"); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff, 7, rex, 0, 0); - PLD_L1_KEEP_U12(ed, fixedaddress); - break; - case 2: - INST_NAME("PREFETCHh Ed"); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff, 7, rex, 0, 0); - PLD_L2_KEEP_U12(ed, fixedaddress); - break; - case 3: - INST_NAME("PREFETCHh Ed"); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff, 7, rex, 0, 0); - PLD_L3_KEEP_U12(ed, fixedaddress); - break; - default: - INST_NAME("NOP (multibyte)"); - FAKEED; - } - break; - - case 0x1F: - INST_NAME("NOP (multibyte)"); - nextop = F8; - FAKEED; - break; - - case 0x28: - INST_NAME("MOVAPS Gx,Ex"); - nextop = F8; - GETG; - if(MODREG) { - ed = (nextop&7)+(rex.b<<3); - v1 = sse_get_reg(dyn, ninst, x1, ed); - v0 
= sse_get_reg_empty(dyn, ninst, x1, gd); - VMOVQ(v0, v1); - } else { - v0 = sse_get_reg_empty(dyn, ninst, x1, gd); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, 0, 0); - VLDR128_U12(v0, ed, fixedaddress); - } - break; - case 0x29: - INST_NAME("MOVAPS Ex,Gx"); - nextop = F8; - GETG; - v0 = sse_get_reg(dyn, ninst, x1, gd); - if(MODREG) { - ed = (nextop&7)+(rex.b<<3); - v1 = sse_get_reg_empty(dyn, ninst, x1, ed); - VMOVQ(v1, v0); - } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, 0, 0); - VSTR128_U12(v0, ed, fixedaddress); - } - break; - - case 0x2B: - INST_NAME("MOVNTPS Ex,Gx"); - nextop = F8; - GETG; - v0 = sse_get_reg(dyn, ninst, x1, gd); - if(MODREG) { - ed = (nextop&7)+(rex.b<<3); - v1 = sse_get_reg_empty(dyn, ninst, x1, ed); - VMOVQ(v1, v0); - } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, 0, 0); - VSTR128_U12(v0, ed, fixedaddress); - } - break; - - case 0x2E: - // no special check... - case 0x2F: - if(opcode==0x2F) {INST_NAME("COMISS Gx, Ex");} else {INST_NAME("UCOMISS Gx, Ex");} - SETFLAGS(X_ALL, SF_SET); - nextop = F8; - GETGX(v0); - if(MODREG) { - s0 = sse_get_reg(dyn, ninst, x1, (nextop&7) + (rex.b<<3)); - } else { - s0 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); - VLDR32_U12(s0, ed, fixedaddress); - } - FCMPS(v0, s0); - FCOMI(x1, x2); - break; - - case 0x31: - INST_NAME("RDTSC"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - CALL(ReadTSC, xRAX); // will return the u64 in xEAX - LSRx(xRDX, xRAX, 32); - MOVw_REG(xRAX, xRAX); // wipe upper part - break; - - case 0x38: - //SSE3 - nextop=F8; - switch(nextop) { - case 0x00: - INST_NAME("PSHUFB Gm, Em"); - nextop = F8; - GETGM(q0); - GETEM(q1, 0); - d0 = fpu_get_scratch(dyn); - MOVI_8(d0, 0b10001111); - VAND(d0, d0, q1); // mask the index - VTBL1_8(q0, q0, d0); - break; - - case 0x04: - INST_NAME("PMADDUBSW Gm,Em"); - nextop = F8; - GETGM(q0); - GETEM(q1, 0); - v0 = fpu_get_scratch(dyn); - v1 = fpu_get_scratch(dyn); - UXTL_8(v0, q0); // this is unsigned, so 0 extended - SXTL_8(v1, q1); // this is signed - VMULQ_16(v0, v0, v1); - SADDLPQ_16(v1, v0); - SQXTN_16(q0, v1); - break; - - case 0x0B: - INST_NAME("PMULHRSW Gm,Em"); - nextop = F8; - GETGM(q0); - GETEM(q1, 0); - SQRDMULH_16(q0, q0, q1); - break; - - default: - DEFAULT; - } - break; - - case 0x3A: // these are some more SSSE3 opcodes - opcode = F8; - switch(opcode) { - case 0x0F: - INST_NAME("PALIGNR Gm, Em, Ib"); - nextop = F8; - GETGM(q0); - GETEM(q1, 1); - u8 = F8; - if(u8>15) { - VEOR(q0, q0, q0); - } else if(u8>7) { - d0 = fpu_get_scratch(dyn); - VEOR(d0, d0, d0); - VEXT_8(q0, q0, d0, u8-8); - } else { - VEXT_8(q0, q1, q0, u8); - } - break; - default: - DEFAULT; - } - break; - - #define GO(GETFLAGS, NO, YES, F) \ - READFLAGS(F); \ - GETFLAGS; \ - nextop=F8; \ - GETGD; \ - if(MODREG) { \ - ed = xRAX+(nextop&7)+(rex.b<<3); \ - CSELxw(gd, ed, gd, YES); \ - } else { \ - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, 0, 0); \ - Bcond(NO, +8); \ - LDRxw_U12(gd, ed, fixedaddress); \ - if(!rex.w) {MOVw_REG(gd, gd);} \ - } - - GOCOND(0x40, "CMOV", "Gd, Ed"); - #undef GO - case 0x50: - INST_NAME("MOVMSPKPS Gd, Ex"); - nextop = F8; - GETGD; - MOV32w(gd, 0); - if((nextop&0xC0)==0xC0) { - // EX is an xmm reg - GETEX(q0, 0); - VMOVQDto(x1, q0, 0); - LSRx(x1, x1, 31); - BFIx(gd, x1, 0, 1); - LSRx(x1, x1, 32); - BFIx(gd, x1, 1, 1); - 
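/* Aside (editorial, not part of the original patch): MOVMSKPS gathers the sign bit
 * of each of the four packed floats into bits 0..3 of the destination; the
 * VMOVQDto/LSRx/BFIx sequence here handles two lanes per 64-bit half of the xmm
 * register. A plain scalar model of the architectural result (movmskps_model is an
 * invented name for this sketch): */
#include <stdint.h>
#include <string.h>

static uint32_t movmskps_model(const float src[4]) {
    uint32_t mask = 0;
    for (int i = 0; i < 4; ++i) {
        uint32_t bits;
        memcpy(&bits, &src[i], sizeof bits);  /* reinterpret the float's bit pattern */
        mask |= ((bits >> 31) & 1u) << i;     /* sign bit of lane i -> result bit i */
    }
    return mask;
}
/* End of aside. */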
VMOVQDto(x1, q0, 1); - LSRx(x1, x1, 31); - BFIx(gd, x1, 2, 1); - LSRx(x1, x1, 32); - BFIx(gd, x1, 3, 1); - } else { - // EX is memory - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, (0xfff<<3)-8, 7, rex, 0, 0); - LDRx_U12(x1, ed, fixedaddress+0); - LSRx(x1, x1, 31); - BFIx(gd, x1, 0, 1); - LSRx(x1, x1, 32); - BFIx(gd, x1, 1, 1); - LDRx_U12(x1, ed, fixedaddress+8); - LSRx(x1, x1, 31); - BFIx(gd, x1, 2, 1); - LSRx(x1, x1, 32); - BFIx(gd, x1, 3, 1); - } - break; - case 0x51: - INST_NAME("SQRTPS Gx, Ex"); - nextop = F8; - GETEX(q0, 0); - GETGX_empty(v0); - VFSQRTQS(v0, q0); - break; - case 0x52: - INST_NAME("RSQRTPS Gx, Ex"); - nextop = F8; - GETEX(q0, 0); - GETGX_empty(q1); - v0 = fpu_get_scratch(dyn); - // more precise - if(q1==q0) - v1 = fpu_get_scratch(dyn); - else - v1 = q1; - VFRSQRTEQS(v0, q0); - VFMULQS(v1, v0, q0); - VFRSQRTSQS(v1, v1, v0); - VFMULQS(q1, v1, v0); - break; - case 0x53: - INST_NAME("RCPPS Gx, Ex"); - nextop = F8; - GETEX(q0, 0); - GETGX_empty(q1); - if(q0 == q1) - v1 = fpu_get_scratch(dyn); - else - v1 = q1; - v0 = fpu_get_scratch(dyn); - VFRECPEQS(v0, q0); - VFRECPSQS(v1, v0, q0); - VFMULQS(q1, v0, v1); - break; - case 0x54: - INST_NAME("ANDPS Gx, Ex"); - nextop = F8; - GETEX(q0, 0); - GETGX(v0); - VANDQ(v0, v0, q0); - break; - case 0x55: - INST_NAME("ANDNPS Gx, Ex"); - nextop = F8; - GETEX(q0, 0); - GETGX(v0); - VBICQ(v0, q0, v0); - break; - case 0x56: - INST_NAME("ORPS Gx, Ex"); - nextop = F8; - GETEX(q0, 0); - GETGX(v0); - VORRQ(v0, v0, q0); - break; - case 0x57: - INST_NAME("XORPS Gx, Ex"); - nextop = F8; - GETG; - if(MODREG && ((nextop&7)+(rex.b<<3)==gd)) { - // special case for XORPS Gx, Gx - q0 = sse_get_reg_empty(dyn, ninst, x1, gd); - VEORQ(q0, q0, q0); - } else { - q0 = sse_get_reg(dyn, ninst, x1, gd); - GETEX(q1, 0); - VEORQ(q0, q0, q1); - } - break; - case 0x58: - INST_NAME("ADDPS Gx, Ex"); - nextop = F8; - GETEX(q0, 0); - GETGX(v0); - VFADDQS(v0, v0, q0); - break; - case 0x59: - INST_NAME("MULPS Gx, Ex"); - nextop = F8; - GETEX(q0, 0); - GETGX(v0); - VFMULQS(v0, v0, q0); - break; - case 0x5A: - INST_NAME("CVTPS2PD Gx, Ex"); - nextop = F8; - GETEX(q0, 0); - GETGX(q1); - FCVTL(q1, q0); - break; - case 0x5B: - INST_NAME("CVTDQ2PS Gx, Ex"); - nextop = F8; - GETEX(q0, 0); - GETGX_empty(q1); - SCVTQFS(q1, q0); - break; - case 0x5C: - INST_NAME("SUBPS Gx, Ex"); - nextop = F8; - GETEX(q0, 0); - GETGX(v0); - VFSUBQS(v0, v0, q0); - break; - case 0x5D: - INST_NAME("MINPS Gx, Ex"); - nextop = F8; - GETGX(v0); - GETEX(v1, 0); - VFMINQS(v0, v0, v1); - break; - case 0x5E: - INST_NAME("DIVPS Gx, Ex"); - nextop = F8; - GETEX(q0, 0); - GETGX(v0); - VFDIVQS(v0, v0, q0); - break; - case 0x5F: - INST_NAME("MAXPS Gx, Ex"); - nextop = F8; - GETGX(v0); - GETEX(v1, 0); - VFMAXQS(v0, v0, v1); - break; - case 0x60: - INST_NAME("PUNPCKLBW Gm,Em"); - nextop = F8; - GETGM(d0); - GETEM(d1, 0); - VZIP1_8(d0, d0, d1); - break; - case 0x61: - INST_NAME("PUNPCKLWD Gm,Em"); - nextop = F8; - GETGM(d0); - GETEM(d1, 0); - VZIP1_16(d0, d0, d1); - break; - case 0x62: - INST_NAME("PUNPCKLDQ Gm,Em"); - nextop = F8; - GETGM(d0); - GETEM(d1, 0); - VZIP1_32(d0, d0, d1); - break; - case 0x63: - INST_NAME("PACKSSWB Gm,Em"); - nextop = F8; - GETGM(d0); - GETEM(d1, 0); - q0 = fpu_get_scratch(dyn); - VMOVeD(q0, 0, d0, 0); - VMOVeD(q0, 1, d1, 0); - SQXTN_8(d0, q0); - break; - case 0x64: - INST_NAME("PCMPGTB Gx,Ex"); - nextop = F8; - GETGM(v0); - GETEM(v1, 0); - VCMGT_8(v0, v0, v1); - break; - case 0x65: - INST_NAME("PCMPGTW Gx,Ex"); - nextop = F8; - GETGM(v0); - GETEM(v1, 0); - VCMGT_16(v0, 
v0, v1); - break; - case 0x66: - INST_NAME("PCMPGTD Gx,Ex"); - nextop = F8; - GETGM(v0); - GETEM(v1, 0); - VCMGT_32(v0, v0, v1); - break; - case 0x67: - INST_NAME("PACKUSWB Gm, Em"); - nextop = F8; - GETGM(v0); - q0 = fpu_get_scratch(dyn); - VMOVeD(q0, 0, v0, 0); - if(MODREG) { - v1 = mmx_get_reg(dyn, ninst, x1, (nextop&7)); - VMOVeD(q0, 1, v1, 0); - } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); - VLD1_64(q0, 1, ed); - } - SQXTUN_8(v0, q0); - break; - case 0x68: - INST_NAME("PUNPCKHBW Gm,Em"); - nextop = F8; - GETGM(q0); - GETEM(q1, 1); - VZIP2_8(q0, q0, q1); - break; - case 0x69: - INST_NAME("PUNPCKHWD Gm,Em"); - nextop = F8; - GETGM(q0); - GETEM(q1, 1); - VZIP2_16(q0, q0, q1); - break; - case 0x6A: - INST_NAME("PUNPCKHDQ Gm,Em"); - nextop = F8; - GETGM(q0); - GETEM(q1, 1); - VZIP2_32(q0, q0, q1); - break; - case 0x6B: - INST_NAME("PACKSSDW Gm,Em"); - nextop = F8; - GETGM(v0); - if(MODREG) { - GETEM(v1, 0); - q0 = fpu_get_scratch(dyn); - VMOVeD(q0, 1, v1, 0); - } else { - q0 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); - VLD1_64(q0, 1, ed); - } - VMOVeD(q0, 0, v0, 0); - SQXTN_16(v0, q0); - break; - - case 0x6E: - INST_NAME("MOVD Gm, Ed"); - nextop = F8; - gd = (nextop&0x38)>>3; - v0 = mmx_get_reg_empty(dyn, ninst, x3, gd); - if(MODREG) { - ed = xRAX + (nextop&7) + (rex.b<<3); - if(rex.w) { - FMOVDx(v0, ed); - } else { - FMOVSw(v0, ed); - } - } else { - v0 = mmx_get_reg_empty(dyn, ninst, x1, gd); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, 0, 0); - if(rex.w) { - VLDR64_U12(v0, ed, fixedaddress); - } else { - VLDR32_U12(v0, ed, fixedaddress); - } - } - break; - case 0x6F: - INST_NAME("MOVQ Gm, Em"); - nextop = F8; - GETG; - if(MODREG) { - v1 = mmx_get_reg(dyn, ninst, x1, nextop&7); // no rex.b on MMX - v0 = mmx_get_reg_empty(dyn, ninst, x1, gd); - VMOVeD(v0, 0, v1, 0); - } else { - v0 = mmx_get_reg_empty(dyn, ninst, x1, gd); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0); - VLDR64_U12(v0, ed, fixedaddress); - } - break; - case 0x70: - INST_NAME("PSHUFW Gm,Em,Ib"); - nextop = F8; - gd = (nextop&0x38)>>3; - if(MODREG) { - u8 = F8; - v1 = mmx_get_reg(dyn, ninst, x1, (nextop&7)); - v0 = mmx_get_reg_empty(dyn, ninst, x1, gd); - if(u8==0x4E) { - if(v0==v1) { - VEXT_8(v0, v0, v0, 4); // Swap Up/Lower 32bits parts - } else { - VMOVeS(v0, 0, v1, 1); - VMOVeS(v0, 1, v1, 0); - } - } else if(u8==0x00) { - // dumplicate lower 16bits to all spot - if(v0!=v1) { - VMOVeH(v0, 0, v1, 0); - } - VMOVeH(v0, 1, v1, 0); - VMOVeS(v0, 1, v1, 0); - } else if(u8==0x55) { - // dumplicate 16bits slot 1 to all spot - if(v0!=v1) { - VMOVeH(v0, 1, v1, 1); - } - VMOVeH(v0, 0, v1, 1); - VMOVeS(v0, 1, v1, 0); - } else if(u8==0xAA) { - // dumplicate 16bits slot 2 to all spot - if(v0!=v1) { - VMOVeH(v0, 2, v1, 2); - } - VMOVeH(v0, 3, v1, 2); - VMOVeS(v0, 0, v1, 1); - } else if(u8==0xFF) { - // dumplicate 16bits slot 3 to all spot - if(v0!=v1) { - VMOVeH(v0, 3, v1, 3); - } - VMOVeH(v0, 2, v1, 3); - VMOVeS(v0, 0, v1, 1); - } else if(v0!=v1) { - VMOVeH(v0, 0, v1, (u8>>(0*2))&3); - VMOVeH(v0, 1, v1, (u8>>(1*2))&3); - VMOVeH(v0, 2, v1, (u8>>(2*2))&3); - VMOVeH(v0, 3, v1, (u8>>(3*2))&3); - } else { - uint64_t swp[4] = { - (0)|(1<<8), - (2)|(3<<8), - (4)|(5<<8), - (6)|(7<<8) - }; - d0 = fpu_get_scratch(dyn); - tmp64u = swp[(u8>>(0*2))&3] | (swp[(u8>>(1*2))&3]<<16); - tmp64u |= (swp[(u8>>(2*2))&3]<<32) | 
(swp[(u8>>(3*2))&3]<<48); - MOV64x(x2, tmp64u); - VMOVQDfrom(d0, 0, x2); - VTBL1_8(v0, v1, d0); - } - } else { - v0 = mmx_get_reg_empty(dyn, ninst, x1, gd); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 1); - u8 = F8; - if (u8) { - i32 = -1; - for (int i=0; i<4; ++i) { - int32_t idx = (u8>>(i*2))&3; - if(idx!=i32) { - ADDx_U12(x2, ed, idx*2); - i32 = idx; - } - VLD1_16(v0, i, x2); - } - } else { - VLD1R_16(v0, ed); - } - } - break; - case 0x71: - nextop = F8; - switch((nextop>>3)&7) { - case 2: - INST_NAME("PSRLW Em, Ib"); - GETEM(q0, 1); - u8 = F8; - if(u8) { - if (u8>15) { - VEOR(q0, q0, q0); - } else if(u8) { - VSHR_16(q0, q0, u8); - } - if(!MODREG) { - VSTR64_U12(q0, ed, fixedaddress); - } - } - break; - case 4: - INST_NAME("PSRAW Ex, Ib"); - GETEM(q0, 1); - u8 = F8; - if(u8>15) u8=15; - if(u8) { - VSSHR_16(q0, q0, u8); - } - if(!MODREG) { - VSTR64_U12(q0, ed, fixedaddress); - } - break; - case 6: - INST_NAME("PSLLW Ex, Ib"); - GETEM(q0, 1); - u8 = F8; - if(u8) { - if (u8>15) { - VEOR(q0, q0, q0); - } else { - VSHL_16(q0, q0, u8); - } - if(!MODREG) { - VSTR64_U12(q0, ed, fixedaddress); - } - } - break; - default: - *ok = 0; - DEFAULT; - } - break; - case 0x72: - nextop = F8; - switch((nextop>>3)&7) { - case 2: - INST_NAME("PSRLD Em, Ib"); - GETEM(d0, 1); - u8 = F8; - if(u8) { - if (u8>31) { - VEOR(d0, d0, d0); - } else if(u8) { - VSHR_32(d0, d0, u8); - } - if(!MODREG) { - VSTR64_U12(d0, ed, fixedaddress); - } - } - break; - case 4: - INST_NAME("PSRAD Em, Ib"); - GETEM(d0, 1); - u8 = F8; - if(u8>31) u8=31; - if(u8) { - VSSHR_32(d0, d0, u8); - } - if(!MODREG) { - VSTR64_U12(d0, ed, fixedaddress); - } - break; - case 6: - INST_NAME("PSLLD Em, Ib"); - GETEM(d0, 1); - u8 = F8; - if(u8) { - if (u8>31) { - VEOR(d0, d0, d0); - } else { - VSHL_32(d0, d0, u8); - } - if(!MODREG) { - VSTR64_U12(d0, ed, fixedaddress); - } - } - break; - default: - DEFAULT; - } - break; - case 0x73: - nextop = F8; - switch((nextop>>3)&7) { - case 2: - INST_NAME("PSRLQ Em, Ib"); - GETEM(q0, 1); - u8 = F8; - if(u8) { - if (u8>63) { - VEOR(q0, q0, q0); - } else if(u8) { - USHR_64(q0, q0, u8); - } - PUTEM(q0); - } - break; - case 6: - INST_NAME("PSLLQ Em, Ib"); - GETEM(q0, 1); - u8 = F8; - if(u8) { - if (u8>63) { - VEOR(q0, q0, q0); - } else { - SHL_64(q0, q0, u8); - } - PUTEM(q0); - } - break; - default: - DEFAULT; - } - break; - case 0x74: - INST_NAME("PCMPEQB Gm,Em"); - nextop = F8; - GETGM(d0); - GETEM(d1, 0); - VCMEQ_8(d0, d0, d1); - break; - case 0x75: - INST_NAME("PCMPEQW Gm,Em"); - nextop = F8; - GETGM(v0); - GETEM(q0, 0); - VCMEQ_16(v0, v0, q0); - break; - case 0x76: - INST_NAME("PCMPEQD Gm,Em"); - nextop = F8; - GETGM(v0); - GETEM(v1, 0); - VCMEQ_32(v0, v0, v1); - break; - case 0x77: - INST_NAME("EMMS"); - // empty MMX, FPU now usable - mmx_purgecache(dyn, ninst, x1); - /*emu->top = 0; - emu->fpu_stack = 0;*/ //TODO: Check if something is needed here? 
- break; - - case 0x7E: - INST_NAME("MOVD Ed, Gm"); - nextop = F8; - GETGM(v0); - if((nextop&0xC0)==0xC0) { - ed = xRAX + (nextop&7) + (rex.b<<3); - if(rex.w) { - VMOVQDto(ed, v0, 0); - } else { - VMOVSto(ed, v0, 0); - MOVxw_REG(ed, ed); - } - } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, 0, 0); - if(rex.w) { - VSTR64_U12(v0, ed, fixedaddress); - } else { - VSTR32_U12(v0, ed, fixedaddress); - } - } - break; - case 0x7F: - INST_NAME("MOVQ Em, Gm"); - nextop = F8; - GETGM(v0); - if(MODREG) { - v1 = mmx_get_reg_empty(dyn, ninst, x1, nextop&7); - VMOV(v1, v0); - } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0); - VSTR64_U12(v0, ed, fixedaddress); - } - break; - - #define GO(GETFLAGS, NO, YES, F) \ - READFLAGS(F); \ - i32_ = F32S; \ - BARRIER(2); \ - JUMP(addr+i32_);\ - GETFLAGS; \ - if(dyn->insts[ninst].x64.jmp_insts==-1) { \ - /* out of the block */ \ - i32 = dyn->insts[ninst+1].address-(dyn->arm_size); \ - Bcond(NO, i32); \ - jump_to_next(dyn, addr+i32_, 0, ninst); \ - } else { \ - /* inside the block */ \ - i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->arm_size); \ - Bcond(YES, i32); \ - } \ - - GOCOND(0x80, "J", "Id"); - #undef GO - - #define GO(GETFLAGS, NO, YES, F) \ - READFLAGS(F); \ - GETFLAGS; \ - nextop=F8; \ - CSETw(x3, YES); \ - if(MODREG) { \ - if(rex.rex) { \ - eb1= xRAX+(nextop&7)+(rex.b<<3); \ - eb2 = 0; \ - } else { \ - ed = (nextop&7); \ - eb2 = (ed>>2)*8; \ - eb1 = xRAX+(ed&3); \ - } \ - BFIx(eb1, x3, eb2, 8); \ - } else { \ - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff, 0, rex, 0, 0); \ - STRB_U12(x3, ed, fixedaddress); \ - } - - GOCOND(0x90, "SET", "Eb"); - #undef GO - - case 0xA2: - INST_NAME("CPUID"); - MOVx_REG(x1, xRAX); - CALL_(my_cpuid, -1, 0); - break; - case 0xA3: - INST_NAME("BT Ed, Gd"); - SETFLAGS(X_CF, SF_SUBSET); - SET_DFNONE(x1); - nextop = F8; - GETGD; - if(MODREG) { - ed = xRAX+(nextop&7)+(rex.b<<3); - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, 0, 0); - ASRxw(x1, gd, 5+rex.w); // r1 = (gd>>5) - ADDx_REG_LSL(x3, wback, x1, 2+rex.w); //(&ed)+=r1*4; - LDRxw_U12(x1, x3, fixedaddress); - ed = x1; - } - if(rex.w) { - ANDx_mask(x2, gd, 1, 0, 0b00101); //mask=0x000000000000003f - } else { - ANDw_mask(x2, gd, 0, 0b00100); //mask=0x00000001f - } - LSRxw_REG(x4, ed, x2); - BFIw(xFlags, x4, F_CF, 1); - break; - case 0xA4: - nextop = F8; - INST_NAME("SHLD Ed, Gd, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); - GETED(1); - GETGD; - u8 = F8; - emit_shld32c(dyn, ninst, rex, ed, gd, u8, x3, x4); - WBACK; - break; - case 0xA5: - nextop = F8; - INST_NAME("SHLD Ed, Gd, CL"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - UXTBw(x3, xRCX); - SETFLAGS(X_ALL, SF_SET); - GETEDW(x4, x1, 0); - GETGD; - MOVxw_REG(x2, gd); - CALL_(rex.w?((void*)shld64):((void*)shld32), ed, x4); - WBACK; - break; - - case 0xAB: - INST_NAME("BTS Ed, Gd"); - SETFLAGS(X_CF, SF_SUBSET); - SET_DFNONE(x1); - nextop = F8; - GETGD; - if(MODREG) { - ed = xRAX+(nextop&7)+(rex.b<<3); - wback = 0; - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, 0, 0); - ASRxw(x1, gd, 5+rex.w); // r1 = (gd>>5) - ADDx_REG_LSL(x3, wback, x1, 2+rex.w); //(&ed)+=r1*4; - LDRxw_U12(x1, x3, fixedaddress); - ed = x1; - wback = x3; - } - if(rex.w) { - ANDx_mask(x2, gd, 1, 0, 0b00101); //mask=0x000000000000003f - } else { - ANDw_mask(x2, gd, 0, 
0b00100); //mask=0x00000001f - } - LSRxw_REG(x4, ed, x2); - if(rex.w) { - ANDSx_mask(x4, x4, 1, 0, 0); //mask=1 - } else { - ANDSw_mask(x4, x4, 0, 0); //mask=1 - } - BFIw(xFlags, x4, F_CF, 1); - MOV32w(x4, 1); - LSLxw_REG(x4, x4, x2); - EORxw_REG(x4, ed, x4); - CSELxw(ed, ed, x4, cNE); - if(wback) { - STRxw_U12(ed, wback, fixedaddress); - } - break; - case 0xAC: - nextop = F8; - INST_NAME("SHRD Ed, Gd, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); - GETED(1); - GETGD; - u8 = F8; - emit_shrd32c(dyn, ninst, rex, ed, gd, u8, x3, x4); - WBACK; - break; - case 0xAD: - nextop = F8; - INST_NAME("SHRD Ed, Gd, CL"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - SETFLAGS(X_ALL, SF_SET); - UXTBw(x3, xRCX); - GETEDW(x4, x1, 0); - GETGD; - MOVxw_REG(x2, gd); - CALL_(rex.w?((void*)shrd64):((void*)shrd32), ed, x4); - WBACK; - break; - - case 0xAE: - nextop = F8; - if((nextop&0xF8)==0xE8) { - INST_NAME("LFENCE"); - } else - if((nextop&0xF8)==0xF0) { - INST_NAME("MFENCE"); - } else - if((nextop&0xF8)==0xF8) { - INST_NAME("SFENCE"); - } else { - switch((nextop>>3)&7) { - case 0: - INST_NAME("FXSAVE Ed"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - fpu_purgecache(dyn, ninst, x1, x2, x3); - if(MODREG) { - DEFAULT; - } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); - if(ed!=x1) {MOVx_REG(x1, ed);} - CALL(rex.w?((void*)fpu_fxsave64):((void*)fpu_fxsave32), -1); - } - break; - case 1: - INST_NAME("FXRSTOR Ed"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - fpu_purgecache(dyn, ninst, x1, x2, x3); - if(MODREG) { - DEFAULT; - } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); - if(ed!=x1) {MOVx_REG(x1, ed);} - CALL(rex.w?((void*)fpu_fxrstor64):((void*)fpu_fxrstor32), -1); - } - break; - case 2: - INST_NAME("LDMXCSR Md"); - GETED(0); - STRw_U12(ed, xEmu, offsetof(x64emu_t, mxcsr)); - break; - case 3: - INST_NAME("STMXCSR Md"); - if(MODREG) { - ed = xRAX+(nextop&7)+(rex.b<<3); - LDRw_U12(ed, xEmu, offsetof(x64emu_t, mxcsr)); - } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); - LDRw_U12(x4, xEmu, offsetof(x64emu_t, mxcsr)); - STRw_U12(x4, ed, fixedaddress); - } - break; - default: - DEFAULT; - } - } - break; - case 0xAF: - INST_NAME("IMUL Gd, Ed"); - SETFLAGS(X_ALL, SF_PENDING); - nextop = F8; - GETGD; - GETED(0); - if(rex.w) { - // 64bits imul - UFLAG_IF { - SMULH(x3, gd, ed); - MULx(gd, gd, ed); - UFLAG_OP1(x3); - UFLAG_RES(gd); - UFLAG_DF(x3, d_imul64); - } else { - MULxw(gd, gd, ed); - } - } else { - // 32bits imul - UFLAG_IF { - SMULL(gd, gd, ed); - UFLAG_RES(gd); - LSRx(x3, gd, 32); - UFLAG_OP1(x3); - UFLAG_DF(x3, d_imul32); - MOVw_REG(gd, gd); - } else { - MULxw(gd, gd, ed); - } - } - break; - - case 0xB3: - INST_NAME("BTR Ed, Gd"); - SETFLAGS(X_CF, SF_SUBSET); - SET_DFNONE(x1); - nextop = F8; - GETGD; - if(MODREG) { - ed = xRAX+(nextop&7)+(rex.b<<3); - wback = 0; - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, 0, 0); - ASRxw(x1, gd, 5+rex.w); // r1 = (gd>>5) - ADDx_REG_LSL(x3, wback, x1, 2+rex.w); //(&ed)+=r1*4; - LDRxw_U12(x1, x3, fixedaddress); - ed = x1; - wback = x3; - } - if(rex.w) { - ANDx_mask(x2, gd, 1, 0, 0b00101); //mask=0x000000000000003f - } else { - ANDw_mask(x2, gd, 0, 0b00100); //mask=0x00000001f - } - LSRxw_REG(x4, ed, x2); - if(rex.w) { - ANDSx_mask(x4, x4, 1, 0, 0); //mask=1 - } else { - ANDSw_mask(x4, x4, 0, 0); //mask=1 - } - BFIw(xFlags, x4, F_CF, 1); - MOV32w(x4, 1); - LSLxw_REG(x4, x4, 
x2); - EORxw_REG(x4, ed, x4); - CSELxw(ed, ed, x4, cEQ); - if(wback) { - STRxw_U12(ed, wback, fixedaddress); - } - break; - - case 0xB6: - INST_NAME("MOVZX Gd, Eb"); - nextop = F8; - GETGD; - if(MODREG) { - if(rex.rex) { - eb1 = xRAX+(nextop&7)+(rex.b<<3); - eb2 = 0; \ - } else { - ed = (nextop&7); - eb1 = xRAX+(ed&3); // Ax, Cx, Dx or Bx - eb2 = (ed&4)>>2; // L or H - } - UBFXxw(gd, eb1, eb2*8, 8); - } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff, 0, rex, 0, 0); - LDRB_U12(gd, ed, fixedaddress); - } - break; - case 0xB7: - INST_NAME("MOVZX Gd, Ew"); - nextop = F8; - GETGD; - if(MODREG) { - ed = xRAX+(nextop&7)+(rex.b<<3); - UBFXxw(gd, ed, 0, 16); - } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<1, 1, rex, 0, 0); - LDRH_U12(gd, ed, fixedaddress); - } - break; - - case 0xBA: - nextop = F8; - switch((nextop>>3)&7) { - case 4: - INST_NAME("BT Ed, Ib"); - SETFLAGS(X_CF, SF_SUBSET); - SET_DFNONE(x1); - gd = x2; - if(MODREG) { - ed = xRAX+(nextop&7)+(rex.b<<3); - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xff0<<2, 3, rex, 0, 1); - LDRxw_U12(x1, wback, fixedaddress); - ed = x1; - } - u8 = F8; - u8&=rex.w?0x3f:0x1f; - if(u8) { - LSRxw(x1, ed, u8); - ed = x1; - } - BFIw(xFlags, ed, F_CF, 1); - break; - case 5: - INST_NAME("BTS Ed, Ib"); - SETFLAGS(X_CF, SF_SUBSET); - SET_DFNONE(x1); - if(MODREG) { - ed = xRAX+(nextop&7)+(rex.b<<3); - wback = 0; - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xff0<<2, 3, rex, 0, 1); - LDRxw_U12(x1, wback, fixedaddress); - ed = x1; - } - u8 = F8; - u8&=(rex.w?0x3f:0x1f); - if(u8) { - LSRxw(x4, ed, u8); - } else { - MOVw_REG(x4, ed); - } - BFIw(xFlags, x4, F_CF, 1); - TBNZ_MARK3(x4, 0); // bit already set, jump to next instruction - MOV32w(x4, 1); - EORxw_REG_LSL(ed, ed, x4, u8); - if(wback) { - STRxw_U12(ed, wback, fixedaddress); - } - MARK3; - break; - case 6: - INST_NAME("BTR Ed, Ib"); - SETFLAGS(X_CF, SF_SUBSET); - SET_DFNONE(x1); - if(MODREG) { - ed = xRAX+(nextop&7)+(rex.b<<3); - wback = 0; - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xff0<<2, 3, rex, 0, 1); - LDRxw_U12(x1, wback, fixedaddress); - ed = x1; - } - u8 = F8; - u8&=(rex.w?0x3f:0x1f); - if(u8) { - LSRxw(x4, ed, u8); - } else { - MOVw_REG(x4, ed); - } - BFIw(xFlags, x4, F_CF, 1); - TBZ_MARK3(x4, 0); // bit already clear, jump to next instruction - //MOVW(x14, 1); // already 0x01 - EORxw_REG_LSL(ed, ed, x4, u8); - if(wback) { - STRxw_U12(ed, wback, fixedaddress); - } - MARK3; - break; - case 7: - INST_NAME("BTC Ed, Ib"); - SETFLAGS(X_CF, SF_SUBSET); - SET_DFNONE(x1); - if(MODREG) { - ed = xRAX+(nextop&7)+(rex.b<<3); - wback = 0; - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xff0<<2, 3, rex, 0, 1); - LDRxw_U12(x1, wback, fixedaddress); - ed = x1; - } - u8 = F8; - u8&=(rex.w?0x3f:0x1f); - if(u8) { - LSRxw(x4, ed, u8); - } else { - MOVw_REG(x4, ed); - } - BFIw(xFlags, x4, F_CF, 1); - MOV32w(x4, 1); - EORxw_REG_LSL(ed, ed, x4, u8); - if(wback) { - STRxw_U12(ed, wback, fixedaddress); - } - MARK3; - break; - default: - DEFAULT; - } - break; - case 0xBB: - INST_NAME("BTC Ed, Gd"); - SETFLAGS(X_CF, SF_SUBSET); - SET_DFNONE(x1); - nextop = F8; - GETGD; - if(MODREG) { - ed = xRAX+(nextop&7)+(rex.b<<3); - wback = 0; - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, 0, 0); - ASRxw(x1, gd, 5+rex.w); // r1 = (gd>>5) - ADDx_REG_LSL(x3, wback, x1, 
2+rex.w); //(&ed)+=r1*4; - LDRxw_U12(x1, x3, fixedaddress); - ed = x1; - wback = x3; - } - if(rex.w) { - ANDx_mask(x2, gd, 1, 0, 0b00101); //mask=0x000000000000003f - } else { - ANDw_mask(x2, gd, 0, 0b00100); //mask=0x00000001f - } - LSRxw_REG(x4, ed, x2); - if(rex.w) { - ANDx_mask(x4, x4, 1, 0, 0); //mask=1 - } else { - ANDw_mask(x4, x4, 0, 0); //mask=1 - } - BFIw(xFlags, x4, F_CF, 1); - MOV32w(x4, 1); - LSLxw_REG(x4, x4, x2); - EORxw_REG(ed, ed, x4); - if(wback) { - STRxw_U12(ed, wback, fixedaddress); - } - break; - case 0xBC: - INST_NAME("BSF Gd, Ed"); - SETFLAGS(X_ZF, SF_SUBSET); - SET_DFNONE(x1); - nextop = F8; - GETED(0); - GETGD; - TSTxw_REG(ed, ed); - B_MARK(cEQ); - RBITxw(x1, ed); // reverse - CLZxw(gd, x1); // x2 gets leading 0 == BSF - MARK; - CSETw(x1, cEQ); //ZF not set - BFIw(xFlags, x1, F_ZF, 1); - break; - case 0xBD: - INST_NAME("BSR Gd, Ed"); - SETFLAGS(X_ZF, SF_SUBSET); - SET_DFNONE(x1); - nextop = F8; - GETED(0); - GETGD; - TSTxw_REG(ed, ed); - B_MARK(cEQ); - CLZxw(gd, ed); // x2 gets leading 0 - SUBxw_U12(gd, gd, rex.w?63:31); - NEGxw_REG(gd, gd); // complement - MARK; - CSETw(x1, cEQ); //ZF not set - BFIw(xFlags, x1, F_ZF, 1); - break; - case 0xBE: - INST_NAME("MOVSX Gd, Eb"); - nextop = F8; - GETGD; - if(MODREG) { - if(rex.rex) { - wback = xRAX+(nextop&7)+(rex.b<<3); - wb2 = 0; - } else { - wback = (nextop&7); - wb2 = (wback>>2)*8; - wback = xRAX+(wback&3); - } - SBFXxw(gd, wback, wb2, 8); - } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, 0xfff, 0, rex, 0, 0); - LDRSBxw_U12(gd, ed, fixedaddress); - } - break; - case 0xBF: - INST_NAME("MOVSX Gd, Ew"); - nextop = F8; - GETGD; - if(MODREG) { - ed = xRAX+(nextop&7)+(rex.b<<3); - SXTHxw(gd, ed); - } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, 0xfff<<1, 1, rex, 0, 0); - LDRSHxw_U12(gd, ed, fixedaddress); - } - break; - - case 0xC2: - INST_NAME("CMPPS Gx, Ex, Ib"); - nextop = F8; - GETGX(v0); - GETEX(v1, 1); - u8 = F8; - switch(u8&7) { - // the inversion of the params in the comparison is there to handle NaN the same way SSE does - case 0: FCMEQQS(v0, v0, v1); break; // Equal - case 1: FCMGTQS(v0, v1, v0); break; // Less than - case 2: FCMGEQS(v0, v1, v0); break; // Less or equal - case 3: FCMEQQS(v0, v0, v0); - if(v0!=v1) { - q0 = fpu_get_scratch(dyn); - FCMEQQS(q0, v1, v1); - VANDQ(v0, v0, q0); - } - VMVNQ(v0, v0); - break; // NaN (NaN is not equal to himself) - case 4: FCMEQQS(v0, v0, v1); VMVNQ(v0, v0); break; // Not Equal (or unordered on ARM, not on X86...) 
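/* Aside (editorial, not part of the original patch): the CMPPS predicates are built
 * from NEON compares that return "false" for unordered operands, so the operands are
 * swapped where needed (FCMGTQS(Ex,Gx) yields "less-than") and the NaN-aware
 * predicates use FCMEQQS(v,v) self-compares: only a NaN fails to equal itself.
 * A scalar model of the unordered/ordered predicates (function names are invented
 * for this sketch): */
static unsigned cmpps_unord(float a, float b) { /* predicate 3 */
    return (a != a || b != b) ? 0xFFFFFFFFu : 0u; /* x != x holds only for NaN */
}
static unsigned cmpps_ord(float a, float b) {   /* predicate 7 */
    return (a == a && b == b) ? 0xFFFFFFFFu : 0u;
}
/* End of aside. */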
- case 5: FCMGTQS(v0, v1, v0); VMVNQ(v0, v0); break; // Greater or equal or unordered - case 6: FCMGEQS(v0, v1, v0); VMVNQ(v0, v0); break; // Greater or unordered - case 7: FCMEQQS(v0, v0, v0); - if(v0!=v1) { - q0 = fpu_get_scratch(dyn); - FCMEQQS(q0, v1, v1); - VANDQ(v0, v0, q0); - } - break; // not NaN - } - break; - case 0xC3: - INST_NAME("MOVNTI Ed, Gd"); - nextop=F8; - GETGD; - if(MODREG) { // reg <= reg - MOVxw_REG(xRAX+(nextop&7)+(rex.b<<3), gd); - } else { // mem <= reg - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, 0, 0); - STRxw_U12(gd, ed, fixedaddress); - } - break; - case 0xC4: - INST_NAME("PINSRW Gm,Ed,Ib"); - nextop = F8; - GETGM(v0); - if(MODREG) { - u8 = (F8)&3; - ed = xRAX+(nextop&7)+(rex.b<<3); - VMOVQHfrom(v0, u8, ed); - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0, rex, 0, 1); - u8 = (F8)&3; - VLD1_16(v0, u8, wback); - } - break; - case 0xC5: - INST_NAME("PEXTRW Gd,Em,Ib"); - nextop = F8; - GETGD; - if(MODREG) { - GETEM(v0, 1); - u8 = (F8)&3; - VMOVHto(gd, v0, u8); - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0, rex, 0, 1); - u8 = (F8)&3; - LDRH_U12(gd, wback, u8*2); - } - break; - case 0xC6: - INST_NAME("SHUFPS Gx, Ex, Ib"); - nextop = F8; - GETGX(v0); - if(!MODREG) - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 1); - u8 = F8; - d0 = fpu_get_scratch(dyn); - // first two elements from Gx - for(int i=0; i<2; ++i) { - VMOVeS(d0, i, v0, (u8>>(i*2)&3)); - } - // second two from Ex - if(MODREG) { - v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); - for(int i=2; i<4; ++i) { - VMOVeS(d0, i, v1, (u8>>(i*2)&3)); - } - } else { - for(int i=2; i<4; ++i) { - ADDx_U12(x2, ed, (u8>>(i*2)&3)*4); - VLD1_32(d0, i, x2); - } - } - VMOVQ(v0, d0); - break; - - case 0xC8: - case 0xC9: - case 0xCA: - case 0xCB: - case 0xCC: - case 0xCD: - case 0xCE: - case 0xCF: /* BSWAP reg */ - INST_NAME("BSWAP Reg"); - gd = xRAX+(opcode&7)+(rex.b<<3); - REVxw(gd, gd); - break; - - case 0xD3: - INST_NAME("PSRLQ Gm,Em"); - nextop = F8; - GETGM(d0); - GETEM(d1, 0); - if(MODREG) - q0 = fpu_get_scratch(dyn); - else - q0 = d1; - NEG_64(q0, d1); - USHL_R_64(d0, d0, q0); - break; - - case 0xD5: - INST_NAME("PMULLW Gm, Em"); - nextop = F8; - GETGM(q0); - GETEM(q1, 0); - VMUL_16(q0, q0, q1); - break; - - case 0xD7: - nextop = F8; - INST_NAME("PMOVMSKB Gd, Em"); - v0 = fpu_get_scratch(dyn); - v1 = fpu_get_scratch(dyn); - q1 = fpu_get_scratch(dyn); - GETEM(q0, 0); - GETGD; - TABLE64(x1, (uintptr_t)&mask_shift8); - VLDR64_U12(v0, x1, 0); // load shift - MOVI_8(v1, 0x80); // load mask - VAND(q1, v1, q0); - USHL_8(q1, q1, v0); // shift - UADDLV_8(q1, q1); // accumalte - VMOVBto(gd, q1, 0); - break; - case 0xD8: - INST_NAME("PSUBUSB Gm, Em"); - nextop = F8; - GETGM(q0); - GETEM(q1, 0); - UQSUB_8(q0, q0, q1); - break; - case 0xD9: - INST_NAME("PSUBUSW Gm, Em"); - nextop = F8; - GETGM(q0); - GETEM(q1, 0); - UQSUB_16(q0, q0, q1); - break; - case 0xDA: - INST_NAME("PMINUB Gm, Em"); - nextop = F8; - GETGM(d0); - GETEM(d1, 0); - UMIN_8(d0, d0, d1); - break; - case 0xDB: - INST_NAME("PAND Gm, Em"); - nextop = F8; - GETGM(v0); - GETEM(v1, 0); - VAND(v0, v0, v1); - break; - case 0xDC: - INST_NAME("PADDUSB Gm,Em"); - nextop = F8; - GETGM(q0); - GETEM(q1, 0); - UQADD_8(q0, q0, q1); - break; - case 0xDD: - INST_NAME("PADDUSW Gm,Em"); - nextop = F8; - GETGM(q0); - GETEM(q1, 0); - UQADD_16(q0, q0, q1); - break; - case 0xDE: - INST_NAME("PMAXUB Gm, Em"); - nextop = 
F8; - GETGM(d0); - GETEM(d1, 0); - UMAX_8(d0, d0, d1); - break; - case 0xDF: - INST_NAME("PANDN Gm, Em"); - nextop = F8; - GETGM(v0); - GETEM(v1, 0); - VBIC(v0, v1, v0); - break; - case 0xE0: - INST_NAME("PAVGB Gm, Em"); - nextop = F8; - GETGM(v0); - GETEM(v1, 0); - URHADD_8(v0, v0, v1); - break; - - case 0xE3: - INST_NAME("PAVGW Gm,Em"); - nextop = F8; - GETGM(d0); - GETEM(d1, 0); - URHADD_16(d0, d0, d1); - break; - - case 0xE5: - INST_NAME("PMULHW Gm,Em"); - nextop = F8; - GETGM(v0); - GETEM(v1, 0); - q0 = fpu_get_scratch(dyn); - VSMULL_16(q0, v0, v1); - SQSHRN_16(v0, q0, 16); - break; - - case 0xE7: - INST_NAME("MOVNTQ Em, Gm"); // Non Temporal par not handled for now - nextop = F8; - gd = (nextop&0x38)>>3; - if((nextop&0xC0)==0xC0) { - DEFAULT; - } else { - v0 = mmx_get_reg(dyn, ninst, x1, gd); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0); - VSTR64_U12(v0, ed, fixedaddress); - } - break; - case 0xE8: - INST_NAME("PSUBSB Gm,Em"); - nextop = F8; - GETGM(v0); - GETEM(q0, 0); - SQSUB_8(v0, v0, q0); - break; - case 0xE9: - INST_NAME("PSUBSW Gm,Em"); - nextop = F8; - GETGM(v0); - GETEM(q0, 0); - SQSUB_16(v0, v0, q0); - break; - - case 0xEB: - INST_NAME("POR Gm, Em"); - nextop = F8; - GETGM(v0); - GETEM(v1, 0); - VORR(v0, v0, v1); - break; - case 0xEC: - INST_NAME("PADDSB Gm,Em"); - nextop = F8; - GETGM(d0); - GETEM(d1, 0); - SQADD_8(d0, d0, d1); - break; - case 0xED: - INST_NAME("PADDSW Gm,Em"); - nextop = F8; - GETGM(d0); - GETEM(d1, 0); - SQADD_16(d0, d0, d1); - break; - - case 0xEF: - INST_NAME("PXOR Gm,Em"); - nextop = F8; - gd = ((nextop&0x38)>>3); - if(MODREG && ((nextop&7))==gd) { - // special case for PXOR Gx, Gx - q0 = mmx_get_reg_empty(dyn, ninst, x1, gd); - VEOR(q0, q0, q0); - } else { - q0 = mmx_get_reg(dyn, ninst, x1, gd); - GETEM(q1, 0); - VEOR(q0, q0, q1); - } - break; - - case 0xF2: - INST_NAME("PSLLD Gm,Em"); - nextop = F8; - GETGM(d0); - GETEM(d1, 0); - v0 = fpu_get_scratch(dyn); - VMOVeD(v0, 0, d1, 0); - VMOVeD(v0, 1, d1, 0); - SQXTN_32(v0, v0); // 2*q1 in 32bits now - SSHL_32(d0, d0, v0); - break; - - case 0xF5: - INST_NAME("PMADDWD Gx, Ex"); - nextop = F8; - GETGM(v0); - GETEM(v1, 0); - q0 = fpu_get_scratch(dyn); - VSMULL_16(q0, v0, v1); - VADDPQ_32(q0, q0, q0); //ADDP from Q to non-Q? - VMOVQ(v0, q0); - break; - case 0xF6: - INST_NAME("PSADBW Gm, Em"); - nextop = F8; - GETGM(q0); - GETEM(q1, 0); - d0 = fpu_get_scratch(dyn); - d1 = fpu_get_scratch(dyn); - VEOR(d1, d1, d1); // is it necessary? 
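A minimal reference sketch, not box64 code, of what the PMOVMSKB Gd, Em translation above has to produce: bit i of the destination is the top bit of byte i of the 64-bit MMX source. The emitted NEON sequence gets there by masking every byte with 0x80, shifting byte i right by 7-i (USHL with the negative counts loaded from mask_shift8) and summing the lanes with UADDLV. The helper name below is hypothetical.

#include <stdint.h>

/* bit i of the result = most-significant bit of byte i of src */
static uint32_t pmovmskb_mmx(uint64_t src)
{
    uint32_t mask = 0;
    for (int i = 0; i < 8; ++i)
        mask |= (uint32_t)((src >> (i * 8 + 7)) & 1u) << i;
    return mask;
}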
- UABDL_8(d0, q0, q1); - UADDLVQ_16(d1, d0); - VMOVeD(q0, 0, d1, 0); - break; - - case 0xF8: - INST_NAME("PSUBB Gm, Em"); - nextop = F8; - GETGM(v0); - GETEM(v1, 0); - VSUB_8(v0, v0, v1); - break; - case 0xF9: - INST_NAME("PSUBW Gm, Em"); - nextop = F8; - GETGM(v0); - GETEM(v1, 0); - VSUB_16(v0, v0, v1); - break; - case 0xFA: - INST_NAME("PSUBD Gm, Em"); - nextop = F8; - GETGM(v0); - GETEM(v1, 0); - VSUB_32(v0, v0, v1); - break; - - case 0xFC: - INST_NAME("PADDB Gm, Em"); - nextop = F8; - GETGM(v0); - GETEM(v1, 0); - VADD_8(v0, v0, v1); - break; - case 0xFD: - INST_NAME("PADDW Gm, Em"); - nextop = F8; - GETGM(v0); - GETEM(v1, 0); - VADD_16(v0, v0, v1); - break; - case 0xFE: - INST_NAME("PADDD Gm, Em"); - nextop = F8; - GETGM(v0); - GETEM(v1, 0); - VADD_32(v0, v0, v1); - break; - - default: - DEFAULT; - } - return addr; -} diff --git a/src/dynarec/dynarec_arm64_64.c b/src/dynarec/dynarec_arm64_64.c deleted file mode 100644 index 871d0e20..00000000 --- a/src/dynarec/dynarec_arm64_64.c +++ /dev/null @@ -1,880 +0,0 @@ -#include -#include -#include -#include -#include - -#include "debug.h" -#include "box64context.h" -#include "dynarec.h" -#include "emu/x64emu_private.h" -#include "emu/x64run_private.h" -#include "x64run.h" -#include "x64emu.h" -#include "box64stack.h" -#include "callback.h" -#include "emu/x64run_private.h" -#include "x64trace.h" -#include "dynarec_arm64.h" -#include "dynarec_arm64_private.h" -#include "arm64_printer.h" - -#include "dynarec_arm64_helper.h" -#include "dynarec_arm64_functions.h" - -#define GETG gd = ((nextop&0x38)>>3)+(rex.r<<3) - -uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int seg, int* ok, int* need_epilog) -{ - (void)ip; (void)rep; (void)need_epilog; - - uint8_t opcode = F8; - uint8_t nextop; - uint8_t u8; - uint8_t gd, ed, eb1, eb2, gb1, gb2; - uint8_t wback, wb1, wb2, wb; - int64_t i64, j64; - int v0; - int q0; - int d0; - int64_t fixedaddress; - MAYUSE(eb1); - MAYUSE(eb2); - MAYUSE(wb1); - MAYUSE(wb2); - MAYUSE(gb1); - MAYUSE(gb2); - MAYUSE(j64); - MAYUSE(d0); - MAYUSE(q0); - MAYUSE(v0); - - while((opcode==0xF2) || (opcode==0xF3)) { - rep = opcode-0xF1; - opcode = F8; - } - // REX prefix before the F0 are ignored - rex.rex = 0; - while(opcode>=0x40 && opcode<=0x4f) { - rex.rex = opcode; - opcode = F8; - } - - switch(opcode) { - - case 0x03: - INST_NAME("ADD Gd, Seg:Ed"); - SETFLAGS(X_ALL, SF_SET_PENDING); - grab_segdata(dyn, addr, ninst, x4, seg); - nextop = F8; - GETGD; - GETEDO(x4, 0); - emit_add32(dyn, ninst, rex, gd, ed, x3, x4); - break; - - case 0x0F: - opcode = F8; - switch(opcode) { - - case 0x10: - switch(rep) { - case 1: - INST_NAME("MOVSD Gx, Ex"); - nextop = F8; - GETG; - if(MODREG) { - ed = (nextop&7)+ (rex.b<<3); - v0 = sse_get_reg(dyn, ninst, x1, gd); - d0 = sse_get_reg(dyn, ninst, x1, ed); - VMOVeD(v0, 0, d0, 0); - } else { - grab_segdata(dyn, addr, ninst, x4, seg); - v0 = sse_get_reg_empty(dyn, ninst, x1, gd); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0); - ADDx_REG(x4, x4, ed); - VLDR64_U12(v0, x4, fixedaddress); // upper part reseted - } - break; - case 2: - INST_NAME("MOVSS Gx, Ex"); - nextop = F8; - GETG; - if(MODREG) { - v0 = sse_get_reg(dyn, ninst, x1, gd); - q0 = sse_get_reg(dyn, ninst, x1, (nextop&7) + (rex.b<<3)); - VMOVeS(v0, 0, q0, 0); - } else { - grab_segdata(dyn, addr, ninst, x4, seg); - v0 = sse_get_reg_empty(dyn, ninst, x1, gd); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); - 
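As a hedged reference for the MOVSD Gx, Ex case above (and the MOVSS variant that follows): the memory form clears the upper half of the destination XMM register, which is why the load path uses a fresh register and a plain 64-bit VLDR ("upper part reset"), while the register form only replaces the low 64 bits via VMOVeD. A sketch of those semantics on raw bit patterns; the type and function names are illustrative only.

#include <stdint.h>

typedef struct { uint64_t lo, hi; } xmm_bits_t;

/* MOVSD xmm, m64: low half loaded, high half cleared */
static xmm_bits_t movsd_from_mem(uint64_t m64)
{
    xmm_bits_t r = { m64, 0 };
    return r;
}

/* MOVSD xmm, xmm: only the low 64 bits of the destination change */
static xmm_bits_t movsd_from_reg(xmm_bits_t dst, xmm_bits_t src)
{
    dst.lo = src.lo;
    return dst;
}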
ADDx_REG(x4, x4, ed); - VLDR32_U12(v0, x4, fixedaddress); - } - break; - default: - DEFAULT; - } - break; - case 0x11: - switch(rep) { - case 1: - INST_NAME("MOVSD Ex, Gx"); - nextop = F8; - GETG; - v0 = sse_get_reg(dyn, ninst, x1, gd); - if(MODREG) { - ed = (nextop&7)+ (rex.b<<3); - d0 = sse_get_reg(dyn, ninst, x1, ed); - VMOVeD(d0, 0, v0, 0); - } else { - grab_segdata(dyn, addr, ninst, x4, seg); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0); - ADDx_REG(x4, x4, ed); - VSTR64_U12(v0, x4, fixedaddress); - } - break; - case 2: - INST_NAME("MOVSS Ex, Gx"); - nextop = F8; - GETG; - v0 = sse_get_reg(dyn, ninst, x1, gd); - if(MODREG) { - q0 = sse_get_reg(dyn, ninst, x1, (nextop&7) + (rex.b<<3)); - VMOVeS(q0, 0, v0, 0); - } else { - grab_segdata(dyn, addr, ninst, x4, seg); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); - ADDx_REG(x4, x4, ed); - VSTR32_U12(v0, x4, fixedaddress); - } - break; - default: - DEFAULT; - } - break; - - case 0xAF: - INST_NAME("IMUL Gd, Ed"); - SETFLAGS(X_ALL, SF_PENDING); - nextop = F8; - grab_segdata(dyn, addr, ninst, x4, seg); - GETGD; - GETEDO(x4, 0); - if(rex.w) { - // 64bits imul - UFLAG_IF { - SMULH(x3, gd, ed); - MULx(gd, gd, ed); - UFLAG_OP1(x3); - UFLAG_RES(gd); - UFLAG_DF(x3, d_imul64); - } else { - MULxw(gd, gd, ed); - } - } else { - // 32bits imul - UFLAG_IF { - SMULL(gd, gd, ed); - UFLAG_RES(gd); - LSRx(x3, gd, 32); - UFLAG_OP1(x3); - UFLAG_DF(x3, d_imul32); - MOVw_REG(gd, gd); - } else { - MULxw(gd, gd, ed); - } - } - break; - - default: - DEFAULT; - } - break; - - case 0x33: - INST_NAME("XOR Gd, Seg:Ed"); - SETFLAGS(X_ALL, SF_SET_PENDING); - grab_segdata(dyn, addr, ninst, x4, seg); - nextop = F8; - GETGD; - GETEDO(x4, 0); - emit_xor32(dyn, ninst, rex, gd, ed, x3, x4); - break; - - case 0x39: - INST_NAME("CMP Seg:Ed, Gd"); - SETFLAGS(X_ALL, SF_SET_PENDING); - grab_segdata(dyn, addr, ninst, x4, seg); - nextop = F8; - GETGD; - GETEDO(x4, 0); - emit_cmp32(dyn, ninst, rex, ed, gd, x3, x4, x5); - break; - - case 0x66: - addr = dynarec64_6664(dyn, addr, ip, ninst, rex, rep, ok, need_epilog); - break; - - case 0x80: - nextop = F8; - grab_segdata(dyn, addr, ninst, x1, seg); - switch((nextop>>3)&7) { - case 0: //ADD - INST_NAME("ADD Eb, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); - GETEBO(x1, 1); - u8 = F8; - emit_add8c(dyn, ninst, x1, u8, x2, x4); - EBBACK; - break; - case 1: //OR - INST_NAME("OR Eb, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); - GETEBO(x1, 1); - u8 = F8; - emit_or8c(dyn, ninst, x1, u8, x2, x4); - EBBACK; - break; - case 2: //ADC - INST_NAME("ADC Eb, Ib"); - READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); - GETEBO(x1, 1); - u8 = F8; - emit_adc8c(dyn, ninst, x1, u8, x2, x4, x5); - EBBACK; - break; - case 3: //SBB - INST_NAME("SBB Eb, Ib"); - READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); - GETEBO(x1, 1); - u8 = F8; - emit_sbb8c(dyn, ninst, x1, u8, x2, x4, x5); - EBBACK; - break; - case 4: //AND - INST_NAME("AND Eb, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); - GETEBO(x1, 1); - u8 = F8; - emit_and8c(dyn, ninst, x1, u8, x2, x4); - EBBACK; - break; - case 5: //SUB - INST_NAME("SUB Eb, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); - GETEBO(x1, 1); - u8 = F8; - emit_sub8c(dyn, ninst, x1, u8, x2, x4, x5); - EBBACK; - break; - case 6: //XOR - INST_NAME("XOR Eb, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); - GETEBO(x1, 1); - u8 = F8; - emit_xor8c(dyn, ninst, x1, u8, x2, x4); - EBBACK; - break; - case 7: //CMP - INST_NAME("CMP Eb, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); - GETEBO(x1, 
1); - u8 = F8; - if(u8) { - MOV32w(x2, u8); - emit_cmp8(dyn, ninst, x1, x2, x3, x4, x5); - } else { - emit_cmp8_0(dyn, ninst, x1, x3, x4); - } - break; - default: - DEFAULT; - } - break; - case 0x81: - case 0x83: - nextop = F8; - grab_segdata(dyn, addr, ninst, x6, seg); - switch((nextop>>3)&7) { - case 0: //ADD - if(opcode==0x81) {INST_NAME("ADD Ed, Id");} else {INST_NAME("ADD Ed, Ib");} - SETFLAGS(X_ALL, SF_SET_PENDING); - GETEDO(x6, (opcode==0x81)?4:1); - if(opcode==0x81) i64 = F32S; else i64 = F8S; - emit_add32c(dyn, ninst, rex, ed, i64, x3, x4, x5); - WBACKO(x6); - break; - case 1: //OR - if(opcode==0x81) {INST_NAME("OR Ed, Id");} else {INST_NAME("OR Ed, Ib");} - SETFLAGS(X_ALL, SF_SET_PENDING); - GETEDO(x6, (opcode==0x81)?4:1); - if(opcode==0x81) i64 = F32S; else i64 = F8S; - emit_or32c(dyn, ninst, rex, ed, i64, x3, x4); - WBACKO(x6); - break; - case 2: //ADC - if(opcode==0x81) {INST_NAME("ADC Ed, Id");} else {INST_NAME("ADC Ed, Ib");} - READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); - GETEDO(x6, (opcode==0x81)?4:1); - if(opcode==0x81) i64 = F32S; else i64 = F8S; - MOV64xw(x5, i64); - emit_adc32(dyn, ninst, rex, ed, x5, x3, x4); - WBACKO(x6); - break; - case 3: //SBB - if(opcode==0x81) {INST_NAME("SBB Ed, Id");} else {INST_NAME("SBB Ed, Ib");} - READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); - GETEDO(x6, (opcode==0x81)?4:1); - if(opcode==0x81) i64 = F32S; else i64 = F8S; - MOV64xw(x5, i64); - emit_sbb32(dyn, ninst, rex, ed, x5, x3, x4); - WBACKO(x6); - break; - case 4: //AND - if(opcode==0x81) {INST_NAME("AND Ed, Id");} else {INST_NAME("AND Ed, Ib");} - SETFLAGS(X_ALL, SF_SET_PENDING); - GETEDO(x6, (opcode==0x81)?4:1); - if(opcode==0x81) i64 = F32S; else i64 = F8S; - emit_and32c(dyn, ninst, rex, ed, i64, x3, x4); - WBACKO(x6); - break; - case 5: //SUB - if(opcode==0x81) {INST_NAME("SUB Ed, Id");} else {INST_NAME("SUB Ed, Ib");} - SETFLAGS(X_ALL, SF_SET_PENDING); - GETEDO(x6, (opcode==0x81)?4:1); - if(opcode==0x81) i64 = F32S; else i64 = F8S; - emit_sub32c(dyn, ninst, rex, ed, i64, x3, x4, x5); - WBACKO(x6); - break; - case 6: //XOR - if(opcode==0x81) {INST_NAME("XOR Ed, Id");} else {INST_NAME("XOR Ed, Ib");} - SETFLAGS(X_ALL, SF_SET_PENDING); - GETEDO(x6, (opcode==0x81)?4:1); - if(opcode==0x81) i64 = F32S; else i64 = F8S; - emit_xor32c(dyn, ninst, rex, ed, i64, x3, x4); - WBACKO(x6); - break; - case 7: //CMP - if(opcode==0x81) {INST_NAME("CMP Ed, Id");} else {INST_NAME("CMP Ed, Ib");} - SETFLAGS(X_ALL, SF_SET_PENDING); - GETEDO(x6, (opcode==0x81)?4:1); - if(opcode==0x81) i64 = F32S; else i64 = F8S; - if(i64) { - MOV64xw(x2, i64); - emit_cmp32(dyn, ninst, rex, ed, x2, x3, x4, x5); - } else - emit_cmp32_0(dyn, ninst, rex, ed, x3, x4); - break; - } - break; - case 0x8A: - INST_NAME("MOV Gb, Eb"); - nextop = F8; - if(rex.rex) { - gb1 = gd = xRAX+((nextop&0x38)>>3)+(rex.r<<3); - gb2=0; - } else { - gd = (nextop&0x38)>>3; - gb1 = xRAX+(gd&3); - gb2 = ((gd&4)>>2); - } - if(MODREG) { - if(rex.rex) { - wback = xRAX+(nextop&7)+(rex.b<<3); - wb2 = 0; - } else { - wback = (nextop&7); - wb2 = (wback>>2); - wback = xRAX+(wback&3); - } - if(wb2) { - UBFXw(x4, wback, wb2*8, 8); - ed = x4; - } else { - ed = wback; - } - } else { - grab_segdata(dyn, addr, ninst, x4, seg); - addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0, rex, 0, 0); - LDRB_REG(x4, wback, x4); - ed = x4; - } - BFIx(gb1, ed, gb2*8, 8); - break; - case 0x89: - INST_NAME("MOV Seg:Ed, Gd"); - grab_segdata(dyn, addr, ninst, x4, seg); - nextop=F8; - GETGD; - if(MODREG) { // reg <= reg - 
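The MOV Gb, Eb case above leans on the legacy x86 byte-register encoding: without a REX prefix, register numbers 4 to 7 name AH, CH, DH and BH, i.e. byte 1 of RAX, RCX, RDX and RBX, which is what the (reg&3) base plus (reg&4)>>2 byte index and the UBFX/BFI at offset eb2*8 implement. A hedged sketch of that decode; the function and array names are illustrative only.

#include <stdint.h>

/* enc is the 3-bit register field; regs[0..3] hold RAX, RCX, RDX, RBX */
static uint8_t legacy_byte_reg(const uint64_t regs[4], int enc)
{
    int base = enc & 3;            /* AL/CL/DL/BL share a base with AH/CH/DH/BH */
    int byte = (enc & 4) >> 2;     /* 0 = low byte, 1 = byte 1 (AH, CH, DH, BH) */
    return (uint8_t)(regs[base] >> (byte * 8));
}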
MOVxw_REG(xRAX+(nextop&7)+(rex.b<<3), gd); - } else { // mem <= reg - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0, rex, 0, 0); - STRxw_REG(gd, ed, x4); - } - break; - - case 0x8B: - INST_NAME("MOV Gd, Seg:Ed"); - grab_segdata(dyn, addr, ninst, x4, seg); - nextop=F8; - GETGD; - if(MODREG) { // reg <= reg - MOVxw_REG(gd, xRAX+(nextop&7)+(rex.b<<3)); - } else { // mem <= reg - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0, rex, 0, 0); - LDRxw_REG(gd, ed, x4); - } - break; - - case 0xC6: - INST_NAME("MOV Seg:Eb, Ib"); - grab_segdata(dyn, addr, ninst, x4, seg); - nextop=F8; - if(MODREG) { // reg <= u8 - u8 = F8; - if(!rex.rex) { - ed = (nextop&7); - eb1 = xRAX+(ed&3); // Ax, Cx, Dx or Bx - eb2 = (ed&4)>>2; // L or H - } else { - eb1 = xRAX+(nextop&7)+(rex.b<<3); - eb2 = 0; - } - MOV32w(x3, u8); - BFIx(eb1, x3, eb2*8, 8); - } else { // mem <= u8 - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 1); - u8 = F8; - MOV32w(x3, u8); - STRB_REG(x3, ed, x4); - } - break; - case 0xC7: - INST_NAME("MOV Seg:Ed, Id"); - grab_segdata(dyn, addr, ninst, x4, seg); - nextop=F8; - if(MODREG) { // reg <= i32 - i64 = F32S; - ed = xRAX+(nextop&7)+(rex.b<<3); - MOV64xw(ed, i64); - } else { // mem <= i32 - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0, rex, 0, 4); - i64 = F32S; - MOV64xw(x3, i64); - STRxw_REG(x3, ed, x4); - } - break; - - case 0xD1: - nextop = F8; - grab_segdata(dyn, addr, ninst, x6, seg); - switch((nextop>>3)&7) { - case 0: - INST_NAME("ROL Ed, 1"); - SETFLAGS(X_OF|X_CF, SF_SUBSET); - GETEDO(x6, 0); - emit_rol32c(dyn, ninst, rex, ed, 1, x3, x4); - WBACKO(x6); - break; - case 1: - INST_NAME("ROR Ed, 1"); - SETFLAGS(X_OF|X_CF, SF_SUBSET); - GETEDO(x6, 0); - emit_ror32c(dyn, ninst, rex, ed, 1, x3, x4); - WBACKO(x6); - break; - case 2: - INST_NAME("RCL Ed, 1"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - READFLAGS(X_CF); - SETFLAGS(X_OF|X_CF, SF_SET); - MOV32w(x2, 1); - GETEDO(x6, 0); - if(wback) {ADDx_REG(x6, x6, wback); wback=x6;} - if(ed!=x1) {MOVxw_REG(x1, ed);} - CALL_(rcl32, ed, x6); - WBACK; - break; - case 3: - INST_NAME("RCR Ed, 1"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - READFLAGS(X_CF); - SETFLAGS(X_OF|X_CF, SF_SET); - MOV32w(x2, 1); - GETEDO(x6, 0); - if(wback) {ADDx_REG(x6, x6, wback); wback=x6;} - if(ed!=x1) {MOVxw_REG(x1, ed);} - CALL_(rcr32, ed, x6); - WBACK; - break; - case 4: - case 6: - INST_NAME("SHL Ed, 1"); - SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined - GETEDO(x6, 0); - emit_shl32c(dyn, ninst, rex, ed, 1, x3, x4); - WBACKO(x6); - break; - case 5: - INST_NAME("SHR Ed, 1"); - SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined - GETEDO(x6, 0); - emit_shr32c(dyn, ninst, rex, ed, 1, x3, x4); - WBACKO(x6); - break; - case 7: - INST_NAME("SAR Ed, 1"); - SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined - GETEDO(x6, 0); - emit_sar32c(dyn, ninst, rex, ed, 1, x3, x4); - WBACKO(x6); - break; - } - break; - case 0xD3: - nextop = F8; - grab_segdata(dyn, addr, ninst, x6, seg); - switch((nextop>>3)&7) { - case 0: - INST_NAME("ROL Ed, CL"); - SETFLAGS(X_OF|X_CF, SF_SUBSET); - if(rex.w) { - ANDSx_mask(x3, xRCX, 1, 0, 0b00101); //mask=0x000000000000003f - } else { - ANDSw_mask(x3, xRCX, 0, 0b00100); //mask=0x00000001f - } - MOV64xw(x4, (rex.w?64:32)); - SUBx_REG(x3, x4, x3); - GETEDO(x6, 0); - if(!rex.w && MODREG) {MOVw_REG(ed, ed);} - B_NEXT(cEQ); - RORxw_REG(ed, ed, x3); - WBACKO(x6); - UFLAG_IF { // calculate flags directly - CMPSw_U12(x3, 
rex.w?63:31); - B_MARK(cNE); - LSRxw(x4, ed, rex.w?63:31); - ADDxw_REG(x4, x4, ed); - BFIw(xFlags, x4, F_OF, 1); - MARK; - BFIw(xFlags, ed, F_CF, 1); - UFLAG_DF(x2, d_none); - } - break; - case 1: - INST_NAME("ROR Ed, CL"); - SETFLAGS(X_OF|X_CF, SF_SUBSET); - if(rex.w) { - ANDSx_mask(x3, xRCX, 1, 0, 0b00101); //mask=0x000000000000003f - } else { - ANDSw_mask(x3, xRCX, 0, 0b00100); //mask=0x00000001f - } - GETEDO(x6, 0); - if(!rex.w && MODREG) {MOVw_REG(ed, ed);} - B_NEXT(cEQ); - RORxw_REG(ed, ed, x3); - WBACKO(x6); - UFLAG_IF { // calculate flags directly - CMPSw_U12(x3, 1); - B_MARK(cNE); - LSRxw(x2, ed, rex.w?62:30); // x2 = d>>30 - EORw_REG_LSR(x2, x2, x2, 1); // x2 = ((d>>30) ^ ((d>>30)>>1)) - BFIw(xFlags, x2, F_OF, 1); - MARK; - LSRxw(x2, ed, rex.w?63:31); - BFIw(xFlags, x2, F_CF, 1); - UFLAG_DF(x2, d_none); - } - break; - case 2: - INST_NAME("RCL Ed, CL"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - READFLAGS(X_CF); - SETFLAGS(X_OF|X_CF, SF_SET); - if(rex.w) { - ANDSx_mask(x2, xRCX, 1, 0, 0b00101); //mask=0x000000000000003f - } else { - ANDSw_mask(x2, xRCX, 0, 0b00100); //mask=0x00000001f - } - GETEDO(x6, 0); - if(wback) {ADDx_REG(x6, x6, wback); wback=x6;} - if(!rex.w && MODREG) {MOVw_REG(ed, ed);} - B_NEXT(cEQ); - CALL_(rex.w?((void*)rcl64):((void*)rcl32), ed, x6); - WBACK; - break; - case 3: - INST_NAME("RCR Ed, CL"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - READFLAGS(X_CF); - SETFLAGS(X_OF|X_CF, SF_SET); - if(rex.w) { - ANDSx_mask(x2, xRCX, 1, 0, 0b00101); //mask=0x000000000000003f - } else { - ANDSw_mask(x2, xRCX, 0, 0b00100); //mask=0x00000001f - } - GETEDO(x6, 0); - if(wback) {ADDx_REG(x6, x6, wback); wback=x6;} - if(!rex.w && MODREG) {MOVw_REG(ed, ed);} - B_NEXT(cEQ); - CALL_(rex.w?((void*)rcr64):((void*)rcr32), ed, x6); - WBACK; - break; - case 4: - case 6: - INST_NAME("SHL Ed, CL"); - SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined - if(rex.w) { - ANDSx_mask(x3, xRCX, 1, 0, 0b00101); //mask=0x000000000000003f - } else { - ANDSw_mask(x3, xRCX, 0, 0b00100); //mask=0x00000001f - } - GETEDO(x6, 0); - if(!rex.w && MODREG) {MOVw_REG(ed, ed);} - B_NEXT(cEQ); - emit_shl32(dyn, ninst, rex, ed, x3, x5, x4); - WBACKO(x6); - break; - case 5: - INST_NAME("SHR Ed, CL"); - SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined - if(rex.w) { - ANDSx_mask(x3, xRCX, 1, 0, 0b00101); //mask=0x000000000000003f - } else { - ANDSw_mask(x3, xRCX, 0, 0b00100); //mask=0x00000001f - } - GETEDO(x6, 0); - if(!rex.w && MODREG) {MOVw_REG(ed, ed);} - B_NEXT(cEQ); - emit_shr32(dyn, ninst, rex, ed, x3, x5, x4); - WBACKO(x6); - break; - case 7: - INST_NAME("SAR Ed, CL"); - SETFLAGS(X_ALL, SF_PENDING); - if(rex.w) { - ANDSx_mask(x3, xRCX, 1, 0, 0b00101); //mask=0x000000000000003f - } else { - ANDSw_mask(x3, xRCX, 0, 0b00100); //mask=0x00000001f - } - GETEDO(x6, 0); - if(!rex.w && MODREG) {MOVw_REG(ed, ed);} - B_NEXT(cEQ); - UFLAG_OP12(ed, x3); - ASRxw_REG(ed, ed, x3); - WBACKO(x6); - UFLAG_RES(ed); - UFLAG_DF(x3, rex.w?d_sar64:d_sar32); - break; - } - break; - - case 0xF7: - nextop = F8; - switch((nextop>>3)&7) { - case 0: - case 1: - INST_NAME("TEST Ed, Id"); - SETFLAGS(X_ALL, SF_SET_PENDING); - GETEDO(x6, 4); - i64 = F32S; - MOV64xw(x2, i64); - emit_test32(dyn, ninst, rex, ed, x2, x3, x4); - break; - case 2: - INST_NAME("NOT Ed"); - GETEDO(x6, 4); - MVNxw_REG(ed, ed); - WBACKO(x6); - break; - case 3: - INST_NAME("NEG Ed"); - SETFLAGS(X_ALL, SF_SET_PENDING); - GETEDO(x6, 0); - emit_neg32(dyn, ninst, rex, ed, x3, x4); - WBACKO(x6); - break; - case 4: - INST_NAME("MUL EAX, 
Ed"); - SETFLAGS(X_ALL, SF_PENDING); - UFLAG_DF(x2, rex.w?d_mul64:d_mul32); - GETEDO(x6, 0); - if(rex.w) { - if(ed==xRDX) gd=x3; else gd=xRDX; - UMULH(gd, xRAX, ed); - MULx(xRAX, xRAX, ed); - if(gd!=xRDX) {MOVx_REG(xRDX, gd);} - } else { - UMULL(xRDX, xRAX, ed); //64 <- 32x32 - MOVw_REG(xRAX, xRDX); - LSRx(xRDX, xRDX, 32); - } - UFLAG_RES(xRAX); - UFLAG_OP1(xRDX); - break; - case 5: - INST_NAME("IMUL EAX, Ed"); - SETFLAGS(X_ALL, SF_PENDING); - UFLAG_DF(x2, rex.w?d_imul64:d_imul32); - GETEDO(x6, 0); - if(rex.w) { - if(ed==xRDX) gd=x3; else gd=xRDX; - SMULH(gd, xRAX, ed); - MULx(xRAX, xRAX, ed); - if(gd!=xRDX) {MOVx_REG(xRDX, gd);} - } else { - SMULL(xRDX, xRAX, ed); //64 <- 32x32 - MOVw_REG(xRAX, xRDX); - LSRx(xRDX, xRDX, 32); - } - UFLAG_RES(xRAX); - UFLAG_OP1(xRDX); - break; - case 6: - INST_NAME("DIV Ed"); - SETFLAGS(X_ALL, SF_SET); - if(!rex.w) { - SET_DFNONE(x2); - GETEDO(x6, 0); - MOVw_REG(x3, xRAX); - ORRx_REG_LSL(x3, x3, xRDX, 32); - if(MODREG) { - MOVw_REG(x4, ed); - ed = x4; - } - UDIVx(x2, x3, ed); - MSUBx(x4, x2, ed, xRAX); - MOVw_REG(xRAX, x2); - MOVw_REG(xRDX, x4); - } else { - if(ninst - && dyn->insts[ninst-1].x64.addr - && *(uint8_t*)(dyn->insts[ninst-1].x64.addr)==0x31 - && *(uint8_t*)(dyn->insts[ninst-1].x64.addr+1)==0xD2) { - SET_DFNONE(x2); - GETEDO(x6, 0); - UDIVx(x2, xRAX, ed); - MSUBx(xRDX, x2, ed, xRAX); - MOVx_REG(xRAX, x2); - } else { - GETEDO(x6, 0); - CBZxw_MARK(xRDX); - if(ed!=x1) {MOVx_REG(x1, ed);} - CALL(div64, -1); - B_NEXT_nocond; - MARK; - UDIVx(x2, xRAX, ed); - MSUBx(xRDX, x2, ed, xRAX); - MOVx_REG(xRAX, x2); - SET_DFNONE(x2); - } - } - break; - case 7: - INST_NAME("IDIV Ed"); - SETFLAGS(X_ALL, SF_SET); - if(!rex.w) { - SET_DFNONE(x2) - GETSEDOw(x6, 0); - MOVw_REG(x3, xRAX); - ORRx_REG_LSL(x3, x3, xRDX, 32); - SDIVx(x2, x3, wb); - MSUBx(x4, x2, wb, x3); - MOVw_REG(xRAX, x2); - MOVw_REG(xRDX, x4); - } else { - if(ninst && dyn->insts - && dyn->insts[ninst-1].x64.addr - && *(uint8_t*)(dyn->insts[ninst-1].x64.addr)==0x48 - && *(uint8_t*)(dyn->insts[ninst-1].x64.addr+1)==0x99) { - SET_DFNONE(x2) - GETEDO(x6, 0); - SDIVx(x2, xRAX, ed); - MSUBx(xRDX, x2, ed, xRAX); - MOVx_REG(xRAX, x2); - } else { - GETEDO(x6, 0); - CBZxw_MARK(xRDX); - MVNx_REG(x2, xRDX); - CBZxw_MARK(x2); - if(ed!=x1) {MOVx_REG(x1, ed);} - CALL((void*)idiv64, -1); - B_NEXT_nocond; - MARK; - SDIVx(x2, xRAX, ed); - MSUBx(xRDX, x2, ed, xRAX); - MOVx_REG(xRAX, x2); - SET_DFNONE(x2) - } - } - break; - } - break; - - case 0xFF: - nextop = F8; - grab_segdata(dyn, addr, ninst, x6, seg); - switch((nextop>>3)&7) { - case 0: // INC Ed - INST_NAME("INC Ed"); - SETFLAGS(X_ALL&~X_CF, SF_SUBSET); - GETEDO(x6, 0); - emit_inc32(dyn, ninst, rex, ed, x3, x4); - WBACKO(x6); - break; - case 1: //DEC Ed - INST_NAME("DEC Ed"); - SETFLAGS(X_ALL&~X_CF, SF_SUBSET); - GETEDO(x6, 0); - emit_dec32(dyn, ninst, rex, ed, x3, x4); - WBACKO(x6); - break; - case 2: // CALL Ed - INST_NAME("CALL Ed"); - PASS2IF(((ninst && dyn->insts[ninst-1].x64.set_flags) - || ((ninst>1) && dyn->insts[ninst-2].x64.set_flags)), 1) - { - READFLAGS(X_PEND); // that's suspicious - } else { - SETFLAGS(X_ALL, SF_SET); //Hack to put flag in "don't care" state - } - GETEDOx(x6, 0); - BARRIER(1); - BARRIER_NEXT(1); - if(!dyn->insts || ninst==dyn->size-1) { - *need_epilog = 0; - *ok = 0; - } - GETIP(addr); - PUSH1(xRIP); - jump_to_next(dyn, 0, ed, ninst); - break; - case 4: // JMP Ed - INST_NAME("JMP Ed"); - BARRIER(1); - GETEDOx(x6, 0); - jump_to_next(dyn, 0, ed, ninst); - *need_epilog = 0; - *ok = 0; - break; - case 6: // Push Ed - INST_NAME("PUSH Ed"); 
- GETEDOx(x6, 0); - PUSH1(ed); - break; - - default: - DEFAULT; - } - break; - - default: - DEFAULT; - } - return addr; -} diff --git a/src/dynarec/dynarec_arm64_66.c b/src/dynarec/dynarec_arm64_66.c deleted file mode 100755 index 1715d845..00000000 --- a/src/dynarec/dynarec_arm64_66.c +++ /dev/null @@ -1,871 +0,0 @@ -#include -#include -#include -#include -#include - -#include "debug.h" -#include "box64context.h" -#include "dynarec.h" -#include "emu/x64emu_private.h" -#include "emu/x64run_private.h" -#include "x64run.h" -#include "x64emu.h" -#include "box64stack.h" -#include "callback.h" -#include "emu/x64run_private.h" -#include "x64trace.h" -#include "dynarec_arm64.h" -#include "dynarec_arm64_private.h" -#include "arm64_printer.h" - -#include "dynarec_arm64_helper.h" -#include "dynarec_arm64_functions.h" - - -uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog) -{ - uint8_t opcode = F8; - uint8_t nextop, u8; - int16_t i16; - uint16_t u16; - uint64_t u64; - int32_t i32; - int64_t j64; - uint8_t gd, ed; - uint8_t wback, wb1; - int64_t fixedaddress; - MAYUSE(u8); - MAYUSE(u16); - MAYUSE(u64); - MAYUSE(j64); - - while((opcode==0x2E) || (opcode==0x66)) // ignoring CS: or multiple 0x66 - opcode = F8; - - while((opcode==0xF2) || (opcode==0xF3)) { - rep = opcode-0xF1; - opcode = F8; - } - // REX prefix before the F0 are ignored - rex.rex = 0; - while(opcode>=0x40 && opcode<=0x4f) { - rex.rex = opcode; - opcode = F8; - } - - if(rex.w && opcode!=0x0f) // rex.w cancels "66", but not for 66 0f type of prefix - return dynarec64_00(dyn, addr-1, ip, ninst, rex, rep, ok, need_epilog); // addr-1, to "put back" opcode - - switch(opcode) { - case 0x01: - INST_NAME("ADD Ew, Gw"); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETGW(x2); - GETEW(x1, 0); - emit_add16(dyn, ninst, x1, x2, x4, x5); - EWBACK; - break; - case 0x03: - INST_NAME("ADD Gw, Ew"); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETGW(x1); - GETEW(x2, 0); - emit_add16(dyn, ninst, x1, x2, x3, x4); - GWBACK; - break; - case 0x05: - INST_NAME("ADD AX, Iw"); - SETFLAGS(X_ALL, SF_SET_PENDING); - i32 = F16; - UXTHw(x1, xRAX); - MOV32w(x2, i32); - emit_add16(dyn, ninst, x1, x2, x3, x4); - BFIx(xRAX, x1, 0, 16); - break; - - case 0x09: - INST_NAME("OR Ew, Gw"); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETGW(x2); - GETEW(x1, 0); - emit_or16(dyn, ninst, x1, x2, x4, x2); - EWBACK; - break; - case 0x0B: - INST_NAME("OR Gw, Ew"); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETGW(x1); - GETEW(x2, 0); - emit_or16(dyn, ninst, x1, x2, x4, x3); - GWBACK; - break; - case 0x0D: - INST_NAME("OR AX, Iw"); - SETFLAGS(X_ALL, SF_SET_PENDING); - i32 = F16; - UXTHw(x1, xRAX); - MOV32w(x2, i32); - emit_or16(dyn, ninst, x1, x2, x3, x4); - BFIx(xRAX, x1, 0, 16); - break; - - case 0x0F: - addr = dynarec64_660F(dyn, addr, ip, ninst, rex, rep, ok, need_epilog); - break; - case 0x11: - INST_NAME("ADC Ew, Gw"); - READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETGW(x2); - GETEW(x1, 0); - emit_adc16(dyn, ninst, x1, x2, x4, x5); - EWBACK; - break; - case 0x13: - INST_NAME("ADC Gw, Ew"); - READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETGW(x1); - GETEW(x2, 0); - emit_adc16(dyn, ninst, x1, x2, x4, x3); - GWBACK; - break; - case 0x15: - INST_NAME("ADC AX, Iw"); - READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); - i32 = F16; - UXTHw(x1, xRAX); - MOV32w(x2, i32); - emit_adc16(dyn, ninst, x1, x2, x3, x4); - BFIx(xRAX, x1, 0, 
16); - break; - - case 0x19: - INST_NAME("SBB Ew, Gw"); - READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETGW(x2); - GETEW(x1, 0); - emit_sbb16(dyn, ninst, x1, x2, x4, x5); - EWBACK; - break; - case 0x1B: - INST_NAME("SBB Gw, Ew"); - READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETGW(x1); - GETEW(x2, 0); - emit_sbb16(dyn, ninst, x1, x2, x4, x3); - GWBACK; - break; - case 0x1D: - INST_NAME("SBB AX, Iw"); - READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); - i16 = F16S; - UXTHw(x1, xRAX); - MOVZw(x2, i16); - emit_sbb16(dyn, ninst, x1, x2, x3, x4); - BFIx(xRAX, x1, 0, 16); - break; - - case 0x21: - INST_NAME("AND Ew, Gw"); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETGW(x2); - GETEW(x1, 0); - emit_and16(dyn, ninst, x1, x2, x4, x5); - EWBACK; - break; - case 0x23: - INST_NAME("AND Gw, Ew"); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETGW(x1); - GETEW(x2, 0); - emit_and16(dyn, ninst, x1, x2, x3, x4); - GWBACK; - break; - case 0x25: - INST_NAME("AND AX, Iw"); - SETFLAGS(X_ALL, SF_SET_PENDING); - i32 = F16; - UXTHw(x1, xRAX); - MOV32w(x2, i32); - emit_and16(dyn, ninst, x1, x2, x3, x4); - BFIx(xRAX, x1, 0, 16); - break; - - case 0x29: - INST_NAME("SUB Ew, Gw"); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETGW(x2); - GETEW(x1, 0); - emit_sub16(dyn, ninst, x1, x2, x4, x5); - EWBACK; - break; - case 0x2B: - INST_NAME("SUB Gw, Ew"); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETGW(x1); - GETEW(x2, 0); - emit_sub16(dyn, ninst, x1, x2, x3, x4); - GWBACK; - break; - case 0x2D: - INST_NAME("SUB AX, Iw"); - SETFLAGS(X_ALL, SF_SET_PENDING); - i32 = F16; - UXTHw(x1, xRAX); - MOV32w(x2, i32); - emit_sub16(dyn, ninst, x1, x2, x3, x4); - BFIx(xRAX, x1, 0, 16); - break; - - case 0x31: - INST_NAME("XOR Ew, Gw"); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETGW(x2); - GETEW(x1, 0); - emit_xor16(dyn, ninst, x1, x2, x4, x5); - EWBACK; - break; - case 0x33: - INST_NAME("XOR Gw, Ew"); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETGW(x1); - GETEW(x2, 0); - emit_xor16(dyn, ninst, x1, x2, x3, x4); - GWBACK; - break; - case 0x35: - INST_NAME("XOR AX, Iw"); - SETFLAGS(X_ALL, SF_SET_PENDING); - i32 = F16; - UXTHw(x1, xRAX); - MOV32w(x2, i32); - emit_xor16(dyn, ninst, x1, x2, x3, x4); - BFIx(xRAX, x1, 0, 16); - break; - - case 0x39: - INST_NAME("CMP Ew, Gw"); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETGW(x2); - GETEW(x1, 0); - emit_cmp16(dyn, ninst, x1, x2, x3, x4, x5); - break; - case 0x3B: - INST_NAME("CMP Gw, Ew"); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETGW(x1); - GETEW(x2, 0); - emit_cmp16(dyn, ninst, x1, x2, x3, x4, x5); - break; - case 0x3D: - INST_NAME("CMP AX, Iw"); - SETFLAGS(X_ALL, SF_SET_PENDING); - i32 = F16; - UXTHw(x1, xRAX); - if(i32) { - MOV32w(x2, i32); - emit_cmp16(dyn, ninst, x1, x2, x3, x4, x5); - } else { - emit_cmp16_0(dyn, ninst, x1, x3, x4); - } - break; - - case 0x64: - addr = dynarec64_6664(dyn, addr, ip, ninst, rex, rep, ok, need_epilog); - break; - - case 0x66: - addr = dynarec64_66(dyn, addr, ip, ninst, rex, rep, ok, need_epilog); - break; - - case 0x69: - INST_NAME("IMUL Gw,Ew,Iw"); - SETFLAGS(X_ALL, SF_PENDING); - nextop = F8; - UFLAG_DF(x1, d_imul16); - GETSEW(x1, 2); - i32 = F16S; - MOV32w(x2, i32); - MULw(x2, x2, x1); - UFLAG_RES(x2); - gd=x2; - GWBACK; - break; - - case 0x6B: - INST_NAME("IMUL Gw,Ew,Ib"); - SETFLAGS(X_ALL, SF_PENDING); - nextop = F8; - UFLAG_DF(x1, d_imul16); - GETSEW(x1, 1); - i32 = F8S; - MOV32w(x2, i32); - MULw(x2, x2, x1); - UFLAG_RES(x2); 
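The 16-bit arithmetic above all ends with the same write-back idiom: the operation runs in a 32-bit scratch register and only bits 0..15 of the architectural register are updated with BFIx(reg, res, 0, 16), because 16-bit x86 operations leave bits 16..63 of the destination untouched. A minimal sketch of that bit insert; the helper name is hypothetical.

#include <stdint.h>

/* equivalent of BFIx(reg, res, 0, 16): replace bits 0..15, keep bits 16..63 */
static uint64_t insert_low16(uint64_t reg, uint16_t res)
{
    return (reg & ~(uint64_t)0xFFFF) | res;
}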
- gd=x2; - GWBACK; - break; - - case 0x81: - case 0x83: - nextop = F8; - switch((nextop>>3)&7) { - case 0: //ADD - if(opcode==0x81) { - INST_NAME("ADD Ew, Iw"); - } else { - INST_NAME("ADD Ew, Ib"); - } - SETFLAGS(X_ALL, SF_SET_PENDING); - GETEW(x1, (opcode==0x81)?2:1); - if(opcode==0x81) i16 = F16S; else i16 = F8S; - MOVZw(x5, i16); - emit_add16(dyn, ninst, ed, x5, x2, x4); - EWBACK; - break; - case 1: //OR - if(opcode==0x81) {INST_NAME("OR Ew, Iw");} else {INST_NAME("OR Ew, Ib");} - SETFLAGS(X_ALL, SF_SET_PENDING); - GETEW(x1, (opcode==0x81)?2:1); - if(opcode==0x81) i16 = F16S; else i16 = F8S; - MOVZw(x5, i16); - emit_or16(dyn, ninst, x1, x5, x2, x4); - EWBACK; - break; - case 2: //ADC - if(opcode==0x81) {INST_NAME("ADC Ew, Iw");} else {INST_NAME("ADC Ew, Ib");} - READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); - GETEW(x1, (opcode==0x81)?2:1); - if(opcode==0x81) i16 = F16S; else i16 = F8S; - MOVZw(x5, i16); - emit_adc16(dyn, ninst, x1, x5, x2, x4); - EWBACK; - break; - case 3: //SBB - if(opcode==0x81) {INST_NAME("SBB Ew, Iw");} else {INST_NAME("SBB Ew, Ib");} - READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); - GETEW(x1, (opcode==0x81)?2:1); - if(opcode==0x81) i16 = F16S; else i16 = F8S; - MOVZw(x5, i16); - emit_sbb16(dyn, ninst, x1, x5, x2, x4); - EWBACK; - break; - case 4: //AND - if(opcode==0x81) {INST_NAME("AND Ew, Iw");} else {INST_NAME("AND Ew, Ib");} - SETFLAGS(X_ALL, SF_SET_PENDING); - GETEW(x1, (opcode==0x81)?2:1); - if(opcode==0x81) i16 = F16S; else i16 = F8S; - MOVZw(x5, i16); - emit_and16(dyn, ninst, x1, x5, x2, x4); - EWBACK; - break; - case 5: //SUB - if(opcode==0x81) {INST_NAME("SUB Ew, Iw");} else {INST_NAME("SUB Ew, Ib");} - SETFLAGS(X_ALL, SF_SET_PENDING); - GETEW(x1, (opcode==0x81)?2:1); - if(opcode==0x81) i16 = F16S; else i16 = F8S; - MOVZw(x5, i16); - emit_sub16(dyn, ninst, x1, x5, x2, x4); - EWBACK; - break; - case 6: //XOR - if(opcode==0x81) {INST_NAME("XOR Ew, Iw");} else {INST_NAME("XOR Ew, Ib");} - SETFLAGS(X_ALL, SF_SET_PENDING); - GETEW(x1, (opcode==0x81)?2:1); - if(opcode==0x81) i16 = F16S; else i16 = F8S; - MOVZw(x5, i16); - emit_xor16(dyn, ninst, x1, x5, x2, x4); - EWBACK; - break; - case 7: //CMP - if(opcode==0x81) {INST_NAME("CMP Ew, Iw");} else {INST_NAME("CMP Ew, Ib");} - SETFLAGS(X_ALL, SF_SET_PENDING); - GETEW(x1, (opcode==0x81)?2:1); - if(opcode==0x81) i16 = F16S; else i16 = F8S; - if(i16) { - MOVZw(x2, i16); - emit_cmp16(dyn, ninst, x1, x2, x3, x4, x5); - } else - emit_cmp16_0(dyn, ninst, x1, x3, x4); - break; - } - break; - - case 0x85: - INST_NAME("TEST Ew, Gw"); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETEW(x1, 0); - GETGW(x2); - emit_test16(dyn, ninst, x1, x2, x3, x4, x5); - break; - - case 0x89: - INST_NAME("MOV Ew, Gw"); - nextop = F8; - GETGD; // don't need GETGW here - if(MODREG) { - ed = xRAX+(nextop&7)+(rex.b<<3); - if(ed!=gd) { - BFIx(ed, gd, 0, 16); - } - } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<1, 1, rex, 0, 0); - STRH_U12(gd, ed, fixedaddress); - } - break; - case 0x8B: - INST_NAME("MOV Gw, Ew"); - nextop = F8; - GETGD; // don't need GETGW neither - if(MODREG) { - ed = xRAX+(nextop&7)+(rex.b<<3); - if(ed!=gd) { - BFIx(gd, ed, 0, 16); - } - } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<1, 1, rex, 0, 0); - LDRH_U12(x1, ed, fixedaddress); - BFIx(gd, x1, 0, 16); - } - break; - - case 0x90: - case 0x91: - case 0x92: - case 0x93: - case 0x94: - case 0x95: - case 0x96: - case 0x97: - gd = xRAX+(opcode&0x07)+(rex.b<<3); - if(gd==xRAX) { - 
INST_NAME("NOP"); - } else { - INST_NAME("XCHG AX, Reg"); - MOVw_REG(x2, xRAX); - BFIx(xRAX, gd, 0, 16); - BFIx(gd, x2, 0, 16); - } - break; - - case 0x98: - INST_NAME("CBW"); - SXTBw(x1, xRAX); - BFIw(xRAX, x1, 0, 16); - break; - - case 0xA1: - INST_NAME("MOV EAX,Od"); - u64 = F64; - MOV64x(x1, u64); - LDRH_U12(x2, x1, 0); - BFIx(xRAX, x2, 0, 16); - break; - - case 0xA3: - INST_NAME("MOV Od,EAX"); - u64 = F64; - MOV64x(x1, u64); - STRH_U12(xRAX, x1, 0); - break; - - case 0xA5: - if(rep) { - INST_NAME("REP MOVSW"); - CBZx_NEXT(xRCX); - TBNZ_MARK2(xFlags, F_DF); - MARK; // Part with DF==0 - LDRH_S9_postindex(x1, xRSI, 2); - STRH_S9_postindex(x1, xRDI, 2); - SUBx_U12(xRCX, xRCX, 1); - CBNZx_MARK(xRCX); - B_NEXT_nocond; - MARK2; // Part with DF==1 - LDRH_S9_postindex(x1, xRSI, -2); - STRH_S9_postindex(x1, xRDI, -2); - SUBx_U12(xRCX, xRCX, 1); - CBNZx_MARK2(xRCX); - // done - } else { - INST_NAME("MOVSW"); - GETDIR(x3, 2); - LDRH_U12(x1, xRSI, 0); - STRH_U12(x1, xRDI, 0); - ADDx_REG(xRSI, xRSI, x3); - ADDx_REG(xRDI, xRDI, x3); - } - break; - - case 0xA9: - INST_NAME("TEST AX,Iw"); - SETFLAGS(X_ALL, SF_SET_PENDING); - u16 = F16; - MOV32w(x2, u16); - UBFXx(x1, xRAX, 0, 16); - emit_test16(dyn, ninst, x1, x2, x3, x4, x5); - break; - - case 0xAB: - if(rep) { - INST_NAME("REP STOSW"); - CBZx_NEXT(xRCX); - TBNZ_MARK2(xFlags, F_DF); - MARK; // Part with DF==0 - STRH_S9_postindex(xRAX, xRDI, 2); - SUBx_U12(xRCX, xRCX, 1); - CBNZx_MARK(xRCX); - B_NEXT_nocond; - MARK2; // Part with DF==1 - STRH_S9_postindex(xRAX, xRDI, -2); - SUBx_U12(xRCX, xRCX, 1); - CBNZx_MARK2(xRCX); - // done - } else { - INST_NAME("STOSW"); - GETDIR(x3, 2); - STRH_U12(xRAX, xRDI, 0); - ADDx_REG(xRDI, xRDI, x3); - } - break; - - case 0xB8: - case 0xB9: - case 0xBA: - case 0xBB: - case 0xBC: - case 0xBD: - case 0xBE: - case 0xBF: - INST_NAME("MOV Reg16, Iw"); - u16 = F16; - MOV32w(x1, u16); - gd = xRAX+(opcode&7)+(rex.b<<3); - BFIx(gd, x1, 0, 16); - break; - - case 0xC1: - nextop = F8; - switch((nextop>>3)&7) { - case 0: - INST_NAME("ROL Ew, Ib"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - SETFLAGS(X_OF|X_CF, SF_SET); - GETEW(x1, 1); - u8 = F8; - MOV32w(x2, u8); - CALL_(rol16, x1, x3); - EWBACK; - break; - case 1: - INST_NAME("ROR Ew, Ib"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - SETFLAGS(X_OF|X_CF, SF_SET); - GETEW(x1, 1); - u8 = F8; - MOV32w(x2, u8); - CALL_(ror16, x1, x3); - EWBACK; - break; - case 2: - INST_NAME("RCL Ew, Ib"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - READFLAGS(X_CF); - SETFLAGS(X_OF|X_CF, SF_SET); - GETEW(x1, 1); - u8 = F8; - MOV32w(x2, u8); - CALL_(rcl16, x1, x3); - EWBACK; - break; - case 3: - INST_NAME("RCR Ew, Ib"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - READFLAGS(X_CF); - SETFLAGS(X_OF|X_CF, SF_SET); - GETEW(x1, 1); - u8 = F8; - MOV32w(x2, u8); - CALL_(rcr16, x1, x3); - EWBACK; - break; - case 4: - case 6: - INST_NAME("SHL Ew, Ib"); - UFLAG_IF {MESSAGE(LOG_DUMP, "Need Optimization for flags\n");} - SETFLAGS(X_ALL, SF_PENDING); - GETEW(x1, 1); - u8 = F8; - MOV32w(x2, (u8&0x1f)); - UFLAG_OP12(ed, x2) - LSLw_IMM(ed, ed, u8&0x1f); - EWBACK; - UFLAG_RES(ed); - UFLAG_DF(x3, d_shl16); - break; - case 5: - INST_NAME("SHR Ed, Ib"); - UFLAG_IF {MESSAGE(LOG_DUMP, "Need Optimization for flags\n");} - SETFLAGS(X_ALL, SF_PENDING); - GETEW(x1, 1); - u8 = F8; - MOV32w(x2, (u8&0x1f)); - UFLAG_OP12(ed, x2) - LSRw_IMM(ed, ed, u8&0x1f); - EWBACK; - UFLAG_RES(ed); - UFLAG_DF(x3, d_shr16); - break; - case 7: - INST_NAME("SAR Ed, Ib"); - SETFLAGS(X_ALL, SF_PENDING); - UFLAG_IF {MESSAGE(LOG_DUMP, "Need 
Optimization for flags\n");} - GETSEW(x1, 0); - u8 = F8; - MOV32w(x2, (u8&0x1f)); - UFLAG_OP12(ed, x2) - ASRw_REG(ed, ed, x2); - EWBACK; - UFLAG_RES(ed); - UFLAG_DF(x3, d_sar16); - break; - } - break; - - case 0xC7: - INST_NAME("MOV Ew, Iw"); - nextop = F8; - if(MODREG) { - ed = xRAX+(nextop&7)+(rex.b<<3); - u16 = F16; - MOV32w(x1, u16); - BFIx(ed, x1, 0, 16); - } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<1, 1, rex, 0, 2); - u16 = F16; - MOV32w(x1, u16); - STRH_U12(x1, ed, fixedaddress); - } - break; - - case 0xD1: - case 0xD3: - nextop = F8; - switch((nextop>>3)&7) { - case 0: - if(opcode==0xD1) { - INST_NAME("ROL Ew, 1"); - MOV32w(x2, 1); - } else { - INST_NAME("ROL Ew, CL"); - ANDSw_mask(x2, xRCX, 0, 0b00100); - } - MESSAGE(LOG_DUMP, "Need Optimization\n"); - SETFLAGS(X_OF|X_CF, SF_SET); - GETEW(x1, 0); - CALL_(rol16, x1, x3); - EWBACK; - break; - case 1: - if(opcode==0xD1) { - INST_NAME("ROR Ew, 1"); - MOV32w(x2, 1); - } else { - INST_NAME("ROR Ew, CL"); - ANDSw_mask(x2, xRCX, 0, 0b00100); - } - MESSAGE(LOG_DUMP, "Need Optimization\n"); - SETFLAGS(X_OF|X_CF, SF_SET); - GETEW(x1, 0); - CALL_(ror16, x1, x3); - EWBACK; - break; - case 2: - if(opcode==0xD1) {INST_NAME("RCL Ew, 1"); } else { INST_NAME("RCL Ew, CL");} - MESSAGE(LOG_DUMP, "Need Optimization\n"); - READFLAGS(X_CF); - SETFLAGS(X_OF|X_CF, SF_SET); - if(opcode==0xD1) {MOV32w(x2, 1);} else {ANDSw_mask(x2, xRCX, 0, 0b00100);} - GETEW(x1, 0); - CALL_(rcl16, x1, x3); - EWBACK; - break; - case 3: - if(opcode==0xD1) {INST_NAME("RCR Ew, 1");} else {INST_NAME("RCR Ew, CL");} - MESSAGE(LOG_DUMP, "Need Optimization\n"); - READFLAGS(X_CF); - SETFLAGS(X_OF|X_CF, SF_SET); - if(opcode==0xD1) {MOV32w(x2, 1);} else {ANDSw_mask(x2, xRCX, 0, 0b00100);} - GETEW(x1, 0); - CALL_(rcr16, x1, x3); - EWBACK; - break; - case 4: - case 6: - if(opcode==0xD1) { - INST_NAME("SHL Ew, 1"); - MOV32w(x4, 1); - } else { - INST_NAME("SHL Ew, CL"); - ANDSw_mask(x4, xRCX, 0, 0b00100); - } - UFLAG_IF {MESSAGE(LOG_DUMP, "Need Optimization for flags\n");} - SETFLAGS(X_ALL, SF_PENDING); - GETEW(x1, 0); - UFLAG_OP12(ed, x4) - LSLw_REG(ed, ed, x4); - EWBACK; - UFLAG_RES(ed); - UFLAG_DF(x3, d_shl16); - break; - case 5: - if(opcode==0xD1) { - INST_NAME("SHR Ew, 1"); - MOV32w(x4, 1); - } else { - INST_NAME("SHR Ew, CL"); - ANDSw_mask(x4, xRCX, 0, 0b00100); - } - UFLAG_IF {MESSAGE(LOG_DUMP, "Need Optimization for flags\n");} - SETFLAGS(X_ALL, SF_PENDING); - GETEW(x1, 0); - UFLAG_OP12(ed, x4) - LSRw_REG(ed, ed, x4); - EWBACK; - UFLAG_RES(ed); - UFLAG_DF(x3, d_shr16); - break; - case 7: - if(opcode==0xD1) { - INST_NAME("SAR Ew, 1"); - MOV32w(x4, 1); - } else { - INST_NAME("SAR Ew, CL"); - ANDSw_mask(x4, xRCX, 0, 0b00100); - } - UFLAG_IF {MESSAGE(LOG_DUMP, "Need Optimization for flags\n");} - SETFLAGS(X_ALL, SF_PENDING); - GETSEW(x1, 0); - UFLAG_OP12(ed, x4) - ASRw_REG(ed, ed, x4); - EWBACK; - UFLAG_RES(ed); - UFLAG_DF(x3, d_sar16); - break; - } - break; - - case 0xF7: - nextop = F8; - switch((nextop>>3)&7) { - case 0: - case 1: - INST_NAME("TEST Ew, Iw"); - SETFLAGS(X_ALL, SF_SET_PENDING); - GETEW(x1, 2); - u16 = F16; - MOV32w(x2, u16); - emit_test16(dyn, ninst, x1, x2, x3, x4, x5); - break; - case 2: - INST_NAME("NOT Ew"); - GETEW(x1, 0); - MVNw_REG(ed, ed); - EWBACK; - break; - case 3: - INST_NAME("NEG Ew"); - SETFLAGS(X_ALL, SF_SET_PENDING); - GETEW(x1, 0); - emit_neg16(dyn, ninst, ed, x2, x4); - EWBACK; - break; - case 4: - INST_NAME("MUL AX, Ew"); - SETFLAGS(X_ALL, SF_PENDING); - UFLAG_DF(x1, d_mul16); - GETEW(x1, 0); - UXTHw(x2, xRAX); 
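The D1/D3 rotate and shift groups above first mask the CL count: per the inline comments in this deleted code, ANDSw_mask(xn, xRCX, 0, 0b00100) decodes to an AND with 0x1F and ANDSx_mask(xn, xRCX, 1, 0, 0b00101) to an AND with 0x3F, matching the x86 rule that shift counts are taken modulo 32 (64 with REX.W); in the Seg:Ed variants earlier, the flag-setting ANDS also feeds the B_NEXT(cEQ) that skips the operation on a zero count. A hedged one-line equivalent:

#include <stdint.h>

static uint64_t masked_shift_count(uint64_t rcx, int rex_w)
{
    return rcx & (rex_w ? 0x3F : 0x1F);   /* what the ANDS*_mask forms compute */
}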
- MULw(x1, x2, x1); - UFLAG_RES(x1); - BFIx(xRAX, x1, 0, 16); - BFXILx(xRDX, x1, 16, 16); - break; - case 5: - INST_NAME("IMUL AX, Ew"); - SETFLAGS(X_ALL, SF_PENDING); - UFLAG_DF(x1, d_imul16); - GETSEW(x1, 0); - SXTHw(x2, xRAX); - MULw(x1, x2, x1); - UFLAG_RES(x1); - BFIx(xRAX, x1, 0, 16); - BFXILx(xRDX, x1, 16, 16); - break; - case 6: - INST_NAME("DIV Ew"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - SETFLAGS(X_ALL, SF_SET); - GETEW(x1, 0); - CALL(div16, -1); - break; - case 7: - INST_NAME("IDIV Ew"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - SETFLAGS(X_ALL, SF_SET); - GETEW(x1, 0); - CALL(idiv16, -1); - break; - } - break; - - case 0xFF: - nextop = F8; - switch((nextop>>3)&7) { - case 0: - INST_NAME("INC Ew"); - SETFLAGS(X_ALL&~X_CF, SF_SUBSET); - GETEW(x1, 0); - emit_inc16(dyn, ninst, x1, x2, x4); - EWBACK; - break; - case 1: - INST_NAME("DEC Ew"); - SETFLAGS(X_ALL&~X_CF, SF_SUBSET); - GETEW(x1, 0); - emit_dec16(dyn, ninst, x1, x2, x4); - EWBACK; - break; - default: - DEFAULT; - } - break; - default: - DEFAULT; - } - return addr; -} diff --git a/src/dynarec/dynarec_arm64_660f.c b/src/dynarec/dynarec_arm64_660f.c deleted file mode 100755 index fd0fd355..00000000 --- a/src/dynarec/dynarec_arm64_660f.c +++ /dev/null @@ -1,1946 +0,0 @@ -#include -#include -#include -#include -#include - -#include "debug.h" -#include "box64context.h" -#include "dynarec.h" -#include "emu/x64emu_private.h" -#include "emu/x64run_private.h" -#include "x64run.h" -#include "x64emu.h" -#include "box64stack.h" -#include "callback.h" -#include "emu/x64run_private.h" -#include "x64trace.h" -#include "dynarec_arm64.h" -#include "dynarec_arm64_private.h" -#include "arm64_printer.h" - -#include "dynarec_arm64_functions.h" -#include "dynarec_arm64_helper.h" - -// Get EX as a quad -#define GETEX(a, D) \ - if(MODREG) { \ - a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); \ - } else { \ - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, 0, D); \ - a = fpu_get_scratch(dyn); \ - VLDR128_U12(a, ed, fixedaddress); \ - } - -#define GETG gd = ((nextop&0x38)>>3)+(rex.r<<3) - -#define GETGX(a) \ - gd = ((nextop&0x38)>>3)+(rex.r<<3); \ - a = sse_get_reg(dyn, ninst, x1, gd) - -#define GETGX_empty(a) \ - gd = ((nextop&0x38)>>3)+(rex.r<<3); \ - a = sse_get_reg_empty(dyn, ninst, x1, gd) - -uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog) -{ - (void)ip; (void)rep; (void)need_epilog; - - uint8_t opcode = F8; - uint8_t nextop, u8; - int32_t i32; - uint8_t gd, ed; - uint8_t wback, wb1; - uint8_t eb1, eb2; - int64_t j64; - uint64_t tmp64u, tmp64u2; - int v0, v1; - int q0, q1; - int d0, d1; - int64_t fixedaddress; - - MAYUSE(d0); - MAYUSE(d1); - MAYUSE(q0); - MAYUSE(q1); - MAYUSE(eb1); - MAYUSE(eb2); - MAYUSE(j64); - #if STEP > 1 - static const int8_t mask_shift8[] = { -7, -6, -5, -4, -3, -2, -1, 0 }; - #endif - - switch(opcode) { - - case 0x10: - INST_NAME("MOVUPD Gx,Ex"); - nextop = F8; - GETG; - if(MODREG) { - v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); - v0 = sse_get_reg_empty(dyn, ninst, x1, gd); - VMOVQ(v0, v1); - } else { - v0 = sse_get_reg_empty(dyn, ninst, x1, gd); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, 0, 0); - VLDR128_U12(v0, ed, fixedaddress); - } - break; - case 0x11: - INST_NAME("MOVUPD Ex,Gx"); - nextop = F8; - GETG; - v0 = sse_get_reg(dyn, ninst, x1, gd); - if(MODREG) { - v1 = sse_get_reg_empty(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); - 
VMOVQ(v1, v0); - } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, 0, 0); - VSTR128_U12(v0, ed, fixedaddress); - } - break; - case 0x12: - INST_NAME("MOVLPD Gx, Eq"); - nextop = F8; - GETGX(v0); - if(MODREG) { - // access register instead of memory is bad opcode! - DEFAULT; - return addr; - } - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); - VLD1_64(v0, 0, ed); - break; - case 0x13: - INST_NAME("MOVLPD Eq, Gx"); - nextop = F8; - GETGX(v0); - if(MODREG) { - // access register instead of memory is bad opcode! - DEFAULT; - return addr; - } - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); - VST1_64(v0, 0, ed); - break; - case 0x14: - INST_NAME("UNPCKLPD Gx, Ex"); - nextop = F8; - GETGX(v0); - if(MODREG) { - v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); - VMOVeD(v0, 1, v1, 0); - } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); - VLD1_64(v0, 1, ed); - } - break; - case 0x15: - INST_NAME("UNPCKHPD Gx, Ex"); - nextop = F8; - GETGX(v0); - VMOVeD(v0, 0, v0, 1); - if(MODREG) { - v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); - VMOVeD(v0, 1, v1, 1); - } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); - v1 = fpu_get_scratch(dyn); - ADDx_U12(ed, ed, 8); - VLD1_64(v0, 1, ed); - } - break; - case 0x16: - INST_NAME("MOVHPD Gx, Eq"); - nextop = F8; - GETGX(v0); - if(MODREG) { - // access register instead of memory is bad opcode! - DEFAULT; - return addr; - } - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); - VLD1_64(v0, 1, ed); - break; - case 0x17: - INST_NAME("MOVHPD Eq, Gx"); - nextop = F8; - GETGX(v0); - if(MODREG) { - // access register instead of memory is bad opcode! - DEFAULT; - return addr; - } - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); - VST1_64(v0, 1, ed); - break; - - case 0x1F: - INST_NAME("NOP (multibyte)"); - nextop = F8; - FAKEED; - break; - - case 0x28: - INST_NAME("MOVAPD Gx,Ex"); - nextop = F8; - GETG; - if(MODREG) { - ed = (nextop&7)+(rex.b<<3); - v1 = sse_get_reg(dyn, ninst, x1, ed); - v0 = sse_get_reg_empty(dyn, ninst, x1, gd); - VMOVQ(v0, v1); - } else { - v0 = sse_get_reg_empty(dyn, ninst, x1, gd); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, 0, 0); - VLDR128_U12(v0, ed, fixedaddress); - } - break; - case 0x29: - INST_NAME("MOVAPD Ex,Gx"); - nextop = F8; - GETG; - v0 = sse_get_reg(dyn, ninst, x1, gd); - if(MODREG) { - ed = (nextop&7)+(rex.b<<3); - v1 = sse_get_reg_empty(dyn, ninst, x1, ed); - VMOVQ(v1, v0); - } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, 0, 0); - VSTR128_U12(v0, ed, fixedaddress); - } - break; - - case 0x2E: - // no special check... 
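Throughout this 660F map, geted() is passed a mask/alignment pair such as (0xfff<<4, 15): the AArch64 unsigned-offset load/store form encodes a 12-bit immediate scaled by the access size, so VLDR128_U12/VSTR128_U12 can only fold displacements that are non-negative multiples of 16 up to 0xfff*16, and anything else presumably ends up computed into a register instead. A small sketch of that reachability test; this is a hypothetical helper, not the real geted() logic.

#include <stdint.h>

/* can this displacement be folded into a 16-byte LDR/STR unsigned offset? */
static int fits_u12_offset_128(int64_t disp)
{
    return disp >= 0 && (disp & 15) == 0 && disp <= (0xfffLL << 4);
}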
- case 0x2F: - if(opcode==0x2F) {INST_NAME("COMISD Gx, Ex");} else {INST_NAME("UCOMISD Gx, Ex");} - SETFLAGS(X_ALL, SF_SET); - nextop = F8; - GETGX(v0); - GETEX(q0, 0); - FCMPD(v0, q0); - FCOMI(x1, x2); - break; - - case 0x38: // SSSE3 opcodes - nextop = F8; - switch(nextop) { - case 0x00: - INST_NAME("PSHUFB Gx, Ex"); - nextop = F8; - GETGX(q0); - GETEX(q1, 0); - d0 = fpu_get_scratch(dyn); - MOVIQ_8(d0, 0b10001111); - VANDQ(d0, d0, q1); // mask the index - VTBLQ1_8(q0, q0, d0); - break; - case 0x01: - INST_NAME("PHADDW Gx, Ex"); - nextop = F8; - GETGX(q0); - GETEX(q1, 0); - VADDPQ_16(q0, q0, q1); - break; - case 0x02: - INST_NAME("PHADDD Gx, Ex"); - nextop = F8; - GETGX(q0); - GETEX(q1, 0); - VADDPQ_32(q0, q0, q1); - break; - - case 0x04: - INST_NAME("PMADDUBSW Gx,Ex"); - nextop = F8; - GETGX(q0); - GETEX(q1, 0); - v0 = fpu_get_scratch(dyn); - v1 = fpu_get_scratch(dyn); - UXTL_8(v0, q0); // this is unsigned, so 0 extended - SXTL_8(v1, q1); // this is signed - VMULQ_16(v0, v0, v1); - SADDLPQ_16(v1, v0); - UXTL2_8(v0, q0); // this is unsigned - SQXTN_16(q0, v1); // SQXTN reset the vector so need to grab the high part first - SXTL2_8(v1, q1); // this is signed - VMULQ_16(v0, v0, v1); - SADDLPQ_16(v0, v0); - SQXTN2_16(q0, v0); - break; - - case 0x08: - INST_NAME("PSIGNB Gx, Ex"); - nextop = F8; - GETGX(q0); - GETEX(q1, 0); - v1 = fpu_get_scratch(dyn); - v0 = fpu_get_scratch(dyn); - NEGQ_8(v0, q0); // get NEG - CMLTQ_0_8(v1, q1); // calculate mask - VBICQ(q0, q0, v1); // apply not mask on dest - VANDQ(v0, v0, v1); // apply mask on src - VORRQ(q0, q0, v0); // merge - CMEQQ_0_8(v1, q1); // handle case where Ex is 0 - VBICQ(q0, q0, v1); - break; - case 0x09: - INST_NAME("PSIGNW Gx, Ex"); - nextop = F8; - GETGX(q0); - GETEX(q1, 0); - v1 = fpu_get_scratch(dyn); - v0 = fpu_get_scratch(dyn); - NEGQ_16(v0, q0); // get NEG - CMLTQ_0_16(v1, q1); // calculate mask - VBICQ(q0, q0, v1); // apply not mask on dest - VANDQ(v0, v0, v1); // apply mask on src - VORRQ(q0, q0, v0); // merge - CMEQQ_0_16(v1, q1); // handle case where Ex is 0 - VBICQ(q0, q0, v1); - break; - case 0x0A: - INST_NAME("PSIGND Gx, Ex"); - nextop = F8; - GETGX(q0); - GETEX(q1, 0); - v1 = fpu_get_scratch(dyn); - v0 = fpu_get_scratch(dyn); - NEGQ_32(v0, q0); // get NEG - CMLTQ_0_32(v1, q1); // calculate mask - VBICQ(q0, q0, v1); // apply not mask on dest - VANDQ(v0, v0, v1); // apply mask on src - VORRQ(q0, q0, v0); // merge - CMEQQ_0_32(v1, q1); // handle case where Ex is 0 - VBICQ(q0, q0, v1); - break; - case 0x0B: - INST_NAME("PMULHRSW Gx,Ex"); - nextop = F8; - GETGX(q0); - GETEX(q1, 0); - SQRDMULHQ_16(q0, q0, q1); - break; - - case 0x1C: - INST_NAME("PABSB Gx,Ex"); - nextop = F8; - GETEX(q1, 0); - GETG; - q0 = sse_get_reg_empty(dyn, ninst, x1, gd); - ABSQ_8(q0, q1); - break; - case 0x1D: - INST_NAME("PABSW Gx,Ex"); - nextop = F8; - GETEX(q1, 0); - GETG; - q0 = sse_get_reg_empty(dyn, ninst, x1, gd); - ABSQ_16(q0, q1); - break; - case 0x1E: - INST_NAME("PABSD Gx,Ex"); - nextop = F8; - GETEX(q1, 0); - GETG; - q0 = sse_get_reg_empty(dyn, ninst, x1, gd); - ABSQ_32(q0, q1); - break; - - case 0x20: - INST_NAME("PMOVSXBW Gx, Ex"); // SSE4 opcode! - nextop = F8; - GETEX(q1, 0); - GETGX_empty(q0); - SXTL_8(q0, q1); // 8bits->16bits - break; - case 0x21: - INST_NAME("PMOVSXBD Gx, Ex"); // SSE4 opcode! - nextop = F8; - GETEX(q1, 0); - GETGX_empty(q0); - SXTL_8(q0, q1); // 8bits->16bits - SXTL_16(q0, q0); //16bits->32bits - break; - case 0x22: - INST_NAME("PMOVSXBQ Gx, Ex"); // SSE4 opcode! 
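For the PSIGNB/PSIGNW/PSIGND cases above, the NEG/CMLT/VBIC/VAND/VORR/CMEQ sequence rebuilds the SSSE3 semantics out of masks: keep the destination lane where the source is positive, negate it where the source is negative, and zero it where the source is zero. A per-lane reference sketch (byte version); the function name is illustrative only.

#include <stdint.h>

static int8_t psignb_lane(int8_t dst, int8_t src)
{
    if (src < 0)  return (int8_t)(-dst);   /* negate (wraps for -128, as on x86) */
    if (src == 0) return 0;
    return dst;
}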
- nextop = F8; - GETEX(q1, 0); - GETGX_empty(q0); - SXTL_8(q0, q1); // 8bits->16bits - SXTL_16(q0, q0); //16bits->32bits - SXTL_32(q0, q0); //32bits->64bits - break; - case 0x23: - INST_NAME("PMOVSXWD Gx, Ex"); // SSE4 opcode! - nextop = F8; - GETEX(q1, 0); - GETGX_empty(q0); - SXTL_16(q0, q1); // 16bits->32bits - break; - case 0x24: - INST_NAME("PMOVSXWQ Gx, Ex"); // SSE4 opcode! - nextop = F8; - GETEX(q1, 0); - GETGX_empty(q0); - SXTL_16(q0, q1); // 16bits->32bits - SXTL_32(q0, q1); // 32bits->64bits - break; - case 0x25: - INST_NAME("PMOVSXDQ Gx, Ex"); // SSE4 opcode! - nextop = F8; - GETEX(q1, 0); - GETGX_empty(q0); - SXTL_32(q0, q1); // 32bits->64bits - break; - - case 0x39: - INST_NAME("PMINSD Gx, Ex"); // SSE4 opcode! - nextop = F8; - GETEX(q1, 0); - GETGX(q0); - SMINQ_32(q0, q0, q1); - break; - - case 0x3D: - INST_NAME("PMINSD Gx, Ex"); // SSE4 opcode! - nextop = F8; - GETEX(q1, 0); - GETGX(q0); - SMAXQ_32(q0, q0, q1); - break; - - case 0xDB: - INST_NAME("AESIMC Gx, Ex"); // AES-NI - nextop = F8; - if(arm64_aes) { - GETEX(q1, 0); - GETGX_empty(q0); - AESIMC(q0, q1); - } else { - GETEX(q1, 0); - GETGX_empty(q0); - if(q0!=q1) { - VMOVQ(q0, q1); - } - sse_forget_reg(dyn, ninst, gd); - MOV32w(x1, gd); - CALL(arm_aesimc, -1); - } - break; - case 0xDC: - INST_NAME("AESENC Gx, Ex"); // AES-NI - nextop = F8; - if(arm64_aes) { - GETEX(q1, 0); - GETGX(q0); - v0 = fpu_get_scratch(dyn); // ARM64 internal operation differs a bit from x86_64 - VEORQ(v0, q0, q1); - AESE(v0, q1); - AESMC(v0, v0); - VEORQ(q0, v0, q1); - } else { - GETG; - sse_forget_reg(dyn, ninst, gd); - MOV32w(x1, gd); - CALL(arm_aese, -1); - GETGX(q0); - GETEX(q1, 0); - VEORQ(q0, q0, q1); - } - break; - case 0xDD: - INST_NAME("AESENCLAST Gx, Ex"); // AES-NI - nextop = F8; - if(arm64_aes) { - GETEX(q1, 0); - GETGX(q0); - v0 = fpu_get_scratch(dyn); // ARM64 internal operation differs a bit from x86_64 - VEORQ(v0, q0, q1); - AESE(v0, q1); - VEORQ(q0, v0, q1); - } else { - GETG; - sse_forget_reg(dyn, ninst, gd); - MOV32w(x1, gd); - CALL(arm_aeselast, -1); - GETGX(q0); - GETEX(q1, 0); - VEORQ(q0, q0, q1); - } - break; - case 0xDE: - INST_NAME("AESDEC Gx, Ex"); // AES-NI - nextop = F8; - if(arm64_aes) { - GETEX(q1, 0); - GETGX(q0); - v0 = fpu_get_scratch(dyn); // ARM64 internal operation differs a bit from x86_64 - VEORQ(v0, q0, q1); - AESD(v0, q1); - AESIMC(v0, v0); - VEORQ(q0, v0, q1); - } else { - GETG; - sse_forget_reg(dyn, ninst, gd); - MOV32w(x1, gd); - CALL(arm_aesd, -1); - GETGX(q0); - GETEX(q1, 0); - VEORQ(q0, q0, q1); - } - break; - case 0xDF: - INST_NAME("AESDECLAST Gx, Ex"); // AES-NI - nextop = F8; - if(arm64_aes) { - GETEX(q1, 0); - GETGX(q0); - v0 = fpu_get_scratch(dyn); // ARM64 internal operation differs a bit from x86_64 - VEORQ(v0, q0, q1); - AESD(v0, q1); - VEORQ(q0, v0, q1); - } else { - GETG; - sse_forget_reg(dyn, ninst, gd); - MOV32w(x1, gd); - CALL(arm_aesdlast, -1); - GETGX(q0); - GETEX(q1, 0); - VEORQ(q0, q0, q1); - } - break; - - default: - DEFAULT; - } - break; - - case 0x3A: // these are some more SSSE3 opcodes - opcode = F8; - switch(opcode) { - case 0x0B: - INST_NAME("ROUNDSD Gx, Ex, Ib"); - nextop = F8; - GETGX(q0); - GETEX(q1, 1); - u8 = F8; - v1 = fpu_get_scratch(dyn); - if(u8&4) { - u8 = sse_setround(dyn, ninst, x1, x2, x3); - FRINTXD(v1, q1); - x87_restoreround(dyn, ninst, u8); - } else { - const uint8_t rounds[] = {0, 2, 1, 3}; - MAYUSE(rounds); - FRINTRRD(v1, q1, rounds[u8&3]); - } - VMOVeD(q0, 0, v1, 0); - break; - - case 0x0F: - INST_NAME("PALIGNR Gx, Ex, Ib"); - nextop = F8; - GETGX(q0); - 
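A hedged note on the AES-NI block above, where the comment says the ARM64 operation "differs a bit": x86 AESENC(state, key) is roughly MixColumns(SubBytes(ShiftRows(state))) XOR key, while AArch64 AESE(state, key) XORs the key first, i.e. SubBytes(ShiftRows(state XOR key)), with AESMC as a separate MixColumns step. The emitted VEORQ(v0, q0, q1); AESE(v0, q1); AESMC(v0, v0); VEORQ(q0, v0, q1) pre-XORs the operand so the XOR inside AESE cancels, then applies MixColumns and the x86-style trailing key XOR; AESENCLAST, AESDEC and AESDECLAST use the same cancellation without MixColumns or with the inverse steps, respectively.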
GETEX(q1, 1); - u8 = F8; - if(u8>31) { - VEORQ(q0, q0, q0); - } else if(u8>15) { - d0 = fpu_get_scratch(dyn); - VEORQ(d0, d0, d0); - VEXTQ_8(q0, q0, d0, u8-16); - } else { - VEXTQ_8(q0, q1, q0, u8); - } - break; - - case 0x22: - INST_NAME("PINSRD Gx, ED, Ib"); - nextop = F8; - GETGX(q0); - GETED(1); - u8 = F8; - if(rex.w) { - VMOVQDfrom(q0, (u8&1), ed); - } else { - VMOVQSfrom(q0, (u8&3), ed); - } - break; - - default: - DEFAULT; - } - break; - - #define GO(GETFLAGS, NO, YES, F) \ - READFLAGS(F); \ - GETFLAGS; \ - nextop=F8; \ - GETGD; \ - if(MODREG) { \ - ed = xRAX+(nextop&7)+(rex.b<<3); \ - } else { \ - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<1, 1, rex, 0, 0); \ - LDRH_U12(x1, ed, fixedaddress); \ - ed = x1; \ - } \ - Bcond(NO, +8); \ - BFIx(gd, ed, 0, 16); - - GOCOND(0x40, "CMOV", "Gw, Ew"); - #undef GO - - case 0x50: - nextop = F8; - INST_NAME("PMOVMSKD Gd, Ex"); - GETEX(q0, 0); - GETGD; - VMOVQDto(x1, q0, 1); - VMOVQDto(gd, q0, 0); - LSRx(x1, x1, 63); - LSRx(gd, gd, 63); - BFIx(gd, x1, 1, 1); - break; - - case 0x54: - INST_NAME("ANDPD Gx, Ex"); - nextop = F8; - GETEX(q0, 0); - GETGX(v0); - VANDQ(v0, v0, q0); - break; - case 0x55: - INST_NAME("ANDNPD Gx, Ex"); - nextop = F8; - GETEX(q0, 0); - GETGX(v0); - VBICQ(v0, q0, v0); - break; - case 0x56: - INST_NAME("ORPD Gx, Ex"); - nextop = F8; - GETEX(q0, 0); - GETGX(v0); - VORRQ(v0, v0, q0); - break; - case 0x57: - INST_NAME("XORPD Gx, Ex"); - nextop = F8; - GETEX(q0, 0); - GETGX(v0); - VEORQ(v0, v0, q0); - break; - case 0x58: - INST_NAME("ADDPD Gx, Ex"); - nextop = F8; - GETEX(q0, 0); - GETGX(v0); - VFADDQD(v0, v0, q0); - break; - case 0x59: - INST_NAME("MULPD Gx, Ex"); - nextop = F8; - GETEX(q0, 0); - GETGX(v0); - VFMULQD(v0, v0, q0); - break; - case 0x5A: - INST_NAME("CVTPD2PS Gx, Ex"); - nextop = F8; - GETEX(v1, 0); - GETGX_empty(v0); - FCVTXN(v0, v1); - break; - case 0x5B: - INST_NAME("CVTPS2DQ Gx, Ex"); - nextop = F8; - GETEX(v1, 0); - GETGX_empty(v0); - #ifdef PRECISE_CVT - LDRH_U12(x1, xEmu, offsetof(x64emu_t, mxcsr)); - UBFXx(x1, x1, 13, 2); // extract round requested - LSLx_REG(x1, x1, 3); - // Construct a "switch case", with each case 2 instructions, so 8 bytes - ADR(xLR, GETMARK); - ADDx_REG(xLR, xLR, x1); - B(xLR); - MARK; - VFCVTNSQS(v0, v1); // 0: Nearest (even) - B_NEXT_nocond; - VFCVTMSQS(v0, v1); // 1: Toward -inf - B_NEXT_nocond; - VFCVTPSQS(v0, v1); // 2: Toward +inf - B_NEXT_nocond; - VFCVTZSQS(v0, v1); // 3: Toward 0 - #else - VFCVTNSQS(v0, v1); - #endif - break; - case 0x5C: - INST_NAME("SUBPD Gx, Ex"); - nextop = F8; - GETEX(q0, 0); - GETGX(v0); - VFSUBQD(v0, v0, q0); - break; - case 0x5D: - INST_NAME("MINPD Gx, Ex"); - nextop = F8; - GETEX(q0, 0); - GETGX(v0); - VFMINQD(v0, v0, q0); - break; - case 0x5E: - INST_NAME("DIVPD Gx, Ex"); - nextop = F8; - GETEX(q0, 0); - GETGX(v0); - VFDIVQD(v0, v0, q0); - break; - case 0x5F: - INST_NAME("MAXPD Gx, Ex"); - nextop = F8; - GETEX(q0, 0); - GETGX(v0); - VFMAXQD(v0, v0, q0); - break; - case 0x60: - INST_NAME("PUNPCKLBW Gx,Ex"); - nextop = F8; - GETGX(v0); - GETEX(q0, 0); - VZIP1Q_8(v0, v0, q0); - break; - case 0x61: - INST_NAME("PUNPCKLWD Gx,Ex"); - nextop = F8; - GETGX(v0); - GETEX(q0, 0); - VZIP1Q_16(v0, v0, q0); - break; - case 0x62: - INST_NAME("PUNPCKLDQ Gx,Ex"); - nextop = F8; - GETGX(v0); - GETEX(q0, 0); - VZIP1Q_32(v0, v0, q0); - break; - case 0x63: - INST_NAME("PACKSSWB Gx,Ex"); - nextop = F8; - GETGX(q0); - GETEX(q1, 0); - SQXTN_8(q0, q0); - if(q0==q1) { - VMOVeD(q0, 1, q0, 0); - } else { - SQXTN2_8(q0, q1); - } - break; - case 0x64: - 
INST_NAME("PCMPGTB Gx,Ex"); - nextop = F8; - GETGX(v0); - GETEX(v1, 0); - VCMGTQ_8(v0, v0, v1); - break; - case 0x65: - INST_NAME("PCMPGTW Gx,Ex"); - nextop = F8; - GETGX(v0); - GETEX(v1, 0); - VCMGTQ_16(v0, v0, v1); - break; - case 0x66: - INST_NAME("PCMPGTD Gx,Ex"); - nextop = F8; - GETGX(v0); - GETEX(v1, 0); - VCMGTQ_32(v0, v0, v1); - break; - case 0x67: - INST_NAME("PACKUSWB Gx, Ex"); - nextop = F8; - GETGX(v0); - GETEX(v1, 0); - SQXTUN_8(v0, v0); - if(v0==v1) { - VMOVeD(v0, 1, v0, 0); - } else { - SQXTUN2_8(v0, v1); - } - break; - case 0x68: - INST_NAME("PUNPCKHBW Gx,Ex"); - nextop = F8; - GETGX(q0); - GETEX(q1, 1); - VZIP2Q_8(q0, q0, q1); - break; - case 0x69: - INST_NAME("PUNPCKHWD Gx,Ex"); - nextop = F8; - GETGX(q0); - GETEX(q1, 1); - VZIP2Q_16(q0, q0, q1); - break; - case 0x6A: - INST_NAME("PUNPCKHDQ Gx,Ex"); - nextop = F8; - GETGX(q0); - GETEX(q1, 1); - VZIP2Q_32(q0, q0, q1); - break; - case 0x6B: - INST_NAME("PACKSSDW Gx,Ex"); - nextop = F8; - GETGX(v0); - GETEX(v1, 0); - SQXTN_16(v0, v0); - if(v0==v1) { - VMOVeD(v0, 1, v0, 0); - } else { - SQXTN2_16(v0, v1); - } - break; - case 0x6C: - INST_NAME("PUNPCKLQDQ Gx,Ex"); - nextop = F8; - GETGX(v0); - if(MODREG) { - v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); - VMOVeD(v0, 1, v1, 0); - } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); - VLD1_64(v0, 1, ed); - } - break; - case 0x6D: - INST_NAME("PUNPCKHQDQ Gx,Ex"); - nextop = F8; - GETGX(v0); - VMOVeD(v0, 0, v0, 1); - if(MODREG) { - v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); - VMOVeD(v0, 1, v1, 1); - } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); - ADDSx_U12(x1, ed, 8); - VLD1_64(v0, 1, x1); - } - break; - case 0x6E: - INST_NAME("MOVD Gx, Ed"); - nextop = F8; - GETG; - GETED(0); - v0 = sse_get_reg_empty(dyn, ninst, x1, gd); - if(rex.w) { - FMOVDx(v0, ed); - } else { - VEORQ(v0, v0, v0); // RAZ vector - VMOVQSfrom(v0, 0, ed); - } - break; - case 0x6F: - INST_NAME("MOVDQA Gx,Ex"); - nextop = F8; - GETG; - v0 = sse_get_reg_empty(dyn, ninst, x1, gd); - if(MODREG) { - v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); - VMOVQ(v0, v1); - } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, 0, 0); - VLDR128_U12(v0, ed, fixedaddress); - } - break; - case 0x70: - INST_NAME("PSHUFD Gx,Ex,Ib"); - nextop = F8; - GETG; - i32 = -1; - if(MODREG) { - u8 = F8; - v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); - v0 = sse_get_reg_empty(dyn, ninst, x1, gd); - if(u8==0x4E) { - if(v0==v1) { - VEXTQ_8(v0, v0, v0, 8); // Swap Up/Lower 64bits parts - } else { - VMOVeD(v0, 0, v1, 1); - VMOVeD(v0, 1, v1, 0); - } - } else if(u8==0x00) { - // duplicate lower 32bits to all spot - if(v0!=v1) { - VMOVeS(v0, 0, v1, 0); - } - VMOVeS(v0, 1, v1, 0); - VMOVeD(v0, 1, v0, 0); - } else if(u8==0x55) { - // duplicate slot 1 to all spot - if(v0!=v1) { - VMOVeS(v0, 1, v1, 1); - } - VMOVeS(v0, 0, v1, 1); - VMOVeD(v0, 1, v0, 0); - } else if(u8==0xAA) { - // duplicate slot 2 to all spot - if(v0!=v1) { - VMOVeS(v0, 2, v1, 2); - } - VMOVeS(v0, 3, v1, 2); - VMOVeD(v0, 0, v0, 1); - } else if(u8==0xFF) { - // duplicate slot 3 to all spot - if(v0!=v1) { - VMOVeS(v0, 3, v1, 3); - } - VMOVeS(v0, 2, v1, 3); - VMOVeD(v0, 0, v0, 1); - } else if(v0!=v1) { - VMOVeS(v0, 0, v1, (u8>>(0*2))&3); - VMOVeS(v0, 1, v1, (u8>>(1*2))&3); - VMOVeS(v0, 2, v1, (u8>>(2*2))&3); - VMOVeS(v0, 3, v1, (u8>>(3*2))&3); - } else { - uint64_t swp[4] = { - (0)|(1<<8)|(2<<16)|(3<<24), - 
(4)|(5<<8)|(6<<16)|(7<<24), - (8)|(9<<8)|(10<<16)|(11<<24), - (12)|(13<<8)|(14<<16)|(15<<24) - }; - d0 = fpu_get_scratch(dyn); - tmp64u = swp[(u8>>(0*2))&3] | (swp[(u8>>(1*2))&3]<<32); - MOV64x(x2, tmp64u); - VMOVQDfrom(d0, 0, x2); - tmp64u2 = swp[(u8>>(2*2))&3] | (swp[(u8>>(3*2))&3]<<32); - if(tmp64u2==tmp64u) { - VMOVQDfrom(d0, 1, x2); - } else { - MOV64x(x3, tmp64u2); - VMOVQDfrom(d0, 1, x3); - } - VTBLQ1_8(v0, v1, d0); - } - } else { - v0 = sse_get_reg_empty(dyn, ninst, x1, gd); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 1); - u8 = F8; - if (u8) { - for (int i=0; i<4; ++i) { - int32_t idx = (u8>>(i*2))&3; - if(idx!=i32) { - ADDx_U12(x2, ed, idx*4); - i32 = idx; - } - VLD1_32(v0, i, x2); - } - } else { - VLDQ1R_32(v0, ed); - } - } - break; - case 0x71: - nextop = F8; - switch((nextop>>3)&7) { - case 2: - INST_NAME("PSRLW Ex, Ib"); - GETEX(q0, 1); - u8 = F8; - if(u8) { - if (u8>15) { - VEORQ(q0, q0, q0); - } else if(u8) { - VSHRQ_16(q0, q0, u8); - } - if(!MODREG) { - VSTR128_U12(q0, ed, fixedaddress); - } - } - break; - case 4: - INST_NAME("PSRAW Ex, Ib"); - GETEX(q0, 1); - u8 = F8; - if(u8>15) u8=15; - if(u8) { - VSSHRQ_16(q0, q0, u8); - } - if(!MODREG) { - VSTR128_U12(q0, ed, fixedaddress); - } - break; - case 6: - INST_NAME("PSLLW Ex, Ib"); - GETEX(q0, 1); - u8 = F8; - if(u8) { - if (u8>15) { - VEORQ(q0, q0, q0); - } else { - VSHLQ_16(q0, q0, u8); - } - if(!MODREG) { - VSTR128_U12(q0, ed, fixedaddress); - } - } - break; - default: - *ok = 0; - DEFAULT; - } - break; - case 0x72: - nextop = F8; - switch((nextop>>3)&7) { - case 2: - INST_NAME("PSRLD Ex, Ib"); - GETEX(q0, 1); - u8 = F8; - if(u8) { - if (u8>31) { - VEORQ(q0, q0, q0); - } else if(u8) { - VSHRQ_32(q0, q0, u8); - } - if(!MODREG) { - VSTR128_U12(q0, ed, fixedaddress); - } - } - break; - case 4: - INST_NAME("PSRAD Ex, Ib"); - GETEX(q0, 1); - u8 = F8; - if(u8>31) u8=31; - if(u8) { - VSSHRQ_32(q0, q0, u8); - } - if(!MODREG) { - VSTR128_U12(q0, ed, fixedaddress); - } - break; - case 6: - INST_NAME("PSLLD Ex, Ib"); - GETEX(q0, 1); - u8 = F8; - if(u8) { - if (u8>31) { - VEORQ(q0, q0, q0); - } else { - VSHLQ_32(q0, q0, u8); - } - if(!MODREG) { - VSTR128_U12(q0, ed, fixedaddress); - } - } - break; - default: - DEFAULT; - } - break; - case 0x73: - nextop = F8; - switch((nextop>>3)&7) { - case 2: - INST_NAME("PSRLQ Ex, Ib"); - GETEX(q0, 1); - u8 = F8; - if(u8) { - if (u8>63) { - VEORQ(q0, q0, q0); - } else if(u8) { - VSHRQ_64(q0, q0, u8); - } - if(!MODREG) { - VSTR128_U12(q0, ed, fixedaddress); - } - } - break; - case 3: - INST_NAME("PSRLDQ Ex, Ib"); - GETEX(q0, 1); - u8 = F8; - if(u8) { - if(u8>15) { - VEORQ(q0, q0, q0); - } else { - q1 = fpu_get_scratch(dyn); - VEORQ(q1, q1, q1); - VEXTQ_8(q0, q0, q1, u8); - } - if(!MODREG) { - VSTR128_U12(q0, ed, fixedaddress); - } - } - break; - case 6: - INST_NAME("PSLLQ Ex, Ib"); - GETEX(q0, 1); - u8 = F8; - if(u8) { - if (u8>63) { - VEORQ(q0, q0, q0); - } else { - VSHLQ_64(q0, q0, u8); - } - if(!MODREG) { - VSTR128_U12(q0, ed, fixedaddress); - } - } - break; - case 7: - INST_NAME("PSLLDQ Ex, Ib"); - GETEX(q0, 1); - u8 = F8; - if(u8) { - if(u8>15) { - VEORQ(q0, q0, q0); - } else if(u8>0) { - q1 = fpu_get_scratch(dyn); - VEORQ(q1, q1, q1); - VEXTQ_8(q0, q1, q0, 16-u8); - } - if(!MODREG) { - VSTR128_U12(q0, ed, fixedaddress); - } - } - break; - default: - DEFAULT; - } - break; - - case 0x74: - INST_NAME("PCMPEQB Gx,Ex"); - nextop = F8; - GETGX(v0); - GETEX(q0, 0); - VCMEQQ_8(v0, v0, q0); - break; - case 0x75: - INST_NAME("PCMPEQW Gx,Ex"); - nextop = F8; - 
GETGX(v0); - GETEX(q0, 0); - VCMEQQ_16(v0, v0, q0); - break; - case 0x76: - INST_NAME("PCMPEQD Gx,Ex"); - nextop = F8; - GETGX(v0); - GETEX(q0, 0); - VCMEQQ_32(v0, v0, q0); - break; - - case 0x7E: - INST_NAME("MOVD Ed,Gx"); - nextop = F8; - GETGX(v0); - if(rex.w) { - if(MODREG) { - ed = xRAX + (nextop&7) + (rex.b<<3); - VMOVQDto(ed, v0, 0); - } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0); - VSTR64_U12(v0, ed, fixedaddress); - } - } else { - if(MODREG) { - ed = xRAX + (nextop&7) + (rex.b<<3); - VMOVSto(ed, v0, 0); - } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); - VSTR32_U12(v0, ed, fixedaddress); - } - } - break; - case 0x7F: - INST_NAME("MOVDQA Ex,Gx"); - nextop = F8; - GETGX(v0); - if(MODREG) { - v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); - VMOVQ(v1, v0); - } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, 0, 0); - VSTR128_U12(v0, ed, fixedaddress); - } - break; - - case 0xA3: - INST_NAME("BT Ew, Gw"); - SETFLAGS(X_CF, SF_SUBSET); - SET_DFNONE(x1); - nextop = F8; - gd = xRAX+((nextop&0x38)>>3)+(rex.r<<3); // GETGD - if(MODREG) { - ed = xRAX+(nextop&7)+(rex.b<<3); - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<2, (1<<2)-1, rex, 0, 0); - SBFXw(x1, gd, 4, 12); // r1 = (gw>>4) - ADDx_REG_LSL(x3, wback, x1, 1); //(&ed)+=r1*2; - LDRH_U12(x1, x3, fixedaddress); - ed = x1; - } - ANDw_mask(x2, gd, 0, 0b000011); // mask=0x0f - LSRw_REG(x1, ed, x2); - BFIw(xFlags, x1, F_CF, 1); - break; - case 0xA4: - case 0xA5: - nextop = F8; - if(opcode==0xA4) { - INST_NAME("SHLD Ew, Gw, Ib"); - } else { - INST_NAME("SHLD Ew, Gw, CL"); - UXTBw(x3, xRCX); - } - MESSAGE(LOG_DUMP, "Need Optimization\n"); - SETFLAGS(X_ALL, SF_SET); - GETEWW(x4, x1, (opcode==0xA4)?1:0); - GETGW(x2); - if(opcode==0xA4) { - u8 = F8; - MOV32w(x3, u8); - } - CALL_(shld16, x1, wback); - EWBACKW(x1); - break; - - case 0xAB: - INST_NAME("BTS Ew, Gw"); - SETFLAGS(X_CF, SF_SUBSET); - SET_DFNONE(x1); - nextop = F8; - gd = xRAX+((nextop&0x38)>>3)+(rex.r<<3); // GETGD - if(MODREG) { - ed = xRAX+(nextop&7)+(rex.b<<3); - wback = 0; - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<2, (1<<2)-1, rex, 0, 0); - SBFXw(x4, gd, 4, 12); // r1 = (gw>>4) - ADDx_REG_LSL(x3, wback, x4, 1); //(&ed)+=r1*2; - LDRH_U12(x4, x3, fixedaddress); - ed = x4; - } - ANDw_mask(x2, gd, 0, 0b000011); // mask=0x0f - LSRw_REG(x1, ed, x2); - BFIw(xFlags, x1, F_CF, 1); - ANDSw_mask(x1, x1, 0, 0); //mask=1 - B_NEXT(cNE); - MOV32w(x1, 1); - LSLxw_REG(x1, x1, x2); - EORx_REG(ed, ed, x1); - if(wback) { - STRH_U12(ed, wback, fixedaddress); - } - break; - case 0xAC: - case 0xAD: - nextop = F8; - if(opcode==0xAC) { - INST_NAME("SHRD Ew, Gw, Ib"); - } else { - INST_NAME("SHRD Ew, Gw, CL"); - UXTBw(x3, xRCX); - } - MESSAGE(LOG_DUMP, "Need Optimization\n"); - SETFLAGS(X_ALL, SF_SET); - GETEWW(x4, x1, (opcode==0xAC)?1:0); - GETGW(x2); - if(opcode==0xAC) { - u8 = F8; - MOV32w(x3, u8); - } - CALL_(shrd16, x1, wback); - EWBACKW(x1); - break; - - case 0xAF: - INST_NAME("IMUL Gw,Ew"); - SETFLAGS(X_ALL, SF_PENDING); - nextop = F8; - UFLAG_DF(x1, d_imul16); - GETSEW(x1, 0); - GETSGW(x2); - MULw(x2, x2, x1); - UFLAG_RES(x2); - GWBACK; - break; - - case 0xB3: - INST_NAME("BTR Ew, Gw"); - SETFLAGS(X_CF, SF_SUBSET); - SET_DFNONE(x1); - nextop = F8; - gd = xRAX+((nextop&0x38)>>3)+(rex.r<<3); // GETGD - if(MODREG) { - ed = xRAX+(nextop&7)+(rex.b<<3); - wback = 0; - } 
else { - addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<2, (1<<2)-1, rex, 0, 0); - SBFXw(x4, gd, 4, 12); // r1 = (gw>>4) - ADDx_REG_LSL(x3, wback, x4, 1); //(&ed)+=r1*2; - LDRH_U12(x4, x3, fixedaddress); - wback = x3; - ed = x4; - } - ANDw_mask(x2, gd, 0, 0b000011); // mask=0x0f - LSRw_REG(x1, ed, x2); - BFIw(xFlags, x1, F_CF, 1); - ANDSw_mask(x1, x1, 0, 0); //mask=1 - B_NEXT(cEQ); - MOV32w(x1, 1); - LSLxw_REG(x1, x1, x2); - EORx_REG(ed, ed, x1); - if(wback) { - STRH_U12(ed, wback, fixedaddress); - } - break; - - case 0xB6: - INST_NAME("MOVZX Gw, Eb"); - nextop = F8; - if(MODREG) { - if(rex.rex) { - eb1 = xRAX+(nextop&7)+(rex.b<<3); - eb2 = 0; \ - } else { - ed = (nextop&7); - eb1 = xRAX+(ed&3); // Ax, Cx, Dx or Bx - eb2 = (ed&4)>>2; // L or H - } - UBFXxw(x1, eb1, eb2*8, 8); - } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff, 0, rex, 0, 0); - LDRB_U12(x1, ed, fixedaddress); - } - gd = xRAX+((nextop&0x38)>>3)+(rex.r<<3); // GETGW - BFIx(gd, x1, 0, 16); // insert in Gw - break; - case 0xB7: - INST_NAME("MOVZX Gw, Ew"); - nextop = F8; - if(MODREG) { - eb1 = xRAX+(nextop&7)+(rex.b<<3); - UBFXxw(x1, eb1, 0, 16); - } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff>>1, 1, rex, 0, 0); - LDRH_U12(x1, ed, fixedaddress); - } - gd = xRAX+((nextop&0x38)>>3)+(rex.r<<3); // GETGW - BFIx(gd, x1, 0, 16); // insert in Gw - break; - - - case 0xBB: - INST_NAME("BTC Ew, Gw"); - SETFLAGS(X_CF, SF_SUBSET); - SET_DFNONE(x1); - nextop = F8; - gd = xRAX+((nextop&0x38)>>3)+(rex.r<<3); // GETGD - if(MODREG) { - ed = xRAX+(nextop&7)+(rex.b<<3); - wback = 0; - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<2, (1<<2)-1, rex, 0, 0); - SBFXw(x4, gd, 4, 12); // r1 = (gw>>4) - ADDx_REG_LSL(x3, wback, x4, 1); //(&ed)+=r1*2; - LDRH_U12(x4, x3, fixedaddress); - wback = x3; - ed = x4; - } - ANDw_mask(x2, gd, 0, 0b000011); // mask=0x0f - LSRw_REG(x1, ed, x2); - BFIw(xFlags, x1, F_CF, 1); - ANDw_mask(x1, x1, 0, 0); //mask=1 - MOV32w(x1, 1); - LSLxw_REG(x1, x1, x2); - EORx_REG(ed, ed, x1); - if(wback) { - STRH_U12(ed, wback, fixedaddress); - } - break; - case 0xBC: - INST_NAME("BSF Ew,Gw"); - SETFLAGS(X_ZF, SF_SUBSET); - SET_DFNONE(x1); - nextop = F8; - GETGD; - GETEW(x1, 0); // Get EW - TSTw_REG(x1, x1); - B_MARK(cEQ); - RBITw(x1, x1); // reverse - CLZw(x2, x1); // x2 gets leading 0 == BSF - BFIw(gd, x2, 0, 16); - MARK; - CSETw(x1, cEQ); //ZF not set - BFIw(xFlags, x1, F_ZF, 1); - break; - case 0xBD: - INST_NAME("BSR Ew,Gw"); - SETFLAGS(X_ZF, SF_SUBSET); - SET_DFNONE(x1); - nextop = F8; - GETGD; - GETEW(x1, 0); // Get EW - TSTw_REG(x1, x1); // Don't use CBZ here, as the flag is reused later - B_MARK(cEQ); - LSLw(x1, x1, 16); // put bits on top - CLZw(x2, x1); // x2 gets leading 0 - SUBw_U12(x2, x2, 15); - NEGw_REG(x2, x2); // complement - BFIx(gd, x2, 0, 16); - MARK; - CSETw(x1, cEQ); //ZF not set - BFIw(xFlags, x1, F_ZF, 1); - break; - case 0xBE: - INST_NAME("MOVSX Gw, Eb"); - nextop = F8; - GETGD; - if(MODREG) { - if(rex.rex) { - ed = xRAX+(nextop&7)+(rex.b<<3); - eb1=ed; - eb2=0; - } else { - ed = (nextop&7); - eb1 = xRAX+(ed&3); // Ax, Cx, Dx or Bx - eb2 = (ed&4)>>2; // L or H - } - SBFXw(x1, eb1, eb2, 8); - } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff, 0, rex, 0, 0); - LDRSBw_U12(x1, ed, fixedaddress); - } - BFIx(gd, x1, 0, 16); - break; - - case 0xC2: - INST_NAME("CMPPD Gx, Ex, Ib"); - nextop = F8; - GETGX(v0); - GETEX(v1, 1); - u8 = F8; - switch(u8&7) { - // the 
inversion of the params in the comparison is there to handle NaN the same way SSE does - case 0: FCMEQQD(v0, v0, v1); break; // Equal - case 1: FCMGTQD(v0, v1, v0); break; // Less than - case 2: FCMGEQD(v0, v1, v0); break; // Less or equal - case 3: FCMEQQD(v0, v0, v0); - if(v0!=v1) { - q0 = fpu_get_scratch(dyn); - FCMEQQD(q0, v1, v1); - VANDQ(v0, v0, q0); - } - VMVNQ(v0, v0); - break; // NaN (NaN is not equal to himself) - case 4: FCMEQQD(v0, v0, v1); VMVNQ(v0, v0); break; // Not Equal (or unordered on ARM, not on X86...) - case 5: FCMGTQD(v0, v1, v0); VMVNQ(v0, v0); break; // Greater or equal or unordered - case 6: FCMGEQD(v0, v1, v0); VMVNQ(v0, v0); break; // Greater or unordered - case 7: FCMEQQD(v0, v0, v0); - if(v0!=v1) { - q0 = fpu_get_scratch(dyn); - FCMEQQD(q0, v1, v1); - VANDQ(v0, v0, q0); - } - break; // not NaN - } - break; - - case 0xC4: - INST_NAME("PINSRW Gx,Ed,Ib"); - nextop = F8; - GETGX(v0); - if(MODREG) { - u8 = (F8)&7; - ed = xRAX+(nextop&7)+(rex.b<<3); - VMOVQHfrom(v0, u8, ed); - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0, rex, 0, 1); - u8 = (F8)&7; - VLD1_16(v0, u8, wback); - } - break; - case 0xC5: - INST_NAME("PEXTRW Gd,Ex,Ib"); - nextop = F8; - GETGD; - if(MODREG) { - GETEX(v0, 1); - u8 = (F8)&7; - VMOVHto(gd, v0, u8); - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0, rex, 0, 1); - u8 = (F8)&7; - LDRH_U12(gd, wback, u8*2); - } - break; - case 0xC6: - INST_NAME("SHUFPD Gx, Ex, Ib"); - nextop = F8; - GETGX(v0); - GETEX(v1, 1); - u8 = F8; - if(v0==v1 && u8==0) { - VMOVeD(v0, 1, v0, 0); - } else { - if(v0==v1) - q0 = fpu_get_scratch(dyn); - else - q0 = v0; - VMOVeD(q0, 0, v0, (u8&1)); - VMOVeD(q0, 1, v1, ((u8>>1)&1)); - if(v0==v1) { - VMOVQ(v0, q0); - } - } - break; - - case 0xC8: - case 0xC9: - case 0xCA: - case 0xCB: - case 0xCC: - case 0xCD: - case 0xCE: - case 0xCF: /* BSWAP reg */ - INST_NAME("BSWAP Reg"); - gd = xRAX+(opcode&7)+(rex.b<<3); - if(rex.w) { - REV64x(gd, gd); - } else { - REV16w(x1, gd); - BFIx(gd, x1, 0, 16); - } - break; - - case 0xD1: - INST_NAME("PSRLW Gx,Ex"); - nextop = F8; - GETGX(q0); - GETEX(q1, 0); - v0 = fpu_get_scratch(dyn); - VDUPQ_16(v0, q1, 0); - NEGQ_16(v0, v0); // neg, because SHR - USHLQ_16(q0, q0, v0); // SHR x8 - break; - case 0xD2: - INST_NAME("PSRLD Gx,Ex"); - nextop = F8; - GETGX(q0); - GETEX(q1, 0); - v0 = fpu_get_scratch(dyn); - VDUPQ_32(v0, q1, 0); - NEGQ_32(v0, v0); // neg, because SHR - USHLQ_32(q0, q0, v0); // SHR x4 - break; - case 0xD3: - INST_NAME("PSRLQ Gx,Ex"); - nextop = F8; - GETGX(q0); - GETEX(q1, 0); - v0 = fpu_get_scratch(dyn); - NEG_64(v0, q1); - VMOVeD(v0, 1, v0, 0); - USHLQ_64(q0, q0, v0); - break; - case 0xD4: - INST_NAME("PADDQ Gx,Ex"); - nextop = F8; - GETGX(v0); - GETEX(q0, 0); - VADDQ_64(v0, v0, q0); - break; - case 0xD5: - INST_NAME("PMULLW Gx,Ex"); - nextop = F8; - GETGX(q0); - GETEX(q1, 0); - VMULQ_16(q0, q0, q1); - break; - case 0xD6: - INST_NAME("MOVQ Ex, Gx"); - nextop = F8; - GETG; - v0 = sse_get_reg(dyn, ninst, x1, gd); - if(MODREG) { - v1 = sse_get_reg_empty(dyn, ninst, x1, (nextop&7) + (rex.b<<3)); - FMOVD(v1, v0); - } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0); - VSTR64_U12(v0, ed, fixedaddress); - } - break; - case 0xD7: - nextop = F8; - INST_NAME("PMOVMSKB Gd, Ex"); - v0 = fpu_get_scratch(dyn); - v1 = fpu_get_scratch(dyn); - q1 = fpu_get_scratch(dyn); - GETEX(q0, 0); - GETGD; - TABLE64(x1, (uintptr_t)&mask_shift8); - VLDR64_U12(v0, x1, 0); // load shift - MOVI_8(v1, 
0x80); // load mask - VAND(q1, v1, q0); - USHL_8(q1, q1, v0); // shift - UADDLV_8(q1, q1); // accumalte - VMOVBto(gd, q1, 0); - // and now the high part - VMOVeD(q1, 0, q0, 1); - VAND(q1, v1, q1); // keep highest bit - USHL_8(q1, q1, v0); // shift - UADDLV_8(q1, q1); // accumalte - VMOVBto(x1, q1, 0); - BFIx(gd, x1, 8, 8); - break; - case 0xD8: - INST_NAME("PSUBUSB Gx, Ex"); - nextop = F8; - GETGX(q0); - GETEX(q1, 0); - UQSUBQ_8(q0, q0, q1); - break; - case 0xD9: - INST_NAME("PSUBUSW Gx, Ex"); - nextop = F8; - GETGX(q0); - GETEX(q1, 0); - UQSUBQ_16(q0, q0, q1); - break; - case 0xDA: - INST_NAME("PMINUB Gx, Ex"); - nextop = F8; - GETGX(q0); - GETEX(q1,0); - UMINQ_8(q0, q0, q1); - break; - case 0xDB: - INST_NAME("PAND Gx,Ex"); - nextop = F8; - GETGX(v0); - GETEX(q0, 0); - VANDQ(v0, v0, q0); - break; - case 0xDC: - INST_NAME("PADDUSB Gx,Ex"); - nextop = F8; - GETGX(q0); - GETEX(q1, 0); - UQADDQ_8(q0, q0, q1); - break; - case 0xDD: - INST_NAME("PADDUSW Gx,Ex"); - nextop = F8; - GETGX(q0); - GETEX(q1, 0); - UQADDQ_16(q0, q0, q1); - break; - case 0xDE: - INST_NAME("PMAXUB Gx, Ex"); - nextop = F8; - GETGX(q0); - GETEX(q1, 0); - UMAXQ_8(q0, q0, q1); - break; - case 0xDF: - INST_NAME("PANDN Gx,Ex"); - nextop = F8; - GETGX(v0); - GETEX(q0, 0); - VBICQ(v0, q0, v0); - break; - - case 0xE0: - INST_NAME("PAVGB Gx, Ex"); - nextop = F8; - GETGX(v0); - GETEX(v1, 0); - URHADDQ_8(v0, v0, v1); - break; - - case 0xE1: - INST_NAME("PSRAW Gx,Ex"); - nextop = F8; - GETGX(q0); - GETEX(q1, 0); - v0 = fpu_get_scratch(dyn); - VMOVeD(v0, 0, q1, 0); - VMOVeD(v0, 1, q1, 0); - SQXTN_32(v0, v0); // 2*q1 in 32bits now - NEG_32(v0, v0); // because we want SHR and not SHL - VMOVeD(v0, 1, v0, 0); - SQXTN_16(v0, v0); // 4*q1 in 32bits now - VMOVeD(v0, 1, v0, 0); - SSHLQ_16(q0, q0, v0); - break; - case 0xE2: - INST_NAME("PSRAD Gx,Ex"); - nextop = F8; - GETGX(q0); - GETEX(q1, 0); - v0 = fpu_get_scratch(dyn); - VMOVeD(v0, 0, q1, 0); - VMOVeD(v0, 1, q1, 0); - SQXTN_32(v0, v0); // 2*q1 in 32bits now - NEG_32(v0, v0); // because we want SHR and not SHL - VMOVeD(v0, 1, v0, 0); - SSHLQ_32(q0, q0, v0); - break; - case 0xE3: - INST_NAME("PAVGW Gx,Ex"); - nextop = F8; - GETGX(v0); - GETEX(q0, 0); - URHADDQ_16(v0, v0, q0); - break; - case 0xE4: - INST_NAME("PMULHUW Gx,Ex"); - nextop = F8; - GETGX(v0); - GETEX(v1, 0); - q0 = fpu_get_scratch(dyn); - q1 = fpu_get_scratch(dyn); - VUMULL_16(q0, v0, v1); - VUMULL2_16(q1, v0, v1); - UQSHRN_16(v0, q0, 16); - UQSHRN2_16(v0, q1, 16); - break; - case 0xE5: - INST_NAME("PMULHW Gx,Ex"); - nextop = F8; - GETGX(v0); - GETEX(v1, 0); - q0 = fpu_get_scratch(dyn); - q1 = fpu_get_scratch(dyn); - VSMULL_16(q0, v0, v1); - VSMULL2_16(q1, v0, v1); - SQSHRN_16(v0, q0, 16); - SQSHRN2_16(v0, q1, 16); - break; - case 0xE6: - INST_NAME("CVTTPD2DQ Gx, Ex"); - nextop = F8; - GETEX(v1, 0); - GETGX_empty(v0); - VFCVTNSQD(v0, v1); // convert double -> int64 - SQXTN_32(v0, v0); // convert int64 -> int32 with saturation in lower part, RaZ high part - break; - case 0xE7: - INST_NAME("MOVNTDQ Ex, Gx"); - nextop = F8; - GETGX(v0); - if(MODREG) { - v1 = sse_get_reg_empty(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); - VMOVQ(v1, v0); - } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, 0, 0); - VSTR128_U12(v0, ed, fixedaddress); - } - break; - case 0xE8: - INST_NAME("PSUBSB Gx,Ex"); - nextop = F8; - GETGX(v0); - GETEX(q0, 0); - SQSUBQ_8(v0, v0, q0); - break; - case 0xE9: - INST_NAME("PSUBSW Gx,Ex"); - nextop = F8; - GETGX(v0); - GETEX(q0, 0); - SQSUBQ_16(v0, v0, q0); - break; - case 0xEA: - 
INST_NAME("PMINSW Gx,Ex"); - nextop = F8; - GETGX(v0); - GETEX(q0, 0); - SMINQ_16(v0, v0, q0); - break; - case 0xEB: - INST_NAME("POR Gx,Ex"); - nextop = F8; - GETGX(v0); - GETEX(q0, 0); - VORRQ(v0, v0, q0); - break; - case 0xEC: - INST_NAME("PADDSB Gx,Ex"); - nextop = F8; - GETGX(v0); - GETEX(q0, 0); - SQADDQ_8(v0, v0, q0); - break; - case 0xED: - INST_NAME("PADDSW Gx,Ex"); - nextop = F8; - GETGX(v0); - GETEX(q0, 0); - SQADDQ_16(v0, v0, q0); - break; - case 0xEE: - INST_NAME("PMAXSW Gx,Ex"); - nextop = F8; - GETGX(v0); - GETEX(q0, 0); - SMAXQ_16(v0, v0, q0); - break; - case 0xEF: - INST_NAME("PXOR Gx,Ex"); - nextop = F8; - GETG; - if(MODREG && ((nextop&7)+(rex.b<<3)==gd)) { - // special case for PXOR Gx, Gx - q0 = sse_get_reg_empty(dyn, ninst, x1, gd); - VEORQ(q0, q0, q0); - } else { - q0 = sse_get_reg(dyn, ninst, x1, gd); - GETEX(q1, 0); - VEORQ(q0, q0, q1); - } - break; - - case 0xF2: - INST_NAME("PSLLD Gx,Ex"); - nextop = F8; - GETGX(q0); - GETEX(q1, 0); - v0 = fpu_get_scratch(dyn); - VMOVeD(v0, 0, q1, 0); - VMOVeD(v0, 1, q1, 0); - SQXTN_32(v0, v0); // 2*q1 in 32bits now - VMOVeD(v0, 1, v0, 0); - SSHLQ_32(q0, q0, v0); - break; - case 0xF3: - INST_NAME("PSLLQ Gx,Ex"); - nextop = F8; - GETGX(q0); - GETEX(q1, 0); - v0 = fpu_get_scratch(dyn); - VMOVQ(v0, q1); - VMOVeD(v0, 1, v0, 0); - USHLQ_64(q0, q0, v0); - break; - case 0xF4: - INST_NAME("PMULUDQ Gx,Ex"); - nextop = F8; - GETGX(v0); - GETEX(v1, 0); - q0 = fpu_get_scratch(dyn); - VUZP1Q_32(q0, v0, v0); //A3 A2 A1 A0 -> A3 A1 A2 A0 - if(MODREG) { - q1 = fpu_get_scratch(dyn); - } else { - q1 = v1; - } - VUZP1Q_32(q1, v1, v1); - VUMULL_32(v0, q0, q1); - break; - case 0xF5: - INST_NAME("PMADDWD Gx, Ex"); - nextop = F8; - GETGX(v0); - GETEX(v1, 0); - q0 = fpu_get_scratch(dyn); - q1 = fpu_get_scratch(dyn); - VSMULL_16(q0, v0, v1); - VSMULL2_16(q1, v0, v1); - VADDPQ_32(v0, q0, q1); - break; - case 0xF6: - INST_NAME("PSADBW Gx, Ex"); - nextop = F8; - GETGX(q0); - GETEX(q1, 0); - d0 = fpu_get_scratch(dyn); - d1 = fpu_get_scratch(dyn); - VEOR(d1, d1, d1); // is it necessary? 
- UABDL_8(d0, q0, q1); - UADDLVQ_16(d1, d0); - VMOVeD(q0, 0, d1, 0); - UABDL2_8(d0, q0, q1); - UADDLVQ_16(d1, d0); - VMOVeD(q0, 1, d1, 0); - break; - case 0xF7: - INST_NAME("MASKMOVDQU Gx, Ex") - nextop = F8; - GETGX(q0); - GETEX(q1, 0); - v0 = fpu_get_scratch(dyn); - VLDR128_U12(v0, xRDI, 0); - if(MODREG) - v1 = fpu_get_scratch(dyn); // need to preserve the register - else - v1 = q1; - VSSHRQ_8(v1, q1, 7); // get the mask - VBICQ(v0, v0, v1); // mask destination - VANDQ(v1, q0, v1); // mask source - VORRQ(v1, v1, v0); // combine - VSTR128_U12(v1, xRDI, 0); // put back - break; - case 0xF8: - INST_NAME("PSUBB Gx,Ex"); - nextop = F8; - GETGX(v0); - GETEX(q0, 0); - VSUBQ_8(v0, v0, q0); - break; - case 0xF9: - INST_NAME("PSUBW Gx,Ex"); - nextop = F8; - GETGX(v0); - GETEX(q0, 0); - VSUBQ_16(v0, v0, q0); - break; - case 0xFA: - INST_NAME("PSUBD Gx,Ex"); - nextop = F8; - GETGX(v0); - GETEX(q0, 0); - VSUBQ_32(v0, v0, q0); - break; - case 0xFB: - INST_NAME("PSUBQ Gx,Ex"); - nextop = F8; - GETGX(v0); - GETEX(q0, 0); - VSUBQ_64(v0, v0, q0); - break; - case 0xFC: - INST_NAME("PADDB Gx,Ex"); - nextop = F8; - GETGX(v0); - GETEX(q0, 0); - VADDQ_8(v0, v0, q0); - break; - case 0xFD: - INST_NAME("PADDW Gx,Ex"); - nextop = F8; - GETGX(v0); - GETEX(q0, 0); - VADDQ_16(v0, v0, q0); - break; - case 0xFE: - INST_NAME("PADDD Gx,Ex"); - nextop = F8; - GETGX(v0); - GETEX(q0, 0); - VADDQ_32(v0, v0, q0); - break; - - default: - DEFAULT; - } - return addr; -} diff --git a/src/dynarec/dynarec_arm64_6664.c b/src/dynarec/dynarec_arm64_6664.c deleted file mode 100644 index 9d65c104..00000000 --- a/src/dynarec/dynarec_arm64_6664.c +++ /dev/null @@ -1,129 +0,0 @@ -#include -#include -#include -#include -#include - -#include "debug.h" -#include "box64context.h" -#include "dynarec.h" -#include "emu/x64emu_private.h" -#include "emu/x64run_private.h" -#include "x64run.h" -#include "x64emu.h" -#include "box64stack.h" -#include "callback.h" -#include "emu/x64run_private.h" -#include "x64trace.h" -#include "dynarec_arm64.h" -#include "dynarec_arm64_private.h" -#include "arm64_printer.h" - -#include "dynarec_arm64_helper.h" -#include "dynarec_arm64_functions.h" - -#define GETG gd = ((nextop&0x38)>>3)+(rex.r<<3) - -uintptr_t dynarec64_6664(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog) -{ - (void)ip; (void)rep; (void)need_epilog; - - uint8_t opcode = F8; - uint8_t nextop; - uint8_t gd, ed; - int v0, v1; - int64_t fixedaddress; - - // REX prefix before the 66 are ignored - rex.rex = 0; - while(opcode>=0x40 && opcode<=0x4f) { - rex.rex = opcode; - opcode = F8; - } - - /*if(rex.w && opcode!=0x0f) { // rex.w cancels "66", but not for 66 0f type of prefix - MESSAGE(LOG_DUMP, "Here!\n"); - return dynarec64_64(dyn, addr-2, ip, ninst, rex, rep, ok, need_epilog); - }*/ - - switch(opcode) { - - case 0x0F: - opcode = F8; - switch(opcode) { - - case 0xD6: - INST_NAME("MOVQ Ex, Gx"); - nextop = F8; - GETG; - v0 = sse_get_reg(dyn, ninst, x1, gd); - if(MODREG) { - v1 = sse_get_reg_empty(dyn, ninst, x1, (nextop&7) + (rex.b<<3)); - FMOVD(v1, v0); - } else { - grab_segdata(dyn, addr, ninst, x4, _FS); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); - VSTR64_REG(v0, ed, x4); - } - break; - - default: - DEFAULT; - } - break; - - case 0x89: - INST_NAME("MOV FS:Ew, Gw"); - nextop = F8; - GETGD; // don't need GETGW here - if(MODREG) { - ed = xRAX+(nextop&7)+(rex.b<<3); - if(rex.w) { - MOVx_REG(ed, gd); - } else { - if(ed!=gd) { - BFIx(ed, gd, 0, 16); - } - } - } 
else { - grab_segdata(dyn, addr, ninst, x4, _FS); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0, rex, 0, 0); - if(rex.w) { - STRx_REG(gd, ed, x4); - } else { - STRH_REG(gd, ed, x4); - } - } - break; - - case 0x8B: - INST_NAME("MOV Gd, FS:Ed"); - nextop=F8; - GETGD; - if(MODREG) { // reg <= reg - ed = xRAX+(nextop&7)+(rex.b<<3); - if(rex.w) { - MOVx_REG(gd, ed); - } else { - if(ed!=gd) { - BFIx(gd, ed, 0, 16); - } - } - } else { // mem <= reg - grab_segdata(dyn, addr, ninst, x4, _FS); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0, rex, 0, 0); - if(rex.w) { - LDRx_REG(gd, ed, x4); - } else { - LDRH_REG(x1, ed, x4); - BFIx(gd, x1, 0, 16); - } - } - break; - - - default: - DEFAULT; - } - return addr; -} diff --git a/src/dynarec/dynarec_arm64_67.c b/src/dynarec/dynarec_arm64_67.c deleted file mode 100755 index 4e0e0a26..00000000 --- a/src/dynarec/dynarec_arm64_67.c +++ /dev/null @@ -1,428 +0,0 @@ -#include -#include -#include -#include -#include - -#include "debug.h" -#include "box64context.h" -#include "dynarec.h" -#include "emu/x64emu_private.h" -#include "emu/x64run_private.h" -#include "x64run.h" -#include "x64emu.h" -#include "box64stack.h" -#include "callback.h" -#include "emu/x64run_private.h" -#include "x64trace.h" -#include "dynarec_arm64.h" -#include "dynarec_arm64_private.h" -#include "arm64_printer.h" - -#include "dynarec_arm64_helper.h" -#include "dynarec_arm64_functions.h" - -#define GETGX(a) \ - gd = ((nextop&0x38)>>3)+(rex.r<<3); \ - a = sse_get_reg(dyn, ninst, x1, gd) - -#define GETGM(a) \ - gd = ((nextop&0x38)>>3); \ - a = mmx_get_reg(dyn, ninst, x1, gd) - -#define GETGm gd = ((nextop&0x38)>>3) - -uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog) -{ - (void)ip; (void)need_epilog; - - uint8_t opcode = F8; - uint8_t nextop; - uint8_t gd, ed, wback, wb; - int64_t fixedaddress; - int8_t i8; - uint8_t u8; - int32_t i32; - int64_t j64, i64; - int v0, v1, s0; - MAYUSE(i32); - MAYUSE(j64); - MAYUSE(v0); - MAYUSE(v1); - MAYUSE(s0); - - // REX prefix before the 67 are ignored - rex.rex = 0; - while(opcode>=0x40 && opcode<=0x4f) { - rex.rex = opcode; - opcode = F8; - } - rep = 0; - while((opcode==0xF2) || (opcode==0xF3)) { - rep = opcode-0xF1; - opcode = F8; - } - - switch(opcode) { - - case 0x0F: - opcode=F8; - switch(opcode) { - - case 0x2E: - // no special check... 
- case 0x2F: - if(rep) { - DEFAULT; - } else { - if(opcode==0x2F) {INST_NAME("COMISS Gx, Ex");} else {INST_NAME("UCOMISS Gx, Ex");} - SETFLAGS(X_ALL, SF_SET); - nextop = F8; - GETGX(v0); - if(MODREG) { - s0 = sse_get_reg(dyn, ninst, x1, (nextop&7) + (rex.b<<3)); - } else { - s0 = fpu_get_scratch(dyn); - addr = geted32(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); - VLDR32_U12(s0, ed, fixedaddress); - } - FCMPS(v0, s0); - FCOMI(x1, x2); - } - break; - - case 0x6F: - INST_NAME("MOVQ Gm, Em"); - nextop = F8; - GETGm; - if(MODREG) { - v1 = mmx_get_reg(dyn, ninst, x1, nextop&7); // no rex.b on MMX - v0 = mmx_get_reg_empty(dyn, ninst, x1, gd); - VMOVeD(v0, 0, v1, 0); - } else { - v0 = mmx_get_reg_empty(dyn, ninst, x1, gd); - addr = geted32(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0); - VLDR64_U12(v0, ed, fixedaddress); - } - break; - - case 0x7F: - INST_NAME("MOVQ Em, Gm"); - nextop = F8; - GETGM(v0); - if(MODREG) { - v1 = mmx_get_reg_empty(dyn, ninst, x1, nextop&7); - VMOV(v1, v0); - } else { - addr = geted32(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0); - VSTR64_U12(v0, ed, fixedaddress); - } - break; - - default: - DEFAULT; - } - break; - - case 0x89: - INST_NAME("MOV Ed, Gd"); - nextop=F8; - GETGD; - if(MODREG) { // reg <= reg - MOVxw_REG(xRAX+(nextop&7)+(rex.b<<3), gd); - } else { // mem <= reg - addr = geted32(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, 0, 0); - STRxw_U12(gd, ed, fixedaddress); - } - break; - - case 0x8D: - INST_NAME("LEA Gd, Ed"); - nextop=F8; - GETGD; - if(MODREG) { // reg <= reg? that's an invalid operation - DEFAULT; - } else { // mem <= reg - // should a geted32 be created, to use 32bits regs instead of 64bits? 
- addr = geted32(dyn, addr, ninst, nextop, &ed, gd, &fixedaddress, 0, 0, rex, 0, 0); - if(ed!=gd) { - MOVw_REG(gd, ed); - } - } - break; - - case 0xC1: - nextop = F8; - switch((nextop>>3)&7) { - case 0: - INST_NAME("ROL Ed, Ib"); - SETFLAGS(X_OF|X_CF, SF_SUBSET); - GETED32(1); - u8 = (F8)&(rex.w?0x3f:0x1f); - emit_rol32c(dyn, ninst, rex, ed, u8, x3, x4); - if(u8) { WBACK; } - break; - case 1: - INST_NAME("ROR Ed, Ib"); - SETFLAGS(X_OF|X_CF, SF_SUBSET); - GETED32(1); - u8 = (F8)&(rex.w?0x3f:0x1f); - emit_ror32c(dyn, ninst, rex, ed, u8, x3, x4); - if(u8) { WBACK; } - break; - case 2: - INST_NAME("RCL Ed, Ib"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - READFLAGS(X_CF); - SETFLAGS(X_OF|X_CF, SF_SET); - GETED32W(x4, x1, 1); - u8 = F8; - MOV32w(x2, u8); - CALL_(rex.w?((void*)rcl64):((void*)rcl32), ed, x4); - WBACK; - break; - case 3: - INST_NAME("RCR Ed, Ib"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - READFLAGS(X_CF); - SETFLAGS(X_OF|X_CF, SF_SET); - GETED32W(x4, x1, 1); - u8 = F8; - MOV32w(x2, u8); - CALL_(rex.w?((void*)rcr64):((void*)rcr32), ed, x4); - WBACK; - break; - case 4: - case 6: - INST_NAME("SHL Ed, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined - GETED32(1); - u8 = (F8)&(rex.w?0x3f:0x1f); - emit_shl32c(dyn, ninst, rex, ed, u8, x3, x4); - WBACK; - break; - case 5: - INST_NAME("SHR Ed, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined - GETED32(1); - u8 = (F8)&(rex.w?0x3f:0x1f); - emit_shr32c(dyn, ninst, rex, ed, u8, x3, x4); - if(u8) { - WBACK; - } - break; - case 7: - INST_NAME("SAR Ed, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined - GETED32(1); - u8 = (F8)&(rex.w?0x3f:0x1f); - emit_sar32c(dyn, ninst, rex, ed, u8, x3, x4); - if(u8) { - WBACK; - } - break; - } - break; - - #define GO(NO, YES) \ - BARRIER(2); \ - JUMP(addr+i8);\ - if(dyn->insts[ninst].x64.jmp_insts==-1) { \ - /* out of the block */ \ - i32 = dyn->insts[ninst+1].address-(dyn->arm_size); \ - Bcond(NO, i32); \ - jump_to_next(dyn, addr+i8, 0, ninst); \ - } else { \ - /* inside the block */ \ - i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->arm_size); \ - Bcond(YES, i32); \ - } - case 0xE0: - INST_NAME("LOOPNZ (32bits)"); - READFLAGS(X_ZF); - i8 = F8S; - MOVw_REG(x1, xRCX); - SUBSw_U12(x1, x1, 1); - BFIx(xRCX, x1, 0, 32); - B_NEXT(cEQ); // ECX is 0, no LOOP - TSTw_mask(xFlags, 0b011010, 0); //mask=0x40 - GO(cNE, cEQ); - break; - case 0xE1: - INST_NAME("LOOPZ (32bits)"); - READFLAGS(X_ZF); - i8 = F8S; - MOVw_REG(x1, xRCX); - SUBSw_U12(x1, x1, 1); - BFIx(xRCX, x1, 0, 32); - B_NEXT(cEQ); // ECX is 0, no LOOP - TSTw_mask(xFlags, 0b011010, 0); //mask=0x40 - GO(cEQ, cNE); - break; - case 0xE2: - INST_NAME("LOOP (32bits)"); - i8 = F8S; - MOVw_REG(x1, xRCX); - SUBSw_U12(x1, x1, 1); - BFIx(xRCX, x1, 0, 32); - GO(cEQ, cNE); - break; - case 0xE3: - INST_NAME("JECXZ"); - i8 = F8S; - MOVw_REG(x1, xRCX); - TSTw_REG(x1, x1); - GO(cNE, cEQ); - break; - #undef GO - - case 0xE8: - return dynarec64_00(dyn, addr-1, ip, ninst, rex, rep, ok, need_epilog); // addr-1, to "put back" opcode) - - case 0xF7: - nextop = F8; - switch((nextop>>3)&7) { - case 0: - case 1: - INST_NAME("TEST Ed, Id"); - SETFLAGS(X_ALL, SF_SET_PENDING); - GETED32H(x1, 4); - i64 = F32S; - MOV64xw(x2, i64); - emit_test32(dyn, ninst, rex, ed, x2, x3, x4); - break; - case 2: - INST_NAME("NOT Ed"); - GETED32(4); - MVNxw_REG(ed, ed); - WBACK; - break; - case 3: - INST_NAME("NEG Ed"); - SETFLAGS(X_ALL, SF_SET_PENDING); - GETED32(0); - emit_neg32(dyn, ninst, rex, ed, x3, x4); - 
WBACK; - break; - case 4: - INST_NAME("MUL EAX, Ed"); - SETFLAGS(X_ALL, SF_PENDING); - UFLAG_DF(x2, rex.w?d_mul64:d_mul32); - GETED32(0); - if(rex.w) { - if(ed==xRDX) gd=x3; else gd=xRDX; - UMULH(gd, xRAX, ed); - MULx(xRAX, xRAX, ed); - if(gd!=xRDX) {MOVx_REG(xRDX, gd);} - } else { - UMULL(xRDX, xRAX, ed); //64 <- 32x32 - MOVw_REG(xRAX, xRDX); - LSRx(xRDX, xRDX, 32); - } - UFLAG_RES(xRAX); - UFLAG_OP1(xRDX); - break; - case 5: - INST_NAME("IMUL EAX, Ed"); - SETFLAGS(X_ALL, SF_PENDING); - UFLAG_DF(x2, rex.w?d_imul64:d_imul32); - GETED32(0); - if(rex.w) { - if(ed==xRDX) gd=x3; else gd=xRDX; - SMULH(gd, xRAX, ed); - MULx(xRAX, xRAX, ed); - if(gd!=xRDX) {MOVx_REG(xRDX, gd);} - } else { - SMULL(xRDX, xRAX, ed); //64 <- 32x32 - MOVw_REG(xRAX, xRDX); - LSRx(xRDX, xRDX, 32); - } - UFLAG_RES(xRAX); - UFLAG_OP1(xRDX); - break; - case 6: - INST_NAME("DIV Ed"); - SETFLAGS(X_ALL, SF_SET); - if(!rex.w) { - SET_DFNONE(x2); - GETED32(0); - MOVw_REG(x3, xRAX); - ORRx_REG_LSL(x3, x3, xRDX, 32); - if(MODREG) { - MOVw_REG(x4, ed); - ed = x4; - } - UDIVx(x2, x3, ed); - MSUBx(x4, x2, ed, xRAX); - MOVw_REG(xRAX, x2); - MOVw_REG(xRDX, x4); - } else { - if(ninst && dyn->insts - && dyn->insts[ninst-1].x64.addr - && *(uint8_t*)(dyn->insts[ninst-1].x64.addr)==0x31 - && *(uint8_t*)(dyn->insts[ninst-1].x64.addr+1)==0xD2) { - SET_DFNONE(x2); - GETED32(0); - UDIVx(x2, xRAX, ed); - MSUBx(xRDX, x2, ed, xRAX); - MOVx_REG(xRAX, x2); - } else { - GETED32H(x1, 0); // get edd changed addr, so cannot be called 2 times for same op... - CBZxw_MARK(xRDX); - if(ed!=x1) {MOVx_REG(x1, ed);} - CALL(div64, -1); - B_NEXT_nocond; - MARK; - UDIVx(x2, xRAX, ed); - MSUBx(xRDX, x2, ed, xRAX); - MOVx_REG(xRAX, x2); - SET_DFNONE(x2); - } - } - break; - case 7: - INST_NAME("IDIV Ed"); - SETFLAGS(X_ALL, SF_SET); - if(!rex.w) { - SET_DFNONE(x2) - GETSED32w(0); - MOVw_REG(x3, xRAX); - ORRx_REG_LSL(x3, x3, xRDX, 32); - SDIVx(x2, x3, wb); - MSUBx(x4, x2, wb, x3); - MOVw_REG(xRAX, x2); - MOVw_REG(xRDX, x4); - } else { - if(ninst && dyn->insts - && dyn->insts[ninst-1].x64.addr - && *(uint8_t*)(dyn->insts[ninst-1].x64.addr)==0x48 - && *(uint8_t*)(dyn->insts[ninst-1].x64.addr+1)==0x99) { - SET_DFNONE(x2) - GETED32(0); - SDIVx(x2, xRAX, ed); - MSUBx(xRDX, x2, ed, xRAX); - MOVx_REG(xRAX, x2); - } else { - GETED32H(x1, 0); // get edd changed addr, so cannot be called 2 times for same op... 
- CBZxw_MARK(xRDX); - MVNx_REG(x2, xRDX); - CBZxw_MARK(x2); - if(ed!=x1) {MOVx_REG(x1, ed);} - CALL((void*)idiv64, -1); - B_NEXT_nocond; - MARK; - SDIVx(x2, xRAX, ed); - MSUBx(xRDX, x2, ed, xRAX); - MOVx_REG(xRAX, x2); - SET_DFNONE(x2) - } - } - break; - } - break; - - default: - DEFAULT; - } - return addr; -} diff --git a/src/dynarec/dynarec_arm64_d8.c b/src/dynarec/dynarec_arm64_d8.c deleted file mode 100644 index 9b65f764..00000000 --- a/src/dynarec/dynarec_arm64_d8.c +++ /dev/null @@ -1,232 +0,0 @@ -#include -#include -#include -#include -#include - -#include "debug.h" -#include "box64context.h" -#include "dynarec.h" -#include "emu/x64emu_private.h" -#include "emu/x64run_private.h" -#include "x64run.h" -#include "x64emu.h" -#include "box64stack.h" -#include "callback.h" -#include "emu/x64run_private.h" -#include "x64trace.h" -#include "dynarec_arm64.h" -#include "dynarec_arm64_private.h" -#include "arm64_printer.h" -#include "emu/x87emu_private.h" - -#include "dynarec_arm64_helper.h" -#include "dynarec_arm64_functions.h" - - -uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog) -{ - (void)ip; (void)rep; (void)need_epilog; - - uint8_t nextop = F8; - uint8_t ed; - int64_t fixedaddress; - int v1, v2; - int s0; - - MAYUSE(s0); - MAYUSE(v2); - MAYUSE(v1); - - switch(nextop) { - case 0xC0: - case 0xC1: - case 0xC2: - case 0xC3: - case 0xC4: - case 0xC5: - case 0xC6: - case 0xC7: - INST_NAME("FADD ST0, STx"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0); - v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7); - FADDD(v1, v1, v2); - break; - case 0xC8: - case 0xC9: - case 0xCA: - case 0xCB: - case 0xCC: - case 0xCD: - case 0xCE: - case 0xCF: - INST_NAME("FMUL ST0, STx"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0); - v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7); - FMULD(v1, v1, v2); - break; - case 0xD0: - case 0xD1: - case 0xD2: - case 0xD3: - case 0xD4: - case 0xD5: - case 0xD6: - case 0xD7: - INST_NAME("FCOM ST0, STx"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0); - v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7); - FCMPD(v1, v2); - FCOM(x1, x2, x3); - break; - case 0xD8: - case 0xD9: - case 0xDA: - case 0xDB: - case 0xDC: - case 0xDD: - case 0xDE: - case 0xDF: - INST_NAME("FCOMP ST0, STx"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0); - v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7); - FCMPD(v1, v2); - FCOM(x1, x2, x3); - x87_do_pop(dyn, ninst); - break; - case 0xE0: - case 0xE1: - case 0xE2: - case 0xE3: - case 0xE4: - case 0xE5: - case 0xE6: - case 0xE7: - INST_NAME("FSUB ST0, STx"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0); - v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7); - FSUBD(v1, v1, v2); - break; - case 0xE8: - case 0xE9: - case 0xEA: - case 0xEB: - case 0xEC: - case 0xED: - case 0xEE: - case 0xEF: - INST_NAME("FSUBR ST0, STx"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0); - v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7); - FSUBD(v1, v2, v1); - break; - case 0xF0: - case 0xF1: - case 0xF2: - case 0xF3: - case 0xF4: - case 0xF5: - case 0xF6: - case 0xF7: - INST_NAME("FDIV ST0, STx"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0); - v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7); - FDIVD(v1, v1, v2); - break; - case 0xF8: - case 0xF9: - case 0xFA: - case 0xFB: - case 0xFC: - case 0xFD: - case 0xFE: - case 0xFF: - INST_NAME("FDIVR ST0, STx"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0); - v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7); - FDIVD(v1, v2, v1); - break; - - default: - switch((nextop>>3)&7) { - case 0: - INST_NAME("FADD ST0, 
float[ED]"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0); - s0 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); - VLDR32_U12(s0, ed, fixedaddress); - FCVT_D_S(s0, s0); - FADDD(v1, v1, s0); - break; - case 1: - INST_NAME("FMUL ST0, float[ED]"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0); - s0 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); - VLDR32_U12(s0, ed, fixedaddress); - FCVT_D_S(s0, s0); - FMULD(v1, v1, s0); - break; - case 2: - INST_NAME("FCOM ST0, float[ED]"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0); - s0 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); - VLDR32_U12(s0, ed, fixedaddress); - FCVT_D_S(s0, s0); - FCMPD(v1, s0); - FCOM(x1, x2, x3); - break; - case 3: - INST_NAME("FCOMP ST0, float[ED]"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0); - s0 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); - VLDR32_U12(s0, ed, fixedaddress); - FCVT_D_S(s0, s0); - FCMPD(v1, s0); - FCOM(x1, x2, x3); - x87_do_pop(dyn, ninst); - break; - case 4: - INST_NAME("FSUB ST0, float[ED]"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0); - s0 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); - VLDR32_U12(s0, ed, fixedaddress); - FCVT_D_S(s0, s0); - FSUBD(v1, v1, s0); - break; - case 5: - INST_NAME("FSUBR ST0, float[ED]"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0); - s0 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); - VLDR32_U12(s0, ed, fixedaddress); - FCVT_D_S(s0, s0); - FSUBD(v1, s0, v1); - break; - case 6: - INST_NAME("FDIV ST0, float[ED]"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0); - s0 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); - VLDR32_U12(s0, ed, fixedaddress); - FCVT_D_S(s0, s0); - FDIVD(v1, v1, s0); - break; - case 7: - INST_NAME("FDIVR ST0, float[ED]"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0); - s0 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); - VLDR32_U12(s0, ed, fixedaddress); - FCVT_D_S(s0, s0); - FDIVD(v1, s0, v1); - break; - default: - DEFAULT; - } - } - return addr; -} diff --git a/src/dynarec/dynarec_arm64_d9.c b/src/dynarec/dynarec_arm64_d9.c deleted file mode 100644 index 8089b68d..00000000 --- a/src/dynarec/dynarec_arm64_d9.c +++ /dev/null @@ -1,356 +0,0 @@ -#include -#include -#include -#include -#include - -#include "debug.h" -#include "box64context.h" -#include "dynarec.h" -#include "emu/x64emu_private.h" -#include "emu/x64run_private.h" -#include "x64run.h" -#include "x64emu.h" -#include "box64stack.h" -#include "callback.h" -#include "emu/x64run_private.h" -#include "x64trace.h" -#include "dynarec_arm64.h" -#include "dynarec_arm64_private.h" -#include "arm64_printer.h" -#include "emu/x87emu_private.h" - -#include "dynarec_arm64_helper.h" -#include "dynarec_arm64_functions.h" - - -uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog) -{ - (void)ip; (void)rep; (void)need_epilog; - - uint8_t nextop = F8; - uint8_t ed; - uint8_t wback, wb1; - int64_t fixedaddress; - int v1, v2; - int s0; - int i1, i2, i3; - - MAYUSE(s0); - MAYUSE(v2); - MAYUSE(v1); - - switch(nextop) { - case 0xC0: - case 0xC1: - case 
0xC2: - case 0xC3: - case 0xC4: - case 0xC5: - case 0xC6: - case 0xC7: - INST_NAME("FLD STx"); - v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7); - v2 = x87_do_push(dyn, ninst); - FMOVD(v2, v1); - break; - - case 0xC8: - case 0xC9: - case 0xCA: - case 0xCB: - case 0xCC: - case 0xCD: - case 0xCE: - case 0xCF: - INST_NAME("FXCH STx"); - // swap the cache value, not the double value itself :p - i1 = x87_get_cache(dyn, ninst, x1, x2, nextop&7); - i2 = x87_get_cache(dyn, ninst, x1, x2, 0); - i3 = dyn->x87cache[i1]; - dyn->x87cache[i1] = dyn->x87cache[i2]; - dyn->x87cache[i2] = i3; - break; - - case 0xD0: - INST_NAME("FNOP"); - break; - - case 0xE0: - INST_NAME("FCHS"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0); - FNEGD(v1, v1); - break; - case 0xE1: - INST_NAME("FABS"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0); - FABSD(v1, v1); - break; - - case 0xE4: - INST_NAME("FTST"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0); - FCMPD_0(v1); - FCOM(x1, x2, x3); // same flags... - break; - case 0xE5: - INST_NAME("FXAM"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - x87_refresh(dyn, ninst, x1, x2, 0); - CALL(fpu_fxam, -1); // should be possible inline, but is it worth it? - break; - - case 0xE8: - INST_NAME("FLD1"); - v1 = x87_do_push(dyn, ninst); - FTABLE64(v1, 1.0); - break; - case 0xE9: - INST_NAME("FLDL2T"); - v1 = x87_do_push(dyn, ninst); - FTABLE64(v1, L2T); - break; - case 0xEA: - INST_NAME("FLDL2E"); - v1 = x87_do_push(dyn, ninst); - FTABLE64(v1, L2E); - break; - case 0xEB: - INST_NAME("FLDPI"); - v1 = x87_do_push(dyn, ninst); - FTABLE64(v1, PI); - break; - case 0xEC: - INST_NAME("FLDLG2"); - v1 = x87_do_push(dyn, ninst); - FTABLE64(v1, LG2); - break; - case 0xED: - INST_NAME("FLDLN2"); - v1 = x87_do_push(dyn, ninst); - FTABLE64(v1, LN2); - break; - case 0xEE: - INST_NAME("FLDZ"); - v1 = x87_do_push(dyn, ninst); - FTABLE64(v1, 0.0); - break; - - case 0xFA: - INST_NAME("FSQRT"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0); - FSQRTD(v1, v1); - break; - - case 0xFC: - INST_NAME("FRNDINT"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - // use C helper for now, nothing staightforward is available - x87_forget(dyn, ninst, x1, x2, 0); - CALL(arm_frndint, -1); - /* - v1 = x87_get_st(dyn, ninst, x1, x2, 0); - VCMP_F64_0(v1); - VMRS_APSR(); - B_NEXT(cVS); // Unordered, skip - B_NEXT(cEQ); // Zero, skip - u8 = x87_setround(dyn, ninst, x1, x2, x3); - VCVT_S32_F64(x1, v1); // limit to 32bits.... 
- VCVT_F64_S32(v1, x1); - x87_restoreround(dyn, ninst, u8); - */ - break; - case 0xF0: - INST_NAME("F2XM1"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - x87_forget(dyn, ninst, x1, x2, 0); - CALL(arm_f2xm1, -1); - break; - case 0xF1: - INST_NAME("FYL2X"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - x87_forget(dyn, ninst, x1, x2, 0); - x87_forget(dyn, ninst, x1, x2, 1); - CALL(arm_fyl2x, -1); - x87_do_pop(dyn, ninst); - break; - case 0xF2: - INST_NAME("FTAN"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - x87_forget(dyn, ninst, x1, x2, 0); - CALL(arm_ftan, -1); - v1 = x87_do_push(dyn, ninst); - FTABLE64(v1, 1.0); - break; - case 0xF3: - INST_NAME("FPATAN"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - x87_forget(dyn, ninst, x1, x2, 0); - x87_forget(dyn, ninst, x1, x2, 1); - CALL(arm_fpatan, -1); - x87_do_pop(dyn, ninst); - break; - case 0xF4: - INST_NAME("FXTRACT"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - x87_do_push_empty(dyn, ninst, 0); - x87_forget(dyn, ninst, x1, x2, 1); - CALL(arm_fxtract, -1); - break; - case 0xF5: - INST_NAME("FPREM1"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - x87_forget(dyn, ninst, x1, x2, 0); - x87_forget(dyn, ninst, x1, x2, 1); - CALL(arm_fprem1, -1); - break; - case 0xF6: - INST_NAME("FDECSTP"); - fpu_purgecache(dyn, ninst, x1, x2, x3); - LDRw_U12(x2, xEmu, offsetof(x64emu_t, top)); - SUBw_U12(x2, x2, 1); - ANDw_mask(x2, x2, 0, 2); //mask=7 - STRw_U12(x2, xEmu, offsetof(x64emu_t, top)); - break; - case 0xF7: - INST_NAME("FINCSTP"); - fpu_purgecache(dyn, ninst, x1, x2, x3); - LDRw_U12(x2, xEmu, offsetof(x64emu_t, top)); - ADDw_U12(x2, x2, 1); - ANDw_mask(x2, x2, 0, 2); //mask=7 - STRw_U12(x2, xEmu, offsetof(x64emu_t, top)); - break; - case 0xF8: - INST_NAME("FPREM"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - x87_forget(dyn, ninst, x1, x2, 0); - x87_forget(dyn, ninst, x1, x2, 1); - CALL(arm_fprem, -1); - break; - case 0xF9: - INST_NAME("FYL2XP1"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - x87_forget(dyn, ninst, x1, x2, 0); - x87_forget(dyn, ninst, x1, x2, 1); - CALL(arm_fyl2xp1, -1); - x87_do_pop(dyn, ninst); - break; - case 0xFB: - INST_NAME("FSINCOS"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - x87_do_push_empty(dyn, ninst, 0); - x87_forget(dyn, ninst, x1, x2, 1); - CALL(arm_fsincos, -1); - break; - case 0xFD: - INST_NAME("FSCALE"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - x87_forget(dyn, ninst, x1, x2, 0); - x87_forget(dyn, ninst, x1, x2, 1); - CALL(arm_fscale, -1); - break; - case 0xFE: - INST_NAME("FSIN"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - x87_forget(dyn, ninst, x1, x2, 0); - CALL(arm_fsin, -1); - break; - case 0xFF: - INST_NAME("FCOS"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - x87_forget(dyn, ninst, x1, x2, 0); - CALL(arm_fcos, -1); - break; - - - case 0xD1: - case 0xD4: - case 0xD5: - case 0xD6: - case 0xD7: - case 0xD8: - case 0xD9: - case 0xDA: - case 0xDB: - case 0xDC: - case 0xDD: - case 0xDE: - case 0xDF: - case 0xE2: - case 0xE3: - case 0xE6: - case 0xE7: - case 0xEF: - DEFAULT; - break; - - default: - switch((nextop>>3)&7) { - case 0: - INST_NAME("FLD ST0, float[ED]"); - v1 = x87_do_push(dyn, ninst); - s0 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); - VLDR32_U12(s0, ed, fixedaddress); - FCVT_D_S(v1, s0); - break; - case 2: - INST_NAME("FST float[ED], ST0"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0); - s0 = fpu_get_scratch(dyn); - FCVT_S_D(s0, v1); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); - 
VSTR32_U12(s0, ed, fixedaddress); - break; - case 3: - INST_NAME("FSTP float[ED], ST0"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0); - s0 = fpu_get_scratch(dyn); - FCVT_S_D(s0, v1); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); - VSTR32_U12(s0, ed, fixedaddress); - x87_do_pop(dyn, ninst); - break; - case 4: - INST_NAME("FLDENV Ed"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - fpu_purgecache(dyn, ninst, x1, x2, x3); // maybe only x87, not SSE? - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); - if(ed!=x1) { - MOVx_REG(x1, ed); - } - MOV32w(x2, 0); - CALL(fpu_loadenv, -1); - break; - case 5: - INST_NAME("FLDCW Ew"); - GETEW(x1, 0); - STRH_U12(x1, xEmu, offsetof(x64emu_t, cw)); // hopefully cw is not too far for an imm8 - UBFXw(x1, x1, 10, 2); // extract round - STRw_U12(x1, xEmu, offsetof(x64emu_t, round)); - break; - case 6: - INST_NAME("FNSTENV Ed"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - fpu_purgecache(dyn, ninst, x1, x2, x3); // maybe only x87, not SSE? - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); - if(ed!=x1) { - MOVx_REG(x1, ed); - } - MOV32w(x2, 0); - CALL(fpu_savenv, -1); - break; - case 7: - INST_NAME("FNSTCW Ew"); - addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<1, 1, rex, 0, 0); - ed = x1; - wb1 = 1; - LDRH_U12(x1, xEmu, offsetof(x64emu_t, cw)); - EWBACK; - break; - default: - DEFAULT; - } - } - return addr; -} diff --git a/src/dynarec/dynarec_arm64_db.c b/src/dynarec/dynarec_arm64_db.c deleted file mode 100644 index 52204ffe..00000000 --- a/src/dynarec/dynarec_arm64_db.c +++ /dev/null @@ -1,307 +0,0 @@ -#include -#include -#include -#include -#include - -#include "debug.h" -#include "box64context.h" -#include "dynarec.h" -#include "emu/x64emu_private.h" -#include "emu/x64run_private.h" -#include "x64run.h" -#include "x64emu.h" -#include "box64stack.h" -#include "callback.h" -#include "emu/x64run_private.h" -#include "x64trace.h" -#include "dynarec_arm64.h" -#include "dynarec_arm64_private.h" -#include "arm64_printer.h" -#include "emu/x87emu_private.h" - -#include "dynarec_arm64_helper.h" -#include "dynarec_arm64_functions.h" - - -uintptr_t dynarec64_DB(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog) -{ - (void)ip; (void)rep; (void)need_epilog; - - uint8_t nextop = F8; - uint8_t ed; - uint8_t wback; - uint8_t u8; - int64_t fixedaddress; - int v1, v2; - int s0; - int64_t j64; - - MAYUSE(s0); - MAYUSE(v2); - MAYUSE(v1); - MAYUSE(j64); - - switch(nextop) { - case 0xC0: - case 0xC1: - case 0xC2: - case 0xC3: - case 0xC4: - case 0xC5: - case 0xC6: - case 0xC7: - INST_NAME("FCMOVNB ST0, STx"); - READFLAGS(X_CF); - v1 = x87_get_st(dyn, ninst, x1, x2, 0); - v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7); - TSTw_mask(xFlags, 0, 0); //mask=1<>3)&7) { - case 0: - INST_NAME("FILD ST0, Ed"); - v1 = x87_do_push(dyn, ninst); - s0 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); - VLDR32_U12(s0, ed, fixedaddress); - SXTL_32(v1, s0); - SCVTFDD(v1, v1); - break; - case 1: - INST_NAME("FISTTP Ed, ST0"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0); - if(MODREG) { - ed = xRAX+(nextop&7)+(rex.b<<3); - wback = 0; - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); - ed = x1; - } - s0 = fpu_get_scratch(dyn); - #if 0 - FRINT32ZD(s0, v1); - FCVTZSwD(ed, s0); - WBACK; - #else - MRS_fpsr(x5); 
- BFCw(x5, FPSR_IOC, 1); // reset IOC bit - MSR_fpsr(x5); - FRINTZD(s0, v1); - VFCVTZSd(s0, s0); - SQXTN_S_D(s0, s0); - VSTR32_U12(s0, wback, fixedaddress); - MRS_fpsr(x5); // get back FPSR to check the IOC bit - TBZ_MARK3(x5, FPSR_IOC); - MOV32w(x5, 0x80000000); - STRw_U12(x5, wback, fixedaddress); - MARK3; - #endif - x87_do_pop(dyn, ninst); - break; - case 2: - INST_NAME("FIST Ed, ST0"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0); - u8 = x87_setround(dyn, ninst, x1, x2, x4); // x1 has the modified FPCR reg - if(MODREG) { - ed = xRAX+(nextop&7)+(rex.b<<3); - wback = 0; - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); - ed = x1; - } - s0 = fpu_get_scratch(dyn); - #if 0 - FRINT32XD(s0, v1); - FCVTZSwD(ed, s0); - WBACK; - #else - MRS_fpsr(x5); - BFCw(x5, FPSR_IOC, 1); // reset IOC bit - MSR_fpsr(x5); - FRINTXD(s0, v1); - VFCVTZSd(s0, s0); - SQXTN_S_D(s0, s0); - VSTR32_U12(s0, wback, fixedaddress); - MRS_fpsr(x5); // get back FPSR to check the IOC bit - TBZ_MARK3(x5, FPSR_IOC); - MOV32w(x5, 0x80000000); - STRw_U12(x5, wback, fixedaddress); - MARK3; - #endif - x87_restoreround(dyn, ninst, u8); - break; - case 3: - INST_NAME("FISTP Ed, ST0"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0); - u8 = x87_setround(dyn, ninst, x1, x2, x4); // x1 has the modified FPCR reg - if(MODREG) { - ed = xRAX+(nextop&7)+(rex.b<<3); - wback = 0; - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); - ed = x1; - } - s0 = fpu_get_scratch(dyn); - #if 0 - FRINT32XD(s0, v1); - FCVTZSwD(ed, s0); - WBACK; - #else - MRS_fpsr(x5); - BFCw(x5, FPSR_IOC, 1); // reset IOC bit - MSR_fpsr(x5); - FRINTXD(s0, v1); - VFCVTZSd(s0, s0); - SQXTN_S_D(s0, s0); - VSTR32_U12(s0, wback, fixedaddress); - MRS_fpsr(x5); // get back FPSR to check the IOC bit - TBZ_MARK3(x5, FPSR_IOC); - MOV32w(x5, 0x80000000); - STRw_U12(x5, wback, fixedaddress); - MARK3; - #endif - x87_restoreround(dyn, ninst, u8); - x87_do_pop(dyn, ninst); - break; - case 5: - INST_NAME("FLD tbyte"); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); - if(PK(0)==0xDB && ((PK(1)>>3)&7)==7) { - // the FLD is immediately followed by an FSTP - LDRx_U12(x5, ed, 0); - LDRH_U12(x6, ed, 8); - // no persistent scratch register, so unroll both instructions here...
- MESSAGE(LOG_DUMP, "\tHack: FSTP tbyte\n"); - nextop = F8; //0xDB - nextop = F8; //modrm - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); - STRx_U12(x5, ed, 0); - STRH_U12(x6, ed, 8); - } else { - if(ed!=x1) { - MOVx_REG(x1, ed); - } - x87_do_push_empty(dyn, ninst, x3); - CALL(arm_fld, -1); - } - break; - case 7: - INST_NAME("FSTP tbyte"); - x87_forget(dyn, ninst, x1, x3, 0); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); - if(ed!=x1) { - MOVx_REG(x1, ed); - } - CALL(arm_fstp, -1); - x87_do_pop(dyn, ninst); - break; - default: - DEFAULT; - } - } - return addr; -} diff --git a/src/dynarec/dynarec_arm64_dc.c b/src/dynarec/dynarec_arm64_dc.c deleted file mode 100644 index 0fa8ddb2..00000000 --- a/src/dynarec/dynarec_arm64_dc.c +++ /dev/null @@ -1,219 +0,0 @@ -#include -#include -#include -#include -#include - -#include "debug.h" -#include "box64context.h" -#include "dynarec.h" -#include "emu/x64emu_private.h" -#include "emu/x64run_private.h" -#include "x64run.h" -#include "x64emu.h" -#include "box64stack.h" -#include "callback.h" -#include "emu/x64run_private.h" -#include "x64trace.h" -#include "dynarec_arm64.h" -#include "dynarec_arm64_private.h" -#include "arm64_printer.h" -#include "emu/x87emu_private.h" - -#include "dynarec_arm64_helper.h" -#include "dynarec_arm64_functions.h" - - -uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog) -{ - (void)ip; (void)rep; (void)need_epilog; - - uint8_t nextop = F8; - uint8_t wback; - int64_t fixedaddress; - int v1, v2; - - MAYUSE(v2); - MAYUSE(v1); - - switch(nextop) { - case 0xC0: - case 0xC1: - case 0xC2: - case 0xC3: - case 0xC4: - case 0xC5: - case 0xC6: - case 0xC7: - INST_NAME("FADD STx, ST0"); - v2 = x87_get_st(dyn, ninst, x1, x2, 0); - v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7); - FADDD(v1, v1, v2); - break; - case 0xC8: - case 0xC9: - case 0xCA: - case 0xCB: - case 0xCC: - case 0xCD: - case 0xCE: - case 0xCF: - INST_NAME("FMUL STx, ST0"); - v2 = x87_get_st(dyn, ninst, x1, x2, 0); - v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7); - FMULD(v1, v1, v2); - break; - case 0xD0: - case 0xD1: - case 0xD2: - case 0xD3: - case 0xD4: - case 0xD5: - case 0xD6: - case 0xD7: - INST_NAME("FCOM ST0, STx"); //yep - v1 = x87_get_st(dyn, ninst, x1, x2, 0); - v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7); - FCMPD(v1, v2); - FCOM(x1, x2, x3); - break; - case 0xD8: - case 0xD9: - case 0xDA: - case 0xDB: - case 0xDC: - case 0xDD: - case 0xDE: - case 0xDF: - INST_NAME("FCOMP ST0, STx"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0); - v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7); - FCMPD(v1, v2); - FCOM(x1, x2, x3); - x87_do_pop(dyn, ninst); - break; - case 0xE0: - case 0xE1: - case 0xE2: - case 0xE3: - case 0xE4: - case 0xE5: - case 0xE6: - case 0xE7: - INST_NAME("FSUBR STx, ST0"); - v2 = x87_get_st(dyn, ninst, x1, x2, 0); - v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7); - FSUBD(v1, v2, v1); - break; - case 0xE8: - case 0xE9: - case 0xEA: - case 0xEB: - case 0xEC: - case 0xED: - case 0xEE: - case 0xEF: - INST_NAME("FSUB STx, ST0"); - v2 = x87_get_st(dyn, ninst, x1, x2, 0); - v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7); - FSUBD(v1, v1, v2); - break; - case 0xF0: - case 0xF1: - case 0xF2: - case 0xF3: - case 0xF4: - case 0xF5: - case 0xF6: - case 0xF7: - INST_NAME("FDIVR STx, ST0"); - v2 = x87_get_st(dyn, ninst, x1, x2, 0); - v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7); - FDIVD(v1, v2, v1); - break; - case 0xF8: - case 
0xF9: - case 0xFA: - case 0xFB: - case 0xFC: - case 0xFD: - case 0xFE: - case 0xFF: - INST_NAME("FDIV STx, ST0"); - v2 = x87_get_st(dyn, ninst, x1, x2, 0); - v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7); - FDIVD(v1, v1, v2); - break; - default: - switch((nextop>>3)&7) { - case 0: - INST_NAME("FADD ST0, double[ED]"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0); - v2 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<3, 3, rex, 0, 0); - VLDR64_U12(v2, wback, fixedaddress); - FADDD(v1, v1, v2); - break; - case 1: - INST_NAME("FMUL ST0, double[ED]"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0); - v2 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<3, 3, rex, 0, 0); - VLDR64_U12(v2, wback, fixedaddress); - FMULD(v1, v1, v2); - break; - case 2: - INST_NAME("FCOM ST0, double[ED]"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0); - v2 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<3, 3, rex, 0, 0); - VLDR64_U12(v2, wback, fixedaddress); - FCMPD(v1, v2); - FCOM(x1, x2, x3); - break; - case 3: - INST_NAME("FCOMP ST0, double[ED]"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0); - v2 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<3, 3, rex, 0, 0); - VLDR64_U12(v2, wback, fixedaddress); - FCMPD(v1, v2); - FCOM(x1, x2, x3); - x87_do_pop(dyn, ninst); - break; - case 4: - INST_NAME("FSUB ST0, double[ED]"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0); - v2 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<3, 3, rex, 0, 0); - VLDR64_U12(v2, wback, fixedaddress); - FSUBD(v1, v1, v2); - break; - case 5: - INST_NAME("FSUBR ST0, double[ED]"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0); - v2 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<3, 3, rex, 0, 0); - VLDR64_U12(v2, wback, fixedaddress); - FSUBD(v1, v2, v1); - break; - case 6: - INST_NAME("FDIV ST0, double[ED]"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0); - v2 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<3, 3, rex, 0, 0); - VLDR64_U12(v2, wback, fixedaddress); - FDIVD(v1, v1, v2); - break; - case 7: - INST_NAME("FDIVR ST0, double[ED]"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0); - v2 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<3, 3, rex, 0, 0); - VLDR64_U12(v2, wback, fixedaddress); - FDIVD(v1, v2, v1); - break; - } - } - return addr; -} diff --git a/src/dynarec/dynarec_arm64_dd.c b/src/dynarec/dynarec_arm64_dd.c deleted file mode 100644 index 4068af44..00000000 --- a/src/dynarec/dynarec_arm64_dd.c +++ /dev/null @@ -1,205 +0,0 @@ -#include -#include -#include -#include -#include - -#include "debug.h" -#include "box64context.h" -#include "dynarec.h" -#include "emu/x64emu_private.h" -#include "emu/x64run_private.h" -#include "x64run.h" -#include "x64emu.h" -#include "box64stack.h" -#include "callback.h" -#include "emu/x64run_private.h" -#include "x64trace.h" -#include "dynarec_arm64.h" -#include "dynarec_arm64_private.h" -#include "arm64_printer.h" -#include "emu/x87emu_private.h" - -#include "dynarec_arm64_helper.h" -#include "dynarec_arm64_functions.h" - - -uintptr_t dynarec64_DD(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog) -{ - (void)ip; (void)rep; (void)need_epilog; - - uint8_t nextop = F8; - uint8_t ed; - 
int64_t fixedaddress; - int v1, v2; - int s0; - - MAYUSE(s0); - MAYUSE(v2); - MAYUSE(v1); - - switch(nextop) { - case 0xC0: - case 0xC1: - case 0xC2: - case 0xC3: - case 0xC4: - case 0xC5: - case 0xC6: - case 0xC7: - INST_NAME("FFREE STx"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - x87_purgecache(dyn, ninst, x1, x2, x3); - MOV32w(x1, nextop-0xC0); - CALL(fpu_do_free, -1); - break; - case 0xD0: - case 0xD1: - case 0xD2: - case 0xD3: - case 0xD4: - case 0xD5: - case 0xD6: - case 0xD7: - INST_NAME("FST ST0, STx"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0); - v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7); - FMOVD(v2, v1); - break; - case 0xD8: - INST_NAME("FSTP ST0, ST0"); - x87_do_pop(dyn, ninst); - break; - case 0xD9: - case 0xDA: - case 0xDB: - case 0xDC: - case 0xDD: - case 0xDE: - case 0xDF: - INST_NAME("FSTP ST0, STx"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0); - v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7); - FMOVD(v2, v1); - x87_do_pop(dyn, ninst); - break; - - case 0xE0: - case 0xE1: - case 0xE2: - case 0xE3: - case 0xE4: - case 0xE5: - case 0xE6: - case 0xE7: - INST_NAME("FUCOM ST0, STx"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0); - v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7); - FCMPD(v1, v2); - FCOM(x1, x2, x3); - break; - case 0xE8: - case 0xE9: - case 0xEA: - case 0xEB: - case 0xEC: - case 0xED: - case 0xEE: - case 0xEF: - INST_NAME("FUCOMP ST0, STx"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0); - v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7); - FCMPD(v1, v2); - FCOM(x1, x2, x3); - x87_do_pop(dyn, ninst); - break; - - case 0xC8: - case 0xC9: - case 0xCA: - case 0xCB: - case 0xCC: - case 0xCD: - case 0xCE: - case 0xCF: - case 0xF0: - case 0xF1: - case 0xF2: - case 0xF3: - case 0xF4: - case 0xF5: - case 0xF6: - case 0xF7: - case 0xF8: - case 0xF9: - case 0xFA: - case 0xFB: - case 0xFC: - case 0xFD: - case 0xFE: - case 0xFF: - DEFAULT; - break; - - default: - switch((nextop>>3)&7) { - case 0: - INST_NAME("FLD double"); - v1 = x87_do_push(dyn, ninst); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0); - VLDR64_U12(v1, ed, fixedaddress); - break; - case 1: - INST_NAME("FISTTP i64, ST0"); - v1 = x87_do_push(dyn, ninst); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0); - s0 = fpu_get_scratch(dyn); - FRINT64ZD(s0, v1); - FCVTZSxD(x2, s0); - STRx_U12(x2, ed, fixedaddress); - x87_do_pop(dyn, ninst); - break; - case 2: - INST_NAME("FST double"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0); - VSTR64_U12(v1, ed, fixedaddress); - break; - case 3: - INST_NAME("FSTP double"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0); - VSTR64_U12(v1, ed, fixedaddress); - x87_do_pop(dyn, ninst); - break; - case 4: - INST_NAME("FRSTOR m108byte"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - fpu_purgecache(dyn, ninst, x1, x2, x3); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); - if(ed!=x1) {MOVx_REG(x1, ed);} - CALL(arm_frstor, -1); - break; - case 6: - INST_NAME("FSAVE m108byte"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - fpu_purgecache(dyn, ninst, x1, x2, x3); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); - if(ed!=x1) {MOVx_REG(x1, ed);} - CALL(arm_fsave, -1); - break; - case 7: - INST_NAME("FNSTSW m2byte"); - fpu_purgecache(dyn, ninst, x1, x2, x3); - addr = geted(dyn, addr, ninst, 
nextop, &ed, x4, &fixedaddress, 0xfff<<1, 1, rex, 0, 0); - LDRw_U12(x1, xEmu, offsetof(x64emu_t, top)); - LDRH_U12(x3, xEmu, offsetof(x64emu_t, sw)); - BFIw(x3, x1, 11, 3); // inject TOP at bit 11 (3 bits) - STRH_U12(x3, ed, fixedaddress); // store whole sw flags - break; - default: - DEFAULT; - } - } - return addr; -} diff --git a/src/dynarec/dynarec_arm64_df.c b/src/dynarec/dynarec_arm64_df.c deleted file mode 100644 index c1ace798..00000000 --- a/src/dynarec/dynarec_arm64_df.c +++ /dev/null @@ -1,295 +0,0 @@ -#include -#include -#include -#include -#include - -#include "debug.h" -#include "box64context.h" -#include "dynarec.h" -#include "emu/x64emu_private.h" -#include "emu/x64run_private.h" -#include "x64run.h" -#include "x64emu.h" -#include "box64stack.h" -#include "callback.h" -#include "emu/x64run_private.h" -#include "x64trace.h" -#include "dynarec_arm64.h" -#include "dynarec_arm64_private.h" -#include "arm64_printer.h" -#include "emu/x87emu_private.h" - -#include "dynarec_arm64_helper.h" -#include "dynarec_arm64_functions.h" - - -uintptr_t dynarec64_DF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog) -{ - (void)ip; (void)rep; (void)need_epilog; - - uint8_t nextop = F8; - uint8_t ed, wback, u8; - int v1, v2; - int s0; - int64_t j64; - int64_t fixedaddress; - - MAYUSE(s0); - MAYUSE(v2); - MAYUSE(v1); - MAYUSE(j64); - - switch(nextop) { - case 0xC0: - case 0xC1: - case 0xC2: - case 0xC3: - case 0xC4: - case 0xC5: - case 0xC6: - case 0xC7: - INST_NAME("FFREEP STx"); - // not handling Tag... - x87_do_pop(dyn, ninst); - break; - - case 0xE0: - INST_NAME("FNSTSW AX"); - LDRw_U12(x2, xEmu, offsetof(x64emu_t, top)); - LDRH_U12(x1, xEmu, offsetof(x64emu_t, sw)); - BFIw(x1, x2, 11, 3); // inject top - BFIw(xRAX, x1, 0, 16); - break; - case 0xE8: - case 0xE9: - case 0xEA: - case 0xEB: - case 0xEC: - case 0xED: - case 0xEE: - case 0xEF: - INST_NAME("FUCOMIP ST0, STx"); - SETFLAGS(X_ALL, SF_SET); - v1 = x87_get_st(dyn, ninst, x1, x2, 0); - v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7); - FCMPD(v1, v2); - FCOMI(x1, x2); - x87_do_pop(dyn, ninst); - break; - case 0xF0: - case 0xF1: - case 0xF2: - case 0xF3: - case 0xF4: - case 0xF5: - case 0xF6: - case 0xF7: - INST_NAME("FCOMIP ST0, STx"); - SETFLAGS(X_ALL, SF_SET); - v1 = x87_get_st(dyn, ninst, x1, x2, 0); - v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7); - FCMPD(v1, v2); - FCOMI(x1, x2); - x87_do_pop(dyn, ninst); - break; - - case 0xC8: - case 0xC9: - case 0xCA: - case 0xCB: - case 0xCC: - case 0xCD: - case 0xCE: - case 0xCF: - case 0xD0: - case 0xD1: - case 0xD2: - case 0xD3: - case 0xD4: - case 0xD5: - case 0xD6: - case 0xD7: - case 0xD8: - case 0xD9: - case 0xDA: - case 0xDB: - case 0xDC: - case 0xDD: - case 0xDE: - case 0xDF: - case 0xE1: - case 0xE2: - case 0xE3: - case 0xE4: - case 0xE5: - case 0xE6: - case 0xE7: - case 0xF8: - case 0xF9: - case 0xFA: - case 0xFB: - case 0xFC: - case 0xFD: - case 0xFE: - case 0xFF: - DEFAULT; - break; - - default: - switch((nextop>>3)&7) { - case 0: - INST_NAME("FILD ST0, Ew"); - v1 = x87_do_push(dyn, ninst); - addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<1, 1, rex, 0, 0); - LDRSHw_U12(x1, wback, fixedaddress); - SCVTFDw(v1, x1); - break; - case 1: - INST_NAME("FISTTP Ew, ST0"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0); - addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<1, 1, rex, 0, 0); - ed = x1; - s0 = fpu_get_scratch(dyn); - #if 0 - // this version needs ARM v8.5, //TODO: add detection of this 
extensio to use it - FRINT32ZD(s0, v1); - // no saturation instruction on Arm, so using NEON - VFCVTZSd(s0, s0); - SQXTN_S_D(s0, s0); - SQXTN_H_S(s0, s0); - VSTR16_U12(s0, wback, fixedaddress); - #else - MRS_fpsr(x5); - BFCw(x5, FPSR_IOC, 1); // reset IOC bit - MSR_fpsr(x5); - VFCVTZSd(s0, v1); - SQXTN_S_D(s0, s0); - SQXTN_H_S(s0, s0); - VSTR16_U12(s0, wback, fixedaddress); - MRS_fpsr(x5); // get back FPSR to check the IOC bit - TBZ_MARK3(x5, FPSR_IOC); - MOV32w(x5, 0x8000); - STRH_U12(x5, wback, fixedaddress); - MARK3; - #endif - x87_do_pop(dyn, ninst); - break; - case 2: - INST_NAME("FIST Ew, ST0"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0); - u8 = x87_setround(dyn, ninst, x1, x2, x4); - addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<1, 1, rex, 0, 0); - ed = x1; - s0 = fpu_get_scratch(dyn); - #if 0 - FRINT32XD(s0, v1); - // no saturation instruction on Arm, so using NEON - VFCVTZSd(s0, s0); - SQXTN_S_D(s0, s0); - SQXTN_H_S(s0, s0); - VSTR16_U12(s0, wback, fixedaddress); - #else - MRS_fpsr(x5); - BFCw(x5, FPSR_IOC, 1); // reset IOC bit - MSR_fpsr(x5); - FRINTXD(s0, v1); - VFCVTZSd(s0, s0); - SQXTN_S_D(s0, s0); - SQXTN_H_S(s0, s0); - VSTR16_U12(s0, wback, fixedaddress); - MRS_fpsr(x5); // get back FPSR to check the IOC bit - TBZ_MARK3(x5, FPSR_IOC); - MOV32w(x5, 0x8000); - STRH_U12(x5, wback, fixedaddress); - MARK3; - #endif - x87_restoreround(dyn, ninst, u8); - break; - case 3: - INST_NAME("FISTP Ew, ST0"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0); - u8 = x87_setround(dyn, ninst, x1, x2, x4); - addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<1, 1, rex, 0, 0); - ed = x1; - s0 = fpu_get_scratch(dyn); - #if 0 - FRINT32XD(s0, v1); - // no saturation instruction on Arm, so using NEON - VFCVTZSd(s0, s0); - SQXTN_S_D(s0, s0); - SQXTN_H_S(s0, s0); - VSTR16_U12(s0, wback, fixedaddress); - #else - MRS_fpsr(x5); - BFCw(x5, FPSR_IOC, 1); // reset IOC bit - MSR_fpsr(x5); - FRINTXD(s0, v1); - VFCVTZSd(s0, s0); - SQXTN_S_D(s0, s0); - SQXTN_H_S(s0, s0); - VSTR16_U12(s0, wback, fixedaddress); - MRS_fpsr(x5); // get back FPSR to check the IOC bit - TBZ_MARK3(x5, FPSR_IOC); - MOV32w(x5, 0x8000); - STRH_U12(x5, wback, fixedaddress); - MARK3; - #endif - x87_do_pop(dyn, ninst); - x87_restoreround(dyn, ninst, u8); - break; - case 4: - INST_NAME("FBLD ST0, tbytes"); - x87_do_push_empty(dyn, ninst, x1); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); - if(ed!=x1) {MOVx_REG(x1, ed);} - CALL(fpu_fbld, -1); - break; - case 5: - INST_NAME("FILD ST0, i64"); - v1 = x87_do_push(dyn, ninst); - addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<3, 7, rex, 0, 0); - LDRx_U12(x1, wback, fixedaddress); - SCVTFDx(v1, x1); - break; - case 6: - INST_NAME("FBSTP tbytes, ST0"); - x87_forget(dyn, ninst, x1, x2, 0); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); - if(ed!=x1) {MOVx_REG(x1, ed);} - CALL(fpu_fbst, -1); - x87_do_pop(dyn, ninst); - break; - case 7: - INST_NAME("FISTP i64, ST0"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0); - u8 = x87_setround(dyn, ninst, x1, x2, x4); - addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<3, 7, rex, 0, 0); - ed = x1; - s0 = fpu_get_scratch(dyn); - #if 0 - FRINT64XD(s0, v1); - VFCVTZSd(s0, s0); - VSTR64_U12(s0, wback, fixedaddress); - #else - MRS_fpsr(x5); - BFCw(x5, FPSR_IOC, 1); // reset IOC bit - MSR_fpsr(x5); - FRINTXD(s0, v1); - VFCVTZSd(s0, s0); - VSTR64_U12(s0, wback, fixedaddress); - MRS_fpsr(x5); // get back FPSR to check 
the IOC bit - TBZ_MARK3(x5, FPSR_IOC); - MOV64x(x5, 0x8000000000000000LL); - STRx_U12(x5, wback, fixedaddress); - MARK3; - #endif - x87_restoreround(dyn, ninst, u8); - x87_do_pop(dyn, ninst); - break; - default: - DEFAULT; - } - } - return addr; -} diff --git a/src/dynarec/dynarec_arm64_emit_logic.c b/src/dynarec/dynarec_arm64_emit_logic.c deleted file mode 100755 index 5255f47c..00000000 --- a/src/dynarec/dynarec_arm64_emit_logic.c +++ /dev/null @@ -1,679 +0,0 @@ -#include -#include -#include -#include -#include - -#include "debug.h" -#include "box64context.h" -#include "dynarec.h" -#include "emu/x64emu_private.h" -#include "emu/x64run_private.h" -#include "x64run.h" -#include "x64emu.h" -#include "box64stack.h" -#include "callback.h" -#include "emu/x64run_private.h" -#include "x64trace.h" -#include "dynarec_arm64.h" -#include "dynarec_arm64_private.h" -#include "arm64_printer.h" -#include "../tools/bridge_private.h" - -#include "dynarec_arm64_functions.h" -#include "dynarec_arm64_helper.h" - -// emit OR32 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch -void emit_or32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4) -{ - MAYUSE(s2); - IFX(X_PEND) { - STRxw_U12(s1, xEmu, offsetof(x64emu_t, op1)); - STRxw_U12(s2, xEmu, offsetof(x64emu_t, op2)); - SET_DF(s4, rex.w?d_or64:d_or32); - } else IFX(X_ALL) { - SET_DFNONE(s4); - } - ORRxw_REG(s1, s1, s2); - IFX(X_PEND) { - STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); - } - IFX(X_CF | X_AF | X_OF) { - MOV32w(s3, (1<=0 && c<256) { -// IFX(X_ALL) { -// ORRS_IMM8(s1, s1, c, 0); -// } else { -// ORR_IMM8(s1, s1, c, 0); -// } -// } else { -// IFX(X_PEND) {} else {MOVW(s3, c);} -// IFX(X_ALL) { -// ORRS_REG_LSL_IMM5(s1, s1, s3, 0); -// } else { -// ORR_REG_LSL_IMM5(s1, s1, s3, 0); -// } -// } -// IFX(X_PEND) { -// STR_IMM9(s1, xEmu, offsetof(x64emu_t, res)); -// } -// IFX(X_CF | X_AF | X_ZF) { -// BIC_IMM8(xFlags, xFlags, (1<=0 && c<256) { -// IFX(X_ALL) { -// XORS_IMM8(s1, s1, c); -// } else { -// XOR_IMM8(s1, s1, c); -// } -// } else { -// IFX(X_PEND) {} else {MOVW(s3, c);} -// IFX(X_ALL) { -// XORS_REG_LSL_IMM5(s1, s1, s3, 0); -// } else { -// XOR_REG_LSL_IMM5(s1, s1, s3, 0); -// } -// } -// IFX(X_PEND) { -// STR_IMM9(s1, xEmu, offsetof(x64emu_t, res)); -// } -// IFX(X_CF | X_AF | X_ZF) { -// BIC_IMM8(xFlags, xFlags, (1<=0 && c<256) { -// IFX(X_ALL) { -// ANDS_IMM8(s1, s1, c); -// } else { -// AND_IMM8(s1, s1, c); -// } -// } else { -// IFX(X_PEND) {} else {MOVW(s3, c);} -// IFX(X_ALL) { -// ANDS_REG_LSL_IMM5(s1, s1, s3, 0); -// } else { -// AND_REG_LSL_IMM5(s1, s1, s3, 0); -// } -// } -// IFX(X_PEND) { -// STR_IMM9(s1, xEmu, offsetof(x64emu_t, res)); -// } -// IFX(X_CF | X_AF | X_ZF) { -// BIC_IMM8(xFlags, xFlags, (1< -#include -#include -#include -#include - -#include "debug.h" -#include "box64context.h" -#include "dynarec.h" -#include "emu/x64emu_private.h" -#include "emu/x64run_private.h" -#include "x64run.h" -#include "x64emu.h" -#include "box64stack.h" -#include "callback.h" -#include "emu/x64run_private.h" -#include "x64trace.h" -#include "dynarec_arm64.h" -#include "dynarec_arm64_private.h" -#include "arm64_printer.h" -#include "../tools/bridge_private.h" - -#include "dynarec_arm64_functions.h" -#include "dynarec_arm64_helper.h" - -// emit ADD32 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch -void emit_add32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4) -{ - MAYUSE(s2); - IFX(X_PEND) { - STRxw_U12(s1, xEmu, offsetof(x64emu_t, op1)); - 
STRxw_U12(s2, xEmu, offsetof(x64emu_t, op2)); - SET_DF(s3, rex.w?d_add64:d_add32b); - } else IFX(X_ALL) { - SET_DFNONE(s3); - } - IFX(X_AF) { - ORRxw_REG(s3, s1, s2); // s3 = op1 | op2 - ANDxw_REG(s4, s1, s2); // s4 = op1 & op2 - } - IFX(X_ALL) { - ADDSxw_REG(s1, s1, s2); - } else { - ADDxw_REG(s1, s1, s2); - } - IFX(X_PEND) { - STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); - } - IFX(X_AF) { - BICxw_REG(s3, s3, s1); // s3 = (op1 | op2) & ~ res - ORRxw_REG(s3, s3, s4); // s3 = (op1 & op2) | ((op1 | op2) & ~ res) - LSRxw(s4, s3, 3); - BFIxw(xFlags, s4, F_AF, 1); // AF: bc & 0x08 - } - IFX(X_ZF) { - CSETw(s4, cEQ); - BFIw(xFlags, s4, F_ZF, 1); - } - IFX(X_CF) { - CSETw(s4, cCS); - BFIw(xFlags, s4, F_CF, 1); - } - IFX(X_OF) { - CSETw(s4, cVS); - BFIw(xFlags, s4, F_OF, 1); - } - IFX(X_SF) { - LSRxw(s3, s1, (rex.w)?63:31); - BFIx(xFlags, s3, F_SF, 1); - } - IFX(X_PF) { - emit_pf(dyn, ninst, s1, s3, s4); - } -} - -// emit ADD32 instruction, from s1, constant c, store result in s1 using s3 and s4 as scratch -void emit_add32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4, int s5) -{ - MAYUSE(s5); - if(s1==xRSP && (!dyn->insts || dyn->insts[ninst].x64.need_flags==X_PEND)) - { - // special case when doing math on ESP and only PEND is needed: ignoring it! - if(c>=0 && c<0x1000) { - ADDx_U12(s1, s1, c); - } else { - MOV64x(s3, c); - ADDx_REG(s1, s1, s3); - } - return; - } - IFX(X_PEND) { - MOV64xw(s5, c); - STRxw_U12(s1, xEmu, offsetof(x64emu_t, op1)); - STRxw_U12(s5, xEmu, offsetof(x64emu_t, op2)); - SET_DF(s4, rex.w?d_add64:d_add32b); - } else IFX(X_ALL) { - SET_DFNONE(s4); - } - IFX(X_AF) { - IFX(X_PEND) {} else {MOV64xw(s5, c);} - ORRxw_REG(s3, s1, s5); // s3 = op1 | op2 - ANDxw_REG(s4, s1, s5); // s4 = op1 & op2 - } - if(c>=0 && c<0x1000) { - IFX(X_ALL) { - ADDSxw_U12(s1, s1, c); - } else { - ADDxw_U12(s1, s1, c); - } - } else { - IFX(X_PEND|X_AF) {} else {MOV64xw(s5, c);} - IFX(X_ALL) { - ADDSxw_REG(s1, s1, s5); - } else { - ADDxw_REG(s1, s1, s5); - } - } - IFX(X_PEND) { - STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); - } - IFX(X_AF) { - BICxw_REG(s3, s3, s1); // s3 = (op1 | op2) & ~ res - ORRxw_REG(s3, s3, s4); // s3 = (op1 & op2) | ((op1 | op2) & ~ res) - LSRxw(s4, s3, 3); - BFIxw(xFlags, s4, F_AF, 1); // AF: bc & 0x08 - } - IFX(X_ZF) { - CSETw(s4, cEQ); - BFIw(xFlags, s4, F_ZF, 1); - } - IFX(X_CF) { - CSETw(s4, cCS); - BFIw(xFlags, s4, F_CF, 1); - } - IFX(X_OF) { - CSETw(s4, cVS); - BFIw(xFlags, s4, F_OF, 1); - } - IFX(X_SF) { - LSRxw(s3, s1, (rex.w)?63:31); - BFIx(xFlags, s3, F_SF, 1); - } - IFX(X_PF) { - emit_pf(dyn, ninst, s1, s3, s4); - } -} - -// emit SUB32 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch -void emit_sub32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4) -{ - MAYUSE(s2); - IFX(X_PEND) { - STRxw_U12(s1, xEmu, offsetof(x64emu_t, op1)); - STRxw_U12(s2, xEmu, offsetof(x64emu_t, op2)); - SET_DF(s3, rex.w?d_sub64:d_sub32); - } else IFX(X_ALL) { - SET_DFNONE(s3); - } - IFX(X_AF) { - MVNxw_REG(s3, s1); - ORRxw_REG(s3, s3, s2); // s3 = ~op1 | op2 - BICxw(s4, s2, s1); // s4 = ~op1 & op2 - } - IFX(X_ALL) { - SUBSxw_REG(s1, s1, s2); - } else { - SUBxw_REG(s1, s1, s2); - } - IFX(X_PEND) { - STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); - } - IFX(X_AF) { - ANDxw_REG(s3, s3, s1); // s3 = (~op1 | op2) & res - ORRxw_REG(s3, s3, s4); // s3 = (~op1 & op2) | ((~op1 | op2) & res) - LSRxw(s4, s3, 3); - BFIx(xFlags, s4, F_AF, 1); // AF: bc & 0x08 - } - IFX(X_ZF) { - CSETw(s4, cEQ); - BFIw(xFlags, s4, F_ZF, 1); - } - 
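/*
 * Reference sketch, not part of this patch: how the "bc" borrow chain built
 * just above maps to the x86 flag bits. The 8/16-bit emitters further down
 * read AF, CF and OF straight out of this chain (the 32/64-bit ones use the
 * hardware V and C condition flags instead). Names below are illustrative only.
 */
#include <stdint.h>

static void sub8_borrow_chain_flags(uint8_t op1, uint8_t op2,
                                    int* af, int* cf, int* of)
{
    uint8_t res = (uint8_t)(op1 - op2);
    uint8_t bc  = (uint8_t)((~op1 & op2) | ((~op1 | op2) & res)); /* borrow chain */
    *af = (bc >> 3) & 1;               /* AF: bc & 0x08 */
    *cf = (bc >> 7) & 1;               /* CF: bc & 0x80 (borrow out of bit 7) */
    *of = ((bc >> 6) ^ (bc >> 7)) & 1; /* OF: ((bc>>6) ^ ((bc>>6)>>1)) & 1 */
}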
IFX(X_CF) { - // inverted carry - CSETw(s4, cCC); - BFIw(xFlags, s4, F_CF, 1); - } - IFX(X_OF) { - CSETw(s4, cVS); - BFIw(xFlags, s4, F_OF, 1); - } - IFX(X_SF) { - LSRxw(s3, s1, (rex.w)?63:31); - BFIx(xFlags, s3, F_SF, 1); - } - IFX(X_PF) { - emit_pf(dyn, ninst, s1, s3, s4); - } -} - -// emit SUB32 instruction, from s1, constant c, store result in s1 using s3 and s4 as scratch -void emit_sub32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4, int s5) -{ - MAYUSE(s5); - if(s1==xRSP && (!dyn->insts || dyn->insts[ninst].x64.need_flags==X_PEND)) - { - // special case when doing math on RSP and only PEND is needed: ignoring it! - if(c>=0 && c<0x1000) { - SUBxw_U12(s1, s1, c); - } else { - MOV64xw(s5, c); - SUBxw_REG(s1, s1, s5); - } - return; - } - IFX(X_PEND) { - STRxw_U12(s1, xEmu, offsetof(x64emu_t, op1)); - MOV64xw(s5, c); - STRxw_U12(s5, xEmu, offsetof(x64emu_t, op2)); - SET_DF(s4, rex.w?d_sub64:d_sub32); - } else IFX(X_ALL) { - SET_DFNONE(s4); - } - IFX(X_AF) { - IFX(X_PEND) {} else {MOV64xw(s5, c);} - ORNxw_REG(s3, s5, s1); // s3 = ~op1 | op2 - BICxw_REG(s4, s5, s1); // s4 = ~op1 & op2 - } - if(c>=0 && c<0x1000) { - IFX(X_ALL) { - SUBSxw_U12(s1, s1, c); - } else { - SUBxw_U12(s1, s1, c); - } - } else { - IFX(X_PEND|X_AF) {} else {MOV64xw(s5, c);} - IFX(X_ALL) { - SUBSxw_REG(s1, s1, s5); - } else { - SUBxw_REG(s1, s1, s5); - } - } - IFX(X_PEND) { - STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); - } - IFX(X_AF) { - ANDxw_REG(s3, s3, s1); // s3 = (~op1 | op2) & res - ORRxw_REG(s3, s3, s4); // s3 = (~op1 & op2) | ((~op1 | op2) & res) - LSRxw(s4, s3, 3); - BFIw(xFlags, s4, F_AF, 1); // AF: bc & 0x08 - } - IFX(X_ZF) { - CSETw(s4, cEQ); - BFIw(xFlags, s4, F_ZF, 1); - } - IFX(X_CF) { - // inverted carry - CSETw(s4, cCC); - BFIw(xFlags, s4, F_CF, 1); - } - IFX(X_OF) { - CSETw(s4, cVS); - BFIw(xFlags, s4, F_OF, 1); - } - IFX(X_SF) { - LSRxw(s3, s1, (rex.w)?63:31); - BFIx(xFlags, s3, F_SF, 1); - } - IFX(X_PF) { - emit_pf(dyn, ninst, s1, s3, s4); - } -} - -// emit ADD8 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch -void emit_add8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4) -{ - MAYUSE(s2); - IFX(X_PEND) { - STRB_U12(s1, xEmu, offsetof(x64emu_t, op1)); - STRB_U12(s2, xEmu, offsetof(x64emu_t, op2)); - SET_DF(s3, d_add8); - } else IFX(X_ALL) { - SET_DFNONE(s3); - } - IFX(X_AF | X_OF) { - ORRw_REG(s3, s1, s2); // s3 = op1 | op2 - ANDw_REG(s4, s1, s2); // s4 = op1 & op2 - } - ADDw_REG(s1, s1, s2); - IFX(X_AF|X_OF) { - BICw_REG(s3, s3, s1); // s3 = (op1 | op2) & ~ res - ORRw_REG(s3, s3, s4); // s3 = (op1 & op2) | ((op1 | op2) & ~ res) - IFX(X_AF) { - LSRw(s4, s3, 3); - BFIw(xFlags, s4, F_AF, 1); // AF: bc & 0x08 - } - IFX(X_OF) { - LSRw(s4, s3, 6); - EORw_REG_LSR(s4, s4, s4, 1); - BFIw(xFlags, s4, F_OF, 1); // OF: ((bc >> 6) ^ ((bc>>6)>>1)) & 1 - } - } - IFX(X_CF) { - LSRw(s3, s1, 8); - BFIw(xFlags, s3, F_CF, 1); - } - IFX(X_PEND) { - STRH_U12(s1, xEmu, offsetof(x64emu_t, res)); - } - IFX(X_ZF) { - ANDSw_mask(s1, s1, 0, 7); //mask=0xff - CSETw(s3, cEQ); - BFIw(xFlags, s3, F_ZF, 1); - } - IFX(X_SF) { - LSRw(s3, s1, 7); - BFIw(xFlags, s3, F_SF, 1); - } - IFX(X_PF) { - emit_pf(dyn, ninst, s1, s3, s4); - } -} - -// emit ADD8 instruction, from s1, const c, store result in s1 using s3 and s4 as scratch -void emit_add8c(dynarec_arm_t* dyn, int ninst, int s1, int c, int s3, int s4) -{ - IFX(X_PEND) { - MOV32w(s4, c&0xff); - STRB_U12(s1, xEmu, offsetof(x64emu_t, op1)); - STRB_U12(s4, xEmu, offsetof(x64emu_t, op2)); - SET_DF(s3, d_add8); - } 
else IFX(X_ALL) { - SET_DFNONE(s3); - } - IFX(X_AF | X_OF) { - if(X_PEND) {} else {MOV32w(s4, c&0xff);} - ORRw_REG(s3, s1, s4); // s3 = op1 | op2 - ANDw_REG(s4, s1, s4); // s4 = op1 & op2 - } - ADDw_U12(s1, s1, c); - - IFX(X_AF|X_OF) { - BICw_REG(s3, s3, s1); // s3 = (op1 | op2) & ~ res - ORRw_REG(s3, s3, s4); // s4 = (op1 & op2) | ((op1 | op2) & ~ res) - IFX(X_AF) { - LSRw(s4, s3, 3); - BFIw(xFlags, s4, F_AF, 1); // AF: bc & 0x08 - } - IFX(X_OF) { - LSRw(s4, s3, 6); - EORw_REG_LSR(s4, s4, s4, 1); - BFIw(xFlags, s4, F_OF, 1); // OF: ((bc >> 6) ^ ((bc>>6)>>1)) & 1 - } - } - IFX(X_CF) { - LSRw(s3, s1, 8); - BFIw(xFlags, s3, F_CF, 1); - } - IFX(X_PEND) { - STRH_U12(s1, xEmu, offsetof(x64emu_t, res)); - } - IFX(X_ZF) { - ANDSw_mask(s1, s1, 0, 0b000111); //mask=000000ff - CSETw(s3, cEQ); - BFIw(xFlags, s3, F_ZF, 1); - } - IFX(X_SF) { - LSRw(s3, s1, 7); - BFIw(xFlags, s3, F_SF, 1); - } - IFX(X_PF) { - emit_pf(dyn, ninst, s1, s3, s4); - } -} - -// emit SUB8 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch -void emit_sub8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4) -{ - MAYUSE(s2); - IFX(X_PEND) { - STRB_U12(s1, xEmu, offsetof(x64emu_t, op1)); - STRB_U12(s2, xEmu, offsetof(x64emu_t, op2)); - SET_DF(s3, d_sub8); - } else IFX(X_ALL) { - SET_DFNONE(s3); - } - IFX(X_AF|X_OF|X_CF) { - MVNw_REG(s3, s1); - ORRw_REG(s3, s3, s2); // s3 = ~op1 | op2 - BICw_REG(s4, s2, s1); // s4 = ~op1 & op2 - } - - SUBw_REG(s1, s1, s2); - IFX(X_PEND) { - STRB_U12(s1, xEmu, offsetof(x64emu_t, res)); - } - IFX(X_AF|X_OF|X_CF) { - ANDw_REG(s3, s3, s1); // s3 = (~op1 | op2) & res - ORRw_REG(s3, s3, s4); // s3 = (~op1 & op2) | ((~op1 | op2) & res) - IFX(X_CF) { - LSRw(s4, s3, 7); - BFIw(xFlags, s4, F_CF, 1); // CF : bc & 0x80 - } - IFX(X_AF) { - LSRw(s4, s3, 3); - BFIw(xFlags, s4, F_AF, 1); // AF: bc & 0x08 - } - IFX(X_OF) { - LSRw(s4, s3, 6); - EORw_REG_LSR(s4, s4, s4, 1); - BFIw(xFlags, s4, F_OF, 1); // OF: ((bc >> 6) ^ ((bc>>6)>>1)) & 1 - } - } - IFX(X_ZF) { - ANDSw_mask(s1, s1, 0, 7); //mask=0xff - CSETw(s3, cEQ); - BFIw(xFlags, s3, F_ZF, 1); - } - IFX(X_SF) { - LSRw(s3, s1, 7); - BFIw(xFlags, s3, F_SF, 1); - } - IFX(X_PF) { - emit_pf(dyn, ninst, s1, s3, s4); - } -} - -// emit SUB8 instruction, from s1, constant c, store result in s1 using s3 and s4 as scratch -void emit_sub8c(dynarec_arm_t* dyn, int ninst, int s1, int c, int s3, int s4, int s5) -{ - MAYUSE(s5); - IFX(X_ALL|X_PEND) { - MOV32w(s5, c&0xff); - } - IFX(X_PEND) { - STRB_U12(s1, xEmu, offsetof(x64emu_t, op1)); - STRB_U12(s3, xEmu, offsetof(x64emu_t, op2)); - SET_DF(s3, d_sub8); - } else IFX(X_ALL) { - SET_DFNONE(s3); - } - IFX(X_AF|X_OF|X_CF) { - MVNw_REG(s3, s1); - ORRw_REG(s3, s3, s5); // s3 = ~op1 | op2 - BICw_REG(s4, s5, s1); // s4 = ~op1 & op2 - } - IFX(X_ALL) { - SUBw_REG(s1, s1, s5); - } else { - SUBw_U12(s1, s1, c&0xff); - } - IFX(X_PEND) { - STRB_U12(s1, xEmu, offsetof(x64emu_t, res)); - } - IFX(X_AF|X_OF|X_CF) { - ANDw_REG(s3, s3, s1); // s3 = (~op1 | op2) & res - ORRw_REG(s3, s3, s4); // s3 = (~op1 & op2) | ((~op1 | op2) & res) - IFX(X_CF) { - LSRw(s4, s3, 7); - BFIw(xFlags, s4, F_CF, 1); // CF : bc & 0x80 - } - IFX(X_AF) { - LSRw(s4, s3, 3); - BFIw(xFlags, s4, F_AF, 1); // AF: bc & 0x08 - } - IFX(X_OF) { - LSRw(s4, s3, 6); - EORw_REG_LSR(s4, s4, s4, 1); - BFIw(xFlags, s4, F_OF, 1); // OF: ((bc >> 6) ^ ((bc>>6)>>1)) & 1 - } - } - IFX(X_ZF) { - ANDSw_mask(s1, s1, 0, 0b000111); //mask=000000ff - CSETw(s3, cEQ); - BFIw(xFlags, s3, F_ZF, 1); - } - IFX(X_SF) { - LSRw(s3, s1, 7); - BFIw(xFlags, s3, F_SF, 1); - } - 
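/*
 * Reference sketch, not part of this patch: the matching carry chain for
 * addition, as described by the comments in emit_add8/emit_add8c above.
 * Names below are illustrative only.
 */
#include <stdint.h>

static void add8_carry_chain_flags(uint8_t op1, uint8_t op2,
                                   int* af, int* cf, int* of)
{
    uint16_t wide = (uint16_t)op1 + (uint16_t)op2;
    uint8_t  res  = (uint8_t)wide;
    uint8_t  cc   = (uint8_t)((op1 & op2) | ((op1 | op2) & (uint8_t)~res));
    *af = (cc >> 3) & 1;               /* AF: cc & 0x08 */
    *cf = (wide >> 8) & 1;             /* CF: carry out of bit 7 */
    *of = ((cc >> 6) ^ (cc >> 7)) & 1; /* OF: ((cc>>6) ^ ((cc>>6)>>1)) & 1 */
}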
IFX(X_PF) { - emit_pf(dyn, ninst, s1, s3, s4); - } -} - -// emit ADD16 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch -void emit_add16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4) -{ - MAYUSE(s2); - IFX(X_PEND) { - STRH_U12(s1, xEmu, offsetof(x64emu_t, op1)); - STRH_U12(s2, xEmu, offsetof(x64emu_t, op2)); - SET_DF(s3, d_add16); - } else IFX(X_ALL) { - SET_DFNONE(s3); - } - IFX(X_AF | X_OF) { - ORRw_REG(s3, s1, s2); // s3 = op1 | op2 - ANDw_REG(s4, s1, s2); // s4 = op1 & op2 - } - ADDw_REG(s1, s1, s2); - - IFX(X_AF|X_OF) { - BICw_REG(s3, s3, s1); // s3 = (op1 | op2) & ~ res - ORRw_REG(s3, s3, s4); // s3 = (op1 & op2) | ((op1 | op2) & ~ res) - IFX(X_AF) { - LSRw(s4, s3, 3); - BFIw(xFlags, s4, F_AF, 1); // AF: bc & 0x08 - } - IFX(X_OF) { - LSRw(s4, s3, 14); - EORw_REG_LSR(s4, s4, s4, 1); - BFIw(xFlags, s4, F_OF, 1); // OF: ((bc >> 14) ^ ((bc>>14)>>1)) & 1 - } - } - IFX(X_CF) { - LSRw(s3, s1, 16); - BFIw(xFlags, s3, F_CF, 1); - } - IFX(X_PEND) { - STRw_U12(s1, xEmu, offsetof(x64emu_t, res)); - } - IFX(X_ZF) { - ANDSw_mask(s1, s1, 0, 15); //mask=0xffff - CSETw(s3, cEQ); - BFIw(xFlags, s3, F_ZF, 1); - } - IFX(X_SF) { - LSRw(s3, s1, 15); - BFIw(xFlags, s3, F_SF, 1); - } - IFX(X_PF) { - emit_pf(dyn, ninst, s1, s3, s4); - } -} - -// emit ADD16 instruction, from s1, const c, store result in s1 using s3 and s4 as scratch -//void emit_add16c(dynarec_arm_t* dyn, int ninst, int s1, int c, int s3, int s4) -//{ -// IFX(X_PEND) { -// MOVW(s3, c); -// STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1)); -// STR_IMM9(s3, xEmu, offsetof(x64emu_t, op2)); -// SET_DF(s4, d_add16); -// } else IFX(X_ALL) { -// SET_DFNONE(s4); -// } -// IFX(X_AF | X_OF) { -// MOV_REG(s4, s1); -// } -// if(c>=0 && c<256) { -// ADD_IMM8(s1, s1, c); -// } else { -// IFX(X_PEND) {} else {MOVW(s3, c);} -// ADD_REG_LSL_IMM5(s1, s1, s3, 0); -// } -// -// IFX(X_AF|X_OF) { -// if(c>=0 && c<256) { -// ORR_IMM8(s3, s4, c, 0); // s3 = op1 | op2 -// AND_IMM8(s4, s4, c); // s4 = op1 & op2 -// } else { -// ORR_REG_LSL_IMM5(s3, s3, s4, 0); // s3 = op1 | op2 -// PUSH(xSP, 1<> 14) ^ ((bc>>14)>>1)) & 1 -// } -// } -// IFX(X_CF) { -// MOV_REG_LSR_IMM5(s3, s1, 16); -// BFI(xFlags, s3, F_CF, 1); -// } -// IFX(X_PEND) { -// STR_IMM9(s1, xEmu, offsetof(x64emu_t, res)); -// } -// IFX(X_ZF) { -// UXTH(s1, s1, 0); -// TSTS_REG_LSL_IMM5(s1, s1, 0); -// ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<> 14) ^ ((bc>>14)>>1)) & 1 - } - } - IFX(X_ZF) { - ANDSw_mask(s1, s1, 0, 15); //mask=0xffff - CSETw(s3, cEQ); - BFIw(xFlags, s3, F_ZF, 1); - } - IFX(X_SF) { - LSRw(s3, s1, 15); - BFIw(xFlags, s3, F_SF, 1); - } - IFX(X_PF) { - emit_pf(dyn, ninst, s1, s3, s4); - } -} - -// emit SUB16 instruction, from s1, constant c, store result in s1 using s3 and s4 as scratch -//void emit_sub16c(dynarec_arm_t* dyn, int ninst, int s1, int c, int s3, int s4) -//{ -// IFX(X_PEND) { -// MOVW(s3, c); -// STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1)); -// STR_IMM9(s3, xEmu, offsetof(x64emu_t, op2)); -// SET_DF(s4, d_sub16); -// } else IFX(X_ALL) { -// SET_DFNONE(s4); -// } -// IFX(X_AF|X_OF|X_CF) { -// MVN_REG_LSL_IMM5(s4, s1, 0); -// } -// if(c>=0 && c<255) { -// SUB_IMM8(s1, s1, c); -// } else { -// IFX(X_PEND) {} else {MOVW(s3, c);} -// SUB_REG_LSL_IMM5(s1, s1, s3, 0); -// } -// IFX(X_PEND) { -// STR_IMM9(s1, xEmu, offsetof(x64emu_t, res)); -// } -// IFX(X_AF|X_OF|X_CF) { -// if(c>=0 && c<256) { -// ORR_IMM8(s3, s4, c, 0); // s3 = ~op1 | op2 -// AND_IMM8(s4, s4, c); // s4 = ~op1 & op2 -// } else { -// ORR_REG_LSL_IMM5(s3, s3, s4, 0); // s3 = ~op1 | op2 -// 
PUSH(xSP, 1<> 14) ^ ((bc>>14)>>1)) & 1 -// } -// } -// IFX(X_ZF) { -// UXTH(s1, s1, 0); -// TSTS_REG_LSL_IMM5(s1, s1, 0); -// ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<> 6) ^ ((bc>>6)>>1)) & 1 - } - } - - IFX(X_ZF) { - ANDSw_mask(s1, s1, 0, 7); //mask=0xff - CSETw(s3, cEQ); - BFIw(xFlags, s3, F_ZF, 1); - } - IFX(X_SF) { - LSRw(s3, s1, 7); - BFIw(xFlags, s3, F_SF, 1); - } - IFX(X_PF) { - emit_pf(dyn, ninst, s1, s3, s4); - } -} - -// emit INC16 instruction, from s1, store result in s1 using s3 and s4 as scratch -void emit_inc16(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4) -{ - IFX(X_PEND) { - STRH_U12(s1, xEmu, offsetof(x64emu_t, op1)); - SET_DF(s3, d_inc16); - } else IFX(X_ZF|X_OF|X_AF|X_SF|X_PF) { - SET_DFNONE(s3); - } - IFX(X_AF | X_OF) { - MOVw_REG(s4, s1); - } - ADDw_U12(s1, s1, 1); - IFX(X_PEND) { - STRH_U12(s1, xEmu, offsetof(x64emu_t, res)); - } - IFX(X_AF|X_OF) { - ORRw_mask(s3, s4, 0, 0); // s3 = op1 | op2 - ANDw_mask(s4, s4, 0, 0); // s4 = op1 & op2 - BICw_REG(s3, s3, s1); // s3 = (op1 | op2) & ~ res - ORRw_REG(s3, s3, s4); // s3 = (op1 & op2) | ((op1 | op2) & ~ res) - IFX(X_AF) { - LSRw(s4, s3, 3); - BFIw(xFlags, s4, F_AF, 1); // AF: bc & 0x08 - } - IFX(X_OF) { - LSRw(s4, s3, 14); - EORw_REG_LSR(s4, s4, s4, 1); - BFIw(xFlags, s4, F_OF, 1); // OF: ((bc >> 14) ^ ((bc>>14)>>1)) & 1 - } - } - IFX(X_ZF) { - TSTw_mask(s1, 0, 0b001111); // mask=0xffff - CSETw(s3, cEQ); - BFIw(xFlags, s3, F_ZF, 1); - } - IFX(X_SF) { - LSRw(s3, s1, 15); - BFIw(xFlags, s3, F_SF, 1); - } - IFX(X_PF) { - emit_pf(dyn, ninst, s1, s3, s4); - } -} - -// emit DEC32 instruction, from s1, store result in s1 using s3 and s4 as scratch -void emit_dec32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4) -{ - IFX(X_PEND) { - STRxw_U12(s1, xEmu, offsetof(x64emu_t, op1)); - SET_DF(s4, rex.w?d_dec64:d_dec32); - } else IFX(X_ZF|X_OF|X_AF|X_SF|X_PF) { - SET_DFNONE(s4); - } - IFX(X_AF) { - MVNxw_REG(s3, s1); - if(rex.w) { - ANDx_mask(s4, s3, 1, 0, 0); // s4 = ~op1 & op2 - ORRx_mask(s3, s3, 1, 0, 0); // s3 = ~op1 | op2 - } else { - ANDw_mask(s4, s3, 0, 0); // s4 = ~op1 & op2 - ORRw_mask(s3, s3, 0, 0); // s3 = ~op1 | op2 - } - } - IFX(X_ZF|X_OF) { - SUBSxw_U12(s1, s1, 1); - } else { - SUBxw_U12(s1, s1, 1); - } - IFX(X_PEND) { - STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); - } - IFX(X_AF) { - ANDxw_REG(s3, s3, s1); // s3 = (~op1 | op2) & res - ORRxw_REG(s3, s3, s4); // s4 = (~op1 & op2) | ((~op1 | op2) & ~ res) - LSRxw(s4, s3, 3); - BFIw(xFlags, s4, F_AF, 1); // AF: bc & 0x08 - } - IFX(X_ZF) { - CSETw(s4, cEQ); - BFIw(xFlags, s4, F_ZF, 1); - } - IFX(X_OF) { - CSETw(s4, cVS); - BFIw(xFlags, s4, F_OF, 1); - } - IFX(X_SF) { - LSRxw(s3, s1, rex.w?63:31); - BFIxw(xFlags, s3, F_SF, 1); - } - IFX(X_PF) { - emit_pf(dyn, ninst, s1, s3, s4); - } -} - -// emit DEC8 instruction, from s1, store result in s1 using s3 and s4 as scratch -void emit_dec8(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4) -{ - IFX(X_PEND) { - STRB_U12(s3, xEmu, offsetof(x64emu_t, op2)); - SET_DF(s3, d_dec8); - } else IFX(X_ZF|X_OF|X_AF|X_SF|X_PF) { - SET_DFNONE(s3); - } - IFX(X_AF|X_OF) { - MVNw_REG(s3, s1); - ANDw_mask(s4, s3, 0, 0); // s4 = ~op1 & op2 - ORRw_mask(s3, s3, 0, 0); // s3 = ~op1 | op2 - } - SUBSw_U12(s1, s1, 1); - IFX(X_PEND) { - STRB_U12(s1, xEmu, offsetof(x64emu_t, res)); - } - IFX(X_AF|X_OF) { - ANDw_REG(s3, s3, s1); // s3 = (~op1 | op2) & res - ORRw_REG(s3, s3, s4); // s3 = (~op1 & op2) | ((~op1 | op2) & res) - IFX(X_AF) { - LSRw(s4, s3, 3); - BFIw(xFlags, s4, F_AF, 1); // AF: bc & 0x08 - } - IFX(X_OF) { - LSRw(s4, s3, 
6); - EORw_REG_LSR(s4, s4, s4, 1); - BFIw(xFlags, s4, F_OF, 1); // OF: ((bc >> 6) ^ ((bc>>6)>>1)) & 1 - } - } - IFX(X_ZF) { - CSETw(s3, cEQ); - BFIw(xFlags, s3, F_ZF, 1); - } - IFX(X_SF) { - LSRw(s3, s1, 7); - BFIw(xFlags, s3, F_SF, 1); - } - IFX(X_PF) { - emit_pf(dyn, ninst, s1, s3, s4); - } -} - -// emit DEC16 instruction, from s1, store result in s1 using s3 and s4 as scratch -void emit_dec16(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4) -{ - IFX(X_PEND) { - STRH_U12(s1, xEmu, offsetof(x64emu_t, op1)); - SET_DF(s3, d_dec16); - } else IFX(X_ZF|X_OF|X_AF|X_SF|X_PF) { - SET_DFNONE(s3); - } - IFX(X_AF|X_OF) { - MVNw_REG(s4, s1); - } - SUBSw_U12(s1, s1, 1); - IFX(X_PEND) { - STRH_U12(s1, xEmu, offsetof(x64emu_t, res)); - } - IFX(X_AF|X_OF) { - ORRw_mask(s3, s4, 0, 0); // s3 = ~op1 | op2 - ANDw_mask(s4, s4, 0, 0); // s4 = ~op1 & op2 - ANDw_REG(s3, s3, s1); // s3 = (~op1 | op2) & res - ORRw_REG(s3, s3, s4); // s3 = (~op1 & op2) | ((~op1 | op2) & res) - IFX(X_AF) { - LSRw(s4, s3, 3); - BFIw(xFlags, s4, F_AF, 1); // AF: bc & 0x08 - } - IFX(X_OF) { - LSRw(s4, s3, 14); - EORw_REG_LSR(s4, s4, s4, 1); - BFIw(xFlags, s4, F_OF, 1); // OF: ((bc >> 14) ^ ((bc>>14)>>1)) & 1 - } - } - IFX(X_ZF) { - CSETw(s3, cEQ); - BFIw(xFlags, s3, F_ZF, 1); - } - IFX(X_SF) { - LSRw(s3, s1, 15); - BFIw(xFlags, s3, F_SF, 1); - } - IFX(X_PF) { - emit_pf(dyn, ninst, s1, s3, s4); - } -} - -// emit ADC32 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch -void emit_adc32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4) -{ - MAYUSE(s2); - IFX(X_PEND) { - STRxw_U12(s1, xEmu, offsetof(x64emu_t, op1)); - STRxw_U12(s2, xEmu, offsetof(x64emu_t, op2)); - SET_DF(s3, rex.w?d_adc64:d_adc32b); - } else IFX(X_ALL) { - SET_DFNONE(s3); - } - IFX(X_AF) { - MOVxw_REG(s4, s1); - } - MRS_nzvc(s3); - BFIx(s3, xFlags, 29, 1); // set C - MSR_nzvc(s3); // load CC into ARM CF - IFX(X_ZF|X_CF|X_OF) { - ADCSxw_REG(s1, s1, s2); - } else { - ADCxw_REG(s1, s1, s2); - } - IFX(X_PEND) { - STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); - } - IFX(X_AF) { - ORRxw_REG(s3, s4, s2); // s3 = op1 | op2 - ANDxw_REG(s4, s4, s2); // s4 = op1 & op2 - BICxw_REG(s3, s3, s1); // s3 = (op1 | op2) & ~ res - ORRxw_REG(s3, s3, s4); // s4 = (op1 & op2) | ((op1 | op2) & ~ res) - LSRxw(s4, s3, 3); - BFIw(xFlags, s4, F_AF, 1); // AF: bc & 0x08 - } - IFX(X_ZF) { - CSETw(s3, cEQ); - BFIw(xFlags, s3, F_ZF, 1); - } - IFX(X_CF) { - CSETw(s3, cCS); - BFIw(xFlags, s3, F_CF, 1); - } - IFX(X_OF) { - CSETw(s3, cVS); - BFIw(xFlags, s3, F_OF, 1); - } - IFX(X_SF) { - LSRx(s3, s1, rex.w?63:31); - BFIw(xFlags, s3, F_SF, 1); - } - IFX(X_PF) { - emit_pf(dyn, ninst, s1, s3, s4); - } -} - -// emit ADC32 instruction, from s1, constant c, store result in s1 using s3 and s4 as scratch -//void emit_adc32c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4) -//{ -// IFX(X_PEND) { -// MOV32(s3, c); -// STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1)); -// STR_IMM9(s3, xEmu, offsetof(x64emu_t, op2)); -// SET_DF(s4, d_adc32); -// } else IFX(X_ALL) { -// SET_DFNONE(s4); -// } -// IFX(X_AF) { -// MOV_REG(s4, s1); -// } -// MOVS_REG_LSR_IMM5(s3, xFlags, 1); // load CC into ARM CF -// if(c>=0 && c<256) { -// IFX(X_ZF|X_CF|X_OF) { -// ADCS_IMM8(s1, s1, c); -// } else { -// ADC_IMM8(s1, s1, c); -// } -// } else { -// MOV32(s3, c); -// IFX(X_ZF|X_CF|X_OF) { -// ADCS_REG_LSL_IMM5(s1, s1, s3, 0); -// } else { -// ADC_REG_LSL_IMM5(s1, s1, s3, 0); -// } -// } -// IFX(X_PEND) { -// STR_IMM9(s1, xEmu, offsetof(x64emu_t, res)); -// } -// IFX(X_AF) { -// 
if(c>=0 && c<256) { -// ORR_IMM8(s3, s4, c, 0); // s3 = op1 | op2 -// AND_IMM8(s4, s4, c); // s4 = op1 & op2 -// } else { -// ORR_REG_LSL_IMM5(s3, s3, s4, 0); // s3 = op1 | op2 -// PUSH(xSP, 1<> 6) ^ ((bc>>6)>>1)) & 1 - } - } - IFX(X_CF) { - LSRw(s3, s1, 8); - BFIw(xFlags, s3, F_CF, 1); - } - IFX(X_ZF) { - ANDSw_mask(s1, s1, 0, 7); //mask=0xff - CSETw(s3, cEQ); - BFIw(xFlags, s3, F_ZF, 1); - } - IFX(X_SF) { - LSRw(s3, s1, 7); - BFIw(xFlags, s3, F_SF, 1); - } - IFX(X_PF) { - emit_pf(dyn, ninst, s1, s3, s4); - } -} - -// emit ADC8 instruction, from s1, const c, store result in s1 using s3 and s4 as scratch -void emit_adc8c(dynarec_arm_t* dyn, int ninst, int s1, int c, int s3, int s4, int s5) -{ - MAYUSE(s5); - MOV32w(s5, c&0xff); - IFX(X_PEND) { - STRB_U12(s1, xEmu, offsetof(x64emu_t, op1)); - STRB_U12(s5, xEmu, offsetof(x64emu_t, op2)); - SET_DF(s4, d_adc8); - } else IFX(X_ALL) { - SET_DFNONE(s4); - } - IFX(X_AF | X_OF) { - MOVw_REG(s4, s1); - } - MRS_nzvc(s3); - BFIx(s3, xFlags, 29, 1); // set C - MSR_nzvc(s3); // load CC into ARM CF - ADCw_REG(s1, s1, s5); - IFX(X_PEND) { - STRH_U12(s1, xEmu, offsetof(x64emu_t, res)); - } - IFX(X_AF|X_OF) { - ORRw_REG(s3, s4, s5); // s3 = op1 | op2 - ANDw_REG(s4, s4, s5); // s4 = op1 & op2 - BICw_REG(s3, s3, s1); // s3 = (op1 | op2) & ~ res - ORRw_REG(s3, s3, s4); // s4 = (op1 & op2) | ((op1 | op2) & ~ res) - IFX(X_AF) { - LSRw(s4, s3, 3); - BFIw(xFlags, s4, F_AF, 1); // AF: bc & 0x08 - } - IFX(X_OF) { - LSRw(s4, s3, 6); - EORw_REG_LSR(s4, s4, s4, 1); - BFIw(xFlags, s4, F_OF, 1); // OF: ((bc >> 6) ^ ((bc>>6)>>1)) & 1 - } - } - IFX(X_CF) { - LSRw(s3, s1, 8); - BFIw(xFlags, s3, F_CF, 1); - } - IFX(X_ZF) { - ANDSw_mask(s1, s1, 0, 0b000111); //mask=000000ff - CSETw(s3, cEQ); - BFIw(xFlags, s3, F_ZF, 1); - } - IFX(X_SF) { - LSRw(s3, s1, 7); - BFIw(xFlags, s3, F_SF, 1); - } - IFX(X_PF) { - emit_pf(dyn, ninst, s1, s3, s4); - } -} - -// emit ADC16 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch -void emit_adc16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4) -{ - MAYUSE(s2); - IFX(X_PEND) { - STRH_U12(s1, xEmu, offsetof(x64emu_t, op1)); - STRH_U12(s2, xEmu, offsetof(x64emu_t, op2)); - SET_DF(s3, d_adc16); - } else IFX(X_ALL) { - SET_DFNONE(s3); - } - IFX(X_AF | X_OF) { - MOVw_REG(s4, s1); - } - MRS_nzvc(s3); - BFIx(s3, xFlags, 29, 1); // set C - MSR_nzvc(s3); // load CC into ARM CF - ADCw_REG(s1, s1, s2); - IFX(X_PEND) { - STRH_U12(s1, xEmu, offsetof(x64emu_t, res)); - } - IFX(X_AF|X_OF) { - ORRw_REG(s3, s4, s2); // s3 = op1 | op2 - ANDw_REG(s4, s4, s2); // s4 = op1 & op2 - BICw_REG(s3, s3, s1); // s3 = (op1 | op2) & ~ res - ORRw_REG(s3, s3, s4); // s3 = (op1 & op2) | ((op1 | op2) & ~ res) - IFX(X_AF) { - LSRw(s4, s3, 3); - BFIw(xFlags, s4, F_AF, 1); // AF: bc & 0x08 - } - IFX(X_OF) { - LSRw(s4, s3, 14); - EORw_REG_LSR(s4, s4, s4, 1); - BFIw(xFlags, s4, F_OF, 1); // OF: ((bc >> 14) ^ ((bc>>14)>>1)) & 1 - } - } - IFX(X_CF) { - LSRw(s3, s1, 16); - BFIw(xFlags, s3, F_CF, 1); - } - IFX(X_ZF) { - ANDSw_mask(s1, s1, 0, 15); //mask=0xffff - CSETw(s3, cEQ); - BFIw(xFlags, s3, F_ZF, 1); - } - IFX(X_SF) { - LSRw(s3, s1, 15); - BFIw(xFlags, s3, F_SF, 1); - } - IFX(X_PF) { - emit_pf(dyn, ninst, s1, s3, s4); - } -} - -// emit ADC16 instruction, from s1, const c, store result in s1 using s3 and s4 as scratch -//void emit_adc16c(dynarec_arm_t* dyn, int ninst, int s1, int c, int s3, int s4) -//{ -// IFX(X_PEND) { -// MOVW(s3, c); -// STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1)); -// STR_IMM9(s3, xEmu, offsetof(x64emu_t, op2)); -// SET_DF(s3, 
d_adc16); -// } else IFX(X_ALL) { -// SET_DFNONE(s3); -// } -// IFX(X_AF | X_OF) { -// MOV_REG(s4, s1); -// } -// MOVS_REG_LSR_IMM5(s3, xFlags, 1); // load CC into ARM CF -// if(c>=0 && c<256) { -// ADC_IMM8(s1, s1, c); -// } else { -// MOVW(s3, c); -// ADC_REG_LSL_IMM5(s1, s1, s3, 0); -// } -// IFX(X_PEND) { -// STR_IMM9(s1, xEmu, offsetof(x64emu_t, res)); -// } -// IFX(X_AF|X_OF) { -// if(c>=0 && c<256) { -// ORR_IMM8(s3, s4, c, 0); // s3 = op1 | op2 -// AND_IMM8(s4, s4, c); // s4 = op1 & op2 -// } else { -// ORR_REG_LSL_IMM5(s3, s3, s4, 0); // s3 = op1 | op2 -// PUSH(xSP, 1<> 14) ^ ((bc>>14)>>1)) & 1 -// } -// } -// IFX(X_CF) { -// MOV_REG_LSR_IMM5(s3, s1, 16); -// BFI(xFlags, s3, F_CF, 1); -// } -// IFX(X_ZF) { -// UXTH(s1, s1, 0); -// TSTS_REG_LSL_IMM5(s1, s1, 0); -// ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<=0 && c<256) { -// IFX(X_ZF|X_CF|X_OF) { -// SBCS_IMM8(s1, s1, c); -// } else { -// SBC_IMM8(s1, s1, c); -// } -// } else { -// MOV32(s3, c); -// IFX(X_ZF|X_CF|X_OF) { -// SBCS_REG_LSL_IMM5(s1, s1, s3, 0); -// } else { -// SBC_REG_LSL_IMM5(s1, s1, s3, 0); -// } -// } -// IFX(X_PEND) { -// STR_IMM9(s1, xEmu, offsetof(x64emu_t, res)); -// } -// IFX(X_AF) { -// if(c>=0 && c<256) { -// AND_IMM8(s4, s3, c); // s4 = ~op1 & op2 -// ORR_IMM8(s3, s3, c, 0); // s3 = ~op1 | op2 -// } else { -// ORR_REG_LSL_IMM5(s3, s4, s3, 0); -// PUSH(xSP, 1<> 6) ^ ((bc>>6)>>1)) & 1 - } - } - IFX(X_ZF) { - ANDSw_mask(s1, s1, 0, 7); //mask=0xff - CSETw(s3, cEQ); - BFIw(xFlags, s3, F_ZF, 1); - } - IFX(X_SF) { - LSRw(s3, s1, 7); - BFIw(xFlags, s3, F_SF, 1); - } - IFX(X_PF) { - emit_pf(dyn, ninst, s1, s3, s4); - } -} - -// emit SBB8 instruction, from s1, constant c, store result in s1 using s3 and s4 as scratch -void emit_sbb8c(dynarec_arm_t* dyn, int ninst, int s1, int c, int s3, int s4, int s5) -{ - MAYUSE(s5); - MOV32w(s5, c&0xff); - IFX(X_PEND) { - STRB_U12(s1, xEmu, offsetof(x64emu_t, op1)); - STRB_U12(s5, xEmu, offsetof(x64emu_t, op2)); - SET_DF(s3, d_sbb8); - } else IFX(X_ALL) { - SET_DFNONE(s3); - } - EORw_mask(s4, xFlags, 0, 0); // invert CC because it's reverted for SUB on ARM - MRS_nzvc(s3); - BFIx(s3, s4, 29, 1); // set C, bit 29 - MSR_nzvc(s3); // load CC into ARM CF - IFX(X_AF|X_OF|X_CF) { - MVNw_REG(s4, s1); - } - SBCw_REG(s1, s1, s5); - IFX(X_PEND) { - STRB_U12(s1, xEmu, offsetof(x64emu_t, res)); - } - IFX(X_AF|X_OF|X_CF) { - ORRw_REG(s3, s4, s5); // s3 = ~op1 | op2 - ANDw_REG(s4, s4, s5); // s4 = ~op1 & op2 - ANDw_REG(s3, s3, s1); // s3 = (~op1 | op2) & res - ORRw_REG(s3, s3, s4); // s3 = (~op1 & op2) | ((~op1 | op2) & res) - IFX(X_CF) { - LSRw(s4, s3, 7); - BFIw(xFlags, s4, F_CF, 1); // CF : bc & 0x80 - } - IFX(X_AF) { - LSRw(s4, s3, 3); - BFIw(xFlags, s4, F_AF, 1); // AF: bc & 0x08 - } - IFX(X_OF) { - LSRw(s4, s3, 6); - EORw_REG_LSR(s4, s4, s4, 1); - BFIw(xFlags, s4, F_OF, 1); // OF: ((bc >> 6) ^ ((bc>>6)>>1)) & 1 - } - } - IFX(X_ZF) { - ANDSw_mask(s1, s1, 0, 0b000111); //mask=000000ff - CSETw(s3, cEQ); - BFIw(xFlags, s3, F_ZF, 1); - } - IFX(X_SF) { - LSRw(s3, s1, 7); - BFIw(xFlags, s3, F_SF, 1); - } - IFX(X_PF) { - emit_pf(dyn, ninst, s1, s3, s4); - } -} - -// emit SBB16 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch -void emit_sbb16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4) -{ - MAYUSE(s2); - IFX(X_PEND) { - STRH_U12(s1, xEmu, offsetof(x64emu_t, op1)); - STRH_U12(s2, xEmu, offsetof(x64emu_t, op2)); - SET_DF(s3, d_sbb16); - } else IFX(X_ALL) { - SET_DFNONE(s3); - } - EORw_mask(s4, xFlags, 0, 0); // invert CC because it's reverted for SUB on ARM - 
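/*
 * Reference sketch, not part of this patch: the MRS_nzvc / BFIx(..., 29, 1) /
 * MSR_nzvc sequences used by the ADC and SBB emitters copy the x86 CF into
 * bit 29 of NZCV, which is the AArch64 C flag, so ADC/SBC can consume it.
 * For SBB the flag is inverted first (the EORw_mask above), since AArch64
 * subtract-with-carry treats C=1 as "no borrow". Names are illustrative only.
 */
#include <stdint.h>

static uint64_t nzcv_with_x86_cf(uint64_t nzcv, unsigned x86_cf, int for_sbb)
{
    unsigned c = (for_sbb ? (x86_cf ^ 1u) : x86_cf) & 1u;   /* invert for SBB */
    return (nzcv & ~(1ull << 29)) | ((uint64_t)c << 29);    /* C is NZCV bit 29 */
}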
MRS_nzvc(s3); - BFIx(s3, s4, 29, 1); // set C, bit 29 - MSR_nzvc(s3); // load CC into ARM CF - IFX(X_AF|X_OF|X_CF) { - MVNw_REG(s4, s1); - } - SBCw_REG(s1, s1, s2); - IFX(X_PEND) { - STRH_U12(s1, xEmu, offsetof(x64emu_t, res)); - } - IFX(X_AF|X_OF|X_CF) { - ORRw_REG(s3, s4, s2); // s3 = ~op1 | op2 - ANDw_REG(s4, s2, s4); // s4 = ~op1 & op2 - ANDw_REG(s3, s3, s1); // s3 = (~op1 | op2) & res - ORRw_REG(s3, s3, s4); // s3 = (~op1 & op2) | ((~op1 | op2) & res) - IFX(X_CF) { - LSRw(s4, s3, 15); - BFIw(xFlags, s4, F_CF, 1); // CF : bc & 0x8000 - } - IFX(X_AF) { - LSRw(s4, s3, 3); - BFIw(xFlags, s4, F_AF, 1); // AF: bc & 0x08 - } - IFX(X_OF) { - LSRw(s4, s3, 14); - EORw_REG_LSR(s4, s4, s4, 1); - BFIw(xFlags, s4, F_OF, 1); // OF: ((bc >> 14) ^ ((bc>>14)>>1)) & 1 - } - } - IFX(X_ZF) { - ANDSw_mask(s1, s1, 0, 15); //mask=0xffff - CSETw(s3, cEQ); - BFIw(xFlags, s3, F_ZF, 1); - } - IFX(X_SF) { - LSRw(s3, s1, 15); - BFIw(xFlags, s3, F_SF, 1); - } - IFX(X_PF) { - emit_pf(dyn, ninst, s1, s3, s4); - } -} - -// emit SBB16 instruction, from s1, constant c, store result in s1 using s3 and s4 as scratch -//void emit_sbb16c(dynarec_arm_t* dyn, int ninst, int s1, int c, int s3, int s4) -//{ -// IFX(X_PEND) { -// MOVW(s3, c); -// STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1)); -// STR_IMM9(s3, xEmu, offsetof(x64emu_t, op2)); -// SET_DF(s3, d_sbb16); -// } else IFX(X_ALL) { -// SET_DFNONE(s3); -// } -// IFX(X_AF|X_OF|X_CF) { -// MVN_REG_LSL_IMM5(s4, s1, 0); -// } -// XOR_IMM8(s3, xFlags, 1); // invert CC because it's reverted for SUB on ARM -// MOVS_REG_LSR_IMM5(s3, s3, 1); // load into ARM CF -// if(c>=0 && c<255) { -// SBC_IMM8(s1, s1, c); -// } else { -// MOVW(s3, c); -// SBC_REG_LSL_IMM5(s1, s1, s3, 0); -// } -// IFX(X_PEND) { -// STR_IMM9(s1, xEmu, offsetof(x64emu_t, res)); -// } -// IFX(X_AF|X_OF|X_CF) { -// if(c>=0 && c<256) { -// ORR_IMM8(s3, s4, c, 0); // s3 = ~op1 | op2 -// AND_IMM8(s4, s4, c); // s4 = ~op1 & op2 -// } else { -// ORR_REG_LSL_IMM5(s3, s3, s4, 0); // s3 = ~op1 | op2 -// PUSH(xSP, 1<> 14) ^ ((bc>>14)>>1)) & 1 -// } -// } -// IFX(X_ZF) { -// UXTH(s1, s1, 0); -// TSTS_REG_LSL_IMM5(s1, s1, 0); -// ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<> 14) ^ ((bc>>14)>>1)) & 1 - } - } - IFX(X_ZF) { - CSETw(s4, cEQ); - BFIw(xFlags, s4, F_ZF, 1); - } - IFX(X_SF) { - LSRw(s3, s1, 15); - BFIw(xFlags, s3, F_SF, 1); - } - IFX(X_PF) { - emit_pf(dyn, ninst, s1, s3, s4); - } -} - -// emit NEG8 instruction, from s1, store result in s1 using s3 and s4 as scratch -void emit_neg8(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4) -{ - IFX(X_PEND) { - STRB_U12(s1, xEmu, offsetof(x64emu_t, op1)); - SET_DF(s3, d_neg8); - } else IFX(X_ALL) { - SET_DFNONE(s3); - } - IFX(X_CF) { - TSTw_REG(s1, s1); - CSETw(s4, cNE); - BFIw(xFlags, s4, F_CF, 1); - } - IFX(X_AF|X_OF) { - MOVw_REG(s3, s1); - } - NEGSw_REG(s1, s1); - IFX(X_PEND) { - STRB_U12(s1, xEmu, offsetof(x64emu_t, res)); - } - IFX(X_AF|X_OF) { - ORRw_REG(s3, s3, s1); // bc = op1 | res - IFX(X_AF) { - LSRw(s4, s3, 3); - BFIw(xFlags, s4, F_AF, 1); // AF: bc & 0x08 - } - IFX(X_OF) { - LSRw(s4, s3, 6); - EORx_REG_LSR(s4, s4, s4, 1); - BFIw(xFlags, s4, F_OF, 1); // OF: ((bc >> 6) ^ ((bc>>6)>>1)) & 1 - } - } - IFX(X_ZF) { - CSETw(s4, cEQ); - BFIw(xFlags, s4, F_ZF, 1); - } - IFX(X_SF) { - LSRw(s3, s1, 7); - BFIw(xFlags, s3, F_SF, 1); - } - IFX(X_PF) { - emit_pf(dyn, ninst, s1, s3, s4); - } -} diff --git a/src/dynarec/dynarec_arm64_emit_shift.c b/src/dynarec/dynarec_arm64_emit_shift.c deleted file mode 100755 index 4382794d..00000000 --- a/src/dynarec/dynarec_arm64_emit_shift.c +++ 
/dev/null @@ -1,449 +0,0 @@ -#include -#include -#include -#include -#include - -#include "debug.h" -#include "box64context.h" -#include "dynarec.h" -#include "emu/x64emu_private.h" -#include "emu/x64run_private.h" -#include "x64run.h" -#include "x64emu.h" -#include "box64stack.h" -#include "callback.h" -#include "emu/x64run_private.h" -#include "x64trace.h" -#include "dynarec_arm64.h" -#include "dynarec_arm64_private.h" -#include "arm64_printer.h" -#include "../tools/bridge_private.h" - -#include "dynarec_arm64_functions.h" -#include "dynarec_arm64_helper.h" - -// emit SHL32 instruction, from s1 , shift s2, store result in s1 using s3 and s4 as scratch. s3 can be same as s2 -void emit_shl32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4) -{ - MAYUSE(s2); - int64_t j64; - MAYUSE(j64); - - IFX(X_PEND) { - STRxw_U12(s1, xEmu, offsetof(x64emu_t, op1)); - STRxw_U12(s2, xEmu, offsetof(x64emu_t, op2)); - SET_DF(s4, rex.w?d_shl64:d_shl32); - } else IFX(X_ALL) { - SET_DFNONE(s4); - } - IFX(F_OF) { - CMPSxw_U12(s2, 0); - IFX(F_OF) { - Bcond(cNE, +8); - BFCx(xFlags, F_OF, 1); - } - IFX(X_PEND) { - Bcond(cNE, +8); - STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); - } - B_NEXT(cEQ); - } - IFX(X_CF | X_OF) { - MOV32w(s4, rex.w?64:32); - SUBxw_REG(s4, s4, s2); - LSRxw_REG(s4, s1, s4); - BFIw(xFlags, s4, F_CF, 1); - } - LSLxw_REG(s1, s1, s2); - IFX(X_PEND) { - STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); - } - IFX(X_ZF) { - TSTxw_REG(s1, s1); - CSETw(s4, cEQ); - BFIw(xFlags, s4, F_ZF, 1); - } - IFX(X_SF) { - LSRxw(s4, s1, (rex.w)?63:31); - BFIx(xFlags, s4, F_SF, 1); - } - IFX(X_OF) { - CMPSxw_U12(s2, 1); // if s2==1 - IFX(X_SF) {} else {LSRxw(s4, s1, (rex.w)?63:31);} - EORxw_REG(s4, s4, xFlags); // CF is set if OF is asked - CSELw(s4, s4, wZR, cEQ); - BFIw(xFlags, s4, F_OF, 1); - } - IFX(X_PF) { - emit_pf(dyn, ninst, s1, s3, s4); - } -} - -// emit SHL32 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch -void emit_shl32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, int s3, int s4) -{ - IFX(X_PEND) { - MOV32w(s3, c); - STRxw_U12(s1, xEmu, offsetof(x64emu_t, op1)); - STRxw_U12(s3, xEmu, offsetof(x64emu_t, op2)); - SET_DF(s4, rex.w?d_shl64:d_shl32); - } else IFX(X_ALL) { - SET_DFNONE(s4); - } - if(c==0) { - IFX(F_OF) { - BFCx(xFlags, F_OF, 1); - } - IFX(X_PEND) { - STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); - } - return; - } - IFX(X_CF|X_OF) { - LSRxw(s3, s1, (rex.w?64:32)-c); - BFIxw(xFlags, s3, F_CF, 1); - } - LSLxw(s1, s1, c); - - IFX(X_PEND) { - STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); - } - IFX(X_ZF) { - TSTxw_REG(s1, s1); - CSETw(s4, cEQ); - BFIw(xFlags, s4, F_ZF, 1); - } - IFX(X_SF) { - LSRxw(s4, s1, (rex.w)?63:31); - BFIx(xFlags, s4, F_SF, 1); - } - IFX(X_OF) { - if(c==1) { - IFX(X_SF) {} else {LSRxw(s4, s1, (rex.w)?63:31);} - EORxw_REG(s4, s4, xFlags); // CF is set if OF is asked - BFIw(xFlags, s4, F_OF, 1); - } else { - BFCw(xFlags, F_OF, 1); - } - } - IFX(X_PF) { - emit_pf(dyn, ninst, s1, s3, s4); - } -} - -// emit SHR32 instruction, from s1 , s2, store result in s1 using s3 and s4 as scratch, s2 can be same as s3 -void emit_shr32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4) -{ - MAYUSE(s2); - int64_t j64; - MAYUSE(j64); - - IFX(X_PEND) { - STRxw_U12(s1, xEmu, offsetof(x64emu_t, op1)); - STRxw_U12(s2, xEmu, offsetof(x64emu_t, op2)); - SET_DF(s4, rex.w?d_shr64:d_shr32); - } else IFX(X_ALL) { - SET_DFNONE(s4); - } - IFX(X_ALL) { - CMPSxw_U12(s2, 0); //if(!c) - IFX(X_PEND) { - Bcond(cNE, +12); 
- STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); - } - B_NEXT(cEQ); - } - IFX(X_CF) { - SUBxw_U12(s3, s2, 1); - LSRxw_REG(s3, s1, s3); - BFIw(xFlags, s3, 0, 1); - } - LSRxw_REG(s1, s1, s2); - IFX(X_PEND) { - STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); - } - IFX(X_ZF) { - TSTxw_REG(s1, s1); - CSETw(s4, cEQ); - BFIw(xFlags, s4, F_ZF, 1); - } - IFX(X_SF) { - LSRxw(s4, s1, (rex.w)?63:31); - BFIx(xFlags, s4, F_SF, 1); - } - IFX(X_OF) { - CMPSxw_U12(s2, 1); // if s2==1 - Bcond(cNE, 4+3*4); - if(rex.w) { - LSRx(s4, s1, 62); - } else { - LSRw(s4, s1, 30); - } - EORw_REG_LSR(s4, s4, s4, 1); - BFIw(xFlags, s4, F_OF, 1); - } - IFX(X_PF) { - emit_pf(dyn, ninst, s1, s3, s4); - } -} - -// emit SHR32 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch -void emit_shr32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, int s3, int s4) -{ - IFX(X_PEND) { - MOV32w(s3, c); - STRxw_U12(s1, xEmu, offsetof(x64emu_t, op1)); - STRxw_U12(s3, xEmu, offsetof(x64emu_t, op2)); - SET_DF(s4, rex.w?d_shr64:d_shr32); - } else IFX(X_ALL) { - SET_DFNONE(s4); - } - if(!c) { - IFX(X_PEND) { - STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); - } - return; - } - IFX(X_CF) { - if(c>1) { - LSRxw(s3, s1, c-1); - } - BFIw(xFlags, (c>1)?s3:s1, 0, 1); - } - LSRxw(s1, s1, c); - IFX(X_PEND) { - STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); - } - IFX(X_ZF) { - TSTxw_REG(s1, s1); - CSETw(s4, cEQ); - BFIw(xFlags, s4, F_ZF, 1); - } - IFX(X_SF) { - LSRxw(s4, s1, (rex.w)?63:31); - BFIx(xFlags, s4, F_SF, 1); - } - IFX(X_OF) { - if(c==1) { - LSRxw(s4, s1, rex.w?62:30); - EORw_REG_LSR(s4, s4, s4, 1); - BFIw(xFlags, s4, F_OF, 1); - } - } - IFX(X_PF) { - emit_pf(dyn, ninst, s1, s3, s4); - } -} - -// emit SAR32 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch -void emit_sar32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, int s3, int s4) -{ - IFX(X_PEND) { - MOV32w(s3, c); - STRxw_U12(s1, xEmu, offsetof(x64emu_t, op1)); - STRxw_U12(s3, xEmu, offsetof(x64emu_t, op2)); - SET_DF(s4, rex.w?d_sar64:d_sar32); - } else IFX(X_ALL) { - SET_DFNONE(s4); - } - if(!c) { - IFX(X_PEND) { - STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); - } - return; - } - IFX(X_CF) { - if(c>1) { - ASRxw(s3, s1, c-1); - } - BFIw(xFlags, (c>1)?s3:s1, 0, 1); - } - ASRxw(s1, s1, c); - IFX(X_PEND) { - STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); - } - IFX(X_ZF) { - TSTw_REG(s1, s1); - CSETw(s4, cEQ); - BFIw(xFlags, s4, F_ZF, 1); - } - IFX(X_SF) { - LSRxw(s4, s1, (rex.w)?63:31); - BFIx(xFlags, s4, F_SF, 1); - } - IFX(X_PF) { - emit_pf(dyn, ninst, s1, s3, s4); - } -} - -// emit ROL32 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch -void emit_rol32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, int s3, int s4) -{ - MAYUSE(rex); MAYUSE(s1); MAYUSE(s3); MAYUSE(s4); - IFX(X_PEND) { - MOV32w(s3, c); - STRxw_U12(s3, xEmu, offsetof(x64emu_t, op2)); - SET_DF(s4, d_rol32); - } else IFX(X_ALL) { - SET_DFNONE(s4); - } - if(!c) { - IFX(X_PEND) { - STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); - } - return; - } - RORxw(s1, s1, (rex.w?64:32)-c); - IFX(X_PEND) { - STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); - } - IFX(X_CF) { - BFIw(xFlags, s1, F_CF, 1); - } - IFX(X_OF) { - if(c==1) { - ADDxw_REG_LSR(s3, s1, s1, rex.w?63:31); - BFIw(xFlags, s3, F_OF, 1); - } - } -} - -// emit ROR32 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch -void emit_ror32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, int s3, int s4) -{ - 
MAYUSE(s1); MAYUSE(s3); MAYUSE(s4); - IFX(X_PEND) { - MOV32w(s3, c); - STRxw_U12(s3, xEmu, offsetof(x64emu_t, op2)); - SET_DF(s4, rex.w?d_ror64:d_ror32); - } else IFX(X_ALL) { - SET_DFNONE(s4); - } - if(!c) { - IFX(X_PEND) { - STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); - } - return; - } - RORxw(s1, s1, c); - IFX(X_PEND) { - STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); - } - IFX(X_CF) { - LSRxw(s3, s1, rex.w?63:31); - BFIw(xFlags, s3, F_CF, 1); - } - IFX(X_OF) { - if(c==1) { - LSRxw(s3, s1, rex.w?62:30); - EORxw_REG_LSR(s3, s3, s3, 1); - BFIw(xFlags, s4, F_OF, 1); - } - } -} - -// emit SHRD32 instruction, from s1, fill s2 , constant c, store result in s1 using s3 and s4 as scratch -void emit_shrd32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int32_t c, int s3, int s4) -{ - c&=(rex.w?0x3f:0x1f); - IFX(X_PEND) { - MOV32w(s3, c); - STRxw_U12(s1, xEmu, offsetof(x64emu_t, op1)); - STRxw_U12(s3, xEmu, offsetof(x64emu_t, op2)); - // same flags computation as with shl64/shl32 - SET_DF(s4, rex.w?d_shl64:d_shl32); - } else IFX(X_ALL) { - SET_DFNONE(s4); - } - if(!c) { - IFX(X_PEND) { - STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); - } - return; - } - IFX(X_CF) { - if(c>1) { - LSRxw(s3, s1, c-1); - } - BFIw(xFlags, (c>1)?s3:s1, 0, 1); - } - LSRxw(s3, s1, c); - ORRxw_REG_LSL(s1, s3, s2, (rex.w?64:32)-c); - IFX(X_PEND) { - STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); - } - IFX(X_ZF) { - TSTxw_REG(s1, s1); - CSETw(s4, cEQ); - BFIw(xFlags, s4, F_ZF, 1); - } - IFX(X_SF) { - LSRxw(s4, s1, (rex.w)?63:31); - BFIx(xFlags, s4, F_SF, 1); - } - IFX(X_OF) { - if(c==1) { - LSRxw(s4, s1, rex.w?62:30); - EORw_REG_LSR(s4, s4, s4, 1); - BFIw(xFlags, s4, F_OF, 1); - } - } - IFX(X_PF) { - emit_pf(dyn, ninst, s1, s3, s4); - } -} - -void emit_shld32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int32_t c, int s3, int s4) -{ - c&=(rex.w?0x3f:0x1f); - IFX(X_PEND) { - MOV32w(s3, c); - STRxw_U12(s1, xEmu, offsetof(x64emu_t, op1)); - STRxw_U12(s3, xEmu, offsetof(x64emu_t, op2)); - // same flags computation as with shl64/shl32 - SET_DF(s4, rex.w?d_shl64:d_shl32); - } else IFX(X_ALL) { - SET_DFNONE(s4); - } - if(c==0) { - IFX(F_OF) { - BFCx(xFlags, F_OF, 1); - } - IFX(X_PEND) { - STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); - } - return; - } - IFX(X_CF|X_OF) { - LSRxw(s3, s1, (rex.w?64:32)-c); - BFIxw(xFlags, s3, F_CF, 1); - } - LSLxw(s3, s1, c); - ORRxw_REG_LSR(s1, s3, s2, (rex.w?64:32)-c); - - IFX(X_PEND) { - STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); - } - IFX(X_ZF) { - TSTxw_REG(s1, s1); - CSETw(s4, cEQ); - BFIw(xFlags, s4, F_ZF, 1); - } - IFX(X_SF) { - LSRxw(s4, s1, (rex.w)?63:31); - BFIx(xFlags, s4, F_SF, 1); - } - IFX(X_OF) { - if(c==1) { - UBFXxw(s3, s1, rex.w?63:31, 1); - EORxw_REG(s3, s3, xFlags); // CF is set if OF is asked - BFIw(xFlags, s3, F_OF, 1); - } else { - BFCw(xFlags, F_OF, 1); - } - } - IFX(X_PF) { - emit_pf(dyn, ninst, s1, s3, s4); - } -} diff --git a/src/dynarec/dynarec_arm64_emit_tests.c b/src/dynarec/dynarec_arm64_emit_tests.c deleted file mode 100755 index 14fb366e..00000000 --- a/src/dynarec/dynarec_arm64_emit_tests.c +++ /dev/null @@ -1,374 +0,0 @@ -#include -#include -#include -#include -#include - -#include "debug.h" -#include "box64context.h" -#include "dynarec.h" -#include "emu/x64emu_private.h" -#include "emu/x64run_private.h" -#include "x64run.h" -#include "x64emu.h" -#include "box64stack.h" -#include "callback.h" -#include "emu/x64run_private.h" -#include "x64trace.h" -#include "dynarec_arm64.h" -#include "dynarec_arm64_private.h" -#include "arm64_printer.h" 
-#include "../tools/bridge_private.h" - -#include "dynarec_arm64_functions.h" -#include "dynarec_arm64_helper.h" - -// emit CMP32 instruction, from cmp s1, s2, using s3 and s4 as scratch -void emit_cmp32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5) -{ - MAYUSE(s1); MAYUSE(s2); - IFX_PENDOR0 { - STRxw_U12(s1, xEmu, offsetof(x64emu_t, op1)); - STRxw_U12(s2, xEmu, offsetof(x64emu_t, op2)); - SET_DF(s4, rex.w?d_cmp64:d_cmp32); - } else { - SET_DFNONE(s4); - } - IFX(X_AF) { - ORNxw_REG(s3, s2, s1); // s3 = ~op1 | op2 - BICxw(s4, s2, s1); // s4 = ~op1 & op2 - } - SUBSxw_REG(s5, s1, s2); // res = s1 - s2 - IFX_PENDOR0 { - STRxw_U12(s5, xEmu, offsetof(x64emu_t, res)); - } - IFX(X_AF) { - ANDxw_REG(s3, s3, s5); // s3 = (~op1 | op2) & res - ORRxw_REG(s3, s3, s4); // s3 = (~op1 & op2) | ((~op1 | op2) & res) - LSRxw(s4, s3, 3); - BFIx(xFlags, s4, F_AF, 1); // AF: bc & 0x08 - } - IFX(X_ZF) { - CSETw(s4, cEQ); - BFIw(xFlags, s4, F_ZF, 1); - } - IFX(X_CF) { - // inverted carry - CSETw(s4, cCC); - BFIw(xFlags, s4, F_CF, 1); - } - IFX(X_OF) { - CSETw(s4, cVS); - BFIw(xFlags, s4, F_OF, 1); - } - IFX(X_SF) { - LSRxw(s3, s5, (rex.w)?63:31); - BFIw(xFlags, s3, F_SF, 1); - } - IFX(X_PF) { - emit_pf(dyn, ninst, s5, s3, s4); - } -} - -// emit CMP32 instruction, from cmp s1 , 0, using s3 and s4 as scratch -void emit_cmp32_0(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4) -{ - IFX_PENDOR0 { - MOV64xw(s4, 0); - STRxw_U12(s1, xEmu, offsetof(x64emu_t, op1)); - STRxw_U12(s4, xEmu, offsetof(x64emu_t, op2)); - STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); - SET_DF(s4, rex.w?d_cmp64:d_cmp32); - } else { - SET_DFNONE(s4); - } - SUBSxw_U12(s3, s1, 0); // res = s1 - 0 - // and now the tricky ones (and mostly unused), PF and AF - // bc = (res & (~d | s)) | (~d & s) => is 0 here... 
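The borrow-chain identity quoted in the comment above (and used by emit_cmp32 to derive AF) can be checked in plain C; a small sketch with illustrative names, assuming the usual bc = (~d & s) | ((~d | s) & res) formulation:

    #include <stdint.h>

    /* For res = d - s, bc holds the per-bit borrows; AF is bit 3 of bc,
       i.e. the borrow out of the low nibble. */
    static int af_after_sub32(uint32_t d, uint32_t s)
    {
        uint32_t res = d - s;
        uint32_t bc  = (~d & s) | ((~d | s) & res);
        return (bc >> 3) & 1;
    }

With s == 0 the whole chain collapses to zero, which is why the cmp*_0 helpers skip the computation entirely.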
- IFX(X_OF|X_AF) { - MOV32w(s4, (1<> 14) ^ ((bc>>14)>>1)) & 1 - } - } - IFX(X_PF) { - emit_pf(dyn, ninst, s5, s3, s4); - } -} - -// emit CMP16 instruction, from cmp s1 , #0, using s3 and s4 as scratch -void emit_cmp16_0(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4) -{ - IFX_PENDOR0 { - MOV32w(s3, 0); - STRH_U12(s1, xEmu, offsetof(x64emu_t, op1)); - STRH_U12(s3, xEmu, offsetof(x64emu_t, op2)); - STRH_U12(s1, xEmu, offsetof(x64emu_t, res)); - SET_DF(s3, d_cmp16); - } else { - SET_DFNONE(s3); - } - // bc = (res & (~d | s)) | (~d & s) = 0 - IFX(X_CF | X_AF | X_OF) { - MOV32w(s3, (1<> 6) ^ ((bc>>6)>>1)) & 1 - } - } - IFX(X_PF) { - emit_pf(dyn, ninst, s5, s3, s4); - } -} -// emit CMP8 instruction, from cmp s1 , 0, using s3 and s4 as scratch -void emit_cmp8_0(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4) -{ - IFX_PENDOR0 { - STRB_U12(s1, xEmu, offsetof(x64emu_t, op1)); - MOV32w(s4, 0); - STRB_U12(s4, xEmu, offsetof(x64emu_t, op2)); - STRB_U12(s1, xEmu, offsetof(x64emu_t, res)); - SET_DF(s3, d_cmp8); - } else { - SET_DFNONE(s4); - } - // bc = (res & (~d | s)) | (~d & s) = 0 - IFX(X_CF | X_AF | X_OF) { - MOV32w(s3, (1<x64emu_parity_tab[(res) / 32] >> ((res) % 32)) & 1) == 0) - IFX(X_PF) { - ANDw_mask(s3, s3, 0b011011, 0b000010); // 0xE0 - LSRw(s3, s3, 5); - MOV64x(s4, (uintptr_t)GetParityTab()); - LDRw_REG_LSL2(s4, s4, s3); - ANDw_mask(s3, s1, 0, 0b000100); // 0x1f - LSRw_REG(s4, s4, s3); - MVNx_REG(s4, s4); - BFIw(xFlags, s4, F_PF, 1); - } -} - -// emit TEST16 instruction, from test s1, s2, using s3 and s4 as scratch -void emit_test16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5) -{ - MAYUSE(s1); MAYUSE(s2); - IFX_PENDOR0 { - SET_DF(s3, d_tst16); - } else { - SET_DFNONE(s4); - } - IFX(X_OF) { - BFCw(xFlags, F_OF, 1); - } - IFX(X_CF) { - BFCw(xFlags, F_CF, 1); - } - ANDSw_REG(s5, s1, s2); // res = s1 & s2 - IFX_PENDOR0 { - STRH_U12(s5, xEmu, offsetof(x64emu_t, res)); - } - IFX(X_ZF) { - CSETw(s4, cEQ); - BFIw(xFlags, s4, F_ZF, 1); - } - IFX(X_SF) { - LSRw(s4, s5, 15); - BFIw(xFlags, s4, F_SF, 1); - } - // PF: (((emu->x64emu_parity_tab[(res) / 32] >> ((res) % 32)) & 1) == 0) - IFX(X_PF) { - emit_pf(dyn, ninst, s5, s3, s4); - } -} - -// emit TEST8 instruction, from test s1, s2, using s3 and s4 as scratch -void emit_test8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5) -{ - MAYUSE(s1); MAYUSE(s2); - IFX_PENDOR0 { - SET_DF(s3, d_tst8); - } else { - SET_DFNONE(s4); - } - IFX(X_OF) { - BFCw(xFlags, F_OF, 1); - } - IFX(X_CF) { - BFCw(xFlags, F_CF, 1); - } - ANDSw_REG(s5, s1, s2); // res = s1 & s2 - IFX_PENDOR0 { - STRB_U12(s5, xEmu, offsetof(x64emu_t, res)); - } - IFX(X_ZF) { - CSETw(s4, cEQ); - BFIw(xFlags, s4, F_ZF, 1); - } - IFX(X_SF) { - LSRw(s4, s5, 7); - BFIw(xFlags, s4, F_SF, 1); - } - // PF: (((emu->x64emu_parity_tab[(res) / 32] >> ((res) % 32)) & 1) == 0) - IFX(X_PF) { - emit_pf(dyn, ninst, s5, s3, s4); - } -} diff --git a/src/dynarec/dynarec_arm64_f0.c b/src/dynarec/dynarec_arm64_f0.c deleted file mode 100644 index dbcf18ea..00000000 --- a/src/dynarec/dynarec_arm64_f0.c +++ /dev/null @@ -1,939 +0,0 @@ -#include -#include -#include -#include -#include - -#include "debug.h" -#include "box64context.h" -#include "dynarec.h" -#include "emu/x64emu_private.h" -#include "emu/x64run_private.h" -#include "x64run.h" -#include "x64emu.h" -#include "box64stack.h" -#include "callback.h" -#include "emu/x64run_private.h" -#include "x64trace.h" -#include "dynarec_arm64.h" -#include "dynarec_arm64_private.h" -#include "arm64_printer.h" - -#include 
"dynarec_arm64_helper.h" -#include "dynarec_arm64_functions.h" - - -uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog) -{ - (void)ip; (void)rep; (void)need_epilog; - - uint8_t opcode = F8; - uint8_t nextop; - uint8_t gd, ed, u8; - uint8_t wback, wb1, wb2, gb1, gb2; - int32_t i32; - int64_t i64, j64; - int64_t fixedaddress; - MAYUSE(gb1); - MAYUSE(gb2); - MAYUSE(wb1); - MAYUSE(wb2); - MAYUSE(j64); - - while((opcode==0xF2) || (opcode==0xF3)) { - rep = opcode-0xF1; - opcode = F8; - } - // REX prefix before the F0 are ignored - rex.rex = 0; - while(opcode>=0x40 && opcode<=0x4f) { - rex.rex = opcode; - opcode = F8; - } - - switch(opcode) { - case 0x00: - INST_NAME("LOCK ADD Eb, Gb"); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - DMB_ISH(); - GETGB(x2); - if((nextop&0xC0)==0xC0) { - if(rex.rex) { - wback = xRAX + (nextop&7) + (rex.b<<3); - wb2 = 0; - } else { - wback = (nextop&7); - wb2 = (wback>>2); - wback = xRAX+(wback&3); - } - UBFXw(x1, wback, wb2*8, 8); - emit_add8(dyn, ninst, x1, x2, x4, x3); - BFIx(wback, x1, wb2*8, 8); - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0, rex, 0, 0); - MARKLOCK; - LDAXRB(x1, wback); - emit_add8(dyn, ninst, x1, x2, x4, x3); - STLXRB(x4, x1, wback); - CBNZx_MARKLOCK(x4); - } - DMB_ISH(); - break; - case 0x01: - INST_NAME("LOCK ADD Ed, Gd"); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETGD; - DMB_ISH(); - if((nextop&0xC0)==0xC0) { - ed = xRAX+(nextop&7)+(rex.b<<3); - emit_add32(dyn, ninst, rex, ed, gd, x3, x4); - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, 0, 0); - MARKLOCK; - LDAXRxw(x1, wback); - emit_add32(dyn, ninst, rex, x1, gd, x3, x4); - STLXRxw(x3, x1, wback); - CBNZx_MARKLOCK(x3); - } - DMB_ISH(); - break; - - case 0x09: - INST_NAME("LOCK OR Ed, Gd"); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETGD; - DMB_ISH(); - if(MODREG) { - ed = xRAX+(nextop&7)+(rex.b<<3); - emit_or32(dyn, ninst, rex, ed, gd, x3, x4); - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, 0, 0); - MARKLOCK; - LDAXRxw(x1, wback); - emit_or32(dyn, ninst, rex, x1, gd, x3, x4); - STLXRxw(x3, x1, wback); - CBNZx_MARKLOCK(x3); - } - DMB_ISH(); - break; - - case 0x0F: - nextop = F8; - switch(nextop) { - - case 0xB1: - INST_NAME("LOCK CMPXCHG Ed, Gd"); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETGD; - DMB_ISH(); - if(MODREG) { - ed = xRAX+(nextop&7)+(rex.b<<3); - wback = 0; - UFLAG_IF {emit_cmp32(dyn, ninst, rex, xRAX, ed, x3, x4, x5);} - MOVxw_REG(x1, ed); // save value - CMPSxw_REG(xRAX, x1); - B_MARK2(cNE); - MOVxw_REG(ed, gd); - MARK2; - MOVxw_REG(xRAX, x1); - B_NEXT_nocond; - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, 0, 0); - TSTx_mask(wback, 1, 0, 1+rex.w); // mask=3 or 7 - B_MARK3(cNE); - // Aligned version - MARKLOCK; - LDAXRxw(x1, wback); - CMPSxw_REG(xRAX, x1); - B_MARK(cNE); - // EAX == Ed - STLXRxw(x4, gd, wback); - CBNZx_MARKLOCK(x4); - // done - B_MARK_nocond; - // Unaligned version - MARK3; - LDRxw_U12(x1, wback, 0); - LDAXRB(x3, wback); // dummy read, to arm the write... 
- CMPSxw_REG(xRAX, x1); - B_MARK(cNE); - // EAX == Ed - STLXRB(x4, gd, wback); - CBNZx_MARK3(x4); - STRxw_U12(gd, wback, 0); - MARK; - // Common part (and fallback for EAX != Ed) - UFLAG_IF {emit_cmp32(dyn, ninst, rex, xRAX, x1, x3, x4, x5);} - MOVxw_REG(xRAX, x1); // upper par of RAX will be erase on 32bits, no mater what - } - DMB_ISH(); - break; - - case 0xC1: - INST_NAME("LOCK XADD Gd, Ed"); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETGD; - DMB_ISH(); - if(MODREG) { - ed = xRAX+(nextop&7)+(rex.b<<3); - MOVxw_REG(x1, ed); - MOVxw_REG(ed, gd); - MOVxw_REG(gd, x1); - emit_add32(dyn, ninst, rex, ed, gd, x3, x4); - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, 0, 0); - TSTx_mask(wback, 1, 0, 1+rex.w); // mask=3 or 7 - B_MARK(cNE); // unaligned - MARKLOCK; - LDAXRxw(x1, wback); - ADDxw_REG(x4, x1, gd); - STLXRxw(x3, x4, wback); - CBNZx_MARKLOCK(x3); - B_MARK2_nocond; - MARK; - LDRxw_U12(x1, wback, 0); - LDAXRB(x4, wback); - BFIxw(x1, x4, 0, 8); - ADDxw_REG(x4, x1, gd); - STLXRB(x3, x4, wback); - CBNZx_MARK(x3); - STRxw_U12(x4, wback, 0); - MARK2; - IFX(X_ALL|X_PEND) { - MOVxw_REG(x2, x1); - emit_add32(dyn, ninst, rex, x2, gd, x3, x4); - } - MOVxw_REG(gd, x1); - } - DMB_ISH(); - break; - - case 0xC7: - INST_NAME("LOCK CMPXCHG8B Gq, Eq"); - SETFLAGS(X_ZF, SF_SUBSET); - nextop = F8; - addr = geted(dyn, addr, ninst, nextop, &wback, x1, &fixedaddress, 0, 0, rex, 0, 0); - DMB_ISH(); - MARKLOCK; - LDAXPxw(x2, x3, wback); - CMPSxw_REG(xRAX, x2); - B_MARK(cNE); // EAX != Ed[0] - CMPSxw_REG(xRDX, x3); - B_MARK(cNE); // EDX != Ed[1] - STLXPxw(x4, xRBX, xRCX, wback); - CBNZx_MARKLOCK(x4); - MOV32w(x1, 1); - B_MARK3_nocond; - MARK; - MOVxw_REG(xRAX, x2); - MOVxw_REG(xRDX, x3); - MOV32w(x1, 0); - MARK3; - DMB_ISH(); - BFIw(xFlags, x1, F_ZF, 1); - break; - - default: - DEFAULT; - } - break; - - case 0x29: - INST_NAME("LOCK SUB Ed, Gd"); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETGD; - DMB_ISH(); - if(MODREG) { - ed = xRAX+(nextop&7)+(rex.b<<3); - emit_sub32(dyn, ninst, rex, ed, gd, x3, x4); - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, 0, 0); - MARKLOCK; - LDAXRxw(x1, wback); - emit_sub32(dyn, ninst, rex, x1, gd, x3, x4); - STLXRxw(x3, x1, wback); - CBNZx_MARKLOCK(x3); - } - DMB_ISH(); - break; - - case 0x66: - opcode = F8; - switch(opcode) { - case 0x09: - INST_NAME("LOCK OR Ew, Gw"); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETGW(x5); - DMB_ISH(); - if(MODREG) { - ed = xRAX+(nextop&7)+(rex.b<<3); - UXTHw(x6, ed); - emit_or16(dyn, ninst, x6, x5, x3, x4); - BFIx(ed, x6, 0, 16); - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, 0, 0); - MARKLOCK; - LDAXRH(x1, wback); - emit_or16(dyn, ninst, x1, x5, x3, x4); - STLXRH(x3, x1, wback); - CBNZx_MARKLOCK(x3); - } - DMB_ISH(); - break; - - case 0x81: - case 0x83: - nextop = F8; - DMB_ISH(); - switch((nextop>>3)&7) { - case 0: //ADD - if(opcode==0x81) { - INST_NAME("LOCK ADD Ew, Iw"); - } else { - INST_NAME("LOCK ADD Ew, Iw"); - } - SETFLAGS(X_ALL, SF_SET_PENDING); - if(MODREG) { - if(opcode==0x81) i32 = F16S; else i32 = F8S; - ed = xRAX+(nextop&7)+(rex.b<<3); - MOV32w(x5, i32); - UXTHw(x6, ed); - emit_add16(dyn, ninst, x6, x5, x3, x4); - BFIx(ed, x6, 0, 16); - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, 0, (opcode==0x81)?2:1); - if(opcode==0x81) i32 = F32S; else i32 = F8S; - MOV32w(x5, i32); - TSTx_mask(wback, 1, 0, 0); // mask=1 - B_MARK(cNE); - MARKLOCK; - 
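The Eb/Gb handling at the top of this LOCK handler (the wb2/UBFXw sequence for LOCK ADD Eb, Gb) reflects how x86 encodes byte registers; a hedged sketch of that selection rule, with a hypothetical helper name:

    #include <stdint.h>

    /* Without a REX prefix, byte-register encodings 4-7 mean AH/CH/DH/BH,
       i.e. bits 8-15 of RAX/RCX/RDX/RBX; with any REX prefix they mean the
       low byte of the extended register set instead. */
    static uint8_t read_byte_reg(const uint64_t gpr[16], unsigned enc, int has_rex, unsigned rex_b)
    {
        if (has_rex)
            return (uint8_t)gpr[enc + (rex_b << 3)];
        unsigned high = enc >> 2;                      /* 1 for encodings 4-7 */
        return (uint8_t)(gpr[enc & 3] >> (high * 8));  /* pick the low or high byte */
    }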
LDAXRH(x1, wback); - emit_add16(dyn, ninst, x1, x5, x3, x4); - STLXRH(x3, x1, wback); - CBNZx_MARKLOCK(x3); - B_NEXT_nocond; - MARK; // unaligned! also, not enough - LDRH_U12(x1, wback, 0); - LDAXRB(x4, wback); - BFIw(x1, x4, 0, 8); // re-inject - emit_add16(dyn, ninst, x1, x5, x3, x4); - STLXRB(x3, x1, wback); - CBNZx_MARK(x3); - STRH_U12(x1, wback, 0); // put the whole value - } - break; - case 1: //OR - if(opcode==0x81) {INST_NAME("LOCK OR Ew, Iw");} else {INST_NAME("LOCK OR Ew, Iw");} - SETFLAGS(X_ALL, SF_SET_PENDING); - if(MODREG) { - if(opcode==0x81) i32 = F16S; else i32 = F8S; - ed = xRAX+(nextop&7)+(rex.b<<3); - MOV32w(x5, i32); - UXTHw(x6, ed); - emit_or16(dyn, ninst, x6, x5, x3, x4); - BFIx(ed, x6, 0, 16); - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, 0, (opcode==0x81)?2:1); - if(opcode==0x81) i32 = F16S; else i32 = F8S; - MOV32w(x5, i32); - MARKLOCK; - LDAXRH(x1, wback); - emit_or16(dyn, ninst, x1, x5, x3, x4); - STLXRH(x3, x1, wback); - CBNZx_MARKLOCK(x3); - } - break; - case 2: //ADC - if(opcode==0x81) {INST_NAME("LOCK ADC Ew, Iw");} else {INST_NAME("LOCK ADC Ew, Ib");} - READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); - if(MODREG) { - if(opcode==0x81) i32 = F16S; else i32 = F8S; - ed = xRAX+(nextop&7)+(rex.b<<3); - MOV32w(x5, i32); - UXTHw(x6, ed); - emit_adc16(dyn, ninst, x6, x5, x3, x4); - BFIx(ed, x6, 0, 16); - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, 0, (opcode==0x81)?2:1); - if(opcode==0x81) i32 = F16S; else i32 = F8S; - MOV32w(x5, i32); - MARKLOCK; - LDAXRH(x1, wback); - emit_adc16(dyn, ninst, x1, x5, x3, x4); - STLXRH(x3, x1, wback); - CBNZx_MARKLOCK(x3); - } - break; - case 3: //SBB - if(opcode==0x81) {INST_NAME("LOCK SBB Ew, Iw");} else {INST_NAME("LOCK SBB Ew, Ib");} - READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); - if(MODREG) { - if(opcode==0x81) i32 = F16S; else i32 = F8S; - ed = xRAX+(nextop&7)+(rex.b<<3); - MOV32w(x5, i32); - UXTHw(x6, ed); - emit_sbb16(dyn, ninst, x6, x5, x3, x4); - BFIx(ed, x6, 0, 16); - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, 0, (opcode==0x81)?2:1); - if(opcode==0x81) i32 = F16S; else i32 = F8S; - MOV32w(x5, i32); - MARKLOCK; - LDAXRH(x1, wback); - emit_sbb16(dyn, ninst, x1, x5, x3, x4); - STLXRH(x3, x1, wback); - CBNZx_MARKLOCK(x3); - } - break; - case 4: //AND - if(opcode==0x81) {INST_NAME("LOCK AND Ew, Iw");} else {INST_NAME("LOCK AND Ew, Ib");} - SETFLAGS(X_ALL, SF_SET_PENDING); - if(MODREG) { - if(opcode==0x81) i32 = F16S; else i32 = F8S; - ed = xRAX+(nextop&7)+(rex.b<<3); - MOV32w(x5, i32); - UXTHw(x6, ed); - emit_and16(dyn, ninst, x6, x5, x3, x4); - BFIx(ed, x6, 0, 16); - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, 0, (opcode==0x81)?2:1); - if(opcode==0x81) i32 = F16S; else i32 = F8S; - MOV32w(x5, i32); - MARKLOCK; - LDAXRH(x1, wback); - emit_and16(dyn, ninst, x1, x5, x3, x4); - STLXRH(x3, x1, wback); - CBNZx_MARKLOCK(x3); - } - break; - case 5: //SUB - if(opcode==0x81) {INST_NAME("LOCK SUB Ew, Iw");} else {INST_NAME("LOCK SUB Ew, Ib");} - SETFLAGS(X_ALL, SF_SET_PENDING); - if(MODREG) { - if(opcode==0x81) i32 = F16S; else i32 = F8S; - ed = xRAX+(nextop&7)+(rex.b<<3); - MOV32w(x5, i32); - UXTHw(x6, ed); - emit_sub16(dyn, ninst, x6, x5, x3, x4); - BFIx(ed, x6, 0, 16); - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, 0, (opcode==0x81)?2:1); - if(opcode==0x81) i32 = F16S; else i32 = F8S; - MOV32w(x5, i32); - 
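The aligned LDAXRxw/STLXRxw sequence of the LOCK CMPXCHG case above amounts to a compare-and-swap; a C11 sketch of the semantics only (not the emitted code), where eax and gd stand for the x86 operands:

    #include <stdatomic.h>
    #include <stdint.h>

    /* Compare memory with EAX, store Gd on a match, otherwise load the
       current value back into EAX. The return value plays the role of ZF;
       the real instruction derives all arithmetic flags from the compare,
       which is what the emit_cmp32 call above provides. */
    static int lock_cmpxchg32(_Atomic uint32_t* mem, uint32_t* eax, uint32_t gd)
    {
        uint32_t expected = *eax;
        if (atomic_compare_exchange_strong(mem, &expected, gd))
            return 1;            /* ZF = 1: memory held EAX, Gd was stored */
        *eax = expected;         /* ZF = 0: EAX receives the memory value */
        return 0;
    }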
TSTx_mask(wback, 1, 0, 0); // mask=1 - B_MARK(cNE); - MARKLOCK; - LDAXRH(x1, wback); - emit_sub16(dyn, ninst, x1, x5, x3, x4); - STLXRH(x3, x1, wback); - CBNZx_MARKLOCK(x3); - B_NEXT_nocond; - MARK; // unaligned! also, not enough - LDRH_U12(x1, wback, 0); - LDAXRB(x4, wback); - BFIw(x1, x4, 0, 8); // re-inject - emit_sub16(dyn, ninst, x1, x5, x3, x4); - STLXRB(x3, x1, wback); - CBNZx_MARK(x3); - STRH_U12(x1, wback, 0); // put the whole value - } - break; - case 6: //XOR - if(opcode==0x81) {INST_NAME("LOCK XOR Ew, Iw");} else {INST_NAME("LOCK XOR Ew, Ib");} - SETFLAGS(X_ALL, SF_SET_PENDING); - if(MODREG) { - if(opcode==0x81) i32 = F16S; else i32 = F8S; - ed = xRAX+(nextop&7)+(rex.b<<3); - MOV32w(x5, i32); - UXTHw(x6, ed); - emit_xor16(dyn, ninst, x6, x5, x3, x4); - BFIx(ed, x6, 0, 16); - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, 0, (opcode==0x81)?2:1); - if(opcode==0x81) i32 = F16S; else i32 = F8S; - MOV32w(x5, i32); - MARKLOCK; - LDAXRH(x1, wback); - emit_xor16(dyn, ninst, x1, x5, x3, x4); - STLXRH(x3, x1, wback); - CBNZx_MARKLOCK(x3); - } - break; - case 7: //CMP - if(opcode==0x81) {INST_NAME("(LOCK) CMP Ew, Iw");} else {INST_NAME("(LOCK) CMP Ew, Ib");} - SETFLAGS(X_ALL, SF_SET_PENDING); - GETEW(x6, (opcode==0x81)?2:1); - (void)wb1; - // No need to LOCK, this is readonly - if(opcode==0x81) i32 = F16S; else i32 = F8S; - if(i32) { - MOV32w(x5, i32); - UXTHw(x6, ed); - emit_cmp16(dyn, ninst, x6, x5, x3, x4, x6); - BFIx(ed, x6, 0, 16); - } else { - emit_cmp16_0(dyn, ninst, ed, x3, x4); - } - break; - } - DMB_ISH(); - break; - default: - DEFAULT; - } - break; - - case 0x80: - nextop = F8; - DMB_ISH(); - switch((nextop>>3)&7) { - case 0: //ADD - INST_NAME("ADD Eb, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); - if(MODREG) { - GETEB(x1, 1); - u8 = F8; - emit_add8c(dyn, ninst, x1, u8, x2, x4); - wb1 = 0; - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x5, &fixedaddress, 0, 0, rex, 0, 1); - u8 = F8; - wb1 = 1; - MARKLOCK; - LDAXRB(x1, wback); - emit_add8c(dyn, ninst, x1, u8, x2, x4); - STLXRB(x3, x1, wback); - CBNZx_MARKLOCK(x3); - } - EBBACK; - break; - case 1: //OR - INST_NAME("OR Eb, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); - if(MODREG) { - GETEB(x1, 1); - u8 = F8; - emit_or8c(dyn, ninst, x1, u8, x2, x4); - wb1 = 0; - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x5, &fixedaddress, 0, 0, rex, 0, 1); - u8 = F8; - wb1 = 1; - MARKLOCK; - LDAXRB(x1, wback); - emit_or8c(dyn, ninst, x1, u8, x2, x4); - STLXRB(x3, x1, wback); - CBNZx_MARKLOCK(x3); - } - EBBACK; - break; - case 2: //ADC - INST_NAME("ADC Eb, Ib"); - READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); - if(MODREG) { - GETEB(x1, 1); - u8 = F8; - emit_adc8c(dyn, ninst, x1, u8, x2, x4, x5); - wb1 = 0; - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x5, &fixedaddress, 0, 0, rex, 0, 1); - u8 = F8; - wb1 = 1; - MARKLOCK; - LDAXRB(x1, wback); - emit_adc8c(dyn, ninst, x1, u8, x2, x4, x5); - STLXRB(x3, x1, wback); - CBNZx_MARKLOCK(x3); - } - EBBACK; - break; - case 3: //SBB - INST_NAME("SBB Eb, Ib"); - READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); - if(MODREG) { - GETEB(x1, 1); - u8 = F8; - emit_sbb8c(dyn, ninst, x1, u8, x2, x4, x5); - wb1 = 0; - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x5, &fixedaddress, 0, 0, rex, 0, 1); - u8 = F8; - wb1 = 1; - MARKLOCK; - LDAXRB(x1, wback); - emit_sbb8c(dyn, ninst, x1, u8, x2, x4, x5); - STLXRB(x3, x1, wback); - CBNZx_MARKLOCK(x3); - } - EBBACK; - break; - case 4: //AND - INST_NAME("AND Eb, Ib"); - 
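Every memory form in this LOCK handler follows the same MARKLOCK / exclusive-load / ALU body / exclusive-store / CBNZ retry shape; a C11 sketch of that loop for the byte-sized LOCK ADD Eb, Ib above (illustrative only, the real code emits LDAXRB/STLXRB rather than library calls):

    #include <stdatomic.h>
    #include <stdint.h>

    /* Retry until the exclusive store wins, so the read-modify-write is
       atomic; flags are then computed from the result, as emit_add8c does. */
    static uint8_t lock_add8(_Atomic uint8_t* mem, uint8_t imm)
    {
        uint8_t old = atomic_load_explicit(mem, memory_order_relaxed);
        uint8_t res;
        do {
            res = (uint8_t)(old + imm);
        } while (!atomic_compare_exchange_weak(mem, &old, res));
        return res;
    }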
SETFLAGS(X_ALL, SF_SET_PENDING); - if(MODREG) { - GETEB(x1, 1); - u8 = F8; - emit_and8c(dyn, ninst, x1, u8, x2, x4); - wb1 = 0; - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x5, &fixedaddress, 0, 0, rex, 0, 1); - u8 = F8; - wb1 = 1; - MARKLOCK; - LDAXRB(x1, wback); - emit_and8c(dyn, ninst, x1, u8, x2, x4); - STLXRB(x3, x1, wback); - CBNZx_MARKLOCK(x3); - } - EBBACK; - break; - case 5: //SUB - INST_NAME("SUB Eb, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); - if(MODREG) { - GETEB(x1, 1); - u8 = F8; - emit_sub8c(dyn, ninst, x1, u8, x2, x4, x5); - wb1 = 0; - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x5, &fixedaddress, 0, 0, rex, 0, 1); - u8 = F8; - wb1 = 1; - MARKLOCK; - LDAXRB(x1, wback); - emit_sub8c(dyn, ninst, x1, u8, x2, x4, x5); - STLXRB(x3, x1, wback); - CBNZx_MARKLOCK(x3); - } - EBBACK; - break; - case 6: //XOR - INST_NAME("XOR Eb, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); - if(MODREG) { - GETEB(x1, 1); - u8 = F8; - emit_xor8c(dyn, ninst, x1, u8, x2, x4); - wb1 = 0; - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x5, &fixedaddress, 0, 0, rex, 0, 1); - u8 = F8; - wb1 = 1; - MARKLOCK; - LDAXRB(x1, wback); - emit_xor8c(dyn, ninst, x1, u8, x2, x4); - STLXRB(x3, x1, wback); - CBNZx_MARKLOCK(x3); - } - EBBACK; - break; - case 7: //CMP - INST_NAME("CMP Eb, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); - GETEB(x1, 1); - u8 = F8; - if(u8) { - MOV32w(x2, u8); - emit_cmp8(dyn, ninst, x1, x2, x3, x4, x5); - } else { - emit_cmp8_0(dyn, ninst, x1, x3, x4); - } - break; - default: - DEFAULT; - } - DMB_ISH(); - break; - case 0x81: - case 0x83: - nextop = F8; - DMB_ISH(); - switch((nextop>>3)&7) { - case 0: //ADD - if(opcode==0x81) { - INST_NAME("LOCK ADD Ed, Id"); - } else { - INST_NAME("LOCK ADD Ed, Ib"); - } - SETFLAGS(X_ALL, SF_SET_PENDING); - if(MODREG) { - if(opcode==0x81) i64 = F32S; else i64 = F8S; - ed = xRAX+(nextop&7)+(rex.b<<3); - MOV64xw(x5, i64); - emit_add32(dyn, ninst, rex, ed, x5, x3, x4); - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, 0, (opcode==0x81)?4:1); - if(opcode==0x81) i64 = F32S; else i64 = F8S; - TSTx_mask(wback, 1, 0, 1+rex.w); // mask=3 or 7 - B_MARK(cNE); - MARKLOCK; - LDAXRxw(x1, wback); - emit_add32c(dyn, ninst, rex, x1, i64, x3, x4, x5); - STLXRxw(x3, x1, wback); - CBNZx_MARKLOCK(x3); - DMB_ISH(); - B_NEXT_nocond; - MARK; // unaligned! 
also, not enough - LDRxw_U12(x1, wback, 0); - LDAXRB(x4, wback); - BFIxw(x1, x4, 0, 8); // re-inject - emit_add32c(dyn, ninst, rex, x1, i64, x3, x4, x5); - STLXRB(x3, x1, wback); - CBNZx_MARK(x3); - STRxw_U12(x1, wback, 0); // put the whole value - } - break; - case 1: //OR - if(opcode==0x81) {INST_NAME("LOCK OR Ed, Id");} else {INST_NAME("LOCK OR Ed, Ib");} - SETFLAGS(X_ALL, SF_SET_PENDING); - if(MODREG) { - if(opcode==0x81) i64 = F32S; else i64 = F8S; - ed = xRAX+(nextop&7)+(rex.b<<3); - MOV64xw(x5, i64); - emit_or32(dyn, ninst, rex, ed, x5, x3, x4); - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, 0, (opcode==0x81)?4:1); - if(opcode==0x81) i64 = F32S; else i64 = F8S; - MOV64xw(x5, i64); - MARKLOCK; - LDAXRxw(x1, wback); - emit_or32(dyn, ninst, rex, x1, x5, x3, x4); - STLXRxw(x3, x1, wback); - CBNZx_MARKLOCK(x3); - } - break; - case 2: //ADC - if(opcode==0x81) {INST_NAME("LOCK ADC Ed, Id");} else {INST_NAME("LOCK ADC Ed, Ib");} - READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); - if(MODREG) { - if(opcode==0x81) i64 = F32S; else i64 = F8S; - ed = xRAX+(nextop&7)+(rex.b<<3); - MOV64xw(x5, i64); - emit_adc32(dyn, ninst, rex, ed, x5, x3, x4); - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, 0, (opcode==0x81)?4:1); - if(opcode==0x81) i64 = F32S; else i64 = F8S; - MOV64xw(x5, i64); - MARKLOCK; - LDAXRxw(x1, wback); - emit_adc32(dyn, ninst, rex, x1, x5, x3, x4); - STLXRxw(x3, x1, wback); - CBNZx_MARKLOCK(x3); - } - break; - case 3: //SBB - if(opcode==0x81) {INST_NAME("LOCK SBB Ed, Id");} else {INST_NAME("LOCK SBB Ed, Ib");} - READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); - if(MODREG) { - if(opcode==0x81) i64 = F32S; else i64 = F8S; - ed = xRAX+(nextop&7)+(rex.b<<3); - MOV64xw(x5, i64); - emit_sbb32(dyn, ninst, rex, ed, x5, x3, x4); - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, 0, (opcode==0x81)?4:1); - if(opcode==0x81) i64 = F32S; else i64 = F8S; - MOV64xw(x5, i64); - MARKLOCK; - LDAXRxw(x1, wback); - emit_sbb32(dyn, ninst, rex, x1, x5, x3, x4); - STLXRxw(x3, x1, wback); - CBNZx_MARKLOCK(x3); - } - break; - case 4: //AND - if(opcode==0x81) {INST_NAME("LOCK AND Ed, Id");} else {INST_NAME("LOCK AND Ed, Ib");} - SETFLAGS(X_ALL, SF_SET_PENDING); - if(MODREG) { - if(opcode==0x81) i64 = F32S; else i64 = F8S; - ed = xRAX+(nextop&7)+(rex.b<<3); - MOV64xw(x5, i64); - emit_and32(dyn, ninst, rex, ed, x5, x3, x4); - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, 0, (opcode==0x81)?4:1); - if(opcode==0x81) i64 = F32S; else i64 = F8S; - MOV64xw(x5, i64); - MARKLOCK; - LDAXRxw(x1, wback); - emit_and32(dyn, ninst, rex, x1, x5, x3, x4); - STLXRxw(x3, x1, wback); - CBNZx_MARKLOCK(x3); - } - break; - case 5: //SUB - if(opcode==0x81) {INST_NAME("LOCK SUB Ed, Id");} else {INST_NAME("LOCK SUB Ed, Ib");} - SETFLAGS(X_ALL, SF_SET_PENDING); - if(MODREG) { - if(opcode==0x81) i64 = F32S; else i64 = F8S; - ed = xRAX+(nextop&7)+(rex.b<<3); - MOV64xw(x5, i64); - emit_sub32(dyn, ninst, rex, ed, x5, x3, x4); - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, 0, (opcode==0x81)?4:1); - if(opcode==0x81) i64 = F32S; else i64 = F8S; - TSTx_mask(wback, 1, 0, 1+rex.w); // mask=3 or 7 - B_MARK(cNE); - MARKLOCK; - LDAXRxw(x1, wback); - emit_sub32c(dyn, ninst, rex, x1, i64, x3, x4, x5); - STLXRxw(x3, x1, wback); - CBNZx_MARKLOCK(x3); - DMB_ISH(); - B_NEXT_nocond; - MARK; // unaligned! 
also, not enough - LDRxw_U12(x1, wback, 0); - LDAXRB(x4, wback); - BFIxw(x1, x4, 0, 8); // re-inject - emit_sub32c(dyn, ninst, rex, x1, i64, x3, x4, x5); - STLXRB(x3, x1, wback); - CBNZx_MARK(x3); - STRxw_U12(x1, wback, 0); // put the whole value - } - break; - case 6: //XOR - if(opcode==0x81) {INST_NAME("LOCK XOR Ed, Id");} else {INST_NAME("LOCK XOR Ed, Ib");} - SETFLAGS(X_ALL, SF_SET_PENDING); - if(MODREG) { - if(opcode==0x81) i64 = F32S; else i64 = F8S; - ed = xRAX+(nextop&7)+(rex.b<<3); - MOV64xw(x5, i64); - emit_xor32(dyn, ninst, rex, ed, x5, x3, x4); - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, 0, (opcode==0x81)?4:1); - if(opcode==0x81) i64 = F32S; else i64 = F8S; - MOV64xw(x5, i64); - MARKLOCK; - LDAXRxw(x1, wback); - emit_xor32(dyn, ninst, rex, x1, x5, x3, x4); - STLXRxw(x3, x1, wback); - CBNZx_MARKLOCK(x3); - } - break; - case 7: //CMP - if(opcode==0x81) {INST_NAME("(LOCK) CMP Ed, Id");} else {INST_NAME("(LOCK) CMP Ed, Ib");} - SETFLAGS(X_ALL, SF_SET_PENDING); - GETED((opcode==0x81)?4:1); - // No need to LOCK, this is readonly - if(opcode==0x81) i64 = F32S; else i64 = F8S; - if(i64) { - MOV64xw(x5, i64); - emit_cmp32(dyn, ninst, rex, ed, x5, x3, x4, x6); - } else { - emit_cmp32_0(dyn, ninst, rex, ed, x3, x4); - } - break; - } - DMB_ISH(); - break; - - case 0x87: - INST_NAME("LOCK XCHG Ed, Gd"); - nextop = F8; - if(MODREG) { - GETGD; - GETED(0); - MOVxw_REG(x1, gd); - MOVxw_REG(gd, ed); - MOVxw_REG(ed, x1); - } else { - GETGD; - DMB_ISH(); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0, rex, 0, 0); - TSTx_mask(ed, 1, 0, 1+rex.w); // mask=3 or 7 - B_MARK(cNE); - MARKLOCK; - LDAXRxw(x1, ed); - STLXRxw(x3, gd, ed); - CBNZx_MARKLOCK(x3); - B_MARK2_nocond; - MARK; - LDRxw_U12(x1, ed, 0); - STRxw_U12(gd, ed, 0); - MARK2; - DMB_ISH(); - MOVxw_REG(gd, x1); - } - break; - - case 0xFF: - nextop = F8; - switch((nextop>>3)&7) - { - case 0: // INC Ed - INST_NAME("LOCK INC Ed"); - SETFLAGS(X_ALL&~X_CF, SF_SUBSET); - DMB_ISH(); - if(MODREG) { - ed = xRAX+(nextop&7)+(rex.b<<3); - emit_inc32(dyn, ninst, rex, ed, x3, x4); - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, 0, 0); - TSTx_mask(wback, 1, 0, 1+rex.w); // mask=3 or 7 - B_MARK(cNE); // unaligned - MARKLOCK; - LDAXRxw(x1, wback); - emit_inc32(dyn, ninst, rex, x1, x3, x4); - STLXRxw(x3, x1, wback); - CBNZx_MARKLOCK(x3); - B_NEXT_nocond; - MARK; - LDRxw_U12(x1, wback, 0); - LDAXRB(x4, wback); - BFIxw(x1, x4, 0, 8); // re-inject - emit_inc32(dyn, ninst, rex, x1, x3, x4); - STLXRB(x3, x1, wback); - CBNZw_MARK(x3); - STRxw_U12(x1, wback, 0); - } - DMB_ISH(); - break; - case 1: //DEC Ed - INST_NAME("LOCK DEC Ed"); - SETFLAGS(X_ALL&~X_CF, SF_SUBSET); - DMB_ISH(); - if(MODREG) { - ed = xRAX+(nextop&7)+(rex.b<<3); - emit_dec32(dyn, ninst, rex, ed, x3, x4); - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, 0, 0); - TSTx_mask(wback, 1, 0, 1+rex.w); // mask=3 or 7 - B_MARK(cNE); // unaligned - MARKLOCK; - LDAXRxw(x1, wback); - emit_dec32(dyn, ninst, rex, x1, x3, x4); - STLXRxw(x3, x1, wback); - CBNZx_MARKLOCK(x3); - B_NEXT_nocond; - MARK; - LDRxw_U12(x1, wback, 0); - LDAXRB(x4, wback); - BFIxw(x1, x4, 0, 8); // re-inject - emit_dec32(dyn, ninst, rex, x1, x3, x4); - STLXRB(x3, x1, wback); - CBNZw_MARK(x3); - STRxw_U12(x1, wback, 0); - } - DMB_ISH(); - break; - default: - DEFAULT; - } - break; - - default: - DEFAULT; - } - return addr; -} diff --git a/src/dynarec/dynarec_arm64_f20f.c 
b/src/dynarec/dynarec_arm64_f20f.c deleted file mode 100755 index 2c2e5c63..00000000 --- a/src/dynarec/dynarec_arm64_f20f.c +++ /dev/null @@ -1,367 +0,0 @@ -#include -#include -#include -#include -#include - -#include "debug.h" -#include "box64context.h" -#include "dynarec.h" -#include "emu/x64emu_private.h" -#include "emu/x64run_private.h" -#include "x64run.h" -#include "x64emu.h" -#include "box64stack.h" -#include "callback.h" -#include "emu/x64run_private.h" -#include "x64trace.h" -#include "dynarec_arm64.h" -#include "dynarec_arm64_private.h" -#include "arm64_printer.h" - -#include "dynarec_arm64_functions.h" -#include "dynarec_arm64_helper.h" - -// Get Ex as a double, not a quad (warning, x2 get used) -#define GETEX(a, D) \ - if(MODREG) { \ - a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); \ - } else { \ - a = fpu_get_scratch(dyn); \ - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, D); \ - VLDR64_U12(a, ed, fixedaddress); \ - } - -#define GETG gd = ((nextop&0x38)>>3)+(rex.r<<3) - -#define GETGX(a) gd = ((nextop&0x38)>>3)+(rex.r<<3); \ - a = sse_get_reg(dyn, ninst, x1, gd) - -#define GETGX_empty(a) gd = ((nextop&0x38)>>3)+(rex.r<<3); \ - a = sse_get_reg_empty(dyn, ninst, x1, gd) - -#define GETGM(a) \ - gd = ((nextop&0x38)>>3); \ - a = mmx_get_reg(dyn, ninst, x1, gd) - -uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog) -{ - (void)ip; (void)need_epilog; - - uint8_t opcode = F8; - uint8_t nextop; - uint8_t gd, ed; - uint8_t wback; - uint8_t u8; - uint64_t u64, j64; - int v0, v1; - int q0; - int d0, d1; - int64_t fixedaddress; - -#ifdef PRECISE_CVT - int j32; - MAYUSE(j32); -#endif - MAYUSE(d0); - MAYUSE(d1); - MAYUSE(q0); - MAYUSE(v0); - MAYUSE(v1); - - switch(opcode) { - - case 0x10: - INST_NAME("MOVSD Gx, Ex"); - nextop = F8; - GETG; - if(MODREG) { - ed = (nextop&7)+ (rex.b<<3); - v0 = sse_get_reg(dyn, ninst, x1, gd); - d0 = sse_get_reg(dyn, ninst, x1, ed); - VMOVeD(v0, 0, d0, 0); - } else { - v0 = sse_get_reg_empty(dyn, ninst, x1, gd); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0); - VLDR64_U12(v0, ed, fixedaddress); // upper part reseted - } - break; - case 0x11: - INST_NAME("MOVSD Ex, Gx"); - nextop = F8; - GETG; - v0 = sse_get_reg(dyn, ninst, x1, gd); - if(MODREG) { - ed = (nextop&7)+ (rex.b<<3); - d0 = sse_get_reg(dyn, ninst, x1, ed); - VMOVeD(d0, 0, v0, 0); - } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0); - VSTR64_U12(v0, ed, fixedaddress); - } - break; - case 0x12: - INST_NAME("MOVDDUP Gx, Ex"); - nextop = F8; - GETG; - if(MODREG) { - d0 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); - v0 = sse_get_reg_empty(dyn, ninst, x1, gd); - VMOVeD(v0, 0, d0, 0); - } else { - v0 = sse_get_reg_empty(dyn, ninst, x1, gd); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0); - VLDR64_U12(v0, ed, fixedaddress); - } - VMOVeD(v0, 1, v0, 0); - break; - - case 0x2A: - INST_NAME("CVTSI2SD Gx, Ed"); - nextop = F8; - GETGX(v0); - GETED(0); - d1 = fpu_get_scratch(dyn); - if(rex.w) { - SCVTFDx(d1, ed); - } else { - SCVTFDw(d1, ed); - } - VMOVeD(v0, 0, d1, 0); - break; - - case 0x2C: - INST_NAME("CVTTSD2SI Gd, Ex"); - nextop = F8; - GETGD; - GETEX(q0, 0); - FCVTZSxwD(gd, q0); - break; - case 0x2D: - INST_NAME("CVTSD2SI Gd, Ex"); - nextop = F8; - GETGD; - GETEX(q0, 0); - #ifdef PRECISE_CVT - LDRH_U12(x1, xEmu, offsetof(x64emu_t, mxcsr)); - UBFXx(x1, x1, 
13, 2); // extract round requested - LSLx_REG(x1, x1, 3); - // Construct a "switch case", with each case 2 instructions, so 8 bytes - ADR(xLR, GETMARK); - ADDx_REG(xLR, xLR, x1); - B(xLR); - MARK; - FCVTNSxwD(gd, q0); // 0: Nearest (even) - B_NEXT_nocond; - FCVTMSxwD(gd, q0); // 1: Toward -inf - B_NEXT_nocond; - FCVTPSxwD(gd, q0); // 2: Toward +inf - B_NEXT_nocond; - FCVTZSxwD(gd, q0); // 3: Toward 0 - #else - FCVTNSxwD(gd, q0); - #endif - break; - - - case 0x51: - INST_NAME("SQRTSD Gx, Ex"); - nextop = F8; - GETGX(v0); - d1 = fpu_get_scratch(dyn); - GETEX(d0, 0); - FSQRTD(d1, d0); - VMOVeD(v0, 0, d1, 0); - break; - - case 0x58: - INST_NAME("ADDSD Gx, Ex"); - nextop = F8; - GETGX(v0); - d1 = fpu_get_scratch(dyn); - GETEX(d0, 0); - FADDD(d1, v0, d0); // the high part of the vector is erased... - VMOVeD(v0, 0, d1, 0); - break; - case 0x59: - INST_NAME("MULSD Gx, Ex"); - nextop = F8; - GETGX(v0); - d1 = fpu_get_scratch(dyn); - GETEX(d0, 0); - FMULD(d1, v0, d0); - VMOVeD(v0, 0, d1, 0); - break; - case 0x5A: - INST_NAME("CVTSD2SS Gx, Ex"); - nextop = F8; - GETGX(v0); - GETEX(d0, 0); - d1 = fpu_get_scratch(dyn); - FCVT_S_D(d1, d0); - VMOVeS(v0, 0, d1, 0); - break; - - case 0x5C: - INST_NAME("SUBSD Gx, Ex"); - nextop = F8; - GETGX(v0); - d1 = fpu_get_scratch(dyn); - GETEX(d0, 0); - FSUBD(d1, v0, d0); - VMOVeD(v0, 0, d1, 0); - break; - case 0x5D: - INST_NAME("MINSD Gx, Ex"); - nextop = F8; - GETG; - v0 = sse_get_reg(dyn, ninst, x1, gd); - GETEX(v1, 0); - // MINSD: if any input is NaN, or Ex[0] Gx[0] - #if 0 - d0 = fpu_get_scratch(dyn); - FMINNMD(d0, v0, v1); // NaN handling may be slightly different, is that a problem? - VMOVeD(v0, 0, d0, 0); // to not erase uper part - #else - FCMPD(v0, v1); - B_NEXT(cLS); //Less than or equal - VMOVeD(v0, 0, v1, 0); // to not erase uper part - #endif - break; - case 0x5E: - INST_NAME("DIVSD Gx, Ex"); - nextop = F8; - GETGX(v0); - d1 = fpu_get_scratch(dyn); - GETEX(d0, 0); - FDIVD(d1, v0, d0); - VMOVeD(v0, 0, d1, 0); - break; - case 0x5F: - INST_NAME("MAXSD Gx, Ex"); - nextop = F8; - GETG; - v0 = sse_get_reg(dyn, ninst, x1, gd); - GETEX(v1, 0); - // MAXSD: if any input is NaN, or Ex[0]>Gx[0], copy Ex[0] -> Gx[0] - #if 0 - d0 = fpu_get_scratch(dyn); - FMAXNMD(d0, v0, v1); // NaN handling may be slightly different, is that a problem? - VMOVeD(v0, 0, d0, 0); // to not erase uper part - #else - FCMPD(v0, v1); - B_NEXT(cGE); //Greater than or equal - VMOVeD(v0, 0, v1, 0); // to not erase uper part - #endif - break; - - case 0x70: - INST_NAME("PSHUFLW Gx, Ex, Ib"); - nextop = F8; - GETEX(v1, 1); - GETGX(v0); - - u8 = F8; - // only low part need to be suffled. 
VTBL only handle 8bits value, so the 16bits suffles need to be changed in 8bits - u64 = 0; - for (int i=0; i<4; ++i) { - u64 |= ((uint64_t)((u8>>(i*2))&3)*2+0)<<(i*16+0); - u64 |= ((uint64_t)((u8>>(i*2))&3)*2+1)<<(i*16+8); - } - MOV64x(x2, u64); - d0 = fpu_get_scratch(dyn); - VMOVQDfrom(d0, 0, x2); - VTBL1_8(d0, v1, d0); - VMOVeD(v0, 0, d0, 0); - if(v0!=v1) { - VMOVeD(v0, 1, v1, 1); - } - break; - - case 0x7C: - INST_NAME("HADDPS Gx, Ex"); - nextop = F8; - GETGX(v0); - if(MODREG) { - v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); - } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, 0, 0); - v1 = fpu_get_scratch(dyn); - VLDR128_U12(v1, ed, fixedaddress); - } - VFADDPQS(v0, v0, v1); - break; - - case 0xC2: - INST_NAME("CMPSD Gx, Ex, Ib"); - nextop = F8; - GETGX(v0); - GETEX(v1, 1); - u8 = F8; - FCMPD(v0, v1); - switch(u8&7) { - case 0: CSETMx(x2, cEQ); break; // Equal - case 1: CSETMx(x2, cCC); break; // Less than - case 2: CSETMx(x2, cLS); break; // Less or equal - case 3: CSETMx(x2, cVS); break; // NaN - case 4: CSETMx(x2, cNE); break; // Not Equal or unordered - case 5: CSETMx(x2, cCS); break; // Greater or equal or unordered - case 6: CSETMx(x2, cHI); break; // Greater or unordered, test inverted, N!=V so unordered or less than (inverted) - case 7: CSETMx(x2, cVC); break; // not NaN - } - VMOVQDfrom(v0, 0, x2); - break; - - case 0xD0: - INST_NAME("ADDSUBPS Gx, Ex"); - nextop = F8; - GETGX(v0); - GETEX(v1, 0); - q0 = fpu_get_scratch(dyn); - static float addsubps[4] = {-1.f, 1.f, -1.f, 1.f}; - MAYUSE(addsubps); - TABLE64(x2, (uintptr_t)&addsubps); - VLDR128_U12(q0, x2, 0); - VFMLAQS(v0, v1, q0); - break; - - case 0xD6: - INST_NAME("MOVDQ2Q Gm, Ex"); - nextop = F8; - GETGM(v0); - GETEX(v1, 0); - VMOV(v0, v1); - break; - - case 0xE6: - INST_NAME("CVTPD2DQ Gx, Ex"); - nextop = F8; - GETEX(v1, 0); - GETGX_empty(v0); - u8 = sse_setround(dyn, ninst, x1, x2, x3); - VFRINTIDQ(v0, v1); - x87_restoreround(dyn, ninst, u8); - VFCVTNSQD(v0, v0); // convert double -> int64 - SQXTN_32(v0, v0); // convert int64 -> int32 with saturation in lower part, RaZ high part - break; - - case 0xF0: - INST_NAME("LDDQU Gx,Ex"); - nextop = F8; - GETG; - if(MODREG) { - v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); - v0 = sse_get_reg_empty(dyn, ninst, x1, gd); - VMOVQ(v0, v1); - } else { - v0 = sse_get_reg_empty(dyn, ninst, x1, gd); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 7, rex, 0, 0); - VLDR128_U12(v0, ed, fixedaddress); - } - break; - - default: - DEFAULT; - } - return addr; -} diff --git a/src/dynarec/dynarec_arm64_f30f.c b/src/dynarec/dynarec_arm64_f30f.c deleted file mode 100755 index b4413a60..00000000 --- a/src/dynarec/dynarec_arm64_f30f.c +++ /dev/null @@ -1,440 +0,0 @@ -#include -#include -#include -#include -#include - -#include "debug.h" -#include "box64context.h" -#include "dynarec.h" -#include "emu/x64emu_private.h" -#include "emu/x64run_private.h" -#include "x64run.h" -#include "x64emu.h" -#include "box64stack.h" -#include "callback.h" -#include "emu/x64run_private.h" -#include "x64trace.h" -#include "dynarec_arm64.h" -#include "dynarec_arm64_private.h" -#include "arm64_printer.h" - -#include "dynarec_arm64_functions.h" -#include "dynarec_arm64_helper.h" - -// Get Ex as a single, not a quad (warning, x2 get used) -#define GETEX(a, D) \ - if(MODREG) { \ - a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); \ - } else { \ - a = fpu_get_scratch(dyn); \ - addr = geted(dyn, addr, ninst, nextop, &ed, x1, 
&fixedaddress, 0xfff<<2, 3, rex, 0, D); \ - VLDR32_U12(a, ed, fixedaddress); \ - } - -#define GETG gd = ((nextop&0x38)>>3)+(rex.r<<3) - -#define GETGX(a) gd = ((nextop&0x38)>>3)+(rex.r<<3); \ - a = sse_get_reg(dyn, ninst, x1, gd) - -#define GETGX_empty(a) gd = ((nextop&0x38)>>3)+(rex.r<<3); \ - a = sse_get_reg_empty(dyn, ninst, x1, gd) - -uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog) -{ - (void)ip; (void)need_epilog; - - uint8_t opcode = F8; - uint8_t nextop, u8; - uint8_t gd, ed; - uint8_t wback; - uint64_t u64; - int v0, v1; - int q0, q1; - int d0, d1; - int64_t fixedaddress; - int64_t j64; - - MAYUSE(d0); - MAYUSE(d1); - MAYUSE(q0); - MAYUSE(q1); - MAYUSE(v0); - MAYUSE(v1); - MAYUSE(j64); - - switch(opcode) { - - case 0x10: - INST_NAME("MOVSS Gx, Ex"); - nextop = F8; - GETG; - if(MODREG) { - v0 = sse_get_reg(dyn, ninst, x1, gd); - q0 = sse_get_reg(dyn, ninst, x1, (nextop&7) + (rex.b<<3)); - VMOVeS(v0, 0, q0, 0); - } else { - v0 = sse_get_reg_empty(dyn, ninst, x1, gd); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); - VLDR32_U12(v0, ed, fixedaddress); - } - break; - case 0x11: - INST_NAME("MOVSS Ex, Gx"); - nextop = F8; - GETG; - v0 = sse_get_reg(dyn, ninst, x1, gd); - if(MODREG) { - q0 = sse_get_reg(dyn, ninst, x1, (nextop&7) + (rex.b<<3)); - VMOVeS(q0, 0, v0, 0); - } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); - VSTR32_U12(v0, ed, fixedaddress); - } - break; - case 0x12: - INST_NAME("MOVSLDUP Gx, Ex"); - nextop = F8; - if(MODREG) { - q1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); - } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, 0, 0); - q1 = fpu_get_scratch(dyn); - VLDR128_U12(q1, ed, fixedaddress); - } - GETGX_empty(q0); - VTRNQ1_32(q0, q1, q1); - break; - - case 0x16: - INST_NAME("MOVSHDUP Gx, Ex"); - nextop = F8; - if(MODREG) { - q1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); - } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, 0, 0); - q1 = fpu_get_scratch(dyn); - VLDR128_U12(q1, ed, fixedaddress); - } - GETGX_empty(q0); - VTRNQ2_32(q0, q1, q1); - break; - - case 0x1E: - INST_NAME("NOP / ENDBR32 / ENDBR64"); - nextop = F8; - FAKEED; - break; - - case 0x2A: - INST_NAME("CVTSI2SS Gx, Ed"); - nextop = F8; - GETGX(v0); - GETED(0); - d1 = fpu_get_scratch(dyn); - if(rex.w) { - SCVTFSx(d1, ed); - } else { - SCVTFSw(d1, ed); - } - VMOVeS(v0, 0, d1, 0); - break; - - case 0x2C: - INST_NAME("CVTTSS2SI Gd, Ex"); - nextop = F8; - GETGD; - GETEX(d0, 0); - FCVTZSxwS(gd, d0); - break; - case 0x2D: - INST_NAME("CVTSS2SI Gd, Ex"); - nextop = F8; - GETGD; - GETEX(q0, 0); - #ifdef PRECISE_CVT - LDRH_U12(x1, xEmu, offsetof(x64emu_t, mxcsr)); - UBFXx(x1, x1, 13, 2); // extract round requested - LSLx_REG(x1, x1, 3); - // Construct a "switch case", with each case 2 instructions, so 8 bytes - ADR(xLR, GETMARK); - ADDx_REG(xLR, xLR, x1); - B(xLR); - FCVTNSxwS(gd, q0); // 0: Nearest (even) - B_NEXT_nocond; - FCVTMSxwS(gd, q0); // 1: Toward -inf - B_NEXT_nocond; - FCVTPSxwS(gd, q0); // 2: Toward +inf - B_NEXT_nocond; - FCVTZSxwS(gd, q0); // 3: Toward 0 - #else - FCVTNSxwS(gd, q0); - #endif - break; - case 0x51: - INST_NAME("SQRTSS Gx, Ex"); - nextop = F8; - GETGX(v0); - d1 = fpu_get_scratch(dyn); - GETEX(d0, 0); - FSQRTS(d1, d0); - VMOVeS(v0, 0, d1, 0); - break; - case 0x52: - INST_NAME("RSQRTSS Gx, Ex"); - nextop = F8; - GETEX(v1, 
0); - GETGX_empty(v0); - d0 = fpu_get_scratch(dyn); - d1 = fpu_get_scratch(dyn); - // so here: F32: Imm8 = abcd efgh that gives => aBbbbbbc defgh000 00000000 00000000 - // and want 1.0f = 0x3f800000 - // so 00111111 10000000 00000000 00000000 - // a = 0, b = 1, c = 1, d = 1, efgh=0 - // 0b01110000 - FMOVS_8(d0, 0b01110000); - FSQRTS(d1, v1); - FDIVS(d0, d0, d1); - VMOVeS(v0, 0, d0, 0); - break; - case 0x53: - INST_NAME("RCPSS Gx, Ex"); - nextop = F8; - GETGX(v0); - GETEX(v1, 0); - d0 = fpu_get_scratch(dyn); - FMOVS_8(d0, 0b01110000); //1.0f - FDIVS(d0, d0, v1); - VMOVeS(v0, 0, d0, 0); - break; - - case 0x58: - INST_NAME("ADDSS Gx, Ex"); - nextop = F8; - GETGX(v0); - d1 = fpu_get_scratch(dyn); - GETEX(d0, 0); - FADDS(d1, v0, d0); // the high part of the vector is erased... - VMOVeS(v0, 0, d1, 0); - break; - case 0x59: - INST_NAME("MULSS Gx, Ex"); - nextop = F8; - GETGX(v0); - d1 = fpu_get_scratch(dyn); - GETEX(d0, 0); - FMULS(d1, v0, d0); - VMOVeS(v0, 0, d1, 0); - break; - case 0x5A: - INST_NAME("CVTSS2SD Gx, Ex"); - nextop = F8; - GETGX(v0); - GETEX(v1, 0); - d0 = fpu_get_scratch(dyn); - FCVT_D_S(d0, v1); - VMOVeD(v0, 0, d0, 0); - break; - case 0x5B: - INST_NAME("CVTPS2DQ Gx, Ex"); - nextop = F8; - GETEX(d0, 0); - GETGX_empty(v0); - VFCVTZSQS(v0, d0); - break; - - case 0x5C: - INST_NAME("SUBSS Gx, Ex"); - nextop = F8; - GETGX(v0); - d1 = fpu_get_scratch(dyn); - GETEX(d0, 0); - FSUBS(d1, v0, d0); - VMOVeS(v0, 0, d1, 0); - break; - case 0x5D: - INST_NAME("MINSS Gx, Ex"); - nextop = F8; - GETGX(v0); - GETEX(v1, 0); - // MINSS: if any input is NaN, or Ex[0] Gx[0] - #if 0 - d0 = fpu_get_scratch(dyn); - FMINNMS(d0, v0, v1); // NaN handling may be slightly different, is that a problem? - VMOVeS(v0, 0, d0, 0); // to not erase uper part - #else - FCMPS(v0, v1); - B_NEXT(cLS); //Less than or equal - VMOVeS(v0, 0, v1, 0); // to not erase uper part - #endif - break; - case 0x5E: - INST_NAME("DIVSS Gx, Ex"); - nextop = F8; - GETGX(v0); - d1 = fpu_get_scratch(dyn); - GETEX(d0, 0); - FDIVS(d1, v0, d0); - VMOVeS(v0, 0, d1, 0); - break; - case 0x5F: - INST_NAME("MAXSS Gx, Ex"); - nextop = F8; - GETGX(v0); - GETEX(v1, 0); - // MAXSS: if any input is NaN, or Ex[0]>Gx[0], copy Ex[0] -> Gx[0] - #if 0 - d0 = fpu_get_scratch(dyn); - FMAXNMS(d0, v0, v1); // NaN handling may be slightly different, is that a problem? - VMOVeS(v0, 0, d0, 0); // to not erase uper part - #else - FCMPS(v0, v1); - B_NEXT(cGE); //Greater than or equal - VMOVeS(v0, 0, v1, 0); // to not erase uper part - #endif - break; - - case 0x6F: - INST_NAME("MOVDQU Gx,Ex");// no alignment constraint on NEON here, so same as MOVDQA - nextop = F8; - GETG; - v0 = sse_get_reg_empty(dyn, ninst, x1, gd); - if(MODREG) { - v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); - VMOVQ(v0, v1); - } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, 0, 0); - VLDR128_U12(v0, ed, fixedaddress); - } - break; - case 0x70: - INST_NAME("PSHUFHW Gx, Ex, Ib"); - nextop = F8; - GETEX(v1, 1); - GETGX(v0); - - u8 = F8; - // only high part need to be suffled. 
VTBL only handle 8bits value, so the 16bits suffles need to be changed in 8bits - u64 = 0; - for (int i=0; i<4; ++i) { - u64 |= ((uint64_t)((u8>>(i*2))&3)*2+8)<<(i*16+0); - u64 |= ((uint64_t)((u8>>(i*2))&3)*2+9)<<(i*16+8); - } - MOV64x(x2, u64); - d0 = fpu_get_scratch(dyn); - VMOVQDfrom(d0, 0, x2); - VTBL1_8(d0, v1, d0); - VMOVeD(v0, 1, d0, 0); - if(v0!=v1) { - VMOVeD(v0, 0, v1, 0); - } - break; - - case 0x7E: - INST_NAME("MOVQ Gx, Ex"); - nextop = F8; - GETG; - if(MODREG) { - v1 = sse_get_reg(dyn, ninst, x1, (nextop&7) + (rex.b<<3)); - v0 = sse_get_reg_empty(dyn, ninst, x1, gd); - FMOVD(v0, v1); - } else { - v0 = sse_get_reg_empty(dyn, ninst, x1, gd); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0); - VLDR64_U12(v0, ed, fixedaddress); - } - break; - case 0x7F: - INST_NAME("MOVDQU Ex,Gx"); - nextop = F8; - GETG; - if(MODREG) { - v0 = sse_get_reg(dyn, ninst, x1, gd); - v1 = sse_get_reg_empty(dyn, ninst, x1, (nextop&7) + (rex.b<<3)); - VMOVQ(v1, v0); - } else { - v0 = sse_get_reg(dyn, ninst, x1, gd); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, 0, 0); - VSTR128_U12(v0, ed, fixedaddress); - } - break; - - case 0xBC: - INST_NAME("TZCNT Gd, Ed"); - SETFLAGS(X_CF|X_ZF, SF_SUBSET); - SET_DFNONE(x1); - nextop = F8; - GETED(0); - GETGD; - TSTxw_REG(ed, ed); - BFIw(xFlags, x1, F_CF, 1); // CF = is source 0? - RBITxw(x1, ed); // reverse - CLZxw(gd, x1); // x2 gets leading 0 == TZCNT - TSTxw_REG(gd, gd); - CSETw(x1, cEQ); - BFIw(xFlags, x1, F_ZF, 1); // ZF = is dest 0? - break; - case 0xBD: - INST_NAME("LZCNT Gd, Ed"); - SETFLAGS(X_CF|X_ZF, SF_SUBSET); - SET_DFNONE(x1); - nextop = F8; - GETED(0); - GETGD; - TSTxw_REG(ed, ed); - BFIw(xFlags, x1, F_CF, 1); // CF = is source 0? - CLZxw(gd, x1); // x2 gets leading 0 == LZCNT - TSTxw_REG(gd, gd); - CSETw(x1, cEQ); - BFIw(xFlags, x1, F_ZF, 1); // ZF = is dest 0? - break; - - case 0xC2: - INST_NAME("CMPSS Gx, Ex, Ib"); - nextop = F8; - GETGX(v0); - GETEX(v1, 1); - u8 = F8; - FCMPS(v0, v1); - switch(u8&7) { - case 0: CSETMw(x2, cEQ); break; // Equal - case 1: CSETMw(x2, cCC); break; // Less than - case 2: CSETMw(x2, cLS); break; // Less or equal - case 3: CSETMw(x2, cVS); break; // NaN - case 4: CSETMw(x2, cNE); break; // Not Equal or unordered - case 5: CSETMw(x2, cCS); break; // Greater or equal or unordered - case 6: CSETMw(x2, cHI); break; // Greater or unordered, test inverted, N!=V so unordered or less than (inverted) - case 7: CSETMw(x2, cVC); break; // not NaN - } - VMOVQSfrom(v0, 0, x2); - break; - - case 0xD6: - INST_NAME("MOVQ2DQ Gx, Em"); - nextop = F8; - GETGX_empty(v0); - if(MODREG) { - v1 = mmx_get_reg(dyn, ninst, x1, (nextop&7)); - VEORQ(v0, v0, v0); // usefull? 
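The CMPSS and CMPSD cases above translate imm8 into a single FCMP followed by one of eight condition codes; a C sketch of that predicate table, with an illustrative helper name:

    #include <math.h>
    #include <stdint.h>

    /* imm8&7 selects the predicate; the destination lane becomes all ones
       when it holds, matching the CSETM + VMOVQDfrom/VMOVQSfrom sequences. */
    static uint64_t cmp_scalar_mask(double a, double b, unsigned imm8)
    {
        int unordered = isnan(a) || isnan(b);
        int r;
        switch (imm8 & 7) {
            case 0:  r = (a == b);               break;  /* EQ    -> cEQ */
            case 1:  r = (a <  b);               break;  /* LT    -> cCC */
            case 2:  r = (a <= b);               break;  /* LE    -> cLS */
            case 3:  r = unordered;              break;  /* UNORD -> cVS */
            case 4:  r = unordered || a != b;    break;  /* NEQ   -> cNE */
            case 5:  r = unordered || !(a < b);  break;  /* NLT   -> cCS */
            case 6:  r = unordered || !(a <= b); break;  /* NLE   -> cHI */
            default: r = !unordered;             break;  /* ORD   -> cVC */
        }
        return r ? ~(uint64_t)0 : 0;
    }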
- VMOV(v0, v1); - } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0); - VLDR64_U12(v0, ed, fixedaddress); - } - break; - - case 0xE6: - INST_NAME("CVTDQ2PD Gx, Ex"); - nextop = F8; - GETEX(v1, 0); - GETGX_empty(v0); - d0 = fpu_get_scratch(dyn); - SXTL_32(v0, v1); - SCVTQFD(v0, v0); // there is only I64 -> Double vector conversion, not from i32 - break; - - default: - DEFAULT; - } - return addr; -} diff --git a/src/dynarec/dynarec_arm64_functions.c b/src/dynarec/dynarec_arm64_functions.c deleted file mode 100755 index 5e6ecfd2..00000000 --- a/src/dynarec/dynarec_arm64_functions.c +++ /dev/null @@ -1,466 +0,0 @@ -#define _GNU_SOURCE -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "debug.h" -#include "box64context.h" -#include "dynarec.h" -#include "emu/x64emu_private.h" -#include "tools/bridge_private.h" -#include "x64run.h" -#include "x64emu.h" -#include "box64stack.h" -#include "callback.h" -#include "emu/x64run_private.h" -#include "emu/x87emu_private.h" -#include "x64trace.h" -#include "signals.h" -#include "dynarec_arm64.h" -#include "dynarec_arm64_private.h" -#include "dynarec_arm64_functions.h" -#include "custommem.h" -#include "bridge.h" - -void arm_fstp(x64emu_t* emu, void* p) -{ - if(ST0.q!=STld(0).uref) - D2LD(&ST0.d, p); - else - memcpy(p, &STld(0).ld, 10); -} - -void arm_print_armreg(x64emu_t* emu, uintptr_t reg, uintptr_t n) -{ - (void)emu; - dynarec_log(LOG_DEBUG, "R%lu=0x%lx (%lu)\n", n, reg, reg); -} - -void arm_f2xm1(x64emu_t* emu) -{ - ST0.d = exp2(ST0.d) - 1.0; -} -void arm_fyl2x(x64emu_t* emu) -{ - ST(1).d = log2(ST0.d)*ST(1).d; -} -void arm_ftan(x64emu_t* emu) -{ - ST0.d = tan(ST0.d); - emu->sw.f.F87_C2 = 0; -} -void arm_fpatan(x64emu_t* emu) -{ - ST1.d = atan2(ST1.d, ST0.d); -} -void arm_fxtract(x64emu_t* emu) -{ - int32_t tmp32s = (ST1.q&0x7ff0000000000000LL)>>52; - tmp32s -= 1023; - ST1.d /= exp2(tmp32s); - ST0.d = tmp32s; -} -void arm_fprem(x64emu_t* emu) -{ - int32_t tmp32s = ST0.d / ST1.d; - ST0.d -= ST1.d * tmp32s; - emu->sw.f.F87_C2 = 0; - emu->sw.f.F87_C0 = (tmp32s&1); - emu->sw.f.F87_C3 = ((tmp32s>>1)&1); - emu->sw.f.F87_C1 = ((tmp32s>>2)&1); -} -void arm_fyl2xp1(x64emu_t* emu) -{ - ST(1).d = log2(ST0.d + 1.0)*ST(1).d; -} -void arm_fsincos(x64emu_t* emu) -{ - sincos(ST1.d, &ST1.d, &ST0.d); - emu->sw.f.F87_C2 = 0; -} -void arm_frndint(x64emu_t* emu) -{ - ST0.d = fpu_round(emu, ST0.d); -} -void arm_fscale(x64emu_t* emu) -{ - if(ST0.d!=0.0) - ST0.d *= exp2(trunc(ST1.d)); -} -void arm_fsin(x64emu_t* emu) -{ - ST0.d = sin(ST0.d); - emu->sw.f.F87_C2 = 0; -} -void arm_fcos(x64emu_t* emu) -{ - ST0.d = cos(ST0.d); - emu->sw.f.F87_C2 = 0; -} - -void arm_fbld(x64emu_t* emu, uint8_t* ed) -{ - fpu_fbld(emu, ed); -} - -void arm_fild64(x64emu_t* emu, int64_t* ed) -{ - int64_t tmp; - memcpy(&tmp, ed, sizeof(tmp)); - ST0.d = tmp; - STll(0).sq = tmp; - STll(0).sref = ST0.sq; -} - -void arm_fbstp(x64emu_t* emu, uint8_t* ed) -{ - fpu_fbst(emu, ed); -} - -void arm_fistp64(x64emu_t* emu, int64_t* ed) -{ - // used of memcpy to avoid aligments issues - if(STll(0).sref==ST(0).sq) { - memcpy(ed, &STll(0).sq, sizeof(int64_t)); - } else { - int64_t tmp; - if(isgreater(ST0.d, (double)(int64_t)0x7fffffffffffffffLL) || isless(ST0.d, (double)(int64_t)0x8000000000000000LL) || !isfinite(ST0.d)) - tmp = 0x8000000000000000LL; - else - tmp = fpu_round(emu, ST0.d); - memcpy(ed, &tmp, sizeof(tmp)); - } -} - -void arm_fistt64(x64emu_t* emu, int64_t* ed) -{ - // used of memcpy to avoid aligments 
issues - int64_t tmp = ST0.d; - memcpy(ed, &tmp, sizeof(tmp)); -} - -void arm_fld(x64emu_t* emu, uint8_t* ed) -{ - memcpy(&STld(0).ld, ed, 10); - LD2D(&STld(0), &ST(0).d); - STld(0).uref = ST0.q; -} - -void arm_ud(x64emu_t* emu) -{ - emit_signal(emu, SIGILL, (void*)R_RIP, 0); -} - -void arm_fsave(x64emu_t* emu, uint8_t* ed) -{ - fpu_savenv(emu, (char*)ed, 0); - - uint8_t* p = ed; - p += 28; - for (int i=0; i<8; ++i) { - LD2D(p, &ST(i).d); - p+=10; - } -} -void arm_frstor(x64emu_t* emu, uint8_t* ed) -{ - fpu_loadenv(emu, (char*)ed, 0); - - uint8_t* p = ed; - p += 28; - for (int i=0; i<8; ++i) { - D2LD(&ST(i).d, p); - p+=10; - } - -} - -void arm_fprem1(x64emu_t* emu) -{ - // simplified version - int32_t tmp32s = round(ST0.d / ST1.d); - ST0.d -= ST1.d*tmp32s; - emu->sw.f.F87_C2 = 0; - emu->sw.f.F87_C0 = (tmp32s&1); - emu->sw.f.F87_C3 = ((tmp32s>>1)&1); - emu->sw.f.F87_C1 = ((tmp32s>>2)&1); -} - -static uint8_t ff_mult(uint8_t a, uint8_t b) -{ - int retval = 0; - - for(int i = 0; i < 8; i++) { - if((b & 1) == 1) - retval ^= a; - - if((a & 0x80)) { - a <<= 1; - a ^= 0x1b; - } else { - a <<= 1; - } - - b >>= 1; - } - - return retval; -} - -void arm_aesimc(x64emu_t* emu, int xmm) -{ - sse_regs_t eax1 = emu->xmm[xmm]; - - for(int j=0; j<4; ++j) { - emu->xmm[xmm].ub[0+j*4] = ff_mult(0x0E, eax1.ub[0+j*4]) ^ ff_mult(0x0B, eax1.ub[1+j*4]) ^ ff_mult(0x0D, eax1.ub[2+j*4]) ^ ff_mult(0x09, eax1.ub[3+j*4]); - emu->xmm[xmm].ub[1+j*4] = ff_mult(0x09, eax1.ub[0+j*4]) ^ ff_mult(0x0E, eax1.ub[1+j*4]) ^ ff_mult(0x0B, eax1.ub[2+j*4]) ^ ff_mult(0x0D, eax1.ub[3+j*4]); - emu->xmm[xmm].ub[2+j*4] = ff_mult(0x0D, eax1.ub[0+j*4]) ^ ff_mult(0x09, eax1.ub[1+j*4]) ^ ff_mult(0x0E, eax1.ub[2+j*4]) ^ ff_mult(0x0B, eax1.ub[3+j*4]); - emu->xmm[xmm].ub[3+j*4] = ff_mult(0x0B, eax1.ub[0+j*4]) ^ ff_mult(0x0D, eax1.ub[1+j*4]) ^ ff_mult(0x09, eax1.ub[2+j*4]) ^ ff_mult(0x0E, eax1.ub[3+j*4]); - } -} -void arm_aesmc(x64emu_t* emu, int xmm) -{ - sse_regs_t eax1 = emu->xmm[xmm]; - - for(int j=0; j<4; ++j) { - emu->xmm[xmm].ub[0+j*4] = ff_mult(0x02, eax1.ub[0+j*4]) ^ ff_mult(0x03, eax1.ub[1+j*4]) ^ eax1.ub[2+j*4] ^ eax1.ub[3+j*4] ; - emu->xmm[xmm].ub[1+j*4] = eax1.ub[0+j*4] ^ ff_mult(0x02, eax1.ub[1+j*4]) ^ ff_mult(0x03, eax1.ub[2+j*4]) ^ eax1.ub[3+j*4] ; - emu->xmm[xmm].ub[2+j*4] = eax1.ub[0+j*4] ^ eax1.ub[1+j*4] ^ ff_mult(0x02, eax1.ub[2+j*4]) ^ ff_mult(0x03, eax1.ub[3+j*4]); - emu->xmm[xmm].ub[3+j*4] = ff_mult(0x03, eax1.ub[0+j*4]) ^ eax1.ub[1+j*4] ^ eax1.ub[2+j*4] ^ ff_mult(0x02, eax1.ub[3+j*4]); - } -} -void arm_aesdlast(x64emu_t* emu, int xmm) -{ - // A0 B1 C2 D3 E4 F5 G6 H7 I8 J9 Ka Lb Mc Nd Oe Pf - // A N K H E B O L I F C P M J G D - const uint8_t invshiftrows[] = {0,13,10, 7, 4, 1,14,11, 8, 5, 2,15,12, 9, 6, 3}; - const uint8_t invsubbytes[256] = { - 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, - 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, - 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, - 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, - 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, - 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, - 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, - 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, - 0x3a, 
0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, - 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, - 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, - 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, - 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, - 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, - 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, - 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d, - }; - - sse_regs_t eax1; - for(int i=0; i<16; ++i) - eax1.ub[i] = emu->xmm[xmm].ub[invshiftrows[i]]; - //STATE ← InvSubBytes( STATE ); - for(int i=0; i<16; ++i) - emu->xmm[xmm].ub[i] = invsubbytes[eax1.ub[i]]; - -} -void arm_aeselast(x64emu_t* emu, int xmm) -{ - // A0 B1 C2 D3 E4 F5 G6 H7 I8 J9 Ka Lb Mc Nd Oe Pf - // A F K P E J O D I N C H M B G L - const uint8_t shiftrows[] = {0, 5,10,15, 4, 9,14, 3, 8,13, 2, 7,12, 1, 6,11}; - const uint8_t subbytes[256] = { - 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, - 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, - 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, - 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, - 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, - 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, - 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, - 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, - 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, - 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, - 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, - 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, - 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, - 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, - 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, - 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16, - }; - - sse_regs_t eax1; - for(int i=0; i<16; ++i) - eax1.ub[i] = emu->xmm[xmm].ub[shiftrows[i]]; - //STATE ← SubBytes( STATE ); - for(int i=0; i<16; ++i) - emu->xmm[xmm].ub[i] = subbytes[eax1.ub[i]]; -} -void arm_aesd(x64emu_t* emu, int xmm) -{ - arm_aesdlast(emu, xmm); - arm_aesimc(emu, xmm); -} -void arm_aese(x64emu_t* emu, int xmm) -{ - arm_aeselast(emu, xmm); - arm_aesmc(emu, xmm); -} - - -#define XMM0 0 -#define XMM8 16 -#define X870 8 -#define EMM0 8 -#define SCRATCH0 24 - -// Get a FPU scratch reg -int fpu_get_scratch(dynarec_arm_t* dyn) -{ - return SCRATCH0 + dyn->fpu_scratch++; // return an Sx -} -// Reset scratch regs counter -void fpu_reset_scratch(dynarec_arm_t* dyn) -{ - dyn->fpu_scratch = 0; -} -// Get a x87 double reg 
-int fpu_get_reg_x87(dynarec_arm_t* dyn) -{ - int i=X870; - while (dyn->fpuused[i]) ++i; - dyn->fpuused[i] = 1; - return i; // return a Dx -} -// Free a FPU double reg -void fpu_free_reg(dynarec_arm_t* dyn, int reg) -{ - // TODO: check upper limit? - dyn->fpuused[reg] = 0; -} -// Get an MMX double reg -int fpu_get_reg_emm(dynarec_arm_t* dyn, int emm) -{ - dyn->fpuused[EMM0 + emm] = 1; - return EMM0 + emm; -} -// Get an XMM quad reg -int fpu_get_reg_xmm(dynarec_arm_t* dyn, int xmm) -{ - if(xmm>7) { - dyn->fpuused[XMM8 + xmm - 8] = 1; - return XMM8 + xmm - 8; - } else { - dyn->fpuused[XMM0 + xmm] = 1; - return XMM0 + xmm; - } -} -// Reset fpu regs counter -void fpu_reset_reg(dynarec_arm_t* dyn) -{ - dyn->fpu_reg = 0; - for (int i=0; i<32; ++i) - dyn->fpuused[i]=0; -} - -#define F8 *(uint8_t*)(addr++) -#define F32 *(uint32_t*)(addr+=4, addr-4) -#define F32S64 (uint64_t)(int64_t)*(int32_t*)(addr+=4, addr-4) -// Get if ED will have the correct parity. Not emiting anything. Parity is 2 for DWORD or 3 for QWORD -int getedparity(dynarec_arm_t* dyn, int ninst, uintptr_t addr, uint8_t nextop, int parity, int delta) -{ - (void)dyn; (void)ninst; - - uint32_t tested = (1<>3)&7; - if((sib&0x7)==5) { - uint64_t tmp = F32S64; - if (sib_reg!=4) { - // if XXXXXX+reg<>6)>=parity)?1:0; - } else { - // just a constant... - return (tmp&tested)?0:1; - } - } else { - if(sib_reg==4 && parity<3) - return 0; // simple [reg] - // don't try [reg1 + reg2<>6)>=parity)?1:0; - } - } else if((nextop&7)==5) { - uint64_t tmp = F32S64; - tmp+=addr+delta; - return (tmp&tested)?0:1; - } else { - return 0; - } - } else { - return 0; //Form [reg1 + reg2<CC==0xCC && b->S=='S' && b->C=='C' && b->w!=(wrapper_t)0 && b->f!=(uintptr_t)PltResolver) { - // found ! - if(retn) *retn = (b->C3==0xC2)?b->N:0; - if(calladdress) *calladdress = addr+1; - return 1; - } - return 0; -#undef PK32 -#undef PK -} diff --git a/src/dynarec/dynarec_arm64_functions.h b/src/dynarec/dynarec_arm64_functions.h deleted file mode 100755 index d4c861c9..00000000 --- a/src/dynarec/dynarec_arm64_functions.h +++ /dev/null @@ -1,64 +0,0 @@ -#ifndef __DYNAREC_ARM_FUNCTIONS_H__ -#define __DYNAREC_ARM_FUNCTIONS_H__ - -typedef struct x64emu_s x64emu_t; - -void arm_fstp(x64emu_t* emu, void* p); - -void arm_print_armreg(x64emu_t* emu, uintptr_t reg, uintptr_t n); - -void arm_f2xm1(x64emu_t* emu); -void arm_fyl2x(x64emu_t* emu); -void arm_ftan(x64emu_t* emu); -void arm_fpatan(x64emu_t* emu); -void arm_fxtract(x64emu_t* emu); -void arm_fprem(x64emu_t* emu); -void arm_fyl2xp1(x64emu_t* emu); -void arm_fsincos(x64emu_t* emu); -void arm_frndint(x64emu_t* emu); -void arm_fscale(x64emu_t* emu); -void arm_fsin(x64emu_t* emu); -void arm_fcos(x64emu_t* emu); -void arm_fbld(x64emu_t* emu, uint8_t* ed); -void arm_fild64(x64emu_t* emu, int64_t* ed); -void arm_fbstp(x64emu_t* emu, uint8_t* ed); -void arm_fistp64(x64emu_t* emu, int64_t* ed); -void arm_fistt64(x64emu_t* emu, int64_t* ed); -void arm_fld(x64emu_t* emu, uint8_t* ed); -void arm_fsave(x64emu_t* emu, uint8_t* ed); -void arm_frstor(x64emu_t* emu, uint8_t* ed); -void arm_fprem1(x64emu_t* emu); - -void arm_aesd(x64emu_t* emu, int xmm); -void arm_aese(x64emu_t* emu, int xmm); -void arm_aesdlast(x64emu_t* emu, int xmm); -void arm_aeselast(x64emu_t* emu, int xmm); -void arm_aesimc(x64emu_t* emu, int xmm); - - -void arm_ud(x64emu_t* emu); - -// Get an FPU scratch reg -int fpu_get_scratch(dynarec_arm_t* dyn); -// Reset scratch regs counter -void fpu_reset_scratch(dynarec_arm_t* dyn); -// Get an x87 double reg -int 
fpu_get_reg_x87(dynarec_arm_t* dyn); -// Get an MMX double reg -int fpu_get_reg_emm(dynarec_arm_t* dyn, int emm); -// Get an XMM quad reg -int fpu_get_reg_xmm(dynarec_arm_t* dyn, int xmm); -// Free a FPU/MMX/XMM reg -void fpu_free_reg(dynarec_arm_t* dyn, int reg); -// Reset fpu regs counter -void fpu_reset_reg(dynarec_arm_t* dyn); - -// Get if ED will have the correct parity. Not emiting anything. Parity is 2 for DWORD or 3 for QWORD -int getedparity(dynarec_arm_t* dyn, int ninst, uintptr_t addr, uint8_t nextop, int parity, int delta); -// Do the GETED, but don't emit anything... -uintptr_t fakeed(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop); - -// Is what pointed at addr a native call? And if yes, to what function? -int isNativeCall(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t* calladdress, int* retn); - -#endif //__DYNAREC_ARM_FUNCTIONS_H__ \ No newline at end of file diff --git a/src/dynarec/dynarec_arm64_helper.c b/src/dynarec/dynarec_arm64_helper.c deleted file mode 100755 index 8c65ac32..00000000 --- a/src/dynarec/dynarec_arm64_helper.c +++ /dev/null @@ -1,1280 +0,0 @@ -#include -#include -#include -#include -#include - -#include "debug.h" -#include "box64context.h" -#include "dynarec.h" -#include "emu/x64emu_private.h" -#include "emu/x64run_private.h" -#include "x64run.h" -#include "x64emu.h" -#include "box64stack.h" -#include "callback.h" -#include "emu/x64run_private.h" -#include "x64trace.h" -#include "dynarec_arm64.h" -#include "dynarec_arm64_private.h" -#include "dynablock_private.h" -#include "arm64_printer.h" -#include "../tools/bridge_private.h" -#include "custommem.h" - -#include "dynarec_arm64_functions.h" -#include "dynarec_arm64_helper.h" - -/* setup r2 to address pointed by ED, also fixaddress is an optionnal delta in the range [-absmax, +absmax], with delta&mask==0 to be added to ed for LDR/STR */ -uintptr_t geted(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, int64_t* fixaddress, int absmax, uint32_t mask, rex_t rex, int s, int delta) -{ - MAYUSE(dyn); MAYUSE(ninst); MAYUSE(delta); - - uint8_t ret = x2; - uint8_t scratch = x2; - *fixaddress = 0; - if(hint>0) ret = hint; - if(hint>0 && hint>3)&7)+(rex.x<<3); - if((sib&0x7)==5) { - int64_t tmp = F32S; - if (sib_reg!=4) { - if(tmp && ((tmpabsmax) || (tmp&mask))) { - MOV64x(scratch, tmp); - ADDx_REG_LSL(ret, scratch, xRAX+sib_reg, (sib>>6)); - } else { - LSLx(ret, xRAX+sib_reg, (sib>>6)); - *fixaddress = tmp; - } - } else { - MOV64x(ret, tmp); - } - } else { - if (sib_reg!=4) { - ADDx_REG_LSL(ret, xRAX+(sib&0x7)+(rex.b<<3), xRAX+sib_reg, (sib>>6)); - } else { - ret = xRAX+(sib&0x7)+(rex.b<<3); - } - } - } else if((nextop&7)==5) { - uint64_t tmp = F32S64; - if((tmp>=absmin) && (tmp<=absmax) && !(tmp&mask)) { - GETIP(addr+delta); - ret = xRIP; - *fixaddress = tmp; - } else if(tmp<0x1000) { - GETIP(addr+delta); - ADDx_U12(ret, xRIP, tmp); - } else if(tmp+addr+delta<0x1000000000000LL) { // 3 opcodes to load immediate is cheap enough - tmp += addr+delta; - MOV64x(ret, tmp); - } else { - MOV64x(ret, tmp); - GETIP(addr+delta); - ADDx_REG(ret, ret, xRIP); - } - } else { - ret = xRAX+(nextop&7)+(rex.b<<3); - } - } else { - int64_t i64; - uint8_t sib = 0; - int sib_reg = 0; - if((nextop&7)==4) { - sib = F8; - sib_reg = ((sib>>3)&7)+(rex.x<<3); - } - if(nextop&0x80) - i64 = F32S; - else - i64 = F8S; - if(i64==0 || ((i64>=absmin) && (i64<=absmax) && !(i64&mask))) { - *fixaddress = i64; - if((nextop&7)==4) { - if (sib_reg!=4) { - ADDx_REG_LSL(ret, 
xRAX+(sib&0x07)+(rex.b<<3), xRAX+sib_reg, (sib>>6)); - } else { - ret = xRAX+(sib&0x07)+(rex.b<<3); - } - } else - ret = xRAX+(nextop&0x07)+(rex.b<<3); - } else { - int64_t sub = (i64<0)?1:0; - if(sub) i64 = -i64; - if(i64<0x1000) { - if((nextop&7)==4) { - if (sib_reg!=4) { - ADDx_REG_LSL(scratch, xRAX+(sib&0x07)+(rex.b<<3), xRAX+sib_reg, (sib>>6)); - } else { - scratch = xRAX+(sib&0x07)+(rex.b<<3); - } - } else - scratch = xRAX+(nextop&0x07)+(rex.b<<3); - if(sub) { - SUBx_U12(ret, scratch, i64); - } else { - ADDx_U12(ret, scratch, i64); - } - } else { - MOV64x(scratch, i64); - if((nextop&7)==4) { - if (sib_reg!=4) { - if(sub) { - SUBx_REG(scratch, xRAX+(sib&0x07)+(rex.b<<3), scratch); - } else { - ADDx_REG(scratch, scratch, xRAX+(sib&0x07)+(rex.b<<3)); - } - ADDx_REG_LSL(ret, scratch, xRAX+sib_reg, (sib>>6)); - } else { - PASS3(int tmp = xRAX+(sib&0x07)+(rex.b<<3)); - if(sub) { - SUBx_REG(ret, tmp, scratch); - } else { - ADDx_REG(ret, tmp, scratch); - } - } - } else { - PASS3(int tmp = xRAX+(nextop&0x07)+(rex.b<<3)); - if(sub) { - SUBx_REG(ret, tmp, scratch); - } else { - ADDx_REG(ret, tmp, scratch); - } - } - } - } - } - *ed = ret; - return addr; -} - -/* setup r2 to address pointed by ED, also fixaddress is an optionnal delta in the range [-absmax, +absmax], with delta&mask==0 to be added to ed for LDR/STR */ -uintptr_t geted32(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, int64_t* fixaddress, int absmax, uint32_t mask, rex_t rex, int s, int delta) -{ - MAYUSE(dyn); MAYUSE(ninst); MAYUSE(delta); - - uint8_t ret = x2; - uint8_t scratch = x2; - *fixaddress = 0; - if(hint>0) ret = hint; - if(hint>0 && hint>3)&7)+(rex.x<<3); - if((sib&0x7)==5) { - int64_t tmp = F32S; - if (sib_reg!=4) { - if(tmp && ((tmpabsmax) || (tmp&mask))) { - MOV64x(scratch, tmp); - ADDw_REG_LSL(ret, scratch, xRAX+sib_reg, (sib>>6)); - } else { - LSLw(ret, xRAX+sib_reg, (sib>>6)); - *fixaddress = tmp; - } - } else { - MOV64x(ret, tmp); - } - } else { - if (sib_reg!=4) { - ADDw_REG_LSL(ret, xRAX+(sib&0x7)+(rex.b<<3), xRAX+sib_reg, (sib>>6)); - } else { - ret = xRAX+(sib&0x7)+(rex.b<<3); - } - } - } else if((nextop&7)==5) { - uint32_t tmp = F32; - MOV32w(ret, tmp); - GETIP(addr+delta); - ADDw_REG(ret, ret, xRIP); - } else { - ret = xRAX+(nextop&7)+(rex.b<<3); - if(ret==hint) { - MOVw_REG(hint, ret); //to clear upper part - } - } - } else { - int64_t i64; - uint8_t sib = 0; - int sib_reg = 0; - if((nextop&7)==4) { - sib = F8; - sib_reg = ((sib>>3)&7)+(rex.x<<3); - } - if(nextop&0x80) - i64 = F32S; - else - i64 = F8S; - if(i64==0 || ((i64>=absmin) && (i64<=absmax) && !(i64&mask))) { - *fixaddress = i64; - if((nextop&7)==4) { - if (sib_reg!=4) { - ADDw_REG_LSL(ret, xRAX+(sib&0x07)+(rex.b<<3), xRAX+sib_reg, (sib>>6)); - } else { - ret = xRAX+(sib&0x07)+(rex.b<<3); - } - } else { - ret = xRAX+(nextop&0x07)+(rex.b<<3); - } - } else { - int64_t sub = (i64<0)?1:0; - if(sub) i64 = -i64; - if(i64<0x1000) { - if((nextop&7)==4) { - if (sib_reg!=4) { - ADDw_REG_LSL(scratch, xRAX+(sib&0x07)+(rex.b<<3), xRAX+sib_reg, (sib>>6)); - } else { - scratch = xRAX+(sib&0x07)+(rex.b<<3); - } - } else - scratch = xRAX+(nextop&0x07)+(rex.b<<3); - if(sub) { - SUBw_U12(ret, scratch, i64); - } else { - ADDw_U12(ret, scratch, i64); - } - } else { - MOV32w(scratch, i64); - if((nextop&7)==4) { - if (sib_reg!=4) { - if(sub) { - SUBw_REG(scratch, xRAX+(sib&0x07)+(rex.b<<3), scratch); - } else { - ADDw_REG(scratch, scratch, xRAX+(sib&0x07)+(rex.b<<3)); - } - ADDw_REG_LSL(ret, scratch, xRAX+sib_reg, (sib>>6)); - } 
else { - PASS3(int tmp = xRAX+(sib&0x07)+(rex.b<<3)); - if(sub) { - SUBw_REG(ret, tmp, scratch); - } else { - ADDw_REG(ret, tmp, scratch); - } - } - } else { - PASS3(int tmp = xRAX+(nextop&0x07)+(rex.b<<3)); - if(sub) { - SUBw_REG(ret, tmp, scratch); - } else { - ADDw_REG(ret, tmp, scratch); - } - } - } - } - } - *ed = ret; - return addr; -} - -/* setup r2 to address pointed by ED, r3 as scratch also fixaddress is an optionnal delta in the range [-absmax, +absmax], with delta&mask==0 to be added to ed for LDR/STR */ -uintptr_t geted16(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, int64_t* fixaddress, int absmax, uint32_t mask, int s) -{ - MAYUSE(dyn); MAYUSE(ninst); - - uint8_t ret = x2; - uint8_t scratch = x3; - *fixaddress = 0; - if(hint>0) ret = hint; - if(scratch==ret) scratch = x2; - MAYUSE(scratch); - uint32_t m = nextop&0xC7; - uint32_t n = (m>>6)&3; - int64_t offset = 0; - int absmin = 0; - if(s) absmin = -absmax; - if(!n && m==6) { - offset = F16; - MOVZw(ret, offset); - } else { - switch(n) { - case 0: offset = 0; break; - case 1: offset = F8S; break; - case 2: offset = F16S; break; - } - if(offset && (offset>absmax || offset-0x1000) { - SUBx_U12(ret, ret, -offset); - } else if(offset>0 && offset<0x1000) { - ADDx_U12(ret, ret, offset); - } else { - MOV64x(scratch, offset); - ADDx_REG(ret, ret, scratch); - } - } - } - - *ed = ret; - return addr; -} - -void jump_to_epilog(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst) -{ - MAYUSE(dyn); MAYUSE(ip); MAYUSE(ninst); - MESSAGE(LOG_DUMP, "Jump to epilog\n"); - - if(reg) { - if(reg!=xRIP) { - MOVx_REG(xRIP, reg); - } - } else { - GETIP_(ip); - } - TABLE64(x2, (uintptr_t)arm64_epilog); - BR(x2); -} - -void jump_to_next(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst) -{ - MAYUSE(dyn); MAYUSE(ninst); - MESSAGE(LOG_DUMP, "Jump to next\n"); - - if(reg) { - if(reg!=xRIP) { - MOVx_REG(xRIP, reg); - } - uintptr_t tbl = getJumpTable64(); - MAYUSE(tbl); - TABLE64(x2, tbl); - UBFXx(x3, xRIP, 48, JMPTABL_SHIFT); - LDRx_REG_LSL3(x2, x2, x3); - UBFXx(x3, xRIP, 32, JMPTABL_SHIFT); - LDRx_REG_LSL3(x2, x2, x3); - UBFXx(x3, xRIP, 16, JMPTABL_SHIFT); - LDRx_REG_LSL3(x2, x2, x3); - UBFXx(x3, xRIP, 0, JMPTABL_SHIFT); - LDRx_REG_LSL3(x3, x2, x3); - } else { - uintptr_t p = getJumpTableAddress64(ip); - MAYUSE(p); - TABLE64(x2, p); - GETIP_(ip); - LDRx_U12(x3, x2, 0); - } - if(reg!=x1) { - MOVx_REG(x1, xRIP); - } - #ifdef HAVE_TRACE - //MOVx(x2, 15); no access to PC reg - #endif - BLR(x3); // save LR... 
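    // Illustrative aside (not part of the patch): assuming JMPTABL_SHIFT is 16, the
    // UBFX/LDR sequence above walks a 4-level jump table keyed by 16-bit slices of the
    // guest RIP; a rough C equivalent of what has been computed by the BLR above is:
    //     uintptr_t ****tbl = (uintptr_t****)getJumpTable64();
    //     uintptr_t native  = (uintptr_t)tbl[(rip >> 48) & 0xffff]
    //                                       [(rip >> 32) & 0xffff]
    //                                       [(rip >> 16) & 0xffff]
    //                                       [ rip        & 0xffff];
    // Entries with no compiled block are expected to point at a dispatcher (e.g.
    // arm64_next), so the BLR either enters the block or falls back to the dynarec.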
-} - -void ret_to_epilog(dynarec_arm_t* dyn, int ninst) -{ - MAYUSE(dyn); MAYUSE(ninst); - MESSAGE(LOG_DUMP, "Ret to epilog\n"); - POP1(xRIP); - uintptr_t tbl = getJumpTable64(); - MOV64x(x2, tbl); - UBFXx(x3, xRIP, 48, JMPTABL_SHIFT); - LDRx_REG_LSL3(x2, x2, x3); - UBFXx(x3, xRIP, 32, JMPTABL_SHIFT); - LDRx_REG_LSL3(x2, x2, x3); - UBFXx(x3, xRIP, 16, JMPTABL_SHIFT); - LDRx_REG_LSL3(x2, x2, x3); - UBFXx(x3, xRIP, 0, JMPTABL_SHIFT); - LDRx_REG_LSL3(x2, x2, x3); - MOVx_REG(x1, xRIP); - BLR(x2); // save LR -} - -void retn_to_epilog(dynarec_arm_t* dyn, int ninst, int n) -{ - MAYUSE(dyn); MAYUSE(ninst); - MESSAGE(LOG_DUMP, "Retn to epilog\n"); - POP1(xRIP); - if(n>0xfff) { - MOV32w(w1, n); - ADDx_REG(xRSP, xRSP, x1); - } else { - ADDx_U12(xRSP, xRSP, n); - } - uintptr_t tbl = getJumpTable64(); - MOV64x(x2, tbl); - UBFXx(x3, xRIP, 48, JMPTABL_SHIFT); - LDRx_REG_LSL3(x2, x2, x3); - UBFXx(x3, xRIP, 32, JMPTABL_SHIFT); - LDRx_REG_LSL3(x2, x2, x3); - UBFXx(x3, xRIP, 16, JMPTABL_SHIFT); - LDRx_REG_LSL3(x2, x2, x3); - UBFXx(x3, xRIP, 0, JMPTABL_SHIFT); - LDRx_REG_LSL3(x2, x2, x3); - MOVx_REG(x1, xRIP); - BLR(x2); // save LR -} - -void iret_to_epilog(dynarec_arm_t* dyn, int ninst, int is64bits) -{ - #warning TODO: is64bits - MAYUSE(ninst); - MESSAGE(LOG_DUMP, "IRet to epilog\n"); - // POP IP - POP1(xRIP); - // POP CS - POP1(x2); - STRH_U12(x2, xEmu, offsetof(x64emu_t, segs[_CS])); - MOVZw(x1, 0); - STRx_U12(x1, xEmu, offsetof(x64emu_t, segs_serial[_CS])); - STRx_U12(x1, xEmu, offsetof(x64emu_t, segs_serial[_SS])); - // POP EFLAGS - POP1(xFlags); - MOV32w(x1, 0x3F7FD7); - ANDx_REG(xFlags, xFlags, x1); - ORRx_mask(xFlags, xFlags, 1, 0b111111, 0); - SET_DFNONE(x1); - // POP RSP - POP1(x3); - // POP SS - POP1(x2); - STRH_U12(x2, xEmu, offsetof(x64emu_t, segs[_SS])); - // set new RSP - MOVx_REG(xRSP, x3); - // Ret.... - MOV64x(x2, (uintptr_t)arm64_epilog); // epilog on purpose, CS might have changed! - BR(x2); -} - -void call_c(dynarec_arm_t* dyn, int ninst, void* fnc, int reg, int ret, int saveflags, int savereg) -{ - MAYUSE(fnc); - if(savereg==0) - savereg = 7; - if(saveflags) { - STRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags)); - } - fpu_pushcache(dyn, ninst, reg, 0); - if(ret!=-2) { - STPx_S7_preindex(xEmu, savereg, xSP, -16); // ARM64 stack needs to be 16byte aligned - STPx_S7_offset(xRAX, xRCX, xEmu, offsetof(x64emu_t, regs[_AX])); // x9..x15, x16,x17,x18 those needs to be saved by caller - STPx_S7_offset(xRDX, xRBX, xEmu, offsetof(x64emu_t, regs[_DX])); // but x18 is R8 wich is lost, so it's fine to not save it? 
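    // Illustrative aside (not part of the patch): the guest GPRs live in AArch64 temporary
    // registers that a native callee may clobber, so call_c spills RAX..R9 and the flags
    // into the x64emu_t context before the BLR and reloads them afterwards; the GO() pairs
    // further down skip reloading whichever register was designated to receive the result.
    // Opcode handlers reach this through the CALL_* macros of the helper header, e.g.
    //     CALL_(UpdateFlags, -1, 0);   // expands to call_c(dyn, ninst, UpdateFlags, x7, -1, 1, 0)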
- STPx_S7_offset(xRSP, xRBP, xEmu, offsetof(x64emu_t, regs[_SP])); - STPx_S7_offset(xRSI, xRDI, xEmu, offsetof(x64emu_t, regs[_SI])); - STPx_S7_offset(xR8, xR9, xEmu, offsetof(x64emu_t, regs[_R8])); - } - TABLE64(reg, (uintptr_t)fnc); - BLR(reg); - if(ret>=0) { - MOVx_REG(ret, xEmu); - } - if(ret!=-2) { - LDPx_S7_postindex(xEmu, savereg, xSP, 16); - #define GO(A, B) if(ret==x##A) { \ - LDRx_U12(x##B, xEmu, offsetof(x64emu_t, regs[_##B])); \ - } else if(ret==x##B) { \ - LDRx_U12(x##A, xEmu, offsetof(x64emu_t, regs[_##A])); \ - } else { \ - LDPx_S7_offset(x##A, x##B, xEmu, offsetof(x64emu_t, regs[_##A])); \ - } - GO(RAX, RCX); - GO(RDX, RBX); - GO(RSP, RBP); - GO(RSI, RDI); - GO(R8, R9); - #undef GO - } - fpu_popcache(dyn, ninst, reg, 0); - if(saveflags) { - LDRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags)); - } - SET_NODF(); -} - -void call_n(dynarec_arm_t* dyn, int ninst, void* fnc, int w) -{ - MAYUSE(fnc); - STRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags)); - fpu_pushcache(dyn, ninst, x3, 1); - // x9..x15, x16,x17,x18 those needs to be saved by caller - // RDI, RSI, RDX, RCX, R8, R9 are used for function call - STPx_S7_preindex(xEmu, xRBX, xSP, -16); // ARM64 stack needs to be 16byte aligned - STPx_S7_offset(xRSP, xRBP, xEmu, offsetof(x64emu_t, regs[_SP])); - // float and double args - if(abs(w)>1) { - MESSAGE(LOG_DUMP, "Getting %d XMM args\n", abs(w)-1); - for(int i=0; i0) { - MOVx_REG(xRAX, 0); - MOVx_REG(xRDX, x1); - } - // all done, restore all regs - LDPx_S7_postindex(xEmu, xRBX, xSP, 16); - #define GO(A, B) LDPx_S7_offset(x##A, x##B, xEmu, offsetof(x64emu_t, regs[_##A])) - GO(RSP, RBP); - #undef GO - - fpu_popcache(dyn, ninst, x3, 1); - LDRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags)); - SET_NODF(); -} - -void grab_segdata(dynarec_arm_t* dyn, uintptr_t addr, int ninst, int reg, int segment) -{ - (void)addr; - int64_t j64; - MAYUSE(j64); - MESSAGE(LOG_DUMP, "Get %s Offset\n", (segment==_FS)?"FS":"GS"); - int t1 = x1, t2 = x4; - if(reg==t1) ++t1; - if(reg==t2) ++t2; - LDRw_U12(t2, xEmu, offsetof(x64emu_t, segs_serial[segment])); - LDRx_U12(reg, xEmu, offsetof(x64emu_t, segs_offs[segment])); - if(segment==_GS) { - CBNZw_MARKSEG(t2); // fast check - } else { - LDRx_U12(t1, xEmu, offsetof(x64emu_t, context)); - LDRw_U12(t1, t1, offsetof(box64context_t, sel_serial)); - SUBw_REG(t1, t1, t2); - CBZw_MARKSEG(t1); - } - MOVZw(x1, segment); - call_c(dyn, ninst, GetSegmentBaseEmu, t2, reg, 1, 0); - MARKSEG; - MESSAGE(LOG_DUMP, "----%s Offset\n", (segment==_FS)?"FS":"GS"); -} - -// x87 stuffs -static void x87_reset(dynarec_arm_t* dyn, int ninst) -{ - (void)ninst; -#if STEP > 1 - for (int i=0; i<8; ++i) - dyn->x87cache[i] = -1; - dyn->x87stack = 0; -#else - (void)dyn; -#endif -} - -void x87_stackcount(dynarec_arm_t* dyn, int ninst, int scratch) -{ -#if STEP > 1 - MAYUSE(scratch); - if(!dyn->x87stack) - return; - MESSAGE(LOG_DUMP, "\tSynch x87 Stackcount (%d)\n", dyn->x87stack); - int a = dyn->x87stack; - // Add x87stack to emu fpu_stack - LDRw_U12(scratch, xEmu, offsetof(x64emu_t, fpu_stack)); - if(a>0) { - ADDw_U12(scratch, scratch, a); - } else { - SUBw_U12(scratch, scratch, -a); - } - STRw_U12(scratch, xEmu, offsetof(x64emu_t, fpu_stack)); - // Sub x87stack to top, with and 7 - LDRw_U12(scratch, xEmu, offsetof(x64emu_t, top)); - if(a>0) { - SUBw_U12(scratch, scratch, a); - } else { - ADDw_U12(scratch, scratch, -a); - } - ANDw_mask(scratch, scratch, 0, 2); //mask=7 - STRw_U12(scratch, xEmu, offsetof(x64emu_t, top)); - // reset x87stack - dyn->x87stack = 0; - MESSAGE(LOG_DUMP, "\t------x87 
Stackcount\n"); -#else - (void)dyn; (void)ninst; (void)scratch; -#endif -} - -int x87_do_push(dynarec_arm_t* dyn, int ninst) -{ - (void)ninst; -#if STEP > 1 - dyn->x87stack+=1; - // move all regs in cache, and find a free one - int ret = -1; - for(int i=0; i<8; ++i) - if(dyn->x87cache[i]!=-1) - ++dyn->x87cache[i]; - else if(ret==-1) { - dyn->x87cache[i] = 0; - ret=dyn->x87reg[i]=fpu_get_reg_x87(dyn); - } - return ret; -#else - (void)dyn; - return 0; -#endif -} -void x87_do_push_empty(dynarec_arm_t* dyn, int ninst, int s1) -{ -#if STEP > 1 - dyn->x87stack+=1; - // move all regs in cache - for(int i=0; i<8; ++i) - if(dyn->x87cache[i]!=-1) - ++dyn->x87cache[i]; - if(s1) - x87_stackcount(dyn, ninst, s1); -#else - (void)dyn; (void)ninst; (void)s1; -#endif -} -void x87_do_pop(dynarec_arm_t* dyn, int ninst) -{ - (void)ninst; -#if STEP > 1 - dyn->x87stack-=1; - // move all regs in cache, poping ST0 - for(int i=0; i<8; ++i) - if(dyn->x87cache[i]!=-1) { - --dyn->x87cache[i]; - if(dyn->x87cache[i]==-1) { - fpu_free_reg(dyn, dyn->x87reg[i]); - dyn->x87reg[i] = -1; - } - } -#else - (void)dyn; -#endif -} - -void x87_purgecache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3) -{ - (void)ninst; -#if STEP > 1 - MAYUSE(s1); MAYUSE(s2); MAYUSE(s3); - int ret = 0; - for (int i=0; i<8 && !ret; ++i) - if(dyn->x87cache[i] != -1) - ret = 1; - if(!ret && !dyn->x87stack) // nothing to do - return; - MESSAGE(LOG_DUMP, "\tPurge x87 Cache and Synch Stackcount (%+d)\n", dyn->x87stack); - int a = dyn->x87stack; - if(a!=0) { - // reset x87stack - dyn->x87stack = 0; - // Add x87stack to emu fpu_stack - LDRw_U12(s2, xEmu, offsetof(x64emu_t, fpu_stack)); - if(a>0) { - ADDw_U12(s2, s2, a); - } else { - SUBw_U12(s2, s2, -a); - } - STRw_U12(s2, xEmu, offsetof(x64emu_t, fpu_stack)); - // Sub x87stack to top, with and 7 - LDRw_U12(s2, xEmu, offsetof(x64emu_t, top)); - // update tags (and top at the same time) - if(a>0) { - // new tag to fulls - MOVZw(s3, 0); - ADDx_U12(s1, xEmu, offsetof(x64emu_t, p_regs)); - for (int i=0; itop + st)&7 - STRw_REG_LSL2(s3, s1, s2); - } - } else { - // empty tags - MOVZw(s3, 0b11); - ADDx_U12(s1, xEmu, offsetof(x64emu_t, p_regs)); - for (int i=0; i<-a; ++i) { - STRw_REG_LSL2(s3, s1, s2); - ADDw_U12(s2, s2, 1); - ANDw_mask(s2, s2, 0, 2); //mask=7 // (emu->top + st)&7 - } - } - STRw_U12(s2, xEmu, offsetof(x64emu_t, top)); - } else { - LDRw_U12(s2, xEmu, offsetof(x64emu_t, top)); - } - if(ret!=0) { - // --- set values - // prepare offset to fpu => s1 - ADDx_U12(s1, xEmu, offsetof(x64emu_t, x87)); - // Get top - // loop all cache entries - for (int i=0; i<8; ++i) - if(dyn->x87cache[i]!=-1) { - ADDw_U12(s3, s2, dyn->x87cache[i]); - ANDw_mask(s3, s3, 0, 2); //mask=7 // (emu->top + st)&7 - VSTR64_REG_LSL3(dyn->x87reg[i], s1, s3); - fpu_free_reg(dyn, dyn->x87reg[i]); - dyn->x87reg[i] = -1; - dyn->x87cache[i] = -1; - } - } -#else - (void)dyn; (void)s1; (void)s2; (void)s3; -#endif -} - -#ifdef HAVE_TRACE -static void x87_reflectcache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3) -{ -#if STEP > 1 - MAYUSE(s2); MAYUSE(s3); - x87_stackcount(dyn, ninst, s1); - int ret = 0; - for (int i=0; (i<8) && (!ret); ++i) - if(dyn->x87cache[i] != -1) - ret = 1; - if(!ret) // nothing to do - return; - // prepare offset to fpu => s1 - ADDx_U12(s1, xEmu, offsetof(x64emu_t, x87)); - // Get top - LDRw_U12(s2, xEmu, offsetof(x64emu_t, top)); - // loop all cache entries - for (int i=0; i<8; ++i) - if(dyn->x87cache[i]!=-1) { - ADDw_U12(s3, s2, dyn->x87cache[i]); - ANDw_mask(s3, s3, 0, 2); // mask=7 // (emu->top + 
i)&7 - VSTR64_REG_LSL3(dyn->x87reg[i], s1, s3); - } -#else - (void)dyn; (void)ninst; (void)s1; (void)s2; (void)s3; -#endif -} -#endif - -int x87_get_cache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st) -{ - (void)ninst; -#if STEP > 1 - MAYUSE(s1); MAYUSE(s2); - // search in cache first - for (int i=0; i<8; ++i) - if(dyn->x87cache[i]==st) - return i; - MESSAGE(LOG_DUMP, "\tCreate x87 Cache for ST%d\n", st); - // get a free spot - int ret = -1; - for (int i=0; (i<8) && (ret==-1); ++i) - if(dyn->x87cache[i]==-1) - ret = i; - // found, setup and grab the value - dyn->x87cache[ret] = st; - dyn->x87reg[ret] = fpu_get_reg_x87(dyn); - ADDx_U12(s1, xEmu, offsetof(x64emu_t, x87)); - LDRw_U12(s2, xEmu, offsetof(x64emu_t, top)); - int a = st - dyn->x87stack; - if(a) { - if(a<0) { - SUBw_U12(s2, s2, -a); - } else { - ADDw_U12(s2, s2, a); - } - ANDw_mask(s2, s2, 0, 2); //mask=7 - } - VLDR64_REG_LSL3(dyn->x87reg[ret], s1, s2); - MESSAGE(LOG_DUMP, "\t-------x87 Cache for ST%d\n", st); - - return ret; -#else - (void)dyn; (void)s1; (void)s2; (void)st; - return 0; -#endif -} - -int x87_get_st(dynarec_arm_t* dyn, int ninst, int s1, int s2, int a) -{ -#if STEP > 1 - return dyn->x87reg[x87_get_cache(dyn, ninst, s1, s2, a)]; -#else - (void)dyn; (void)ninst; (void)s1; (void)s2; (void)a; - return 0; -#endif -} - - -void x87_refresh(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st) -{ -#if STEP > 1 - MAYUSE(s2); - x87_stackcount(dyn, ninst, s1); - int ret = -1; - for (int i=0; (i<8) && (ret==-1); ++i) - if(dyn->x87cache[i] == st) - ret = i; - if(ret==-1) // nothing to do - return; - MESSAGE(LOG_DUMP, "\tRefresh x87 Cache for ST%d\n", st); - // prepare offset to fpu => s1 - ADDx_U12(s1, xEmu, offsetof(x64emu_t, x87)); - // Get top - LDRw_U12(s2, xEmu, offsetof(x64emu_t, top)); - // Update - if(st) { - ADDw_U12(s2, s2, st); - ANDw_mask(s2, s2, 0, 2); //mask=7 // (emu->top + i)&7 - } - VLDR64_REG_LSL3(dyn->x87reg[ret], s1, s2); - MESSAGE(LOG_DUMP, "\t--------x87 Cache for ST%d\n", st); -#else - (void)dyn; (void)ninst; (void)s1; (void)s2; (void)st; -#endif -} - -void x87_forget(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st) -{ -#if STEP > 1 - MAYUSE(s2); - x87_stackcount(dyn, ninst, s1); - int ret = -1; - for (int i=0; (i<8) && (ret==-1); ++i) - if(dyn->x87cache[i] == st) - ret = i; - if(ret==-1) // nothing to do - return; - MESSAGE(LOG_DUMP, "\tForget x87 Cache for ST%d\n", st); - // prepare offset to fpu => s1 - ADDx_U12(s1, xEmu, offsetof(x64emu_t, x87)); - // Get top - LDRw_U12(s2, xEmu, offsetof(x64emu_t, top)); - // Update - if(st) { - ADDw_U12(s2, s2, st); - ANDw_mask(s2, s2, 0, 2); //mask=7 // (emu->top + i)&7 - } - VSTR64_REG_LSL3(dyn->x87reg[ret], s1, s2); - MESSAGE(LOG_DUMP, "\t--------x87 Cache for ST%d\n", st); - // and forget that cache - fpu_free_reg(dyn, dyn->x87reg[ret]); - dyn->x87cache[ret] = -1; - dyn->x87reg[ret] = -1; -#else - (void)dyn; (void)ninst; (void)s1; (void)s2; (void)st; -#endif -} - -void x87_reget_st(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st) -{ - (void)ninst; -#if STEP > 1 - MAYUSE(s1); MAYUSE(s2); - // search in cache first - for (int i=0; i<8; ++i) - if(dyn->x87cache[i]==st) { - // refresh the value - MESSAGE(LOG_DUMP, "\tRefresh x87 Cache for ST%d\n", st); - ADDx_U12(s1, xEmu, offsetof(x64emu_t, x87)); - LDRw_U12(s2, xEmu, offsetof(x64emu_t, top)); - int a = st - dyn->x87stack; - if(a<0) { - SUBw_U12(s2, s2, -a); - } else { - ADDw_U12(s2, s2, a); - } - ANDw_mask(s2, s2, 0, 2); //mask=7 // (emu->top + i)&7 - VLDR64_REG_LSL3(dyn->x87reg[i], s1, s2); - 
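            // Illustrative aside (not part of the patch): the index computed just above
            // resolves ST(st) to its slot in emu->x87[], folding in the stack delta that
            // has not been committed yet:
            //     slot = (emu->top + st - dyn->x87stack) & 7;
            // dyn->x87stack counts x87 pushes/pops applied only to the cache so far;
            // x87_stackcount() and x87_purgecache() later write it back into emu->top
            // and emu->fpu_stack.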
MESSAGE(LOG_DUMP, "\t-------x87 Cache for ST%d\n", st); - // ok - return; - } - // Was not in the cache? creating it.... - MESSAGE(LOG_DUMP, "\tCreate x87 Cache for ST%d\n", st); - // get a free spot - int ret = -1; - for (int i=0; (i<8) && (ret==-1); ++i) - if(dyn->x87cache[i]==-1) - ret = i; - // found, setup and grab the value - dyn->x87cache[ret] = st; - dyn->x87reg[ret] = fpu_get_reg_x87(dyn); - ADDx_U12(s1, xEmu, offsetof(x64emu_t, x87)); - LDRw_U12(s2, xEmu, offsetof(x64emu_t, top)); - int a = st - dyn->x87stack; - if(a<0) { - SUBw_U12(s2, s2, -a); - } else { - ADDw_U12(s2, s2, a); - } - ANDw_mask(s2, s2, 0, 2); //mask=7 // (emu->top + i)&7 - VLDR64_REG_LSL3(dyn->x87reg[ret], s1, s2); - MESSAGE(LOG_DUMP, "\t-------x87 Cache for ST%d\n", st); -#else - (void)dyn; (void)s1; (void)s2; (void)st; -#endif -} - -// Set rounding according to cw flags, return reg to restore flags -int x87_setround(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3) -{ - MAYUSE(dyn); MAYUSE(ninst); - MAYUSE(s1); MAYUSE(s2); - LDRw_U12(s1, xEmu, offsetof(x64emu_t, round)); - UBFXw(s2, s1, 1, 1); // bit 1 of round in bit 0 (zero extented) of s2 - BFIw(s2, s1, 1, 1); // bit 0 of round in bit 1 of s2 - MRS_fpcr(s1); // get fpscr - MOVx_REG(s3, s1); - BFIx(s1, s2, 22, 2); // inject new round - MSR_fpcr(s1); // put new fpscr - return s3; -} - -// Set rounding according to mxcsr flags, return reg to restore flags -int sse_setround(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3) -{ - MAYUSE(dyn); MAYUSE(ninst); - MAYUSE(s1); MAYUSE(s2); - LDRH_U12(s1, xEmu, offsetof(x64emu_t, mxcsr)); - RBITw(s2, s1); // round is on bits 13-14 on x86, - LSRw(s2, s2, 17); // but we want the reverse of that - MRS_fpcr(s1); // get fpscr - MOVx_REG(s3, s1); - BFIx(s1, s2, 22, 2); // inject new round - MSR_fpcr(s1); // put new fpscr - return s3; -} - -// Restore round flag -void x87_restoreround(dynarec_arm_t* dyn, int ninst, int s1) -{ - MAYUSE(dyn); MAYUSE(ninst); - MAYUSE(s1); - MSR_fpcr(s1); // put back fpscr -} - -// MMX helpers -static void mmx_reset(dynarec_arm_t* dyn, int ninst) -{ - (void)ninst; -#if STEP > 1 - MAYUSE(dyn); - for (int i=0; i<8; ++i) - dyn->mmxcache[i] = -1; -#else - (void)dyn; -#endif -} -// get neon register for a MMX reg, create the entry if needed -int mmx_get_reg(dynarec_arm_t* dyn, int ninst, int s1, int a) -{ - (void)ninst; (void)s1; -#if STEP > 1 - if(dyn->mmxcache[a]!=-1) - return dyn->mmxcache[a]; - int ret = dyn->mmxcache[a] = fpu_get_reg_emm(dyn, a); - VLDR64_U12(ret, xEmu, offsetof(x64emu_t, mmx[a])); - return ret; -#else - (void)dyn; (void)a; - return 0; -#endif -} -// get neon register for a MMX reg, but don't try to synch it if it needed to be created -int mmx_get_reg_empty(dynarec_arm_t* dyn, int ninst, int s1, int a) -{ - (void)ninst; (void)s1; -#if STEP > 1 - if(dyn->mmxcache[a]!=-1) - return dyn->mmxcache[a]; - int ret = dyn->mmxcache[a] = fpu_get_reg_emm(dyn, a); - return ret; -#else - (void)dyn; (void)a; - return 0; -#endif -} -// purge the MMX cache only(needs 3 scratch registers) -void mmx_purgecache(dynarec_arm_t* dyn, int ninst, int s1) -{ - (void)ninst; (void)s1; -#if STEP > 1 - int old = -1; - for (int i=0; i<8; ++i) - if(dyn->mmxcache[i]!=-1) { - if (old==-1) { - MESSAGE(LOG_DUMP, "\tPurge MMX Cache ------\n"); - ++old; - } - VSTR64_U12(dyn->mmxcache[i], xEmu, offsetof(x64emu_t, mmx[i])); - fpu_free_reg(dyn, dyn->mmxcache[i]); - dyn->mmxcache[i] = -1; - } - if(old!=-1) { - MESSAGE(LOG_DUMP, "\t------ Purge MMX Cache\n"); - } -#else - (void)dyn; -#endif -} -#ifdef HAVE_TRACE 
-static void mmx_reflectcache(dynarec_arm_t* dyn, int ninst, int s1) -{ - (void) ninst; (void)s1; -#if STEP > 1 - for (int i=0; i<8; ++i) - if(dyn->mmxcache[i]!=-1) { - VLDR64_U12(dyn->mmxcache[i], xEmu, offsetof(x64emu_t, mmx[i])); - } -#else - (void)dyn; -#endif -} -#endif - - -// SSE / SSE2 helpers -static void sse_reset(dynarec_arm_t* dyn, int ninst) -{ - (void)ninst; -#if STEP > 1 - for (int i=0; i<16; ++i) - dyn->ssecache[i] = -1; -#else - (void)dyn; -#endif -} -// get neon register for a SSE reg, create the entry if needed -int sse_get_reg(dynarec_arm_t* dyn, int ninst, int s1, int a) -{ - (void) ninst; (void)s1; -#if STEP > 1 - if(dyn->ssecache[a]!=-1) - return dyn->ssecache[a]; - int ret = dyn->ssecache[a] = fpu_get_reg_xmm(dyn, a); - VLDR128_U12(ret, xEmu, offsetof(x64emu_t, xmm[a])); - return ret; -#else - (void)dyn; (void)a; - return 0; -#endif -} -// get neon register for a SSE reg, but don't try to synch it if it needed to be created -int sse_get_reg_empty(dynarec_arm_t* dyn, int ninst, int s1, int a) -{ - (void) ninst; (void)s1; -#if STEP > 1 - if(dyn->ssecache[a]!=-1) - return dyn->ssecache[a]; - int ret = dyn->ssecache[a] = fpu_get_reg_xmm(dyn, a); - return ret; -#else - (void)dyn; (void)a; - return 0; -#endif -} -// forget neon register for a SSE reg, create the entry if needed -void sse_forget_reg(dynarec_arm_t* dyn, int ninst, int a) -{ - (void) ninst; -#if STEP > 1 - if(dyn->ssecache[a]==-1) - return; - VSTR128_U12(dyn->ssecache[a], xEmu, offsetof(x64emu_t, xmm[a])); - fpu_free_reg(dyn, dyn->ssecache[a]); - dyn->ssecache[a] = -1; -#else - (void)dyn; (void)a; -#endif - return; -} -// purge the SSE cache for XMM0..XMM7 (to use before function native call) -void sse_purge07cache(dynarec_arm_t* dyn, int ninst, int s1) -{ - (void) ninst; (void)s1; -#if STEP > 1 - int old = -1; - for (int i=0; i<8; ++i) - if(dyn->ssecache[i]!=-1) { - if (old==-1) { - MESSAGE(LOG_DUMP, "\tPurge XMM0..7 Cache ------\n"); - ++old; - } - VSTR128_U12(dyn->ssecache[i], xEmu, offsetof(x64emu_t, xmm[i])); - fpu_free_reg(dyn, dyn->ssecache[i]); - dyn->ssecache[i] = -1; - } - if(old!=-1) { - MESSAGE(LOG_DUMP, "\t------ Purge XMM0..7 Cache\n"); - } -#else - (void)dyn; -#endif -} - -// purge the SSE cache only -static void sse_purgecache(dynarec_arm_t* dyn, int ninst, int s1) -{ - (void) ninst; (void)s1; -#if STEP > 1 - int old = -1; - for (int i=0; i<16; ++i) - if(dyn->ssecache[i]!=-1) { - if (old==-1) { - MESSAGE(LOG_DUMP, "\tPurge SSE Cache ------\n"); - ++old; - } - VSTR128_U12(dyn->ssecache[i], xEmu, offsetof(x64emu_t, xmm[i])); - fpu_free_reg(dyn, dyn->ssecache[i]); - dyn->ssecache[i] = -1; - } - if(old!=-1) { - MESSAGE(LOG_DUMP, "\t------ Purge SSE Cache\n"); - } -#else - (void)dyn; -#endif -} -#ifdef HAVE_TRACE -static void sse_reflectcache(dynarec_arm_t* dyn, int ninst, int s1) -{ - (void) ninst; (void)s1; -#if STEP > 1 - for (int i=0; i<16; ++i) - if(dyn->ssecache[i]!=-1) { - VSTR128_U12(dyn->ssecache[i], xEmu, offsetof(x64emu_t, xmm[i])); - } -#else - (void)dyn; -#endif -} -#endif - -void fpu_pushcache(dynarec_arm_t* dyn, int ninst, int s1, int not07) -{ - (void) ninst; (void)s1; -#if STEP > 1 - int start = not07?8:0; - // only SSE regs needs to be push back to xEmu - int n=0; - for (int i=start; i<16; i++) - if(dyn->ssecache[i]!=-1) - ++n; - if(!n) - return; - MESSAGE(LOG_DUMP, "\tPush XMM Cache (%d)------\n", n); - for (int i=start; i<16; ++i) - if(dyn->ssecache[i]!=-1) { - VSTR128_U12(dyn->ssecache[i], xEmu, offsetof(x64emu_t, xmm[i])); - } - MESSAGE(LOG_DUMP, "\t------- Push XMM Cache 
(%d)\n", n); -#else - (void)dyn; -#endif -} - -void fpu_popcache(dynarec_arm_t* dyn, int ninst, int s1, int not07) -{ - (void) ninst; (void)s1; -#if STEP > 1 - int start = not07?8:0; - // only SSE regs needs to be pop back from xEmu - int n=0; - for (int i=start; i<16; i++) - if(dyn->ssecache[i]!=-1) - ++n; - if(!n) - return; - MESSAGE(LOG_DUMP, "\tPop XMM Cache (%d)------\n", n); - for (int i=start; i<16; ++i) - if(dyn->ssecache[i]!=-1) { - VLDR128_U12(dyn->ssecache[i], xEmu, offsetof(x64emu_t, xmm[i])); - } - MESSAGE(LOG_DUMP, "\t------- Pop XMM Cache (%d)\n", n); -#else - (void)dyn; -#endif -} - -void fpu_purgecache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3) -{ - x87_purgecache(dyn, ninst, s1, s2, s3); - mmx_purgecache(dyn, ninst, s1); - sse_purgecache(dyn, ninst, s1); - fpu_reset_reg(dyn); -} - -#ifdef HAVE_TRACE -void fpu_reflectcache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3) -{ - x87_reflectcache(dyn, ninst, s1, s2, s3); - if(trace_emm) - mmx_reflectcache(dyn, ninst, s1); - if(trace_xmm) - sse_reflectcache(dyn, ninst, s1); -} -#endif - -void fpu_reset(dynarec_arm_t* dyn, int ninst) -{ - x87_reset(dyn, ninst); - mmx_reset(dyn, ninst); - sse_reset(dyn, ninst); - fpu_reset_reg(dyn); -} - -void emit_pf(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4) -{ - MAYUSE(dyn); MAYUSE(ninst); - MAYUSE(s1); MAYUSE(s3); MAYUSE(s4); - // PF: (((emu->x64emu_parity_tab[(res) / 32] >> ((res) % 32)) & 1) == 0) - ANDw_mask(s3, s1, 0b011011, 0b000010); // mask=0xE0 - LSRw(s3, s3, 5); - MOV64x(s4, (uintptr_t)GetParityTab()); - LDRw_REG_LSL2(s4, s4, s3); - ANDw_mask(s3, s1, 0, 0b000100); //0x1f - LSRw_REG(s4, s4, s3); - MVNw_REG(s4, s4); - BFIw(xFlags, s4, F_PF, 1); -} diff --git a/src/dynarec/dynarec_arm64_helper.h b/src/dynarec/dynarec_arm64_helper.h deleted file mode 100755 index d15902f4..00000000 --- a/src/dynarec/dynarec_arm64_helper.h +++ /dev/null @@ -1,1087 +0,0 @@ -#ifndef __DYNAREC_ARM64_HELPER_H__ -#define __DYNAREC_ARM64_HELPER_H__ - -// undef to get Close to SSE Float->int conversions -//#define PRECISE_CVT - -#if STEP == 0 -#include "dynarec_arm64_pass0.h" -#elif STEP == 1 -#include "dynarec_arm64_pass1.h" -#elif STEP == 2 -#include "dynarec_arm64_pass2.h" -#elif STEP == 3 -#include "dynarec_arm64_pass3.h" -#endif - -#include "debug.h" -#include "arm64_emitter.h" -#include "../emu/x64primop.h" - -#define F8 *(uint8_t*)(addr++) -#define F8S *(int8_t*)(addr++) -#define F16 *(uint16_t*)(addr+=2, addr-2) -#define F16S *(int16_t*)(addr+=2, addr-2) -#define F32 *(uint32_t*)(addr+=4, addr-4) -#define F32S *(int32_t*)(addr+=4, addr-4) -#define F32S64 (uint64_t)(int64_t)F32S -#define F64 *(uint64_t*)(addr+=8, addr-8) -#define PK(a) *(uint8_t*)(addr+a) -#define PK16(a) *(uint16_t*)(addr+a) -#define PK32(a) *(uint32_t*)(addr+a) -#define PK64(a) *(uint64_t*)(addr+a) -#define PKip(a) *(uint8_t*)(ip+a) - -// GETGD get x64 register in gd -#define GETGD gd = xRAX+((nextop&0x38)>>3)+(rex.r<<3) -//GETED can use r1 for ed, and r2 for wback. 
wback is 0 if ed is xEAX..xEDI -#define GETED(D) if(MODREG) { \ - ed = xRAX+(nextop&7)+(rex.b<<3); \ - wback = 0; \ - } else { \ - addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, 0, D); \ - LDRxw_U12(x1, wback, fixedaddress); \ - ed = x1; \ - } -#define GETEDx(D) if(MODREG) { \ - ed = xRAX+(nextop&7)+(rex.b<<3); \ - wback = 0; \ - } else { \ - addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<3, 7, rex, 0, D); \ - LDRx_U12(x1, wback, fixedaddress); \ - ed = x1; \ - } -#define GETEDw(D) if((nextop&0xC0)==0xC0) { \ - ed = xEAX+(nextop&7)+(rex.b<<3); \ - wback = 0; \ - } else { \ - addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, D); \ - LDRw_U12(x1, wback, fixedaddress); \ - ed = x1; \ - } -#define GETSEDw(D) if((nextop&0xC0)==0xC0) { \ - ed = xRAX+(nextop&7)+(rex.b<<3); \ - SXTWx(x1, ed); \ - wb = x1; \ - wback = 0; \ - } else { \ - addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, D); \ - LDRSW_U12(x1, wback, fixedaddress); \ - wb = ed = x1; \ - } -#define GETED32(D) if(MODREG) { \ - ed = xRAX+(nextop&7)+(rex.b<<3); \ - wback = 0; \ - } else { \ - addr = geted32(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, 0, D); \ - LDRxw_U12(x1, wback, fixedaddress); \ - ed = x1; \ - } -#define GETSED32w(D) if((nextop&0xC0)==0xC0) { \ - ed = xRAX+(nextop&7)+(rex.b<<3); \ - SXTWx(x1, ed); \ - wb = x1; \ - wback = 0; \ - } else { \ - addr = geted32(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, D); \ - LDRSW_U12(x1, wback, fixedaddress); \ - wb = ed = x1; \ - } -//GETEDH can use hint for ed, and r1 or r2 for wback (depending on hint). wback is 0 if ed is xEAX..xEDI -#define GETEDH(hint, D) if(MODREG) { \ - ed = xRAX+(nextop&7)+(rex.b<<3); \ - wback = 0; \ - } else { \ - addr = geted(dyn, addr, ninst, nextop, &wback, (hint==x2)?x1:x2, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, 0, D); \ - LDRxw_U12(hint, wback, fixedaddress); \ - ed = hint; \ - } -#define GETED32H(hint, D) if(MODREG) { \ - ed = xRAX+(nextop&7)+(rex.b<<3); \ - wback = 0; \ - } else { \ - addr = geted32(dyn, addr, ninst, nextop, &wback, (hint==x2)?x1:x2, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, 0, D); \ - LDRxw_U12(hint, wback, fixedaddress); \ - ed = hint; \ - } -//GETEDW can use hint for wback and ret for ed. 
wback is 0 if ed is xEAX..xEDI -#define GETEDW(hint, ret, D) if(MODREG) { \ - ed = xRAX+(nextop&7)+(rex.b<<3); \ - MOVxw_REG(ret, ed); \ - wback = 0; \ - } else { \ - addr = geted(dyn, addr, ninst, nextop, &wback, hint, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, 0, D); \ - ed = ret; \ - LDRxw_U12(ed, wback, fixedaddress); \ - } -#define GETED32W(hint, ret, D) if(MODREG) { \ - ed = xRAX+(nextop&7)+(rex.b<<3); \ - MOVxw_REG(ret, ed); \ - wback = 0; \ - } else { \ - addr = geted32(dyn, addr, ninst, nextop, &wback, hint, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, 0, D); \ - ed = ret; \ - LDRxw_U12(ed, wback, fixedaddress); \ - } -// Write back ed in wback (if wback not 0) -#define WBACK if(wback) {STRxw_U12(ed, wback, fixedaddress);} -// Write back ed in wback (if wback not 0) -#define WBACKx if(wback) {STRx_U12(ed, wback, fixedaddress);} -// Write back ed in wback (if wback not 0) -#define WBACKw if(wback) {STRw_U12(ed, wback, fixedaddress);} -// Send back wb to either ed or wback -#define SBACK(wb) if(wback) {STRxw(wb, wback, fixedaddress);} else {MOVxw_REG(ed, wb);} -//GETEDO can use r1 for ed, and r2 for wback. wback is 0 if ed is xEAX..xEDI -#define GETEDO(O, D) if(MODREG) { \ - ed = xRAX+(nextop&7)+(rex.b<<3); \ - wback = 0; \ - } else { \ - addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, 0, D); \ - LDRxw_REG(x1, wback, O); \ - ed = x1; \ - } -#define WBACKO(O) if(wback) {STRxw_REG(ed, wback, O);} -//GETEDOx can use r1 for ed, and r2 for wback. wback is 0 if ed is xEAX..xEDI -#define GETEDOx(O, D) if(MODREG) { \ - ed = xRAX+(nextop&7)+(rex.b<<3); \ - wback = 0; \ - } else { \ - addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, 0, D); \ - LDRx_REG(x1, wback, O); \ - ed = x1; \ - } -#define GETSEDOw(O, D) if((nextop&0xC0)==0xC0) { \ - ed = xRAX+(nextop&7)+(rex.b<<3); \ - SXTWx(x1, ed); \ - wb = x1; \ - wback = 0; \ - } else { \ - addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, 0, D); \ - LDRSW_REG(x1, wback, O); \ - wb = ed = x1; \ - } -//FAKEELike GETED, but doesn't get anything -#define FAKEED if(!MODREG) { \ - addr = fakeed(dyn, addr, ninst, nextop); \ - } -// GETGW extract x64 register in gd, that is i -#define GETGW(i) gd = xRAX+((nextop&0x38)>>3)+(rex.r<<3); UXTHw(i, gd); gd = i; -// GETGW extract x64 register in gd, that is i, Signed extented -#define GETSGW(i) gd = xRAX+((nextop&0x38)>>3)+(rex.r<<3); SXTHw(i, gd); gd = i; -//GETEWW will use i for ed, and can use w for wback. -#define GETEWW(w, i, D) if(MODREG) { \ - wback = xRAX+(nextop&7)+(rex.b<<3);\ - UXTHw(i, wback); \ - ed = i; \ - wb1 = 0; \ - } else { \ - addr = geted(dyn, addr, ninst, nextop, &wback, w, &fixedaddress, 0xfff<<1, (1<<1)-1, rex, 0, D); \ - LDRH_U12(i, wback, fixedaddress); \ - ed = i; \ - wb1 = 1; \ - } -//GETEW will use i for ed, and can use r3 for wback. -#define GETEW(i, D) if(MODREG) { \ - wback = xRAX+(nextop&7)+(rex.b<<3);\ - UXTHw(i, wback); \ - ed = i; \ - wb1 = 0; \ - } else { \ - addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<1, (1<<1)-1, rex, 0, D); \ - LDRH_U12(i, wback, fixedaddress); \ - ed = i; \ - wb1 = 1; \ - } -//GETSEW will use i for ed, and can use r3 for wback. 
This is the Signed version -#define GETSEW(i, D) if(MODREG) { \ - wback = xRAX+(nextop&7)+(rex.b<<3);\ - SXTHw(i, wback); \ - ed = i; \ - wb1 = 0; \ - } else { \ - addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<1, (1<<1)-1, rex, 0, D); \ - LDRSHx_U12(i, wback, fixedaddress);\ - ed = i; \ - wb1 = 1; \ - } -// Write ed back to original register / memory -#define EWBACK if(wb1) {STRH_U12(ed, wback, fixedaddress);} else {BFIx(wback, ed, 0, 16);} -// Write w back to original register / memory -#define EWBACKW(w) if(wb1) {STRH_U12(w, wback, fixedaddress);} else {BFIx(wback, w, 0, 16);} -// Write back gd in correct register -#define GWBACK BFIx((xRAX+((nextop&0x38)>>3)+(rex.r<<3)), gd, 0, 16); -//GETEB will use i for ed, and can use r3 for wback. -#define GETEB(i, D) if(MODREG) { \ - if(rex.rex) { \ - wback = xRAX+(nextop&7)+(rex.b<<3); \ - wb2 = 0; \ - } else { \ - wback = (nextop&7); \ - wb2 = (wback>>2)*8; \ - wback = xRAX+(wback&3); \ - } \ - UBFXx(i, wback, wb2, 8); \ - wb1 = 0; \ - ed = i; \ - } else { \ - addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff, 0, rex, 0, D); \ - LDRB_U12(i, wback, fixedaddress); \ - wb1 = 1; \ - ed = i; \ - } -//GETEBO will use i for ed, i is also Offset, and can use r3 for wback. -#define GETEBO(i, D) if(MODREG) { \ - if(rex.rex) { \ - wback = xRAX+(nextop&7)+(rex.b<<3); \ - wb2 = 0; \ - } else { \ - wback = (nextop&7); \ - wb2 = (wback>>2)*8; \ - wback = xRAX+(wback&3); \ - } \ - UBFXx(i, wback, wb2, 8); \ - wb1 = 0; \ - ed = i; \ - } else { \ - addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0, rex, 0, D); \ - ADDx_REG(wback, wback, i); \ - LDRB_U12(i, wback, fixedaddress); \ - wb1 = 1; \ - ed = i; \ - } -//GETSEB sign extend EB, will use i for ed, and can use r3 for wback. -#define GETSEB(i, D) if(MODREG) { \ - if(rex.rex) { \ - wback = xRAX+(nextop&7)+(rex.b<<3); \ - wb2 = 0; \ - } else { \ - wback = (nextop&7); \ - wb2 = (wback>>2)*8; \ - wback = xRAX+(wback&3); \ - } \ - SBFXx(i, wback, wb2, 8); \ - wb1 = 0; \ - ed = i; \ - } else { \ - addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff, 0, rex, 0, D); \ - LDRSBx_U12(i, wback, fixedaddress); \ - wb1 = 1; \ - ed = i; \ - } -// Write eb (ed) back to original register / memory -#define EBBACK if(wb1) {STRB_U12(ed, wback, fixedaddress);} else {BFIx(wback, ed, wb2, 8);} -//GETGB will use i for gd -#define GETGB(i) if(rex.rex) { \ - gb1 = xRAX+((nextop&0x38)>>3)+(rex.r<<3); \ - gb2 = 0; \ - } else { \ - gd = (nextop&0x38)>>3; \ - gb2 = ((gd&4)>>2); \ - gb1 = xRAX+(gd&3); \ - } \ - gd = i; \ - UBFXx(gd, gb1, gb2*8, 8); -//GETSGB signe extend GB, will use i for gd -#define GETSGB(i) if(rex.rex) { \ - gb1 = xRAX+((nextop&0x38)>>3)+(rex.r<<3); \ - gb2 = 0; \ - } else { \ - gd = (nextop&0x38)>>3; \ - gb2 = ((gd&4)>>2); \ - gb1 = xRAX+(gd&3); \ - } \ - gd = i; \ - SBFXx(gd, gb1, gb2, 8); -// Write gb (gd) back to original register / memory -#define GBBACK BFIx(gb1, gd, gb2, 8); - -// Get Direction with size Z and based of F_DF flag, on register r ready for LDR/STR fetching -// F_DF is 1<<10, so 1 ROR 11*2 (so F_OF) -#define GETDIR(r, A) \ - MOV32w(r, A); /* mask=1<<10 */ \ - TSTw_mask(xFlags, 0b010110, 0); \ - CNEGx(r, r, cNE) - -// CALL will use x7 for the call address. Return value can be put in ret (unless ret is -1) -// R0 will not be pushed/popd if ret is -2 -#define CALL(F, ret) call_c(dyn, ninst, F, x7, ret, 1, 0) -// CALL_ will use x7 for the call address. 
Return value can be put in ret (unless ret is -1) -// R0 will not be pushed/popd if ret is -2 -#define CALL_(F, ret, reg) call_c(dyn, ninst, F, x7, ret, 1, reg) -// CALL_S will use x7 for the call address. Return value can be put in ret (unless ret is -1) -// R0 will not be pushed/popd if ret is -2. Flags are not save/restored -#define CALL_S(F, ret) call_c(dyn, ninst, F, x7, ret, 0, 0) - -#define MARK dyn->insts[ninst].mark = dyn->arm_size -#define GETMARK dyn->insts[ninst].mark -#define MARK2 dyn->insts[ninst].mark2 = dyn->arm_size -#define GETMARK2 dyn->insts[ninst].mark2 -#define MARK3 dyn->insts[ninst].mark3 = dyn->arm_size -#define GETMARK3 dyn->insts[ninst].mark3 -#define MARKF dyn->insts[ninst].markf = dyn->arm_size -#define GETMARKF dyn->insts[ninst].markf -#define MARKSEG dyn->insts[ninst].markseg = dyn->arm_size -#define GETMARKSEG dyn->insts[ninst].markseg -#define MARKLOCK dyn->insts[ninst].marklock = dyn->arm_size -#define GETMARKLOCK dyn->insts[ninst].marklock - -// Branch to MARK if cond (use j64) -#define B_MARK(cond) \ - j64 = GETMARK-(dyn->arm_size); \ - Bcond(cond, j64) -// Branch to MARK unconditionnal (use j64) -#define B_MARK_nocond \ - j64 = GETMARK-(dyn->arm_size); \ - B(j64) -// Branch to MARK if reg is 0 (use j64) -#define CBZxw_MARK(reg) \ - j64 = GETMARK-(dyn->arm_size); \ - CBZxw(reg, j64) -// Branch to MARK if reg is not 0 (use j64) -#define CBNZx_MARK(reg) \ - j64 = GETMARK-(dyn->arm_size); \ - CBNZx(reg, j64) -// Branch to MARK if reg is not 0 (use j64) -#define CBNZw_MARK(reg) \ - j64 = GETMARK-(dyn->arm_size); \ - CBNZw(reg, j64) -// Test bit N of A and branch to MARK if not set -#define TBZ_MARK(A, N) \ - j64 = GETMARK-(dyn->arm_size); \ - TBZ(A, N, j64) -// Test bit N of A and branch to MARK if set -#define TBNZ_MARK(A, N) \ - j64 = GETMARK-(dyn->arm_size); \ - TBNZ(A, N, j64) -// Branch to MARK2 if cond (use j64) -#define B_MARK2(cond) \ - j64 = GETMARK2-(dyn->arm_size); \ - Bcond(cond, j64) -// Branch to MARK2 unconditionnal (use j64) -#define B_MARK2_nocond \ - j64 = GETMARK2-(dyn->arm_size); \ - B(j64) -// Branch to MARK2 if reg is not 0 (use j64) -#define CBNZx_MARK2(reg) \ - j64 = GETMARK2-(dyn->arm_size); \ - CBNZx(reg, j64) -// Test bit N of A and branch to MARK2 if set -#define TBNZ_MARK2(A, N) \ - j64 = GETMARK2-(dyn->arm_size); \ - TBNZ(A, N, j64) -// Branch to MARK3 if cond (use j64) -#define B_MARK3(cond) \ - j64 = GETMARK3-(dyn->arm_size); \ - Bcond(cond, j64) -// Test bit N of A and branch to MARK3 if not set -#define TBZ_MARK2(A, N) \ - j64 = GETMARK2-(dyn->arm_size); \ - TBZ(A, N, j64) -// Branch to MARK3 unconditionnal (use j64) -#define B_MARK3_nocond \ - j64 = GETMARK3-(dyn->arm_size); \ - B(j64) -// Branch to MARK3 if reg is not 0 (use j64) -#define CBNZx_MARK3(reg) \ - j64 = GETMARK3-(dyn->arm_size); \ - CBNZx(reg, j64) -// Branch to MARK3 if reg is 0 (use j64) -#define CBZx_MARK3(reg) \ - j64 = GETMARK3-(dyn->arm_size); \ - CBZx(reg, j64) -// Test bit N of A and branch to MARK3 if not set -#define TBZ_MARK3(A, N) \ - j64 = GETMARK3-(dyn->arm_size); \ - TBZ(A, N, j64) -// Test bit N of A and branch to MARK3 if set -#define TBNZ_MARK3(A, N) \ - j64 = GETMARK3-(dyn->arm_size); \ - TBNZ(A, N, j64) -// Branch to next instruction if cond (use j64) -#define B_NEXT(cond) \ - j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->arm_size)):0; \ - Bcond(cond, j64) -// Branch to next instruction unconditionnal (use j64) -#define B_NEXT_nocond \ - j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->arm_size)):0;\ - B(j64) -// Branch to next 
instruction if reg is 0 (use j64) -#define CBZw_NEXT(reg) \ - j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->arm_size)):0; \ - CBZw(reg, j64) -// Branch to next instruction if reg is 0 (use j64) -#define CBZx_NEXT(reg) \ - j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->arm_size)):0; \ - CBZx(reg, j64) -// Branch to next instruction if reg is not 0 (use j64) -#define CBNZx_NEXT(reg) \ - j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->arm_size)):0; \ - CBNZx(reg, j64) -// Test bit N of A and branch to next instruction if not set -#define TBZ_NEXT(A, N) \ - j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->arm_size)):0; \ - TBZ(A, N, j64) -// Test bit N of A and branch to next instruction if set -#define TBNZ_NEXT(A, N) \ - j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->arm_size)):0; \ - TBNZ(A, N, j64) -// Branch to MARKSEG if cond (use j64) -#define B_MARKSEG(cond) \ - j64 = GETMARKSEG-(dyn->arm_size); \ - Bcond(cond, j64) -// Branch to MARKSEG if reg is 0 (use j64) -#define CBZw_MARKSEG(reg) \ - j64 = GETMARKSEG-(dyn->arm_size); \ - CBZw(reg, j64) -// Branch to MARKSEG if reg is not 0 (use j64) -#define CBNZw_MARKSEG(reg) \ - j64 = GETMARKSEG-(dyn->arm_size); \ - CBNZw(reg, j64) -// Branch to MARKLOCK if cond (use j64) -#define B_MARKLOCK(cond) \ - j64 = GETMARKLOCK-(dyn->arm_size); \ - Bcond(cond, j64) -// Branch to MARKLOCK if reg is not 0 (use j64) -#define CBNZx_MARKLOCK(reg) \ - j64 = GETMARKLOCK-(dyn->arm_size); \ - CBNZx(reg, j64) - -#define IFX(A) if((dyn->insts[ninst].x64.need_flags&(A))) -#define IFX_PENDOR0 if((dyn->insts[ninst].x64.need_flags&(X_PEND) || !dyn->insts[ninst].x64.need_flags)) -#define IFXX(A) if((dyn->insts[ninst].x64.need_flags==(A))) -#define IFX2X(A, B) if((dyn->insts[ninst].x64.need_flags==(A) || dyn->insts[ninst].x64.need_flags==(B) || dyn->insts[ninst].x64.need_flags==((A)|(B)))) -#define IFXN(A, B) if((dyn->insts[ninst].x64.need_flags&(A) && !(dyn->insts[ninst].x64.need_flags&(B)))) - -// Generate FCOM with s1 and s2 scratch regs (the VCMP is already done) -#define FCOM(s1, s2, s3) \ - LDRH_U12(s3, xEmu, offsetof(x64emu_t, sw)); /*offset is 8bits right?*/\ - MOV32w(s1, 0b0100011100000000); \ - BICw_REG(s3, s3, s1); \ - CSETw(s1, cMI); /* 1 if less than, 0 else */ \ - MOV32w(s2, 0b01000101); /* unordered */ \ - CSELw(s1, s2, s1, cVS); \ - MOV32w(s2, 0b01000000); /* zero */ \ - CSELw(s1, s2, s1, cEQ); \ - /* greater than leave 0 */ \ - ORRw_REG_LSL(s3, s3, s1, 8); \ - STRH_U12(s3, xEmu, offsetof(x64emu_t, sw)) - -// Generate FCOMI with s1 and s2 scratch regs (the VCMP is already done) -#define FCOMI(s1, s2) \ - IFX(X_CF|X_PF|X_ZF|X_PEND) { \ - MOV32w(s2, 0b01000101); \ - BICw_REG(xFlags, xFlags, s2); \ - CSETw(s1, cMI); /* 1 if less than, 0 else */ \ - /*s2 already set */ /* unordered */ \ - CSELw(s1, s2, s1, cVS); \ - MOV32w(s2, 0b01000000); /* zero */ \ - CSELw(s1, s2, s1, cEQ); \ - /* greater than leave 0 */ \ - ORRw_REG(xFlags, xFlags, s1); \ - } \ - SET_DFNONE(s1); \ - IFX(X_OF|X_PEND) { \ - BFCw(xFlags, F_OF, 1); \ - } \ - IFX(X_AF|X_PEND) { \ - BFCw(xFlags, F_AF, 1); \ - } \ - IFX(X_SF|X_PEND) { \ - BFCw(xFlags, F_SF, 1); \ - } \ - - -#define STORE_REG(A) STRx_U12(x##A, xEmu, offsetof(x64emu_t, regs[_##A])) -#define STP_REGS(A, B) STPx_S7_offset(x##A, x##B, xEmu, offsetof(x64emu_t, regs[_##A])) -#define LDP_REGS(A, B) LDPx_S7_offset(x##A, x##B, xEmu, offsetof(x64emu_t, regs[_##A])) -#define STORE_XEMU_REGS(A) \ - STORE_REG(RAX); \ - STORE_REG(RCX); \ - STORE_REG(RDX); \ - STORE_REG(RBX); \ - STORE_REG(RSP); \ - STORE_REG(RBP); \ - 
STORE_REG(RSI); \ - STORE_REG(RDI); \ - STORE_REG(R8); \ - STORE_REG(R9); \ - STORE_REG(R10); \ - STORE_REG(R11); \ - STORE_REG(R12); \ - STORE_REG(R13); \ - STORE_REG(R14); \ - STORE_REG(R15); \ - STRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags)); \ - if(A) {STRx_U12(A, xEmu, offsetof(x64emu_t, ip));} - -#define LOAD_REG(A) LDRx_U12(x##A, xEmu, offsetof(x64emu_t, regs[_##A])) -#define LOAD_XEMU_REGS(A) \ - LOAD_REG(RAX); \ - LOAD_REG(RCX); \ - LOAD_REG(RDX); \ - LOAD_REG(RBX); \ - LOAD_REG(RSP); \ - LOAD_REG(RBP); \ - LOAD_REG(RSI); \ - LOAD_REG(RDI); \ - LOAD_REG(R8); \ - LOAD_REG(R9); \ - LOAD_REG(R10); \ - LOAD_REG(R11); \ - LOAD_REG(R12); \ - LOAD_REG(R13); \ - LOAD_REG(R14); \ - LOAD_REG(R15); \ - LDRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags)); \ - if(A) {LDRx_U12(A, xEmu, offsetof(x64emu_t, ip));} - -#define STORE_XEMU_MINIMUM(A) \ - STORE_REG(RAX); \ - STORE_REG(RCX); \ - STORE_REG(RDX); \ - STORE_REG(RBX); \ - STORE_REG(RSP); \ - STORE_REG(RBP); \ - STORE_REG(RSI); \ - STORE_REG(RDI); \ - STORE_REG(R8); \ - STORE_REG(R9); \ - STRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags)); \ - if(A) {STRx_U12(A, xEmu, offsetof(x64emu_t, ip));} - -// Need to also store current value of some register, as they may be used by functions like setjump -// so RBX, RSP, RBP, R12..R15 (other are scratch or parameters), R10-R11 not usefull, but why not -// RBX, RSP and RBP are already saved in call function -#define STORE_XEMU_CALL(A) \ - STP_REGS(R10, R11); \ - STP_REGS(R12, R13); \ - STP_REGS(R14, R15); \ - if(A) {STPx_S7_offset(xFlags, A, xEmu, offsetof(x64emu_t, eflags));} \ - else {STRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags));} - -#define LOAD_XEMU_CALL(A) \ - if(A) {LDPx_S7_offset(xFlags, A, xEmu, offsetof(x64emu_t, eflags));} \ - else {LDRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags));}; \ - if(A==xRIP) dyn->last_ip = 0 - -#define LOAD_XEMU_REM() \ - LDP_REGS(R10, R11); \ - LDP_REGS(R12, R13); \ - LDP_REGS(R14, R15) - -#define SET_DFNONE(S) if(!dyn->dfnone) {MOVZw(S, d_none); STRw_U12(S, xEmu, offsetof(x64emu_t, df)); dyn->dfnone=1;} -#define SET_DF(S, N) if((N)!=d_none) {MOVZw(S, (N)); STRw_U12(S, xEmu, offsetof(x64emu_t, df)); dyn->dfnone=0;} else SET_DFNONE(S) -#define SET_NODF() dyn->dfnone = 0 -#define SET_DFOK() dyn->dfnone = 1 - -#ifndef READFLAGS -#define READFLAGS(A) \ - if(((A)!=X_PEND) && dyn->state_flags!=SF_SET && dyn->state_flags!=SF_SET_PENDING) { \ - if(dyn->state_flags!=SF_PENDING) { \ - LDRw_U12(x3, xEmu, offsetof(x64emu_t, df)); \ - j64 = (GETMARKF)-(dyn->arm_size); \ - CBZw(x3, j64); \ - } \ - CALL_(UpdateFlags, -1, 0); \ - MARKF; \ - dyn->state_flags = SF_SET; \ - SET_DFOK(); \ - } -#endif -#ifndef SETFLAGS -#define SETFLAGS(A, B) \ - if(dyn->state_flags!=SF_SET && B==SF_SUBSET && (dyn->insts[ninst].x64.need_flags&(~((A)/*|X_PEND*/)))) \ - READFLAGS(dyn->insts[ninst].x64.need_flags&(~(A)|X_PEND)); \ - dyn->state_flags = (B==SF_SUBSET)?SF_SET: \ - ((B==SF_SET_PENDING && !(dyn->insts[ninst].x64.need_flags&X_PEND)?SF_SET:B)) - -#endif -#ifndef JUMP -#define JUMP(A) -#endif -#ifndef BARRIER -#define BARRIER(A) -#endif -#ifndef BARRIER_NEXT -#define BARRIER_NEXT(A) -#endif -#define UFLAG_OP1(A) if(dyn->insts[ninst].x64.need_flags) {STRxw_U12(A, xEmu, offsetof(x64emu_t, op1));} -#define UFLAG_OP2(A) if(dyn->insts[ninst].x64.need_flags) {STRxw_U12(A, xEmu, offsetof(x64emu_t, op2));} -#define UFLAG_OP12(A1, A2) if(dyn->insts[ninst].x64.need_flags) {STRxw_U12(A1, xEmu, offsetof(x64emu_t, op1));STRxw_U12(A2, 0, offsetof(x64emu_t, op2));} -#define UFLAG_RES(A) 
if(dyn->insts[ninst].x64.need_flags) {STRxw_U12(A, xEmu, offsetof(x64emu_t, res));} -#define UFLAG_DF(r, A) if(dyn->insts[ninst].x64.need_flags) {SET_DF(r, A)} -#define UFLAG_IF if(dyn->insts[ninst].x64.need_flags) -#ifndef DEFAULT -#define DEFAULT *ok = -1; BARRIER(2) -#endif -#ifndef NEW_BARRIER_INST -#define NEW_BARRIER_INST -#endif -#ifndef TABLE64 -#define TABLE64(A, V) -#endif -#ifndef FTABLE64 -#define FTABLE64(A, V) -#endif - -#if STEP < 2 -#define GETIP(A) -#define GETIP_(A) -#else -// put value in the Table64 even if not using it for now to avoid difference between Step2 and Step3. Needs to be optimized later... -#define GETIP(A) \ - if(dyn->last_ip && ((A)-dyn->last_ip)<0x1000) { \ - uint64_t _delta_ip = (A)-dyn->last_ip; \ - dyn->last_ip += _delta_ip; \ - if(_delta_ip) { \ - ADDx_U12(xRIP, xRIP, _delta_ip); \ - } \ - } else { \ - dyn->last_ip = (A); \ - if(dyn->last_ip<0xffffffff) { \ - MOV64x(xRIP, dyn->last_ip); \ - } else \ - TABLE64(xRIP, dyn->last_ip); \ - } -#define GETIP_(A) \ - if(dyn->last_ip && ((A)-dyn->last_ip)<0x1000) { \ - uint64_t _delta_ip = (A)-dyn->last_ip; \ - if(_delta_ip) {ADDx_U12(xRIP, xRIP, _delta_ip);}\ - } else { \ - if((A)<0xffffffff) { \ - MOV64x(xRIP, (A)); \ - } else \ - TABLE64(xRIP, (A)); \ - } -#endif - -#if STEP < 2 -#define PASS2IF(A, B) if(A) -#elif STEP == 2 -#define PASS2IF(A, B) if(A) dyn->insts[ninst].pass2choice = B; if(dyn->insts[ninst].pass2choice == B) -#else -#define PASS2IF(A, B) if(dyn->insts[ninst].pass2choice == B) -#endif - -#define MODREG ((nextop&0xC0)==0xC0) - -void arm64_epilog(); -void* arm64_next(x64emu_t* emu, uintptr_t addr); - -#ifndef STEPNAME -#define STEPNAME3(N,M) N##M -#define STEPNAME2(N,M) STEPNAME3(N,M) -#define STEPNAME(N) STEPNAME2(N, STEP) -#endif - -#define arm_pass STEPNAME(arm_pass) - -#define dynarec64_00 STEPNAME(dynarec64_00) -#define dynarec64_0F STEPNAME(dynarec64_0F) -#define dynarec64_64 STEPNAME(dynarec64_64) -#define dynarec64_65 STEPNAME(dynarec64_65) -#define dynarec64_66 STEPNAME(dynarec64_66) -#define dynarec64_67 STEPNAME(dynarec64_67) -#define dynarec64_D8 STEPNAME(dynarec64_D8) -#define dynarec64_D9 STEPNAME(dynarec64_D9) -#define dynarec64_DA STEPNAME(dynarec64_DA) -#define dynarec64_DB STEPNAME(dynarec64_DB) -#define dynarec64_DC STEPNAME(dynarec64_DC) -#define dynarec64_DD STEPNAME(dynarec64_DD) -#define dynarec64_DE STEPNAME(dynarec64_DE) -#define dynarec64_DF STEPNAME(dynarec64_DF) -#define dynarec64_F0 STEPNAME(dynarec64_F0) -#define dynarec64_660F STEPNAME(dynarec64_660F) -#define dynarec64_6664 STEPNAME(dynarec64_6664) -#define dynarec64_F20F STEPNAME(dynarec64_F20F) -#define dynarec64_F30F STEPNAME(dynarec64_F30F) - -#define geted STEPNAME(geted) -#define geted32 STEPNAME(geted32) -#define geted16 STEPNAME(geted16) -#define jump_to_epilog STEPNAME(jump_to_epilog) -#define jump_to_next STEPNAME(jump_to_next) -#define ret_to_epilog STEPNAME(ret_to_epilog) -#define retn_to_epilog STEPNAME(retn_to_epilog) -#define iret_to_epilog STEPNAME(iret_to_epilog) -#define call_c STEPNAME(call_c) -#define call_n STEPNAME(call_n) -#define grab_segdata STEPNAME(grab_segdata) -#define emit_cmp8 STEPNAME(emit_cmp8) -#define emit_cmp16 STEPNAME(emit_cmp16) -#define emit_cmp32 STEPNAME(emit_cmp32) -#define emit_cmp8_0 STEPNAME(emit_cmp8_0) -#define emit_cmp16_0 STEPNAME(emit_cmp16_0) -#define emit_cmp32_0 STEPNAME(emit_cmp32_0) -#define emit_test8 STEPNAME(emit_test8) -#define emit_test16 STEPNAME(emit_test16) -#define emit_test32 STEPNAME(emit_test32) -#define emit_add32 STEPNAME(emit_add32) -#define 
emit_add32c STEPNAME(emit_add32c) -#define emit_add8 STEPNAME(emit_add8) -#define emit_add8c STEPNAME(emit_add8c) -#define emit_sub32 STEPNAME(emit_sub32) -#define emit_sub32c STEPNAME(emit_sub32c) -#define emit_sub8 STEPNAME(emit_sub8) -#define emit_sub8c STEPNAME(emit_sub8c) -#define emit_or32 STEPNAME(emit_or32) -#define emit_or32c STEPNAME(emit_or32c) -#define emit_xor32 STEPNAME(emit_xor32) -#define emit_xor32c STEPNAME(emit_xor32c) -#define emit_and32 STEPNAME(emit_and32) -#define emit_and32c STEPNAME(emit_and32c) -#define emit_or8 STEPNAME(emit_or8) -#define emit_or8c STEPNAME(emit_or8c) -#define emit_xor8 STEPNAME(emit_xor8) -#define emit_xor8c STEPNAME(emit_xor8c) -#define emit_and8 STEPNAME(emit_and8) -#define emit_and8c STEPNAME(emit_and8c) -#define emit_add16 STEPNAME(emit_add16) -#define emit_add16c STEPNAME(emit_add16c) -#define emit_sub16 STEPNAME(emit_sub16) -#define emit_sub16c STEPNAME(emit_sub16c) -#define emit_or16 STEPNAME(emit_or16) -#define emit_or16c STEPNAME(emit_or16c) -#define emit_xor16 STEPNAME(emit_xor16) -#define emit_xor16c STEPNAME(emit_xor16c) -#define emit_and16 STEPNAME(emit_and16) -#define emit_and16c STEPNAME(emit_and16c) -#define emit_inc32 STEPNAME(emit_inc32) -#define emit_inc16 STEPNAME(emit_inc16) -#define emit_inc8 STEPNAME(emit_inc8) -#define emit_dec32 STEPNAME(emit_dec32) -#define emit_dec16 STEPNAME(emit_dec16) -#define emit_dec8 STEPNAME(emit_dec8) -#define emit_adc32 STEPNAME(emit_adc32) -#define emit_adc32c STEPNAME(emit_adc32c) -#define emit_adc8 STEPNAME(emit_adc8) -#define emit_adc8c STEPNAME(emit_adc8c) -#define emit_adc16 STEPNAME(emit_adc16) -#define emit_adc16c STEPNAME(emit_adc16c) -#define emit_sbb32 STEPNAME(emit_sbb32) -#define emit_sbb32c STEPNAME(emit_sbb32c) -#define emit_sbb8 STEPNAME(emit_sbb8) -#define emit_sbb8c STEPNAME(emit_sbb8c) -#define emit_sbb16 STEPNAME(emit_sbb16) -#define emit_sbb16c STEPNAME(emit_sbb16c) -#define emit_neg32 STEPNAME(emit_neg32) -#define emit_neg16 STEPNAME(emit_neg16) -#define emit_neg8 STEPNAME(emit_neg8) -#define emit_shl32 STEPNAME(emit_shl32) -#define emit_shl32c STEPNAME(emit_shl32c) -#define emit_shr32 STEPNAME(emit_shr32) -#define emit_shr32c STEPNAME(emit_shr32c) -#define emit_sar32c STEPNAME(emit_sar32c) -#define emit_rol32c STEPNAME(emit_rol32c) -#define emit_ror32c STEPNAME(emit_ror32c) -#define emit_shrd32c STEPNAME(emit_shrd32c) -#define emit_shld32c STEPNAME(emit_shld32c) - -#define emit_pf STEPNAME(emit_pf) - -#define x87_do_push STEPNAME(x87_do_push) -#define x87_do_push_empty STEPNAME(x87_do_push_empty) -#define x87_do_pop STEPNAME(x87_do_pop) -#define x87_get_cache STEPNAME(x87_get_cache) -#define x87_get_st STEPNAME(x87_get_st) -#define x87_refresh STEPNAME(x87_refresh) -#define x87_forget STEPNAME(x87_forget) -#define x87_reget_st STEPNAME(x87_reget_st) -#define x87_stackcount STEPNAME(x87_stackcount) -#define x87_setround STEPNAME(x87_setround) -#define x87_restoreround STEPNAME(x87_restoreround) -#define sse_setround STEPNAME(sse_setround) -#define mmx_get_reg STEPNAME(mmx_get_reg) -#define mmx_get_reg_empty STEPNAME(mmx_get_reg_empty) -#define sse_get_reg STEPNAME(sse_get_reg) -#define sse_get_reg_empty STEPNAME(sse_get_reg_empty) -#define sse_forget_reg STEPNAME(sse_forget_reg) -#define sse_purge07cache STEPNAME(sse_purge07cache) - -#define fpu_pushcache STEPNAME(fpu_pushcache) -#define fpu_popcache STEPNAME(fpu_popcache) -#define fpu_reset STEPNAME(fpu_reset) -#define fpu_purgecache STEPNAME(fpu_purgecache) -#define mmx_purgecache STEPNAME(mmx_purgecache) -#define 
x87_purgecache STEPNAME(x87_purgecache) -#ifdef HAVE_TRACE -#define fpu_reflectcache STEPNAME(fpu_reflectcache) -#endif - -/* setup r2 to address pointed by */ -uintptr_t geted(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, int64_t* fixaddress, int absmax, uint32_t mask, rex_t rex, int s, int delta); - -/* setup r2 to address pointed by */ -uintptr_t geted32(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, int64_t* fixaddress, int absmax, uint32_t mask, rex_t rex, int s, int delta); - -/* setup r2 to address pointed by */ -uintptr_t geted16(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, int64_t* fixaddress, int absmax, uint32_t mask, int s); - - -// generic x64 helper -void jump_to_epilog(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst); -void jump_to_next(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst); -void ret_to_epilog(dynarec_arm_t* dyn, int ninst); -void retn_to_epilog(dynarec_arm_t* dyn, int ninst, int n); -void iret_to_epilog(dynarec_arm_t* dyn, int ninst, int is64bits); -void call_c(dynarec_arm_t* dyn, int ninst, void* fnc, int reg, int ret, int saveflags, int save_reg); -void call_n(dynarec_arm_t* dyn, int ninst, void* fnc, int w); -void grab_segdata(dynarec_arm_t* dyn, uintptr_t addr, int ninst, int reg, int segment); -void emit_cmp8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5); -void emit_cmp16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5); -void emit_cmp32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5); -void emit_cmp8_0(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4); -void emit_cmp16_0(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4); -void emit_cmp32_0(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4); -void emit_test8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5); -void emit_test16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5); -void emit_test32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); -void emit_add32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); -void emit_add32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4, int s5); -void emit_add8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4); -void emit_add8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); -void emit_sub32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); -void emit_sub32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4, int s5); -void emit_sub8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4); -void emit_sub8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4, int s5); -void emit_or32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); -void emit_or32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4); -void emit_xor32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); -void emit_xor32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4); -void emit_and32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); -void emit_and32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4); -void emit_or8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4); -void 
emit_or8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); -void emit_xor8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4); -void emit_xor8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); -void emit_and8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4); -void emit_and8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); -void emit_add16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4); -//void emit_add16c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); -void emit_sub16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4); -//void emit_sub16c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); -void emit_or16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4); -//void emit_or16c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); -void emit_xor16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4); -//void emit_xor16c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); -void emit_and16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4); -//void emit_and16c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); -void emit_inc32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4); -void emit_inc16(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4); -void emit_inc8(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4); -void emit_dec32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4); -void emit_dec16(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4); -void emit_dec8(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4); -void emit_adc32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); -//void emit_adc32c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); -void emit_adc8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4); -void emit_adc8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4, int s5); -void emit_adc16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4); -//void emit_adc16c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); -void emit_sbb32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); -//void emit_sbb32c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); -void emit_sbb8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4); -void emit_sbb8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4, int s5); -void emit_sbb16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4); -//void emit_sbb16c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); -void emit_neg32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4); -void emit_neg16(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4); -void emit_neg8(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4); -void emit_shl32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); -void emit_shl32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, int s3, int s4); -void emit_shr32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); -void emit_shr32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, int s3, int s4); -void emit_sar32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, int s3, int s4); -void emit_rol32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, int s3, int s4); -void 
emit_ror32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, int s3, int s4); -void emit_shrd32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int32_t c, int s3, int s4); -void emit_shld32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int32_t c, int s3, int s4); - -void emit_pf(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4); - -// x87 helper -// cache of the local stack counter, to avoid upadte at every call -void x87_stackcount(dynarec_arm_t* dyn, int ninst, int scratch); -// fpu push. Return the Dd value to be used -int x87_do_push(dynarec_arm_t* dyn, int ninst); -// fpu push. Do not allocate a cache register. Needs a scratch register to do x87stack synch (or 0 to not do it) -void x87_do_push_empty(dynarec_arm_t* dyn, int ninst, int s1); -// fpu pop. All previous returned Dd should be considered invalid -void x87_do_pop(dynarec_arm_t* dyn, int ninst); -// get cache index for a x87 reg, create the entry if needed -int x87_get_cache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int a); -// get vfpu register for a x87 reg, create the entry if needed -int x87_get_st(dynarec_arm_t* dyn, int ninst, int s1, int s2, int a); -// refresh a value from the cache ->emu (nothing done if value is not cached) -void x87_refresh(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st); -// refresh a value from the cache ->emu and then forget the cache (nothing done if value is not cached) -void x87_forget(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st); -// refresh the cache value from emu -void x87_reget_st(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st); -// Set rounding according to cw flags, return reg to restore flags -int x87_setround(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3); -// Restore round flag -void x87_restoreround(dynarec_arm_t* dyn, int ninst, int s1); -// Set rounding according to mxcsr flags, return reg to restore flags -int sse_setround(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3); - -//MMX helpers -// get neon register for a MMX reg, create the entry if needed -int mmx_get_reg(dynarec_arm_t* dyn, int ninst, int s1, int a); -// get neon register for a MMX reg, but don't try to synch it if it needed to be created -int mmx_get_reg_empty(dynarec_arm_t* dyn, int ninst, int s1, int a); - -//SSE/SSE2 helpers -// get neon register for a SSE reg, create the entry if needed -int sse_get_reg(dynarec_arm_t* dyn, int ninst, int s1, int a); -// get neon register for a SSE reg, but don't try to synch it if it needed to be created -int sse_get_reg_empty(dynarec_arm_t* dyn, int ninst, int s1, int a); -// forget neon register for a SSE reg, create the entry if needed -void sse_forget_reg(dynarec_arm_t* dyn, int ninst, int a); -// purge the XMM0..XMM7 cache (before function call) -void sse_purge07cache(dynarec_arm_t* dyn, int ninst, int s1); - -// common coproc helpers -// reset the cache -void fpu_reset(dynarec_arm_t* dyn, int ninst); -// purge the FPU cache (needs 3 scratch registers) -void fpu_purgecache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3); -// purge MMX cache -void mmx_purgecache(dynarec_arm_t* dyn, int ninst, int s1); -// purge x87 cache -void x87_purgecache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3); -#ifdef HAVE_TRACE -void fpu_reflectcache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3); -#endif -void fpu_pushcache(dynarec_arm_t* dyn, int ninst, int s1, int not07); -void fpu_popcache(dynarec_arm_t* dyn, int ninst, int s1, int not07); - -uintptr_t dynarec64_00(dynarec_arm_t* dyn, 
uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); -uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); -uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int seg, int* ok, int* need_epilog); -//uintptr_t dynarec64_65(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep,int* ok, int* need_epilog); -uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); -uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); -uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); -uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); -//uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); -uintptr_t dynarec64_DB(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); -uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); -uintptr_t dynarec64_DD(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); -//uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); -uintptr_t dynarec64_DF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); -uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); -uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); -uintptr_t dynarec64_6664(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); -uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog); -uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog); - -#if STEP < 2 -#define PASS2(A) -#else -#define PASS2(A) A -#endif - -#if STEP < 3 -#define PASS3(A) -#else -#define PASS3(A) A -#endif - -#if STEP < 3 -#define MAYUSE(A) (void)A -#else -#define MAYUSE(A) -#endif - -#define GOCOND(B, T1, T2) \ - case B+0x0: \ - INST_NAME(T1 "O " T2); \ - GO( TSTw_mask(xFlags, 0b010101, 0) \ - , cEQ, cNE, X_OF) \ - break; \ - case B+0x1: \ - INST_NAME(T1 "NO " T2); \ - GO( TSTw_mask(xFlags, 0b010101, 0) \ - , cNE, cEQ, X_OF) \ - break; \ - case B+0x2: \ - INST_NAME(T1 "C " T2); \ - GO( TSTw_mask(xFlags, 0, 0) \ - , cEQ, cNE, X_CF) \ - break; \ - case B+0x3: \ - INST_NAME(T1 "NC " T2); \ - GO( TSTw_mask(xFlags, 0, 0) \ - , cNE, cEQ, X_CF) \ - break; \ - case B+0x4: \ - INST_NAME(T1 "Z " T2); \ - GO( TSTw_mask(xFlags, 0b011010, 0) \ - , cEQ, cNE, X_ZF) \ - break; \ - case B+0x5: \ - INST_NAME(T1 "NZ " T2); \ - GO( TSTw_mask(xFlags, 0b011010, 0) \ - , cNE, cEQ, X_ZF) \ - break; \ - case B+0x6: \ - INST_NAME(T1 "BE " T2); \ - GO( MOV32w(x1, (1< -#include -#include -#include -#include -#include - -#include "debug.h" 
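As context for the file being removed below (the pass driver, re-created under arm64/ later in this patch), the STEPNAME machinery above is what lets a single translation source serve all four dynarec passes: the same code is compiled once per STEP value, and the token pasting gives each pass its own copy of every symbol. A minimal, self-contained illustration of that idea; my_helper is a hypothetical symbol used only for this sketch, not something from box64:

/* Illustration of the STEPNAME mechanism: the same body is compiled once per
 * pass with a different STEP, and the name pasting yields one symbol per pass. */
#ifndef STEP
#define STEP 0                 /* normally injected by the build, once per pass */
#endif
#define STEPNAME3(N,M) N##M
#define STEPNAME2(N,M) STEPNAME3(N,M)
#define STEPNAME(N)    STEPNAME2(N, STEP)

#define my_helper STEPNAME(my_helper)   /* expands to my_helper0 .. my_helper3 */

static int my_helper(int ninst)
{
#if STEP < 2
    return 0;                  /* early passes only collect sizes and flag info */
#else
    return ninst * 4;          /* later passes know addresses and emit code */
#endif
}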
-#include "box64context.h" -#include "dynarec.h" -#include "emu/x64emu_private.h" -#include "emu/x64run_private.h" -#include "x64run.h" -#include "x64emu.h" -#include "box64stack.h" -#include "emu/x64run_private.h" -#include "x64trace.h" -#include "dynablock.h" -#include "dynarec_arm64.h" -#include "dynarec_arm64_private.h" -#include "arm64_printer.h" -#include "dynarec_arm64_functions.h" -#include "dynarec_arm64_helper.h" -#include "custommem.h" - -#ifndef STEP -#error No STEP defined -#endif - -uintptr_t arm_pass(dynarec_arm_t* dyn, uintptr_t addr) -{ - int ok = 1; - int ninst = 0; - uintptr_t ip = addr; - uintptr_t init_addr = addr; - rex_t rex; - int rep; // 0 none, 1=F2 prefix, 2=F3 prefix - int need_epilog = 1; - dyn->sons_size = 0; - // Clean up (because there are multiple passes) - dyn->state_flags = 0; - dyn->dfnone = 0; - dyn->last_ip = ip; // RIP is always set at start of block! - MAYUSE(init_addr); - fpu_reset(dyn, ninst); - // ok, go now - INIT; - while(ok) { - ip = addr; - if((dyn->insts[ninst].x64.barrier==1)) { - dyn->last_ip = 0; - NEW_BARRIER_INST; - } - NEW_INST; - fpu_reset_scratch(dyn); -#ifdef HAVE_TRACE - if(my_context->dec && box64_dynarec_trace) { - if((trace_end == 0) - || ((ip >= trace_start) && (ip < trace_end))) { - MESSAGE(LOG_DUMP, "TRACE ----\n"); - fpu_reflectcache(dyn, ninst, x1, x2, x3); - GETIP(ip); - MOVx_REG(x1, xRIP); - STORE_XEMU_CALL(xRIP); - MOV32w(x2, 1); - CALL(PrintTrace, -1); - LOAD_XEMU_CALL(xRIP); - MESSAGE(LOG_DUMP, "----------\n"); - } - } -#endif - - rep = 0; - uint8_t pk = PK(0); - while((pk==0xF2) || (pk==0xF3)) { - rep = pk-0xF1; - ++addr; - pk = PK(0); - } - while(pk==0x3E) { //Branch Taken Hint ignored - ++addr; - pk = PK(0); - } - rex.rex = 0; - while(pk>=0x40 && pk<=0x4f) { - rex.rex = pk; - ++addr; - pk = PK(0); - } - - addr = dynarec64_00(dyn, addr, ip, ninst, rex, rep, &ok, &need_epilog); - - INST_EPILOG; - - if(dyn->insts[ninst+1].x64.barrier) { - fpu_purgecache(dyn, ninst, x1, x2, x3); - if(dyn->insts[ninst+1].x64.barrier!=2) { - dyn->state_flags = 0; - dyn->dfnone = 0; - } - } - #if STEP == 0 - if(!ok && !need_epilog && box64_dynarec_bigblock && getProtection(addr+3)&~PROT_CUSTOM && !IsInHotPage(addr+3)) - if(*(uint32_t*)addr!=0) { // check if need to continue (but is next 4 bytes are 0, stop) - uintptr_t next = get_closest_next(dyn, addr); - if(next && ( - (((next-addr)<15) && is_nops(dyn, addr, next-addr)) - ||(((next-addr)<30) && is_instructions(dyn, addr, next-addr)) )) - { - dynarec_log(LOG_DEBUG, "Extend block %p, %p -> %p (ninst=%d)\n", dyn, (void*)addr, (void*)next, ninst); - ok = 1; - } else if(next && (next-addr)<30) { - dynarec_log(LOG_DEBUG, "Cannot extend block %p -> %p (%02X %02X %02X %02X %02X %02X %02X %02x)\n", (void*)addr, (void*)next, PK(0), PK(1), PK(2), PK(3), PK(4), PK(5), PK(6), PK(7)); - } - } - #else - if(!ok && !need_epilog && (addr < (dyn->start+dyn->isize))) { - ok = 1; - } - #endif - if(ok<0) {ok = 0; need_epilog=1;} - ++ninst; - #if STEP == 0 - if(ok && !isJumpTableDefault64((void*)addr)) - #else - if(ok && (ninst==dyn->size)) - #endif - { - #if STEP == 3 - dynarec_log(LOG_DEBUG, "Stopping block %p (%d / %d)\n",(void*)init_addr, ninst, dyn->size); - #endif - BARRIER(2); - fpu_purgecache(dyn, ninst, x1, x2, x3); - jump_to_next(dyn, addr, 0, ninst); - ok=0; need_epilog=0; - } - } - if(need_epilog) { - fpu_purgecache(dyn, ninst, x1, x2, x3); - jump_to_epilog(dyn, ip, 0, ninst); // no linker here, it's an unknow instruction - } - FINI; - MESSAGE(LOG_DUMP, "---- END OF BLOCK ---- (%d, %d sons)\n", 
dyn->size, dyn->sons_size); - return addr; -} \ No newline at end of file diff --git a/src/dynarec/dynarec_arm64_pass0.h b/src/dynarec/dynarec_arm64_pass0.h deleted file mode 100755 index d4818ac5..00000000 --- a/src/dynarec/dynarec_arm64_pass0.h +++ /dev/null @@ -1,39 +0,0 @@ - -#define INIT uintptr_t sav_addr=addr -#define FINI \ - dyn->isize = addr-sav_addr; \ - dyn->insts[ninst].x64.addr = addr; \ - if(ninst) dyn->insts[ninst-1].x64.size = dyn->insts[ninst].x64.addr - dyn->insts[ninst-1].x64.addr - -#define MESSAGE(A, ...) -#define SETFLAGS(A, B) -#define READFLAGS(A) -#define EMIT(A) -#define JUMP(A) add_next(dyn, (uintptr_t)A); dyn->insts[ninst].x64.jmp = A -#define BARRIER(A) dyn->insts[ninst].x64.barrier = A -#define BARRIER_NEXT(A) if(ninstsize) dyn->insts[ninst+1].x64.barrier = A -#define NEW_INST \ - if(dyn->size+3>=dyn->cap) { \ - dyn->insts = (instruction_arm64_t*)realloc(dyn->insts, sizeof(instruction_arm64_t)*dyn->cap*2); \ - memset(&dyn->insts[dyn->cap], 0, sizeof(instruction_arm64_t)*dyn->cap); \ - dyn->cap *= 2; \ - } \ - ++dyn->size; \ - dyn->insts[ninst].x64.addr = ip; \ - if(ninst) dyn->insts[ninst-1].x64.size = dyn->insts[ninst].x64.addr - dyn->insts[ninst-1].x64.addr -#define INST_EPILOG -#define INST_NAME(name) -#define DEFAULT \ - --dyn->size; \ - *ok = -1; \ - if(box64_dynarec_log>=LOG_INFO) {\ - dynarec_log(LOG_NONE, "%p: Dynarec stopped because of Opcode %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X", \ - (void*)ip, PKip(0), \ - PKip(1), PKip(2), PKip(3), \ - PKip(4), PKip(5), PKip(6), \ - PKip(7), PKip(8), PKip(9), \ - PKip(10),PKip(11),PKip(12), \ - PKip(13),PKip(14)); \ - printFunctionAddr(ip, " => "); \ - dynarec_log(LOG_NONE, "\n"); \ - } diff --git a/src/dynarec/dynarec_arm64_pass1.h b/src/dynarec/dynarec_arm64_pass1.h deleted file mode 100755 index a4abcf19..00000000 --- a/src/dynarec/dynarec_arm64_pass1.h +++ /dev/null @@ -1,10 +0,0 @@ -#define INIT -#define FINI -#define MESSAGE(A, ...) -#define EMIT(A) -#define READFLAGS(A) dyn->insts[ninst].x64.use_flags = A -#define SETFLAGS(A,B) {dyn->insts[ninst].x64.set_flags = A; dyn->insts[ninst].x64.state_flags = B;} - -#define NEW_INST -#define INST_EPILOG -#define INST_NAME(name) diff --git a/src/dynarec/dynarec_arm64_pass2.h b/src/dynarec/dynarec_arm64_pass2.h deleted file mode 100755 index 1be5a0de..00000000 --- a/src/dynarec/dynarec_arm64_pass2.h +++ /dev/null @@ -1,11 +0,0 @@ -#define INIT dyn->arm_size = 0 -#define FINI if(ninst) {dyn->insts[ninst].address = (dyn->insts[ninst-1].address+dyn->insts[ninst-1].size);} - -#define MESSAGE(A, ...) 
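Read together, the STEP-specific headers deleted in this hunk (pass0 through pass3, re-added under arm64/ elsewhere in the patch) give that shared source a different role per compilation. The comment block below is a condensed reading of those macros, added here for orientation only:

/* Pass roles, as the STEP-specific headers define the shared macros:
 *  STEP 0: decode/size the x86 stream - NEW_INST grows dyn->insts[] and records
 *          x64 addresses, JUMP/BARRIER note control flow, EMIT is a no-op,
 *          DEFAULT aborts the block when an opcode is not implemented.
 *  STEP 1: flag analysis - READFLAGS/SETFLAGS record which x86 flags each
 *          instruction uses and sets, feeding the later need_flags computation.
 *  STEP 2: native sizing - EMIT only advances size counters so every emitted
 *          instruction's address is known before memory is allocated, and
 *          TABLE64 reserves literal-pool slots without loading them.
 *  STEP 3: emission - EMIT writes the 32-bit opcodes into dyn->block (dumping
 *          them when box64_dynarec_dump is set) and TABLE64 becomes a literal load. */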
-#define EMIT(A) dyn->insts[ninst].size+=4; dyn->arm_size+=4 -#define NEW_INST if(ninst) {dyn->insts[ninst].address = (dyn->insts[ninst-1].address+dyn->insts[ninst-1].size);} -#define INST_EPILOG dyn->insts[ninst].epilog = dyn->arm_size; -#define INST_NAME(name) -#define NEW_BARRIER_INST if(ninst) ++dyn->sons_size -#define TABLE64(A, V) {Table64(dyn, (V)); EMIT(0);} -#define FTABLE64(A, V) {mmx87_regs_t v = {.d = V}; Table64(dyn, v.q); EMIT(0);} \ No newline at end of file diff --git a/src/dynarec/dynarec_arm64_pass3.h b/src/dynarec/dynarec_arm64_pass3.h deleted file mode 100755 index 25b852e3..00000000 --- a/src/dynarec/dynarec_arm64_pass3.h +++ /dev/null @@ -1,36 +0,0 @@ -#define INIT -#define FINI -#define EMIT(A) \ - if(box64_dynarec_dump) {dynarec_log(LOG_NONE, "\t%08x\t%s\n", (uint32_t)(A), arm64_print(A, (uintptr_t)dyn->block));} \ - *(uint32_t*)(dyn->block) = (uint32_t)(A); \ - dyn->block += 4; dyn->arm_size += 4;\ - dyn->insts[ninst].size2 += 4 - -#define MESSAGE(A, ...) if(box64_dynarec_dump) dynarec_log(LOG_NONE, __VA_ARGS__) -#define NEW_INST -#define INST_EPILOG -#define INST_NAME(name) \ - if(box64_dynarec_dump) {\ - printf_x64_instruction(my_context->dec, &dyn->insts[ninst].x64, name); \ - dynarec_log(LOG_NONE, "%s%p: %d emited opcodes, state=%d/%d, set=%X, use=%X, need=%X%s\n", \ - (box64_dynarec_dump>1)?"\e[32m":"", \ - (void*)(dyn->arm_start+dyn->insts[ninst].address), \ - dyn->insts[ninst].size/4, \ - dyn->insts[ninst].x64.state_flags, \ - dyn->state_flags, \ - dyn->insts[ninst].x64.set_flags, \ - dyn->insts[ninst].x64.use_flags, \ - dyn->insts[ninst].x64.need_flags, \ - (box64_dynarec_dump>1)?"\e[m":""); \ - } - -#define NEW_BARRIER_INST \ - if(ninst) { \ - dyn->sons_x64[dyn->sons_size] = (uintptr_t)ip; \ - dyn->sons_arm[dyn->sons_size] = dyn->block; \ - MESSAGE(LOG_DUMP, "----> potential Son here\n");\ - ++dyn->sons_size; \ - } - -#define TABLE64(A, V) {int val64offset = Table64(dyn, (V)); MESSAGE(LOG_DUMP, " Table64: 0x%lx\n", (V)); LDRx_literal(A, val64offset);} -#define FTABLE64(A, V) {mmx87_regs_t v = {.d = V}; int val64offset = Table64(dyn, v.q); MESSAGE(LOG_DUMP, " FTable64: %g\n", v.d); VLDR64_literal(A, val64offset);} \ No newline at end of file diff --git a/src/dynarec/dynarec_arm64_private.h b/src/dynarec/dynarec_arm64_private.h deleted file mode 100755 index cae1b4bd..00000000 --- a/src/dynarec/dynarec_arm64_private.h +++ /dev/null @@ -1,64 +0,0 @@ -#ifndef __DYNAREC_ARM_PRIVATE_H_ -#define __DYNAREC_ARM_PRIVATE_H_ - -#include "dynarec_private.h" - -typedef struct x64emu_s x64emu_t; -typedef struct dynablock_s dynablock_t; - -typedef struct instruction_arm64_s { - instruction_x64_t x64; - uintptr_t address; // (start) address of the arm emited instruction - uintptr_t epilog; // epilog of current instruction (can be start of next, of barrier stuff) - int size; // size of the arm emited instruction - int size2; // size of the arm emited instrucion after pass2 - uintptr_t mark, mark2, mark3; - uintptr_t markf; - uintptr_t markseg; - uintptr_t marklock; - int pass2choice;// value for choices that are fixed on pass2 for pass3 - uintptr_t natcall; - int retn; -} instruction_arm64_t; - -typedef struct dynarec_arm_s { - instruction_arm64_t *insts; - int32_t size; - int32_t cap; - uintptr_t start; // start of the block - uint32_t isize; // size in byte of x64 instructions included - void* block; // memory pointer where next instruction is emited - uintptr_t arm_start; // start of the arm code - size_t arm_size; // size of emitted arm code - int state_flags;// actual 
state for on-demand flags - uintptr_t last_ip; // last set IP in RIP (or NULL if unclean state) - int8_t x87cache[8];// cache status for the 8 x87 register behind the fpu stack - int8_t x87reg[8]; // reg used for x87cache entry - int8_t mmxcache[8];// cache status for the 8 MMX registers - int8_t ssecache[16];// cache status for the 16 SSE(2) registers - int8_t fpuused[32];// all 8..31 Q reg from fpu, used by x87, sse and mmx - int x87stack; // cache stack counter - int fpu_scratch;// scratch counter - int fpu_reg; // x87/sse/mmx reg counter - int dfnone; // if defered flags is already set to df_none - uint64_t *table64; // table of 64bits value - int table64size;// size of table (will be appended at end of executable code) - int table64cap; - uintptr_t tablestart; - uintptr_t* next; // variable array of "next" jump address - int next_sz; - int next_cap; - uintptr_t* sons_x64; // the x64 address of potential dynablock sons - void** sons_arm; // the arm address of potential dynablock sons - int sons_size; // number of potential dynablock sons - dynablock_t* dynablock; -} dynarec_arm_t; - -void add_next(dynarec_arm_t *dyn, uintptr_t addr); -uintptr_t get_closest_next(dynarec_arm_t *dyn, uintptr_t addr); -int is_nops(dynarec_arm_t *dyn, uintptr_t addr, int n); -int is_instructions(dynarec_arm_t *dyn, uintptr_t addr, int n); - -int Table64(dynarec_arm_t *dyn, uint64_t val); // add a value to etable64 (if needed) and gives back the imm19 to use in LDR_literal - -#endif //__DYNAREC_ARM_PRIVATE_H_ diff --git a/src/dynarec/dynarec_native.c b/src/dynarec/dynarec_native.c new file mode 100755 index 00000000..2424e5ca --- /dev/null +++ b/src/dynarec/dynarec_native.c @@ -0,0 +1,535 @@ +#include +#include +#include +#include +#include + +#include "debug.h" +#include "box64context.h" +#include "custommem.h" +#include "dynarec.h" +#include "emu/x64emu_private.h" +#include "tools/bridge_private.h" +#include "x64run.h" +#include "x64emu.h" +#include "box64stack.h" +#include "callback.h" +#include "emu/x64run_private.h" +#include "x64trace.h" +#include "dynablock.h" +#include "dynablock_private.h" +#include "elfloader.h" + +#include "dynarec_native.h" +#include "dynarec_arch.h" + +void printf_x64_instruction(zydis_dec_t* dec, instruction_x64_t* inst, const char* name) { + uint8_t *ip = (uint8_t*)inst->addr; + if(ip[0]==0xcc && ip[1]=='S' && ip[2]=='C') { + uintptr_t a = *(uintptr_t*)(ip+3); + if(a==0) { + dynarec_log(LOG_NONE, "%s%p: Exit x64emu%s\n", (box64_dynarec_dump>1)?"\e[1m":"", (void*)ip, (box64_dynarec_dump>1)?"\e[m":""); + } else { + dynarec_log(LOG_NONE, "%s%p: Native call to %p%s\n", (box64_dynarec_dump>1)?"\e[1m":"", (void*)ip, (void*)a, (box64_dynarec_dump>1)?"\e[m":""); + } + } else { + if(dec) { + dynarec_log(LOG_NONE, "%s%p: %s", (box64_dynarec_dump>1)?"\e[1m":"", ip, DecodeX64Trace(dec, inst->addr)); + } else { + dynarec_log(LOG_NONE, "%s%p: ", (box64_dynarec_dump>1)?"\e[1m":"", ip); + for(int i=0; isize; ++i) { + dynarec_log(LOG_NONE, "%02X ", ip[i]); + } + dynarec_log(LOG_NONE, " %s", name); + } + // print Call function name if possible + if(ip[0]==0xE8 || ip[0]==0xE9) { // Call / Jmp + uintptr_t nextaddr = (uintptr_t)ip + 5 + *((int32_t*)(ip+1)); + printFunctionAddr(nextaddr, "=> "); + } else if(ip[0]==0xFF) { + if(ip[1]==0x25) { + uintptr_t nextaddr = (uintptr_t)ip + 6 + *((int32_t*)(ip+2)); + printFunctionAddr(nextaddr, "=> "); + } + } + // end of line and colors + dynarec_log(LOG_NONE, "%s\n", (box64_dynarec_dump>1)?"\e[m":""); + } +} + +void add_next(dynarec_arm_t *dyn, uintptr_t 
addr) { + if(!box64_dynarec_bigblock) + return; + for(int i=0; inext_sz; ++i) + if(dyn->next[i]==addr) + return; + if(dyn->next_sz == dyn->next_cap) { + dyn->next_cap += 16; + dyn->next = (uintptr_t*)realloc(dyn->next, dyn->next_cap*sizeof(uintptr_t)); + } + dyn->next[dyn->next_sz++] = addr; +} +uintptr_t get_closest_next(dynarec_arm_t *dyn, uintptr_t addr) { + // get closest, but no addresses befores + uintptr_t best = 0; + int i = 0; + while((inext_sz) && (best!=addr)) { + if(dyn->next[i]next+i, dyn->next+i+1, (dyn->next_sz-i-1)*sizeof(uintptr_t)); + --dyn->next_sz; + } else { + if((dyn->next[i]next[i]; + ++i; + } + } + return best; +} +#define PK(A) (*((uint8_t*)(addr+(A)))) +int is_nops(dynarec_arm_t *dyn, uintptr_t addr, int n) +{ + if(!n) + return 1; + if(PK(0)==0x90) + return is_nops(dyn, addr+1, n-1); + if(n>1 && PK(0)==0x66) // if opcode start with 0x66, and there is more after, than is *can* be a NOP + return is_nops(dyn, addr+1, n-1); + if(n>2 && PK(0)==0x0f && PK(1)==0x1f && PK(2)==0x00) + return is_nops(dyn, addr+3, n-3); + if(n>2 && PK(0)==0x8d && PK(1)==0x76 && PK(2)==0x00) // lea esi, [esi] + return is_nops(dyn, addr+3, n-3); + if(n>3 && PK(0)==0x0f && PK(1)==0x1f && PK(2)==0x40 && PK(3)==0x00) + return is_nops(dyn, addr+4, n-4); + if(n>3 && PK(0)==0x8d && PK(1)==0x74 && PK(2)==0x26 && PK(3)==0x00) + return is_nops(dyn, addr+4, n-4); + if(n>4 && PK(0)==0x0f && PK(1)==0x1f && PK(2)==0x44 && PK(3)==0x00 && PK(4)==0x00) + return is_nops(dyn, addr+5, n-5); + if(n>5 && PK(0)==0x8d && PK(1)==0xb6 && PK(2)==0x00 && PK(3)==0x00 && PK(4)==0x00 && PK(5)==0x00) + return is_nops(dyn, addr+6, n-6); + if(n>6 && PK(0)==0x0f && PK(1)==0x1f && PK(2)==0x80 && PK(3)==0x00 && PK(4)==0x00 && PK(5)==0x00 && PK(6)==0x00) + return is_nops(dyn, addr+7, n-7); + if(n>6 && PK(0)==0x8d && PK(1)==0xb4 && PK(2)==0x26 && PK(3)==0x00 && PK(4)==0x00 && PK(5)==0x00 && PK(6)==0x00) // lea esi, [esi+0] + return is_nops(dyn, addr+7, n-7); + if(n>7 && PK(0)==0x0f && PK(1)==0x1f && PK(2)==0x84 && PK(3)==0x00 && PK(4)==0x00 && PK(5)==0x00 && PK(6)==0x00 && PK(7)==0x00) + return is_nops(dyn, addr+8, n-8); + return 0; +} + +// return size of next instuciton, -1 is unknown +// not all instrction are setup +int next_instruction(dynarec_arm_t *dyn, uintptr_t addr) +{ + uint8_t opcode = PK(0); + uint8_t nextop; + switch (opcode) { + case 0x66: + opcode = PK(1); + switch(opcode) { + case 0x90: + return 2; + } + break; + case 0x81: + nextop = PK(1); + return fakeed(dyn, addr+2, 0, nextop)-addr + 4; + case 0x83: + nextop = PK(1); + return fakeed(dyn, addr+2, 0, nextop)-addr + 1; + case 0x84: + case 0x85: + case 0x88: + case 0x89: + case 0x8A: + case 0x8B: + case 0x8C: + case 0x8D: + case 0x8E: + case 0x8F: + nextop = PK(1); + return fakeed(dyn, addr+2, 0, nextop)-addr; + case 0x50: + case 0x51: + case 0x52: + case 0x53: + case 0x54: + case 0x55: + case 0x56: + case 0x57: + case 0x58: + case 0x59: + case 0x5A: + case 0x5B: + case 0x5C: + case 0x5D: + case 0x5E: + case 0x5F: + case 0x90: + case 0x91: + case 0x92: + case 0x93: + case 0x94: + case 0x95: + case 0x96: + case 0x97: + case 0x98: + case 0x99: + case 0x9B: + case 0x9C: + case 0x9D: + case 0x9E: + case 0x9F: + return 1; + case 0xA0: + case 0xA1: + case 0xA2: + case 0xA3: + return 5; + case 0xB0: + case 0xB1: + case 0xB2: + case 0xB3: + case 0xB4: + case 0xB5: + case 0xB6: + case 0xB7: + return 2; + case 0xB8: + case 0xB9: + case 0xBA: + case 0xBB: + case 0xBC: + case 0xBD: + case 0xBE: + case 0xBF: + return 5; + case 0xFF: + nextop = PK(1); + switch((nextop>>3)&7) { 
+ case 0: // INC Ed + case 1: //DEC Ed + case 2: // CALL Ed + case 4: // JMP Ed + case 6: // Push Ed + return fakeed(dyn, addr+2, 0, nextop)-addr; + } + break; + default: + break; + } + return -1; +} +#undef PK + +int is_instructions(dynarec_arm_t *dyn, uintptr_t addr, int n) +{ + int i = 0; + while(isize) + return X_PEND; // no more instructions, or too many jmp loop, stop + + uint32_t needed = dyn->insts[ninst].x64.use_flags; + if(needed) { + setf &= ~needed; + if(!setf) // all flags already used, no need to continue + return needed; + } + + if(!needed && !dyn->insts[ninst].x64.set_flags && !dyn->insts[ninst].x64.jmp_insts) { + int start = ninst; + int end = ninst; + while(endsize && !dyn->insts[end].x64.use_flags && !dyn->insts[end].x64.set_flags && !dyn->insts[end].x64.jmp_insts) + ++end; + needed = needed_flags(dyn, end, setf, recurse); + for(int i=start; iinsts[i].x64.need_flags = needed; + return needed; + } + + if(dyn->insts[ninst].x64.set_flags && (dyn->insts[ninst].x64.state_flags!=SF_MAYSET)) { + if((setf & ~dyn->insts[ninst].x64.set_flags) == 0) + return needed; // all done, gives all the flags needed + setf |= dyn->insts[ninst].x64.set_flags; // add new flags to continue + } + + int jinst = dyn->insts[ninst].x64.jmp_insts; + if(dyn->insts[ninst].x64.jmp) { + dyn->insts[ninst].x64.need_flags = (jinst==-1)?X_PEND:needed_flags(dyn, jinst, setf, recurse+1); + if(dyn->insts[ninst].x64.use_flags) // conditionnal jump + dyn->insts[ninst].x64.need_flags |= needed_flags(dyn, ninst+1, setf, recurse); + } else + dyn->insts[ninst].x64.need_flags = needed_flags(dyn, ninst+1, setf, recurse); + if(dyn->insts[ninst].x64.state_flags==SF_MAYSET) + needed |= dyn->insts[ninst].x64.need_flags; + else + needed |= (dyn->insts[ninst].x64.need_flags & ~dyn->insts[ninst].x64.set_flags); + if(needed == (X_PEND|X_ALL)) + needed = X_ALL; + return needed; +} + +instsize_t* addInst(instsize_t* insts, size_t* size, size_t* cap, int x64_size, int native_size) +{ + // x64 instruction is <16 bytes + int toadd; + if(x64_size>native_size) + toadd = 1 + x64_size/15; + else + toadd = 1 + native_size/15; + if((*size)+toadd>(*cap)) { + *cap = (*size)+toadd; + insts = (instsize_t*)realloc(insts, (*cap)*sizeof(instsize_t)); + } + while(toadd) { + if(x64_size>15) + insts[*size].x64 = 15; + else + insts[*size].x64 = x64_size; + x64_size -= insts[*size].x64; + if(native_size>15) + insts[*size].nat = 15; + else + insts[*size].nat = native_size; + native_size -= insts[*size].nat; + ++(*size); + --toadd; + } + return insts; +} + +// add a value to table64 (if needed) and gives back the imm19 to use in LDR_literal +int Table64(dynarec_arm_t *dyn, uint64_t val) +{ + // find the value if already present + int idx = -1; + for(int i=0; itable64size && (idx==-1); ++i) + if(dyn->table64[i] == val) + idx = i; + // not found, add it + if(idx==-1) { + if(dyn->table64size == dyn->table64cap) { + dyn->table64cap+=4; + dyn->table64 = (uint64_t*)realloc(dyn->table64, dyn->table64cap * sizeof(uint64_t)); + } + idx = dyn->table64size++; + dyn->table64[idx] = val; + } + // calculate offset + int delta = dyn->tablestart + idx*sizeof(uint64_t) - (uintptr_t)dyn->block; + return delta; +} + +__thread void* current_helper = NULL; + +void CancelBlock64() +{ + dynarec_arm_t* helper = (dynarec_arm_t*)current_helper; + current_helper = NULL; + if(!helper) + return; + free(helper->next); + free(helper->insts); + free(helper->table64); + free(helper->sons_x64); + free(helper->sons_native); + if(helper->dynablock && helper->dynablock->block) + 
FreeDynarecMap(helper->dynablock, (uintptr_t)helper->dynablock->block, helper->dynablock->size); +} + +void* FillBlock64(dynablock_t* block, uintptr_t addr) { + if(IsInHotPage(addr)) { + dynarec_log(LOG_DEBUG, "Cancelling dynarec FillBlock on hotpage for %p\n", (void*)addr); + return NULL; + } + if(addr>=box64_nodynarec_start && addrdone = 1; + return (void*)block; + } + // protect the 1st page + protectDB(addr, 1); + // init the helper + dynarec_arm_t helper = {0}; + current_helper = &helper; + helper.dynablock = block; + helper.start = addr; + uintptr_t start = addr; + helper.cap = 64; // needs epilog handling + helper.insts = (instruction_native_t*)calloc(helper.cap, sizeof(instruction_native_t)); + // pass 0, addresses, x86 jump addresses, overall size of the block + uintptr_t end = native_pass0(&helper, addr); + // no need for next anymore + free(helper.next); + helper.next_sz = helper.next_cap = 0; + helper.next = NULL; + // basic checks + if(!helper.size) { + dynarec_log(LOG_INFO, "Warning, null-sized dynarec block (%p)\n", (void*)addr); + CancelBlock64(); + return (void*)block; + } + if(!isprotectedDB(addr, 1)) { + dynarec_log(LOG_INFO, "Warning, write on current page on pass0, aborting dynablock creation (%p)\n", (void*)addr); + CancelBlock64(); + return NULL; + } + // protect the block of it goes over the 1st page + if((addr&~0xfff)!=(end&~0xfff)) // need to protect some other pages too + protectDB(addr, end-addr); //end is 1byte after actual end + // compute hash signature + uint32_t hash = X31_hash_code((void*)addr, end-addr); + // calculate barriers + for(int i=0; i=end) + helper.insts[i].x64.jmp_insts = -1; + else { + // find jump address instruction + int k=-1; + for(int i2=0; (i21)?"\e[01;36m":"", GetTID(), helper.native_size, helper.isize); + printFunctionAddr(helper.start, " => "); + dynarec_log(LOG_NONE, "%s\n", (box64_dynarec_dump>1)?"\e[m":""); + } + int oldtable64size = helper.table64size; + size_t oldarmsize = helper.native_size; + helper.native_size = 0; + helper.table64size = 0; // reset table64 (but not the cap) + native_pass3(&helper, addr); + if((oldarmsize!=helper.native_size) || (oldtable64size %d\n", helper.insts[i].size2, helper.insts[i].size); + } + printf_log(LOG_NONE, "Table64 \t%d -> %d\n", oldtable64size*8, helper.table64size*8); + printf_log(LOG_NONE, " ------------\n"); + //TODO: Cancel block and return empty one + } + // add table64 if needed + if(helper.table64size) { + memcpy((void*)helper.tablestart, helper.table64, helper.table64size*8); + } + // all done... + __clear_cache(p, p+sz); // need to clear the cache before execution... 
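At this point in FillBlock64 the literal pool has just been copied and the instruction cache flushed, so a condensed view of the whole block-building sequence may help; steps marked as inferred come from the STEP headers elsewhere in this patch rather than from this hunk, and this is an orientation sketch, not additional logic:

/* FillBlock64, condensed:
 *  1. reject hot pages and no-dynarec ranges, protectDB() the first page;
 *  2. native_pass0: decode the x86 block into helper.insts[] and find its end;
 *  3. resolve intra-block jump targets (x64.jmp_insts), barriers, and hash the
 *     source bytes with X31_hash_code;
 *  4. flag-analysis and native-sizing passes (STEP 1 and 2) fill per-instruction
 *     flag needs, native sizes and Table64 slots (inferred from the pass headers);
 *  5. emit with native_pass3 into the allocated block p of size sz, then copy the
 *     64-bit literal table to helper.tablestart;
 *  6. __clear_cache, record instsize[] for signal handling, re-check the hash and
 *     page protection, and cancel the block if anything changed underneath. */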
+ // keep size of instructions for signal handling + { + size_t cap = 1; + for(int i=0; ihelper.insts[i].size)?helper.insts[i].x64.size:helper.insts[i].size)/15; + size_t size = 0; + block->instsize = (instsize_t*)calloc(cap, sizeof(instsize_t)); + for(int i=0; iinstsize = addInst(block->instsize, &size, &cap, helper.insts[i].x64.size, helper.insts[i].size/4); + block->instsize = addInst(block->instsize, &size, &cap, 0, 0); // add a "end of block" mark, just in case + } + // ok, free the helper now + free(helper.insts); + helper.insts = NULL; + free(helper.table64); + helper.table64 = NULL; + block->size = sz; + block->isize = helper.size; + block->block = p; + block->need_test = 0; + //block->x64_addr = (void*)start; + block->x64_size = end-start; + block->hash = X31_hash_code(block->x64_addr, block->x64_size); + // Check if something changed, to abbort if it as + if((block->hash != hash)) { + dynarec_log(LOG_INFO, "Warning, a block changed while beeing processed hash(%p:%ld)=%x/%x\n", block->x64_addr, block->x64_size, block->hash, hash); + CancelBlock64(); + return NULL; + } // fill sons if any + if(!isprotectedDB(addr, end-addr)) { + dynarec_log(LOG_INFO, "Warning, block unprotected while beeing processed %p:%ld, cancelling\n", block->x64_addr, block->x64_size); + CancelBlock64(); + return NULL; + //protectDB(addr, end-addr); + } + dynablock_t** sons = NULL; + int sons_size = 0; + if(helper.sons_size) { + sons = (dynablock_t**)calloc(helper.sons_size, sizeof(dynablock_t*)); + for (int i=0; iparent, helper.sons_x64[i], &created); + if(created) { // avoid breaking a working block! + son->block = helper.sons_native[i]; + son->x64_addr = (void*)helper.sons_x64[i]; + son->x64_size = end-helper.sons_x64[i]; + if(!son->x64_size) {printf_log(LOG_NONE, "Warning, son with null x64 size! 
(@%p / ARM=%p)", son->x64_addr, son->block);} + son->father = block; + son->size = sz + son->block - block->block; // update size count, for debugging + //son->done = 1; + if(!son->parent) + son->parent = block->parent; + sons[sons_size] = son; + ++sons_size; + } + } + if(sons_size) { + block->sons = sons; + block->sons_size = sons_size; + } else + free(sons); + } + free(helper.sons_x64); + helper.sons_x64 = NULL; + free(helper.sons_native); + helper.sons_native = NULL; + current_helper = NULL; + //block->done = 1; + return (void*)block; +} diff --git a/src/dynarec/native_lock.h b/src/dynarec/native_lock.h new file mode 100755 index 00000000..056947fd --- /dev/null +++ b/src/dynarec/native_lock.h @@ -0,0 +1,25 @@ +#ifndef __NATIVE_LOCK__H__ +#define __NATIVE_LOCK__H__ + +#ifdef ARM64 +#include "arm64/arm64_lock.h" + +#define native_lock_read_b(A) arm64_lock_read_b(A) +#define native_lock_write_b(A, B) arm64_lock_write_b(A, B) +#define native_lock_read_h(A) arm64_lock_read_h(A) +#define native_lock_write_h(A, B) arm64_lock_write_h(A, B) +#define native_lock_read_d(A) arm64_lock_read_d(A) +#define native_lock_write_d(A, B) arm64_lock_write_d(A, B) +#define native_lock_read_dd(A) arm64_lock_read_dd(A) +#define native_lock_write_dd(A, B) arm64_lock_write_dd(A, B) +#define native_lock_read_dq(A, B, C) arm64_lock_read_dq(A, B, C) +#define native_lock_write_dq(A, B, C) arm64_lock_write_dq(A, B, C) +#define native_lock_xchg(A, B) arm64_lock_xchg(A, B) +#define native_lock_storeifref(A, B, C) arm64_lock_storeifref(A, B, C) +#define native_lock_storeifnull(A, B) arm64_lock_storeifnull(A, B) + +#else +#error Unsupported architecture +#endif + +#endif //#define __NATIVE_LOCK__H__ \ No newline at end of file diff --git a/src/emu/x64run.c b/src/emu/x64run.c index af5b0c8e..57238f86 100755 --- a/src/emu/x64run.c +++ b/src/emu/x64run.c @@ -21,7 +21,7 @@ #include "bridge.h" #include "signals.h" #ifdef DYNAREC -#include "../dynarec/arm64_lock.h" +#include "../dynarec/native_lock.h" #endif #include "modrm.h" diff --git a/src/emu/x64run0f.c b/src/emu/x64run0f.c index 508b3484..7546957f 100644 --- a/src/emu/x64run0f.c +++ b/src/emu/x64run0f.c @@ -23,7 +23,7 @@ #include "signals.h" #ifdef DYNAREC #include "custommem.h" -#include "../dynarec/arm64_lock.h" +#include "../dynarec/native_lock.h" #endif #include "modrm.h" diff --git a/src/emu/x64run66.c b/src/emu/x64run66.c index 72c83e47..3b4ebf4c 100644 --- a/src/emu/x64run66.c +++ b/src/emu/x64run66.c @@ -20,7 +20,7 @@ #include "box64context.h" #include "bridge.h" #ifdef DYNAREC -#include "../dynarec/arm64_lock.h" +#include "../dynarec/native_lock.h" #endif #include "modrm.h" diff --git a/src/emu/x64run66f0.c b/src/emu/x64run66f0.c index 8de9e12a..3837eba1 100644 --- a/src/emu/x64run66f0.c +++ b/src/emu/x64run66f0.c @@ -19,7 +19,9 @@ #include "x87emu_private.h" #include "box64context.h" #include "bridge.h" -#include "dynarec/arm64_lock.h" +#ifdef DYNAREC +#include "dynarec/native_lock.h" +#endif #include "modrm.h" @@ -55,10 +57,10 @@ int Run66F0(x64emu_t *emu, rex_t rex) GETGW; #ifdef DYNAREC do { - tmp16u = arm64_lock_read_h(EW); + tmp16u = native_lock_read_h(EW); cmp16(emu, R_AX, tmp16u); if(ACCESS_FLAG(F_ZF)) { - tmp32s = arm64_lock_write_h(EW, GW->word[0]); + tmp32s = native_lock_write_h(EW, GW->word[0]); } else { R_AX = tmp16u; tmp32s = 0; @@ -89,14 +91,14 @@ int Run66F0(x64emu_t *emu, rex_t rex) GETGW; \ if(rex.w) { \ do { \ - tmp64u = arm64_lock_read_dd(ED); \ + tmp64u = native_lock_read_dd(ED); \ tmp64u = OP##64(emu, tmp64u, GD->q[0]); \ - } while 
(arm64_lock_write_dd(ED, tmp64u)); \ + } while (native_lock_write_dd(ED, tmp64u)); \ } else { \ do { \ - tmp16u = arm64_lock_read_h(ED); \ + tmp16u = native_lock_read_h(ED); \ tmp16u = OP##16(emu, tmp16u, GW->word[0]); \ - } while (arm64_lock_write_d(ED, tmp16u)); \ + } while (native_lock_write_d(ED, tmp16u)); \ if(MODREG) \ EW->word[1] = 0; \ } \ @@ -181,13 +183,13 @@ int Run66F0(x64emu_t *emu, rex_t rex) } else switch((nextop>>3)&7) { - case 0: do { tmp16u2 = arm64_lock_read_h(ED); tmp16u2 = add16(emu, tmp16u2, tmp64u);} while(arm64_lock_write_h(ED, tmp16u2)); break; - case 1: do { tmp16u2 = arm64_lock_read_h(ED); tmp16u2 = or16(emu, tmp16u2, tmp64u);} while(arm64_lock_write_h(ED, tmp16u2)); break; - case 2: do { tmp16u2 = arm64_lock_read_h(ED); tmp16u2 = adc16(emu, tmp16u2, tmp64u);} while(arm64_lock_write_h(ED, tmp16u2)); break; - case 3: do { tmp16u2 = arm64_lock_read_h(ED); tmp16u2 = sbb16(emu, tmp16u2, tmp64u);} while(arm64_lock_write_h(ED, tmp16u2)); break; - case 4: do { tmp16u2 = arm64_lock_read_h(ED); tmp16u2 = and16(emu, tmp16u2, tmp64u);} while(arm64_lock_write_h(ED, tmp16u2)); break; - case 5: do { tmp16u2 = arm64_lock_read_h(ED); tmp16u2 = sub16(emu, tmp16u2, tmp64u);} while(arm64_lock_write_h(ED, tmp16u2)); break; - case 6: do { tmp16u2 = arm64_lock_read_h(ED); tmp16u2 = xor16(emu, tmp16u2, tmp64u);} while(arm64_lock_write_h(ED, tmp16u2)); break; + case 0: do { tmp16u2 = native_lock_read_h(ED); tmp16u2 = add16(emu, tmp16u2, tmp64u);} while(native_lock_write_h(ED, tmp16u2)); break; + case 1: do { tmp16u2 = native_lock_read_h(ED); tmp16u2 = or16(emu, tmp16u2, tmp64u);} while(native_lock_write_h(ED, tmp16u2)); break; + case 2: do { tmp16u2 = native_lock_read_h(ED); tmp16u2 = adc16(emu, tmp16u2, tmp64u);} while(native_lock_write_h(ED, tmp16u2)); break; + case 3: do { tmp16u2 = native_lock_read_h(ED); tmp16u2 = sbb16(emu, tmp16u2, tmp64u);} while(native_lock_write_h(ED, tmp16u2)); break; + case 4: do { tmp16u2 = native_lock_read_h(ED); tmp16u2 = and16(emu, tmp16u2, tmp64u);} while(native_lock_write_h(ED, tmp16u2)); break; + case 5: do { tmp16u2 = native_lock_read_h(ED); tmp16u2 = sub16(emu, tmp16u2, tmp64u);} while(native_lock_write_h(ED, tmp16u2)); break; + case 6: do { tmp16u2 = native_lock_read_h(ED); tmp16u2 = xor16(emu, tmp16u2, tmp64u);} while(native_lock_write_h(ED, tmp16u2)); break; case 7: cmp16(emu, ED->word[0], tmp64u); break; } #else @@ -217,29 +219,29 @@ int Run66F0(x64emu_t *emu, rex_t rex) // unaligned do { tmp64u = ED->q[0] & 0xffffffffffffff00LL; - tmp64u |= arm64_lock_read_b(ED); + tmp64u |= native_lock_read_b(ED); tmp64u = inc64(emu, tmp64u); - } while(arm64_lock_write_b(ED, tmp64u&0xff)); + } while(native_lock_write_b(ED, tmp64u&0xff)); ED->q[0] = tmp64u; } else do { - tmp64u = arm64_lock_read_dd(ED); - } while(arm64_lock_write_dd(ED, inc64(emu, tmp64u))); + tmp64u = native_lock_read_dd(ED); + } while(native_lock_write_dd(ED, inc64(emu, tmp64u))); else { if((uintptr_t)ED&1) { //meh. 
do { tmp16u = ED->word[0]; tmp16u &=~0xff; - tmp16u |= arm64_lock_read_b(ED); + tmp16u |= native_lock_read_b(ED); tmp16u = inc16(emu, tmp16u); - } while(arm64_lock_write_b(ED, tmp16u&0xff)); + } while(native_lock_write_b(ED, tmp16u&0xff)); ED->word[0] = tmp16u; } else { do { - tmp16u = arm64_lock_read_h(ED); - } while(arm64_lock_write_h(ED, inc16(emu, tmp16u))); + tmp16u = native_lock_read_h(ED); + } while(native_lock_write_h(ED, inc16(emu, tmp16u))); } } #else @@ -259,19 +261,19 @@ int Run66F0(x64emu_t *emu, rex_t rex) // unaligned do { tmp64u = ED->q[0] & 0xffffffffffffff00LL; - tmp64u |= arm64_lock_read_b(ED); + tmp64u |= native_lock_read_b(ED); tmp64u = dec64(emu, tmp64u); - } while(arm64_lock_write_b(ED, tmp64u&0xff)); + } while(native_lock_write_b(ED, tmp64u&0xff)); ED->q[0] = tmp64u; } else do { - tmp64u = arm64_lock_read_dd(ED); - } while(arm64_lock_write_dd(ED, dec64(emu, tmp64u))); + tmp64u = native_lock_read_dd(ED); + } while(native_lock_write_dd(ED, dec64(emu, tmp64u))); else { do { - tmp16u = arm64_lock_read_h(ED); - } while(arm64_lock_write_h(ED, dec16(emu, tmp16u))); + tmp16u = native_lock_read_h(ED); + } while(native_lock_write_h(ED, dec16(emu, tmp16u))); } #else pthread_mutex_lock(&emu->context->mutex_lock); diff --git a/src/emu/x64run670f.c b/src/emu/x64run670f.c index 28000ddd..1671da03 100644 --- a/src/emu/x64run670f.c +++ b/src/emu/x64run670f.c @@ -20,7 +20,7 @@ #include "box64context.h" #include "bridge.h" #ifdef DYNAREC -#include "../dynarec/arm64_lock.h" +#include "../dynarec/native_lock.h" #endif #include "modrm.h" diff --git a/src/emu/x64run6766.c b/src/emu/x64run6766.c index 2702012c..2d53e3ff 100644 --- a/src/emu/x64run6766.c +++ b/src/emu/x64run6766.c @@ -20,7 +20,7 @@ #include "box64context.h" #include "bridge.h" #ifdef DYNAREC -#include "../dynarec/arm64_lock.h" +#include "../dynarec/native_lock.h" #endif #include "modrm.h" diff --git a/src/emu/x64runf0.c b/src/emu/x64runf0.c index 634d3d40..91626429 100644 --- a/src/emu/x64runf0.c +++ b/src/emu/x64runf0.c @@ -21,7 +21,7 @@ #include "my_cpuid.h" #include "bridge.h" #ifdef DYNAREC -#include "../dynarec/arm64_lock.h" +#include "../dynarec/native_lock.h" #endif #include "modrm.h" @@ -53,9 +53,9 @@ int RunF0(x64emu_t *emu, rex_t rex) GETEB(0); \ GETGB; \ do { \ - tmp8u = arm64_lock_read_b(EB); \ + tmp8u = native_lock_read_b(EB); \ tmp8u = OP##8(emu, tmp8u, GB); \ - } while (arm64_lock_write_b(EB, tmp8u)); \ + } while (native_lock_write_b(EB, tmp8u)); \ break; \ case B+1: \ nextop = F8; \ @@ -63,14 +63,14 @@ int RunF0(x64emu_t *emu, rex_t rex) GETGD; \ if(rex.w) { \ do { \ - tmp64u = arm64_lock_read_dd(ED); \ + tmp64u = native_lock_read_dd(ED); \ tmp64u = OP##64(emu, tmp64u, GD->q[0]); \ - } while (arm64_lock_write_dd(ED, tmp64u)); \ + } while (native_lock_write_dd(ED, tmp64u)); \ } else { \ do { \ - tmp32u = arm64_lock_read_d(ED); \ + tmp32u = native_lock_read_d(ED); \ tmp32u = OP##32(emu, tmp32u, GD->dword[0]); \ - } while (arm64_lock_write_d(ED, tmp32u)); \ + } while (native_lock_write_d(ED, tmp32u)); \ if(MODREG) \ ED->dword[1] = 0; \ } \ @@ -190,14 +190,14 @@ int RunF0(x64emu_t *emu, rex_t rex) } } else do { - tmp64u = arm64_lock_read_dd(ED); + tmp64u = native_lock_read_dd(ED); if(tmp64u & (1LL<dword[1] = 0; } else do { - tmp32u = arm64_lock_read_d(ED); + tmp32u = native_lock_read_d(ED); if(tmp32u & (1<q[0] & ~0xffLL; - tmp64u |= arm64_lock_read_b(ED); + tmp64u |= native_lock_read_b(ED); cmp64(emu, R_RAX, tmp64u); if(ACCESS_FLAG(F_ZF)) { - tmp32s = arm64_lock_write_b(ED, GD->q[0]&0xff); + tmp32s = 
native_lock_write_b(ED, GD->q[0]&0xff);
                     if(!tmp32s)
                         ED->q[0] = GD->q[0];
                 } else {
@@ -297,10 +297,10 @@ int RunF0(x64emu_t *emu, rex_t rex)
                 } while(tmp32s);
             } else
                 do {
-                    tmp64u = arm64_lock_read_dd(ED);
+                    tmp64u = native_lock_read_dd(ED);
                     cmp64(emu, R_RAX, tmp64u);
                     if(ACCESS_FLAG(F_ZF)) {
-                        tmp32s = arm64_lock_write_dd(ED, GD->q[0]);
+                        tmp32s = native_lock_write_dd(ED, GD->q[0]);
                     } else {
                         R_RAX = tmp64u;
                         tmp32s = 0;
@@ -308,10 +308,10 @@ int RunF0(x64emu_t *emu, rex_t rex)
                 } while(tmp32s);
             else {
                 do {
-                    tmp32u = arm64_lock_read_d(ED);
+                    tmp32u = native_lock_read_d(ED);
                     cmp32(emu, R_EAX, tmp32u);
                     if(ACCESS_FLAG(F_ZF)) {
-                        tmp32s = arm64_lock_write_d(ED, GD->dword[0]);
+                        tmp32s = native_lock_write_d(ED, GD->dword[0]);
                     } else {
                         R_EAX = tmp32u;
                         tmp32s = 0;
@@ -359,11 +359,11 @@ int RunF0(x64emu_t *emu, rex_t rex)
             #ifdef DYNAREC
             if(rex.w)
                 do {
-                    tmp64u = arm64_lock_read_dd(ED);
+                    tmp64u = native_lock_read_dd(ED);
                    if(tmp64u & (1LL<context->mutex_lock);
                    tmp8u = add8(emu, EB->byte[0], GB);
@@ -610,23 +610,23 @@ int RunF0(x64emu_t *emu, rex_t rex)
             #ifdef DYNAREC
             if(rex.w) {
                 do {
-                    tmp64u = arm64_lock_read_dd(ED);
+                    tmp64u = native_lock_read_dd(ED);
                     tmp64u2 = add64(emu, tmp64u, GD->q[0]);
-                } while(arm64_lock_write_dd(ED, tmp64u2));
+                } while(native_lock_write_dd(ED, tmp64u2));
                 GD->q[0] = tmp64u;
             } else {
                 if(((uintptr_t)ED)&3) {
                     do {
                         tmp32u = ED->dword[0] & ~0xff;
-                        tmp32u |= arm64_lock_read_b(ED);
+                        tmp32u |= native_lock_read_b(ED);
                         tmp32u2 = add32(emu, tmp32u, GD->dword[0]);
-                    } while(arm64_lock_write_b(ED, tmp32u2&0xff));
+                    } while(native_lock_write_b(ED, tmp32u2&0xff));
                     ED->dword[0] = tmp32u2;
                 } else {
                     do {
-                        tmp32u = arm64_lock_read_d(ED);
+                        tmp32u = native_lock_read_d(ED);
                         tmp32u2 = add32(emu, tmp32u, GD->dword[0]);
-                    } while(arm64_lock_write_d(ED, tmp32u2));
+                    } while(native_lock_write_d(ED, tmp32u2));
                 }
                 GD->q[0] = tmp32u;
                 if(MODREG)
@@ -660,10 +660,10 @@ int RunF0(x64emu_t *emu, rex_t rex)
             #ifdef DYNAREC
             if(rex.w)
                 do {
-                    arm64_lock_read_dq(&tmp64u, &tmp64u2, ED);
+                    native_lock_read_dq(&tmp64u, &tmp64u2, ED);
                     if(R_RAX == tmp64u && R_RDX == tmp64u2) {
                         SET_FLAG(F_ZF);
-                        tmp32s = arm64_lock_write_dq(R_RBX, R_RCX, ED);
+                        tmp32s = native_lock_write_dq(R_RBX, R_RCX, ED);
                     } else {
                         CLEAR_FLAG(F_ZF);
                         R_RAX = tmp64u;
@@ -673,10 +673,10 @@ int RunF0(x64emu_t *emu, rex_t rex)
                 } while(tmp32s);
             else
                 do {
-                    tmp64u = arm64_lock_read_dd(ED);
+                    tmp64u = native_lock_read_dd(ED);
                     if((R_EAX == (tmp64u&0xffffffff)) && (R_EDX == ((tmp64u>>32)&0xffffffff))) {
                         SET_FLAG(F_ZF);
-                        tmp32s = arm64_lock_write_dd(ED, R_EBX|(((uint64_t)R_ECX)<<32));
+                        tmp32s = native_lock_write_dd(ED, R_EBX|(((uint64_t)R_ECX)<<32));
                     } else {
                         CLEAR_FLAG(F_ZF);
                         R_RAX = tmp64u&0xffffffff;
@@ -733,13 +733,13 @@ int RunF0(x64emu_t *emu, rex_t rex)
                 tmp8u = F8;
                 #ifdef DYNAREC
                 switch((nextop>>3)&7) {
-                    case 0: do { tmp8u2 = arm64_lock_read_b(EB); tmp8u2 = add8(emu, tmp8u2, tmp8u);} while(arm64_lock_write_b(EB, tmp8u2)); break;
-                    case 1: do { tmp8u2 = arm64_lock_read_b(EB); tmp8u2 = or8(emu, tmp8u2, tmp8u);} while(arm64_lock_write_b(EB, tmp8u2)); break;
-                    case 2: do { tmp8u2 = arm64_lock_read_b(EB); tmp8u2 = adc8(emu, tmp8u2, tmp8u);} while(arm64_lock_write_b(EB, tmp8u2)); break;
-                    case 3: do { tmp8u2 = arm64_lock_read_b(EB); tmp8u2 = sbb8(emu, tmp8u2, tmp8u);} while(arm64_lock_write_b(EB, tmp8u2)); break;
-                    case 4: do { tmp8u2 = arm64_lock_read_b(EB); tmp8u2 = and8(emu, tmp8u2, tmp8u);} while(arm64_lock_write_b(EB, tmp8u2)); break;
-                    case 5: do { tmp8u2 = arm64_lock_read_b(EB); tmp8u2 = sub8(emu, tmp8u2, tmp8u);} while(arm64_lock_write_b(EB, tmp8u2)); break;
-                    case 6: do { tmp8u2 = arm64_lock_read_b(EB); tmp8u2 = xor8(emu, tmp8u2, tmp8u);} while(arm64_lock_write_b(EB, tmp8u2)); break;
+                    case 0: do { tmp8u2 = native_lock_read_b(EB); tmp8u2 = add8(emu, tmp8u2, tmp8u);} while(native_lock_write_b(EB, tmp8u2)); break;
+                    case 1: do { tmp8u2 = native_lock_read_b(EB); tmp8u2 = or8(emu, tmp8u2, tmp8u);} while(native_lock_write_b(EB, tmp8u2)); break;
+                    case 2: do { tmp8u2 = native_lock_read_b(EB); tmp8u2 = adc8(emu, tmp8u2, tmp8u);} while(native_lock_write_b(EB, tmp8u2)); break;
+                    case 3: do { tmp8u2 = native_lock_read_b(EB); tmp8u2 = sbb8(emu, tmp8u2, tmp8u);} while(native_lock_write_b(EB, tmp8u2)); break;
+                    case 4: do { tmp8u2 = native_lock_read_b(EB); tmp8u2 = and8(emu, tmp8u2, tmp8u);} while(native_lock_write_b(EB, tmp8u2)); break;
+                    case 5: do { tmp8u2 = native_lock_read_b(EB); tmp8u2 = sub8(emu, tmp8u2, tmp8u);} while(native_lock_write_b(EB, tmp8u2)); break;
+                    case 6: do { tmp8u2 = native_lock_read_b(EB); tmp8u2 = xor8(emu, tmp8u2, tmp8u);} while(native_lock_write_b(EB, tmp8u2)); break;
                     case 7: cmp8(emu, EB->byte[0], tmp8u); break;
                 }
                 #else
@@ -769,13 +769,13 @@ int RunF0(x64emu_t *emu, rex_t rex)
                 #ifdef DYNAREC
                 if(rex.w) {
                     switch((nextop>>3)&7) {
-                        case 0: do { tmp64u2 = arm64_lock_read_dd(ED); tmp64u2 = add64(emu, tmp64u2, tmp64u);} while(arm64_lock_write_dd(ED, tmp64u2)); break;
-                        case 1: do { tmp64u2 = arm64_lock_read_dd(ED); tmp64u2 = or64(emu, tmp64u2, tmp64u);} while(arm64_lock_write_dd(ED, tmp64u2)); break;
-                        case 2: do { tmp64u2 = arm64_lock_read_dd(ED); tmp64u2 = adc64(emu, tmp64u2, tmp64u);} while(arm64_lock_write_dd(ED, tmp64u2)); break;
-                        case 3: do { tmp64u2 = arm64_lock_read_dd(ED); tmp64u2 = sbb64(emu, tmp64u2, tmp64u);} while(arm64_lock_write_dd(ED, tmp64u2)); break;
-                        case 4: do { tmp64u2 = arm64_lock_read_dd(ED); tmp64u2 = and64(emu, tmp64u2, tmp64u);} while(arm64_lock_write_dd(ED, tmp64u2)); break;
-                        case 5: do { tmp64u2 = arm64_lock_read_dd(ED); tmp64u2 = sub64(emu, tmp64u2, tmp64u);} while(arm64_lock_write_dd(ED, tmp64u2)); break;
-                        case 6: do { tmp64u2 = arm64_lock_read_dd(ED); tmp64u2 = xor64(emu, tmp64u2, tmp64u);} while(arm64_lock_write_dd(ED, tmp64u2)); break;
+                        case 0: do { tmp64u2 = native_lock_read_dd(ED); tmp64u2 = add64(emu, tmp64u2, tmp64u);} while(native_lock_write_dd(ED, tmp64u2)); break;
+                        case 1: do { tmp64u2 = native_lock_read_dd(ED); tmp64u2 = or64(emu, tmp64u2, tmp64u);} while(native_lock_write_dd(ED, tmp64u2)); break;
+                        case 2: do { tmp64u2 = native_lock_read_dd(ED); tmp64u2 = adc64(emu, tmp64u2, tmp64u);} while(native_lock_write_dd(ED, tmp64u2)); break;
+                        case 3: do { tmp64u2 = native_lock_read_dd(ED); tmp64u2 = sbb64(emu, tmp64u2, tmp64u);} while(native_lock_write_dd(ED, tmp64u2)); break;
+                        case 4: do { tmp64u2 = native_lock_read_dd(ED); tmp64u2 = and64(emu, tmp64u2, tmp64u);} while(native_lock_write_dd(ED, tmp64u2)); break;
+                        case 5: do { tmp64u2 = native_lock_read_dd(ED); tmp64u2 = sub64(emu, tmp64u2, tmp64u);} while(native_lock_write_dd(ED, tmp64u2)); break;
+                        case 6: do { tmp64u2 = native_lock_read_dd(ED); tmp64u2 = xor64(emu, tmp64u2, tmp64u);} while(native_lock_write_dd(ED, tmp64u2)); break;
                         case 7: cmp64(emu, ED->q[0], tmp64u); break;
                     }
                 } else {
@@ -792,13 +792,13 @@ int RunF0(x64emu_t *emu, rex_t rex)
                     }
                     else
                         switch((nextop>>3)&7) {
-                            case 0: do { tmp32u2 = arm64_lock_read_d(ED); tmp32u2 = add32(emu, tmp32u2, tmp64u);} while(arm64_lock_write_d(ED, tmp32u2)); break;
-                            case 1: do { tmp32u2 = arm64_lock_read_d(ED); tmp32u2 = or32(emu, tmp32u2, tmp64u);} while(arm64_lock_write_d(ED, tmp32u2)); break;
-                            case 2: do { tmp32u2 = arm64_lock_read_d(ED); tmp32u2 = adc32(emu, tmp32u2, tmp64u);} while(arm64_lock_write_d(ED, tmp32u2)); break;
-                            case 3: do { tmp32u2 = arm64_lock_read_d(ED); tmp32u2 = sbb32(emu, tmp32u2, tmp64u);} while(arm64_lock_write_d(ED, tmp32u2)); break;
-                            case 4: do { tmp32u2 = arm64_lock_read_d(ED); tmp32u2 = and32(emu, tmp32u2, tmp64u);} while(arm64_lock_write_d(ED, tmp32u2)); break;
-                            case 5: do { tmp32u2 = arm64_lock_read_d(ED); tmp32u2 = sub32(emu, tmp32u2, tmp64u);} while(arm64_lock_write_d(ED, tmp32u2)); break;
-                            case 6: do { tmp32u2 = arm64_lock_read_d(ED); tmp32u2 = xor32(emu, tmp32u2, tmp64u);} while(arm64_lock_write_d(ED, tmp32u2)); break;
+                            case 0: do { tmp32u2 = native_lock_read_d(ED); tmp32u2 = add32(emu, tmp32u2, tmp64u);} while(native_lock_write_d(ED, tmp32u2)); break;
+                            case 1: do { tmp32u2 = native_lock_read_d(ED); tmp32u2 = or32(emu, tmp32u2, tmp64u);} while(native_lock_write_d(ED, tmp32u2)); break;
+                            case 2: do { tmp32u2 = native_lock_read_d(ED); tmp32u2 = adc32(emu, tmp32u2, tmp64u);} while(native_lock_write_d(ED, tmp32u2)); break;
+                            case 3: do { tmp32u2 = native_lock_read_d(ED); tmp32u2 = sbb32(emu, tmp32u2, tmp64u);} while(native_lock_write_d(ED, tmp32u2)); break;
+                            case 4: do { tmp32u2 = native_lock_read_d(ED); tmp32u2 = and32(emu, tmp32u2, tmp64u);} while(native_lock_write_d(ED, tmp32u2)); break;
+                            case 5: do { tmp32u2 = native_lock_read_d(ED); tmp32u2 = sub32(emu, tmp32u2, tmp64u);} while(native_lock_write_d(ED, tmp32u2)); break;
+                            case 6: do { tmp32u2 = native_lock_read_d(ED); tmp32u2 = xor32(emu, tmp32u2, tmp64u);} while(native_lock_write_d(ED, tmp32u2)); break;
                             case 7: cmp32(emu, ED->dword[0], tmp64u); break;
                         }
                 }
@@ -859,11 +859,11 @@ int RunF0(x64emu_t *emu, rex_t rex)
                 }
             } else {
                 if(rex.w) {
-                    GD->q[0] = arm64_lock_xchg(ED, GD->q[0]);
+                    GD->q[0] = native_lock_xchg(ED, GD->q[0]);
                 } else {
                     do {
-                        tmp32u = arm64_lock_read_d(ED);
-                    } while(arm64_lock_write_d(ED, GD->dword[0]));
+                        tmp32u = native_lock_read_d(ED);
+                    } while(native_lock_write_d(ED, GD->dword[0]));
                     GD->q[0] = tmp32u;
                 }
             }
@@ -897,29 +897,29 @@ int RunF0(x64emu_t *emu, rex_t rex)
                         // unaligned
                         do {
                             tmp64u = ED->q[0] & 0xffffffffffffff00LL;
-                            tmp64u |= arm64_lock_read_b(ED);
+                            tmp64u |= native_lock_read_b(ED);
                             tmp64u = inc64(emu, tmp64u);
-                        } while(arm64_lock_write_b(ED, tmp64u&0xff));
+                        } while(native_lock_write_b(ED, tmp64u&0xff));
                         ED->q[0] = tmp64u;
                     } else
                         do {
-                            tmp64u = arm64_lock_read_dd(ED);
-                        } while(arm64_lock_write_dd(ED, inc64(emu, tmp64u)));
+                            tmp64u = native_lock_read_dd(ED);
+                        } while(native_lock_write_dd(ED, inc64(emu, tmp64u)));
                 else {
                     if((uintptr_t)ED&3) {
                         //meh.
                         do {
                             tmp32u = ED->dword[0];
                             tmp32u &=~0xff;
-                            tmp32u |= arm64_lock_read_b(ED);
+                            tmp32u |= native_lock_read_b(ED);
                             tmp32u = inc32(emu, tmp32u);
-                        } while(arm64_lock_write_b(ED, tmp32u&0xff));
+                        } while(native_lock_write_b(ED, tmp32u&0xff));
                         ED->dword[0] = tmp32u;
                     } else {
                         do {
-                            tmp32u = arm64_lock_read_d(ED);
-                        } while(arm64_lock_write_d(ED, inc32(emu, tmp32u)));
+                            tmp32u = native_lock_read_d(ED);
+                        } while(native_lock_write_d(ED, inc32(emu, tmp32u)));
                     }
                     if(MODREG)
                         ED->dword[1] = 0;
                }
@@ -943,19 +943,19 @@ int RunF0(x64emu_t *emu, rex_t rex)
                         // unaligned
                         do {
                             tmp64u = ED->q[0] & 0xffffffffffffff00LL;
-                            tmp64u |= arm64_lock_read_b(ED);
+                            tmp64u |= native_lock_read_b(ED);
                             tmp64u = dec64(emu, tmp64u);
-                        } while(arm64_lock_write_b(ED, tmp64u&0xff));
+                        } while(native_lock_write_b(ED, tmp64u&0xff));
                         ED->q[0] = tmp64u;
                     } else
                         do {
-                            tmp64u = arm64_lock_read_dd(ED);
-                        } while(arm64_lock_write_dd(ED, dec64(emu, tmp64u)));
+                            tmp64u = native_lock_read_dd(ED);
+                        } while(native_lock_write_dd(ED, dec64(emu, tmp64u)));
                 else {
                     do {
-                        tmp32u = arm64_lock_read_d(ED);
-                    } while(arm64_lock_write_d(ED, dec32(emu, tmp32u)));
+                        tmp32u = native_lock_read_d(ED);
+                    } while(native_lock_write_d(ED, dec32(emu, tmp32u)));
                     if(MODREG)
                         ED->dword[1] = 0;
                }
             #else
diff --git a/src/include/dynarec_arm64.h b/src/include/dynarec_arm64.h
deleted file mode 100755
index bc4cf3f6..00000000
--- a/src/include/dynarec_arm64.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef __DYNAREC_ARM_H_
-#define __DYNAREC_ARM_H_
-
-typedef struct dynablock_s dynablock_t;
-typedef struct x64emu_s x64emu_t;
-
-void CancelBlock64();
-void* FillBlock64(dynablock_t* block, uintptr_t addr);
-
-#endif //__DYNAREC_ARM_H_
\ No newline at end of file
diff --git a/src/include/dynarec_native.h b/src/include/dynarec_native.h
new file mode 100755
index 00000000..bc4cf3f6
--- /dev/null
+++ b/src/include/dynarec_native.h
@@ -0,0 +1,10 @@
+#ifndef __DYNAREC_ARM_H_
+#define __DYNAREC_ARM_H_
+
+typedef struct dynablock_s dynablock_t;
+typedef struct x64emu_s x64emu_t;
+
+void CancelBlock64();
+void* FillBlock64(dynablock_t* block, uintptr_t addr);
+
+#endif //__DYNAREC_ARM_H_
\ No newline at end of file
-- 
cgit 1.4.1
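
Note on the rename above: every arm64_lock_* call in the LOCK-prefix interpreter of x64runf0.c now goes through a native_lock_* name, so the x86 emulation loops keep the same do { read } while(write) shape while the lock primitives are resolved per target architecture. A minimal sketch of such an indirection header is given below, assuming an ARM64 build and the new arm64/ directory layout; the macro names mirror the calls visible in this diff, but the actual contents of src/dynarec/native_lock.h are not reproduced here, so treat this as an illustration rather than the committed header.

/* Sketch only: an architecture-neutral wrapper in the spirit of native_lock.h.
 * The ARM64 branch simply forwards to the existing arm64_lock_* primitives
 * (declared in arm64/arm64_lock.h); a future target would add its own branch.
 * Guard name and include path are illustrative, not taken from the patch. */
#ifndef __NATIVE_LOCK_SKETCH_H__
#define __NATIVE_LOCK_SKETCH_H__

#ifdef ARM64
#include "arm64/arm64_lock.h"
/* read/write pairs used by the do { ... } while(...) retry loops above */
#define native_lock_read_b(A)           arm64_lock_read_b(A)
#define native_lock_write_b(A, B)       arm64_lock_write_b(A, B)
#define native_lock_read_d(A)           arm64_lock_read_d(A)
#define native_lock_write_d(A, B)       arm64_lock_write_d(A, B)
#define native_lock_read_dd(A)          arm64_lock_read_dd(A)
#define native_lock_write_dd(A, B)      arm64_lock_write_dd(A, B)
#define native_lock_read_dq(A, B, C)    arm64_lock_read_dq(A, B, C)
#define native_lock_write_dq(A, B, C)   arm64_lock_write_dq(A, B, C)
#define native_lock_xchg(A, B)          arm64_lock_xchg(A, B)
#else
#error Unsupported target architecture for native_lock sketch
#endif

#endif // __NATIVE_LOCK_SKETCH_H__

Consistent with the retry loops in the diff, each write primitive returns non-zero when the exclusive store is lost, which is why every locked operation is wrapped in a do { ... } while(native_lock_write_*(...)) loop (or retried via tmp32s) until the store succeeds.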