Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/arm64/arm64_emitter.h | 12 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_00.c | 6 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_0f.c | 2 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_67.c | 2 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_67_32.c | 2 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_consts.c | 2 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_consts.h | 2 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_f20f.c | 2 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_helper.c | 22 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_helper.h | 13 |
| -rw-r--r-- | src/dynarec/arm64/updateflags_arm64.c | 135 |
| -rw-r--r-- | src/dynarec/arm64/updateflags_arm64_pass.c | 1005 |
| -rw-r--r-- | src/dynarec/dynarec_arch.h | 2 |
| -rw-r--r-- | src/dynarec/dynarec_native_functions.c | 2 |
14 files changed, 1183 insertions, 26 deletions
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h index c1359e1b..9703ac95 100644 --- a/src/dynarec/arm64/arm64_emitter.h +++ b/src/dynarec/arm64/arm64_emitter.h @@ -47,10 +47,12 @@ int convert_bitmask(uint64_t bitmask); #define ADDx_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(1, 0, 0, 0b00, Rm, 0, Rn, Rd)) #define ADDSx_REG(Rd, Rn, Rm) FEMIT(ADDSUB_REG_gen(1, 0, 1, 0b00, Rm, 0, Rn, Rd)) #define ADDx_REG_LSL(Rd, Rn, Rm, lsl) EMIT(ADDSUB_REG_gen(1, 0, 0, 0b00, Rm, lsl, Rn, Rd)) +#define ADDx_REG_LSR(Rd, Rn, Rm, lsr) EMIT(ADDSUB_REG_gen(1, 0, 0, 0b01, Rm, lsr, Rn, Rd)) #define ADDz_REG_LSL(Rd, Rn, Rm, lsl) EMIT(ADDSUB_REG_gen(rex.is32bits?0:1, 0, 0, 0b00, Rm, lsl, Rn, Rd)) #define ADDw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(0, 0, 0, 0b00, Rm, 0, Rn, Rd)) #define ADDSw_REG(Rd, Rn, Rm) FEMIT(ADDSUB_REG_gen(0, 0, 1, 0b00, Rm, 0, Rn, Rd)) #define ADDw_REG_LSL(Rd, Rn, Rm, lsl) EMIT(ADDSUB_REG_gen(0, 0, 0, 0b00, Rm, lsl, Rn, Rd)) +#define ADDw_REG_LSR(Rd, Rn, Rm, lsr) EMIT(ADDSUB_REG_gen(0, 0, 0, 0b01, Rm, lsr, Rn, Rd)) #define ADDSw_REG_LSL(Rd, Rn, Rm, lsl) FEMIT(ADDSUB_REG_gen(0, 0, 1, 0b00, Rm, lsl, Rn, Rd)) #define ADDxw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(rex.w, 0, 0, 0b00, Rm, 0, Rn, Rd)) #define ADDz_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(rex.is32bits?0:1, 0, 0, 0b00, Rm, 0, Rn, Rd)) @@ -120,9 +122,11 @@ int convert_bitmask(uint64_t bitmask); #define SBCSw_REG(Rd, Rn, Rm) FEMIT(ADDSUBC_gen(0, 1, 1, Rm, Rn, Rd)) #define SBCSxw_REG(Rd, Rn, Rm) FEMIT(ADDSUBC_gen(rex.w, 1, 1, Rm, Rn, Rd)) -#define SUB_ext(sf, op, S, Rm, option, imm3, Rn, Rd) ((sf)<<31 | (op)<<30 | (S)<<29 | 0b01011<<24 | 1<<21 | (Rm)<<16 | (option)<<13 | (imm3)<<10 | (Rn)<<5 | (Rd)) -#define SUBxw_UXTB(Rd, Rn, Rm) EMIT(SUB_ext(rex.w, 1, 0, Rm, 0b000, 0, Rn, Rd)) -#define SUBw_UXTB(Rd, Rn, Rm) EMIT(SUB_ext(0, 1, 0, Rm, 0b000, 0, Rn, Rd)) +#define ADDSUB_ext(sf, op, S, Rm, option, imm3, Rn, Rd) ((sf)<<31 | (op)<<30 | (S)<<29 | 0b01011<<24 | 1<<21 | (Rm)<<16 | (option)<<13 | (imm3)<<10 | (Rn)<<5 | (Rd)) +#define SUBxw_UXTB(Rd, Rn, Rm) EMIT(ADDSUB_ext(rex.w, 1, 0, Rm, 0b000, 0, Rn, Rd)) +#define SUBw_UXTB(Rd, Rn, Rm) EMIT(ADDSUB_ext(0, 1, 0, Rm, 0b000, 0, Rn, Rd)) +#define ADDw_UXTH(Rd, Rn, Rm) EMIT(ADDSUB_ext(0, 0, 0, Rm, 0b001, 0, Rn, Rd)) +#define ADDx_UXTW(Rd, Rn, Rm) EMIT(ADDSUB_ext(1, 0, 0, Rm, 0b010, 0, Rn, Rd)) // CCMP compare if cond is true, set nzcv if false #define CCMP_reg(sf, Rm, cond, Rn, nzcv) ((sf)<<31 | 1<<30 | 1<<29 | 0b11010010<<21 | (Rm)<<16 | (cond)<<12 | (Rn)<<5 | (nzcv)) @@ -160,6 +164,8 @@ int convert_bitmask(uint64_t bitmask); #define LDS_gen(size, op1, imm12, Rn, Rt) ((size)<<30 | 0b111<<27 | (op1)<<24 | 0b10<<22 | (imm12)<<10 | (Rn)<<5 | (Rt)) #define LDRSW_U12(Rt, Rn, imm12) EMIT(LDS_gen(0b10, 0b01, ((uint32_t)((imm12)>>2))&0xfff, Rn, Rt)) +#define LDRSH_U12(Rt, Rn, imm12) EMIT(LDS_gen(0b01, 0b01, ((uint32_t)((imm12)>>1))&0xfff, Rn, Rt)) +#define LDRSB_U12(Rt, Rn, imm12) EMIT(LDS_gen(0b00, 0b01, ((uint32_t)(imm12))&0xfff, Rn, Rt)) #define LDR_REG_gen(size, Rm, option, S, Rn, Rt) ((size)<<30 | 0b111<<27 | 0b01<<22 | 1<<21 | (Rm)<<16 | (option)<<13 | (S)<<12 | (0b10)<<10 | (Rn)<<5 | (Rt)) #define LDRx_REG(Rt, Rn, Rm) EMIT(LDR_REG_gen(0b11, Rm, 0b011, 0, Rn, Rt)) diff --git a/src/dynarec/arm64/dynarec_arm64_00.c b/src/dynarec/arm64/dynarec_arm64_00.c index a547f2a1..90f633bd 100644 --- a/src/dynarec/arm64/dynarec_arm64_00.c +++ b/src/dynarec/arm64/dynarec_arm64_00.c @@ -1086,7 +1086,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin jump_to_next(dyn, 
addr+i8, 0, ninst, rex.is32bits); \ } else { \ /* inside the block, cache transform */ \ - CacheTransform(dyn, ninst, cacheupd, x1, x2, x3); \ + CacheTransform(dyn, ninst, cacheupd); \ i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->native_size);\ SKIP_SEVL(i32); \ B(i32); \ @@ -3293,7 +3293,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin fpu_purgecache(dyn, ninst, 1, x1, x2, x3); \ jump_to_next(dyn, addr+i8, 0, ninst, rex.is32bits); \ } else { \ - CacheTransform(dyn, ninst, cacheupd, x1, x2, x3); \ + CacheTransform(dyn, ninst, cacheupd); \ i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->native_size); \ SKIP_SEVL(i32); \ Bcond(c__, i32); \ @@ -3527,7 +3527,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin jump_to_next(dyn, j64, 0, ninst, rex.is32bits); } else { // inside the block - CacheTransform(dyn, ninst, CHECK_CACHE(), x1, x2, x3); + CacheTransform(dyn, ninst, CHECK_CACHE()); tmp = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->native_size); SKIP_SEVL(tmp); if(tmp==4) { diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c index ecb48925..205efd1e 100644 --- a/src/dynarec/arm64/dynarec_arm64_0f.c +++ b/src/dynarec/arm64/dynarec_arm64_0f.c @@ -1699,7 +1699,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin fpu_purgecache(dyn, ninst, 1, x1, x2, x3); \ jump_to_next(dyn, j64, 0, ninst, rex.is32bits); \ } else { \ - CacheTransform(dyn, ninst, cacheupd, x1, x2, x3); \ + CacheTransform(dyn, ninst, cacheupd); \ i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->native_size); \ SKIP_SEVL(i32); \ B(i32); \ diff --git a/src/dynarec/arm64/dynarec_arm64_67.c b/src/dynarec/arm64/dynarec_arm64_67.c index c0eaf181..05b18944 100644 --- a/src/dynarec/arm64/dynarec_arm64_67.c +++ b/src/dynarec/arm64/dynarec_arm64_67.c @@ -1444,7 +1444,7 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin fpu_purgecache(dyn, ninst, 1, x1, x2, x3); \ jump_to_next(dyn, addr+i8, 0, ninst, rex.is32bits); \ } else { \ - CacheTransform(dyn, ninst, cacheupd, x1, x2, x3); \ + CacheTransform(dyn, ninst, cacheupd); \ i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->native_size);\ SKIP_SEVL(i32); \ B(i32); \ diff --git a/src/dynarec/arm64/dynarec_arm64_67_32.c b/src/dynarec/arm64/dynarec_arm64_67_32.c index ec1fa1e5..bb394e71 100644 --- a/src/dynarec/arm64/dynarec_arm64_67_32.c +++ b/src/dynarec/arm64/dynarec_arm64_67_32.c @@ -100,7 +100,7 @@ uintptr_t dynarec64_67_32(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int fpu_purgecache(dyn, ninst, 1, x1, x2, x3); \ jump_to_next(dyn, addr+i8, 0, ninst, rex.is32bits); \ } else { \ - CacheTransform(dyn, ninst, cacheupd, x1, x2, x3); \ + CacheTransform(dyn, ninst, cacheupd); \ i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->native_size);\ SKIP_SEVL(i32); \ B(i32); \ diff --git a/src/dynarec/arm64/dynarec_arm64_consts.c b/src/dynarec/arm64/dynarec_arm64_consts.c index d30c0bd2..5a6e6d78 100644 --- a/src/dynarec/arm64/dynarec_arm64_consts.c +++ b/src/dynarec/arm64/dynarec_arm64_consts.c @@ -100,7 +100,7 @@ uintptr_t getConst(arm64_consts_t which) case const_helper_getcpu: return (uintptr_t)helper_getcpu; case const_cpuid: return (uintptr_t)my_cpuid; case const_getsegmentbase: return (uintptr_t)GetSegmentBaseEmu; - case const_updateflags: return (uintptr_t)UpdateFlags; + case const_updateflags_arm64: return 
(uintptr_t)create_updateflags(); case const_reset_fpu: return (uintptr_t)reset_fpu; case const_sha1msg2: return (uintptr_t)sha1msg2; case const_sha1rnds4: return (uintptr_t)sha1rnds4; diff --git a/src/dynarec/arm64/dynarec_arm64_consts.h b/src/dynarec/arm64/dynarec_arm64_consts.h index 9ba65aca..2543699e 100644 --- a/src/dynarec/arm64/dynarec_arm64_consts.h +++ b/src/dynarec/arm64/dynarec_arm64_consts.h @@ -64,7 +64,7 @@ typedef enum arm64_consts_s { const_helper_getcpu, const_cpuid, const_getsegmentbase, - const_updateflags, + const_updateflags_arm64, const_reset_fpu, const_sha1msg2, const_sha1rnds4, diff --git a/src/dynarec/arm64/dynarec_arm64_f20f.c b/src/dynarec/arm64/dynarec_arm64_f20f.c index bd103e93..a60da0b6 100644 --- a/src/dynarec/arm64/dynarec_arm64_f20f.c +++ b/src/dynarec/arm64/dynarec_arm64_f20f.c @@ -468,7 +468,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n fpu_purgecache(dyn, ninst, 1, x1, x2, x3); \ jump_to_next(dyn, j64, 0, ninst, rex.is32bits); \ } else { \ - CacheTransform(dyn, ninst, cacheupd, x1, x2, x3); \ + CacheTransform(dyn, ninst, cacheupd); \ i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->native_size); \ SKIP_SEVL(i32); \ B(i32); \ diff --git a/src/dynarec/arm64/dynarec_arm64_helper.c b/src/dynarec/arm64/dynarec_arm64_helper.c index 74f353cd..d71603d4 100644 --- a/src/dynarec/arm64/dynarec_arm64_helper.c +++ b/src/dynarec/arm64/dynarec_arm64_helper.c @@ -2492,7 +2492,7 @@ static void fpuCacheTransform(dynarec_arm_t* dyn, int ninst, int s1, int s2, int } MESSAGE(LOG_DUMP, "\t---- Cache Transform\n"); } -static void flagsCacheTransform(dynarec_arm_t* dyn, int ninst, int s1) +static void flagsCacheTransform(dynarec_arm_t* dyn, int ninst) { int j64; int jmp = dyn->insts[ninst].x64.jmp_insts; @@ -2516,15 +2516,16 @@ static void flagsCacheTransform(dynarec_arm_t* dyn, int ninst, int s1) } if(go) { if(dyn->f.pending!=SF_PENDING) { - LDRw_U12(s1, xEmu, offsetof(x64emu_t, df)); + LDRw_U12(x1, xEmu, offsetof(x64emu_t, df)); j64 = (GETMARKF2)-(dyn->native_size); - CBZw(s1, j64); + CBZw(x1, j64); } if(dyn->insts[ninst].need_nat_flags) - MRS_nzcv(s1); - CALL_(const_updateflags, -1, s1); + MRS_nzcv(x6); + TABLE64C(x1, const_updateflags_arm64); + BLR(x1); if(dyn->insts[ninst].need_nat_flags) - MSR_nzcv(s1); + MSR_nzcv(x6); MARKF2; } } @@ -2607,13 +2608,14 @@ static void nativeFlagsTransform(dynarec_arm_t* dyn, int ninst, int s1, int s2) MESSAGE(LOG_DUMP, "\t---- Native Flags transform\n"); } -void CacheTransform(dynarec_arm_t* dyn, int ninst, int cacheupd, int s1, int s2, int s3) { +// Might use all Scratch registers! 
+void CacheTransform(dynarec_arm_t* dyn, int ninst, int cacheupd) { if(cacheupd&1) - flagsCacheTransform(dyn, ninst, s1); + flagsCacheTransform(dyn, ninst); if(cacheupd&2) - fpuCacheTransform(dyn, ninst, s1, s2, s3); + fpuCacheTransform(dyn, ninst, x1, x2, x3); if(cacheupd&4) - nativeFlagsTransform(dyn, ninst, s1, s2); + nativeFlagsTransform(dyn, ninst, x1, x2); } void fpu_reflectcache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3) diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h index 8ba2407f..aa67cc49 100644 --- a/src/dynarec/arm64/dynarec_arm64_helper.h +++ b/src/dynarec/arm64/dynarec_arm64_helper.h @@ -4,6 +4,7 @@ // undef to get Close to SSE Float->int conversions //#define PRECISE_CVT +#ifndef STEP_PASS #if STEP == 0 #include "dynarec_arm64_pass0.h" #elif STEP == 1 @@ -13,6 +14,8 @@ #elif STEP == 3 #include "dynarec_arm64_pass3.h" #endif +#define STEP_PASS +#endif #include "debug.h" #include "arm64_emitter.h" @@ -1136,7 +1139,8 @@ MOVZw(S, (N)); \ STRw_U12(S, xEmu, offsetof(x64emu_t, df)); \ if (dyn->f.pending == SF_PENDING && dyn->insts[ninst].x64.need_after && !(dyn->insts[ninst].x64.need_after & X_PEND)) { \ - CALL_I(const_updateflags); \ + TABLE64C(x6, const_updateflags_arm64); \ + BLR(x6); \ dyn->f.pending = SF_SET; \ SET_NODF(); \ } \ @@ -1158,7 +1162,8 @@ j64 = (GETMARKF)-(dyn->native_size); \ CBZw(x3, j64); \ } \ - CALL_I(const_updateflags); \ + TABLE64C(x6, const_updateflags_arm64); \ + BLR(x6); \ MARKF; \ dyn->f.pending = SF_SET; \ SET_DFOK(); \ @@ -1287,6 +1292,8 @@ #define native_pass STEPNAME(native_pass) +#define updateflags_pass STEPNAME(updateflags_pass) + #define dynarec64_00 STEPNAME(dynarec64_00) #define dynarec64_0F STEPNAME(dynarec64_0F) #define dynarec64_64 STEPNAME(dynarec64_64) @@ -1650,7 +1657,7 @@ int sse_setround(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3); // purge ymm_zero mask according to purge_ymm void avx_purge_ymm(dynarec_arm_t* dyn, int ninst, uint16_t mask, int s1); -void CacheTransform(dynarec_arm_t* dyn, int ninst, int cacheupd, int s1, int s2, int s3); +void CacheTransform(dynarec_arm_t* dyn, int ninst, int cacheupd); void arm64_move32(dynarec_arm_t* dyn, int ninst, int reg, uint32_t val); void arm64_move64(dynarec_arm_t* dyn, int ninst, int reg, uint64_t val); diff --git a/src/dynarec/arm64/updateflags_arm64.c b/src/dynarec/arm64/updateflags_arm64.c new file mode 100644 index 00000000..2e2af8e9 --- /dev/null +++ b/src/dynarec/arm64/updateflags_arm64.c @@ -0,0 +1,135 @@ +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> +#include <assert.h> + +#include "os.h" +#include "debug.h" +#include "box64context.h" +#include "custommem.h" +#include "box64cpu.h" +#include "emu/x64emu_private.h" +#include "x64emu.h" +#include "box64stack.h" +#include "callback.h" +#include "emu/x64run_private.h" +#include "x64trace.h" +#include "dynablock.h" +#include "../dynablock_private.h" + +#include "dynarec_native.h" +#include "../dynarec_arch.h" + +void updateflags_pass0(dynarec_arm_t* dyn, uint64_t jmp_df[]); +void updateflags_pass1(dynarec_arm_t* dyn, uint64_t jmp_df[]); +void updateflags_pass2(dynarec_arm_t* dyn, uint64_t jmp_df[]); +void updateflags_pass3(dynarec_arm_t* dyn, uint64_t jmp_df[]); + +static dynablock_t* updaflags_arm64 = NULL; + +static uint8_t dummy_code[] = {0x90, 0xc3}; // some dummy code so update_flags dynablock point to something + +void* create_updateflags() +{ + if(updaflags_arm64) + return updaflags_arm64->block; + 
uint64_t jmp_df[d_unknown+1] = {0}; + dynarec_arm_t helper = {0}; + instruction_arm64_t insts[1] = {0}; + helper.insts = insts; + helper.need_dump = BOX64ENV(dynarec_dump); + helper.cap = 1; + helper.f.dfnone = 1; + helper.f.pending = SF_NODF; + helper.insts[0].x64.gen_flags = X_ALL; + // pass 0 + updateflags_pass0(&helper, jmp_df); + // check if all flags are handled + int ok = 1; + for(int i=d_none; i<d_unknown; ++i) + if(!jmp_df[i]) { + printf_log(LOG_NONE, "Error, UpdateFlags case %d is not handled, will crash later\n", i); + ok = 0; + } + // pass 1 + updateflags_pass1(&helper, jmp_df); + // pass 2 + helper.native_size = 0; + updateflags_pass2(&helper, jmp_df); + // alloc memory for pass3 + size_t native_size = (helper.native_size+7)&~7; // round the size... + size_t sz = sizeof(void*) + native_size + helper.table64size*sizeof(uint64_t) + 4*sizeof(void*) + 0 + 0 + 0 + sizeof(dynablock_t) + 0; + // dynablock_t* block (arm insts) table64 jmpnext code instsize arch callrets dynablock relocs + void* actual_p = (void*)AllocDynarecMap((uintptr_t)&dummy_code, sz, 1); // arbitrary address + void* p = (void*)(((uintptr_t)actual_p) + sizeof(void*)); + void* tablestart = p + native_size; + void* next = tablestart + helper.table64size*sizeof(uint64_t); + void* instsize = next + 4*sizeof(void*); + void* arch = instsize + 0; + void* callrets = arch + 0; + if(actual_p==NULL) { + dynarec_log(LOG_INFO, "AllocDynarecMap(%zu) failed, canceling UpdateBlock\n", sz); + return NULL; + } + helper.block = p; + dynablock_t* block = (dynablock_t*)(callrets+0); + memset(block, 0, sizeof(dynablock_t)); + void* relocs = helper.need_reloc?(block+1):NULL; + // fill the block + block->x64_addr = &dummy_code; + block->isize = 0; + block->actual_block = actual_p; + helper.relocs = relocs; + block->relocs = relocs; + block->table64size = helper.table64size; + helper.native_start = (uintptr_t)p; + helper.tablestart = (uintptr_t)tablestart; + helper.jmp_next = (uintptr_t)next+sizeof(void*); + helper.instsize = (instsize_t*)instsize; + *(dynablock_t**)actual_p = block; + helper.table64cap = helper.table64size; + helper.table64 = (uint64_t*)helper.tablestart; + helper.callrets = (callret_t*)callrets; + block->table64 = helper.table64; + helper.callret_size = 0; + // pass 3, emit (log emit native opcode) + if(helper.need_dump) { + dynarec_log(LOG_NONE, "%s%04d|Emitting %zu bytes for UpdateFlags", (helper.need_dump>1)?"\e[01;36m":"", GetTID(), helper.native_size); + PrintFunctionAddr(helper.start, " => "); + dynarec_log_prefix(0, LOG_NONE, "%s\n", (helper.need_dump>1)?"\e[m":""); + } + helper.native_size = 0; + updateflags_pass3(&helper, jmp_df); + helper.jmp_sz = helper.jmp_cap = 0; + helper.jmps = NULL; + // keep size of instructions for signal handling + block->instsize = instsize; + helper.table64 = NULL; + helper.instsize = NULL; + helper.predecessor = NULL; + block->size = sz; + block->isize = helper.size; + block->block = p; + block->jmpnext = next+sizeof(void*); + block->always_test = helper.always_test; + block->dirty = block->always_test; + block->is32bits = 0; + block->relocsize = helper.reloc_size*sizeof(uint32_t); + block->arch = NULL; + block->arch_size = 0; + block->callret_size = helper.callret_size; + block->callrets = helper.callrets; + block->native_size = native_size; + *(dynablock_t**)next = block; + *(void**)(next+3*sizeof(void*)) = NULL; + CreateJmpNext(block->jmpnext, next+3*sizeof(void*)); + ClearCache(block->jmpnext, 4*sizeof(void*)); + block->x64_size = 0; + // all done... 
+ ClearCache(actual_p+sizeof(void*), native_size); // need to clear the cache before execution... + + updaflags_arm64 = block; + return block->block; +} \ No newline at end of file diff --git a/src/dynarec/arm64/updateflags_arm64_pass.c b/src/dynarec/arm64/updateflags_arm64_pass.c new file mode 100644 index 00000000..8b4dfe39 --- /dev/null +++ b/src/dynarec/arm64/updateflags_arm64_pass.c @@ -0,0 +1,1005 @@ +#include <stdio.h> +#include <stdlib.h> +#include <stddef.h> +#include <errno.h> +#include <string.h> + +#include "os.h" +#include "debug.h" +#include "box64context.h" +#include "box64cpu.h" +#include "emu/x64emu_private.h" +#include "emu/x64run_private.h" +#include "x64emu.h" +#include "box64stack.h" +#include "x64trace.h" +#include "dynablock.h" +#include "dynarec_native.h" +#include "../dynablock_private.h" +#include "custommem.h" +#include "x64test.h" +#include "pe_tools.h" + +#include "../dynarec_arch.h" + +#if STEP == 0 + #define EMIT(A) dyn->native_size+=4 + #define SETMARK(A) jmp_df[A] = 1 +#elif STEP == 1 + #define EMIT(A) do {} while (0) + #define SETMARK(A) jmp_df[A] = 0 +#elif STEP == 2 + #define EMIT(A) dyn->native_size+=4 + #define SETMARK(A) jmp_df[A] = dyn->native_size +#elif STEP == 3 + #define MESSAGE(A, ...) \ + do { \ + if (dyn->need_dump) dynarec_log_prefix(0, LOG_NONE, __VA_ARGS__); \ + } while (0) + #define EMIT(A) \ + do{ \ + if(dyn->need_dump) print_opcode(dyn, ninst, (uint32_t)(A)); \ + *(uint32_t*)(dyn->block) = (uint32_t)(A); \ + dyn->block += 4; dyn->native_size += 4; \ + dyn->insts[ninst].size2 += 4; \ + }while(0) + #define SETMARK(A) MESSAGE(LOG_DUMP, "Mark(%d)=%p\n", A, dyn->block) +#else +#error Meh! +#endif +#define STEP_PASS +#include "../dynarec_helper.h" + +/* + Will generate a dynablock that does UpdateFlags. x0 = x64emu_t, x1 = df + So read the current df, set df to None, and jump to the correct function + using a static jump table. + Only x1..x5 regs will be used. No saving of SIMD regs needed. 
+ LR will be used for return, and x0 needs to be corrctly setup as xEmu + flags will be changed +*/ + +void updateflags_pass(dynarec_arm_t* dyn, uint64_t jmp_df[]) +{ + int ninst = 0; + rex_t rex = {0}; + LDRw_U12(x1, xEmu, offsetof(x64emu_t, df)); + STRw_U12(xZR, xEmu, offsetof(x64emu_t, df)); + CMPSw_U12(x1, d_unknown); + Bcond(cLT, 4+4); + RET(xLR); + ADR_S20(x2, 4+8); + ADDx_REG_LSL(x1, x2, x1, 2); + BR(x1); + for(int i=d_none; i<d_unknown; ++i) + B(jmp_df[i] - dyn->native_size); +SETMARK(d_none); + RET(xLR); +SETMARK(d_add8); +SETMARK(d_add8b); + LDRB_U12(x1, xEmu, offsetof(x64emu_t, op1)); + LDRB_U12(x2, xEmu, offsetof(x64emu_t, op2)); + emit_add8(dyn, ninst, x1, x2, x3, x4); + RET(xLR); +SETMARK(d_add16); +SETMARK(d_add16b); + LDRH_U12(x1, xEmu, offsetof(x64emu_t, op1)); + LDRH_U12(x2, xEmu, offsetof(x64emu_t, op2)); + emit_add16(dyn, ninst, x1, x2, x3, x4); + RET(xLR); +SETMARK(d_add32); +SETMARK(d_add32b); + LDRw_U12(x1, xEmu, offsetof(x64emu_t, op1)); + LDRw_U12(x2, xEmu, offsetof(x64emu_t, op2)); + rex.w = 0; + emit_add32(dyn, ninst, rex, x1, x2, x3, x4); + RET(xLR); +SETMARK(d_add64); + LDRx_U12(x1, xEmu, offsetof(x64emu_t, op1)); + LDRx_U12(x2, xEmu, offsetof(x64emu_t, op2)); + rex.w = 1; + emit_add32(dyn, ninst, rex, x1, x2, x3, x4); + rex.w = 0; + RET(xLR); +SETMARK(d_and8); + LDRB_U12(x1, xEmu, offsetof(x64emu_t, res)); + MOV32w(x2, 0xff); + emit_and8(dyn, ninst, x1, x2, x3, x4); + RET(xLR); +SETMARK(d_and16); + LDRH_U12(x1, xEmu, offsetof(x64emu_t, res)); + MOV32w(x2, 0xffff); + emit_and16(dyn, ninst, x1, x2, x3, x4); + RET(xLR); +SETMARK(d_and32); + LDRw_U12(x1, xEmu, offsetof(x64emu_t, res)); + MOV32w(x2, 0xffffffff); + rex.w = 0; + emit_and32(dyn, ninst, rex, x1, x2, x3, x4); + RET(xLR); +SETMARK(d_and64); + LDRx_U12(x1, xEmu, offsetof(x64emu_t, res)); + MOV64x(x2, 0xffffffffffffffffULL); + rex.w = 1; + emit_and32(dyn, ninst, rex, x1, x2, x3, x4); + rex.w = 0; + RET(xLR); +SETMARK(d_dec8); + LDRB_U12(x1, xEmu, offsetof(x64emu_t, op1)); + emit_dec8(dyn, ninst, x1, x3, x4); + RET(xLR); +SETMARK(d_dec16); + LDRH_U12(x1, xEmu, offsetof(x64emu_t, op1)); + emit_dec16(dyn, ninst, x1, x3, x4); + RET(xLR); +SETMARK(d_dec32); + LDRw_U12(x1, xEmu, offsetof(x64emu_t, op1)); + rex.w = 0; + emit_dec32(dyn, ninst, rex, x1, x3, x4); + RET(xLR); +SETMARK(d_dec64); + LDRx_U12(x1, xEmu, offsetof(x64emu_t, op1)); + rex.w = 1; + emit_dec32(dyn, ninst, rex, x1, x3, x4); + rex.w = 0; + RET(xLR); +SETMARK(d_inc8); + LDRB_U12(x1, xEmu, offsetof(x64emu_t, op1)); + emit_inc8(dyn, ninst, x1, x3, x4); + RET(xLR); +SETMARK(d_inc16); + LDRH_U12(x1, xEmu, offsetof(x64emu_t, op1)); + emit_inc16(dyn, ninst, x1, x3, x4); + RET(xLR); +SETMARK(d_inc32); + LDRw_U12(x1, xEmu, offsetof(x64emu_t, op1)); + rex.w = 0; + emit_inc32(dyn, ninst, rex, x1, x3, x4); + RET(xLR); +SETMARK(d_inc64); + LDRx_U12(x1, xEmu, offsetof(x64emu_t, op1)); + rex.w = 1; + emit_inc32(dyn, ninst, rex, x1, x3, x4); + rex.w = 0; + RET(xLR); +SETMARK(d_imul8); + LDRSH_U12(x1, xEmu, offsetof(x64emu_t, res)); + ASRxw(x2, x1, 8); + CMPSw_REG_ASR(x2, x1, 16); + CSETw(x3, cNE); + BFIw(xFlags, x3, F_CF, 1); + BFIw(xFlags, x3, F_OF, 1); + if(!BOX64ENV(cputype)) { + LSRw(x2, x1, 7); + BFIw(xFlags, x2, F_SF, 1); + BFCw(xFlags, F_ZF, 1); + BFCw(xFlags, F_AF, 1); + emit_pf(dyn, ninst, x1, x4); + } + RET(xLR); +SETMARK(d_imul16); + LDRw_U12(x1, xEmu, offsetof(x64emu_t, res)); + ASRw(x2, x1, 16); + CMPSw_REG_ASR(x2, x1, 31); + CSETw(x3, cNE); + BFIw(xFlags, x3, F_CF, 1); + BFIw(xFlags, x3, F_OF, 1); + if(!BOX64ENV(cputype)) { + LSRw(x2, x1, 15); + 
BFIw(xFlags, x2, F_SF, 1); + BFCw(xFlags, F_ZF, 1); + BFCw(xFlags, F_AF, 1); + emit_pf(dyn, ninst, x1, x4); + } + RET(xLR); +SETMARK(d_imul32); + LDRw_U12(x1, xEmu, offsetof(x64emu_t, res)); + LDRw_U12(x2, xEmu, offsetof(x64emu_t, op1)); + CMPSw_REG_ASR(x2, x1, 31); + CSETw(x3, cNE); + BFIw(xFlags, x3, F_CF, 1); + BFIw(xFlags, x3, F_OF, 1); + if(!BOX64ENV(cputype)) { + LSRw(x2, x1, 31); + BFIw(xFlags, x2, F_SF, 1); + BFCw(xFlags, F_ZF, 1); + BFCw(xFlags, F_AF, 1); + emit_pf(dyn, ninst, x1, x4); + } + RET(xLR); +SETMARK(d_imul64); + LDRx_U12(x1, xEmu, offsetof(x64emu_t, res)); + LDRx_U12(x2, xEmu, offsetof(x64emu_t, op1)); + CMPSx_REG_ASR(x2, x1, 63); + CSETw(x3, cNE); + BFIw(xFlags, x3, F_CF, 1); + BFIw(xFlags, x3, F_OF, 1); + if(!BOX64ENV(cputype)) { + LSRx(x2, x1, 63); + BFIw(xFlags, x2, F_SF, 1); + BFCw(xFlags, F_ZF, 1); + BFCw(xFlags, F_AF, 1); + emit_pf(dyn, ninst, x1, x4); + } + RET(xLR); +SETMARK(d_or8); + LDRB_U12(x1, xEmu, offsetof(x64emu_t, res)); + MOV32w(x2, 0); + emit_or8(dyn, ninst, x1, x2, x3, x4); + RET(xLR); +SETMARK(d_or16); + LDRH_U12(x1, xEmu, offsetof(x64emu_t, res)); + MOV32w(x2, 0); + emit_or16(dyn, ninst, x1, x2, x3, x4); + RET(xLR); +SETMARK(d_or32); + LDRw_U12(x1, xEmu, offsetof(x64emu_t, res)); + MOV32w(x2, 0); + rex.w = 0; + emit_or32(dyn, ninst, rex, x1, x2, x3, x4); + RET(xLR); +SETMARK(d_or64); + LDRx_U12(x1, xEmu, offsetof(x64emu_t, res)); + MOV64x(x2, 0); + rex.w = 1; + emit_or32(dyn, ninst, rex, x1, x2, x3, x4); + rex.w = 0; + RET(xLR); +SETMARK(d_mul8); + LDRH_U12(x1, xEmu, offsetof(x64emu_t, res)); + CMPSw_REG_LSR(xZR, x1, 8); + CSETw(x3, cNE); + BFIw(xFlags, x3, F_CF, 1); + BFIw(xFlags, x3, F_OF, 1); + if(!BOX64ENV(cputype)) { + LSRw(x2, x1, 7); + BFIw(xFlags, x2, F_SF, 1); + BFCw(xFlags, F_ZF, 1); + BFCw(xFlags, F_AF, 1); + emit_pf(dyn, ninst, x1, x4); + } + RET(xLR); +SETMARK(d_mul16); + LDRw_U12(x1, xEmu, offsetof(x64emu_t, res)); + CMPSw_REG_LSR(xZR, x1, 16); + CSETw(x3, cNE); + BFIw(xFlags, x3, F_CF, 1); + BFIw(xFlags, x3, F_OF, 1); + if(!BOX64ENV(cputype)) { + LSRw(x2, x1, 15); + BFIw(xFlags, x2, F_SF, 1); + BFCw(xFlags, F_ZF, 1); + BFCw(xFlags, F_AF, 1); + emit_pf(dyn, ninst, x1, x4); + } + RET(xLR); +SETMARK(d_mul32); + LDRw_U12(x2, xEmu, offsetof(x64emu_t, op1)); + CMPSw_U12(x2, 0); + CSETw(x3, cNE); + BFIw(xFlags, x3, F_CF, 1); + BFIw(xFlags, x3, F_OF, 1); + if(!BOX64ENV(cputype)) { + LDRw_U12(x1, xEmu, offsetof(x64emu_t, res)); + LSRw(x2, x1, 31); + BFIw(xFlags, x2, F_SF, 1); + BFCw(xFlags, F_ZF, 1); + BFCw(xFlags, F_AF, 1); + emit_pf(dyn, ninst, x1, x4); + } + RET(xLR); +SETMARK(d_mul64); + LDRx_U12(x2, xEmu, offsetof(x64emu_t, op1)); + CMPSx_U12(x2, 0); + CSETw(x3, cNE); + BFIw(xFlags, x3, F_CF, 1); + BFIw(xFlags, x3, F_OF, 1); + if(!BOX64ENV(cputype)) { + LDRx_U12(x1, xEmu, offsetof(x64emu_t, res)); + LSRx(x2, x1, 63); + BFIw(xFlags, x2, F_SF, 1); + BFCw(xFlags, F_ZF, 1); + BFCw(xFlags, F_AF, 1); + emit_pf(dyn, ninst, x1, x4); + } + RET(xLR); +SETMARK(d_neg8); + LDRB_U12(x1, xEmu, offsetof(x64emu_t, op1)); + emit_neg8(dyn, ninst, x1, x3, x4); + RET(xLR); +SETMARK(d_neg16); + LDRH_U12(x1, xEmu, offsetof(x64emu_t, op1)); + emit_neg16(dyn, ninst, x1, x3, x4); + RET(xLR); +SETMARK(d_neg32); + LDRw_U12(x1, xEmu, offsetof(x64emu_t, op1)); + rex.w = 0; + emit_neg32(dyn, ninst, rex, x1, x3, x4); + RET(xLR); +SETMARK(d_neg64); + LDRx_U12(x1, xEmu, offsetof(x64emu_t, op1)); + rex.w = 1; + emit_neg32(dyn, ninst, rex, x1, x3, x4); + rex.w = 0; + RET(xLR); +SETMARK(d_shl8); + LDRB_U12(x1, xEmu, offsetof(x64emu_t, op1)); + LDRB_U12(x2, xEmu, 
offsetof(x64emu_t, op2)); + ANDSw_mask(x2, x2, 0, 4); + Bcond(cNE, 4+4); + RET(xLR); + emit_shl8(dyn, ninst, x1, x2, x3, x4); + RET(xLR); +SETMARK(d_shl16); + LDRH_U12(x1, xEmu, offsetof(x64emu_t, op1)); + LDRH_U12(x2, xEmu, offsetof(x64emu_t, op2)); + ANDSw_mask(x2, x2, 0, 4); + Bcond(cNE, 4+4); + RET(xLR); + emit_shl16(dyn, ninst, x1, x2, x3, x4); + RET(xLR); +SETMARK(d_shl32); + LDRw_U12(x1, xEmu, offsetof(x64emu_t, op1)); + LDRw_U12(x2, xEmu, offsetof(x64emu_t, op2)); + ANDSw_mask(x2, x2, 0, 4); + Bcond(cNE, 4+4); + RET(xLR); + rex.w = 0; + emit_shl32(dyn, ninst, rex, x1, x2, x3, x4); + RET(xLR); +SETMARK(d_shl64); + LDRx_U12(x1, xEmu, offsetof(x64emu_t, op1)); + LDRx_U12(x2, xEmu, offsetof(x64emu_t, op2)); + ANDSw_mask(x2, x2, 0, 5); + Bcond(cNE, 4+4); + RET(xLR); + rex.w = 1; + emit_shl32(dyn, ninst, rex, x1, x2, x3, x4); + rex.w = 0; + RET(xLR); +SETMARK(d_shr8); + LDRB_U12(x1, xEmu, offsetof(x64emu_t, op1)); + LDRB_U12(x2, xEmu, offsetof(x64emu_t, op2)); + ANDSw_mask(x2, x2, 0, 4); + Bcond(cNE, 4+4); + RET(xLR); + emit_shr8(dyn, ninst, x1, x2, x3, x4); + RET(xLR); +SETMARK(d_shr16); + LDRH_U12(x1, xEmu, offsetof(x64emu_t, op1)); + LDRH_U12(x2, xEmu, offsetof(x64emu_t, op2)); + ANDSw_mask(x2, x2, 0, 4); + Bcond(cNE, 4+4); + RET(xLR); + emit_shr16(dyn, ninst, x1, x2, x3, x4); + RET(xLR); +SETMARK(d_shr32); + LDRw_U12(x1, xEmu, offsetof(x64emu_t, op1)); + LDRw_U12(x2, xEmu, offsetof(x64emu_t, op2)); + ANDSw_mask(x2, x2, 0, 4); + Bcond(cNE, 4+4); + RET(xLR); + rex.w = 0; + emit_shr32(dyn, ninst, rex, x1, x2, x3, x4); + RET(xLR); +SETMARK(d_shr64); + LDRx_U12(x1, xEmu, offsetof(x64emu_t, op1)); + LDRx_U12(x2, xEmu, offsetof(x64emu_t, op2)); + ANDSw_mask(x2, x2, 0, 5); + Bcond(cNE, 4+4); + RET(xLR); + rex.w = 1; + emit_shr32(dyn, ninst, rex, x1, x2, x3, x4); + rex.w = 0; + RET(xLR); +SETMARK(d_sar8); + LDRSB_U12(x1, xEmu, offsetof(x64emu_t, op1)); + LDRB_U12(x2, xEmu, offsetof(x64emu_t, op2)); + ANDSw_mask(x2, x2, 0, 4); + Bcond(cNE, 4+4); + RET(xLR); + emit_sar8(dyn, ninst, x1, x2, x3, x4); + RET(xLR); +SETMARK(d_sar16); + LDRSH_U12(x1, xEmu, offsetof(x64emu_t, op1)); + LDRH_U12(x2, xEmu, offsetof(x64emu_t, op2)); + ANDSw_mask(x2, x2, 0, 4); + Bcond(cNE, 4+4); + RET(xLR); + emit_sar16(dyn, ninst, x1, x2, x3, x4); + RET(xLR); +SETMARK(d_sar32); + LDRw_U12(x1, xEmu, offsetof(x64emu_t, op1)); + LDRw_U12(x2, xEmu, offsetof(x64emu_t, op2)); + ANDSw_mask(x2, x2, 0, 4); + Bcond(cNE, 4+4); + RET(xLR); + rex.w = 0; + emit_sar32(dyn, ninst, rex, x1, x2, x3, x4); + RET(xLR); +SETMARK(d_sar64); + LDRx_U12(x1, xEmu, offsetof(x64emu_t, op1)); + LDRx_U12(x2, xEmu, offsetof(x64emu_t, op2)); + ANDSw_mask(x2, x2, 0, 5); + Bcond(cNE, 4+4); + RET(xLR); + rex.w = 1; + emit_sar32(dyn, ninst, rex, x1, x2, x3, x4); + rex.w = 0; + RET(xLR); +SETMARK(d_sub8); + LDRB_U12(x1, xEmu, offsetof(x64emu_t, op1)); + LDRB_U12(x2, xEmu, offsetof(x64emu_t, op2)); + emit_sub8(dyn, ninst, x1, x2, x3, x4); + RET(xLR); +SETMARK(d_sub16); + LDRH_U12(x1, xEmu, offsetof(x64emu_t, op1)); + LDRH_U12(x2, xEmu, offsetof(x64emu_t, op2)); + emit_sub16(dyn, ninst, x1, x2, x3, x4); + RET(xLR); +SETMARK(d_sub32); + LDRw_U12(x1, xEmu, offsetof(x64emu_t, op1)); + LDRw_U12(x2, xEmu, offsetof(x64emu_t, op2)); + rex.w = 0; + emit_sub32(dyn, ninst, rex, x1, x2, x3, x4); + RET(xLR); +SETMARK(d_sub64); + LDRx_U12(x1, xEmu, offsetof(x64emu_t, op1)); + LDRx_U12(x2, xEmu, offsetof(x64emu_t, op2)); + rex.w = 1; + emit_sub32(dyn, ninst, rex, x1, x2, x3, x4); + rex.w = 0; + RET(xLR); +SETMARK(d_xor8); + LDRB_U12(x1, xEmu, offsetof(x64emu_t, res)); + 
MOV32w(x2, 0); + emit_xor8(dyn, ninst, x1, x2, x3, x4); + RET(xLR); +SETMARK(d_xor16); + LDRH_U12(x1, xEmu, offsetof(x64emu_t, res)); + MOV32w(x2, 0); + emit_xor16(dyn, ninst, x1, x2, x3, x4); + RET(xLR); +SETMARK(d_xor32); + LDRw_U12(x1, xEmu, offsetof(x64emu_t, res)); + MOV32w(x2, 0); + rex.w = 0; + emit_xor32(dyn, ninst, rex, x1, x2, x3, x4); + RET(xLR); +SETMARK(d_xor64); + LDRx_U12(x1, xEmu, offsetof(x64emu_t, res)); + MOV64x(x2, 0); + rex.w = 1; + emit_xor32(dyn, ninst, rex, x1, x2, x3, x4); + rex.w = 0; + RET(xLR); +SETMARK(d_cmp8); + LDRB_U12(x1, xEmu, offsetof(x64emu_t, op1)); + LDRB_U12(x2, xEmu, offsetof(x64emu_t, op2)); + emit_cmp8(dyn, ninst, x1, x2, x3, x4, x5); + RET(xLR); +SETMARK(d_cmp16); + LDRH_U12(x1, xEmu, offsetof(x64emu_t, op1)); + LDRH_U12(x2, xEmu, offsetof(x64emu_t, op2)); + emit_cmp16(dyn, ninst, x1, x2, x3, x4, x5); + RET(xLR); +SETMARK(d_cmp32); + LDRw_U12(x1, xEmu, offsetof(x64emu_t, op1)); + LDRw_U12(x2, xEmu, offsetof(x64emu_t, op2)); + rex.w = 0; + emit_cmp32(dyn, ninst, rex, x1, x2, x3, x4, x5); + RET(xLR); +SETMARK(d_cmp64); + LDRx_U12(x1, xEmu, offsetof(x64emu_t, op1)); + LDRx_U12(x2, xEmu, offsetof(x64emu_t, op2)); + rex.w = 1; + emit_cmp32(dyn, ninst, rex, x1, x2, x3, x4, x5); + rex.w = 0; + RET(xLR); +SETMARK(d_tst8); + LDRB_U12(x1, xEmu, offsetof(x64emu_t, res)); + MOV32w(x2, 0xff); + emit_test8(dyn, ninst, x1, x2, x3, x4, x5); + RET(xLR); +SETMARK(d_tst16); + LDRH_U12(x1, xEmu, offsetof(x64emu_t, res)); + MOV32w(x2, 0xffff); + emit_test16(dyn, ninst, x1, x2, x3, x4, x5); + RET(xLR); +SETMARK(d_tst32); + LDRw_U12(x1, xEmu, offsetof(x64emu_t, res)); + MOV32w(x2, 0xffffffff); + rex.w = 0; + emit_test32(dyn, ninst, rex, x1, x2, x3, x4, x5); + RET(xLR); +SETMARK(d_tst64); + LDRx_U12(x1, xEmu, offsetof(x64emu_t, res)); + MOV64x(x2, 0xffffffffffffffffULL); + rex.w = 1; + emit_test32(dyn, ninst, rex, x1, x2, x3, x4, x5); + rex.w = 0; + RET(xLR); +// for ADC & SBB, the emit_adcX cannot be used because the CF state is not saved +SETMARK(d_adc8); + LDRH_U12(x1, xEmu, offsetof(x64emu_t, res)); + BFXILw(xFlags, x1, 8, 1); //F_CF + LSRw(x2, x1, 7); + BFIw(xFlags, x2, F_SF, 1); + TSTw_mask(x1, 0, 7); // mask 0xff + CSETw(x2, cEQ); + BFIw(xFlags, x2, F_ZF, 1); + LDRB_U12(x2, xEmu, offsetof(x64emu_t, op1)); + LDRB_U12(x3, xEmu, offsetof(x64emu_t, op2)); + ANDw_REG(x4, x2, x3); // op1 & op2 + ORRw_REG(x2, x2, x3); // op1 | op2 + BICw_REG(x2, x2, x1); // ~res & (op1 | op2) + ORRw_REG(x2, x2, x4); // CC + LSRw(x3, x2, 3); + BFIw(xFlags, x3, F_AF, 1); + LSRw(x3, x2, 6); + EORw_REG_LSR(x3, x3, x3, 1); + BFIw(xFlags, x3, F_OF, 1); + emit_pf(dyn, ninst, x1, x4); + RET(xLR); +SETMARK(d_adc16); + LDRw_U12(x1, xEmu, offsetof(x64emu_t, res)); + BFXILw(xFlags, x1, 16, 1); //F_CF + LSRw(x2, x1, 15); + BFIw(xFlags, x2, F_SF, 1); + TSTw_mask(x1, 0, 15); // mask 0xffff + CSETw(x2, cEQ); + BFIw(xFlags, x2, F_ZF, 1); + LDRH_U12(x2, xEmu, offsetof(x64emu_t, op1)); + LDRH_U12(x3, xEmu, offsetof(x64emu_t, op2)); + ANDw_REG(x4, x2, x3); // op1 & op2 + ORRw_REG(x2, x2, x3); // op1 | op2 + BICw_REG(x2, x2, x1); // ~res & (op1 | op2) + ORRw_REG(x2, x2, x4); // CC + LSRw(x3, x2, 3); + BFIw(xFlags, x3, F_AF, 1); + LSRw(x3, x2, 14); + EORw_REG_LSR(x3, x3, x3, 1); + BFIw(xFlags, x3, F_OF, 1); + emit_pf(dyn, ninst, x1, x4); + RET(xLR); +SETMARK(d_adc32); + LDRx_U12(x1, xEmu, offsetof(x64emu_t, res)); + BFXILx(xFlags, x1, 32, 1); //F_CF + LSRw(x2, x1, 31); + BFIw(xFlags, x2, F_SF, 1); + TSTw_REG(x1, x1); + CSETw(x2, cEQ); + BFIw(xFlags, x2, F_ZF, 1); + LDRw_U12(x2, xEmu, offsetof(x64emu_t, op1)); 
+ LDRw_U12(x3, xEmu, offsetof(x64emu_t, op2)); + ANDw_REG(x4, x2, x3); // op1 & op2 + ORRw_REG(x2, x2, x3); // op1 | op2 + BICw_REG(x2, x2, x1); // ~res & (op1 | op2) + ORRw_REG(x2, x2, x4); // CC + LSRw(x3, x2, 3); + BFIw(xFlags, x3, F_AF, 1); + LSRw(x3, x2, 30); + EORw_REG_LSR(x3, x3, x3, 1); + BFIw(xFlags, x3, F_OF, 1); + emit_pf(dyn, ninst, x1, x4); + RET(xLR); +SETMARK(d_adc32b); + LDRw_U12(x1, xEmu, offsetof(x64emu_t, res)); + LSRw(x2, x1, 31); + BFIw(xFlags, x2, F_SF, 1); + TSTw_REG(x1, x1); + CSETw(x2, cEQ); + BFIw(xFlags, x2, F_ZF, 1); + LDRw_U12(x2, xEmu, offsetof(x64emu_t, op1)); + LDRw_U12(x3, xEmu, offsetof(x64emu_t, op2)); + ADDw_REG(x4, x2, x3); + CMPSw_REG(x1, x4); + CSETw(x4, cNE); + ADDx_REG(x4, x4, x2); + ADDx_REG(x4, x4, x3); + BFXILx(xFlags, x4, 32, 1); //F_CF + ANDw_REG(x4, x2, x3); // op1 & op2 + ORRw_REG(x2, x2, x3); // op1 | op2 + BICw_REG(x2, x2, x1); // ~res & (op1 | op2) + ORRw_REG(x2, x2, x4); // CC + LSRw(x3, x2, 3); + BFIw(xFlags, x3, F_AF, 1); + LSRw(x3, x2, 30); + EORw_REG_LSR(x3, x3, x3, 1); + BFIw(xFlags, x3, F_OF, 1); + emit_pf(dyn, ninst, x1, x4); + RET(xLR); +SETMARK(d_adc64); + LDRx_U12(x1, xEmu, offsetof(x64emu_t, res)); + LSRx(x2, x1, 63); + BFIw(xFlags, x2, F_SF, 1); + TSTx_REG(x1, x1); + CSETw(x2, cEQ); + BFIw(xFlags, x2, F_ZF, 1); + LDRx_U12(x2, xEmu, offsetof(x64emu_t, op1)); + LDRx_U12(x3, xEmu, offsetof(x64emu_t, op2)); + ADDx_REG(x4, x2, x3); + CMPSx_REG(x1, x4); + CSETw(x4, cNE); + ADDx_UXTW(x4, x4, x2); + ADDx_UXTW(x4, x4, x3); // x4 = lo + LSRx(x4, x4, 32); + ADDx_REG_LSR(x4, x4, x2, 32); + ADDx_REG_LSR(x4, x4, x3, 32); // hi + BFXILx(xFlags, x4, 32, 1); //F_CF + ANDx_REG(x4, x2, x3); // op1 & op2 + ORRx_REG(x2, x2, x3); // op1 | op2 + BICx_REG(x2, x2, x1); // ~res & (op1 | op2) + ORRx_REG(x2, x2, x4); // CC + LSRx(x3, x2, 3); + BFIx(xFlags, x3, F_AF, 1); + LSRx(x3, x2, 62); + EORw_REG_LSR(x3, x3, x3, 1); + BFIw(xFlags, x3, F_OF, 1); + emit_pf(dyn, ninst, x1, x4); + RET(xLR); +SETMARK(d_sbb8); + LDRB_U12(x1, xEmu, offsetof(x64emu_t, res)); + LSRw(x2, x1, 7); + BFIw(xFlags, x2, F_SF, 1); + TSTw_mask(x1, 0, 7); // mask 0xff + CSETw(x2, cEQ); + BFIw(xFlags, x2, F_ZF, 1); + LDRB_U12(x2, xEmu, offsetof(x64emu_t, op1)); + LDRB_U12(x3, xEmu, offsetof(x64emu_t, op2)); + BICw_REG(x4, x3, x2); // ~op1 & op2 + ORNw_REG(x2, x3, x2); // ~op1 | op2 + ANDw_REG(x2, x2, x1); // res & (~op1 | op2) + ORRw_REG(x2, x2, x4); // CC + BFXILw(xFlags, x2, 7, 1); + LSRw(x3, x2, 3); + BFIw(xFlags, x3, F_AF, 1); + LSRw(x3, x2, 6); + EORw_REG_LSR(x3, x3, x3, 1); + BFIw(xFlags, x3, F_OF, 1); + emit_pf(dyn, ninst, x1, x4); + RET(xLR); +SETMARK(d_sbb16); + LDRH_U12(x1, xEmu, offsetof(x64emu_t, res)); + LSRw(x2, x1, 15); + BFIw(xFlags, x2, F_SF, 1); + TSTw_mask(x1, 0, 15); // mask 0xffff + CSETw(x2, cEQ); + BFIw(xFlags, x2, F_ZF, 1); + LDRH_U12(x2, xEmu, offsetof(x64emu_t, op1)); + LDRH_U12(x3, xEmu, offsetof(x64emu_t, op2)); + BICw_REG(x4, x3, x2); // ~op1 & op2 + ORNw_REG(x2, x3, x2); // ~op1 | op2 + ANDw_REG(x2, x2, x1); // res & (~op1 | op2) + ORRw_REG(x2, x2, x4); // CC + BFXILw(xFlags, x2, 15, 1); + LSRw(x3, x2, 3); + BFIw(xFlags, x3, F_AF, 1); + LSRw(x3, x2, 14); + EORw_REG_LSR(x3, x3, x3, 1); + BFIw(xFlags, x3, F_OF, 1); + emit_pf(dyn, ninst, x1, x4); + RET(xLR); +SETMARK(d_sbb32); + LDRw_U12(x1, xEmu, offsetof(x64emu_t, res)); + LSRw(x2, x1, 31); + BFIw(xFlags, x2, F_SF, 1); + TSTw_REG(x1, x1); + CSETw(x2, cEQ); + BFIw(xFlags, x2, F_ZF, 1); + LDRw_U12(x2, xEmu, offsetof(x64emu_t, op1)); + LDRw_U12(x3, xEmu, offsetof(x64emu_t, op2)); + BICw_REG(x4, x3, x2); // ~op1 
& op2 + ORNw_REG(x2, x3, x2); // ~op1 | op2 + ANDw_REG(x2, x2, x1); // res & (~op1 | op2) + ORRw_REG(x2, x2, x4); // CC + BFXILw(xFlags, x2, 31, 1); + LSRw(x3, x2, 3); + BFIw(xFlags, x3, F_AF, 1); + LSRw(x3, x2, 30); + EORw_REG_LSR(x3, x3, x3, 1); + BFIw(xFlags, x3, F_OF, 1); + emit_pf(dyn, ninst, x1, x4); + RET(xLR); +SETMARK(d_sbb64); + LDRx_U12(x1, xEmu, offsetof(x64emu_t, res)); + LSRx(x2, x1, 63); + BFIw(xFlags, x2, F_SF, 1); + TSTx_REG(x1, x1); + CSETw(x2, cEQ); + BFIw(xFlags, x2, F_ZF, 1); + LDRx_U12(x2, xEmu, offsetof(x64emu_t, op1)); + LDRx_U12(x3, xEmu, offsetof(x64emu_t, op2)); + BICx_REG(x4, x3, x2); // ~op1 & op2 + ORNx_REG(x2, x3, x2); // ~op1 | op2 + ANDx_REG(x2, x2, x1); // res & (~op1 | op2) + ORRx_REG(x2, x2, x4); // CC + BFXILx(xFlags, x2, 63, 1); + LSRw(x3, x2, 3); + BFIw(xFlags, x3, F_AF, 1); + LSRx(x3, x2, 62); + EORw_REG_LSR(x3, x3, x3, 1); + BFIw(xFlags, x3, F_OF, 1); + emit_pf(dyn, ninst, x1, x4); + RET(xLR); +SETMARK(d_rol8); + LDRB_U12(x1, xEmu, offsetof(x64emu_t, res)); + if(BOX64ENV(cputype)) { + EORw_REG_LSR(x2, x1, x1, 7); + BFIw(xFlags, x2, F_OF, 1); + } else { + LDRB_U12(x2, xEmu, offsetof(x64emu_t, op1)); + LSLw_IMM(x3, x2, 6); + EORw_REG_LSR(x3, x3, x3, 1); + BFIw(xFlags, x3, F_OF, 1); + } + BFXILw(xFlags, x1, 0, 1); + RET(xLR); +SETMARK(d_rol16); + LDRH_U12(x1, xEmu, offsetof(x64emu_t, res)); + if(BOX64ENV(cputype)) { + EORw_REG_LSR(x2, x1, x1, 15); + BFIw(xFlags, x2, F_OF, 1); + } else { + LDRH_U12(x2, xEmu, offsetof(x64emu_t, op1)); + LSLw_IMM(x3, x2, 14); + EORw_REG_LSR(x3, x3, x3, 1); + BFIw(xFlags, x3, F_OF, 1); + } + BFXILw(xFlags, x1, 0, 1); + RET(xLR); +SETMARK(d_rol32); + LDRw_U12(x1, xEmu, offsetof(x64emu_t, res)); + if(BOX64ENV(cputype)) { + EORw_REG_LSR(x2, x1, x1, 31); + BFIw(xFlags, x2, F_OF, 1); + } else { + LDRw_U12(x2, xEmu, offsetof(x64emu_t, op1)); + LSLw_IMM(x3, x2, 30); + EORw_REG_LSR(x3, x3, x3, 1); + BFIw(xFlags, x3, F_OF, 1); + } + BFXILw(xFlags, x1, 0, 1); + RET(xLR); +SETMARK(d_rol64); + LDRx_U12(x1, xEmu, offsetof(x64emu_t, res)); + if(BOX64ENV(cputype)) { + EORx_REG_LSR(x2, x1, x1, 63); + BFIw(xFlags, x2, F_OF, 1); + } else { + LDRx_U12(x2, xEmu, offsetof(x64emu_t, op1)); + LSLx_IMM(x3, x2, 62); + EORw_REG_LSR(x3, x3, x3, 1); + BFIw(xFlags, x3, F_OF, 1); + } + BFXILw(xFlags, x1, 0, 1); + RET(xLR); +SETMARK(d_ror8); + LDRB_U12(x1, xEmu, offsetof(x64emu_t, res)); + if(BOX64ENV(cputype)) { + LSLw_IMM(x2, x1, 6); + EORw_REG_LSR(x3, x2, x2, 1); + BFIw(xFlags, x3, F_OF, 1); + } else { + LDRB_U12(x2, xEmu, offsetof(x64emu_t, op1)); + EORw_REG_LSR(x3, x2, x2, 7); + BFIw(xFlags, x3, F_OF, 1); + } + BFXILw(xFlags, x1, 7, 1); + RET(xLR); +SETMARK(d_ror16); + LDRH_U12(x1, xEmu, offsetof(x64emu_t, res)); + if(BOX64ENV(cputype)) { + LSLw_IMM(x2, x1, 14); + EORw_REG_LSR(x3, x2, x2, 1); + BFIw(xFlags, x3, F_OF, 1); + } else { + LDRH_U12(x2, xEmu, offsetof(x64emu_t, op1)); + EORw_REG_LSR(x3, x2, x2, 15); + BFIw(xFlags, x3, F_OF, 1); + } + BFXILw(xFlags, x1, 15, 1); + RET(xLR); +SETMARK(d_ror32); + LDRw_U12(x1, xEmu, offsetof(x64emu_t, res)); + if(BOX64ENV(cputype)) { + LSLw_IMM(x2, x1, 30); + EORw_REG_LSR(x3, x2, x2, 1); + BFIw(xFlags, x3, F_OF, 1); + } else { + LDRw_U12(x2, xEmu, offsetof(x64emu_t, op1)); + EORw_REG_LSR(x3, x2, x2, 31); + BFIw(xFlags, x3, F_OF, 1); + } + BFXILw(xFlags, x1, 31, 1); + RET(xLR); +SETMARK(d_ror64); + LDRx_U12(x1, xEmu, offsetof(x64emu_t, res)); + if(BOX64ENV(cputype)) { + LSLx_IMM(x2, x1, 62); + EORw_REG_LSR(x3, x2, x2, 1); + BFIw(xFlags, x3, F_OF, 1); + } else { + LDRx_U12(x2, xEmu, offsetof(x64emu_t, op1)); + 
EORx_REG_LSR(x3, x2, x2, 63); + BFIw(xFlags, x3, F_OF, 1); + } + BFXILx(xFlags, x1, 63, 1); + RET(xLR); +SETMARK(d_shrd16); + LDRH_U12(x1, xEmu, offsetof(x64emu_t, res)); + LDRH_U12(x2, xEmu, offsetof(x64emu_t, op1)); + LDRH_U12(x3, xEmu, offsetof(x64emu_t, op2)); + if(BOX64ENV(cputype)) { + LSRw(x4, x1, 14); + EORw_REG_LSR(x4, x4, x4, 1); + BFIw(xFlags, x4, F_OF, 1); + ORRw_mask(xFlags, xFlags, 28, 0); // mask = 0x10 + } else { + ANDw_mask(x4, x3, 0, 3); // mask = 0x0f + SUBw_U12(x4, x4, 16); + NEGw_REG(x4, x4); + LSRw_REG(x4, x1, x4); + EORw_REG_LSR(x4, x4, x2, 15); + BFIw(xFlags, x4, F_OF, 1); + BFCw(xFlags, F_AF, 1); + } + CBZw(x3, 4+4); + RET(xLR); + SUBw_U12(x4, x3, 1); + LSRw_REG(x4, x2, x4); + if(BOX64ENV(cputype)) { + CMPSw_U12(x3, 15); + CSELw(x4, x4, xZR, cGT); + } + BFIw(xFlags, x4, F_CF, 1); + LSRw(x4, x1, 15); + BFIw(xFlags, x4, F_SF, 1); + TSTw_mask(x1, 0, 15); + CSETw(x4, cEQ); + BFIw(xFlags, x4, F_ZF, 1); + emit_pf(dyn, ninst, x1, x4); + RET(xLR); +SETMARK(d_shrd32); + LDRw_U12(x1, xEmu, offsetof(x64emu_t, res)); + LDRw_U12(x2, xEmu, offsetof(x64emu_t, op1)); + LDRw_U12(x3, xEmu, offsetof(x64emu_t, op2)); + CBNZw(x3, 4+4); + RET(xLR); + if(BOX64ENV(cputype)) { + LSRw(x4, x1, 30); + EORw_REG_LSR(x4, x4, x4, 1); + BFIw(xFlags, x4, F_OF, 1); + ORRw_mask(xFlags, xFlags, 28, 0); // mask = 0x10 + } else { + SUBw_U12(x4, x3, 32); + NEGw_REG(x4, x4); + LSRw_REG(x4, x1, x4); + EORw_REG_LSR(x4, x4, x2, 31); + BFIw(xFlags, x4, F_OF, 1); + BFCw(xFlags, F_AF, 1); + } + SUBw_U12(x4, x3, 1); + LSRw_REG(x4, x2, x4); + BFIw(xFlags, x4, F_CF, 1); + LSRw(x4, x1, 31); + BFIw(xFlags, x4, F_SF, 1); + TSTw_REG(x1, x1); + CSETw(x4, cEQ); + BFIw(xFlags, x4, F_ZF, 1); + emit_pf(dyn, ninst, x1, x4); + RET(xLR); +SETMARK(d_shrd64); + LDRx_U12(x1, xEmu, offsetof(x64emu_t, res)); + LDRx_U12(x2, xEmu, offsetof(x64emu_t, op1)); + LDRx_U12(x3, xEmu, offsetof(x64emu_t, op2)); + CBNZw(x3, 4+4); + RET(xLR); + if(BOX64ENV(cputype)) { + LSRx(x4, x1, 62); + EORw_REG_LSR(x4, x4, x4, 1); + BFIw(xFlags, x4, F_OF, 1); + ORRw_mask(xFlags, xFlags, 28, 0); // mask = 0x10 + } else { + SUBw_U12(x4, x3, 64); + NEGw_REG(x4, x4); + LSRx_REG(x4, x1, x4); + EORx_REG_LSR(x4, x4, x2, 63); + BFIw(xFlags, x4, F_OF, 1); + BFCw(xFlags, F_AF, 1); + } + SUBx_U12(x4, x3, 1); + LSRx_REG(x4, x2, x4); + BFIw(xFlags, x4, F_CF, 1); + LSRx(x4, x1, 63); + BFIw(xFlags, x4, F_SF, 1); + TSTx_REG(x1, x1); + CSETw(x4, cEQ); + BFIw(xFlags, x4, F_ZF, 1); + emit_pf(dyn, ninst, x1, x4); + RET(xLR); +SETMARK(d_shld16); + LDRH_U12(x1, xEmu, offsetof(x64emu_t, res)); + LDRH_U12(x2, xEmu, offsetof(x64emu_t, op1)); + LDRH_U12(x3, xEmu, offsetof(x64emu_t, op2)); + CBNZw(x3, 4+4); + RET(xLR); + SUBw_U12(x4, x3, 16); + NEGw_REG(x4, x4); + LSRw_REG(x4, x2, x4); + BFIw(xFlags, x4, F_CF, 1); + if(BOX64ENV(cputype)) { + EORw_REG_LSR(x4, xFlags, x1, 15); + CMPSw_U12(x3, 15); + CSELw(x4, x4, xFlags, cGT); + BFIw(xFlags, x4, F_OF, 1); + ORRw_mask(xFlags, xFlags, 28, 0); // mask = 0x10 + } else { + LSRw(x4, x2, 14); + EORw_REG_LSR(x4, x4, x4, 1); + BFIw(xFlags, x4, F_OF, 1); + BFCw(xFlags, F_AF, 1); + } + LSRw(x4, x1, 15); + BFIw(xFlags, x4, F_SF, 1); + TSTw_mask(x1, 0, 15); + CSETw(x4, cEQ); + BFIw(xFlags, x4, F_ZF, 1); + emit_pf(dyn, ninst, x1, x4); + RET(xLR); +SETMARK(d_shld32); + LDRw_U12(x1, xEmu, offsetof(x64emu_t, res)); + LDRw_U12(x2, xEmu, offsetof(x64emu_t, op1)); + LDRw_U12(x3, xEmu, offsetof(x64emu_t, op2)); + CBNZw(x3, 4+4); + RET(xLR); + SUBw_U12(x4, x3, 32); + NEGw_REG(x4, x4); + LSRw_REG(x4, x2, x4); + BFIw(xFlags, x4, F_CF, 1); + 
if(BOX64ENV(cputype)) { + EORw_REG_LSR(x4, xFlags, x1, 31); + BFIw(xFlags, x4, F_OF, 1); + ORRw_mask(xFlags, xFlags, 28, 0); // mask = 0x10 + } else { + LSRw(x4, x2, 30); + EORw_REG_LSR(x4, x4, x4, 1); + BFIw(xFlags, x4, F_OF, 1); + BFCw(xFlags, F_AF, 1); + } + LSRw(x4, x1, 31); + BFIw(xFlags, x4, F_SF, 1); + TSTw_REG(x1, x1); + CSETw(x4, cEQ); + BFIw(xFlags, x4, F_ZF, 1); + emit_pf(dyn, ninst, x1, x4); + RET(xLR); +SETMARK(d_shld64); + LDRx_U12(x1, xEmu, offsetof(x64emu_t, res)); + LDRx_U12(x2, xEmu, offsetof(x64emu_t, op1)); + LDRx_U12(x3, xEmu, offsetof(x64emu_t, op2)); + CBNZx(x3, 4+4); + RET(xLR); + MOV32w(x4, 64); + SUBw_REG(x4, x4, x3); + LSRx_REG(x4, x2, x4); + BFIw(xFlags, x4, F_CF, 1); + if(BOX64ENV(cputype)) { + EORx_REG_LSR(x4, xFlags, x1, 63); + BFIw(xFlags, x4, F_OF, 1); + ORRw_mask(xFlags, xFlags, 28, 0); // mask = 0x10 + } else { + LSRx(x4, x2, 62); + EORw_REG_LSR(x4, x4, x4, 1); + BFIw(xFlags, x4, F_OF, 1); + BFCw(xFlags, F_AF, 1); + } + LSRx(x4, x1, 63); + BFIw(xFlags, x4, F_SF, 1); + TSTx_REG(x1, x1); + CSETw(x4, cEQ); + BFIw(xFlags, x4, F_ZF, 1); + emit_pf(dyn, ninst, x1, x4); + RET(xLR); +// all done! +} \ No newline at end of file diff --git a/src/dynarec/dynarec_arch.h b/src/dynarec/dynarec_arch.h index 761f8166..6df0b53c 100644 --- a/src/dynarec/dynarec_arch.h +++ b/src/dynarec/dynarec_arch.h @@ -34,6 +34,8 @@ #define ARCH_UNALIGNED(A, B) arch_unaligned(A, B) extern uint32_t arm64_crc(void* p, uint32_t len); #define ARCH_CRC(A, B) if(cpuext.crc32) return arm64_crc(A, B) +extern void* create_updateflags(); +#define ARCH_UPDATEFLAGS() create_updateflags() #define ARCH_NOP 0b11010101000000110010000000011111 #define ARCH_UDF 0xcafe diff --git a/src/dynarec/dynarec_native_functions.c b/src/dynarec/dynarec_native_functions.c index 79e558d6..49ea4a40 100644 --- a/src/dynarec/dynarec_native_functions.c +++ b/src/dynarec/dynarec_native_functions.c @@ -38,7 +38,7 @@ void native_fstp(x64emu_t* emu, void* p) void native_print_armreg(x64emu_t* emu, uintptr_t reg, uintptr_t n) { (void)emu; - dynarec_log(LOG_DEBUG, "R%lu=0x%lx (%lu)\n", n, reg, reg); + dynarec_log(LOG_INFO, "Debug Register R%lu=0x%lx (%lu)\n", n, reg, reg); } void native_f2xm1(x64emu_t* emu) |
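Note: as a rough guide to the mechanism introduced by updateflags_arm64_pass.c above, the following is an illustrative C sketch of what the generated UpdateFlags dynablock does at runtime. It is only an approximation under stated assumptions: the real block is emitted as ARM64 machine code and dispatches through an inline static jump table, and the names updateflags_sketch and flag_handlers are hypothetical stand-ins, not symbols from the commit.

    /* Illustrative sketch only: a C approximation of the dynablock emitted by
       create_updateflags(). Assumes the box64 x64emu_t type and the d_* deferred
       flag enum; flag_handlers is a hypothetical stand-in for the jump table. */
    static void updateflags_sketch(x64emu_t* emu)
    {
        uint32_t df = emu->df;      /* which deferred-flags case is pending */
        emu->df = d_none;           /* clear the pending state (STRw(xZR, df) in the real block) */
        if (df >= d_unknown)
            return;                 /* out-of-range selector: nothing to update */
        /* each handler reloads op1/op2/res from the emu structure, recomputes
           EFLAGS for that operation and width, then returns to the caller */
        flag_handlers[df](emu);
    }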