From 65c2b103b16b321a81c8d26db7c228f0ab2d1201 Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Thu, 17 Oct 2024 15:36:13 +0200 Subject: [ARM64_DYNAREC] Added directmapping of x86 flags to N, V and Z arm64 flags --- src/dynarec/arm64/arm64_emitter.h | 96 ++++++------ src/dynarec/arm64/dynarec_arm64_0f.c | 12 +- src/dynarec/arm64/dynarec_arm64_emit_logic.c | 220 +++++++++++++++++++-------- src/dynarec/arm64/dynarec_arm64_emit_math.c | 172 ++++++++++++++------- src/dynarec/arm64/dynarec_arm64_emit_shift.c | 199 ++++++++++++++++-------- src/dynarec/arm64/dynarec_arm64_emit_tests.c | 76 ++++++--- src/dynarec/arm64/dynarec_arm64_functions.c | 201 ++++++++++++++++++++++++ src/dynarec/arm64/dynarec_arm64_functions.h | 10 ++ src/dynarec/arm64/dynarec_arm64_helper.c | 58 +++++++ src/dynarec/arm64/dynarec_arm64_helper.h | 58 +++++++ src/dynarec/arm64/dynarec_arm64_pass0.h | 13 ++ src/dynarec/arm64/dynarec_arm64_private.h | 22 ++- src/dynarec/dynarec_arch.h | 15 +- src/dynarec/dynarec_native.c | 2 + 14 files changed, 897 insertions(+), 257 deletions(-) (limited to 'src') diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h index 4dc681e7..f5f0cc96 100644 --- a/src/dynarec/arm64/arm64_emitter.h +++ b/src/dynarec/arm64/arm64_emitter.h @@ -178,35 +178,35 @@ int convert_bitmask(uint64_t bitmask); // ADD / SUB #define ADDSUB_REG_gen(sf, op, S, shift, Rm, imm6, Rn, Rd) ((sf)<<31 | (op)<<30 | (S)<<29 | 0b01011<<24 | (shift)<<22 | (Rm)<<16 | (imm6)<<10 | (Rn)<<5 | (Rd)) #define ADDx_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(1, 0, 0, 0b00, Rm, 0, Rn, Rd)) -#define ADDSx_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(1, 0, 1, 0b00, Rm, 0, Rn, Rd)) +#define ADDSx_REG(Rd, Rn, Rm) FEMIT(ADDSUB_REG_gen(1, 0, 1, 0b00, Rm, 0, Rn, Rd)) #define ADDx_REG_LSL(Rd, Rn, Rm, lsl) EMIT(ADDSUB_REG_gen(1, 0, 0, 0b00, Rm, lsl, Rn, Rd)) #define ADDw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(0, 0, 0, 0b00, Rm, 0, Rn, Rd)) -#define ADDSw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(0, 0, 1, 0b00, Rm, 0, Rn, Rd)) +#define ADDSw_REG(Rd, Rn, Rm) FEMIT(ADDSUB_REG_gen(0, 0, 1, 0b00, Rm, 0, Rn, Rd)) #define ADDw_REG_LSL(Rd, Rn, Rm, lsl) EMIT(ADDSUB_REG_gen(0, 0, 0, 0b00, Rm, lsl, Rn, Rd)) #define ADDxw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(rex.w, 0, 0, 0b00, Rm, 0, Rn, Rd)) #define ADDz_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(rex.is32bits?0:1, 0, 0, 0b00, Rm, 0, Rn, Rd)) -#define ADDSxw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(rex.w, 0, 1, 0b00, Rm, 0, Rn, Rd)) +#define ADDSxw_REG(Rd, Rn, Rm) FEMIT(ADDSUB_REG_gen(rex.w, 0, 1, 0b00, Rm, 0, Rn, Rd)) #define ADDxw_REG_LSR(Rd, Rn, Rm, lsr) EMIT(ADDSUB_REG_gen(rex.w, 0, 0, 0b01, Rm, lsr, Rn, Rd)) #define ADDSUB_IMM_gen(sf, op, S, shift, imm12, Rn, Rd) ((sf)<<31 | (op)<<30 | (S)<<29 | 0b10001<<24 | (shift)<<22 | (imm12)<<10 | (Rn)<<5 | (Rd)) #define ADDx_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(1, 0, 0, 0b00, (imm12)&0xfff, Rn, Rd)) -#define ADDSx_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(1, 0, 1, 0b00, (imm12)&0xfff, Rn, Rd)) +#define ADDSx_U12(Rd, Rn, imm12) FEMIT(ADDSUB_IMM_gen(1, 0, 1, 0b00, (imm12)&0xfff, Rn, Rd)) #define ADDw_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(0, 0, 0, 0b00, (imm12)&0xfff, Rn, Rd)) -#define ADDSw_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(0, 0, 1, 0b00, (imm12)&0xfff, Rn, Rd)) +#define ADDSw_U12(Rd, Rn, imm12) FEMIT(ADDSUB_IMM_gen(0, 0, 1, 0b00, (imm12)&0xfff, Rn, Rd)) #define ADDxw_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(rex.w, 0, 0, 0b00, (imm12)&0xfff, Rn, Rd)) -#define ADDSxw_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(rex.w, 0, 1, 0b00, (imm12)&0xfff, Rn, Rd)) +#define ADDSxw_U12(Rd, Rn, 
imm12) FEMIT(ADDSUB_IMM_gen(rex.w, 0, 1, 0b00, (imm12)&0xfff, Rn, Rd)) #define ADDz_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(rex.is32bits?0:1, 0, 0, 0b00, (imm12)&0xfff, Rn, Rd)) #define SUBx_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(1, 1, 0, 0b00, Rm, 0, Rn, Rd)) -#define SUBSx_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(1, 1, 1, 0b00, Rm, 0, Rn, Rd)) +#define SUBSx_REG(Rd, Rn, Rm) FEMIT(ADDSUB_REG_gen(1, 1, 1, 0b00, Rm, 0, Rn, Rd)) #define SUBx_REG_LSL(Rd, Rn, Rm, lsl) EMIT(ADDSUB_REG_gen(1, 1, 0, 0b00, Rm, lsl, Rn, Rd)) #define SUBw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(0, 1, 0, 0b00, Rm, 0, Rn, Rd)) #define SUBw_REG_LSL(Rd, Rn, Rm, lsl) EMIT(ADDSUB_REG_gen(0, 1, 0, 0b00, Rm, lsl, Rn, Rd)) -#define SUBSw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(0, 1, 1, 0b00, Rm, 0, Rn, Rd)) -#define SUBSw_REG_LSL(Rd, Rn, Rm, lsl) EMIT(ADDSUB_REG_gen(0, 1, 1, 0b00, Rm, lsl, Rn, Rd)) +#define SUBSw_REG(Rd, Rn, Rm) FEMIT(ADDSUB_REG_gen(0, 1, 1, 0b00, Rm, 0, Rn, Rd)) +#define SUBSw_REG_LSL(Rd, Rn, Rm, lsl) FEMIT(ADDSUB_REG_gen(0, 1, 1, 0b00, Rm, lsl, Rn, Rd)) #define SUBxw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(rex.w, 1, 0, 0b00, Rm, 0, Rn, Rd)) #define SUBz_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(rex.is32bits?0:1, 1, 0, 0b00, Rm, 0, Rn, Rd)) -#define SUBSxw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(rex.w, 1, 1, 0b00, Rm, 0, Rn, Rd)) +#define SUBSxw_REG(Rd, Rn, Rm) FEMIT(ADDSUB_REG_gen(rex.w, 1, 1, 0b00, Rm, 0, Rn, Rd)) #define CMPSx_REG(Rn, Rm) SUBSx_REG(xZR, Rn, Rm) #define CMPSw_REG(Rn, Rm) SUBSw_REG(wZR, Rn, Rm) #define CMPSxw_REG(Rn, Rm) SUBSxw_REG(xZR, Rn, Rm) @@ -218,12 +218,12 @@ int convert_bitmask(uint64_t bitmask); #define NEGSxw_REG(Rd, Rm) SUBSxw_REG(Rd, xZR, Rm); #define SUBx_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(1, 1, 0, 0b00, (imm12)&0xfff, Rn, Rd)) -#define SUBSx_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(1, 1, 1, 0b00, (imm12)&0xfff, Rn, Rd)) +#define SUBSx_U12(Rd, Rn, imm12) FEMIT(ADDSUB_IMM_gen(1, 1, 1, 0b00, (imm12)&0xfff, Rn, Rd)) #define SUBw_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(0, 1, 0, 0b00, (imm12)&0xfff, Rn, Rd)) -#define SUBSw_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(0, 1, 1, 0b00, (imm12)&0xfff, Rn, Rd)) +#define SUBSw_U12(Rd, Rn, imm12) FEMIT(ADDSUB_IMM_gen(0, 1, 1, 0b00, (imm12)&0xfff, Rn, Rd)) #define SUBxw_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(rex.w, 1, 0, 0b00, (imm12)&0xfff, Rn, Rd)) #define SUBz_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(rex.is32bits?0:1, 1, 0, 0b00, (imm12)&0xfff, Rn, Rd)) -#define SUBSxw_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(rex.w, 1, 1, 0b00, (imm12)&0xfff, Rn, Rd)) +#define SUBSxw_U12(Rd, Rn, imm12) FEMIT(ADDSUB_IMM_gen(rex.w, 1, 1, 0b00, (imm12)&0xfff, Rn, Rd)) #define CMPSx_U12(Rn, imm12) SUBSx_U12(xZR, Rn, imm12) #define CMPSw_U12(Rn, imm12) SUBSw_U12(wZR, Rn, imm12) #define CMPSxw_U12(Rn, imm12) SUBSxw_U12(xZR, Rn, imm12) @@ -235,18 +235,18 @@ int convert_bitmask(uint64_t bitmask); #define SBCx_REG(Rd, Rn, Rm) EMIT(ADDSUBC_gen(1, 1, 0, Rm, Rn, Rd)) #define SBCw_REG(Rd, Rn, Rm) EMIT(ADDSUBC_gen(0, 1, 0, Rm, Rn, Rd)) #define SBCxw_REG(Rd, Rn, Rm) EMIT(ADDSUBC_gen(rex.w, 1, 0, Rm, Rn, Rd)) -#define ADCSx_REG(Rd, Rn, Rm) EMIT(ADDSUBC_gen(1, 0, 1, Rm, Rn, Rd)) -#define ADCSw_REG(Rd, Rn, Rm) EMIT(ADDSUBC_gen(0, 0, 1, Rm, Rn, Rd)) -#define ADCSxw_REG(Rd, Rn, Rm) EMIT(ADDSUBC_gen(rex.w, 0, 1, Rm, Rn, Rd)) -#define SBCSx_REG(Rd, Rn, Rm) EMIT(ADDSUBC_gen(1, 1, 1, Rm, Rn, Rd)) -#define SBCSw_REG(Rd, Rn, Rm) EMIT(ADDSUBC_gen(0, 1, 1, Rm, Rn, Rd)) -#define SBCSxw_REG(Rd, Rn, Rm) EMIT(ADDSUBC_gen(rex.w, 1, 1, Rm, Rn, Rd)) +#define ADCSx_REG(Rd, Rn, Rm) FEMIT(ADDSUBC_gen(1, 0, 1, Rm, Rn, Rd)) +#define 
ADCSw_REG(Rd, Rn, Rm) FEMIT(ADDSUBC_gen(0, 0, 1, Rm, Rn, Rd)) +#define ADCSxw_REG(Rd, Rn, Rm) FEMIT(ADDSUBC_gen(rex.w, 0, 1, Rm, Rn, Rd)) +#define SBCSx_REG(Rd, Rn, Rm) FEMIT(ADDSUBC_gen(1, 1, 1, Rm, Rn, Rd)) +#define SBCSw_REG(Rd, Rn, Rm) FEMIT(ADDSUBC_gen(0, 1, 1, Rm, Rn, Rd)) +#define SBCSxw_REG(Rd, Rn, Rm) FEMIT(ADDSUBC_gen(rex.w, 1, 1, Rm, Rn, Rd)) // CCMP compare if cond is true, set nzcv if false #define CCMP_reg(sf, Rm, cond, Rn, nzcv) ((sf)<<31 | 1<<30 | 1<<29 | 0b11010010<<21 | (Rm)<<16 | (cond)<<12 | (Rn)<<5 | (nzcv)) -#define CCMPw(Wn, Wm, nzcv, cond) EMIT(CCMP_reg(0, Wm, cond, Wn, nzcv)) -#define CCMPx(Xn, Xm, nzcv, cond) EMIT(CCMP_reg(1, Xm, cond, Xn, nzcv)) -#define CCMPxw(Xn, Xm, nzcv, cond) EMIT(CCMP_reg(rex.w, Xm, cond, Xn, nzcv)) +#define CCMPw(Wn, Wm, nzcv, cond) FEMIT(CCMP_reg(0, Wm, cond, Wn, nzcv)) +#define CCMPx(Xn, Xm, nzcv, cond) FEMIT(CCMP_reg(1, Xm, cond, Xn, nzcv)) +#define CCMPxw(Xn, Xm, nzcv, cond) FEMIT(CCMP_reg(rex.w, Xm, cond, Xn, nzcv)) // ADR #define ADR_gen(immlo, immhi, Rd) ((immlo)<<29 | 0b10000<<24 | (immhi)<<5 | (Rd)) @@ -570,9 +570,9 @@ int convert_bitmask(uint64_t bitmask); #define ANDx_mask(Rd, Rn, N, immr, imms) EMIT(LOGIC_gen(1, 0b00, N, immr, imms, Rn, Rd)) #define ANDw_mask(Rd, Rn, immr, imms) EMIT(LOGIC_gen(0, 0b00, 0, immr, imms, Rn, Rd)) #define ANDxw_mask(Rd, Rn, N, immr, imms) EMIT(LOGIC_gen(rex.w, 0b00, rex.w?(N):0, immr, imms, Rn, Rd)) -#define ANDSx_mask(Rd, Rn, N, immr, imms) EMIT(LOGIC_gen(1, 0b11, N, immr, imms, Rn, Rd)) -#define ANDSw_mask(Rd, Rn, immr, imms) EMIT(LOGIC_gen(0, 0b11, 0, immr, imms, Rn, Rd)) -#define ANDSxw_mask(Rd, Rn, N, immr, imms) EMIT(LOGIC_gen(rex.w, 0b11, rex.w?(N):0, immr, imms, Rn, Rd)) +#define ANDSx_mask(Rd, Rn, N, immr, imms) FEMIT(LOGIC_gen(1, 0b11, N, immr, imms, Rn, Rd)) +#define ANDSw_mask(Rd, Rn, immr, imms) FEMIT(LOGIC_gen(0, 0b11, 0, immr, imms, Rn, Rd)) +#define ANDSxw_mask(Rd, Rn, N, immr, imms) FEMIT(LOGIC_gen(rex.w, 0b11, rex.w?(N):0, immr, imms, Rn, Rd)) #define ORRx_mask(Rd, Rn, N, immr, imms) EMIT(LOGIC_gen(1, 0b01, N, immr, imms, Rn, Rd)) #define ORRw_mask(Rd, Rn, immr, imms) EMIT(LOGIC_gen(0, 0b01, 0, immr, imms, Rn, Rd)) #define ORRxw_mask(Rd, Rn, N, immr, imms) EMIT(LOGIC_gen(rex.w, 0b01, rex.w?(N):0, immr, imms, Rn, Rd)) @@ -588,9 +588,9 @@ int convert_bitmask(uint64_t bitmask); #define ANDw_REG(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(0, 0b00, 0b00, 0, Rm, 0, Rn, Rd)) #define ANDw_REG_LSR(Rd, Rn, Rm, lsr) EMIT(LOGIC_REG_gen(0, 0b00, 0b01, 0, Rm, lsr, Rn, Rd)) #define ANDxw_REG(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(rex.w, 0b00, 0b00, 0, Rm, 0, Rn, Rd)) -#define ANDSx_REG(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(1, 0b11, 0b00, 0, Rm, 0, Rn, Rd)) -#define ANDSw_REG(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(0, 0b11, 0b00, 0, Rm, 0, Rn, Rd)) -#define ANDSxw_REG(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(rex.w, 0b11, 0b00, 0, Rm, 0, Rn, Rd)) +#define ANDSx_REG(Rd, Rn, Rm) FEMIT(LOGIC_REG_gen(1, 0b11, 0b00, 0, Rm, 0, Rn, Rd)) +#define ANDSw_REG(Rd, Rn, Rm) FEMIT(LOGIC_REG_gen(0, 0b11, 0b00, 0, Rm, 0, Rn, Rd)) +#define ANDSxw_REG(Rd, Rn, Rm) FEMIT(LOGIC_REG_gen(rex.w, 0b11, 0b00, 0, Rm, 0, Rn, Rd)) #define ORRx_REG(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(1, 0b01, 0b00, 0, Rm, 0, Rn, Rd)) #define ORRx_REG_LSL(Rd, Rn, Rm, lsl) EMIT(LOGIC_REG_gen(1, 0b01, 0b00, 0, Rm, lsl, Rn, Rd)) #define ORRw_REG_LSL(Rd, Rn, Rm, lsl) EMIT(LOGIC_REG_gen(0, 0b01, 0b00, 0, Rm, lsl, Rn, Rd)) @@ -641,10 +641,10 @@ int convert_bitmask(uint64_t bitmask); #define BICx(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(1, 0b00, 0b00, 1, Rm, 0, Rn, Rd)) #define BICw(Rd, Rn, Rm) 
EMIT(LOGIC_REG_gen(0, 0b00, 0b00, 1, Rm, 0, Rn, Rd)) #define BICw_LSL(Rd, Rn, Rm, lsl) EMIT(LOGIC_REG_gen(0, 0b00, 0b00, 1, Rm, lsl, Rn, Rd)) -#define BICSx(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(1, 0b11, 0b00, 1, Rm, 0, Rn, Rd)) -#define BICSw(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(0, 0b11, 0b00, 1, Rm, 0, Rn, Rd)) -#define BICxw(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(rex.w, 0b00, 0b00, 1, Rm, 0, Rn, Rd)) -#define BICSxw(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(rex.w, 0b11, 0b00, 1, Rm, 0, Rn, Rd)) +#define BICSx(Rd, Rn, Rm) FEMIT(LOGIC_REG_gen(1, 0b11, 0b00, 1, Rm, 0, Rn, Rd)) +#define BICSw(Rd, Rn, Rm) FEMIT(LOGIC_REG_gen(0, 0b11, 0b00, 1, Rm, 0, Rn, Rd)) +#define BICxw(Rd, Rn, Rm) FEMIT(LOGIC_REG_gen(rex.w, 0b00, 0b00, 1, Rm, 0, Rn, Rd)) +#define BICSxw(Rd, Rn, Rm) FEMIT(LOGIC_REG_gen(rex.w, 0b11, 0b00, 1, Rm, 0, Rn, Rd)) #define BICx_REG BICx #define BICw_REG BICw #define BICxw_REG BICxw @@ -822,7 +822,7 @@ int convert_bitmask(uint64_t bitmask); // MRS : from System register #define MRS_nzvc(Rt) EMIT(MRS_gen(1, 1, 3, 4, 2, 0, Rt)) // MSR : to System register -#define MSR_nzvc(Rt) EMIT(MRS_gen(0, 1, 3, 4, 2, 0, Rt)) +#define MSR_nzvc(Rt) FEMIT(MRS_gen(0, 1, 3, 4, 2, 0, Rt)) // mrs x0, fpcr : 1101010100 1 1 1 011 0100 0100 000 00000 o0=1(op0=3), op1=0b011(3) CRn=0b0100(4) CRm=0b0100(4) op2=0 #define MRS_fpcr(Rt) EMIT(MRS_gen(1, 1, 3, 4, 4, 0, Rt)) #define MSR_fpcr(Rt) EMIT(MRS_gen(0, 1, 3, 4, 4, 0, Rt)) @@ -849,6 +849,14 @@ int convert_bitmask(uint64_t bitmask); #define FPSR_DZC 1 // NEON Invalid Operation Cumulative #define FPSR_IOC 0 +// NZCV N +#define NZCV_N 31 +// NZCV Z +#define NZCV_Z 30 +// NZCV C +#define NZCV_C 29 +// NZCV V +#define NZCV_V 28 // FCSEL #define FCSEL_scalar(type, Rm, cond, Rn, Rd) (0b11110<<24 | (type)<<22 | 1<<21 | (Rm)<<16 | (cond)<<12 | 0b11<<10 | (Rn)<<5 | (Rd)) @@ -1273,10 +1281,10 @@ int convert_bitmask(uint64_t bitmask); // CMP #define FCMP_scalar(type, Rn, Rm, opc) (0b11110<<24 | (type)<<22 | 1<<21 | (Rm)<<16 | 0b1000<<10 | (Rn)<<5 | (opc)<<3) -#define FCMPS(Sn, Sm) EMIT(FCMP_scalar(0b00, Sn, Sm, 0b00)) -#define FCMPD(Dn, Dm) EMIT(FCMP_scalar(0b01, Dn, Dm, 0b00)) -#define FCMPS_0(Sn) EMIT(FCMP_scalar(0b00, Sn, 0, 0b01)) -#define FCMPD_0(Dn) EMIT(FCMP_scalar(0b01, Dn, 0, 0b01)) +#define FCMPS(Sn, Sm) FEMIT(FCMP_scalar(0b00, Sn, Sm, 0b00)) +#define FCMPD(Dn, Dm) FEMIT(FCMP_scalar(0b01, Dn, Dm, 0b00)) +#define FCMPS_0(Sn) FEMIT(FCMP_scalar(0b00, Sn, 0, 0b01)) +#define FCMPD_0(Dn) FEMIT(FCMP_scalar(0b01, Dn, 0, 0b01)) // CVT #define FCVT_scalar(sf, type, rmode, opcode, Rn, Rd) ((sf)<<31 | 0b11110<<24 | (type)<<22 | 1<<21 | (rmode)<<19 | (opcode)<<16 | (Rn)<<5 | (Rd)) @@ -2202,7 +2210,7 @@ int convert_bitmask(uint64_t bitmask); #define LDSETLH(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b01, 0, 1, Rs, 0b011, Rn, Rt)) #define STSETH(Rs, Rn) EMIT(ATOMIC_gen(0b01, 0, 0, Rs, 0b011, Rn, 0b11111)) #define STSETLH(Rs, Rn) EMIT(ATOMIC_gen(0b01, 0, 1, Rs, 0b011, Rn, 0b11111)) -// Atomic Signel Max +// Atomic Signed Max #define LDSMAXxw(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 0, 0, Rs, 0b100, Rn, Rt)) #define LDSMAXAxw(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 1, 0, Rs, 0b100, Rn, Rt)) #define LDSMAXALxw(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 1, 1, Rs, 0b100, Rn, Rt)) @@ -2318,23 +2326,23 @@ int convert_bitmask(uint64_t bitmask); // FLAGM extension // Invert Carry Flag -#define CFINV() EMIT(0b1101010100<<22 | 0b0100<<12 | 0b000<<5 | 0b11111) +#define CFINV() FEMIT(0b1101010100<<22 | 0b0100<<12 | 0b000<<5 | 0b11111) #define RMIF_gen(imm6, Rn, mask) (0b10111010000<<21 | (imm6)<<15 | 0b00001<<10 | (Rn)<<5 | (mask)) // 
Rotate right reg and use as NZCV -#define RMIF(Xn, shift, mask) EMIT(RMIF_gen(shift, Xn, mask)) +#define RMIF(Xn, shift, mask) FEMIT(RMIF_gen(shift, Xn, mask)) #define SETF_gen(sz, Rn) (0b00111010000<<21 | (sz)<<14 | 0b0010<<10 | (Rn)<<5 | 0b1101) // Set NZVc with 8bit value of reg: N=bit7, Z=[0..7]==0, V=bit8 eor bit7, C unchanged -#define SETF8(Wn) EMIT(SETF_gen(0, Wn)) +#define SETF8(Wn) FEMIT(SETF_gen(0, Wn)) // Set NZVc with 16bit value of reg: N=bit15, Z=[0..15]==0, V=bit16 eor bit15, C unchanged -#define SETF16(Wn) EMIT(SETF_gen(1, Wn)) +#define SETF16(Wn) FEMIT(SETF_gen(1, Wn)) // FLAGM2 extension // NZCV -> N=0 Z=C|V C=C&!V V=0 -#define AXFLAG() EMIT(0b1101010100<<22 | 0b0100<<12 | 0b010<<5 | 0b11111) +#define AXFLAG() FEMIT(0b1101010100<<22 | 0b0100<<12 | 0b010<<5 | 0b11111) // NZCV -> N=!C&!Z Z=Z&C C=C|Z V=!C&Z -#define XAFLAG() EMIT(0b1101010100<<22 | 0b0100<<12 | 0b001<<5 | 0b11111) +#define XAFLAG() FEMIT(0b1101010100<<22 | 0b0100<<12 | 0b001<<5 | 0b11111) // FRINTTS extension #define FRINTxx_scalar(type, op, Rn, Rd) (0b11110<<24 | (type)<<22 | 1<<21 | 0b0100<<17 | (op)<<15 | 0b10000<<10 | (Rn)<<5 | (Rd)) diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c index 38a705b5..b3b2ac34 100644 --- a/src/dynarec/arm64/dynarec_arm64_0f.c +++ b/src/dynarec/arm64/dynarec_arm64_0f.c @@ -2162,8 +2162,10 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin CLZxw(gd, x1); // x2 gets leading 0 == BSF MARK; IFX(X_ZF) { - CSETw(x1, cEQ); //other flags are undefined - BFIw(xFlags, x1, F_ZF, 1); + IFNATIVE(NF_EQ) {} else { + CSETw(x1, cEQ); //other flags are undefined + BFIw(xFlags, x1, F_ZF, 1); + } } break; case 0xBD: @@ -2180,8 +2182,10 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin NEGxw_REG(gd, gd); // complement MARK; IFX(X_ZF) { - CSETw(x1, cEQ); //other flags are undefined - BFIw(xFlags, x1, F_ZF, 1); + IFNATIVE(NF_EQ) {} else { + CSETw(x1, cEQ); //other flags are undefined + BFIw(xFlags, x1, F_ZF, 1); + } } break; case 0xBE: diff --git a/src/dynarec/arm64/dynarec_arm64_emit_logic.c b/src/dynarec/arm64/dynarec_arm64_emit_logic.c index dcdffdab..5b9a08f2 100644 --- a/src/dynarec/arm64/dynarec_arm64_emit_logic.c +++ b/src/dynarec/arm64/dynarec_arm64_emit_logic.c @@ -34,18 +34,33 @@ void emit_or32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, IFX(X_PEND) { STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); } + int need_tst = 0; + IFX(X_ZF) need_tst = 1; + IFXNATIVE(X_SF, NF_SF) need_tst = 1; + IFXNATIVE(X_OF, NF_VF) need_tst = 1; + if(need_tst) TSTxw_REG(s1, s1); IFX(X_CF | X_AF | X_OF) { - MOV32w(s3, (1<>6)&0x3F); } else { ANDw_mask(s1, s1, mask&0x3F, (mask>>6)&0x3F); } } else { MOV32w(s3, c&0xff); - IFX(X_ZF) { + IFX(X_ZF|X_SF) { ANDSw_REG(s1, s1, s3); } else { ANDw_REG(s1, s1, s3); @@ -399,12 +487,16 @@ void emit_and8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4 BICw(xFlags, xFlags, s3); } IFX(X_ZF) { - CSETw(s3, cEQ); - BFIw(xFlags, s3, F_ZF, 1); + IFNATIVE(NF_EQ) {} else { + CSETw(s3, cEQ); + BFIw(xFlags, s3, F_ZF, 1); + } } IFX(X_SF) { - LSRw(s3, s1, 7); - BFIw(xFlags, s3, F_SF, 1); + IFNATIVE(NF_SF) {} else { + LSRw(s3, s1, 7); + BFIw(xFlags, s3, F_SF, 1); + } } IFX(X_PF) { emit_pf(dyn, ninst, s1, s4); @@ -571,13 +663,19 @@ void emit_and16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4) MOV32w(s3, (1<0 - BFCw(xFlags, F_SF, 1); + IFNATIVE(NF_SF) {} else { + // no sign if c>0 + BFCw(xFlags, F_SF, 1); + } } if(box64_dynarec_test) 
IFX(X_AF) { @@ -259,14 +287,21 @@ void emit_sar32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, i IFX(X_PEND) { STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); } + int need_tst = 0; + IFX(X_ZF) need_tst = 1; + IFXNATIVE(X_SF, NF_SF) need_tst = 1; + if(need_tst) TSTxw_REG(s1, s1); IFX(X_ZF) { - TSTxw_REG(s1, s1); - CSETw(s4, cEQ); - BFIw(xFlags, s4, F_ZF, 1); + IFNATIVE(NF_EQ) {} else { + CSETw(s4, cEQ); + BFIw(xFlags, s4, F_ZF, 1); + } } IFX(X_SF) { - LSRxw(s4, s1, (rex.w)?63:31); - BFIx(xFlags, s4, F_SF, 1); + IFNATIVE(NF_SF) {} else { + LSRxw(s4, s1, (rex.w)?63:31); + BFIx(xFlags, s4, F_SF, 1); + } } IFX(X_OF) if(c==1) { @@ -365,7 +400,11 @@ void emit_shl8c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s LSLw(s3, s1, c-1); BFXILw(xFlags, s3, 7, 1); // insert F_CF from s3[7:1] } - MOVw_REG(s1, xZR); + IFXNATIVE(X_ZF|X_SF, NF_EQ|NF_SF) { + SUBSw_REG(s1, s1, s1); + } else { + MOVw_REG(s1, xZR); + } IFX(X_PEND) { STRB_U12(s1, xEmu, offsetof(x64emu_t, res)); } @@ -373,15 +412,17 @@ void emit_shl8c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s BFCw(xFlags, F_OF, 1); } IFX(X_SF) { - BFCw(xFlags, F_SF, 1); + IFNATIVE(NF_SF) {} else BFCw(xFlags, F_SF, 1); } if(box64_dynarec_test) IFX(X_AF) { BFCw(xFlags, F_AF, 1); } IFX(X_PF | X_ZF) { - MOV32w(s3, 1); - IFX(X_ZF) { - BFIw(xFlags, s3, F_ZF, 1); + IFNATIVE(NF_EQ) {IFX(X_PF) {MOV32w(s3, 1);}} else { + MOV32w(s3, 1); + IFX(X_ZF) { + BFIw(xFlags, s3, F_ZF, 1); + } } IFX(X_PF) { BFIw(xFlags, s3, F_PF, 1); @@ -490,8 +531,8 @@ void emit_sar8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4) } COMP_ZFSF(s1, 8) IFX(X_OF) { - CMPSw_U12(s2, 1); - Bcond(cNE, 4+4); + SUBw_U12(s4, s2, 1); + CBNZw(s4, 4+4); BFCw(xFlags, F_OF, 1); } if(box64_dynarec_test) @@ -632,7 +673,11 @@ void emit_shl16c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int LSLw(s3, s1, c-1); BFXILw(xFlags, s3, 15, 1); // insert F_CF from s3[15:1] } - MOVw_REG(s1, xZR); + IFXNATIVE(X_ZF|X_SF, NF_EQ|NF_SF) { + SUBSw_REG(s1, s1, s1); + } else { + MOVw_REG(s1, xZR); + } IFX(X_PEND) { STRH_U12(s1, xEmu, offsetof(x64emu_t, res)); } @@ -640,12 +685,17 @@ void emit_shl16c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int BFCw(xFlags, F_OF, 1); } IFX(X_SF) { - BFCw(xFlags, F_SF, 1); + IFNATIVE(NF_SF) {} else BFCw(xFlags, F_SF, 1); + } + if(box64_dynarec_test) IFX(X_AF) { + BFCw(xFlags, F_AF, 1); } IFX(X_PF | X_ZF) { - MOV32w(s3, 1); - IFX(X_ZF) { - BFIw(xFlags, s3, F_ZF, 1); + IFNATIVE(NF_EQ) {IFX(X_PF) {MOV32w(s3, 1);}} else { + MOV32w(s3, 1); + IFX(X_ZF) { + BFIw(xFlags, s3, F_ZF, 1); + } } IFX(X_PF) { BFIw(xFlags, s3, F_PF, 1); @@ -670,8 +720,8 @@ void emit_shr16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4) BFIw(xFlags, s4, 0, 1); } IFX(X_OF) { - CMPSw_U12(s2, 1); // if s2==1 - Bcond(cNE, 4+2*4); + SUBw_U12(s4, s2, 1); // if s2==1 + CBNZw(s4, 4+2*4); LSRw(s4, s1, 15); BFIw(xFlags, s4, F_OF, 1); } @@ -751,8 +801,8 @@ void emit_sar16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4) } COMP_ZFSF(s1, 16) IFX(X_OF) { - CMPSw_U12(s2, 1); - Bcond(cNE, 4+4); + SUBw_U12(s4, s2, 1); + CBNZw(s4, 4+4); BFCw(xFlags, F_OF, 1); } if(box64_dynarec_test) @@ -1154,14 +1204,21 @@ void emit_shrd32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, uint IFX(X_PEND) { STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); } + int need_tst = 0; + IFX(X_ZF) need_tst = 1; + IFXNATIVE(X_SF, NF_SF) need_tst = 1; + if(need_tst) TSTxw_REG(s1, s1); IFX(X_ZF) { - TSTxw_REG(s1, s1); - CSETw(s4, cEQ); - 
BFIw(xFlags, s4, F_ZF, 1); + IFNATIVE(NF_EQ) {} else { + CSETw(s4, cEQ); + BFIw(xFlags, s4, F_ZF, 1); + } } IFX(X_SF) { - LSRxw(s4, s1, (rex.w)?63:31); - BFIx(xFlags, s4, F_SF, 1); + IFNATIVE(NF_SF) {} else { + LSRxw(s4, s1, (rex.w)?63:31); + BFIx(xFlags, s4, F_SF, 1); + } } IFX(X_OF) { if(c==1) { @@ -1257,18 +1314,25 @@ void emit_shrd32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s IFX(X_PEND) { STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); } + int need_tst = 0; + IFX(X_ZF) need_tst = 1; + IFXNATIVE(X_SF, NF_SF) need_tst = 1; + if(need_tst) TSTxw_REG(s1, s1); IFX(X_ZF) { - TSTxw_REG(s1, s1); - CSETw(s4, cEQ); - BFIw(xFlags, s4, F_ZF, 1); + IFNATIVE(NF_EQ) {} else { + CSETw(s4, cEQ); + BFIw(xFlags, s4, F_ZF, 1); + } } IFX(X_SF) { - LSRxw(s4, s1, (rex.w)?63:31); - BFIw(xFlags, s4, F_SF, 1); + IFNATIVE(NF_SF) {} else { + LSRxw(s4, s1, (rex.w)?63:31); + BFIx(xFlags, s4, F_SF, 1); + } } IFX(X_OF) { - CMPSw_U12(s5, 1); - Bcond(cNE, 4+2*4); + SUBw_U12(s5, s5, 1); + CBNZw(s5, 4+2*4); //flagless jump UBFXx(s3, s1, rex.w?63:31, 1); EORw_REG_LSL(xFlags, xFlags, s3, F_OF); // OF is set if sign changed } @@ -1303,18 +1367,25 @@ void emit_shld32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s IFX(X_PEND) { STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); } + int need_tst = 0; + IFX(X_ZF) need_tst = 1; + IFXNATIVE(X_SF, NF_SF) need_tst = 1; + if(need_tst) TSTxw_REG(s1, s1); IFX(X_ZF) { - TSTxw_REG(s1, s1); - CSETw(s4, cEQ); - BFIw(xFlags, s4, F_ZF, 1); + IFNATIVE(NF_EQ) {} else { + CSETw(s4, cEQ); + BFIw(xFlags, s4, F_ZF, 1); + } } IFX(X_SF) { - LSRxw(s4, s1, (rex.w)?63:31); - BFIx(xFlags, s4, F_SF, 1); + IFNATIVE(NF_SF) {} else { + LSRxw(s4, s1, (rex.w)?63:31); + BFIx(xFlags, s4, F_SF, 1); + } } IFX(X_OF) { - CMPSw_U12(s5, 1); - Bcond(cNE, 4+2*4); + SUBw_U12(s5, s5, 1); + CBNZw(s5, 4+2*4); //flagless jump UBFXx(s3, s1, rex.w?63:31, 1); EORw_REG_LSL(xFlags, xFlags, s3, F_OF); // OF is set if sign changed } @@ -1394,8 +1465,8 @@ void emit_shrd16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s5, int s3, } COMP_ZFSF(s1, 16) IFX(X_OF) { - CMPSw_U12(s5, 1); - Bcond(cNE, 4+2*4); + SUBw_U12(s5, s5, 1); + CBNZw(s5, 4+2*4); UBFXw(s3, s1, 15, 1); EORw_REG_LSL(xFlags, xFlags, s3, F_OF); // OF is set if sign changed } @@ -1485,8 +1556,8 @@ void emit_shld16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s5, int s3, } COMP_ZFSF(s1, 16) IFX(X_OF) { - CMPSw_U12(s5, 1); - Bcond(cNE, 4+2*4); + SUBw_U12(s5, s5, 1); + CBNZw(s5, 4+2*4); UBFXw(s3, s1, 15, 1); EORw_REG_LSL(xFlags, xFlags, s3, F_OF); // OF is set if sign changed } diff --git a/src/dynarec/arm64/dynarec_arm64_emit_tests.c b/src/dynarec/arm64/dynarec_arm64_emit_tests.c index 6a78ce8e..51c9f1bf 100644 --- a/src/dynarec/arm64/dynarec_arm64_emit_tests.c +++ b/src/dynarec/arm64/dynarec_arm64_emit_tests.c @@ -47,8 +47,10 @@ void emit_cmp32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3 BFIx(xFlags, s4, F_AF, 1); // AF: bc & 0x08 } IFX(X_ZF) { - CSETw(s4, cEQ); - BFIw(xFlags, s4, F_ZF, 1); + IFNATIVE(NF_EQ) {} else { + CSETw(s4, cEQ); + BFIw(xFlags, s4, F_ZF, 1); + } } IFX(X_CF) { // inverted carry @@ -56,12 +58,16 @@ void emit_cmp32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3 BFIw(xFlags, s4, F_CF, 1); } IFX(X_OF) { - CSETw(s4, cVS); - BFIw(xFlags, s4, F_OF, 1); + IFNATIVE(NF_VF) {} else { + CSETw(s4, cVS); + BFIw(xFlags, s4, F_OF, 1); + } } IFX(X_SF) { - LSRxw(s3, s5, (rex.w)?63:31); - BFIw(xFlags, s3, F_SF, 1); + IFNATIVE(NF_SF) {} else { + LSRxw(s3, s5, (rex.w)?63:31); + BFIw(xFlags, 
s3, F_SF, 1); + } } IFX(X_PF) { emit_pf(dyn, ninst, s5, s4); @@ -84,16 +90,27 @@ void emit_cmp32_0(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s3, int // and now the tricky ones (and mostly unused), PF and AF // bc = (res & (~d | s)) | (~d & s) => is 0 here... IFX(X_OF|X_AF|X_CF) { - MOV32w(s4, (1<insts[ninst].x64.need_before, dyn->insts[ninst].x64.need_after, dyn->smwrite, dyn->insts[ninst].will_write, dyn->insts[ninst].last_write); + if(dyn->insts[ninst].nat_flags_op) { + if(dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_TOUCH && dyn->insts[ninst].before_nat_flags) + printf_log(LOG_NONE, " NF:%d/read:%x", dyn->insts[ninst].nat_flags_op, dyn->insts[ninst].before_nat_flags); + else + printf_log(LOG_NONE, " NF:%d", dyn->insts[ninst].nat_flags_op); + } + if(dyn->insts[ninst].use_nat_flags || dyn->insts[ninst].set_nat_flags || dyn->insts[ninst].need_nat_flags) + printf_log(LOG_NONE, " nf:%hhx/%hhx/%hhx", dyn->insts[ninst].set_nat_flags, dyn->insts[ninst].use_nat_flags, dyn->insts[ninst].need_nat_flags); if(dyn->insts[ninst].pred_sz) { dynarec_log(LOG_NONE, ", pred="); for(int ii=0; iiinsts[ninst].pred_sz; ++ii) @@ -796,3 +804,196 @@ int fpu_is_st_freed(dynarec_native_t* dyn, int ninst, int st) { return (dyn->n.tags&(0b11<<(st*2)))?1:0; } + + +uint8_t mark_natflag(dynarec_arm_t* dyn, int ninst, uint8_t flag) +{ + if(dyn->insts[ninst].x64.set_flags) { + dyn->insts[ninst].set_nat_flags |= flag; + if(dyn->insts[ninst].x64.use_flags) { + dyn->insts[ninst].use_nat_flags |= flag; + } + } else { + dyn->insts[ninst].use_nat_flags |= flag; + } + return flag; +} + +uint8_t flag2native(uint8_t flags) +{ + uint8_t ret = 0; + #ifdef ARM64 + if(flags&X_ZF) ret|=NF_EQ; + if(flags&X_SF) ret|=NF_SF; + if(flags&X_OF) ret|=NF_VF; + #else + // no native flags on rv64 or la64 + #endif + return ret; +} + +static int markNativeFlags(dynarec_native_t* dyn, int ninst, uint8_t flags, int start) +{ + while(ninst>=0) { +//printf_log(LOG_INFO, "markNativeFlags ninst=%d, flags=%x, start=%d, nat_flags_op=%d, need_nat_flag=%x, flag_gen=%x need_before=%x need_after=%x\n", ninst, flags, start, dyn->insts[ninst].nat_flags_op, dyn->insts[ninst].need_nat_flags, dyn->insts[ninst].x64.gen_flags, flag2native(dyn->insts[ninst].x64.need_before), flag2native(dyn->insts[ninst].x64.need_after)); + // propagation already done + uint8_t flag_entry = (start && dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_TOUCH)?dyn->insts[ninst].before_nat_flags:dyn->insts[ninst].need_nat_flags; + if((flag_entry&flags)==flags) return flag_entry; + // no more flag propagation + if(!start && !flag2native(dyn->insts[ninst].x64.need_after)) return flags; + // flags destroyed, cancel native flags + if(dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_UNUSABLE) return 0; + if(dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_CANCELED) return 0; + if(start) { + start = 0; + flags |= flag2native(dyn->insts[ninst].x64.need_before); + } else if(dyn->insts[ninst].x64.gen_flags && (flag2native(dyn->insts[ninst].x64.gen_flags)&flags)) { + // this is the emitter of the native flags! so, is it good or not? + if(dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_TOUCH && (dyn->insts[ninst].set_nat_flags&flags)==flags) { + dyn->insts[ninst].need_nat_flags |= flags; + if(!dyn->insts[ninst].x64.may_set) // if flags just may be set, continue! 
+ return flags; + } else + return 0; + } + if(dyn->insts[ninst].use_nat_flags) + flags |= dyn->insts[ninst].use_nat_flags; + if(dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_TOUCH) // can happen on operations that read and generate flags + dyn->insts[ninst].before_nat_flags |= flags; + else + dyn->insts[ninst].need_nat_flags |= flags; + flags |= flag2native(dyn->insts[ninst].x64.need_before); + if(!dyn->insts[ninst].pred_sz) + return 0; + for(int i=1; i<dyn->insts[ninst].pred_sz; ++i) { + int ret_flags = markNativeFlags(dyn, dyn->insts[ninst].pred[i], flags, 0); + if(!ret_flags) + return 0; + flags|=ret_flags; + } + ninst = dyn->insts[ninst].pred[0]; + } + return 0; +} + +static void unmarkNativeFlags(dynarec_native_t* dyn, int ninst, int start) +{ +//printf_log(LOG_INFO, "unmarkNativeFlags ninst=%d, will check forward the real start\n", ninst); + // need to check if branch also goes forward to really start from the beginning + while((ninst<dyn->size) && dyn->insts[ninst].x64.has_next && !dyn->insts[ninst].nat_flags_op && dyn->insts[ninst+1].before_nat_flags) + ninst++; + + while(ninst>=0) { +//printf_log(LOG_INFO, "unmarkNativeFlags ninst=%d, start=%d\n", ninst, start); + // no more flag propagation + if(!start && !flag2native(dyn->insts[ninst].x64.need_after)) return; + // flags destroyed, but maybe they are used + if(dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_UNUSABLE) return; + if(dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_CANCELED) return; + if(start) + start = 0; + else if(dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_TOUCH) { + if(!dyn->insts[ninst].x64.may_set) { + dyn->insts[ninst].need_nat_flags = 0; + dyn->insts[ninst].nat_flags_op = NAT_FLAG_OP_CANCELED; + return; + } + } + dyn->insts[ninst].nat_flags_op = NAT_FLAG_OP_CANCELED; + #if 0 + // check forward + if(dyn->insts[ninst].x64.has_next && dyn->insts[ninst+1].need_nat_flags) + unmarkNativeFlags(dyn, ninst+1, 1); + if(dyn->insts[ninst].x64.jmp && dyn->insts[ninst].x64.jmp_insts!=-1) { + int jmp = dyn->insts[ninst].x64.jmp_insts; + if(dyn->insts[jmp].need_nat_flags) + unmarkNativeFlags(dyn, jmp, 1); + } + #endif + // check if stop + if(((dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_TOUCH)?dyn->insts[ninst].before_nat_flags:dyn->insts[ninst].need_nat_flags)==0) + return; + if(dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_TOUCH) // can happen on operations that read and generate flags + dyn->insts[ninst].before_nat_flags = 0; + else + dyn->insts[ninst].need_nat_flags = 0; + if(!flag2native(dyn->insts[ninst].x64.need_before)) return; + if(!dyn->insts[ninst].pred_sz) + return; + for(int i=1; i<dyn->insts[ninst].pred_sz; ++i) + unmarkNativeFlags(dyn, dyn->insts[ninst].pred[i], 0); + if(!dyn->insts[ninst].x64.has_next) + return; + ninst = dyn->insts[ninst].pred[0]; + } +} + +static void propagateNativeFlags(dynarec_native_t* dyn, int ninst) +{ + uint8_t flags = dyn->insts[ninst].use_nat_flags&flag2native(dyn->insts[ninst].x64.need_before); + uint8_t flags_after = flag2native(dyn->insts[ninst].x64.need_after); + int marked_flags = markNativeFlags(dyn, ninst, flags, 1); + if(!marked_flags) { +//printf_log(LOG_INFO, "unmarkNativeFlags ninst=%d, because marked_flags is 0\n", ninst); + unmarkNativeFlags(dyn, ninst, 1); + return; + } + uint8_t need_flags; + // check if all next have the correct flag, or if using non-native flags while native are used + if(dyn->insts[ninst].x64.has_next && (flags_after&marked_flags)) { + need_flags = dyn->insts[ninst+1].nat_flags_op?dyn->insts[ninst+1].before_nat_flags:dyn->insts[ninst+1].need_nat_flags; // native flags used + 
flags_after = flag2native(dyn->insts[ninst+1].x64.need_before)&~need_flags; // flags that need to stay as x86 + if((need_flags&~marked_flags) || (!need_flags && (flags_after&marked_flags))) { +//printf_log(LOG_INFO, "unmarkNativeFlags ninst=%d, because: need_flags=%hhx, flag_after=%hhx, marked_flags=%hhx\n", ninst, need_flags, flags_after, marked_flags); + unmarkNativeFlags(dyn, ninst, 1); + return; + } + } + #if 0 + // check at jump point, as native flags are not converted + int jmp = dyn->insts[ninst].x64.jmp_insts; + if(dyn->insts[ninst].x64.jmp && jmp!=-1) { + need_flags = dyn->insts[jmp].need_nat_flags; + flags_after = flag2native(dyn->insts[jmp].x64.need_before); + if(((need_flags&flags_after)!=need_flags) || (!need_flags && (flags_after&marked_flags))) { + unmarkNativeFlags(dyn, ninst, 1); + return; + } + } + #endif +} + +void updateNatveFlags(dynarec_native_t* dyn) +{ + // backward check if native flags are used + for(int ninst=dyn->size-1; ninst>=0; --ninst) + if(dyn->insts[ninst].use_nat_flags) { + propagateNativeFlags(dyn, ninst); + } +} + +void rasNativeState(dynarec_arm_t* dyn, int ninst) +{ + dyn->insts[ninst].nat_flags_op = dyn->insts[ninst].set_nat_flags = dyn->insts[ninst].use_nat_flags = dyn->insts[ninst].need_nat_flags = 0; +} + +int nativeFlagsNeedsTransform(dynarec_arm_t* dyn, int ninst) +{ + int jmp = dyn->insts[ninst].x64.jmp_insts; + if(jmp<0) + return 0; + if(!dyn->insts[ninst].x64.need_after || !dyn->insts[jmp].x64.need_before) + return 0; + if(dyn->insts[ninst].set_nat_flags) + return 0; + uint8_t flags_before = dyn->insts[ninst].need_nat_flags; + uint8_t flags_after = dyn->insts[jmp].need_nat_flags; + if(dyn->insts[jmp].nat_flags_op==NAT_FLAG_OP_TOUCH) + flags_after = dyn->insts[jmp].before_nat_flags; + uint8_t flags_x86 = flag2native(dyn->insts[jmp].x64.need_before); + flags_x86 &= ~flags_after; + // all flags_after should be present and none remaining flags_x86 + if(((flags_before&flags_after)!=flags_after) || (flags_before&flags_x86)) + return 1; + return 0; +} \ No newline at end of file diff --git a/src/dynarec/arm64/dynarec_arm64_functions.h b/src/dynarec/arm64/dynarec_arm64_functions.h index e87da74c..446c1cb1 100644 --- a/src/dynarec/arm64/dynarec_arm64_functions.h +++ b/src/dynarec/arm64/dynarec_arm64_functions.h @@ -51,6 +51,16 @@ int neoncache_combine_st(dynarec_arm_t* dyn, int ninst, int a, int b); // with // Do not allow i64 type int neoncache_no_i64(dynarec_arm_t* dyn, int ninst, int st, int a); +// transform x86 flags to native flags +uint8_t flag2native(uint8_t flags); +// mark an instruction as using/generating flags. 
return flag +uint8_t mark_natflag(dynarec_arm_t* dyn, int ninst, uint8_t flag); +// propagate the use of native flags or not (done between step 0 and step 1) +void updateNatveFlags(dynarec_arm_t* dyn); +// raz arm specific state when an opcode is unused +void rasNativeState(dynarec_arm_t* dyn, int ninst); +// check if native flags need some transform to/from x86 flags +int nativeFlagsNeedsTransform(dynarec_arm_t* dyn, int ninst); // FPU Cache transformation (for loops) // Specific, need to be written by backend int fpuCacheNeedsTransform(dynarec_arm_t* dyn, int ninst); diff --git a/src/dynarec/arm64/dynarec_arm64_helper.c b/src/dynarec/arm64/dynarec_arm64_helper.c index f4497b61..c6493015 100644 --- a/src/dynarec/arm64/dynarec_arm64_helper.c +++ b/src/dynarec/arm64/dynarec_arm64_helper.c @@ -754,6 +754,9 @@ void iret_to_epilog(dynarec_arm_t* dyn, int ninst, int is32bits, int is64bits) void call_c(dynarec_arm_t* dyn, int ninst, void* fnc, int reg, int ret, int saveflags, int savereg) { MAYUSE(fnc); + #if STEP == 0 + dyn->insts[ninst].nat_flags_op = NAT_FLAG_OP_UNUSABLE; + #endif if(savereg==0) savereg = 7; if(saveflags) { @@ -799,6 +802,9 @@ void call_c(dynarec_arm_t* dyn, int ninst, void* fnc, int reg, int ret, int save void call_n(dynarec_arm_t* dyn, int ninst, void* fnc, int w) { MAYUSE(fnc); + #if STEP == 0 + dyn->insts[ninst].nat_flags_op = NAT_FLAG_OP_UNUSABLE; + #endif STRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags)); fpu_pushcache(dyn, ninst, x3, 1); // x9..x15, x16,x17,x18 those needs to be saved by caller @@ -2380,11 +2386,63 @@ static void flagsCacheTransform(dynarec_arm_t* dyn, int ninst, int s1) } } +static void nativeFlagsTransform(dynarec_arm_t* dyn, int ninst, int s1, int s2) +{ + int j64; + int jmp = dyn->insts[ninst].x64.jmp_insts; + if(jmp<0) + return; + uint8_t flags_before = dyn->insts[ninst].need_nat_flags; + uint8_t flags_after = dyn->insts[jmp].need_nat_flags; + if(dyn->insts[jmp].nat_flags_op==NAT_FLAG_OP_TOUCH) + flags_after = dyn->insts[jmp].before_nat_flags; + uint8_t flags_x86 = flag2native(dyn->insts[jmp].x64.need_before); + flags_x86 &= ~flags_after; + MESSAGE(LOG_DUMP, "\tFNative flags transform ---- ninst=%d -> %d %hhx -> %hhx/%hhx\n", ninst, jmp, flags_before, flags_after, flags_x86); + // flags present in before and missing in after + if((flags_before&NF_EQ) && (flags_x86&NF_EQ)) { + CSETw(s1, cEQ); + BFIw(xFlags, s1, F_ZF, 1); + } + if((flags_before&NF_SF) && (flags_x86&NF_SF)) { + CSETw(s1, cMI); + BFIw(xFlags, s1, F_SF, 1); + } + if((flags_before&NF_VF) && (flags_x86&NF_VF)) { + CSETw(s1, cVS); + BFIw(xFlags, s1, F_OF, 1); + } + // flags missing and needed later + int mrs = 0; + #define GO_MRS(A) if(!mrs) {mrs=1; MRS_nzvc(s2); } + if(!(flags_before&NF_EQ) && (flags_after&NF_EQ)) { + GO_MRS(s2); + BFIw(s1, xFlags, F_ZF, 1); + BFIx(s2, s1, NZCV_Z, 1); + } + if(!(flags_before&NF_SF) && (flags_after&NF_SF)) { + GO_MRS(s2); + BFIw(s1, xFlags, F_SF, 1); + BFIx(s2, s1, NZCV_N, 1); + } + if(!(flags_before&NF_VF) && (flags_after&NF_VF)) { + GO_MRS(s2); + BFIw(s1, xFlags, F_OF, 1); + BFIx(s2, s1, NZCV_V, 1); + } + #undef GO_MRS + if(mrs) MSR_nzvc(s2); + + MESSAGE(LOG_DUMP, "\tF---- Native flags transform\n"); +} + void CacheTransform(dynarec_arm_t* dyn, int ninst, int cacheupd, int s1, int s2, int s3) { if(cacheupd&1) flagsCacheTransform(dyn, ninst, s1); if(cacheupd&2) fpuCacheTransform(dyn, ninst, s1, s2, s3); + if(cacheupd&4) + nativeFlagsTransform(dyn, ninst, s1, s2); } void fpu_reflectcache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3) diff --git 
a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h index d3451182..47c3b806 100644 --- a/src/dynarec/arm64/dynarec_arm64_helper.h +++ b/src/dynarec/arm64/dynarec_arm64_helper.h @@ -32,6 +32,10 @@ #define PK64(a) *(uint64_t*)(addr+a) #define PKip(a) *(uint8_t*)(ip+a) +#ifndef FEMIT +#define FEMIT(A) EMIT(A) +#endif + // Strong mem emulation helpers #define SMREAD_VAL 4 #define SMWRITE2_MIN 1 @@ -884,12 +888,20 @@ j64 = GETMARKLOCK-(dyn->native_size); \ CBNZx(reg, j64) +#ifndef IFNATIVE +#define IFNATIVE(A) if(dyn->insts[ninst].need_nat_flags&(A)) +#define IFNATIVEN(A) if((dyn->insts[ninst].need_nat_flags&(A))==(A)) +#endif + +#ifndef IFX #define IFX(A) if((dyn->insts[ninst].x64.gen_flags&(A))) #define IFX2(A, B) if((dyn->insts[ninst].x64.gen_flags&(A)) B) #define IFX_PENDOR0 if((dyn->insts[ninst].x64.gen_flags&(X_PEND) || !dyn->insts[ninst].x64.gen_flags)) #define IFXX(A) if((dyn->insts[ninst].x64.gen_flags==(A))) #define IFX2X(A, B) if((dyn->insts[ninst].x64.gen_flags==(A) || dyn->insts[ninst].x64.gen_flags==(B) || dyn->insts[ninst].x64.gen_flags==((A)|(B)))) #define IFXN(A, B) if((dyn->insts[ninst].x64.gen_flags&(A) && !(dyn->insts[ninst].x64.gen_flags&(B)))) +#define IFXNATIVE(X, N) if((dyn->insts[ninst].x64.gen_flags&(X)) && (dyn->insts[ninst].need_nat_flags&(N))) +#endif // Generate FCOM with s1 and s2 scratch regs (the VCMP is already done) #define FCOM(s1, s2, s3) \ @@ -1660,13 +1672,21 @@ uintptr_t dynarec64_AVX_F3_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip #define GOCOND(B, T1, T2) \ case B+0x0: \ INST_NAME(T1 "O " T2); \ + IFNATIVE(NF_VF) { \ + GO( , cVC, cVS, X_OF) \ + } else { \ GO( TSTw_mask(xFlags, 0b010101, 0) \ , cEQ, cNE, X_OF) \ + } \ break; \ case B+0x1: \ INST_NAME(T1 "NO " T2); \ + IFNATIVE(NF_VF) { \ + GO( , cVS, cVC, X_OF) \ + } else { \ GO( TSTw_mask(xFlags, 0b010101, 0) \ , cNE, cEQ, X_OF) \ + } \ break; \ case B+0x2: \ INST_NAME(T1 "C " T2); \ @@ -1680,13 +1700,21 @@ uintptr_t dynarec64_AVX_F3_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip break; \ case B+0x4: \ INST_NAME(T1 "Z " T2); \ + IFNATIVE(NF_EQ) { \ + GO( , cNE, cEQ, X_ZF) \ + } else { \ GO( TSTw_mask(xFlags, 0b011010, 0) \ , cEQ, cNE, X_ZF) \ + } \ break; \ case B+0x5: \ INST_NAME(T1 "NZ " T2); \ + IFNATIVE(NF_EQ) { \ + GO( , cEQ, cNE, X_ZF) \ + } else { \ GO( TSTw_mask(xFlags, 0b011010, 0) \ , cNE, cEQ, X_ZF) \ + } \ break; \ case B+0x6: \ INST_NAME(T1 "BE " T2); \ @@ -1702,13 +1730,21 @@ uintptr_t dynarec64_AVX_F3_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip break; \ case B+0x8: \ INST_NAME(T1 "S " T2); \ + IFNATIVE(NF_SF) { \ + GO( , cPL, cMI, X_SF) \ + } else { \ GO( TSTw_mask(xFlags, 0b011001, 0) \ , cEQ, cNE, X_SF) \ + } \ break; \ case B+0x9: \ INST_NAME(T1 "NS " T2); \ + IFNATIVE(NF_SF) { \ + GO( , cMI, cPL, X_SF) \ + } else { \ GO( TSTw_mask(xFlags, 0b011001, 0) \ , cNE, cEQ, X_SF) \ + } \ break; \ case B+0xA: \ INST_NAME(T1 "P " T2); \ @@ -1722,29 +1758,45 @@ uintptr_t dynarec64_AVX_F3_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip break; \ case B+0xC: \ INST_NAME(T1 "L " T2); \ + IFNATIVEN(NF_SF|NF_VF) { \ + GO( , cGE, cLT, X_SF|X_OF) \ + } else { \ GO( EORw_REG_LSL(x1, xFlags, xFlags, F_OF-F_SF); \ TSTw_mask(x1, 0b010101, 0) \ , cEQ, cNE, X_SF|X_OF) \ + } \ break; \ case B+0xD: \ INST_NAME(T1 "GE " T2); \ + IFNATIVEN(NF_SF|NF_VF) { \ + GO( , cLT, cGE, X_SF|X_OF) \ + } else { \ GO( EORw_REG_LSL(x1, xFlags, xFlags, F_OF-F_SF); \ TSTw_mask(x1, 0b010101, 0) \ , cNE, cEQ, X_SF|X_OF) \ + } \ break; \ case B+0xE: \ INST_NAME(T1 "LE 
" T2); \ + IFNATIVEN(NF_SF|NF_VF|NF_EQ) { \ + GO( , cGT, cLE, X_SF|X_OF|X_ZF) \ + } else { \ GO( EORw_REG_LSL(x1, xFlags, xFlags, F_OF-F_SF); \ ORRw_REG_LSL(x1, x1, xFlags, F_OF-F_ZF); \ TSTw_mask(x1, 0b010101, 0) \ , cEQ, cNE, X_SF|X_OF|X_ZF) \ + } \ break; \ case B+0xF: \ INST_NAME(T1 "G " T2); \ + IFNATIVEN(NF_SF|NF_VF|NF_EQ) { \ + GO( , cLE, cGT, X_SF|X_OF|X_ZF) \ + } else { \ GO( EORw_REG_LSL(x1, xFlags, xFlags, F_OF-F_SF); \ ORRw_REG_LSL(x1, x1, xFlags, F_OF-F_ZF); \ TSTw_mask(x1, 0b010101, 0) \ , cNE, cEQ, X_SF|X_OF|X_ZF) \ + } \ break #define NOTEST(s1) \ @@ -1775,18 +1827,24 @@ uintptr_t dynarec64_AVX_F3_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip if(arm64_flagm) { \ SETF##A(s1); \ IFX(X_ZF) { \ + IFNATIVE(NF_EQ) {} else { \ CSETw(s3, cEQ); \ BFIw(xFlags, s3, F_ZF, 1); \ + } \ } \ IFX(X_SF) { \ + IFNATIVE(NF_SF) {} else { \ CSETw(s3, cMI); \ BFIw(xFlags, s3, F_SF, 1); \ + } \ } \ } else { \ IFX(X_ZF) { \ ANDSw_mask(s1, s1, 0, (A)-1); \ + IFNATIVE(NF_EQ) {} else { \ CSETw(s3, cEQ); \ BFIw(xFlags, s3, F_ZF, 1); \ + } \ } \ IFX(X_SF) { \ LSRw(s3, s1, (A)-1); \ diff --git a/src/dynarec/arm64/dynarec_arm64_pass0.h b/src/dynarec/arm64/dynarec_arm64_pass0.h index a9dd57e5..d03223ed 100644 --- a/src/dynarec/arm64/dynarec_arm64_pass0.h +++ b/src/dynarec/arm64/dynarec_arm64_pass0.h @@ -30,6 +30,8 @@ #define INST_EPILOG \ dyn->insts[ninst].f_exit = dyn->f; \ dyn->insts[ninst].n = dyn->n; \ + if(dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_TOUCH && !dyn->insts[ninst].set_nat_flags) \ + dyn->insts[ninst].nat_flags_op=NAT_FLAG_OP_UNUSABLE; \ dyn->insts[ninst].x64.has_next = (ok>0)?1:0; #define INST_NAME(name) #define DEFAULT \ @@ -47,3 +49,14 @@ printFunctionAddr(ip, " => "); \ dynarec_log(LOG_NONE, "\n"); \ } + +#define FEMIT(A) dyn->insts[ninst].nat_flags_op = dyn->insts[ninst].x64.set_flags?NAT_FLAG_OP_TOUCH:NAT_FLAG_OP_UNUSABLE +#define IFNATIVE(A) if(mark_natflag(dyn, ninst, A)) +#define IFNATIVEN(A) if(mark_natflag(dyn, ninst, A)) +#define IFX(A) if((dyn->insts[ninst].x64.set_flags&(A))) +#define IFX2(A, B) if((dyn->insts[ninst].x64.set_flags&(A)) B) +#define IFX_PENDOR0 if((dyn->insts[ninst].x64.set_flags&(X_PEND) || !dyn->insts[ninst].x64.set_flags)) +#define IFXX(A) if((dyn->insts[ninst].x64.set_flags==(A))) +#define IFX2X(A, B) if((dyn->insts[ninst].x64.set_flags==(A) || dyn->insts[ninst].x64.set_flags==(B) || dyn->insts[ninst].x64.set_flags==((A)|(B)))) +#define IFXN(A, B) if((dyn->insts[ninst].x64.set_flags&(A) && !(dyn->insts[ninst].x64.set_flags&(B)))) +#define IFXNATIVE(X, N) if((dyn->insts[ninst].x64.set_flags&(X)) && mark_natflag(dyn, ninst, N)) \ No newline at end of file diff --git a/src/dynarec/arm64/dynarec_arm64_private.h b/src/dynarec/arm64/dynarec_arm64_private.h index 840fb47f..1ea9f658 100644 --- a/src/dynarec/arm64/dynarec_arm64_private.h +++ b/src/dynarec/arm64/dynarec_arm64_private.h @@ -9,6 +9,19 @@ typedef struct instsize_s instsize_t; #define BARRIER_MAYBE 8 +#define NF_EQ (1<<0) +#define NF_SF (1<<1) +#define NF_VF (1<<2) + +// Nothing happens to the native flags +#define NAT_FLAG_OP_NONE 0 +// Native flags are touched on this opcode +#define NAT_FLAG_OP_TOUCH 1 +// Native flags are destroyed and unusable +#define NAT_FLAG_OP_UNUSABLE 2 +// Native flags usaged are canceled here +#define NAT_FLAG_OP_CANCELED 3 + #define NEON_CACHE_NONE 0 #define NEON_CACHE_ST_D 1 #define NEON_CACHE_ST_F 2 @@ -94,6 +107,11 @@ typedef struct instruction_arm64_s { uint8_t barrier_maybe; uint8_t will_write; uint8_t last_write; + uint8_t set_nat_flags; // 0 or combinaison of 
native flags define + uint8_t use_nat_flags; // 0 or combination of native flags define + uint8_t nat_flags_op;// what happens to native flags here + uint8_t before_nat_flags; // 0 or combination of native flags define + uint8_t need_nat_flags; flagcache_t f_exit; // flags status at end of instruction neoncache_t n; // neoncache at end of instruction (but before poping) flagcache_t f_entry; // flags status before the instruction begin @@ -154,9 +172,11 @@ void CreateJmpNext(void* addr, void* next); #define GO_TRACE(A, B, s0) \ GETIP(addr); \ MOVx_REG(x1, xRIP); \ + MRS_nzvc(s0); \ STORE_XEMU_CALL(xRIP); \ MOV32w(x2, B); \ - CALL(A, -1); \ + CALL_(A, -1, s0); \ + MSR_nzvc(s0); \ LOAD_XEMU_CALL(xRIP) #endif //__DYNAREC_ARM_PRIVATE_H_ diff --git a/src/dynarec/dynarec_arch.h b/src/dynarec/dynarec_arch.h index c9de4b8f..3790a5db 100644 --- a/src/dynarec/dynarec_arch.h +++ b/src/dynarec/dynarec_arch.h @@ -9,14 +9,19 @@ #define ADDITIONNAL_DEFINITION() \ int fpuCacheNeedsTransform(dynarec_native_t* dyn, int ninst); -#define OTHER_CACHE() \ - if (fpuCacheNeedsTransform(dyn, ninst)) ret|=2; +#define OTHER_CACHE() \ + if (fpuCacheNeedsTransform(dyn, ninst)) ret|=2; \ + if (nativeFlagsNeedsTransform(dyn, ninst)) ret|=4; #include "arm64/arm64_printer.h" #include "arm64/dynarec_arm64_private.h" #include "arm64/dynarec_arm64_functions.h" // Limit here is defined by LD litteral, that is 19bits #define MAXBLOCK_SIZE ((1<<19)-200) + +#define RAZ_SPECIFIC(A, N) rasNativeState(A, N) +#define UPDATE_SPECIFICS(A) updateNatveFlags(A) + #elif defined(LA64) #define instruction_native_t instruction_la64_t @@ -33,6 +38,9 @@ #include "la64/dynarec_la64_functions.h" // Limit here is unconditionnal jump, that is signed 28bits #define MAXBLOCK_SIZE ((1 << 27) - 200) + +#define RAZ_SPECIFIC(A, N) +#define UPDATE_SPECIFICS(A) #elif defined(RV64) #define instruction_native_t instruction_rv64_t @@ -51,6 +59,9 @@ #include "rv64/dynarec_rv64_functions.h" // Limit here is unconditionnal jump, that is signed 21bits #define MAXBLOCK_SIZE ((1<<20)-200) + +#define RAZ_SPECIFIC(A, N) +#define UPDATE_SPECIFICS(A) #else #error Unsupported platform #endif diff --git a/src/dynarec/dynarec_native.c b/src/dynarec/dynarec_native.c index dc0e1830..e42b98f2 100644 --- a/src/dynarec/dynarec_native.c +++ b/src/dynarec/dynarec_native.c @@ -664,6 +664,7 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr, int alternate, int is32bit while(ii