From af1b5a9a51d9a08a75993df4f354e44f4bfe75bb Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Fri, 19 Mar 2021 09:49:03 +0100 Subject: [DYNAREC] Added 80 opcodes --- src/dynarec/arm64_emitter.h | 25 +- src/dynarec/dynarec_arm64_00.c | 75 ++++++ src/dynarec/dynarec_arm64_emit_logic.c | 201 ++++++++-------- src/dynarec/dynarec_arm64_emit_math.c | 410 +++++++++++++++++---------------- src/dynarec/dynarec_arm64_emit_tests.c | 162 ++++++------- src/dynarec/dynarec_arm64_helper.h | 51 ++-- 6 files changed, 508 insertions(+), 416 deletions(-) (limited to 'src') diff --git a/src/dynarec/arm64_emitter.h b/src/dynarec/arm64_emitter.h index 746135fc..bbf5d420 100755 --- a/src/dynarec/arm64_emitter.h +++ b/src/dynarec/arm64_emitter.h @@ -85,6 +85,7 @@ #define cLE 0b1101 #define c__ 0b1110 +#define invCond(cond) ((cond)^0b0001) // MOVZ #define MOVZ_gen(sf, hw, imm16, Rd) ((sf)<<31 | 0b10<<29 | 0b100101<<23 | (hw)<<21 | (imm16)<<5 | (Rd)) @@ -162,12 +163,22 @@ #define CMPSw_U12(Rn, imm12) SUBSw_U12(wZR, Rn, imm12) #define CMPSxw_U12(Rn, imm12) SUBSxw_U12(xZR, Rn, imm12) +#define ADDSUBC_gen(sf, op, S, Rm, Rn, Rd) ((sf)<<31 | (op)<<30 | (S)<<29 | 0b11010000<<21 | (Rm)<<16 | (Rn)<<5 | (Rd)) +#define ADCx_REG(Rd, Rn, Rm) EMIT(ADDSUBC_gen(1, 0, 0, Rm, Rn, Rd)) +#define ADCw_REG(Rd, Rn, Rm) EMIT(ADDSUBC_gen(0, 0, 0, Rm, Rn, Rd)) +#define ADCxw_REG(Rd, Rn, Rm) EMIT(ADDSUBC_gen(rex.w, 0, 0, Rm, Rn, Rd)) +#define SBCx_REG(Rd, Rn, Rm) EMIT(ADDSUBC_gen(1, 1, 0, Rm, Rn, Rd)) +#define SBCw_REG(Rd, Rn, Rm) EMIT(ADDSUBC_gen(0, 1, 0, Rm, Rn, Rd)) +#define SBCxw_REG(Rd, Rn, Rm) EMIT(ADDSUBC_gen(rex.w, 1, 0, Rm, Rn, Rd)) + // LDR #define LDR_gen(size, op1, imm9, op2, Rn, Rt) ((size)<<30 | 0b111<<27 | (op1)<<24 | 0b01<<22 | (imm9)<<12 | (op2)<<10 | (Rn)<<5 | (Rt)) #define LDRx_S9_postindex(Rt, Rn, imm9) EMIT(LDR_gen(0b11, 0b00, (imm9)&0x1ff, 0b01, Rn, Rt)) #define LDRx_S9_preindex(Rt, Rn, imm9) EMIT(LDR_gen(0b11, 0b00, (imm9)&0x1ff, 0b11, Rn, Rt)) #define LDRw_S9_postindex(Rt, Rn, imm9) EMIT(LDR_gen(0b10, 0b00, (imm9)&0x1ff, 0b01, Rn, Rt)) #define LDRw_S9_preindex(Rt, Rn, imm9) EMIT(LDR_gen(0b10, 0b00, (imm9)&0x1ff, 0b11, Rn, Rt)) +#define LDRB_S9_postindex(Rt, Rn, imm9) EMIT(LDR_gen(0b00, 0b00, (imm9)&0x1ff, 0b01, Rn, Rt)) +#define LDRB_S9_preindex(Rt, Rn, imm9) EMIT(LDR_gen(0b00, 0b00, (imm9)&0x1ff, 0b11, Rn, Rt)) #define LD_gen(size, op1, imm12, Rn, Rt) ((size)<<30 | 0b111<<27 | (op1)<<24 | 0b01<<22 | (imm12)<<10 | (Rn)<<5 | (Rt)) #define LDRx_U12(Rt, Rn, imm12) EMIT(LD_gen(0b11, 0b01, ((uint32_t)(imm12>>3))&0xfff, Rn, Rt)) @@ -261,6 +272,14 @@ #define NOP EMIT(0b11010101000000110010000000011111) +#define CSINC_gen(sf, Rm, cond, Rn, Rd) ((sf)<<31 | 0b11010100<<21 | (Rm)<<16 | (cond)<<12 | 1<<10 | (Rn)<<5 | (Rd)) +#define CSINCx(Rd, Rn, Rm, cond) EMIT(CSINC_gen(1, Rm, cond, Rn, Rd)) +#define CSINCw(Rd, Rn, Rm, cond) EMIT(CSINC_gen(0, Rm, cond, Rn, Rd)) +#define CSINCxw(Rd, Rn, Rm, cond) EMIT(CSINC_gen(rex.w, Rm, cond, Rn, Rd)) +#define CSETx(Rd, cond) CSINCx(Rd, xZR, xZR, invCond(cond)) +#define CSETw(Rd, cond) CSINCw(Rd, xZR, xZR, invCond(cond)) +#define CSETxw(Rd, cond) CSINCxw(Rd, xZR, xZR, invCond(cond)) + // AND / ORR #define LOGIC_gen(sf, opc, N, immr, imms, Rn, Rd) ((sf)<<31 | (opc)<<29 | 0b100100<<23 | (N)<<22 | (immr)<<16 | (imms)<<10 | (Rn)<<5 | Rd) // logic to get the mask is ... convoluted... list of possible value there: https://gist.github.com/dinfuehr/51a01ac58c0b23e4de9aac313ed6a06a @@ -395,8 +414,10 @@ // MRS #define MRS_gen(L, o0, op1, CRn, CRm, op2, Rt) (0b1101010100<<22 | (L)<<21 | 1<<20 | (o0)<<19 | (op1)<<16 | (CRn)<<12 | (CRm)<<8 | (op2)<<5 | (Rt)) // mrs x0, nzcv : 1101010100 1 1 1 011 0100 0010 000 00000 o0=1(op0=3), op1=0b011(3) CRn=0b0100(4) CRm=0b0010(2) op2=0 -#define VMRS_nzvc(Rt) EMIT(MRS_gen(1, 1, 3, 4, 2, 0, Rt)) -#define VMSR_nzvc(Rt) EMIT(MRS_gen(0, 1, 3, 4, 2, 0, Rt)) +// MRS : from System register +#define MRS_nzvc(Rt) EMIT(MRS_gen(1, 1, 3, 4, 2, 0, Rt)) +// MSR : to System register +#define MSR_nzvc(Rt) EMIT(MRS_gen(0, 1, 3, 4, 2, 0, Rt)) // mrs x0, fpcr : 1101010100 1 1 1 011 0100 0100 000 00000 o0=1(op0=3), op1=0b011(3) CRn=0b0100(4) CRm=0b0100(4) op2=2 #define VMRS(Rt) EMIT(MRS_gen(1, 1, 3, 4, 4, 0, Rt)) #define VMSR(Rt) EMIT(MRS_gen(0, 1, 3, 4, 4, 0, Rt)) diff --git a/src/dynarec/dynarec_arm64_00.c b/src/dynarec/dynarec_arm64_00.c index ddb6538a..b8c34ca8 100755 --- a/src/dynarec/dynarec_arm64_00.c +++ b/src/dynarec/dynarec_arm64_00.c @@ -369,6 +369,81 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin break; #undef GO + case 0x80: + nextop = F8; + switch((nextop>>3)&7) { + case 0: //ADD + INST_NAME("ADD Eb, Ib"); + SETFLAGS(X_ALL, SF_SET); + GETEB(x1, 1); + u8 = F8; + emit_add8c(dyn, ninst, x1, u8, x2, x4, x5); + EBBACK; + break; + case 1: //OR + INST_NAME("OR Eb, Ib"); + SETFLAGS(X_ALL, SF_SET); + GETEB(x1, 1); + u8 = F8; + emit_or8c(dyn, ninst, x1, u8, x2, x4); + EBBACK; + break; + case 2: //ADC + INST_NAME("ADC Eb, Ib"); + READFLAGS(X_CF); + SETFLAGS(X_ALL, SF_SET); + GETEB(x1, 1); + u8 = F8; + emit_adc8c(dyn, ninst, x1, u8, x2, x4, x5); + EBBACK; + break; + case 3: //SBB + INST_NAME("SBB Eb, Ib"); + READFLAGS(X_CF); + SETFLAGS(X_ALL, SF_SET); + GETEB(x1, 1); + u8 = F8; + emit_sbb8c(dyn, ninst, x1, u8, x2, x4, x5); + EBBACK; + break; + case 4: //AND + INST_NAME("AND Eb, Ib"); + SETFLAGS(X_ALL, SF_SET); + GETEB(x1, 1); + u8 = F8; + emit_and8c(dyn, ninst, x1, u8, x2, x4); + EBBACK; + break; + case 5: //SUB + INST_NAME("SUB Eb, Ib"); + SETFLAGS(X_ALL, SF_SET); + GETEB(x1, 1); + u8 = F8; + emit_sub8c(dyn, ninst, x1, u8, x2, x4, x5); + EBBACK; + break; + case 6: //XOR + INST_NAME("XOR Eb, Ib"); + SETFLAGS(X_ALL, SF_SET); + GETEB(x1, 1); + u8 = F8; + emit_xor8c(dyn, ninst, x1, u8, x2, x4); + EBBACK; + break; + case 7: //CMP + INST_NAME("CMP Eb, Ib"); + SETFLAGS(X_ALL, SF_SET); + GETEB(x1, 1); + u8 = F8; + if(u8) { + MOV32w(x2, u8); + emit_cmp8(dyn, ninst, x1, x2, x3, x4, x5); + } else { + emit_cmp8_0(dyn, ninst, x1, x3, x4); + } + break; + } + break; case 0x81: case 0x83: nextop = F8; diff --git a/src/dynarec/dynarec_arm64_emit_logic.c b/src/dynarec/dynarec_arm64_emit_logic.c index bc529251..682400be 100755 --- a/src/dynarec/dynarec_arm64_emit_logic.c +++ b/src/dynarec/dynarec_arm64_emit_logic.c @@ -264,41 +264,37 @@ void emit_and32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, in //} // emit OR8 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch -//void emit_or8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4) -//{ -// IFX(X_PEND) { -// MOV32(s3, c&0xff); -// STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1)); -// STR_IMM9(s3, xEmu, offsetof(x64emu_t, op2)); -// SET_DF(s4, d_or8); -// } else IFX(X_ALL) { -// SET_DFNONE(s4); -// } -// IFX(X_ALL) { -// ORRS_IMM8(s1, s1, c, 0); -// } else { -// ORR_IMM8(s1, s1, c, 0); -// } -// IFX(X_PEND) { -// STR_IMM9(s1, xEmu, offsetof(x64emu_t, res)); -// } -// IFX(X_CF | X_AF | X_ZF) { -// BIC_IMM8(xFlags, xFlags, (1<> 6) ^ ((bc>>6)>>1)) & 1 -// } -// } -// IFX(X_CF) { -// MOV_REG_LSR_IMM5(s3, s1, 8); -// BFI(xFlags, s3, F_CF, 1); -// } -// IFX(X_PEND) { -// STR_IMM9(s1, xEmu, offsetof(x64emu_t, res)); -// } -// IFX(X_ZF) { -// ANDS_IMM8(s1, s1, 0xff); -// ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<> 6) ^ ((bc>>6)>>1)) & 1 + } + } + IFX(X_CF) { + LSRw(s3, s1, 8); + BFIw(xFlags, s3, F_CF, 1); + } + IFX(X_PEND) { + STRB_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_ZF) { + ANDSw_mask(s1, s1, 0, 0b000111); //mask=000000ff + CSETw(s3, cEQ); + BFIw(xFlags, s3, F_ZF, 1); + } + IFX(X_SF) { + LSRw(s3, s1, 7); + BFIw(xFlags, s3, F_SF, 1); + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} // emit SUB8 instruction, from s1 , s2, store result in s1 using s3 and s4 as scratch, with save_s4 is s4 need to be saved //void emit_sub8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int save_s4) @@ -438,56 +439,61 @@ void emit_sub32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, in //} // emit SUB8 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch -//void emit_sub8c(dynarec_arm_t* dyn, int ninst, int s1, int c, int s3, int s4) -//{ -// IFX(X_PEND) { -// MOVW(s3, c&0xff); -// STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1)); -// STR_IMM9(s3, xEmu, offsetof(x64emu_t, op2)); -// SET_DF(s3, d_sub8); -// } else IFX(X_ALL) { -// SET_DFNONE(s3); -// } -// IFX(X_AF|X_OF|X_CF) { -// MVN_REG_LSL_IMM5(s3, s1, 0); -// MOVW(s4, c&0xff); -// ORR_IMM8(s3, s3, c, 0); // s3 = ~op1 | op2 -// BIC_REG_LSL_IMM5(s4, s4, s1, 0); // s4 = ~op1 & op2 -// } -// SUB_IMM8(s1, s1, c); -// IFX(X_PEND) { -// STR_IMM9(s1, xEmu, offsetof(x64emu_t, res)); -// } -// IFX(X_AF|X_OF|X_CF) { -// AND_REG_LSL_IMM5(s3, s3, s1, 0); // s3 = (~op1 | op2) & res -// ORR_REG_LSL_IMM5(s3, s3, s4, 0); // s3 = (~op1 & op2) | ((~op1 | op2) & res) -// IFX(X_CF) { -// MOV_REG_LSR_IMM5(s4, s3, 7); -// BFI(xFlags, s4, F_CF, 1); // CF : bc & 0x80 -// } -// IFX(X_AF) { -// MOV_REG_LSR_IMM5(s4, s3, 3); -// BFI(xFlags, s4, F_AF, 1); // AF: bc & 0x08 -// } -// IFX(X_OF) { -// MOV_REG_LSR_IMM5(s4, s3, 6); -// XOR_REG_LSR_IMM8(s4, s4, s4, 1); -// BFI(xFlags, s4, F_OF, 1); // OF: ((bc >> 6) ^ ((bc>>6)>>1)) & 1 -// } -// } -// IFX(X_ZF) { -// ANDS_IMM8(s1, s1, 0xff); -// ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<> 6) ^ ((bc>>6)>>1)) & 1 + } + } + IFX(X_ZF) { + ANDSw_mask(s1, s1, 0, 0b000111); //mask=000000ff + CSETw(s3, cEQ); + BFIw(xFlags, s3, F_ZF, 1); + } + IFX(X_SF) { + LSRw(s3, s1, 7); + BFIw(xFlags, s3, F_SF, 1); + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} // emit ADD16 instruction, from s1 , s2, store result in s1 using s3 and s4 as scratch, with save_s4 is s4 need to be saved //void emit_add16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int save_s4) @@ -1193,56 +1199,58 @@ void emit_dec32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4 //} // emit ADC8 instruction, from s1 , const c, store result in s1 using s3 and s4 as scratch, with save_s4 is s4 need to be saved -//void emit_adc8c(dynarec_arm_t* dyn, int ninst, int s1, int c, int s3, int s4) -//{ -// IFX(X_PEND) { -// MOVW(s3, c&0xff); -// STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1)); -// STR_IMM9(s3, xEmu, offsetof(x64emu_t, op2)); -// SET_DF(s4, d_adc8); -// } else IFX(X_ALL) { -// SET_DFNONE(s4); -// } -// IFX(X_AF | X_OF) { -// MOV_REG(s4, s1); -// } -// MOVS_REG_LSR_IMM5(s3, xFlags, 1); // load CC into ARM CF -// ADC_IMM8(s1, s1, c); -// IFX(X_PEND) { -// STR_IMM9(s1, xEmu, offsetof(x64emu_t, res)); -// } -// IFX(X_AF|X_OF) { -// ORR_IMM8(s3, s4, c, 0); // s3 = op1 | op2 -// AND_IMM8(s4, s4, c); // s4 = op1 & op2 -// BIC_REG_LSL_IMM5(s3, s3, s1, 0); // s3 = (op1 | op2) & ~ res -// ORR_REG_LSL_IMM5(s3, s3, s4, 0); // s4 = (op1 & op2) | ((op1 | op2) & ~ res) -// IFX(X_AF) { -// MOV_REG_LSR_IMM5(s4, s3, 3); -// BFI(xFlags, s4, F_AF, 1); // AF: bc & 0x08 -// } -// IFX(X_OF) { -// MOV_REG_LSR_IMM5(s4, s3, 6); -// XOR_REG_LSR_IMM8(s4, s4, s4, 1); -// BFI(xFlags, s4, F_OF, 1); // OF: ((bc >> 6) ^ ((bc>>6)>>1)) & 1 -// } -// } -// IFX(X_CF) { -// MOV_REG_LSR_IMM5(s3, s1, 8); -// BFI(xFlags, s3, F_CF, 1); -// } -// IFX(X_ZF) { -// ANDS_IMM8(s1, s1, 0xff); -// ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<> 6) ^ ((bc>>6)>>1)) & 1 + } + } + IFX(X_CF) { + LSRw(s3, s1, 8); + BFIw(xFlags, s3, F_CF, 1); + } + IFX(X_ZF) { + ANDSw_mask(s1, s1, 0, 0b000111); //mask=000000ff + CSETw(s3, cEQ); + BFIw(xFlags, s3, F_ZF, 1); + } + IFX(X_SF) { + LSRw(s3, s1, 7); + BFIw(xFlags, s3, F_SF, 1); + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} // emit ADC16 instruction, from s1 , s2, store result in s1 using s3 and s4 as scratch, with save_s4 is s4 need to be saved //void emit_adc16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int save_s4) @@ -1547,57 +1555,59 @@ void emit_dec32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4 //} // emit SBB8 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch -//void emit_sbb8c(dynarec_arm_t* dyn, int ninst, int s1, int c, int s3, int s4) -//{ -// IFX(X_PEND) { -// MOVW(s3, c&0xff); -// STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1)); -// STR_IMM9(s3, xEmu, offsetof(x64emu_t, op2)); -// SET_DF(s3, d_sbb8); -// } else IFX(X_ALL) { -// SET_DFNONE(s3); -// } -// IFX(X_AF|X_OF|X_CF) { -// MVN_REG_LSL_IMM5(s4, s1, 0); -// } -// XOR_IMM8(s3, xFlags, 1); // invert CC because it's reverted for SUB on ARM -// MOVS_REG_LSR_IMM5(s3, s3, 1); // load into ARM CF -// SBC_IMM8(s1, s1, c); -// IFX(X_PEND) { -// STR_IMM9(s1, xEmu, offsetof(x64emu_t, res)); -// } -// IFX(X_AF|X_OF|X_CF) { -// ORR_IMM8(s3, s4, c, 0); // s3 = ~op1 | op2 -// AND_IMM8(s4, s4, c); // s4 = ~op1 & op2 -// AND_REG_LSL_IMM5(s3, s3, s1, 0); // s3 = (~op1 | op2) & res -// ORR_REG_LSL_IMM5(s3, s3, s4, 0); // s3 = (~op1 & op2) | ((~op1 | op2) & res) -// IFX(X_CF) { -// MOV_REG_LSR_IMM5(s4, s3, 7); -// BFI(xFlags, s4, F_CF, 1); // CF : bc & 0x80 -// } -// IFX(X_AF) { -// MOV_REG_LSR_IMM5(s4, s3, 3); -// BFI(xFlags, s4, F_AF, 1); // AF: bc & 0x08 -// } -// IFX(X_OF) { -// MOV_REG_LSR_IMM5(s4, s3, 6); -// XOR_REG_LSR_IMM8(s4, s4, s4, 1); -// BFI(xFlags, s4, F_OF, 1); // OF: ((bc >> 6) ^ ((bc>>6)>>1)) & 1 -// } -// } -// IFX(X_ZF) { -// ANDS_IMM8(s1, s1, 0xff); -// ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<> 6) ^ ((bc>>6)>>1)) & 1 + } + } + IFX(X_ZF) { + ANDSw_mask(s1, s1, 0, 0b000111); //mask=000000ff + CSETw(s3, cEQ); + BFIw(xFlags, s3, F_ZF, 1); + } + IFX(X_SF) { + LSRw(s3, s1, 7); + BFIw(xFlags, s3, F_SF, 1); + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} // emit SBB16 instruction, from s1 , s2, store result in s1 using s3 and s4 as scratch, with save_s4 is s4 need to be saved //void emit_sbb16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int save_s4) diff --git a/src/dynarec/dynarec_arm64_emit_tests.c b/src/dynarec/dynarec_arm64_emit_tests.c index 16fdef1f..a6430847 100755 --- a/src/dynarec/dynarec_arm64_emit_tests.c +++ b/src/dynarec/dynarec_arm64_emit_tests.c @@ -203,93 +203,83 @@ void emit_cmp32_0(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s3, int // } //} // emit CMP8 instruction, from cmp s1 , s2, using s3 and s4 as scratch -//void emit_cmp8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4) -//{ -// IFX(X_PEND) { -// STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1)); -// STR_IMM9(s2, xEmu, offsetof(x64emu_t, op2)); -// SET_DF(s4, d_cmp8); -// } else { -// SET_DFNONE(s4); -// } -// SUB_REG_LSL_IMM5(s3, s1, s2, 0); // res = s1 - s2 -// IFX(X_PEND) { -// STR_IMM9(s3, xEmu, offsetof(x64emu_t, res)); -// } -// IFX(X_ZF) { -// TSTS_IMM8(s3, 0xff); -// ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<> 6) ^ ((bc>>6)>>1)) & 1 -// } -// } -// IFX(X_PF) { -// IFX(X_CF|X_AF|X_OF) { -// SUB_REG_LSL_IMM5(s3, s1, s2, 0); -// } -// AND_IMM8(s3, s3, 0xE0); // lsr 5 masking pre-applied -// MOV32(s4, GetParityTab()); -// LDR_REG_LSR_IMM5(s4, s4, s3, 5-2); // x/32 and then *4 because array is integer -// SUB_REG_LSL_IMM5(s3, s1, s2, 0); -// AND_IMM8(s3, s3, 31); -// MVN_REG_LSR_REG(s4, s4, s3); -// BFI(xFlags, s4, F_PF, 1); -// } -//} +void emit_cmp8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5) +{ + IFX(X_PEND) { + STRB_U12(s1, xEmu, offsetof(x64emu_t, op1)); + STRB_U12(s2, xEmu, offsetof(x64emu_t, op2)); + SET_DF(s4, d_cmp8); + } else { + SET_DFNONE(s4); + } + SUBw_REG(s5, s1, s2); // res = s1 - s2 + IFX(X_PEND) { + STRB_REG(s5, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_ZF) { + ANDSw_mask(s1, s1, 0, 0b000111); //mask=000000ff + CSETw(s3, cEQ); + BFIw(xFlags, s3, F_ZF, 1); + } + IFX(X_SF) { + LSRw(s5, s1, 7); + BFIw(xFlags, s5, F_SF, 1); + } + // bc = (res & (~d | s)) | (~d & s) + IFX(X_CF|X_AF|X_OF) { + MVNw_REG(s4, s1); // s4 = ~d + ORRw_REG(s4, s4, s2); // s4 = ~d | s + ANDw_REG(s4, s4, s5); // s4 = res & (~d | s) + BICw_REG(s3, s2, s5); // s3 = s & ~d + ORRw_REG(s3, s4, s3); // s3 = (res & (~d | s)) | (s & ~d) + IFX(X_CF) { + LSRw(s4, s3, 7); + BFIw(xFlags, s4, F_CF, 1); // CF : bc & 0x80 + } + IFX(X_AF) { + LSRw(s4, s3, 3); + BFIw(xFlags, s4, F_AF, 1); // AF: bc & 0x08 + } + IFX(X_OF) { + LSRw(s4, s3, 6); + EORw_REG_LSR(s4, s4, s4, 1); + BFIw(xFlags, s4, F_OF, 1); // OF: ((bc >> 6) ^ ((bc>>6)>>1)) & 1 + } + } + IFX(X_PF) { + emit_pf(dyn, ninst, s5, s3, s4); + } +} // emit CMP8 instruction, from cmp s1 , 0, using s3 and s4 as scratch -//void emit_cmp8_0(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4) -//{ -// IFX(X_PEND) { -// STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1)); -// MOVW(s4, 0); -// STR_IMM9(s4, xEmu, offsetof(x64emu_t, op2)); -// STR_IMM9(s1, xEmu, offsetof(x64emu_t, res)); -// SET_DF(s3, d_cmp8); -// } else { -// SET_DFNONE(s4); -// } -// // bc = (res & (~d | s)) | (~d & s) = 0 -// IFX(X_CF | X_AF | X_ZF) { -// BIC_IMM8(xFlags, xFlags, (1<>3)+(rex.r<<3)), gd, 0, 16); //GETEB will use i for ed, and can use r3 for wback. -#define GETEB(i) if((nextop&0xC0)==0xC0) { \ - wback = (nextop&7); \ - wb2 = (wback>>2); \ - wback = xEAX+(wback&3); \ - UXTB(i, wback, wb2); \ - wb1 = 0; \ - ed = i; \ - } else { \ - addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 4095, 0); \ - LDRB_IMM9(i, wback, fixedaddress); \ - wb1 = 1; \ - ed = i; \ +#define GETEB(i, D) if(MODREG) { \ + if(rex.rex) { \ + wback = xRAX+(nextop&7)+(rex.r<<3); \ + wb2 = 0; \ + } else { \ + wback = (nextop&7); \ + wb2 = (wback>>2)*8; \ + wback = xRAX+(wback&3); \ + } \ + UBFMw(i, wback, wb2, 7); \ + wb1 = 0; \ + ed = i; \ + } else { \ + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff, 0, rex, 0, D); \ + LDRB_U12(i, wback, fixedaddress); \ + wb1 = 1; \ + ed = i; \ } //GETEBO will use i for ed, i is also Offset, and can use r3 for wback. #define GETEBO(i) if((nextop&0xC0)==0xC0) { \ @@ -185,11 +190,11 @@ ed = i; \ } // Write eb (ed) back to original register / memory -#define EBBACK if(wb1) {STRB_IMM9(ed, wback, fixedaddress);} else {BFI(wback, ed, wb2*8, 8);} +#define EBBACK if(wb1) {STRB_U12(ed, wback, fixedaddress);} else {BFIx(wback, ed, wb2, 8);} //GETGB will use i for gd #define GETGB(i) gd = (nextop&0x38)>>3; \ gb2 = ((gd&4)>>2); \ - gb1 = xEAX+(gd&3); \ + gb1 = xRAX+(gd&3); \ gd = i; \ nopenope! \ UXTB(gd, gb1, gb2); @@ -616,10 +621,10 @@ void iret_to_epilog(dynarec_arm_t* dyn, int ninst); void call_c(dynarec_arm_t* dyn, int ninst, void* fnc, int reg, int ret, int saveflags, int save_reg); //void grab_fsdata(dynarec_arm_t* dyn, uintptr_t addr, int ninst, int reg); //void grab_tlsdata(dynarec_arm_t* dyn, uintptr_t addr, int ninst, int reg); -//void emit_cmp8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4); +void emit_cmp8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5); //void emit_cmp16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4); void emit_cmp32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5); -//void emit_cmp8_0(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4); +void emit_cmp8_0(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4); //void emit_cmp16_0(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4); void emit_cmp32_0(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4); //void emit_test8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4); @@ -628,11 +633,11 @@ void emit_test32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s void emit_add32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); void emit_add32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4, int s5); //void emit_add8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int save_s4); -//void emit_add8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); +void emit_add8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4, int s5); void emit_sub32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); void emit_sub32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4, int s5); //void emit_sub8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int save_s4); -//void emit_sub8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); +void emit_sub8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4, int s5); void emit_or32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); void emit_or32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4); void emit_xor32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); @@ -640,11 +645,11 @@ void emit_xor32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, in void emit_and32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); void emit_and32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4); //void emit_or8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4); -//void emit_or8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); +void emit_or8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); //void emit_xor8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4); -//void emit_xor8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); +void emit_xor8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); //void emit_and8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4); -//void emit_and8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); +void emit_and8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); //void emit_add16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int save_s4); //void emit_add16c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); //void emit_sub16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int save_s4); @@ -664,13 +669,13 @@ void emit_dec32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4 //void emit_adc32(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4); //void emit_adc32c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); //void emit_adc8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int save_s4); -//void emit_adc8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); +void emit_adc8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4, int s5); //void emit_adc16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int save_s4); //void emit_adc16c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); //void emit_sbb32(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4); //void emit_sbb32c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); //void emit_sbb8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int save_s4); -//void emit_sbb8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); +void emit_sbb8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4, int s5); //void emit_sbb16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int save_s4); //void emit_sbb16c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); //void emit_neg32(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4); -- cgit 1.4.1