#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#include "debug.h"
#include "box64context.h"
#include "dynarec.h"
#include "emu/x64emu_private.h"
#include "emu/x64run_private.h"
#include "la64_emitter.h"
#include "x64run.h"
#include "x64emu.h"
#include "box64stack.h"
#include "callback.h"
#include "x64trace.h"
#include "dynarec_native.h"
#include "la64_printer.h"
#include "dynarec_la64_private.h"
#include "dynarec_la64_functions.h"
#include "dynarec_la64_helper.h"

// emit ADD32 instruction, from s1, s2, store result in s1 using s3, s4 and s5 as scratch
void emit_add32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5)
{
    IFX(X_PEND) {
        if (rex.w) {
            ST_D(s1, xEmu, offsetof(x64emu_t, op1));
            ST_D(s2, xEmu, offsetof(x64emu_t, op2));
        } else {
            ST_W(s1, xEmu, offsetof(x64emu_t, op1));
            ST_W(s2, xEmu, offsetof(x64emu_t, op2));
        }
        SET_DF(s3, rex.w ? d_add64 : d_add32b);
    } else IFX(X_ALL) {
        SET_DFNONE();
    }
    if (la64_lbt) {
        IFX(X_ALL) {
            if (rex.w)
                X64_ADD_DU(s1, s2);
            else
                X64_ADD_WU(s1, s2);
        }
        ADDxw(s1, s1, s2);
        if (!rex.w) ZEROUP(s1);
        IFX(X_PEND) SDxw(s1, xEmu, offsetof(x64emu_t, res));
        return;
    }
    CLEAR_FLAGS(s3);
    IFX(X_CF) {
        if (rex.w) {
            ZEROUP2(s5, s1);
            ZEROUP2(s4, s2);
            ADD_D(s5, s5, s4);
            SRLI_D(s3, s1, 0x20);
            SRLI_D(s4, s2, 0x20);
            ADD_D(s4, s4, s3);
            SRLI_D(s5, s5, 0x20);
            ADD_D(s5, s5, s4); // hi
            SRAI_D(s5, s5, 0x20);
            BEQZ(s5, 8);
            ORI(xFlags, xFlags, 1 << F_CF);
        } else {
            ZEROUP2(s3, s1);
            ZEROUP2(s4, s2);
            ADD_D(s5, s3, s4);
            SRLI_D(s5, s5, 0x20);
            BEQZ(s5, 8);
            ORI(xFlags, xFlags, 1 << F_CF);
        }
    }
    IFX(X_AF | X_OF) {
        OR(s3, s1, s2);  // s3 = op1 | op2
        AND(s4, s1, s2); // s4 = op1 & op2
    }
    ADDxw(s1, s1, s2);
    IFX(X_PEND) {
        SDxw(s1, xEmu, offsetof(x64emu_t, res));
    }
    IFX(X_AF | X_OF) {
        ANDN(s3, s3, s1); // s3 = ~res & (op1 | op2)
        OR(s3, s3, s4);   // cc = (~res & (op1 | op2)) | (op1 & op2)
        IFX(X_AF) {
            ANDI(s4, s3, 0x08); // AF: cc & 0x08
            BEQZ(s4, 8);
            ORI(xFlags, xFlags, 1 << F_AF);
        }
        IFX(X_OF) {
            SRLI_D(s3, s3, rex.w ? 62 : 30);
            SRLI_D(s4, s3, 1);
            XOR(s3, s3, s4);
            ANDI(s3, s3, 1); // OF: xor of the two MSBs of cc
            BEQZ(s3, 8);
            ORI(xFlags, xFlags, 1 << F_OF);
        }
    }
    IFX(X_SF) {
        BGE(s1, xZR, 8);
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    if (!rex.w) {
        ZEROUP(s1);
    }
    IFX(X_PF) {
        emit_pf(dyn, ninst, s1, s3, s4);
    }
    IFX(X_ZF) {
        BNEZ(s1, 8);
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
}
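/*
 * Flag-computation idioms used throughout this file (a summary of what the
 * emitters below implement):
 *
 * - Conditional flag set: LA64 instructions are 4 bytes wide, so the pair
 *       BEQZ(s, 8);                    // branch over the next instruction
 *       ORI(xFlags, xFlags, 1 << F_x); // set the flag bit
 *   sets a flag exactly when s is non-zero.
 *
 * - AF/OF via the carry chain: for res = op1 + op2, the vector of per-bit
 *   carry-outs is cc = (~res & (op1 | op2)) | (op1 & op2). AF is the carry
 *   out of bit 3, and OF is the carry into the sign bit XOR the carry out
 *   of it, i.e. the XOR of the two top bits of cc. In plain C (an
 *   illustrative sketch of the computation, not emitted code):
 *
 *       uint64_t cc = (~res & (op1 | op2)) | (op1 & op2);
 *       int af = (cc >> 3) & 1;
 *       int of = ((cc >> (width - 2)) ^ (cc >> (width - 1))) & 1;
 */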
// emit ADD32 instruction, from s1, constant c, store result in s1 using s2, s3, s4 and s5 as scratch
void emit_add32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s2, int s3, int s4, int s5)
{
    if (s1 == xRSP && (!dyn->insts || dyn->insts[ninst].x64.gen_flags == X_PEND)) {
        // special case when doing math on RSP and only PEND is needed: ignoring it!
        if (c >= -2048 && c < 2048) {
            ADDIxw(s1, s1, c);
        } else {
            MOV64xw(s2, c);
            ADDxw(s1, s1, s2);
        }
        return;
    }
    IFX(X_PEND | X_AF | X_CF | X_OF) {
        MOV64xw(s2, c);
    } else if (la64_lbt) {
        MOV64xw(s2, c);
    }
    IFX(X_PEND) {
        SDxw(s1, xEmu, offsetof(x64emu_t, op1));
        SDxw(s2, xEmu, offsetof(x64emu_t, op2));
        SET_DF(s3, rex.w ? d_add64 : d_add32b);
    } else IFX(X_ALL) {
        SET_DFNONE();
    }
    if (la64_lbt) {
        IFX(X_ALL) {
            if (rex.w)
                X64_ADD_DU(s1, s2);
            else
                X64_ADD_WU(s1, s2);
        }
        ADDxw(s1, s1, s2);
        if (!rex.w) ZEROUP(s1);
        IFX(X_PEND) SDxw(s1, xEmu, offsetof(x64emu_t, res));
        return;
    }
    CLEAR_FLAGS(s3);
    IFX(X_CF) {
        if (rex.w) {
            ZEROUP2(s5, s1);
            ZEROUP2(s4, s2);
            ADD_D(s5, s5, s4);
            SRLI_D(s3, s1, 0x20);
            SRLI_D(s4, s2, 0x20);
            ADD_D(s4, s4, s3);
            SRLI_D(s5, s5, 0x20);
            ADD_D(s5, s5, s4); // hi
            SRAI_D(s5, s5, 0x20);
            BEQZ(s5, 8);
            ORI(xFlags, xFlags, 1 << F_CF);
        } else {
            ZEROUP2(s3, s1);
            ZEROUP2(s4, s2);
            ADD_D(s5, s3, s4);
            SRLI_D(s5, s5, 0x20);
            BEQZ(s5, 8);
            ORI(xFlags, xFlags, 1 << F_CF);
        }
    }
    IFX(X_AF | X_OF) {
        OR(s3, s1, s2);  // s3 = op1 | op2
        AND(s4, s1, s2); // s4 = op1 & op2
    }
    if (c >= -2048 && c < 2048) {
        ADDIxw(s1, s1, c);
    } else {
        IFX(X_PEND | X_AF | X_CF | X_OF) {} else {
            MOV64xw(s2, c);
        }
        ADDxw(s1, s1, s2);
    }
    IFX(X_PEND) {
        SDxw(s1, xEmu, offsetof(x64emu_t, res));
    }
    IFX(X_AF | X_OF) {
        ANDN(s3, s3, s1); // s3 = ~res & (op1 | op2)
        OR(s3, s3, s4);   // cc = (~res & (op1 | op2)) | (op1 & op2)
        IFX(X_AF) {
            ANDI(s4, s3, 0x08); // AF: cc & 0x08
            BEQZ(s4, 8);
            ORI(xFlags, xFlags, 1 << F_AF);
        }
        IFX(X_OF) {
            SRLI_D(s3, s3, rex.w ? 62 : 30);
            SRLI_D(s4, s3, 1);
            XOR(s3, s3, s4);
            ANDI(s3, s3, 1); // OF: xor of the two MSBs of cc
            BEQZ(s3, 8);
            ORI(xFlags, xFlags, 1 << F_OF);
        }
    }
    IFX(X_SF) {
        BGE(s1, xZR, 8);
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    if (!rex.w) {
        ZEROUP(s1);
    }
    IFX(X_PF) {
        emit_pf(dyn, ninst, s1, s3, s4);
    }
    IFX(X_ZF) {
        BNEZ(s1, 8);
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
}

// emit ADD8 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch
void emit_add8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
{
    IFX(X_PEND) {
        ST_B(s1, xEmu, offsetof(x64emu_t, op1));
        ST_B(s2, xEmu, offsetof(x64emu_t, op2));
        SET_DF(s3, d_add8);
    } else IFX(X_ALL) {
        SET_DFNONE();
    }
    if (la64_lbt) {
        IFX(X_ALL) {
            X64_ADD_B(s1, s2);
        }
        ADD_D(s1, s1, s2);
        IFX(X_PEND) ST_H(s1, xEmu, offsetof(x64emu_t, res));
        return;
    }
    CLEAR_FLAGS(s3);
    IFX(X_AF | X_OF) {
        OR(s3, s1, s2);  // s3 = op1 | op2
        AND(s4, s1, s2); // s4 = op1 & op2
    }
    ADD_D(s1, s1, s2);
    IFX(X_AF | X_OF) {
        ANDN(s3, s3, s1); // s3 = ~res & (op1 | op2)
        OR(s3, s3, s4);   // cc = (~res & (op1 | op2)) | (op1 & op2)
        IFX(X_AF) {
            ANDI(s4, s3, 0x08); // AF: cc & 0x08
            BEQZ(s4, 8);
            ORI(xFlags, xFlags, 1 << F_AF);
        }
        IFX(X_OF) {
            SRLI_D(s3, s3, 6);
            SRLI_D(s4, s3, 1);
            XOR(s3, s3, s4);
            ANDI(s3, s3, 1); // OF: xor of the two MSBs of cc
            BEQZ(s3, 8);
            ORI(xFlags, xFlags, 1 << F_OF);
        }
    }
    IFX(X_CF) {
        SRLI_D(s3, s1, 8);
        BEQZ(s3, 8);
        ORI(xFlags, xFlags, 1 << F_CF);
    }
    IFX(X_PEND) {
        ST_H(s1, xEmu, offsetof(x64emu_t, res));
    }
    ANDI(s1, s1, 0xff);
    IFX(X_ZF) {
        BNEZ(s1, 8);
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
    IFX(X_SF) {
        SRLI_D(s3, s1, 7);
        BEQZ(s3, 8);
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    IFX(X_PF) {
        emit_pf(dyn, ninst, s1, s3, s4);
    }
}
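/*
 * The sub-register emitters keep results wider than their nominal size so
 * the carry-out stays observable: after an 8-bit add, bit 8 of s1 is
 * exactly CF (read with SRLI_D(s3, s1, 8) above), and the value is masked
 * back to 0xff only after CF has been extracted. The constant variants
 * below follow the same scheme and only materialize the immediate into a
 * scratch register when some consumer (X_PEND storage, LBT, or the AF/OF
 * computation) needs it as a full operand.
 */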
// emit ADD8 instruction, from s1, const c, store result in s1 using s3 and s4 as scratch
void emit_add8c(dynarec_la64_t* dyn, int ninst, int s1, int c, int s2, int s3, int s4)
{
    IFX(X_PEND) {
        MOV32w(s4, c & 0xff);
        ST_B(s1, xEmu, offsetof(x64emu_t, op1));
        ST_B(s4, xEmu, offsetof(x64emu_t, op2));
        SET_DF(s3, d_add8);
    } else IFX(X_ALL) {
        SET_DFNONE();
    }
    if (la64_lbt) {
        IFX(X_ALL) {
            IFX(X_PEND) {} else {
                MOV32w(s4, c & 0xff);
            }
            X64_ADD_B(s1, s4);
        }
        ADDI_D(s1, s1, c & 0xff);
        IFX(X_PEND) ST_H(s1, xEmu, offsetof(x64emu_t, res));
        return;
    }
    CLEAR_FLAGS(s3);
    IFX(X_AF | X_OF) {
        IFX(X_PEND) {} else {
            MOV32w(s4, c & 0xff);
        }
        OR(s3, s1, s4);  // s3 = op1 | op2
        AND(s4, s1, s4); // s4 = op1 & op2
    }
    ADDI_D(s1, s1, c & 0xff);
    IFX(X_AF | X_OF) {
        ANDN(s3, s3, s1); // s3 = ~res & (op1 | op2)
        OR(s3, s3, s4);   // cc = (~res & (op1 | op2)) | (op1 & op2)
        IFX(X_AF) {
            ANDI(s4, s3, 0x08); // AF: cc & 0x08
            BEQZ(s4, 8);
            ORI(xFlags, xFlags, 1 << F_AF);
        }
        IFX(X_OF) {
            SRLI_D(s3, s3, 6);
            SRLI_D(s4, s3, 1);
            XOR(s3, s3, s4);
            ANDI(s3, s3, 1); // OF: xor of the two MSBs of cc
            BEQZ(s3, 8);
            ORI(xFlags, xFlags, 1 << F_OF);
        }
    }
    IFX(X_CF) {
        SRLI_D(s3, s1, 8);
        BEQZ(s3, 8);
        ORI(xFlags, xFlags, 1 << F_CF);
    }
    IFX(X_PEND) {
        ST_H(s1, xEmu, offsetof(x64emu_t, res));
    }
    ANDI(s1, s1, 0xff);
    IFX(X_ZF) {
        BNEZ(s1, 8);
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
    IFX(X_SF) {
        SRLI_D(s3, s1, 7);
        BEQZ(s3, 8);
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    IFX(X_PF) {
        emit_pf(dyn, ninst, s1, s3, s4);
    }
}

// emit ADD16 instruction, from s1, s2, store result in s1 using s3, s4 and s5 as scratch
void emit_add16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
{
    IFX(X_PEND) {
        ST_H(s1, xEmu, offsetof(x64emu_t, op1));
        ST_H(s2, xEmu, offsetof(x64emu_t, op2));
        SET_DF(s3, d_add16);
    } else IFX(X_ALL) {
        SET_DFNONE();
    }
    IFXA(X_AF | X_OF, !la64_lbt) {
        OR(s3, s1, s2);  // s3 = op1 | op2
        AND(s4, s1, s2); // s4 = op1 & op2
    }
    IFXA(X_ALL, la64_lbt) {
        X64_ADD_H(s1, s2);
    }
    ADD_D(s1, s1, s2);
    IFX(X_PEND) {
        ST_W(s1, xEmu, offsetof(x64emu_t, res));
    }
    if (la64_lbt) {
        BSTRPICK_D(s1, s1, 15, 0);
        return;
    }
    CLEAR_FLAGS(s5);
    IFX(X_AF | X_OF) {
        ANDN(s3, s3, s1); // s3 = ~res & (op1 | op2)
        OR(s3, s3, s4);   // cc = (~res & (op1 | op2)) | (op1 & op2)
        IFX(X_AF) {
            ANDI(s4, s3, 0x08); // AF: cc & 0x08
            BEQZ(s4, 8);
            ORI(xFlags, xFlags, 1 << F_AF);
        }
        IFX(X_OF) {
            SRLI_D(s3, s3, 14);
            SRLI_D(s4, s3, 1);
            XOR(s3, s3, s4);
            ANDI(s3, s3, 1); // OF: xor of the two MSBs of cc
            BEQZ(s3, 8);
            ORI(xFlags, xFlags, 1 << F_OF);
        }
    }
    IFX(X_CF) {
        SRLI_D(s3, s1, 16);
        BEQZ(s3, 8);
        ORI(xFlags, xFlags, 1 << F_CF);
    }
    BSTRPICK_D(s1, s1, 15, 0);
    IFX(X_ZF) {
        BNEZ(s1, 8);
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
    IFX(X_SF) {
        SRLI_D(s3, s1, 15);
        BEQZ(s3, 8);
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    IFX(X_PF) {
        emit_pf(dyn, ninst, s1, s3, s4);
    }
}

// emit SUB8 instruction, from s1, s2, store result in s1 using s3, s4 and s5 as scratch
void emit_sub8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
{
    IFX(X_PEND) {
        ST_B(s1, xEmu, offsetof(x64emu_t, op1));
        ST_B(s2, xEmu, offsetof(x64emu_t, op2));
        SET_DF(s3, d_sub8);
    } else IFX(X_ALL) {
        SET_DFNONE();
    }
    if (la64_lbt) {
        IFX(X_ALL) {
            X64_SUB_B(s1, s2);
        }
        SUB_D(s1, s1, s2);
        IFX(X_PEND) ST_H(s1, xEmu, offsetof(x64emu_t, res));
        return;
    }
    CLEAR_FLAGS(s3);
    IFX(X_AF | X_CF | X_OF) {
        // for later flag calculation
        NOR(s5, xZR, s1);
    }
    SUB_D(s1, s1, s2);
    ANDI(s1, s1, 0xff);
    IFX(X_SF) {
        SRLI_D(s3, s1, 7);
        BEQZ(s3, 8);
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    IFX(X_PEND) {
        ST_B(s1, xEmu, offsetof(x64emu_t, res));
    }
    CALC_SUB_FLAGS(s5, s2, s1, s3, s4, 8);
    IFX(X_ZF) {
        BNEZ(s1, 8);
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
    IFX(X_PF) {
        emit_pf(dyn, ninst, s1, s3, s4);
    }
}

// emit SUB8 instruction, from s1, constant c, store result in s1 using s2, s3, s4 and s5 as scratch
void emit_sub8c(dynarec_la64_t* dyn, int ninst, int s1, int c, int s2, int s3, int s4, int s5)
{
    MOV32w(s2, c & 0xff);
    emit_sub8(dyn, ninst, s1, s2, s3, s4, s5);
}
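/*
 * The SUB emitters save ~op1 (NOR(s5, xZR, s1)) before subtracting, then
 * hand (~op1, op2, res) to CALC_SUB_FLAGS from dynarec_la64_helper.h.
 * Judging from the DEC emitters further down, which inline the same
 * computation, the macro derives the borrow chain as
 *     bc = (res & (~op1 | op2)) | (~op1 & op2)
 * with CF the borrow out of the top bit, AF the borrow out of bit 3, and
 * OF the XOR of the two top bits of bc; a plain-C sketch under that
 * assumption:
 *     uint64_t bc = (res & (~op1 | op2)) | (~op1 & op2);
 *     int cf = (bc >> (width - 1)) & 1;
 *     int af = (bc >> 3) & 1;
 *     int of = ((bc >> (width - 2)) ^ (bc >> (width - 1))) & 1;
 */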
// emit SUB16 instruction, from s1, s2, store result in s1 using s3, s4 and s5 as scratch
void emit_sub16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
{
    IFX(X_PEND) {
        ST_H(s1, xEmu, offsetof(x64emu_t, op1));
        ST_H(s2, xEmu, offsetof(x64emu_t, op2));
        SET_DF(s3, d_sub16);
    } else IFX(X_ALL) {
        SET_DFNONE();
    }
    IFXA(X_AF | X_CF | X_OF, !la64_lbt) {
        // for later flag calculation
        NOR(s5, xZR, s1);
    }
    IFXA(X_ALL, la64_lbt) {
        X64_SUB_H(s1, s2);
    }
    SUB_W(s1, s1, s2);
    IFX(X_PEND) {
        ST_H(s1, xEmu, offsetof(x64emu_t, res));
    }
    if (la64_lbt) return;
    CLEAR_FLAGS(s3);
    SLLI_D(s1, s1, 48);
    IFX(X_SF) {
        BGE(s1, xZR, 8);
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    SRLI_D(s1, s1, 48);
    CALC_SUB_FLAGS(s5, s2, s1, s3, s4, 16);
    IFX(X_ZF) {
        BNEZ(s1, 8);
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
    IFX(X_PF) {
        emit_pf(dyn, ninst, s1, s3, s4);
    }
}

// emit SUB32 instruction, from s1, s2, store result in s1 using s3, s4 and s5 as scratch
void emit_sub32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5)
{
    IFX(X_PEND) {
        SDxw(s1, xEmu, offsetof(x64emu_t, op1));
        SDxw(s2, xEmu, offsetof(x64emu_t, op2));
        SET_DF(s3, rex.w ? d_sub64 : d_sub32);
    } else IFX(X_ALL) {
        SET_DFNONE();
    }
    if (la64_lbt) {
        IFX(X_ALL) {
            if (rex.w)
                X64_SUB_DU(s1, s2);
            else
                X64_SUB_WU(s1, s2);
        }
        SUBxw(s1, s1, s2);
        if (!rex.w) ZEROUP(s1);
        IFX(X_PEND) SDxw(s1, xEmu, offsetof(x64emu_t, res));
        return;
    }
    CLEAR_FLAGS(s3);
    IFX(X_AF | X_CF | X_OF) {
        // for later flag calculation
        NOR(s5, xZR, s1);
    }
    SUBxw(s1, s1, s2);
    IFX(X_PEND) {
        SDxw(s1, xEmu, offsetof(x64emu_t, res));
    }
    IFX(X_SF) {
        BGE(s1, xZR, 8);
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    if (!rex.w) {
        ZEROUP(s1);
    }
    CALC_SUB_FLAGS(s5, s2, s1, s3, s4, rex.w ? 64 : 32);
    IFX(X_ZF) {
        BNEZ(s1, 8);
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
    IFX(X_PF) {
        emit_pf(dyn, ninst, s1, s3, s4);
    }
}
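/*
 * Like emit_add32c, the constant variant below has a fast path for pure
 * stack-pointer arithmetic: when s1 is xRSP and only X_PEND is requested
 * (the typical push/pop address adjustment), the flag computation is
 * skipped entirely and a single ADDI is emitted whenever -c fits the
 * signed 12-bit immediate field.
 */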
// emit SUB32 instruction, from s1, constant c, store result in s1 using s2, s3, s4 and s5 as scratch
void emit_sub32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s2, int s3, int s4, int s5)
{
    if (s1 == xRSP && (!dyn->insts || dyn->insts[ninst].x64.gen_flags == X_PEND)) {
        // special case when doing math on RSP and only PEND is needed: ignoring it!
        if (c > -2048 && c <= 2048) {
            ADDI_D(s1, s1, -c);
        } else {
            MOV64xw(s2, c);
            SUBxw(s1, s1, s2);
        }
        return;
    }
    IFX(X_PEND) {
        SDxw(s1, xEmu, offsetof(x64emu_t, op1));
        MOV64xw(s2, c);
        SDxw(s2, xEmu, offsetof(x64emu_t, op2));
        SET_DF(s3, rex.w ? d_sub64 : d_sub32);
    } else IFX(X_ALL) {
        SET_DFNONE();
    }
    if (la64_lbt) {
        IFX(X_PEND) {} else {
            MOV64xw(s2, c);
        }
        IFX(X_ALL) {
            if (rex.w) {
                X64_SUB_DU(s1, s2);
            } else {
                X64_SUB_WU(s1, s2);
            }
        }
        SUBxw(s1, s1, s2);
        if (!rex.w) ZEROUP(s1);
        IFX(X_PEND) SDxw(s1, xEmu, offsetof(x64emu_t, res));
        return;
    }
    CLEAR_FLAGS(s3);
    IFX(X_AF | X_CF | X_OF) {
        // for later flag calculation
        NOR(s5, xZR, s1);
    }
    if (c > -2048 && c <= 2048) {
        ADDIxw(s1, s1, -c);
    } else {
        IFX(X_PEND) {} else {
            MOV64xw(s2, c);
        }
        SUBxw(s1, s1, s2);
    }
    IFX(X_AF | X_CF | X_OF) {
        IFX(X_PEND) {} else if (c > -2048 && c <= 2048) {
            MOV64xw(s2, c);
        }
    }
    IFX(X_PEND) {
        SDxw(s1, xEmu, offsetof(x64emu_t, res));
    }
    IFX(X_SF) {
        BGE(s1, xZR, 8);
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    if (!rex.w) {
        ZEROUP(s1);
    }
    CALC_SUB_FLAGS(s5, s2, s1, s3, s4, rex.w ? 64 : 32);
    IFX(X_ZF) {
        BNEZ(s1, 8);
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
    IFX(X_PF) {
        emit_pf(dyn, ninst, s1, s3, s4);
    }
}

// emit SBB8 instruction, from s1, s2, store result in s1 using s3, s4 and s5 as scratch
void emit_sbb8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
{
    IFX(X_PEND) {
        ST_B(s1, xEmu, offsetof(x64emu_t, op1));
        ST_B(s2, xEmu, offsetof(x64emu_t, op2));
        SET_DF(s3, d_sbb8);
    } else IFX(X_ALL) {
        SET_DFNONE();
    }
    if (la64_lbt) {
        SBC_B(s3, s1, s2);
        IFX(X_ALL) {
            X64_SBC_B(s1, s2);
        }
        ANDI(s1, s3, 0xff);
        IFX(X_PEND) ST_B(s1, xEmu, offsetof(x64emu_t, res));
        return;
    }
    IFX(X_AF | X_CF | X_OF) {
        // for later flag calculation
        NOR(s5, xZR, s1);
    }
    SUB_W(s1, s1, s2);
    ANDI(s3, xFlags, 1 << F_CF);
    SUB_W(s1, s1, s3);
    ANDI(s1, s1, 0xff);
    CLEAR_FLAGS(s3);
    IFX(X_PEND) {
        ST_B(s1, xEmu, offsetof(x64emu_t, res));
    }
    CALC_SUB_FLAGS(s5, s2, s1, s3, s4, 8);
    IFX(X_SF) {
        SRLI_D(s3, s1, 7);
        BEQZ(s3, 8);
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    IFX(X_ZF) {
        BNEZ(s1, 8);
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
    IFX(X_PF) {
        emit_pf(dyn, ninst, s1, s3, s4);
    }
}

// emit SBB8 instruction, from s1, constant c, store result in s1 using s3, s4, s5 and s6 as scratch
void emit_sbb8c(dynarec_la64_t* dyn, int ninst, int s1, int c, int s3, int s4, int s5, int s6)
{
    MOV32w(s6, c & 0xff);
    emit_sbb8(dyn, ninst, s1, s6, s3, s4, s5);
}

// emit SBB16 instruction, from s1, s2, store result in s1 using s3, s4 and s5 as scratch
void emit_sbb16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
{
    IFX(X_PEND) {
        ST_H(s1, xEmu, offsetof(x64emu_t, op1));
        ST_H(s2, xEmu, offsetof(x64emu_t, op2));
        SET_DF(s3, d_sbb16);
    } else IFX(X_ALL) {
        SET_DFNONE();
    }
    IFXA(X_ALL, la64_lbt) {
        SBC_H(s3, s1, s2);
        IFX(X_ALL) {
            X64_SBC_H(s1, s2);
        }
        BSTRPICK_D(s1, s3, 15, 0);
        IFX(X_PEND) ST_H(s1, xEmu, offsetof(x64emu_t, res));
        return;
    }
    IFX(X_AF | X_CF | X_OF) {
        // for later flag calculation
        NOR(s5, xZR, s1);
    }
    SUB_W(s1, s1, s2);
    ANDI(s3, xFlags, 1 << F_CF);
    SUB_W(s1, s1, s3);
    CLEAR_FLAGS(s3);
    SLLI_W(s1, s1, 16);
    IFX(X_SF) {
        BGE(s1, xZR, 8);
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    SRLI_W(s1, s1, 16);
    IFX(X_PEND) {
        ST_H(s1, xEmu, offsetof(x64emu_t, res));
    }
    CALC_SUB_FLAGS(s5, s2, s1, s3, s4, 16);
    IFX(X_ZF) {
        BNEZ(s1, 8);
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
    IFX(X_PF) {
        emit_pf(dyn, ninst, s1, s3, s4);
    }
}
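/*
 * SBB computes op1 - op2 - CF. On the generic path the incoming carry is
 * extracted with ANDI(s3, xFlags, 1 << F_CF) (F_CF is bit 0, so the mask
 * fits an ANDI immediate and yields 0 or 1) and subtracted separately. On
 * LBT-capable cores the SBC_* helpers fold the carry into the subtraction
 * and the X64_SBC_* instructions maintain the emulated eflags directly,
 * so no manual flag reconstruction is needed.
 */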
// emit SBB32 instruction, from s1, s2, store result in s1 using s3, s4 and s5 as scratch
void emit_sbb32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5)
{
    IFX(X_PEND) {
        SDxw(s1, xEmu, offsetof(x64emu_t, op1));
        SDxw(s2, xEmu, offsetof(x64emu_t, op2));
        SET_DF(s3, rex.w ? d_sbb64 : d_sbb32);
    } else IFX(X_ALL) {
        SET_DFNONE();
    }
    if (la64_lbt) {
        if (rex.w) {
            SBC_D(s3, s1, s2);
        } else {
            SBC_W(s3, s1, s2);
        }
        IFX(X_ALL) {
            if (rex.w)
                X64_SBC_D(s1, s2);
            else
                X64_SBC_W(s1, s2);
        }
        MVxw(s1, s3);
        IFX(X_PEND) SDxw(s1, xEmu, offsetof(x64emu_t, res));
        return;
    }
    IFX(X_AF | X_CF | X_OF) {
        // for later flag calculation
        NOR(s5, xZR, s1);
    }
    SUBxw(s1, s1, s2);
    ANDI(s3, xFlags, 1 << F_CF);
    SUBxw(s1, s1, s3);
    CLEAR_FLAGS(s3);
    IFX(X_SF) {
        BGE(s1, xZR, 8);
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    if (!rex.w) {
        ZEROUP(s1);
    }
    IFX(X_PEND) {
        SDxw(s1, xEmu, offsetof(x64emu_t, res));
    }
    CALC_SUB_FLAGS(s5, s2, s1, s3, s4, rex.w ? 64 : 32);
    IFX(X_ZF) {
        BNEZ(s1, 8);
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
    IFX(X_PF) {
        emit_pf(dyn, ninst, s1, s3, s4);
    }
}

// emit NEG8 instruction, from s1, store result in s1 using s2 and s3 as scratch
void emit_neg8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3)
{
    IFX(X_PEND) {
        ST_B(s1, xEmu, offsetof(x64emu_t, op1));
        SET_DF(s3, d_neg8);
    } else IFX(X_ALL) {
        SET_DFNONE();
    }
    IFX(X_AF | X_OF) {
        MV(s3, s1); // s3 = op1
    }
    IFXA(X_ALL, la64_lbt) {
        X64_SUB_B(xZR, s1);
    }
    NEG_D(s1, s1);
    ANDI(s1, s1, 0xff);
    IFX(X_PEND) {
        ST_B(s1, xEmu, offsetof(x64emu_t, res));
    }
    if (la64_lbt) return;
    CLEAR_FLAGS(s2);
    IFX(X_CF) {
        BEQZ(s1, 8);
        ORI(xFlags, xFlags, 1 << F_CF);
    }
    IFX(X_AF | X_OF) {
        OR(s3, s1, s3); // s3 = res | op1
        IFX(X_AF) {
            /* af = bc & 0x8 */
            ANDI(s2, s3, 8);
            BEQZ(s2, 8);
            ORI(xFlags, xFlags, 1 << F_AF);
        }
        IFX(X_OF) {
            /* of = ((bc >> (width-2)) ^ (bc >> (width-1))) & 0x1; */
            SRLI_D(s2, s3, 6);
            SRLI_D(s3, s2, 1);
            XOR(s2, s2, s3);
            ANDI(s2, s2, 1);
            BEQZ(s2, 8);
            ORI(xFlags, xFlags, 1 << F_OF);
        }
    }
    IFX(X_SF) {
        ANDI(s3, s1, 1 << F_SF); // 1<<F_SF is already the byte sign bit, no shift needed
        BEQZ(s3, 8);
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    IFX(X_PF) {
        emit_pf(dyn, ninst, s1, s3, s2);
    }
    IFX(X_ZF) {
        BNEZ(s1, 8);
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
}

// emit NEG32 instruction, from s1, store result in s1 using s2 and s3 as scratch
void emit_neg32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3)
{
    IFX(X_PEND) {
        SDxw(s1, xEmu, offsetof(x64emu_t, op1));
        SET_DF(s3, rex.w ? d_neg64 : d_neg32);
    } else IFX(X_ALL) {
        SET_DFNONE();
    }
    IFX(X_AF | X_OF) {
        MV(s3, s1); // s3 = op1
    }
    IFXA(X_ALL, la64_lbt) {
        if (rex.w)
            X64_SUB_D(xZR, s1);
        else
            X64_SUB_W(xZR, s1);
    }
    NEGxw(s1, s1);
    IFX(X_PEND) {
        SDxw(s1, xEmu, offsetof(x64emu_t, res));
    }
    if (la64_lbt) {
        if (!rex.w) ZEROUP(s1);
        return;
    }
    CLEAR_FLAGS(s2);
    IFX(X_CF) {
        BEQZ(s1, 8);
        ORI(xFlags, xFlags, 1 << F_CF);
    }
    IFX(X_AF | X_OF) {
        OR(s3, s1, s3); // s3 = res | op1
        IFX(X_AF) {
            /* af = bc & 0x8 */
            ANDI(s2, s3, 8);
            BEQZ(s2, 8);
            ORI(xFlags, xFlags, 1 << F_AF);
        }
        IFX(X_OF) {
            /* of = ((bc >> (width-2)) ^ (bc >> (width-1))) & 0x1; */
            SRLI_D(s2, s3, (rex.w ? 64 : 32) - 2);
            SRLI_D(s3, s2, 1);
            XOR(s2, s2, s3);
            ANDI(s2, s2, 1);
            BEQZ(s2, 8);
            ORI(xFlags, xFlags, 1 << F_OF);
        }
    }
    IFX(X_SF) {
        BGE(s1, xZR, 8);
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    if (!rex.w) {
        ZEROUP(s1);
    }
    IFX(X_PF) {
        emit_pf(dyn, ninst, s1, s3, s2);
    }
    IFX(X_ZF) {
        BNEZ(s1, 8);
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
}
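/*
 * ADC needs the carry-out of op1 + op2 + CF before CLEAR_FLAGS runs, so
 * the ADC32 emitter below sums the operands in 32-bit halves: the low
 * halves plus the incoming CF first, then the high halves plus the carry
 * of the low sum. The resulting carry is parked in s6 and only tested
 * once the flags have been cleared.
 */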
// emit ADC32 instruction, from s1, s2, store result in s1 using s3, s4, s5 and s6 as scratch
void emit_adc32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5, int s6)
{
    IFX(X_PEND) {
        SDxw(s1, xEmu, offsetof(x64emu_t, op1));
        SDxw(s2, xEmu, offsetof(x64emu_t, op2));
        SET_DF(s3, rex.w ? d_adc64 : d_adc32b);
    } else IFX(X_ALL) {
        SET_DFNONE();
    }
    if (la64_lbt) {
        if (rex.w)
            ADC_D(s3, s1, s2);
        else
            ADC_W(s3, s1, s2);
        IFX(X_ALL) {
            if (rex.w)
                X64_ADC_D(s1, s2);
            else
                X64_ADC_W(s1, s2);
        }
        MV(s1, s3);
        IFX(X_PEND) {
            SDxw(s1, xEmu, offsetof(x64emu_t, res));
        }
        return;
    }
    IFX(X_CF) {
        if (rex.w) {
            ZEROUP2(s5, s1);
            ZEROUP2(s4, s2);
            ADD_D(s5, s5, s4); // lo
            ANDI(s3, xFlags, 1);
            ADD_D(s5, s5, s3); // add carry
            SRLI_D(s3, s1, 0x20);
            SRLI_D(s4, s2, 0x20);
            ADD_D(s4, s4, s3);
            SRLI_D(s5, s5, 0x20);
            ADD_D(s5, s5, s4); // hi
            SRAI_D(s6, s5, 0x20);
        } else {
            ZEROUP2(s3, s1);
            ZEROUP2(s4, s2);
            ADD_D(s5, s3, s4);
            ANDI(s3, xFlags, 1);
            ADD_D(s5, s5, s3); // add carry
            SRLI_D(s6, s5, 0x20);
        }
    }
    IFX(X_AF | X_OF) {
        OR(s4, s1, s2);  // s4 = op1 | op2
        AND(s5, s1, s2); // s5 = op1 & op2
    }
    ADDxw(s1, s1, s2);
    ANDI(s3, xFlags, 1 << F_CF);
    ADDxw(s1, s1, s3);
    IFX(X_PEND) {
        SDxw(s1, xEmu, offsetof(x64emu_t, res));
    }
    CLEAR_FLAGS(s3);
    IFX(X_CF) {
        BEQZ(s6, 8);
        ORI(xFlags, xFlags, 1 << F_CF);
    }
    IFX(X_AF | X_OF) {
        ANDN(s3, s4, s1); // s3 = ~res & (op1 | op2)
        OR(s3, s3, s5);   // cc = (~res & (op1 | op2)) | (op1 & op2)
        IFX(X_AF) {
            ANDI(s4, s3, 0x08); // AF: cc & 0x08
            BEQZ(s4, 8);
            ORI(xFlags, xFlags, 1 << F_AF);
        }
        IFX(X_OF) {
            SRLI_D(s3, s3, rex.w ? 62 : 30);
            SRLI_D(s4, s3, 1);
            XOR(s3, s3, s4);
            ANDI(s3, s3, 1); // OF: xor of the two MSBs of cc
            BEQZ(s3, 8);
            ORI(xFlags, xFlags, 1 << F_OF);
        }
    }
    IFX(X_SF) {
        BGE(s1, xZR, 8);
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    if (!rex.w) {
        ZEROUP(s1);
    }
    IFX(X_PF) {
        emit_pf(dyn, ninst, s1, s3, s4);
    }
    IFX(X_ZF) {
        BNEZ(s1, 8);
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
}

// emit INC8 instruction, from s1, store result in s1 using s2, s3 and s4 as scratch
void emit_inc8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
{
    IFX(X_PEND) {
        ST_B(s1, xEmu, offsetof(x64emu_t, op1));
        SET_DF(s3, d_inc8);
    } else IFX(X_ALL) {
        SET_DFNONE();
    }
    IFXA(X_AF | X_OF, !la64_lbt) {
        ORI(s3, s1, 1);  // s3 = op1 | op2
        ANDI(s4, s1, 1); // s4 = op1 & op2
    }
    IFXA(X_ALL, la64_lbt) {
        X64_INC_B(s1);
    }
    ADDI_W(s1, s1, 1);
    IFX(X_PEND) {
        ST_B(s1, xEmu, offsetof(x64emu_t, res));
    }
    if (la64_lbt) {
        ANDI(s1, s1, 0xff);
        return;
    }
    IFX(X_ALL) {
        // preserving CF; mask goes in s2 since s4 still holds op1 & op2
        MOV64x(s2, (1UL << F_AF) | (1UL << F_OF) | (1UL << F_ZF) | (1UL << F_SF) | (1UL << F_PF));
        ANDN(xFlags, xFlags, s2);
    }
    IFX(X_AF | X_OF) {
        ANDN(s3, s3, s1); // s3 = ~res & (op1 | op2)
        OR(s3, s3, s4);   // cc = (~res & (op1 | op2)) | (op1 & op2)
        IFX(X_AF) {
            ANDI(s2, s3, 0x08); // AF: cc & 0x08
            BEQZ(s2, 8);
            ORI(xFlags, xFlags, 1 << F_AF);
        }
        IFX(X_OF) {
            SRLI_D(s3, s3, 6);
            SRLI_D(s2, s3, 1);
            XOR(s3, s3, s2);
            ANDI(s3, s3, 1); // OF: xor of the two MSBs of cc
            BEQZ(s3, 8);
            ORI(xFlags, xFlags, 1 << F_OF);
        }
    }
    IFX(X_SF) {
        ANDI(s2, s1, 0x80);
        BEQZ(s2, 8);
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    ANDI(s1, s1, 0xff);
    IFX(X_PF) {
        emit_pf(dyn, ninst, s1, s3, s2);
    }
    IFX(X_ZF) {
        BNEZ(s1, 8);
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
}
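/*
 * INC and DEC preserve CF, so the INC/DEC emitters cannot use CLEAR_FLAGS.
 * They instead clear only AF/OF/ZF/SF/PF through an explicit mask and
 * ANDN, leaving the CF bit of xFlags untouched.
 */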
// emit INC16 instruction, from s1, store result in s1 using s2, s3 and s4 as scratch
void emit_inc16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
{
    IFX(X_PEND) {
        ST_H(s1, xEmu, offsetof(x64emu_t, op1));
        SET_DF(s3, d_inc16);
    } else IFX(X_ZF | X_OF | X_AF | X_SF | X_PF) {
        SET_DFNONE();
    }
    IFXA(X_AF | X_OF, !la64_lbt) {
        ORI(s3, s1, 1);  // s3 = op1 | op2
        ANDI(s4, s1, 1); // s4 = op1 & op2
    }
    IFXA(X_ALL, la64_lbt) {
        X64_INC_H(s1);
    }
    ADDI_D(s1, s1, 1);
    IFX(X_PEND) {
        ST_H(s1, xEmu, offsetof(x64emu_t, res));
    }
    if (la64_lbt) {
        BSTRPICK_D(s1, s1, 15, 0);
        return;
    }
    IFX(X_ALL) {
        // preserving CF; mask goes in s2 since s4 still holds op1 & op2
        MOV64x(s2, (1UL << F_AF) | (1UL << F_OF) | (1UL << F_ZF) | (1UL << F_SF) | (1UL << F_PF));
        ANDN(xFlags, xFlags, s2);
    }
    IFX(X_AF | X_OF) {
        ANDN(s3, s3, s1); // s3 = ~res & (op1 | op2)
        OR(s3, s3, s4);   // cc = (~res & (op1 | op2)) | (op1 & op2)
        IFX(X_AF) {
            ANDI(s4, s3, 0x08); // AF: cc & 0x08
            BEQZ(s4, 8);
            ORI(xFlags, xFlags, 1 << F_AF);
        }
        IFX(X_OF) {
            SRLI_D(s3, s3, 14);
            SRLI_D(s4, s3, 1);
            XOR(s3, s3, s4);
            ANDI(s3, s3, 1); // OF: xor of the two MSBs of cc
            BEQZ(s3, 8);
            ORI(xFlags, xFlags, 1 << F_OF);
        }
    }
    BSTRPICK_D(s1, s1, 15, 0);
    IFX(X_ZF) {
        BNEZ(s1, 8);
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
    IFX(X_SF) {
        SRLI_D(s3, s1, 15);
        BEQZ(s3, 8);
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    IFX(X_PF) {
        emit_pf(dyn, ninst, s1, s3, s4);
    }
}

// emit INC32 instruction, from s1, store result in s1 using s2, s3, s4 and s5 as scratch
void emit_inc32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5)
{
    IFX(X_PEND) {
        SDxw(s1, xEmu, offsetof(x64emu_t, op1));
        SET_DF(s3, rex.w ? d_inc64 : d_inc32);
    } else IFX(X_ALL) {
        SET_DFNONE();
    }
    IFXA(X_AF | X_OF, !la64_lbt) {
        ORI(s3, s1, 1);  // s3 = op1 | op2
        ANDI(s5, s1, 1); // s5 = op1 & op2
    }
    IFXA(X_ALL, la64_lbt) {
        if (rex.w) {
            X64_INC_D(s1);
        } else {
            X64_INC_W(s1);
        }
    }
    ADDIxw(s1, s1, 1);
    IFX(X_PEND) {
        SDxw(s1, xEmu, offsetof(x64emu_t, res));
    }
    if (la64_lbt) {
        if (!rex.w) ZEROUP(s1);
        return;
    }
    IFX(X_ALL) {
        // preserving CF
        MOV64x(s4, (1UL << F_AF) | (1UL << F_OF) | (1UL << F_ZF) | (1UL << F_SF) | (1UL << F_PF));
        ANDN(xFlags, xFlags, s4);
    }
    IFX(X_AF | X_OF) {
        ANDN(s3, s3, s1); // s3 = ~res & (op1 | op2)
        OR(s3, s3, s5);   // cc = (~res & (op1 | op2)) | (op1 & op2)
        IFX(X_AF) {
            ANDI(s2, s3, 0x08); // AF: cc & 0x08
            BEQZ(s2, 8);
            ORI(xFlags, xFlags, 1 << F_AF);
        }
        IFX(X_OF) {
            SRLI_D(s3, s3, rex.w ? 62 : 30);
            SRLI_D(s2, s3, 1);
            XOR(s3, s3, s2);
            ANDI(s3, s3, 1); // OF: xor of the two MSBs of cc
            BEQZ(s3, 8);
            ORI(xFlags, xFlags, 1 << F_OF);
        }
    }
    IFX(X_SF) {
        BGE(s1, xZR, 8);
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    if (!rex.w) {
        ZEROUP(s1);
    }
    IFX(X_PF) {
        emit_pf(dyn, ninst, s1, s3, s2);
    }
    IFX(X_ZF) {
        BNEZ(s1, 8);
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
}

// emit DEC8 instruction, from s1, store result in s1 using s2, s3 and s4 as scratch
void emit_dec8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
{
    IFX(X_PEND) {
        ST_B(s1, xEmu, offsetof(x64emu_t, op1));
        SET_DF(s3, d_dec8);
    } else IFX(X_ALL) {
        SET_DFNONE();
    }
    IFXA(X_AF | X_OF, !la64_lbt) {
        NOR(s4, xZR, s1); // s4 = ~op1
        ORI(s3, s4, 1);   // s3 = ~op1 | op2
        ANDI(s4, s4, 1);  // s4 = ~op1 & op2
    }
    IFXA(X_ALL, la64_lbt) {
        X64_DEC_B(s1);
    }
    ADDI_W(s1, s1, -1);
    IFX(X_PEND) {
        ST_B(s1, xEmu, offsetof(x64emu_t, res));
    }
    if (la64_lbt) {
        ANDI(s1, s1, 0xff);
        return;
    }
    IFX(X_ALL) {
        // preserving CF; mask goes in s2 since s4 still holds ~op1 & op2
        MOV64x(s2, (1UL << F_AF) | (1UL << F_OF) | (1UL << F_ZF) | (1UL << F_SF) | (1UL << F_PF));
        ANDN(xFlags, xFlags, s2);
    }
    IFX(X_AF | X_OF) {
        AND(s3, s1, s3); // s3 = res & (~op1 | op2)
        OR(s3, s3, s4);  // cc = (res & (~op1 | op2)) | (~op1 & op2)
        IFX(X_AF) {
            ANDI(s2, s3, 0x08); // AF: cc & 0x08
            BEQZ(s2, 8);
            ORI(xFlags, xFlags, 1 << F_AF);
        }
        IFX(X_OF) {
            SRLI_D(s3, s3, 6);
            SRLI_D(s2, s3, 1);
            XOR(s3, s3, s2);
            ANDI(s3, s3, 1); // OF: xor of the two MSBs of cc
            BEQZ(s3, 8);
            ORI(xFlags, xFlags, 1 << F_OF);
        }
    }
    IFX(X_SF) {
        ANDI(s2, s1, 0x80);
        BEQZ(s2, 8);
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    ANDI(s1, s1, 0xff);
    IFX(X_PF) {
        emit_pf(dyn, ninst, s1, s3, s2);
    }
    IFX(X_ZF) {
        BNEZ(s1, 8);
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
}
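/*
 * 16-bit sign-test idiom: the result is shifted left so its bit 15 becomes
 * the register sign bit (SLLI_D by 48, or SLLI_W by 16, whose 32-bit
 * result is sign-extended), tested with BGE against xZR, then shifted
 * back. BSTRPICK_D(s1, s1, 15, 0) plays the role that ZEROUP plays for
 * the 32-bit emitters.
 */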
// emit DEC16 instruction, from s1, store result in s1 using s2, s3, s4 and s5 as scratch
void emit_dec16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
{
    IFX(X_PEND) {
        ST_H(s1, xEmu, offsetof(x64emu_t, op1));
        SET_DF(s3, d_dec16);
    } else IFX(X_ALL) {
        SET_DFNONE();
    }
    IFX(X_AF | X_OF) {
        NOR(s5, xZR, s1); // s5 = ~op1
        ORI(s3, s5, 1);   // s3 = ~op1 | op2
        ANDI(s5, s5, 1);  // s5 = ~op1 & op2
    }
    IFXA(X_ALL, la64_lbt) {
        X64_DEC_H(s1);
    }
    ADDI_W(s1, s1, -1);
    IFX(X_PEND) {
        ST_H(s1, xEmu, offsetof(x64emu_t, res));
    }
    if (la64_lbt) {
        BSTRPICK_D(s1, s1, 15, 0);
        return;
    }
    IFX(X_ALL) {
        // preserving CF
        MOV64x(s4, (1UL << F_AF) | (1UL << F_OF) | (1UL << F_ZF) | (1UL << F_SF) | (1UL << F_PF));
        ANDN(xFlags, xFlags, s4);
    }
    IFX(X_AF | X_OF) {
        AND(s3, s1, s3); // s3 = res & (~op1 | op2)
        OR(s3, s3, s5);  // cc = (res & (~op1 | op2)) | (~op1 & op2)
        IFX(X_AF) {
            ANDI(s2, s3, 0x08); // AF: cc & 0x08
            BEQZ(s2, 8);
            ORI(xFlags, xFlags, 1 << F_AF);
        }
        IFX(X_OF) {
            SRLI_D(s3, s3, 14);
            SRLI_D(s2, s3, 1);
            XOR(s3, s3, s2);
            ANDI(s3, s3, 1); // OF: xor of the two MSBs of cc
            BEQZ(s3, 8);
            ORI(xFlags, xFlags, 1 << F_OF);
        }
    }
    SLLI_W(s1, s1, 16);
    IFX(X_SF) {
        BGE(s1, xZR, 8);
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    SRLI_W(s1, s1, 16);
    IFX(X_PF) {
        emit_pf(dyn, ninst, s1, s3, s2);
    }
    IFX(X_ZF) {
        BNEZ(s1, 8);
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
}

// emit DEC32 instruction, from s1, store result in s1 using s2, s3, s4 and s5 as scratch
void emit_dec32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5)
{
    IFX(X_PEND) {
        SDxw(s1, xEmu, offsetof(x64emu_t, op1));
        SET_DF(s3, rex.w ? d_dec64 : d_dec32);
    } else IFX(X_ALL) {
        SET_DFNONE();
    }
    IFXA(X_AF | X_OF, !la64_lbt) {
        NOR(s5, xZR, s1); // s5 = ~op1
        ORI(s3, s5, 1);   // s3 = ~op1 | op2
        ANDI(s5, s5, 1);  // s5 = ~op1 & op2
    }
    IFXA(X_ALL, la64_lbt) {
        if (rex.w) {
            X64_DEC_D(s1);
        } else {
            X64_DEC_W(s1);
        }
    }
    ADDIxw(s1, s1, -1);
    IFX(X_PEND) {
        SDxw(s1, xEmu, offsetof(x64emu_t, res));
    }
    if (la64_lbt) {
        if (!rex.w) ZEROUP(s1);
        return;
    }
    IFX(X_ALL) {
        // preserving CF
        MOV64x(s4, (1UL << F_AF) | (1UL << F_OF) | (1UL << F_ZF) | (1UL << F_SF) | (1UL << F_PF));
        ANDN(xFlags, xFlags, s4);
    }
    IFX(X_AF | X_OF) {
        AND(s3, s1, s3); // s3 = res & (~op1 | op2)
        OR(s3, s3, s5);  // cc = (res & (~op1 | op2)) | (~op1 & op2)
        IFX(X_AF) {
            ANDI(s2, s3, 0x08); // AF: cc & 0x08
            BEQZ(s2, 8);
            ORI(xFlags, xFlags, 1 << F_AF);
        }
        IFX(X_OF) {
            SRLI_D(s3, s3, rex.w ? 62 : 30);
            SRLI_D(s2, s3, 1);
            XOR(s3, s3, s2);
            ANDI(s3, s3, 1); // OF: xor of the two MSBs of cc
            BEQZ(s3, 8);
            ORI(xFlags, xFlags, 1 << F_OF);
        }
    }
    IFX(X_SF) {
        BGE(s1, xZR, 8);
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    if (!rex.w) {
        ZEROUP(s1);
    }
    IFX(X_PF) {
        emit_pf(dyn, ninst, s1, s3, s2);
    }
    IFX(X_ZF) {
        BNEZ(s1, 8);
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
}

// emit NEG16 instruction, from s1, store result in s1 using s2 and s3 as scratch
void emit_neg16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3)
{
    IFX(X_PEND) {
        ST_H(s1, xEmu, offsetof(x64emu_t, op1));
        SET_DF(s3, d_neg16);
    } else IFX(X_ALL) {
        SET_DFNONE();
    }
    IFX(X_AF | X_OF) {
        MV(s3, s1); // s3 = op1
    }
    NEG_D(s1, s1);
    BSTRPICK_D(s1, s1, 15, 0);
    IFX(X_PEND) {
        ST_H(s1, xEmu, offsetof(x64emu_t, res));
    }
    CLEAR_FLAGS(s2); // use s2 as scratch: s3 still holds op1 for AF/OF
    IFX(X_CF) {
        BEQZ(s1, 8);
        ORI(xFlags, xFlags, 1 << F_CF);
    }
    IFX(X_AF | X_OF) {
        OR(s3, s1, s3); // s3 = res | op1
        IFX(X_AF) {
            /* af = bc & 0x8 */
            ANDI(s2, s3, 8);
            BEQZ(s2, 8);
            ORI(xFlags, xFlags, 1 << F_AF);
        }
        IFX(X_OF) {
            /* of = ((bc >> (width-2)) ^ (bc >> (width-1))) & 0x1; */
            SRLI_D(s2, s3, 14);
            SRLI_D(s3, s2, 1);
            XOR(s2, s2, s3);
            ANDI(s2, s2, 1);
            BEQZ(s2, 8);
            ORI(xFlags, xFlags, 1 << F_OF);
        }
    }
    IFX(X_SF) {
        SRLI_D(s3, s1, 15 - F_SF); // put sign bit in place
        ANDI(s3, s3, 1 << F_SF);   // 1<<F_SF masks the shifted sign bit
        BEQZ(s3, 8);
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    IFX(X_PF) {
        emit_pf(dyn, ninst, s1, s3, s2);
    }
    IFX(X_ZF) {
        BNEZ(s1, 8);
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
}