diff options
Diffstat (limited to 'src/dynarec/rv64/dynarec_rv64_emit_math.c')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_emit_math.c | 212 |
1 files changed, 161 insertions, 51 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_emit_math.c b/src/dynarec/rv64/dynarec_rv64_emit_math.c index 01579ea3..5d6f7e0e 100644 --- a/src/dynarec/rv64/dynarec_rv64_emit_math.c +++ b/src/dynarec/rv64/dynarec_rv64_emit_math.c @@ -1,7 +1,6 @@ #include <stdio.h> #include <stdlib.h> #include <stddef.h> -#include <pthread.h> #include <errno.h> #include "debug.h" @@ -16,7 +15,6 @@ #include "emu/x64run_private.h" #include "x64trace.h" #include "dynarec_native.h" -#include "../tools/bridge_private.h" #include "rv64_printer.h" #include "dynarec_rv64_private.h" @@ -37,8 +35,7 @@ void emit_add32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s IFX(X_CF) { if (rex.w) { AND(s5, xMASK, s1); - AND(s4, xMASK, s2); - ADD(s5, s5, s4); // lo + if(rv64_zba) ADDUW(s5, s2, s5); else {AND(s4, xMASK, s2); ADD(s5, s5, s4);} // lo SRLI(s3, s1, 0x20); SRLI(s4, s2, 0x20); ADD(s4, s4, s3); @@ -65,8 +62,12 @@ void emit_add32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SDxw(s1, xEmu, offsetof(x64emu_t, res)); } IFX(X_AF | X_OF) { - NOT(s5, s1); // s5 = ~res - AND(s3, s5, s3); // s3 = ~res & (op1 | op2) + if(rv64_zbb) { + ANDN(s3, s1, s3); // s3 = ~res & (op1 | op2) + } else { + NOT(s5, s1); // s5 = ~res + AND(s3, s5, s3); // s3 = ~res & (op1 | op2) + } OR(s3, s3, s4); // cc = (~res & (op1 | op2)) | (op1 & op2) IFX(X_AF) { ANDI(s4, s3, 0x08); // AF: cc & 0x08 @@ -126,8 +127,7 @@ void emit_add32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i IFX(X_CF) { if (rex.w) { AND(s5, xMASK, s1); - AND(s4, xMASK, s2); - ADD(s5, s5, s4); // lo + if(rv64_zba) ADDUW(s5, s2, s5); else {AND(s4, xMASK, s2); ADD(s5, s5, s4);} // lo SRLI(s3, s1, 0x20); SRLI(s4, s2, 0x20); ADD(s4, s4, s3); @@ -159,8 +159,12 @@ void emit_add32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i SDxw(s1, xEmu, offsetof(x64emu_t, res)); } IFX(X_AF | X_OF) { - NOT(s2, s1); // s2 = ~res - AND(s3, s2, s3); // s3 = ~res & (op1 | op2) + if(rv64_zbb) { + ANDN(s3, s1, s3); 
// s3 = ~res & (op1 | op2) + } else { + NOT(s2, s1); // s2 = ~res + AND(s3, s2, s3); // s3 = ~res & (op1 | op2) + } OR(s3, s3, s4); // cc = (~res & (op1 | op2)) | (op1 & op2) IFX(X_AF) { ANDI(s4, s3, 0x08); // AF: cc & 0x08 @@ -213,8 +217,12 @@ void emit_add16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, SW(s1, xEmu, offsetof(x64emu_t, res)); } IFX(X_AF | X_OF) { - NOT(s5, s1); // s5 = ~res - AND(s3, s5, s3); // s3 = ~res & (op1 | op2) + if(rv64_zbb) { + ANDN(s3, s1, s3); // s3 = ~res & (op1 | op2) + } else { + NOT(s5, s1); // s5 = ~res + AND(s3, s5, s3); // s3 = ~res & (op1 | op2) + } OR(s3, s3, s4); // cc = (~res & (op1 | op2)) | (op1 & op2) IFX(X_AF) { ANDI(s4, s3, 0x08); // AF: cc & 0x08 @@ -237,8 +245,7 @@ void emit_add16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, ORI(xFlags, xFlags, 1 << F_CF); } - SLLI(s1, s1, 48); - SRLI(s1, s1, 48); + ZEXTH(s1, s1); IFX(X_ZF) { BNEZ(s1, 8); @@ -272,8 +279,12 @@ void emit_add8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4) ADD(s1, s1, s2); IFX(X_AF|X_OF) { - NOT(s4, s1); // s4 = ~res - AND(s3, s4, s3); // s3 = ~res & (op1 | op2) + if(rv64_zbb) { + ANDN(s3, s1, s3); // s3 = ~res & (op1 | op2) + } else { + NOT(s4, s1); // s4 = ~res + AND(s3, s4, s3); // s3 = ~res & (op1 | op2) + } OR(s3, s3, s2); // cc = (~res & (op1 | op2)) | (op1 & op2) IFX(X_AF) { ANDI(s4, s3, 0x08); // AF: cc & 0x08 @@ -332,8 +343,12 @@ void emit_add8c(dynarec_rv64_t* dyn, int ninst, int s1, int c, int s2, int s3, i ADDI(s1, s1, c); IFX(X_AF|X_OF) { - NOT(s2, s1); // s2 = ~res - AND(s3, s2, s3); // s3 = ~res & (op1 | op2) + if(rv64_zbb) { + ANDN(s3, s1, s3); // s3 = ~res & (op1 | op2) + } else { + NOT(s2, s1); // s2 = ~res + AND(s3, s2, s3); // s3 = ~res & (op1 | op2) + } OR(s3, s3, s4); // cc = (~res & (op1 | op2)) | (op1 & op2) IFX(X_AF) { ANDI(s4, s3, 0x08); // AF: cc & 0x08 @@ -580,8 +595,12 @@ void emit_inc8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4) SB(s1, xEmu, 
offsetof(x64emu_t, res)); } IFX(X_AF | X_OF) { - NOT(s2, s1); // s2 = ~res - AND(s3, s2, s3); // s3 = ~res & (op1 | op2) + if(rv64_zbb) { + ANDN(s3, s1, s3); // s3 = ~res & (op1 | op2) + } else { + NOT(s2, s1); // s2 = ~res + AND(s3, s2, s3); // s3 = ~res & (op1 | op2) + } OR(s3, s3, s4); // cc = (~res & (op1 | op2)) | (op1 & op2) IFX(X_AF) { ANDI(s2, s3, 0x08); // AF: cc & 0x08 @@ -625,8 +644,9 @@ void emit_dec8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4) SET_DFNONE(); } IFX(X_AF | X_OF) { - ORI(s3, s1, 1); // s3 = op1 | op2 - ANDI(s4, s1, 1); // s4 = op1 & op2 + NOT(s4, s1); // s4 = ~op1 + ORI(s3, s4, 1); // s3 = ~op1 | op2 + ANDI(s4, s4, 1); // s4 = ~op1 & op2 } ADDIW(s1, s1, -1); @@ -635,9 +655,8 @@ void emit_dec8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4) SB(s1, xEmu, offsetof(x64emu_t, res)); } IFX(X_AF | X_OF) { - NOT(s2, s1); // s2 = ~res - AND(s3, s2, s3); // s3 = ~res & (op1 | op2) - OR(s3, s3, s4); // cc = (~res & (op1 | op2)) | (op1 & op2) + AND(s3, s1, s3); // s3 = res & (~op1 | op2) + OR(s3, s3, s4); // cc = (res & (~op1 | op2)) | (~op1 & op2) IFX(X_AF) { ANDI(s2, s3, 0x08); // AF: cc & 0x08 BEQZ(s2, 8); @@ -689,8 +708,12 @@ void emit_inc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SDxw(s1, xEmu, offsetof(x64emu_t, res)); } IFX(X_AF | X_OF) { - NOT(s2, s1); // s2 = ~res - AND(s3, s2, s3); // s3 = ~res & (op1 | op2) + if(rv64_zbb) { + ANDN(s3, s1, s3); // s3 = ~res & (op1 | op2) + } else { + NOT(s2, s1); // s2 = ~res + AND(s3, s2, s3); // s3 = ~res & (op1 | op2) + } OR(s3, s3, s5); // cc = (~res & (op1 | op2)) | (op1 & op2) IFX(X_AF) { ANDI(s2, s3, 0x08); // AF: cc & 0x08 @@ -781,6 +804,9 @@ void emit_dec32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s // emit INC16 instruction, from s1, store result in s1 using s3 and s4 as scratch void emit_inc16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4) { + IFX(X_ALL) { + ANDI(xFlags, xFlags, ~((1UL<<F_AF) | 
(1UL<<F_OF2) | (1UL<<F_ZF) | (1UL<<F_SF) | (1UL<<F_PF))); + } IFX(X_PEND) { SH(s1, xEmu, offsetof(x64emu_t, op1)); SET_DF(s3, d_inc16); @@ -798,8 +824,12 @@ void emit_inc16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4) SH(s1, xEmu, offsetof(x64emu_t, res)); } IFX(X_AF | X_OF) { - NOT(s2, s1); // s2 = ~res - AND(s3, s2, s3); // s3 = ~res & (op1 | op2) + if(rv64_zbb) { + ANDN(s3, s1, s3); // s3 = ~res & (op1 | op2) + } else { + NOT(s2, s1); // s2 = ~res + AND(s3, s2, s3); // s3 = ~res & (op1 | op2) + } OR(s3, s3, s4); // cc = (~res & (op1 | op2)) | (op1 & op2) IFX(X_AF) { ANDI(s4, s3, 0x08); // AF: cc & 0x08 @@ -816,8 +846,7 @@ void emit_inc16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4) } } - SLLI(s1, s1, 48); - SRLI(s1, s1, 48); + ZEXTH(s1, s1); IFX(X_ZF) { BNEZ(s1, 8); @@ -909,6 +938,7 @@ void emit_sbb8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i SUBW(s1, s1, s3); ANDI(s1, s1, 0xff); + CLEAR_FLAGS(); IFX(X_PEND) { SB(s1, xEmu, offsetof(x64emu_t, res)); } @@ -928,6 +958,78 @@ void emit_sbb8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i } } +// emit ADC8 instruction, from s1, s2, store result in s1 using s3, s4 and s5 as scratch +void emit_adc8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5) { + IFX(X_PEND) { + SH(s1, xEmu, offsetof(x64emu_t, op1)); + SH(s2, xEmu, offsetof(x64emu_t, op2)); + SET_DF(s3, d_adc8); + } else IFX(X_ALL) { + SET_DFNONE(); + } + IFX(X_AF | X_OF) { + OR(s4, s1, s2); // s4 = op1 | op2 + AND(s5, s1, s2); // s5 = op1 & op2 + } + + ADD(s1, s1, s2); + ANDI(s3, xFlags, 1 << F_CF); + ADD(s1, s1, s3); + + CLEAR_FLAGS(); + IFX(X_PEND) { + SW(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_AF | X_OF) { + if(rv64_zbb) { + ANDN(s3, s1, s4); // s3 = ~res & (op1 | op2) + } else { + NOT(s2, s1); // s2 = ~res + AND(s3, s2, s4); // s3 = ~res & (op1 | op2) + } + OR(s3, s3, s5); // cc = (~res & (op1 | op2)) | (op1 & op2) + IFX(X_AF) { + ANDI(s4, s3, 0x08); // AF: 
cc & 0x08 + BEQZ(s4, 8); + ORI(xFlags, xFlags, 1 << F_AF); + } + IFX(X_OF) { + SRLI(s3, s3, 6); + SRLI(s4, s3, 1); + XOR(s3, s3, s4); + ANDI(s3, s3, 1); // OF: xor of two MSB's of cc + BEQZ(s3, 8); + ORI(xFlags, xFlags, 1 << F_OF2); + } + } + IFX(X_CF) { + SRLI(s3, s1, 8); + BEQZ(s3, 8); + ORI(xFlags, xFlags, 1 << F_CF); + } + + ANDI(s1, s1, 0xff); + + IFX(X_ZF) { + BNEZ(s1, 8); + ORI(xFlags, xFlags, 1 << F_ZF); + } + IFX(X_SF) { + SRLI(s3, s1, 7); + BEQZ(s3, 8); + ORI(xFlags, xFlags, 1 << F_SF); + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} + +// emit ADC8 instruction, from s1, const c, store result in s1 using s3, s4, s5 and s6 as scratch +void emit_adc8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4, int s5, int s6) { + MOV32w(s5, c&0xff); + emit_adc8(dyn, ninst, s1, s5, s3, s4, s6); +} + // emit SBB8 instruction, from s1, constant c, store result in s1 using s3, s4, s5 and s6 as scratch void emit_sbb8c(dynarec_rv64_t* dyn, int ninst, int s1, int c, int s3, int s4, int s5, int s6) { @@ -955,6 +1057,7 @@ void emit_sbb16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, ANDI(s3, xFlags, 1 << F_CF); SUBW(s1, s1, s3); + CLEAR_FLAGS(); SLLIW(s1, s1, 16); IFX(X_SF) { BGE(s1, xZR, 8); @@ -996,6 +1099,7 @@ void emit_sbb32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s ANDI(s3, xFlags, 1 << F_CF); SUBxw(s1, s1, s3); + CLEAR_FLAGS(); IFX(X_SF) { BGE(s1, xZR, 8); ORI(xFlags, xFlags, 1 << F_SF); @@ -1091,8 +1195,7 @@ void emit_neg16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3) } NEG(s1, s1); - SLLI(s1, s1, 48); - SRLI(s1, s1, 48); + ZEXTH(s1, s1); IFX(X_PEND) { SH(s1, xEmu, offsetof(x64emu_t, res)); } @@ -1121,7 +1224,8 @@ void emit_neg16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3) } } IFX(X_SF) { - ANDI(s3, s1, 1 << F_SF); // 1<<F_SF is sign bit, so just mask + SRLI(s3, s1, 15-F_SF); // put sign bit in place + ANDI(s3, s3, 1 << F_SF); // 1<<F_SF is sign bit, so just mask OR(xFlags, 
xFlags, s3); } IFX(X_PF) { @@ -1192,7 +1296,6 @@ void emit_neg8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3) // emit ADC16 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch void emit_adc16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5) { - CLEAR_FLAGS(); IFX(X_PEND) { SH(s1, xEmu, offsetof(x64emu_t, op1)); SH(s2, xEmu, offsetof(x64emu_t, op2)); @@ -1209,12 +1312,17 @@ void emit_adc16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, ANDI(s3, xFlags, 1 << F_CF); ADD(s1, s1, s3); + CLEAR_FLAGS(); IFX(X_PEND) { SW(s1, xEmu, offsetof(x64emu_t, res)); } IFX(X_AF | X_OF) { - NOT(s2, s1); // s2 = ~res - AND(s3, s2, s4); // s3 = ~res & (op1 | op2) + if(rv64_zbb) { + ANDN(s3, s1, s4); // s3 = ~res & (op1 | op2) + } else { + NOT(s2, s1); // s2 = ~res + AND(s3, s2, s4); // s3 = ~res & (op1 | op2) + } OR(s3, s3, s5); // cc = (~res & (op1 | op2)) | (op1 & op2) IFX(X_AF) { ANDI(s4, s3, 0x08); // AF: cc & 0x08 @@ -1236,8 +1344,7 @@ void emit_adc16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, ORI(xFlags, xFlags, 1 << F_CF); } - SLLI(s1, s1, 48); - SRLI(s1, s1, 48); + ZEXTH(s1, s1); IFX(X_ZF) { BNEZ(s1, 8); @@ -1254,9 +1361,8 @@ void emit_adc16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, } // emit ADC32 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch -void emit_adc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5) +void emit_adc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5, int s6) { - CLEAR_FLAGS(); IFX(X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, op1)); SDxw(s2, xEmu, offsetof(x64emu_t, op2)); @@ -1267,21 +1373,16 @@ void emit_adc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s IFX(X_CF) { if (rex.w) { AND(s5, xMASK, s1); - AND(s4, xMASK, s2); - ADD(s5, s5, s4); // lo + if(rv64_zba) ADDUW(s5, s2, s5); else {AND(s4, xMASK, s2); ADD(s5, s5, s4);} // lo 
SRLI(s3, s1, 0x20); SRLI(s4, s2, 0x20); ADD(s4, s4, s3); SRLI(s5, s5, 0x20); ADD(s5, s5, s4); // hi - SRAI(s5, s5, 0x20); - BEQZ(s5, 8); - ORI(xFlags, xFlags, 1 << F_CF); + SRAI(s6, s5, 0x20); } else { ADD(s5, s1, s2); - SRLI(s5, s5, 0x20); - BEQZ(s5, 8); - ORI(xFlags, xFlags, 1 << F_CF); + SRLI(s6, s5, 0x20); } } IFX(X_AF | X_OF) { @@ -1293,12 +1394,21 @@ void emit_adc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s ANDI(s3, xFlags, 1 << F_CF); ADDxw(s1, s1, s3); + CLEAR_FLAGS(); IFX(X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, res)); } + IFX(X_CF) { + BEQZ(s6, 8); + ORI(xFlags, xFlags, 1 << F_CF); + } IFX(X_AF | X_OF) { - NOT(s2, s1); // s2 = ~res - AND(s3, s2, s4); // s3 = ~res & (op1 | op2) + if(rv64_zbb) { + ANDN(s3, s1, s4); // s3 = ~res & (op1 | op2) + } else { + NOT(s2, s1); // s2 = ~res + AND(s3, s2, s4); // s3 = ~res & (op1 | op2) + } OR(s3, s3, s5); // cc = (~res & (op1 | op2)) | (op1 & op2) IFX(X_AF) { ANDI(s4, s3, 0x08); // AF: cc & 0x08 |