Diffstat (limited to 'src')
 -rw-r--r--  src/dynarec/la64/dynarec_la64_00.c                                    | 100
 -rw-r--r--  src/dynarec/la64/dynarec_la64_emit_math.c                             | 519
 -rw-r--r--  src/dynarec/la64/dynarec_la64_functions.h                             |   2
 -rw-r--r--  src/dynarec/la64/dynarec_la64_helper.c                                |  79
 -rw-r--r--  src/dynarec/la64/dynarec_la64_helper.h                                | 257
 -rw-r--r--  src/dynarec/la64/dynarec_la64_jmpnext.c (renamed from src/dynarec/la64/dynarec_la64_jumpnext.c) | 0
 -rw-r--r--  src/dynarec/la64/dynarec_la64_pass0.h                                 |   5
 -rw-r--r--  src/dynarec/la64/la64_emitter.h                                       |  95
 -rw-r--r--  src/dynarec/la64/la64_epilog.S                                        |   8
 -rw-r--r--  src/dynarec/la64/la64_lock.S                                          |  78
 -rw-r--r--  src/dynarec/la64/la64_lock.h                                          |  46
 -rw-r--r--  src/dynarec/la64/la64_next.S                                          |   8
 -rw-r--r--  src/dynarec/la64/la64_printer.c                                       |  12
 -rw-r--r--  src/dynarec/la64/la64_printer.h                                       |   2
 -rw-r--r--  src/dynarec/la64/la64_prolog.S                                        |   4
 -rw-r--r--  src/include/dynarec_la64.h (renamed from src/include/dynarec_la464.h) |   0
16 files changed, 1122 insertions, 93 deletions
diff --git a/src/dynarec/la64/dynarec_la64_00.c b/src/dynarec/la64/dynarec_la64_00.c
index e464e07c..5eeba868 100644
--- a/src/dynarec/la64/dynarec_la64_00.c
+++ b/src/dynarec/la64/dynarec_la64_00.c
@@ -53,6 +53,106 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
     MAYUSE(cacheupd);
     switch (opcode) {
+        case 0x00:
+            INST_NAME("ADD Eb, Gb");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            nextop = F8;
+            GETEB(x1, 0);
+            GETGB(x2);
+            emit_add8(dyn, ninst, x1, x2, x4, x5);
+            EBBACK(x5, 0);
+            break;
+        case 0x01:
+            INST_NAME("ADD Ed, Gd");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            nextop = F8;
+            GETGD;
+            GETED(0);
+            emit_add32(dyn, ninst, rex, ed, gd, x3, x4, x5);
+            WBACK;
+            break;
+        case 0x02:
+            INST_NAME("ADD Gb, Eb");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            nextop = F8;
+            GETEB(x1, 0);
+            GETGB(x2);
+            emit_add8(dyn, ninst, x2, x1, x4, x5);
+            GBBACK(x5);
+            break;
+        case 0x03:
+            INST_NAME("ADD Gd, Ed");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            nextop = F8;
+            GETGD;
+            GETED(0);
+            emit_add32(dyn, ninst, rex, gd, ed, x3, x4, x5);
+            break;
+        case 0x04:
+            INST_NAME("ADD AL, Ib");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            u8 = F8;
+            ANDI(x1, xRAX, 0xff);
+            emit_add8c(dyn, ninst, x1, u8, x3, x4, x5);
+            ANDI(xRAX, xRAX, ~0xff);
+            OR(xRAX, xRAX, x1);
+            break;
+        case 0x05:
+            INST_NAME("ADD EAX, Id");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            i64 = F32S;
+            emit_add32c(dyn, ninst, rex, xRAX, i64, x3, x4, x5, x6);
+            break;
+        case 0x28:
+            INST_NAME("SUB Eb, Gb");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            nextop = F8;
+            GETEB(x1, 0);
+            GETGB(x2);
+            emit_sub8(dyn, ninst, x1, x2, x4, x5, x6);
+            EBBACK(x5, 0);
+            break;
+        case 0x29:
+            INST_NAME("SUB Ed, Gd");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            nextop = F8;
+            GETGD;
+            GETED(0);
+            emit_sub32(dyn, ninst, rex, ed, gd, x3, x4, x5);
+            WBACK;
+            break;
+        case 0x2A:
+            INST_NAME("SUB Gb, Eb");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            nextop = F8;
+            GETEB(x1, 0);
+            GETGB(x2);
+            emit_sub8(dyn, ninst, x2, x1, x4, x5, x6);
+            GBBACK(x5);
+            break;
+        case 0x2B:
+            INST_NAME("SUB Gd, Ed");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            nextop = F8;
+            GETGD;
+            GETED(0);
+            emit_sub32(dyn, ninst, rex, gd, ed, x3, x4, x5);
+            break;
+        case 0x2C:
+            INST_NAME("SUB AL, Ib");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            u8 = F8;
+            ANDI(x1, xRAX, 0xff);
+            emit_sub8c(dyn, ninst, x1, u8, x2, x3, x4, x5);
+            ANDI(xRAX, xRAX, ~0xff);
+            OR(xRAX, xRAX, x1);
+            break;
+        case 0x2D:
+            INST_NAME("SUB EAX, Id");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            i64 = F32S;
+            emit_sub32c(dyn, ninst, rex, xRAX, i64, x2, x3, x4, x5);
+            break;
         case 0x50:
         case 0x51:
         case 0x52:
diff --git a/src/dynarec/la64/dynarec_la64_emit_math.c b/src/dynarec/la64/dynarec_la64_emit_math.c
new file mode 100644
index 00000000..646606df
--- /dev/null
+++ b/src/dynarec/la64/dynarec_la64_emit_math.c
@@ -0,0 +1,519 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <errno.h>
+
+#include "debug.h"
+#include "box64context.h"
+#include "dynarec.h"
+#include "emu/x64emu_private.h"
+#include "emu/x64run_private.h"
+#include "x64run.h"
+#include "x64emu.h"
+#include "box64stack.h"
+#include "callback.h"
+#include "emu/x64run_private.h"
+#include "x64trace.h"
+#include "dynarec_native.h"
+
+#include "la64_printer.h"
+#include "dynarec_la64_private.h"
+#include "dynarec_la64_functions.h"
+#include "dynarec_la64_helper.h"
+
+// emit ADD32 instruction, from s1, s2, store result in s1 using s3, s4 and s5 as scratch
+void emit_add32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5)
+{
+    CLEAR_FLAGS();
+    IFX(X_PEND)
+    {
+        if (rex.w) {
+            ST_D(s1, xEmu, offsetof(x64emu_t, op1));
+            ST_D(s2, xEmu, offsetof(x64emu_t, op2));
+        } else {
+            ST_W(s1, xEmu, offsetof(x64emu_t, op1));
+            ST_W(s2, xEmu, offsetof(x64emu_t, op2));
+        }
+        SET_DF(s3, rex.w ? d_add64 : d_add32b);
+    }
+    else IFX(X_ALL)
+    {
+        SET_DFNONE();
+    }
+    IFX(X_CF)
+    {
+        if (rex.w) {
+            MOV32w(x2, 0xffffffff);
+            AND(s5, x2, s1);
+            AND(s4, x2, s2);
+            ADD_D(s5, s5, s4);
+            SRLI_D(s3, s1, 0x20);
+            SRLI_D(s4, s2, 0x20);
+            ADD_D(s4, s4, s3);
+            SRLI_D(s5, s5, 0x20);
+            ADD_D(s5, s5, s4); // hi
+            SRAI_D(s5, s5, 0x20);
+            BEQZ(s5, 8);
+            ORI(xFlags, xFlags, 1 << F_CF);
+        } else {
+            ADD_D(s5, s1, s2);
+            SRLI_D(s5, s5, 0x20);
+            BEQZ(s5, 8);
+            ORI(xFlags, xFlags, 1 << F_CF);
+        }
+    }
+    IFX(X_AF | X_OF)
+    {
+        OR(s3, s1, s2);  // s3 = op1 | op2
+        AND(s4, s1, s2); // s4 = op1 & op2
+    }
+
+    if (rex.w)
+        ADD_D(s1, s1, s2);
+    else
+        ADD_W(s1, s1, s2);
+
+    IFX(X_PEND)
+    {
+        if (rex.w)
+            ST_D(s1, xEmu, offsetof(x64emu_t, res));
+        else
+            ST_W(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_AF | X_OF)
+    {
+        ANDN(s3, s3, s1); // s3 = ~res & (op1 | op2)
+        OR(s3, s3, s4);   // cc = (~res & (op1 | op2)) | (op1 & op2)
+        IFX(X_AF)
+        {
+            ANDI(s4, s3, 0x08); // AF: cc & 0x08
+            BEQZ(s4, 8);
+            ORI(xFlags, xFlags, 1 << F_AF);
+        }
+        IFX(X_OF)
+        {
+            SRLI_D(s3, s3, rex.w ? 62 : 30);
+            SRLI_D(s4, s3, 1);
+            XOR(s3, s3, s4);
+            ANDI(s3, s3, 1); // OF: xor of two MSB's of cc
+            BEQZ(s3, 8);
+            ORI(xFlags, xFlags, 1 << F_OF2);
+        }
+    }
+    IFX(X_SF)
+    {
+        BGE(s1, xZR, 8);
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+    if (!rex.w) {
+        ZEROUP(s1);
+    }
+    IFX(X_PF)
+    {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+    IFX(X_ZF)
+    {
+        BNEZ(s1, 8);
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+}
+
+// emit ADD32 instruction, from s1, constant c, store result in s1 using s2, s3, s4 and s5 as scratch
+void emit_add32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s2, int s3, int s4, int s5)
+{
+    CLEAR_FLAGS();
+    if (s1 == xRSP && (!dyn->insts || dyn->insts[ninst].x64.gen_flags == X_PEND)) {
+        // special case when doing math on RSP and only PEND is needed: ignoring it!
+        if (c >= -2048 && c < 2048) {
+            ADDIxw(s1, s1, c);
+        } else {
+            MOV64xw(s2, c);
+            ADDxw(s1, s1, s2);
+        }
+        return;
+    }
+    IFX(X_PEND | X_AF | X_CF | X_OF)
+    {
+        MOV64xw(s2, c);
+    }
+    IFX(X_PEND)
+    {
+        SDxw(s1, xEmu, offsetof(x64emu_t, op1));
+        SDxw(s2, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s3, rex.w ? d_add64 : d_add32b);
+    }
+    else IFX(X_ALL)
+    {
+        SET_DFNONE();
+    }
+    IFX(X_CF)
+    {
+        if (rex.w) {
+            MOV32w(x2, 0xffffffff);
+            AND(s5, x2, s1);
+            AND(s4, x2, s2);
+            ADD_D(s5, s5, s4);
+            SRLI_D(s3, s1, 0x20);
+            SRLI_D(s4, s2, 0x20);
+            ADD_D(s4, s4, s3);
+            SRLI_D(s5, s5, 0x20);
+            ADD_D(s5, s5, s4); // hi
+            SRAI_D(s5, s5, 0x20);
+            BEQZ(s5, 8);
+            ORI(xFlags, xFlags, 1 << F_CF);
+        } else {
+            ADD_D(s5, s1, s2);
+            SRLI_D(s5, s5, 0x20);
+            BEQZ(s5, 8);
+            ORI(xFlags, xFlags, 1 << F_CF);
+        }
+    }
+    IFX(X_AF | X_OF)
+    {
+        OR(s3, s1, s2);  // s3 = op1 | op2
+        AND(s4, s1, s2); // s4 = op1 & op2
+    }
+
+    if (c >= -2048 && c < 2048) {
+        ADDIxw(s1, s1, c);
+    } else {
+        IFX(X_PEND | X_AF | X_CF | X_OF) { }
+        else
+        {
+            MOV64xw(s2, c);
+        }
+        ADDxw(s1, s1, s2);
+    }
+
+    IFX(X_PEND)
+    {
+        SDxw(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_AF | X_OF)
+    {
+        ANDN(s3, s3, s1); // s3 = ~res & (op1 | op2)
+        OR(s3, s3, s4);   // cc = (~res & (op1 | op2)) | (op1 & op2)
+        IFX(X_AF)
+        {
+            ANDI(s4, s3, 0x08); // AF: cc & 0x08
+            BEQZ(s4, 8);
+            ORI(xFlags, xFlags, 1 << F_AF);
+        }
+        IFX(X_OF)
+        {
+            SRLI_D(s3, s3, rex.w ? 62 : 30);
+            SRLI_D(s4, s3, 1);
+            XOR(s3, s3, s4);
+            ANDI(s3, s3, 1); // OF: xor of two MSB's of cc
+            BEQZ(s3, 8);
+            ORI(xFlags, xFlags, 1 << F_OF2);
+        }
+    }
+    IFX(X_SF)
+    {
+        BGE(s1, xZR, 8);
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+    if (!rex.w) {
+        ZEROUP(s1);
+    }
+    IFX(X_PF)
+    {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+    IFX(X_ZF)
+    {
+        BNEZ(s1, 8);
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+}
+
+// emit ADD8 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch
+void emit_add8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
+{
+    CLEAR_FLAGS();
+    IFX(X_PEND)
+    {
+        ST_B(s1, xEmu, offsetof(x64emu_t, op1));
+        ST_B(s2, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s3, d_add8);
+    }
+    else IFX(X_ALL)
+    {
+        SET_DFNONE();
+    }
+    IFX(X_AF | X_OF)
+    {
+        OR(s3, s1, s2);  // s3 = op1 | op2
+        AND(s4, s1, s2); // s4 = op1 & op2
+    }
+    ADD_D(s1, s1, s2);
+
+    IFX(X_AF | X_OF)
+    {
+        ANDN(s3, s3, s1); // s3 = ~res & (op1 | op2)
+        OR(s3, s3, s4);   // cc = (~res & (op1 | op2)) | (op1 & op2)
+        IFX(X_AF)
+        {
+            ANDI(s4, s3, 0x08); // AF: cc & 0x08
+            BEQZ(s4, 8);
+            ORI(xFlags, xFlags, 1 << F_AF);
+        }
+        IFX(X_OF)
+        {
+            SRLI_D(s3, s3, 6);
+            SRLI_D(s4, s3, 1);
+            XOR(s3, s3, s4);
+            ANDI(s3, s3, 1); // OF: xor of two MSB's of cc
+            BEQZ(s3, 8);
+            ORI(xFlags, xFlags, 1 << F_OF2);
+        }
+    }
+    IFX(X_CF)
+    {
+        SRLI_D(s3, s1, 8);
+        BEQZ(s3, 8);
+        ORI(xFlags, xFlags, 1 << F_CF);
+    }
+    IFX(X_PEND)
+    {
+        ST_H(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    ANDI(s1, s1, 0xff);
+    IFX(X_ZF)
+    {
+        BNEZ(s1, 8);
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX(X_SF)
+    {
+        SRLI_D(s3, s1, 7);
+        BEQZ(s3, 8);
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+    IFX(X_PF)
+    {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
+
+// emit ADD8 instruction, from s1, const c, store result in s1 using s3 and s4 as scratch
+void emit_add8c(dynarec_la64_t* dyn, int ninst, int s1, int c, int s2, int s3, int s4)
+{
+    CLEAR_FLAGS();
+    IFX(X_PEND)
+    {
+        MOV32w(s4, c & 0xff);
+        ST_B(s1, xEmu, offsetof(x64emu_t, op1));
+        ST_B(s4, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s3, d_add8);
+    }
+    else IFX(X_ALL)
+    {
+        SET_DFNONE();
+    }
+    IFX(X_AF | X_OF)
+    {
+        IFX(X_PEND) { }
+        else
+        {
+            MOV32w(s4, c & 0xff);
+        }
+        OR(s3, s1, s4);  // s3 = op1 | op2
+        AND(s4, s1, s4); // s4 = op1 & op2
+    }
+    ADDI_D(s1, s1, c);
+
+    IFX(X_AF | X_OF)
+    {
+        ANDN(s3, s3, s1); // s3 = ~res & (op1 | op2)
+        OR(s3, s3, s4);   // cc = (~res & (op1 | op2)) | (op1 & op2)
+        IFX(X_AF)
+        {
+            ANDI(s4, s3, 0x08); // AF: cc & 0x08
+            BEQZ(s4, 8);
+            ORI(xFlags, xFlags, 1 << F_AF);
+        }
+        IFX(X_OF)
+        {
+            SRLI_D(s3, s3, 6);
+            SRLI_D(s4, s3, 1);
+            XOR(s3, s3, s4);
+            ANDI(s3, s3, 1); // OF: xor of two MSB's of cc
+            BEQZ(s3, 8);
+            ORI(xFlags, xFlags, 1 << F_OF2);
+        }
+    }
+    IFX(X_CF)
+    {
+        SRLI_D(s3, s1, 8);
+        BEQZ(s3, 8);
+        ORI(xFlags, xFlags, 1 << F_CF);
+    }
+    IFX(X_PEND)
+    {
+        ST_H(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    ANDI(s1, s1, 0xff);
+    IFX(X_ZF)
+    {
+        BNEZ(s1, 8);
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX(X_SF)
+    {
+        SRLI_D(s3, s1, 7);
+        BEQZ(s3, 8);
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+    IFX(X_PF)
+    {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
+
+// emit SUB8 instruction, from s1, s2, store result in s1 using s3, s4 and s5 as scratch
+void emit_sub8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
+{
+    CLEAR_FLAGS();
+    IFX(X_PEND) {
+        ST_B(s1, xEmu, offsetof(x64emu_t, op1));
+        ST_B(s2, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s3, d_sub8);
+    } else IFX(X_ALL) {
+        SET_DFNONE();
+    }
+
+    IFX(X_AF | X_CF | X_OF) {
+        // for later flag calculation
+        NOT(s5, s1);
+    }
+
+    SUB_D(s1, s1, s2);
+    ANDI(s1, s1, 0xff);
+    IFX(X_SF) {
+        SRLI_D(s3, s1, 7);
+        BEQZ(s3, 8);
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+    IFX(X_PEND) {
+        ST_B(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    CALC_SUB_FLAGS(s5, s2, s1, s3, s4, 8);
+    IFX(X_ZF) {
+        BNEZ(s1, 8);
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
+
+// emit SUB8 instruction, from s1, constant c, store result in s1 using s2, s3, s4 and s5 as scratch
+void emit_sub8c(dynarec_la64_t* dyn, int ninst, int s1, int c, int s2, int s3, int s4, int s5)
+{
+    MOV32w(s2, c&0xff);
+    emit_sub8(dyn, ninst, s1, s2, s3, s4, s5);
+}
+
+// emit SUB32 instruction, from s1, s2, store result in s1 using s3, s4 and s5 as scratch
+void emit_sub32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5)
+{
+    CLEAR_FLAGS();
+    IFX(X_PEND) {
+        SDxw(s1, xEmu, offsetof(x64emu_t, op1));
+        SDxw(s2, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s3, rex.w?d_sub64:d_sub32);
+    } else IFX(X_ALL) {
+        SET_DFNONE();
+    }
+
+    IFX(X_AF | X_CF | X_OF) {
+        // for later flag calculation
+        NOT(s5, s1);
+    }
+
+    SUBxw(s1, s1, s2);
+    IFX(X_PEND) {
+        SDxw(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_SF) {
+        BGE(s1, xZR, 8);
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+    if (!rex.w) {
+        ZEROUP(s1);
+    }
+    CALC_SUB_FLAGS(s5, s2, s1, s3, s4, rex.w?64:32);
+    IFX(X_ZF) {
+        BNEZ(s1, 8);
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
+
+// emit SUB32 instruction, from s1, constant c, store result in s1 using s2, s3, s4 and s5 as scratch
+void emit_sub32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s2, int s3, int s4, int s5)
+{
+    CLEAR_FLAGS();
+    if(s1==xRSP && (!dyn->insts || dyn->insts[ninst].x64.gen_flags==X_PEND))
+    {
+        // special case when doing math on RSP and only PEND is needed: ignoring it!
+        if (c > -2048 && c <= 2048) {
+            ADDI_D(s1, s1, -c);
+        } else {
+            MOV64xw(s2, c);
+            SUBxw(s1, s1, s2);
+        }
+        return;
+    }
+
+    IFX(X_PEND) {
+        SDxw(s1, xEmu, offsetof(x64emu_t, op1));
+        MOV64xw(s2, c);
+        SDxw(s2, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s3, rex.w?d_sub64:d_sub32);
+    } else IFX(X_ALL) {
+        SET_DFNONE();
+    }
+
+    IFX(X_AF | X_CF | X_OF) {
+        // for later flag calculation
+        NOT(s5, s1);
+    }
+
+    if (c > -2048 && c <= 2048) {
+        ADDIxw(s1, s1, -c);
+    } else {
+        IFX(X_PEND) {} else {MOV64xw(s2, c);}
+        SUBxw(s1, s1, s2);
+    }
+
+    IFX(X_AF | X_CF | X_OF) {
+        IFX(X_PEND) {}
+        else if (c > -2048 && c <= 2048) {
+            MOV64xw(s2, c);
+        }
+    }
+    IFX(X_PEND) {
+        SDxw(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_SF) {
+        BGE(s1, xZR, 8);
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+    if (!rex.w) {
+        ZEROUP(s1);
+    }
+    CALC_SUB_FLAGS(s5, s2, s1, s3, s4, rex.w?64:32);
+    IFX(X_ZF) {
+        BNEZ(s1, 8);
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
diff --git a/src/dynarec/la64/dynarec_la64_functions.h b/src/dynarec/la64/dynarec_la64_functions.h
index 5f7b5036..67608783 100644
--- a/src/dynarec/la64/dynarec_la64_functions.h
+++ b/src/dynarec/la64/dynarec_la64_functions.h
@@ -4,7 +4,7 @@
 #include "../dynarec_native_functions.h"
 
 typedef struct x64emu_s x64emu_t;
-typedef struct dynarec_rv64_s dynarec_rv64_t;
+typedef struct dynarec_la64_s dynarec_la64_t;
 
 // Reset scratch regs counter
 void fpu_reset_scratch(dynarec_la64_t* dyn);
diff --git a/src/dynarec/la64/dynarec_la64_helper.c b/src/dynarec/la64/dynarec_la64_helper.c
index 71e3c1ed..1215cbd8 100644
--- a/src/dynarec/la64/dynarec_la64_helper.c
+++ b/src/dynarec/la64/dynarec_la64_helper.c
@@ -401,6 +401,67 @@ void jump_to_next(dynarec_la64_t* dyn, uintptr_t ip, int reg, int ninst, int is3
 
 void call_c(dynarec_la64_t* dyn, int ninst, void* fnc, int reg, int ret, int saveflags, int savereg)
 {
+    MAYUSE(fnc);
+    if (savereg == 0)
+        savereg = x6;
+    if (saveflags) {
+        FLAGS_ADJUST_TO11(xFlags, xFlags, reg);
+        ST_D(xFlags, xEmu, offsetof(x64emu_t, eflags));
+    }
+    fpu_pushcache(dyn, ninst, reg, 0);
+    if (ret != -2) {
+        ADDI_D(xSP, xSP, -16); // LA64 stack needs to be 16-byte aligned
+        ST_D(xEmu, xSP, 0);
+        ST_D(savereg, xSP, 8);
+        // x5..x8, x10..x17, x28..x31 need to be saved by the caller
+        STORE_REG(RAX);
+        STORE_REG(RCX);
+        STORE_REG(RDX);
+        STORE_REG(R12);
+        STORE_REG(R13);
+        STORE_REG(R14);
+        STORE_REG(R15);
+        ST_D(xRIP, xEmu, offsetof(x64emu_t, ip));
+    }
+    TABLE64(reg, (uintptr_t)fnc);
+    JIRL(xRA, reg, 0);
+    if (ret >= 0) {
+        MV(ret, xEmu);
+    }
+    if (ret != -2) {
+        LD_D(xEmu, xSP, 0);
+        LD_D(savereg, xSP, 8);
+        ADDI_D(xSP, xSP, 16);
+#define GO(A) \
+    if (ret != x##A) { LOAD_REG(A); }
+        GO(RAX);
+        GO(RCX);
+        GO(RDX);
+        GO(R12);
+        GO(R13);
+        GO(R14);
+        GO(R15);
+        if (ret != xRIP)
+            LD_D(xRIP, xEmu, offsetof(x64emu_t, ip));
+#undef GO
+    }
+
+    fpu_popcache(dyn, ninst, reg, 0);
+    if (saveflags) {
+        LD_D(xFlags, xEmu, offsetof(x64emu_t, eflags));
+        FLAGS_ADJUST_FROM11(xFlags, xFlags, reg);
+    }
+    SET_NODF();
+    dyn->last_ip = 0;
+}
+
+void fpu_pushcache(dynarec_la64_t* dyn, int ninst, int s1, int not07)
+{
+    // TODO
+}
+
+void fpu_popcache(dynarec_la64_t* dyn, int ninst, int s1, int not07)
+{
     // TODO
 }
 
@@ -419,6 +480,24 @@ void fpu_unreflectcache(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3)
     // TODO
 }
 
+void emit_pf(dynarec_la64_t* dyn, int ninst, int s1, int s3, int s4)
+{
+    MAYUSE(dyn);
+    MAYUSE(ninst);
+    // PF: (((emu->x64emu_parity_tab[(res&0xff) / 32] >> ((res&0xff) % 32)) & 1) == 0)
+    MOV64x(s4, (uintptr_t)GetParityTab());
+    SRLI_D(s3, s1, 3);
+    ANDI(s3, s3, 28);
+    ADD_D(s4, s4, s3);
+    LD_W(s4, s4, 0);
+    NOT(s4, s4);
+    SRL_W(s4, s4, s1);
+    ANDI(s4, s4, 1);
+
+    BEQZ(s4, 8);
+    ORI(xFlags, xFlags, 1 << F_PF);
+}
+
 void fpu_reset_cache(dynarec_la64_t* dyn, int ninst, int reset_n)
 {
     // TODO
diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h
index deec5656..6073a359 100644
--- a/src/dynarec/la64/dynarec_la64_helper.h
+++ b/src/dynarec/la64/dynarec_la64_helper.h
@@ -82,20 +82,116 @@
 #define LOCK_LOCK (int*)1
 
 // GETGD get x64 register in gd
-#define GETGD \
-    do { \
-        gd = TO_LA64(((nextop & 0x38) >> 3) + (rex.r << 3)); \
-    } while (0);
+#define GETGD gd = TO_LA64(((nextop & 0x38) >> 3) + (rex.r << 3));
+
+// GETED can use r1 for ed, and r2 for wback. wback is 0 if ed is xEAX..xEDI
+#define GETED(D) \
+    if (MODREG) { \
+        ed = TO_LA64((nextop & 7) + (rex.b << 3)); \
+        wback = 0; \
+    } else { \
+        SMREAD(); \
+        addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, D); \
+        if (rex.w) \
+            LD_D(x1, wback, fixedaddress); \
+        else \
+            LD_W(x1, wback, fixedaddress); \
+        ed = x1; \
+    }
+
+// Write back ed in wback (if wback not 0)
+#define WBACK \
+    if (wback) { \
+        if (rex.w) \
+            ST_D(ed, wback, fixedaddress); \
+        else \
+            ST_W(ed, wback, fixedaddress); \
+        SMWRITE(); \
+    }
+
+// GETEB will use i for ed, and can use r3 for wback.
+#define GETEB(i, D) \
+    if (MODREG) { \
+        if (rex.rex) { \
+            wback = TO_LA64((nextop & 7) + (rex.b << 3)); \
+            wb2 = 0; \
+        } else { \
+            wback = (nextop & 7); \
+            wb2 = (wback >> 2) * 8; \
+            wback = TO_LA64((wback & 3)); \
+        } \
+        if (wb2) { \
+            MV(i, wback); \
+            SRLI_D(i, i, wb2); \
+            ANDI(i, i, 0xff); \
+        } else \
+            ANDI(i, wback, 0xff); \
+        wb1 = 0; \
+        ed = i; \
+    } else { \
+        SMREAD(); \
+        addr = geted(dyn, addr, ninst, nextop, &wback, x3, x2, &fixedaddress, rex, NULL, 1, D); \
+        LD_BU(i, wback, fixedaddress); \
+        wb1 = 1; \
+        ed = i; \
+    }
 
-// CALL will use x7 for the call address. Return value can be put in ret (unless ret is -1)
+// GETGB will use i for gd
+#define GETGB(i) \
+    if (rex.rex) { \
+        gb1 = TO_LA64(((nextop & 0x38) >> 3) + (rex.r << 3)); \
+        gb2 = 0; \
+    } else { \
+        gd = (nextop & 0x38) >> 3; \
+        gb2 = ((gd & 4) >> 2); \
+        gb1 = TO_LA64((gd & 3)); \
+    } \
+    gd = i; \
+    if (gb2) { \
+        MV(gd, gb1); \
+        SRLI_D(gd, gd, 8); \
+        ANDI(gd, gd, 0xff); \
+    } else \
+        ANDI(gd, gb1, 0xff);
+
+// Write gb (gd) back to original register / memory, using s1 as scratch
+#define GBBACK(s1) \
+    if (gb2) { \
+        MOV64x(s1, 0xffffffffffff00ffLL); \
+        AND(gb1, gb1, s1); \
+        SLLI_D(s1, gd, 8); \
+        OR(gb1, gb1, s1); \
+    } else { \
+        ANDI(gb1, gb1, ~0xff); \
+        OR(gb1, gb1, gd); \
+    }
+
+// Write eb (ed) back to original register / memory, using s1 as scratch
+#define EBBACK(s1, c) \
+    if (wb1) { \
+        ST_B(ed, wback, fixedaddress); \
+        SMWRITE(); \
+    } else if (wb2) { \
+        MOV64x(s1, 0xffffffffffff00ffLL); \
+        AND(wback, wback, s1); \
+        if (c) { ANDI(ed, ed, 0xff); } \
+        SLLI_D(s1, ed, 8); \
+        OR(wback, wback, s1); \
+    } else { \
+        ANDI(wback, wback, ~0xff); \
+        if (c) { ANDI(ed, ed, 0xff); } \
+        OR(wback, wback, ed); \
+    }
+
+// CALL will use x6 for the call address. Return value can be put in ret (unless ret is -1)
 // R0 will not be pushed/popd if ret is -2
-#define CALL(F, ret) call_c(dyn, ninst, F, x7, ret, 1, 0)
+#define CALL(F, ret) call_c(dyn, ninst, F, x6, ret, 1, 0)
-// CALL_ will use x7 for the call address. Return value can be put in ret (unless ret is -1)
+// CALL_ will use x6 for the call address. Return value can be put in ret (unless ret is -1)
 // R0 will not be pushed/popd if ret is -2
-#define CALL_(F, ret, reg) call_c(dyn, ninst, F, x7, ret, 1, reg)
+#define CALL_(F, ret, reg) call_c(dyn, ninst, F, x6, ret, 1, reg)
-// CALL_S will use x7 for the call address. Return value can be put in ret (unless ret is -1)
+// CALL_S will use x6 for the call address. Return value can be put in ret (unless ret is -1)
 // R0 will not be pushed/popd if ret is -2. Flags are not save/restored
-#define CALL_S(F, ret) call_c(dyn, ninst, F, x7, ret, 0, 0)
+#define CALL_S(F, ret) call_c(dyn, ninst, F, x6, ret, 0, 0)
 
 #define MARKi(i) dyn->insts[ninst].mark[i] = dyn->native_size
 #define GETMARKi(i) dyn->insts[ninst].mark[i]
@@ -118,9 +214,119 @@
 #define MARKLOCK dyn->insts[ninst].marklock = dyn->native_size
 #define GETMARKLOCK dyn->insts[ninst].marklock
 
+#define IFX(A) if ((dyn->insts[ninst].x64.gen_flags & (A)))
+
+#define STORE_REG(A) ST_D(x##A, xEmu, offsetof(x64emu_t, regs[_##A]))
+#define LOAD_REG(A) LD_D(x##A, xEmu, offsetof(x64emu_t, regs[_##A]))
+
+#define SET_DFNONE() \
+    if (!dyn->f.dfnone) { \
+        ST_W(xZR, xEmu, offsetof(x64emu_t, df)); \
+        dyn->f.dfnone = 1; \
+    }
+#define SET_DF(S, N) \
+    if ((N) != d_none) { \
+        MOV32w(S, (N)); \
+        ST_W(S, xEmu, offsetof(x64emu_t, df)); \
+        dyn->f.dfnone = 0; \
+    } else \
+        SET_DFNONE()
+#define SET_NODF() dyn->f.dfnone = 0
+#define SET_DFOK() dyn->f.dfnone = 1
+
+#define CLEAR_FLAGS() \
+    IFX(X_ALL) { ANDI(xFlags, xFlags, ~((1UL << F_AF) | (1UL << F_CF) | (1UL << F_OF2) | (1UL << F_ZF) | (1UL << F_SF) | (1UL << F_PF))); }
+
+#define CALC_SUB_FLAGS(op1_, op2, res, scratch1, scratch2, width) \
+    IFX(X_AF | X_CF | X_OF) \
+    { \
+        /* calc borrow chain */ \
+        /* bc = (res & (~op1 | op2)) | (~op1 & op2) */ \
+        OR(scratch1, op1_, op2); \
+        AND(scratch2, res, scratch1); \
+        AND(op1_, op1_, op2); \
+        OR(scratch2, scratch2, op1_); \
+        IFX(X_AF) \
+        { \
+            /* af = bc & 0x8 */ \
+            ANDI(scratch1, scratch2, 8); \
+            BEQZ(scratch1, 8); \
+            ORI(xFlags, xFlags, 1 << F_AF); \
+        } \
+        IFX(X_CF) \
+        { \
+            /* cf = bc & (1<<(width-1)) */ \
+            if ((width) == 8) { \
+                ANDI(scratch1, scratch2, 0x80); \
+            } else { \
+                SRLI_D(scratch1, scratch2, (width)-1); \
+                if (width != 64) ANDI(scratch1, scratch1, 1); \
+            } \
+            BEQZ(scratch1, 8); \
+            ORI(xFlags, xFlags, 1 << F_CF); \
+        } \
+        IFX(X_OF) \
+        { \
+            /* of = ((bc >> (width-2)) ^ (bc >> (width-1))) & 0x1; */ \
+            SRLI_D(scratch1, scratch2, (width)-2); \
+            SRLI_D(scratch2, scratch1, 1); \
+            XOR(scratch1, scratch1, scratch2); \
+            ANDI(scratch1, scratch1, 1); \
+            BEQZ(scratch1, 8); \
+            ORI(xFlags, xFlags, 1 << F_OF2); \
+        } \
+    }
+
+// Adjust the flags bit 11 -> bit 5, result in reg (can be same as flags, but not s1)
+#define FLAGS_ADJUST_FROM11(reg, flags, s1) \
+    ANDI(reg, flags, ~(1 << 5)); \
+    SRLI_D(s1, reg, 11 - 5); \
+    ANDI(s1, s1, 1 << 5); \
+    OR(reg, reg, s1)
+
+// Adjust the xFlags bit 5 -> bit 11, src and dst can be the same (and can be xFlags, but not s1)
+#define FLAGS_ADJUST_TO11(dst, src, s1) \
+    LU12I_W(s1, 0xFFFFF); \
+    ADDI_W(s1, s1, 0x7DF); \
+    AND(s1, src, s1); \
+    ANDI(dst, src, 1 << 5); \
+    SLLI_D(dst, dst, 11 - 5); \
+    OR(dst, dst, s1)
+
 #ifndef READFLAGS
-#define READFLAGS(A)
+#define READFLAGS(A) \
+    if (((A) != X_PEND && dyn->f.pending != SF_SET) \
+        && (dyn->f.pending != SF_SET_PENDING)) { \
+        if (dyn->f.pending != SF_PENDING) { \
+            LD_D(x3, xEmu, offsetof(x64emu_t, df)); \
+            j64 = (GETMARKF) - (dyn->native_size); \
+            BEQ(x3, xZR, j64); \
+        } \
+        CALL_(UpdateFlags, -1, 0); \
+        FLAGS_ADJUST_FROM11(xFlags, xFlags, x3); \
+        MARKF; \
+        dyn->f.pending = SF_SET; \
+        SET_DFOK(); \
+    }
 #endif
 #ifndef SETFLAGS
+#define SETFLAGS(A, B) \
+    if (dyn->f.pending != SF_SET \
+        && ((B) & SF_SUB) \
+        && (dyn->insts[ninst].x64.gen_flags & (~(A)))) \
+        READFLAGS(((dyn->insts[ninst].x64.gen_flags & X_PEND) ? X_ALL : dyn->insts[ninst].x64.gen_flags) & (~(A))); \
+    if (dyn->insts[ninst].x64.gen_flags) switch (B) { \
+            case SF_SUBSET: \
+            case SF_SET: dyn->f.pending = SF_SET; break; \
+            case SF_PENDING: dyn->f.pending = SF_PENDING; break; \
+            case SF_SUBSET_PENDING: \
+            case SF_SET_PENDING: \
+                dyn->f.pending = (dyn->insts[ninst].x64.gen_flags & X_PEND) ? SF_SET_PENDING : SF_SET; \
+                break; \
+        } \
+    else \
+        dyn->f.pending = SF_SET
 #endif
 #ifndef BARRIER
@@ -190,7 +396,19 @@ void* la64_next(x64emu_t* emu, uintptr_t addr);
 #define jump_to_epilog STEPNAME(jump_to_epilog)
 #define jump_to_next STEPNAME(jump_to_next)
 #define call_c STEPNAME(call_c)
-
+#define emit_add32 STEPNAME(emit_add32)
+#define emit_add32c STEPNAME(emit_add32c)
+#define emit_add8 STEPNAME(emit_add8)
+#define emit_add8c STEPNAME(emit_add8c)
+#define emit_sub32 STEPNAME(emit_sub32)
+#define emit_sub32c STEPNAME(emit_sub32c)
+#define emit_sub8 STEPNAME(emit_sub8)
+#define emit_sub8c STEPNAME(emit_sub8c)
+
+#define emit_pf STEPNAME(emit_pf)
+
+#define fpu_pushcache STEPNAME(fpu_pushcache)
+#define fpu_popcache STEPNAME(fpu_popcache)
 #define fpu_reset_cache STEPNAME(fpu_reset_cache)
 #define fpu_propagate_stack STEPNAME(fpu_propagate_stack)
 #define fpu_purgecache STEPNAME(fpu_purgecache)
@@ -207,7 +425,18 @@ uintptr_t geted32(dynarec_la64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop
 void jump_to_epilog(dynarec_la64_t* dyn, uintptr_t ip, int reg, int ninst);
 void jump_to_next(dynarec_la64_t* dyn, uintptr_t ip, int reg, int ninst, int is32bits);
 void call_c(dynarec_la64_t* dyn, int ninst, void* fnc, int reg, int ret, int saveflags, int save_reg);
-
+void emit_add32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5);
+void emit_add32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s2, int s3, int s4, int s5);
+void emit_add8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
+void emit_add8c(dynarec_la64_t* dyn, int ninst, int s1, int32_t c, int s2, int s3, int s4);
+void emit_sub32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5);
+void emit_sub32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s2, int s3, int s4, int s5);
+void emit_sub8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
+void emit_sub8c(dynarec_la64_t* dyn, int ninst, int s1, int32_t c, int s2, int s3, int s4, int s5);
+
+void emit_pf(dynarec_la64_t* dyn, int ninst, int s1, int s3, int s4);
+
+// common coproc helpers
 // reset the cache with n
 void fpu_reset_cache(dynarec_la64_t* dyn, int ninst, int reset_n);
 // propagate stack state
 void fpu_propagate_stack(dynarec_la64_t* dyn, int ninst);
@@ -216,6 +445,8 @@ void fpu_propagate_stack(dynarec_la64_t* dyn, int ninst);
 void fpu_purgecache(dynarec_la64_t* dyn, int ninst, int next, int s1, int s2, int s3);
 void fpu_reflectcache(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3);
 void fpu_unreflectcache(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3);
+void fpu_pushcache(dynarec_la64_t* dyn, int ninst, int s1, int not07);
+void fpu_popcache(dynarec_la64_t* dyn, int ninst, int s1, int not07);
 
 uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
diff --git a/src/dynarec/la64/dynarec_la64_jumpnext.c b/src/dynarec/la64/dynarec_la64_jmpnext.c
index 793eb729..793eb729 100644
--- a/src/dynarec/la64/dynarec_la64_jumpnext.c
+++ b/src/dynarec/la64/dynarec_la64_jmpnext.c
diff --git a/src/dynarec/la64/dynarec_la64_pass0.h b/src/dynarec/la64/dynarec_la64_pass0.h
index 3f6a68ce..f3cca4d7 100644
--- a/src/dynarec/la64/dynarec_la64_pass0.h
+++ b/src/dynarec/la64/dynarec_la64_pass0.h
@@ -9,6 +9,11 @@
     dyn->insts[ninst].x64.use_flags = A; \
     dyn->f.dfnone = 1; \
     dyn->f.pending = SF_SET
+#define SETFLAGS(A, B) \
+    dyn->insts[ninst].x64.set_flags = A; \
+    dyn->insts[ninst].x64.state_flags = B; \
+    dyn->f.pending = (B) & SF_SET_PENDING; \
+    dyn->f.dfnone = ((B) & SF_SET) ? 1 : 0;
 #define EMIT(A) dyn->native_size += 4
 #define BARRIER(A) \
     if (A != BARRIER_MAYBE) { \
diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h
index 7ba34204..a1719187 100644
--- a/src/dynarec/la64/la64_emitter.h
+++ b/src/dynarec/la64/la64_emitter.h
@@ -99,6 +99,9 @@ f24-f31 fs0-fs7 Static registers Callee
 #define wZR xZR
 #define r0 xZR
 
+// replacement for F_OF internally, using a reserved bit. Need to use F_OF2 internally, never F_OF directly!
+#define F_OF2 F_res3
+
 // split a 32bits value in 20bits + 12bits, adjust the upper part is 12bits is negative
 #define SPLIT20(A) (((A) + 0x800) >> 12)
 #define SPLIT12(A) ((A) & 0xfff)
@@ -195,6 +198,19 @@ f24-f31 fs0-fs7 Static registers Callee
 // GR[rd] = GR[rj] ^ ZeroExtend(imm12, GRLEN)
 #define XORI(rd, rj, imm12) EMIT(type_2RI12(0b0000001111, imm12, rj, rd))
 
+// tmp = SLL(GR[rj][31:0], GR[rk][4:0])
+// GR[rd] = SignExtend(tmp[31:0], GRLEN)
+#define SLL_W(rd, rj, rk) EMIT(type_3R(0b00000000000101110, rk, rj, rd))
+// tmp = SRL(GR[rj][31:0], GR[rk][4:0])
+// GR[rd] = SignExtend(tmp[31:0], GRLEN)
+#define SRL_W(rd, rj, rk) EMIT(type_3R(0b00000000000101111, rk, rj, rd))
+// tmp = SRA(GR[rj][31:0], GR[rk][4:0])
+// GR[rd] = SignExtend(tmp[31:0], GRLEN)
+#define SRA_W(rd, rj, rk) EMIT(type_3R(0b00000000000110000, rk, rj, rd))
+// tmp = ROTR(GR[rj][31:0], GR[rk][4:0])
+// GR[rd] = SignExtend(tmp[31:0], GRLEN)
+#define ROTR_W(rd, rj, rk) EMIT(type_3R(0b00000000000110110, rk, rj, rd))
+
 // GR[rd] = SLL(GR[rj][63:0], imm6) (Shift Left Logical)
 #define SLLI_D(rd, rj, imm6) EMIT(type_2RI6(0b0000000001000001, imm6, rj, rd))
 // GR[rd] = SRL(GR[rj][63:0], imm6) (Shift Right Logical)
@@ -213,6 +229,25 @@ f24-f31 fs0-fs7 Static registers Callee
         ADD_D(rd, rs1, scratch); \
     }
 
+// if GR[rj] == GR[rd]:
+//     PC = PC + SignExtend({imm16, 2'b0}, GRLEN)
+#define BEQ(rj, rd, imm16) EMIT(type_2RI16(0b010110, imm16, rj, rd))
+// if GR[rj] != GR[rd]:
+//     PC = PC + SignExtend({imm16, 2'b0}, GRLEN)
+#define BNE(rj, rd, imm16) EMIT(type_2RI16(0b010111, imm16, rj, rd))
+// if signed(GR[rj]) < signed(GR[rd]):
+//     PC = PC + SignExtend({imm16, 2'b0}, GRLEN)
+#define BLT(rj, rd, imm16) EMIT(type_2RI16(0b011000, imm16, rj, rd))
+// if signed(GR[rj]) >= signed(GR[rd]):
+//     PC = PC + SignExtend({imm16, 2'b0}, GRLEN)
+#define BGE(rj, rd, imm16) EMIT(type_2RI16(0b011001, imm16, rj, rd))
+// if unsigned(GR[rj]) < unsigned(GR[rd]):
+//     PC = PC + SignExtend({imm16, 2'b0}, GRLEN)
+#define BLTU(rj, rd, imm16) EMIT(type_2RI16(0b011010, imm16, rj, rd))
+// if unsigned(GR[rj]) >= unsigned(GR[rd]):
+//     PC = PC + SignExtend({imm16, 2'b0}, GRLEN)
+#define BGEU(rj, rd, imm16) EMIT(type_2RI16(0b011011, imm16, rj, rd))
+
 // if GR[rj] == 0:
 //     PC = PC + SignExtend({imm21, 2'b0}, GRLEN)
 #define BEQZ(rj, imm21) EMIT(type_1RI21(0b010000, (imm21) >> 2, rj))
@@ -305,6 +340,19 @@ f24-f31 fs0-fs7 Static registers Callee
         LU52I_D(rd, rd, ((uint64_t)(imm64)) >> 52); \
     }
 
+#define MOV64xw(A, B) \
+    if (rex.w) { \
+        MOV64x(A, B); \
+    } else { \
+        MOV32w(A, B); \
+    }
+#define MOV64z(A, B) \
+    if (rex.is32bits) { \
+        MOV32w(A, B); \
+    } else { \
+        MOV64x(A, B); \
+    }
+
 // rd[63:0] = rj[63:0] (pseudo instruction)
 #define MV(rd, rj) ADDI_D(rd, rj, 0)
 // rd = rj (pseudo instruction)
@@ -324,6 +372,53 @@ f24-f31 fs0-fs7 Static registers Callee
         MV(rd, rj); \
     }
 
+// rd = ~rs1
+#define NOT(rd, rs1) XORI(rd, rs1, -1)
+
+#define ADDIxw(rd, rj, imm12) \
+    if (rex.w) \
+        ADDI_D(rd, rj, imm12); \
+    else \
+        ADDI_W(rd, rj, imm12);
+#define ADDIz(rd, rj, imm12) \
+    if (rex.is32bits) \
+        ADDI_W(rd, rj, imm12); \
+    else \
+        ADDI_D(rd, rj, imm12);
+
+#define ADDxw(rd, rj, rk) \
+    if (rex.w) \
+        ADD_D(rd, rj, rk); \
+    else \
+        ADD_W(rd, rj, rk);
+#define ADDz(rd, rj, rk) \
+    if (rex.is32bits) \
+        ADD_W(rd, rj, rk); \
+    else \
+        ADD_D(rd, rj, rk);
+
+#define SDxw(rd, rj, imm12) \
+    if (rex.w) \
+        ST_D(rd, rj, imm12); \
+    else \
+        ST_W(rd, rj, imm12);
+#define SDz(rd, rj, imm12) \
+    if (rex.is32bits) \
+        ST_W(rd, rj, imm12); \
+    else \
+        ST_D(rd, rj, imm12);
+
+#define SUBxw(rd, rj, rk) \
+    if (rex.w) \
+        SUB_D(rd, rj, rk); \
+    else \
+        SUB_W(rd, rj, rk);
+#define SUBz(rd, rj, rk) \
+    if (rex.is32bits) \
+        SUB_W(rd, rj, rk); \
+    else \
+        SUB_D(rd, rj, rk);
+
 // PUSH / POP reg[0:63]
 #define PUSH1(reg) \
     do { \
diff --git a/src/dynarec/la64/la64_epilog.S b/src/dynarec/la64/la64_epilog.S
index 41eae0cc..bb6977c1 100644
--- a/src/dynarec/la64/la64_epilog.S
+++ b/src/dynarec/la64/la64_epilog.S
@@ -1,4 +1,4 @@
-//la464 epilog for dynarec
+//la64 epilog for dynarec
 //Save stuff, prepare stack and register
 //called with pointer to emu as 1st parameter
 //and address to jump to as 2nd parameter
@@ -6,8 +6,8 @@
 .text
 .align 4
 
-.global la464_epilog
-la464_epilog:
+.global la64_epilog
+la64_epilog:
 //update register -> emu
     st.d $r12, $r4, (8 * 0)
     st.d $r13, $r4, (8 * 1)
@@ -27,7 +27,7 @@ la464_epilog:
     st.d $r30, $r4, (8 * 15)
     st.d $r31, $r4, (8 * 16) // xFlags
    st.d $r20, $r4, (8 * 17) // put back reg value in emu, including EIP (so x27 must be EIP now)
-    ld.d $sp, $r4, 552 // restore saved sp from emu->xSPSave, see la464_prolog
+    ld.d $sp, $r4, 552 // restore saved sp from emu->xSPSave, see la64_prolog
     ld.d $r11, $sp, -8
     st.d $r11, $r4, 552
     // vpop {d8-d15}
diff --git a/src/dynarec/la64/la64_lock.S b/src/dynarec/la64/la64_lock.S
index 9a728b14..74c877d4 100644
--- a/src/dynarec/la64/la64_lock.S
+++ b/src/dynarec/la64/la64_lock.S
@@ -5,39 +5,39 @@
 .text
 .align 4
 
-.global la464_lock_xchg_dd
-.global la464_lock_xchg_d
-.global la464_lock_storeifnull
-.global la464_lock_storeifnull_d
-.global la464_lock_storeifref
-.global la464_lock_storeifref_d
-.global la464_lock_storeifref2_d
-.global la464_lock_decifnot0b
-.global la464_lock_storeb
-.global la464_lock_incif0
-.global la464_lock_decifnot0
-.global la464_lock_store
-.global la464_lock_store_dd
-.global la464_lock_get_b
-.global la464_lock_get_d
-.global la464_lock_get_dd
-.global la464_lock_cas_d
-.global la464_lock_cas_dd
-.global la464_lock_cas_dq
-
-la464_lock_xchg_dd:
+.global la64_lock_xchg_dd
+.global la64_lock_xchg_d
+.global la64_lock_storeifnull
+.global la64_lock_storeifnull_d
+.global la64_lock_storeifref
+.global la64_lock_storeifref_d
+.global la64_lock_storeifref2_d
+.global la64_lock_decifnot0b
+.global la64_lock_storeb
+.global la64_lock_incif0
+.global la64_lock_decifnot0
+.global la64_lock_store
+.global la64_lock_store_dd
+.global la64_lock_get_b
+.global la64_lock_get_d
+.global la64_lock_get_dd
+.global la64_lock_cas_d
+.global la64_lock_cas_dd
+.global la64_lock_cas_dq
+
+la64_lock_xchg_dd:
 // address is a0, value is a1, return old value in a0
     amswap_db.d $a2, $a1, $a0
     move $a0, $a2
     ret
 
-la464_lock_xchg_d:
+la64_lock_xchg_d:
 // address is a0, value is a1, return old value in a0
     amswap_db.w $a2, $a1, $a0
     move $a0, $a2
     ret
 
-la464_lock_storeifnull:
+la64_lock_storeifnull:
 // address is a0, value is a1, a1 store to a0 only if [a0] is 0. return old [a0] value
     dbar 0
     move $a3, $a1
@@ -48,7 +48,7 @@ la464_lock_storeifnull:
     move $a0, $a2
     ret
 
-la464_lock_storeifnull_d:
+la64_lock_storeifnull_d:
 // address is a0, value is a1, a1 store to a0 only if [a0] is 0. return old [a0] value
     dbar 0
     move $a3, $a1
@@ -59,7 +59,7 @@ la464_lock_storeifnull_d:
     move $a0, $a2
     ret
 
-la464_lock_storeifref:
+la64_lock_storeifref:
 // address is a0, value is a1, a1 store to a0 only if [a0] is a2. return new [a0] value (so a1 or old value)
     dbar 0
     move $a4, $a1
@@ -74,7 +74,7 @@ la464_lock_storeifref:
     move $a0, $a3
     ret
 
-la464_lock_storeifref_d:
+la64_lock_storeifref_d:
 // address is a0, value is a1, a1 store to a0 only if [a0] is a2. return new [a0] value (so a1 or old value)
     dbar 0
     move $a4, $a1
@@ -89,7 +89,7 @@ la464_lock_storeifref_d:
     move $a0, $a3
     ret
 
-la464_lock_storeifref2_d:
+la64_lock_storeifref2_d:
 // address is a0, value is a1, a1 store to a0 only if [a0] is a2. return old [a0] value
     dbar 0
     move $a4, $a1
@@ -100,17 +100,17 @@ la464_lock_storeifref2_d:
     move $a0, $a3
     ret
 
-la464_lock_decifnot0b:
+la64_lock_decifnot0b:
     dbar 0
     // TODO
     ret
 
-la464_lock_storeb:
+la64_lock_storeb:
     st.b $a1, $a0, 0
     dbar 0
     ret
 
-la464_lock_decifnot0:
+la64_lock_decifnot0:
     dbar 0
     ll.w $a1, $a0, 0
     beqz $a1, 20
@@ -121,7 +121,7 @@ la464_lock_decifnot0:
     move $a0, $a1
     ret
 
-la464_lock_incif0:
+la64_lock_incif0:
     dbar 0
     ll.w $a1, $a0, 0
    bnez $a1, 20
@@ -132,32 +132,32 @@ la464_lock_incif0:
     move $a0, $a1
     ret
 
-la464_lock_store:
+la64_lock_store:
     st.w $a1, $a0, 0
     dbar 0
     ret
 
-la464_lock_store_dd:
+la64_lock_store_dd:
     st.d $a1, $a0, 0
     dbar 0
     ret
 
-la464_lock_get_b:
+la64_lock_get_b:
     dbar 0
     ld.b $a0, $a0, 0
     ret
 
-la464_lock_get_d:
+la64_lock_get_d:
     dbar 0
     ld.w $a0, $a0, 0
     ret
 
-la464_lock_get_dd:
+la64_lock_get_dd:
     dbar 0
     ld.d $a0, $a0, 0
     ret
 
-la464_lock_cas_d:
+la64_lock_cas_d:
     ll.w $a3, $a0, 0
     bne $a3, $a1, 16
     sc.w $a2, $a0, 0
@@ -166,7 +166,7 @@ la464_lock_cas_d:
     li.d $a0, 1
     ret
 
-la464_lock_cas_dd:
+la64_lock_cas_dd:
     ll.d $a3, $a0, 0
     bne $a3, $a1, 16
     sc.d $a2, $a0, 0
@@ -175,7 +175,7 @@ la464_lock_cas_dd:
     li.d $a0, 1
     ret
 
-la464_lock_cas_dq:
+la64_lock_cas_dq:
     ll.d $a4, $a0, 0
     bne $a4, $a2, 20
     sc.d $a1, $a0, 0
diff --git a/src/dynarec/la64/la64_lock.h b/src/dynarec/la64/la64_lock.h
index 1bf9a004..c757e08a 100644
--- a/src/dynarec/la64/la64_lock.h
+++ b/src/dynarec/la64/la64_lock.h
@@ -3,61 +3,61 @@
 #include <stdint.h>
 
 // Atomically exchange value at [p] with val, return old p
-extern uintptr_t la464_lock_xchg_dd(void* p, uintptr_t val);
+extern uintptr_t la64_lock_xchg_dd(void* p, uintptr_t val);
 
 // Atomically exchange value at [p] with val, return old p
-extern uint32_t la464_lock_xchg_d(void* p, uint32_t val);
+extern uint32_t la64_lock_xchg_d(void* p, uint32_t val);
 
 // Atomically store value to [p] only if [p] is NULL. Return old [p] value
-extern uint32_t la464_lock_storeifnull_d(void*p, uint32_t val);
+extern uint32_t la64_lock_storeifnull_d(void*p, uint32_t val);
 
 // Atomically store value to [p] only if [p] is NULL. Return old [p] value
-extern void* la464_lock_storeifnull(void*p, void* val);
+extern void* la64_lock_storeifnull(void*p, void* val);
 
 // Atomically store value to [p] only if [p] is ref. Return new [p] value (so val or old)
-extern void* la464_lock_storeifref(void*p, void* val, void* ref);
+extern void* la64_lock_storeifref(void*p, void* val, void* ref);
 
 // Atomically store value to [p] only if [p] is ref. Return new [p] value (so val or old)
-extern uint32_t la464_lock_storeifref_d(void*p, uint32_t val, uint32_t ref);
+extern uint32_t la64_lock_storeifref_d(void*p, uint32_t val, uint32_t ref);
 
 // Atomically store value to [p] only if [p] is ref. Return new [p] value (so val or old)
-extern uint32_t la464_lock_storeifref2_d(void*p, uint32_t val, uint32_t ref);
+extern uint32_t la64_lock_storeifref2_d(void*p, uint32_t val, uint32_t ref);
 
 // decrement atomically the byte at [p] (but only if p not 0)
-extern void la464_lock_decifnot0b(void*p);
+extern void la64_lock_decifnot0b(void*p);
 
 // atomic store (with memory barrier)
-extern void la464_lock_storeb(void*p, uint8_t b);
+extern void la64_lock_storeb(void*p, uint8_t b);
 
 // increment atomically the int at [p] only if it was 0. Return the old value of [p]
-extern int la464_lock_incif0(void*p);
+extern int la64_lock_incif0(void*p);
 
 // decrement atomically the int at [p] (but only if p not 0)
-extern int la464_lock_decifnot0(void*p);
+extern int la64_lock_decifnot0(void*p);
 
 // atomic store (with memory barrier)
-extern void la464_lock_store(void*p, uint32_t v);
+extern void la64_lock_store(void*p, uint32_t v);
 
 // atomic store (with memory barrier)
-extern void la464_lock_store_dd(void*p, uint64_t v);
+extern void la64_lock_store_dd(void*p, uint64_t v);
 
 // atomic get (with memory barrier)
-extern uint32_t la464_lock_get_b(void* p);
+extern uint32_t la64_lock_get_b(void* p);
 
 // atomic get (with memory barrier)
-extern uint32_t la464_lock_get_d(void* p);
+extern uint32_t la64_lock_get_d(void* p);
 
 // atomic get (with memory barrier)
-extern void* la464_lock_get_dd(void* p);
+extern void* la64_lock_get_dd(void* p);
 
 // Atomically store val at [p] if old [p] is ref. Return 0 if OK, 1 is not. p needs to be aligned
-extern int la464_lock_cas_d(void* p, int32_t ref, int32_t val);
+extern int la64_lock_cas_d(void* p, int32_t ref, int32_t val);
 
 // Atomically store val at [p] if old [p] is ref. Return 0 if OK, 1 is not. p needs to be aligned
-extern int la464_lock_cas_dd(void* p, int64_t ref, int64_t val);
+extern int la64_lock_cas_dd(void* p, int64_t ref, int64_t val);
 
 // (mostly) Atomically store val1 and val2 at [p] if old [p] is ref. Return 0 if OK, 1 is not. p needs to be aligned
-extern int la464_lock_cas_dq(void* p, uint64_t ref, uint64_t val1, uint64_t val2);
+extern int la64_lock_cas_dq(void* p, uint64_t ref, uint64_t val1, uint64_t val2);
 
 // Not defined in assembler but in dynarec_rv64_functions
 uint8_t extract_byte(uint32_t val, void* address);
@@ -65,9 +65,9 @@ uint32_t insert_byte(uint32_t val, uint8_t b, void* address);
 uint16_t extract_half(uint32_t val, void* address);
 uint32_t insert_half(uint32_t val, uint16_t h, void* address);
 
-uint8_t la464_lock_xchg_b(void* addr, uint8_t v);
-uint16_t la464_lock_xchg_h(void* addr, uint16_t v);
-int la464_lock_cas_b(void* p, uint8_t ref, uint8_t val);
-int la464_lock_cas_h(void* p, uint16_t ref, uint16_t val);
+uint8_t la64_lock_xchg_b(void* addr, uint8_t v);
+uint16_t la64_lock_xchg_h(void* addr, uint16_t v);
+int la64_lock_cas_b(void* p, uint8_t ref, uint8_t val);
+int la64_lock_cas_h(void* p, uint16_t ref, uint16_t val);
 
 #endif //__LA64_LOCK__H__
diff --git a/src/dynarec/la64/la64_next.S b/src/dynarec/la64/la64_next.S
index e2c4924d..c69830f3 100644
--- a/src/dynarec/la64/la64_next.S
+++ b/src/dynarec/la64/la64_next.S
@@ -1,4 +1,4 @@
-//la464 update linker table for dynarec
+//la64 update linker table for dynarec
 //called with pointer to emu as 1st parameter
 //and address of table to as 2nd parameter
 //ip is at r12
@@ -8,10 +8,10 @@
 
 .extern LinkNext
 
-.global la464_next
+.global la64_next
 
-    .8byte 0 // NULL pointer before la464_next, for getDB
-la464_next:
+    .8byte 0 // NULL pointer before la64_next, for getDB
+la64_next:
     // emu is a0
     // IP address is a1
     addi.d $sp, $sp, -(8 * 12)
diff --git a/src/dynarec/la64/la64_printer.c b/src/dynarec/la64/la64_printer.c
index 5651ec12..5e1231bd 100644
--- a/src/dynarec/la64/la64_printer.c
+++ b/src/dynarec/la64/la64_printer.c
@@ -3,17 +3,17 @@
 #include <string.h>
 #include <stdio.h>
 
-#include "la464_printer.h"
+#include "la64_printer.h"
 #include "debug.h"
 
 static const char* Xt[] = {"xZR", "r1", "r2", "sp", "xEmu", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "xRAX", "xRCX", "xRDX", "xRBX", "xRSP", "xRBP", "xRSI", "xRDI", "xR8", "r21", "xR9", "xR10", "xR11", "xR12", "xR13", "xR14", "xR15", "xFlags", "xRIP", "r31"};
 
-typedef struct la464_print_s {
+typedef struct la64_print_s {
     int d, j, k, a;
     int i, u;
-} la464_print_t;
+} la64_print_t;
 
-int isMask(uint32_t opcode, const char* mask, la464_print_t *a)
+int isMask(uint32_t opcode, const char* mask, la64_print_t *a)
 {
     if(strlen(mask)!=32) {
         printf_log(LOG_NONE, "Error: printer mask \"%s\" in not len 32 but %ld\n", mask, strlen(mask));
@@ -50,10 +50,10 @@ int64_t signExtend(uint32_t val, int sz)
     return ret;
 }
 
-const char* la464_print(uint32_t opcode, uintptr_t addr)
+const char* la64_print(uint32_t opcode, uintptr_t addr)
 {
     static char buff[200];
-    la464_print_t a;
+    la64_print_t a;
 #define Rd a.d
 #define Rj a.j
 #define Rk a.k
diff --git a/src/dynarec/la64/la64_printer.h b/src/dynarec/la64/la64_printer.h
index 2bc4ded8..bce44339 100644
--- a/src/dynarec/la64/la64_printer.h
+++ b/src/dynarec/la64/la64_printer.h
@@ -1,6 +1,6 @@
 #ifndef _LA64_PRINTER_H_
 #define _LA64_PRINTER_H_
 
-const char* la464_print(uint32_t opcode, uint64_t addr);
+const char* la64_print(uint32_t opcode, uint64_t addr);
 
 #endif //_LA64_PRINTER_H_
diff --git a/src/dynarec/la64/la64_prolog.S b/src/dynarec/la64/la64_prolog.S
index aafe4dc7..b1dd3450 100644
--- a/src/dynarec/la64/la64_prolog.S
+++ b/src/dynarec/la64/la64_prolog.S
@@ -6,8 +6,8 @@
 .text
 .align 4
 
-.global la464_prolog
-la464_prolog:
+.global la64_prolog
+la64_prolog:
 //save all 18 used register
     addi.d $sp, $sp, -(8 * 19)
     st.d $r1, $sp, (8 * 0) //save ra
diff --git a/src/include/dynarec_la464.h b/src/include/dynarec_la64.h
index cf8946f0..cf8946f0 100644
--- a/src/include/dynarec_la464.h
+++ b/src/include/dynarec_la64.h
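
Note on the flag math above: the carry/borrow-chain formulas quoted in the emitter comments (cc = (~res & (op1 | op2)) | (op1 & op2) for ADD, bc = (res & (~op1 | op2)) | (~op1 & op2) for SUB) can be sanity-checked in plain C. The sketch below is illustrative only and not part of the patch; it exhaustively verifies, for all 8-bit operand pairs, that bit 3 of the chain gives AF, the top bit gives CF, and the XOR of the two top bits gives OF, exactly as emit_add8 and CALC_SUB_FLAGS assume. The same identities carry over to the 32- and 64-bit paths.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    for (unsigned a = 0; a < 256; ++a) {
        for (unsigned b = 0; b < 256; ++b) {
            /* ADD: cc = (~res & (op1 | op2)) | (op1 & op2); bit i = carry out of bit i */
            uint8_t res = (uint8_t)(a + b);
            uint8_t cc = (uint8_t)((~res & (a | b)) | (a & b));
            int af = (cc >> 3) & 1;               /* carry out of bit 3 */
            int cf = (cc >> 7) & 1;               /* carry out of bit 7 */
            int of = ((cc >> 6) ^ (cc >> 7)) & 1; /* carry into MSB ^ carry out of MSB */
            if (af != (((a & 0xf) + (b & 0xf)) > 0xf)) abort();
            if (cf != (a + b > 0xff)) abort();
            if (of != ((~(a ^ b) & (a ^ res) & 0x80) != 0)) abort();

            /* SUB: bc = (res & (~op1 | op2)) | (~op1 & op2); bit i = borrow out of bit i */
            res = (uint8_t)(a - b);
            uint8_t bc = (uint8_t)((res & (~a | b)) | (~a & b));
            af = (bc >> 3) & 1;
            cf = (bc >> 7) & 1;
            of = ((bc >> 6) ^ (bc >> 7)) & 1;
            if (af != ((a & 0xf) < (b & 0xf))) abort();
            if (cf != (a < b)) abort();
            if (of != (((a ^ b) & (a ^ res) & 0x80) != 0)) abort();
        }
    }
    printf("carry/borrow chains match the x86 AF/CF/OF definitions\n");
    return 0;
}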