diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_0f.c | 67 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_660f.c | 41 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_f20f.c | 36 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_f30f.c | 62 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_helper.h | 30 | ||||
| -rw-r--r-- | src/dynarec/la64/la64_emitter.h | 37 | ||||
| -rw-r--r-- | src/dynarec/la64/la64_printer.c | 8 |
7 files changed, 269 insertions, 12 deletions
diff --git a/src/dynarec/la64/dynarec_la64_0f.c b/src/dynarec/la64/dynarec_la64_0f.c index a540f714..4aee3a5d 100644 --- a/src/dynarec/la64/dynarec_la64_0f.c +++ b/src/dynarec/la64/dynarec_la64_0f.c @@ -139,6 +139,22 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni nextop = F8; FAKEED; break; + case 0x28: + INST_NAME("MOVAPS Gx,Ex"); + nextop = F8; + GETG; + if (MODREG) { + ed = (nextop & 7) + (rex.b << 3); + v1 = sse_get_reg(dyn, ninst, x1, ed, 0); + v0 = sse_get_reg_empty(dyn, ninst, x1, gd); + VOR_V(v0, v1, v1); + } else { + v0 = sse_get_reg_empty(dyn, ninst, x1, gd); + SMREAD(); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0); + VLD(v0, ed, fixedaddress); + } + break; case 0x29: INST_NAME("MOVAPS Ex,Gx"); nextop = F8; @@ -154,6 +170,47 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SMWRITE2(); } break; + case 0x2E: + // no special check... + case 0x2F: + if (opcode == 0x2F) { + INST_NAME("COMISS Gx, Ex"); + } else { + INST_NAME("UCOMISS Gx, Ex"); + } + SETFLAGS(X_ALL, SF_SET); + SET_DFNONE(); + nextop = F8; + GETGX(d0, 0); + GETEXSS(v0, 0, 0); + CLEAR_FLAGS(x2); + // if isnan(d0) || isnan(v0) + IFX (X_ZF | X_PF | X_CF) { + FCMP_S(fcc0, d0, v0, cUN); + BCEQZ_MARK(fcc0); + ORI(xFlags, xFlags, (1 << F_ZF) | (1 << F_PF) | (1 << F_CF)); + B_MARK3_nocond; + } + MARK; + // else if isless(d0, v0) + IFX (X_CF) { + FCMP_S(fcc1, d0, v0, cLT); + BCEQZ_MARK2(fcc1); + ORI(xFlags, xFlags, 1 << F_CF); + B_MARK3_nocond; + } + MARK2; + // else if d0 == v0 + IFX (X_ZF) { + FCMP_S(fcc2, d0, v0, cEQ); + BCEQZ_MARK3(fcc2); + ORI(xFlags, xFlags, 1 << F_ZF); + } + MARK3; + IFX (X_ALL) { + SPILL_EFLAGS(); + } + break; #define GO(GETFLAGS, NO, YES, F, I) \ READFLAGS(F); \ if (la64_lbt) { \ @@ -511,10 +568,14 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETGX(v0, 1); GETEX(v1, 0, 1); u8 = F8; - if (v0 != v1) { - VEXTRINS_D(v0, v1, 0x11); // 
v0[127:64] = v1[127:64] + if (v0 == v1) { + VSHUF4I_W(v0, v0, u8); + } else { + q1 = fpu_get_scratch(dyn); + VSHUF4I_W(v0, v0, u8); + VSHUF4I_W(q1, v1, u8); + VEXTRINS_D(v0, q1, 0x11); // v0[127:64] = q1[127:64] } - VSHUF4I_W(v0, v0, u8); break; case 0xC8: case 0xC9: diff --git a/src/dynarec/la64/dynarec_la64_660f.c b/src/dynarec/la64/dynarec_la64_660f.c index 474bbfc2..23ad1fc6 100644 --- a/src/dynarec/la64/dynarec_la64_660f.c +++ b/src/dynarec/la64/dynarec_la64_660f.c @@ -56,6 +56,22 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; FAKEED; break; + case 0x28: + INST_NAME("MOVAPD Gx,Ex"); + nextop = F8; + GETG; + if (MODREG) { + ed = (nextop & 7) + (rex.b << 3); + v1 = sse_get_reg(dyn, ninst, x1, ed, 0); + v0 = sse_get_reg_empty(dyn, ninst, x1, gd); + VOR_V(v0, v1, v1); + } else { + SMREAD(); + v0 = sse_get_reg_empty(dyn, ninst, x1, gd); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0); + VLD(v0, ed, fixedaddress); + } + break; case 0x61: INST_NAME("PUNPCKLWD Gx,Ex"); nextop = F8; @@ -121,6 +137,31 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int VLD(v0, ed, fixedaddress); } break; + case 0x7E: + INST_NAME("MOVD Ed,Gx"); + nextop = F8; + GETGX(v0, 0); + if (rex.w) { + if (MODREG) { + ed = TO_LA64((nextop & 7) + (rex.b << 3)); + MOVFR2GR_D(ed, v0); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0); + FST_D(v0, ed, fixedaddress); + SMWRITE2(); + } + } else { + if (MODREG) { + ed = TO_LA64((nextop & 7) + (rex.b << 3)); + MOVFR2GR_S(ed, v0); + ZEROUP(ed); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0); + FST_S(v0, ed, fixedaddress); + SMWRITE2(); + } + } + break; case 0xBE: INST_NAME("MOVSX Gw, Eb"); nextop = F8; diff --git a/src/dynarec/la64/dynarec_la64_f20f.c b/src/dynarec/la64/dynarec_la64_f20f.c index 25e523cb..74f76744 100644 --- 
a/src/dynarec/la64/dynarec_la64_f20f.c +++ b/src/dynarec/la64/dynarec_la64_f20f.c @@ -80,41 +80,69 @@ uintptr_t dynarec64_F20F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0x58: INST_NAME("ADDSD Gx, Ex"); nextop = F8; - // TODO: fastnan handling GETGX(v0, 1); GETEXSD(v1, 0); d0 = fpu_get_scratch(dyn); FADD_D(d0, v0, v1); + if (!box64_dynarec_fastnan) { + FCMP_D(fcc0, v0, v1, cUN); + BCNEZ_MARK(fcc0); + FCMP_D(fcc1, d0, d0, cOR); + BCNEZ_MARK(fcc1); + FNEG_D(d0, d0); + } + MARK; VEXTRINS_D(v0, d0, 0); // v0[63:0] = d0[63:0] break; case 0x59: INST_NAME("MULSD Gx, Ex"); nextop = F8; - // TODO: fastnan handling GETGX(v0, 1); GETEXSD(v1, 0); d0 = fpu_get_scratch(dyn); FMUL_D(d0, v0, v1); + if (!box64_dynarec_fastnan) { + FCMP_D(fcc0, v0, v1, cUN); + BCNEZ_MARK(fcc0); + FCMP_D(fcc1, d0, d0, cOR); + BCNEZ_MARK(fcc1); + FNEG_D(d0, d0); + } + MARK; VEXTRINS_D(v0, d0, 0); // v0[63:0] = d0[63:0] break; case 0x5C: INST_NAME("SUBSD Gx, Ex"); nextop = F8; - // TODO: fastnan handling GETGX(v0, 1); GETEXSD(v1, 0); d0 = fpu_get_scratch(dyn); FSUB_D(d0, v0, v1); + if (!box64_dynarec_fastnan) { + FCMP_D(fcc0, v0, v1, cUN); + BCNEZ_MARK(fcc0); + FCMP_D(fcc1, d0, d0, cOR); + BCNEZ_MARK(fcc1); + FNEG_D(d0, d0); + } + MARK; VEXTRINS_D(v0, d0, 0); // v0[63:0] = d0[63:0] break; case 0x5E: INST_NAME("DIVSD Gx, Ex"); nextop = F8; - // TODO: fastnan handling GETGX(v0, 1); GETEXSD(v1, 0); d0 = fpu_get_scratch(dyn); FDIV_D(d0, v0, v1); + if (!box64_dynarec_fastnan) { + FCMP_D(fcc0, v0, v1, cUN); + BCNEZ_MARK(fcc0); + FCMP_D(fcc1, d0, d0, cOR); + BCNEZ_MARK(fcc1); + FNEG_D(d0, d0); + } + MARK; VEXTRINS_D(v0, d0, 0); // v0[63:0] = d0[63:0] break; default: diff --git a/src/dynarec/la64/dynarec_la64_f30f.c b/src/dynarec/la64/dynarec_la64_f30f.c index 47670a31..496fb001 100644 --- a/src/dynarec/la64/dynarec_la64_f30f.c +++ b/src/dynarec/la64/dynarec_la64_f30f.c @@ -8,6 +8,7 @@ #include "dynarec.h" #include "emu/x64emu_private.h" #include "emu/x64run_private.h" +#include 
"la64_emitter.h" #include "x64run.h" #include "x64emu.h" #include "box64stack.h" @@ -107,6 +108,30 @@ uintptr_t dynarec64_F30F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int FMUL_S(d1, v0, d0); VEXTRINS_W(v0, d1, 0); break; + case 0x5A: + INST_NAME("CVTSS2SD Gx, Ex"); + nextop = F8; + GETGX(v0, 1); + GETEXSS(v1, 0, 0); + d1 = fpu_get_scratch(dyn); + FCVT_D_S(d1, v1); + VEXTRINS_D(v0, d1, 0); + break; + case 0x5D: + INST_NAME("MINSS Gx, Ex"); + nextop = F8; + GETGX(d0, 1); + GETEXSS(d1, 0, 0); + FCMP_S(fcc0, d0, d1, cUN); + BCNEZ_MARK(fcc0); + FCMP_S(fcc1, d1, d0, cLT); + BCEQZ_MARK2(fcc1); + MARK; + v1 = fpu_get_scratch(dyn); + FMOV_S(v1, d1); + VEXTRINS_W(d0, v1, 0); + MARK2; + break; case 0x5E: INST_NAME("DIVSS Gx, Ex"); nextop = F8; @@ -116,6 +141,21 @@ uintptr_t dynarec64_F30F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int FDIV_S(d1, v0, d0); VEXTRINS_W(v0, d1, 0); break; + case 0x5F: + INST_NAME("MAXSS Gx, Ex"); + nextop = F8; + GETGX(d0, 1); + GETEXSS(d1, 0, 0); + FCMP_S(fcc0, d0, d1, cUN); + BCNEZ_MARK(fcc0); + FCMP_S(fcc1, d0, d1, cLT); + BCEQZ_MARK2(fcc1); + MARK; + v1 = fpu_get_scratch(dyn); + FMOV_S(v1, d1); + VEXTRINS_W(d0, v1, 0); + MARK2; + break; case 0x6F: INST_NAME("MOVDQU Gx, Ex"); nextop = F8; @@ -145,6 +185,28 @@ uintptr_t dynarec64_F30F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int } VEXTRINS_D(v0, v1, 0); // v0[63:0] = v1[63:0] break; + case 0xC2: + INST_NAME("CMPSS Gx, Ex, Ib"); + nextop = F8; + GETGX(v0, 1); + GETEXSS(v1, 0, 1); + u8 = F8; + switch (u8 & 7) { + case 0: FCMP_S(fcc0, v0, v1, cEQ); break; // Equal + case 1: FCMP_S(fcc0, v0, v1, cLT); break; // Less than + case 2: FCMP_S(fcc0, v0, v1, cLE); break; // Less or equal + case 3: FCMP_S(fcc0, v0, v1, cUN); break; // NaN + case 4: FCMP_S(fcc0, v0, v1, cUNE); break; // Not Equal or unordered + case 5: FCMP_S(fcc0, v1, v0, cULE); break; // Greater or equal or unordered + case 6: FCMP_S(fcc0, v1, v0, cULT); break; // Greater or unordered, test inverted, N!=V so 
unordered or less than (inverted) + case 7: FCMP_S(fcc0, v0, v1, cOR); break; // not NaN + } + MOVCF2GR(x2, fcc0); + NEG_D(x2, x2); + q1 = fpu_get_scratch(dyn); + MOVGR2FR_W(q1, x2); + VEXTRINS_W(v0, q1, 0); + break; default: DEFAULT; } diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h index d6b9629a..a7d55077 100644 --- a/src/dynarec/la64/dynarec_la64_helper.h +++ b/src/dynarec/la64/dynarec_la64_helper.h @@ -364,6 +364,10 @@ j64 = GET##M - dyn->native_size; \ B##OP##Z(reg, j64) +#define BCxxZ_gen(OP, M, fcc) \ + j64 = GET##M - dyn->native_size; \ + BC##OP##Z(fcc, j64) + // Branch to MARK if reg1!=reg2 (use j64) #define BNE_MARK(reg1, reg2) Bxx_gen(NE, MARK, reg1, reg2) // Branch to MARK2 if reg1!=reg2 (use j64) @@ -393,6 +397,24 @@ // Branch to MARKLOCK if reg1!=0 (use j64) #define BNEZ_MARKLOCK(reg) BxxZ_gen(NE, MARKLOCK, reg) +// Branch to MARK if fcc!=0 (use j64) +#define BCNEZ_MARK(fcc) BCxxZ_gen(NE, MARK, fcc) +// Branch to MARK2 if fcc!=0 (use j64) +#define BCNEZ_MARK2(fcc) BCxxZ_gen(NE, MARK2, fcc) +// Branch to MARK3 if fcc!=0 (use j64) +#define BCNEZ_MARK3(fcc) BCxxZ_gen(NE, MARK3, fcc) +// Branch to MARKLOCK if fcc!=0 (use j64) +#define BCNEZ_MARKLOCK(fcc) BCxxZ_gen(NE, MARKLOCK, fcc) + +// Branch to MARK if fcc==0 (use j64) +#define BCEQZ_MARK(fcc) BCxxZ_gen(EQ, MARK, fcc) +// Branch to MARK2 if fcc==0 (use j64) +#define BCEQZ_MARK2(fcc) BCxxZ_gen(EQ, MARK2, fcc) +// Branch to MARK3 if fcc==0 (use j64) +#define BCEQZ_MARK3(fcc) BCxxZ_gen(EQ, MARK3, fcc) +// Branch to MARKLOCK if fcc==0 (use j64) +#define BCEQZ_MARKLOCK(fcc) BCxxZ_gen(EQ, MARKLOCK, fcc) + // Branch to MARK if reg1<reg2 (use j64) #define BLT_MARK(reg1, reg2) Bxx_gen(LT, MARK, reg1, reg2) // Branch to MARK if reg1<reg2 (use j64) @@ -418,6 +440,14 @@ #define B_NEXT_nocond \ j64 = (dyn->insts) ? (dyn->insts[ninst].epilog - (dyn->native_size)) : 0; \ B(j64) +// Branch to NEXT if fcc==0 (use j64) +#define CBCZ_NEXT(fcc) \ + j64 = (dyn->insts) ? 
(dyn->insts[ninst].epilog - (dyn->native_size)) : 0; \ BCEQZ(fcc, j64) +// Branch to NEXT if fcc!=0 (use j64) +#define CBCNZ_NEXT(fcc) \ + j64 = (dyn->insts) ? (dyn->insts[ninst].epilog - (dyn->native_size)) : 0; \ + BCNEZ(fcc, j64) // Branch to NEXT if reg1==reg2 (use j64) #define BEQ_NEXT(reg1, reg2) \ diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h index 7860734a..778e1a83 100644 --- a/src/dynarec/la64/la64_emitter.h +++ b/src/dynarec/la64/la64_emitter.h @@ -109,6 +109,38 @@ f24-f31 fs0-fs7 Static registers Callee #define wZR xZR #define r0 xZR +#define fcc0 0 +#define fcc1 1 +#define fcc2 2 +#define fcc3 3 +#define fcc4 4 +#define fcc5 5 +#define fcc6 6 +#define fcc7 7 + +#define cAF 0x0 +#define cUN 0x8 +#define cEQ 0x4 +#define cUEQ 0xC +#define cLT 0x2 +#define cULT 0xA +#define cLE 0x6 +#define cULE 0xE +#define cNE 0x10 +#define cOR 0x14 +#define cUNE 0x18 +#define sAF 0x1 +#define sUN 0x9 +#define sEQ 0x5 +#define sUEQ 0xD +#define sLT 0x3 +#define sULT 0xB +#define sLE 0x7 +#define sULE 0xF +#define sNE 0x11 +#define sOR 0x15 +#define sUNE 0x19 + // split a 32bits value in 20bits + 12bits, adjust the upper part is 12bits is negative #define SPLIT20(A) (((A) + 0x800) >> 12) #define SPLIT12(A) ((A) & 0xfff) @@ -508,7 +540,10 @@ f24-f31 fs0-fs7 Static registers Callee #define BEQZ(rj, imm23) EMIT(type_1RI21(0b010000, ((imm23)>>2), rj)) // if GR[rj] != 0: // PC = PC + SignExtend({imm21, 2'b0}, GRLEN) -#define BNEZ(rj, imm23) EMIT(type_1RI21(0b010001, ((imm23)>>2), rj)) +#define BNEZ(rj, imm23) EMIT(type_1RI21(0b010001, ((imm23) >> 2), rj)) + +#define BCEQZ(cj, imm23) EMIT(type_1RI21(0b010010, ((imm23)>>2), (0b00000 | (cj)))) +#define BCNEZ(cj, imm23) EMIT(type_1RI21(0b010010, ((imm23)>>2), (0b01000 | (cj)))) // GR[rd] = PC + 4 // PC = GR[rj] + SignExtend({imm16, 2'b0}, GRLEN) diff --git a/src/dynarec/la64/la64_printer.c b/src/dynarec/la64/la64_printer.c index 3d4cf991..247653b4 100644 --- a/src/dynarec/la64/la64_printer.c +++ 
b/src/dynarec/la64/la64_printer.c @@ -2141,19 +2141,19 @@ const char* la64_print(uint32_t opcode, uintptr_t addr) return buff; } if (isMask(opcode, "01110011100000iiiiiiiijjjjjddddd", &a)) { - snprintf(buff, sizeof(buff), "%-15s %s, %s, %d", "VEXTRINS.D", Vt[Rd], Vt[Rj], signExtend(imm, 8)); + snprintf(buff, sizeof(buff), "%-15s %s, %s, 0x%x", "VEXTRINS.D", Vt[Rd], Vt[Rj], imm); return buff; } if (isMask(opcode, "01110011100001iiiiiiiijjjjjddddd", &a)) { - snprintf(buff, sizeof(buff), "%-15s %s, %s, %d", "VEXTRINS.W", Vt[Rd], Vt[Rj], signExtend(imm, 8)); + snprintf(buff, sizeof(buff), "%-15s %s, %s, 0x%x", "VEXTRINS.W", Vt[Rd], Vt[Rj], imm); return buff; } if (isMask(opcode, "01110011100010iiiiiiiijjjjjddddd", &a)) { - snprintf(buff, sizeof(buff), "%-15s %s, %s, %d", "VEXTRINS.H", Vt[Rd], Vt[Rj], signExtend(imm, 8)); + snprintf(buff, sizeof(buff), "%-15s %s, %s, 0x%x", "VEXTRINS.H", Vt[Rd], Vt[Rj], imm); return buff; } if (isMask(opcode, "01110011100011iiiiiiiijjjjjddddd", &a)) { - snprintf(buff, sizeof(buff), "%-15s %s, %s, %d", "VEXTRINS.B", Vt[Rd], Vt[Rj], signExtend(imm, 8)); + snprintf(buff, sizeof(buff), "%-15s %s, %s, 0x%x", "VEXTRINS.B", Vt[Rd], Vt[Rj], imm); return buff; } if (isMask(opcode, "0010110000iiiiiiiiiiiijjjjjddddd", &a)) { |