diff options
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_00_2.c | 9 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_0f.c | 41 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_64.c | 85 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_emit_math.c | 78 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.c | 8 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.h | 63 |
6 files changed, 277 insertions, 7 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_00_2.c b/src/dynarec/rv64/dynarec_rv64_00_2.c index dbc39039..427ed4a9 100644 --- a/src/dynarec/rv64/dynarec_rv64_00_2.c +++ b/src/dynarec/rv64/dynarec_rv64_00_2.c @@ -72,6 +72,15 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int emit_or8c(dyn, ninst, x1, u8, x2, x4, x5); EBBACK(x5, 0); break; + case 2: // ADC + INST_NAME("ADC Eb, Ib"); + READFLAGS(X_CF); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEB(x1, 1); + u8 = F8; + emit_adc8c(dyn, ninst, x1, u8, x2, x4, x5, x6); + EBBACK(x5, 0); + break; case 3: // SBB INST_NAME("SBB Eb, Ib"); READFLAGS(X_CF); diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c index 1eab5d24..ac0c6fc0 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f.c +++ b/src/dynarec/rv64/dynarec_rv64_0f.c @@ -585,6 +585,47 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } } break; + case 0x61: + INST_NAME("PUNPCKLWD Gm, Em"); + nextop = F8; + GETGM(x1); + GETEM(x2, 0); + // GM->uw[3] = EM->uw[1]; + LHU(x3, wback, fixedaddress+2*1); + SH(x3, gback, 2*3); + // GM->uw[2] = GM->uw[1]; + LHU(x3, gback, 2*1); + SH(x3, gback, 2*2); + // GM->uw[1] = EM->uw[0]; + LHU(x3, wback, fixedaddress+2*0); + SH(x3, gback, 2*1); + break; + case 0x62: + INST_NAME("PUNPCKLDQ Gm, Em"); + nextop = F8; + GETGM(x1); + GETEM(x2, 0); + // GM->ud[1] = EM->ud[0]; + LWU(x3, wback, fixedaddress); + SW(x3, gback, 4*1); + break; + case 0x6E: + INST_NAME("MOVD Gm, Ed"); + nextop = F8; + GETGM(x1); + if(MODREG) { + ed = xRAX + (nextop&7) + (rex.b<<3); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x3, x2, &fixedaddress, rex, NULL, 1, 0); + if(rex.w) { + LD(x4, ed, fixedaddress); + } else { + LW(x4, ed, fixedaddress); + } + ed = x4; + } + if(rex.w) SD(ed, gback, 0); else { SLLI(x4, ed, 32); SRLI(x4, x4, 32); SD(x4, gback, 0); } // 32-bit MOVD zero-extends: clear the upper half of the 64-bit MMX reg instead of leaving it stale + break; case 0x77: INST_NAME("EMMS"); // empty MMX, FPU now usable diff --git a/src/dynarec/rv64/dynarec_rv64_64.c b/src/dynarec/rv64/dynarec_rv64_64.c index 
dde8c972..7155ca00 100644 --- a/src/dynarec/rv64/dynarec_rv64_64.c +++ b/src/dynarec/rv64/dynarec_rv64_64.c @@ -96,6 +96,91 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x66: addr = dynarec64_6664(dyn, addr, ip, ninst, rex, seg, ok, need_epilog); break; + case 0x80: + nextop = F8; + switch((nextop>>3)&7) { + case 0: // ADD + INST_NAME("ADD Eb, Ib"); + grab_segdata(dyn, addr, ninst, x1, seg); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEBO(x1, 1); + u8 = F8; + emit_add8c(dyn, ninst, x1, u8, x2, x4, x5); + EBBACK(x5, 0); + break; + case 1: // OR + INST_NAME("OR Eb, Ib"); + grab_segdata(dyn, addr, ninst, x1, seg); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEBO(x1, 1); + u8 = F8; + emit_or8c(dyn, ninst, x1, u8, x2, x4, x5); + EBBACK(x5, 0); + break; + case 2: // ADC + INST_NAME("ADC Eb, Ib"); + grab_segdata(dyn, addr, ninst, x1, seg); + READFLAGS(X_CF); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEBO(x1, 1); + u8 = F8; + emit_adc8c(dyn, ninst, x1, u8, x2, x4, x5, x6); + EBBACK(x5, 0); + break; + case 3: // SBB + INST_NAME("SBB Eb, Ib"); + grab_segdata(dyn, addr, ninst, x1, seg); + READFLAGS(X_CF); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEBO(x1, 1); + u8 = F8; + emit_sbb8c(dyn, ninst, x1, u8, x2, x4, x5, x6); + EBBACK(x5, 0); + break; + case 4: // AND + INST_NAME("AND Eb, Ib"); + grab_segdata(dyn, addr, ninst, x1, seg); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEBO(x1, 1); + u8 = F8; + emit_and8c(dyn, ninst, x1, u8, x2, x4); + EBBACK(x5, 0); + break; + case 5: // SUB + INST_NAME("SUB Eb, Ib"); + grab_segdata(dyn, addr, ninst, x1, seg); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEBO(x1, 1); + u8 = F8; + emit_sub8c(dyn, ninst, x1, u8, x2, x4, x5, x6); + EBBACK(x5, 0); + break; + case 6: // XOR + INST_NAME("XOR Eb, Ib"); + grab_segdata(dyn, addr, ninst, x1, seg); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEBO(x1, 1); + u8 = F8; + emit_xor8c(dyn, ninst, x1, u8, x2, x4); + EBBACK(x5, 0); + break; + case 7: // CMP + INST_NAME("CMP Eb, Ib"); + 
grab_segdata(dyn, addr, ninst, x1, seg); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEBO(x1, 1); + u8 = F8; + if(u8) { + MOV32w(x2, u8); + emit_cmp8(dyn, ninst, x1, x2, x3, x4, x5, x6); + } else { + emit_cmp8_0(dyn, ninst, x1, x3, x4); + } + break; + default: + DEFAULT; + } + break; case 0x81: case 0x83: nextop = F8; diff --git a/src/dynarec/rv64/dynarec_rv64_emit_math.c b/src/dynarec/rv64/dynarec_rv64_emit_math.c index 6d7f3b25..1942a34b 100644 --- a/src/dynarec/rv64/dynarec_rv64_emit_math.c +++ b/src/dynarec/rv64/dynarec_rv64_emit_math.c @@ -806,6 +806,9 @@ void emit_dec32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s // emit INC16 instruction, from s1, store result in s1 using s3 and s4 as scratch void emit_inc16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4) { + IFX(X_ALL) { + ANDI(xFlags, xFlags, ~((1UL<<F_AF) | (1UL<<F_OF2) | (1UL<<F_ZF) | (1UL<<F_SF) | (1UL<<F_PF))); + } IFX(X_PEND) { SH(s1, xEmu, offsetof(x64emu_t, op1)); SET_DF(s3, d_inc16); @@ -919,6 +922,7 @@ void emit_dec16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, // emit SBB8 instruction, from s1, s2, store result in s1 using s3, s4 and s5 as scratch void emit_sbb8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5) { + CLEAR_FLAGS(); IFX(X_PEND) { SB(s1, xEmu, offsetof(x64emu_t, op1)); SB(s2, xEmu, offsetof(x64emu_t, op2)); @@ -956,6 +960,78 @@ void emit_sbb8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i } } +// emit ADC8 instruction, from s1, s2, store result in s1 using s3, s4 and s5 as scratch (s2 may be overwritten too) +void emit_adc8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5) { + CLEAR_FLAGS(); + IFX(X_PEND) { + SH(s1, xEmu, offsetof(x64emu_t, op1)); + SH(s2, xEmu, offsetof(x64emu_t, op2)); + SET_DF(s3, d_adc8); + } else IFX(X_ALL) { + SET_DFNONE(); + } + IFX(X_AF | X_OF) { + OR(s4, s1, s2); // s4 = op1 | op2 + AND(s5, s1, s2); // s5 = op1 & op2 + } + + ADD(s1, s1, s2); + ANDI(s3, xFlags, 1 << F_CF); + 
ADD(s1, s1, s3); + + IFX(X_PEND) { + SW(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_AF | X_OF) { + if(rv64_zbb) { + ANDN(s3, s1, s4); // s3 = ~res & (op1 | op2) + } else { + NOT(s2, s1); // s2 = ~res + AND(s3, s2, s4); // s3 = ~res & (op1 | op2) + } + OR(s3, s3, s5); // cc = (~res & (op1 | op2)) | (op1 & op2) + IFX(X_AF) { + ANDI(s4, s3, 0x08); // AF: cc & 0x08 + BEQZ(s4, 8); + ORI(xFlags, xFlags, 1 << F_AF); + } + IFX(X_OF) { + SRLI(s3, s3, 6); + SRLI(s4, s3, 1); + XOR(s3, s3, s4); + ANDI(s3, s3, 1); // OF: xor of two MSB's of cc + BEQZ(s3, 8); + ORI(xFlags, xFlags, 1 << F_OF2); + } + } + IFX(X_CF) { + SRLI(s3, s1, 8); + BEQZ(s3, 8); + ORI(xFlags, xFlags, 1 << F_CF); + } + + ANDI(s1, s1, 0xff); + + IFX(X_ZF) { + BNEZ(s1, 8); + ORI(xFlags, xFlags, 1 << F_ZF); + } + IFX(X_SF) { + SRLI(s3, s1, 7); + BEQZ(s3, 8); + ORI(xFlags, xFlags, 1 << F_SF); + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} + +// emit ADC8 instruction, from s1, const c, store result in s1 using s3, s4, s5 and s6 as scratch +void emit_adc8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4, int s5, int s6) { + MOV32w(s5, c&0xff); + emit_adc8(dyn, ninst, s1, s5, s3, s4, s6); +} + // emit SBB8 instruction, from s1, constant c, store result in s1 using s3, s4, s5 and s6 as scratch void emit_sbb8c(dynarec_rv64_t* dyn, int ninst, int s1, int c, int s3, int s4, int s5, int s6) { @@ -966,6 +1042,7 @@ void emit_sbb8c(dynarec_rv64_t* dyn, int ninst, int s1, int c, int s3, int s4, i // emit SBB16 instruction, from s1, s2, store result in s1 using s3, s4 and s5 as scratch void emit_sbb16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5) { + CLEAR_FLAGS(); IFX(X_PEND) { SH(s1, xEmu, offsetof(x64emu_t, op1)); SH(s2, xEmu, offsetof(x64emu_t, op2)); @@ -1007,6 +1084,7 @@ void emit_sbb16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, // emit SBB32 instruction, from s1, s2, store result in s1 using s3, s4 and s5 as scratch void 
emit_sbb32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5) { + CLEAR_FLAGS(); IFX(X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, op1)); SDxw(s2, xEmu, offsetof(x64emu_t, op2)); diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c index 0ec243a6..d61c8001 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.c +++ b/src/dynarec/rv64/dynarec_rv64_helper.c @@ -1227,6 +1227,14 @@ static int isx87Empty(dynarec_rv64_t* dyn) return 1; } +// forget ext register for a MMX reg: stores mmxcache[a] back to emu->mmx[a] and frees it. NOTE(review): the store/free is unconditional, there is no "not loaded" check here - confirm mmxcache[a] is always valid when this is called +void mmx_forget_reg(dynarec_rv64_t* dyn, int ninst, int a) +{ + FSD(dyn->e.mmxcache[a], xEmu, offsetof(x64emu_t, mmx[a])); + fpu_free_reg(dyn, dyn->e.mmxcache[a]); + return; +} + // get neon register for a MMX reg, create the entry if needed int mmx_get_reg(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int a) { diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h index fa0e0808..750a4ec1 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.h +++ b/src/dynarec/rv64/dynarec_rv64_helper.h @@ -212,6 +212,28 @@ wb1 = 1; \ ed = i; \ } +//GETEBO will use i for ed; on entry i is also the Offset, which is added to the address for a memory operand, and x3 is used for wback. +#define GETEBO(i, D) if(MODREG) { \ + if(rex.rex) { \ + wback = xRAX+(nextop&7)+(rex.b<<3); \ + wb2 = 0; \ + } else { \ + wback = (nextop&7); \ + wb2 = (wback>>2)*8; \ + wback = xRAX+(wback&3); \ + } \ + if (wb2) {MV(i, wback); SRLI(i, i, wb2); ANDI(i, i, 0xff);} else {ANDI(i, wback, 0xff);} \ + wb1 = 0; \ + ed = i; \ + } else { \ + SMREAD(); \ + addr = geted(dyn, addr, ninst, nextop, &wback, x3, x2, &fixedaddress, rex, NULL, 1, D); \ + ADD(x3, wback, i); \ + if(wback!=x3) wback = x3; \ + LBU(i, wback, fixedaddress);\ + wb1 = 1; \ + ed = i; \ + } //GETSEB sign extend EB, will use i for ed, and can use r3 for wback. 
#define GETSEB(i, D) if(MODREG) { \ if(rex.rex) { \ @@ -269,7 +291,6 @@ // Write gb (gd) back to original register / memory, using s1 as scratch #define GBBACK(s1) if(gb2) { \ - assert(gb2 == 8); \ MOV64x(s1, 0xffffffffffff00ffLL); \ AND(gb1, gb1, s1); \ SLLI(s1, gd, 8); \ @@ -284,7 +305,6 @@ SB(ed, wback, fixedaddress); \ SMWRITE(); \ } else if(wb2) { \ - assert(wb2 == 8); \ MOV64x(s1, 0xffffffffffff00ffLL); \ AND(wback, wback, s1); \ if (c) {ANDI(ed, ed, 0xff);} \ @@ -370,6 +390,26 @@ addr = geted(dyn, addr, ninst, nextop, &wback, a, x3, &fixedaddress, rex, NULL, 1, D); \ } +#define GETGM(a) \ + gd = ((nextop&0x38)>>3); \ + mmx_forget_reg(dyn, ninst, gd); \ + gback = a; \ + ADDI(a, xEmu, offsetof(x64emu_t, mmx[gd])) + +// Get EM, might use x3. There are only 8 MMX regs, so REX.B is not applied to a register operand (same as GETGM ignoring REX.R); ed is set to 8 for a memory operand +#define GETEM(a, D) \ + if(MODREG) { \ + ed = (nextop&7); \ + mmx_forget_reg(dyn, ninst, ed); \ + fixedaddress = 0; \ + ADDI(a, xEmu, offsetof(x64emu_t, mmx[ed])); \ + wback = a; \ + } else { \ + SMREAD(); \ + ed=8; \ + addr = geted(dyn, addr, ninst, nextop, &wback, a, x3, &fixedaddress, rex, NULL, 1, D); \ + } + #define SSE_LOOP_D_ITEM(GX1, EX1, F, i) \ LWU(GX1, gback, i*4); \ LWU(EX1, wback, fixedaddress+i*4); \ @@ -906,6 +946,7 @@ void* rv64_next(x64emu_t* emu, uintptr_t addr); #define sse_setround STEPNAME(sse_setround) #define mmx_get_reg STEPNAME(mmx_get_reg) #define mmx_get_reg_empty STEPNAME(mmx_get_reg_empty) +#define mmx_forget_reg STEPNAME(mmx_forget_reg) #define sse_get_reg STEPNAME(sse_get_reg) #define sse_get_reg_empty STEPNAME(sse_get_reg_empty) #define sse_forget_reg STEPNAME(sse_forget_reg) @@ -995,8 +1036,8 @@ void emit_dec16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, void emit_dec8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4); void emit_adc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5); //void emit_adc32c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); -//void emit_adc8(dynarec_rv64_t* dyn, int 
ninst, int s1, int s2, int s3, int s4); -//void emit_adc8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4, int s5); +void emit_adc8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5); +void emit_adc8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4, int s5, int s6); void emit_adc16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5); //void emit_adc16c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); void emit_sbb32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5); @@ -1090,12 +1131,20 @@ int extcache_st_coherency(dynarec_rv64_t* dyn, int ninst, int a, int b); #define X87_ST(A) extcache_get_st(dyn, ninst, A) #endif +//MMX helpers +// get float register for a MMX reg, create the entry if needed +int mmx_get_reg(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int a); +// get float register for a MMX reg, but don't try to synch it if it needed to be created +int mmx_get_reg_empty(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int a); +// forget float register for a MMX reg: write it back to emu->mmx[a] and free the float register +void mmx_forget_reg(dynarec_rv64_t* dyn, int ninst, int a); + //SSE/SSE2 helpers -// get neon register for a SSE reg, create the entry if needed +// get float register for a SSE reg, create the entry if needed int sse_get_reg(dynarec_rv64_t* dyn, int ninst, int s1, int a, int single); -// get neon register for a SSE reg, but don't try to synch it if it needed to be created +// get float register for a SSE reg, but don't try to synch it if it needed to be created int sse_get_reg_empty(dynarec_rv64_t* dyn, int ninst, int s1, int a, int single); -// forget neon register for a SSE reg, create the entry if needed +// forget float register for a SSE reg, create the entry if needed void sse_forget_reg(dynarec_rv64_t* dyn, int ninst, int a); // purge the XMM0..XMM7 cache (before function call) void sse_purge07cache(dynarec_rv64_t* 
dyn, int ninst, int s1); |