From dbd408c6d155e892f340b2388dc8883dcbc01e60 Mon Sep 17 00:00:00 2001 From: Yang Liu Date: Thu, 11 May 2023 17:12:18 +0800 Subject: [RV64_DYNAREC] Added more opcodes and some fixes (#778) * Added 64 80 opcodes and some fixes * Added 80 /2 ADC opcode * Added 0F 6E MOVD opcode * Added 0F 61 PUNPCKLWD opcode * Added 0F 62 PUNPCKLDQ opcode * Added some MMX infra --- src/dynarec/rv64/dynarec_rv64_00_2.c | 9 ++++ src/dynarec/rv64/dynarec_rv64_0f.c | 41 +++++++++++++++ src/dynarec/rv64/dynarec_rv64_64.c | 85 +++++++++++++++++++++++++++++++ src/dynarec/rv64/dynarec_rv64_emit_math.c | 78 ++++++++++++++++++++++++++++ src/dynarec/rv64/dynarec_rv64_helper.c | 8 +++ src/dynarec/rv64/dynarec_rv64_helper.h | 63 ++++++++++++++++++++--- 6 files changed, 277 insertions(+), 7 deletions(-) diff --git a/src/dynarec/rv64/dynarec_rv64_00_2.c b/src/dynarec/rv64/dynarec_rv64_00_2.c index dbc39039..427ed4a9 100644 --- a/src/dynarec/rv64/dynarec_rv64_00_2.c +++ b/src/dynarec/rv64/dynarec_rv64_00_2.c @@ -72,6 +72,15 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int emit_or8c(dyn, ninst, x1, u8, x2, x4, x5); EBBACK(x5, 0); break; + case 2: // ADC + INST_NAME("ADC Eb, Ib"); + READFLAGS(X_CF); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEB(x1, 1); + u8 = F8; + emit_adc8c(dyn, ninst, x1, u8, x2, x4, x5, x6); + EBBACK(x5, 0); + break; case 3: // SBB INST_NAME("SBB Eb, Ib"); READFLAGS(X_CF); diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c index 1eab5d24..ac0c6fc0 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f.c +++ b/src/dynarec/rv64/dynarec_rv64_0f.c @@ -585,6 +585,47 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } } break; + case 0x61: + INST_NAME("PUNPCKLWD Gm, Em"); + nextop = F8; + GETGM(x1); + GETEM(x2, 0); + // GM->uw[3] = EM->uw[1]; + LHU(x3, wback, fixedaddress+2*1); + SH(x3, gback, 2*3); + // GM->uw[2] = GM->uw[1]; + LHU(x3, gback, 2*1); + SH(x3, gback, 2*2); + // GM->uw[1] = EM->uw[0]; + LHU(x3, wback, fixedaddress+2*0); + SH(x3, gback, 2*1); + break; + case 0x62: + INST_NAME("PUNPCKLDQ Gm, Em"); + nextop = F8; + GETGM(x1); + GETEM(x2, 0); + // GM->ud[1] = EM->ud[0]; + LWU(x3, wback, fixedaddress); + SW(x3, gback, 4*1); + break; + case 0x6E: + INST_NAME("MOVD Gm, Ed"); + nextop = F8; + GETGM(x1); + if(MODREG) { + ed = xRAX + (nextop&7) + (rex.b<<3); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x3, x2, &fixedaddress, rex, NULL, 1, 0); + if(rex.w) { + LD(x4, ed, fixedaddress); + } else { + LW(x4, ed, fixedaddress); + } + ed = x4; + } + if(rex.w) SD(ed, gback, 0); else SW(ed, gback, 0); + break; case 0x77: INST_NAME("EMMS"); // empty MMX, FPU now usable diff --git a/src/dynarec/rv64/dynarec_rv64_64.c b/src/dynarec/rv64/dynarec_rv64_64.c index dde8c972..7155ca00 100644 --- a/src/dynarec/rv64/dynarec_rv64_64.c +++ b/src/dynarec/rv64/dynarec_rv64_64.c @@ -96,6 +96,91 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x66: addr = dynarec64_6664(dyn, addr, ip, ninst, rex, seg, ok, need_epilog); break; + case 0x80: + nextop = F8; + switch((nextop>>3)&7) { + case 0: // ADD + INST_NAME("ADD Eb, Ib"); + grab_segdata(dyn, addr, ninst, x1, seg); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEBO(x1, 1); + u8 = F8; + emit_add8c(dyn, ninst, x1, u8, x2, x4, x5); + EBBACK(x5, 0); + break; + case 1: // OR + INST_NAME("OR Eb, Ib"); + grab_segdata(dyn, addr, ninst, x1, seg); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEBO(x1, 1); + u8 = F8; + emit_or8c(dyn, ninst, x1, u8, x2, x4, x5); + EBBACK(x5, 0); + break; + case 2: // ADC + INST_NAME("ADC Eb, Ib"); + grab_segdata(dyn, addr, ninst, x1, seg); + READFLAGS(X_CF); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEBO(x1, 1); + u8 = F8; + emit_adc8c(dyn, ninst, x1, u8, x2, x4, x5, x6); + EBBACK(x5, 0); + break; + case 3: // SBB + INST_NAME("SBB Eb, Ib"); + grab_segdata(dyn, addr, ninst, x1, seg); + READFLAGS(X_CF); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEBO(x1, 1); + u8 = F8; + emit_sbb8c(dyn, ninst, x1, u8, x2, x4, x5, x6); + EBBACK(x5, 0); + break; + case 4: // AND + INST_NAME("AND Eb, Ib"); + grab_segdata(dyn, addr, ninst, x1, seg); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEBO(x1, 1); + u8 = F8; + emit_and8c(dyn, ninst, x1, u8, x2, x4); + EBBACK(x5, 0); + break; + case 5: // SUB + INST_NAME("SUB Eb, Ib"); + grab_segdata(dyn, addr, ninst, x1, seg); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEBO(x1, 1); + u8 = F8; + emit_sub8c(dyn, ninst, x1, u8, x2, x4, x5, x6); + EBBACK(x5, 0); + break; + case 6: // XOR + INST_NAME("XOR Eb, Ib"); + grab_segdata(dyn, addr, ninst, x1, seg); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEBO(x1, 1); + u8 = F8; + emit_xor8c(dyn, ninst, x1, u8, x2, x4); + EBBACK(x5, 0); + break; + case 7: // CMP + INST_NAME("CMP Eb, Ib"); + grab_segdata(dyn, addr, ninst, x1, seg); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEBO(x1, 1); + u8 = F8; + if(u8) { + MOV32w(x2, u8); + emit_cmp8(dyn, ninst, x1, x2, x3, x4, x5, x6); + } else { + emit_cmp8_0(dyn, ninst, x1, x3, x4); + } + break; + default: + DEFAULT; + } + break; case 0x81: case 0x83: nextop = F8; diff --git a/src/dynarec/rv64/dynarec_rv64_emit_math.c b/src/dynarec/rv64/dynarec_rv64_emit_math.c index 6d7f3b25..1942a34b 100644 --- a/src/dynarec/rv64/dynarec_rv64_emit_math.c +++ b/src/dynarec/rv64/dynarec_rv64_emit_math.c @@ -806,6 +806,9 @@ void emit_dec32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s // emit INC16 instruction, from s1, store result in s1 using s3 and s4 as scratch void emit_inc16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4) { + IFX(X_ALL) { + ANDI(xFlags, xFlags, ~((1UL<e.mmxcache[a], xEmu, offsetof(x64emu_t, mmx[a])); + fpu_free_reg(dyn, dyn->e.mmxcache[a]); + return; +} + // get neon register for a MMX reg, create the entry if needed int mmx_get_reg(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int a) { diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h index fa0e0808..750a4ec1 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.h +++ b/src/dynarec/rv64/dynarec_rv64_helper.h @@ -212,6 +212,28 @@ wb1 = 1; \ ed = i; \ } +//GETEBO will use i for ed, i is also Offset, and can use r3 for wback. +#define GETEBO(i, D) if(MODREG) { \ + if(rex.rex) { \ + wback = xRAX+(nextop&7)+(rex.b<<3); \ + wb2 = 0; \ + } else { \ + wback = (nextop&7); \ + wb2 = (wback>>2)*8; \ + wback = xRAX+(wback&3); \ + } \ + if (wb2) {MV(i, wback); SRLI(i, i, wb2); ANDI(i, i, 0xff);} else {ANDI(i, wback, 0xff);} \ + wb1 = 0; \ + ed = i; \ + } else { \ + SMREAD(); \ + addr = geted(dyn, addr, ninst, nextop, &wback, x3, x2, &fixedaddress, rex, NULL, 1, D); \ + ADD(x3, wback, i); \ + if(wback!=x3) wback = x3; \ + LBU(i, wback, fixedaddress);\ + wb1 = 1; \ + ed = i; \ + } //GETSEB sign extend EB, will use i for ed, and can use r3 for wback. #define GETSEB(i, D) if(MODREG) { \ if(rex.rex) { \ @@ -269,7 +291,6 @@ // Write gb (gd) back to original register / memory, using s1 as scratch #define GBBACK(s1) if(gb2) { \ - assert(gb2 == 8); \ MOV64x(s1, 0xffffffffffff00ffLL); \ AND(gb1, gb1, s1); \ SLLI(s1, gd, 8); \ @@ -284,7 +305,6 @@ SB(ed, wback, fixedaddress); \ SMWRITE(); \ } else if(wb2) { \ - assert(wb2 == 8); \ MOV64x(s1, 0xffffffffffff00ffLL); \ AND(wback, wback, s1); \ if (c) {ANDI(ed, ed, 0xff);} \ @@ -370,6 +390,26 @@ addr = geted(dyn, addr, ninst, nextop, &wback, a, x3, &fixedaddress, rex, NULL, 1, D); \ } +#define GETGM(a) \ + gd = ((nextop&0x38)>>3); \ + mmx_forget_reg(dyn, ninst, gd); \ + gback = a; \ + ADDI(a, xEmu, offsetof(x64emu_t, mmx[gd])) + +// Get EM, might use x3 +#define GETEM(a, D) \ + if(MODREG) { \ + ed = (nextop&7)+(rex.b<<3); \ + mmx_forget_reg(dyn, ninst, ed); \ + fixedaddress = 0; \ + ADDI(a, xEmu, offsetof(x64emu_t, mmx[ed])); \ + wback = a; \ + } else { \ + SMREAD(); \ + ed=8; \ + addr = geted(dyn, addr, ninst, nextop, &wback, a, x3, &fixedaddress, rex, NULL, 1, D); \ + } + #define SSE_LOOP_D_ITEM(GX1, EX1, F, i) \ LWU(GX1, gback, i*4); \ LWU(EX1, wback, fixedaddress+i*4); \ @@ -906,6 +946,7 @@ void* rv64_next(x64emu_t* emu, uintptr_t addr); #define sse_setround STEPNAME(sse_setround) #define mmx_get_reg STEPNAME(mmx_get_reg) #define mmx_get_reg_empty STEPNAME(mmx_get_reg_empty) +#define mmx_forget_reg STEPNAME(mmx_forget_reg) #define sse_get_reg STEPNAME(sse_get_reg) #define sse_get_reg_empty STEPNAME(sse_get_reg_empty) #define sse_forget_reg STEPNAME(sse_forget_reg) @@ -995,8 +1036,8 @@ void emit_dec16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, void emit_dec8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4); void emit_adc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5); //void emit_adc32c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); -//void emit_adc8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4); -//void emit_adc8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4, int s5); +void emit_adc8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5); +void emit_adc8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4, int s5, int s6); void emit_adc16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5); //void emit_adc16c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); void emit_sbb32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5); @@ -1090,12 +1131,20 @@ int extcache_st_coherency(dynarec_rv64_t* dyn, int ninst, int a, int b); #define X87_ST(A) extcache_get_st(dyn, ninst, A) #endif +//MMX helpers +// get float register for a MMX reg, create the entry if needed +int mmx_get_reg(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int a); +// get float register for a MMX reg, but don't try to synch it if it needed to be created +int mmx_get_reg_empty(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int a); +// forget float register for a MMX reg, create the entry if needed +void mmx_forget_reg(dynarec_rv64_t* dyn, int ninst, int a); + //SSE/SSE2 helpers -// get neon register for a SSE reg, create the entry if needed +// get float register for a SSE reg, create the entry if needed int sse_get_reg(dynarec_rv64_t* dyn, int ninst, int s1, int a, int single); -// get neon register for a SSE reg, but don't try to synch it if it needed to be created +// get float register for a SSE reg, but don't try to synch it if it needed to be created int sse_get_reg_empty(dynarec_rv64_t* dyn, int ninst, int s1, int a, int single); -// forget neon register for a SSE reg, create the entry if needed +// forget float register for a SSE reg, create the entry if needed void sse_forget_reg(dynarec_rv64_t* dyn, int ninst, int a); // purge the XMM0..XMM7 cache (before function call) void sse_purge07cache(dynarec_rv64_t* dyn, int ninst, int s1); -- cgit 1.4.1