From 85f6d8308a0dccda7880fd973f0d3c0bc61d4a31 Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Sun, 26 Mar 2023 12:28:53 +0000 Subject: [RV64_DYNAREC] Added a bunch of opcodes, plus some improvments/fixes to SSE macros --- src/dynarec/rv64/dynarec_rv64_00.c | 59 +++++++++++++++++++++++++++++- src/dynarec/rv64/dynarec_rv64_0f.c | 43 +++++++++++++++++++++- src/dynarec/rv64/dynarec_rv64_660f.c | 18 +++------ src/dynarec/rv64/dynarec_rv64_emit_math.c | 55 ++++++++++++++++++++++++++++ src/dynarec/rv64/dynarec_rv64_emit_shift.c | 54 +++++++++++++++++++++++++++ src/dynarec/rv64/dynarec_rv64_f30f.c | 17 +++++++++ src/dynarec/rv64/dynarec_rv64_helper.h | 21 +++++++++-- 7 files changed, 248 insertions(+), 19 deletions(-) (limited to 'src') diff --git a/src/dynarec/rv64/dynarec_rv64_00.c b/src/dynarec/rv64/dynarec_rv64_00.c index 4c7d0247..3d393fdc 100644 --- a/src/dynarec/rv64/dynarec_rv64_00.c +++ b/src/dynarec/rv64/dynarec_rv64_00.c @@ -751,7 +751,47 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SMWRITELOCK(lock); } break; - + case 0x8A: + INST_NAME("MOV Gb, Eb"); + nextop = F8; + gd = ((nextop&0x38)>>3)+(rex.r<<3); + if(rex.rex) { + gb2 = 0; + gb1 = xRAX + gd; + } else { + gb2 = ((gd&4)>>2); + gb1 = xRAX+(gd&3); + } + gd = x4; + if(MODREG) { + ed = (nextop&7) + (rex.b<<3); + if(rex.rex) { + eb1 = xRAX+ed; + eb2 = 0; + } else { + eb1 = xRAX+(ed&3); // Ax, Cx, Dx or Bx + eb2 = ((ed&4)>>2); // L or H + } + if(eb2) { + SRLI(x1, eb1, 8); + ANDI(x1, x1, 0xff); + } else { + ANDI(x1, eb1, 0xff); + } + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, &lock, 1, 0); + SMREADLOCK(lock); + LBU(x1, ed, fixedaddress); // zero-extend the byte: LB sign-extends and would corrupt the upper bits of gb1 in the OR below + } + if(gb2) { + MOV64x(x4, ~0xff00); + AND(gb1, gb1, x4); + SLLI(x1, x1, 8); + } else { + ANDI(gb1, gb1, ~0xff); + } + OR(gb1, gb1, x1); + break; case 0x8B: INST_NAME("MOV Gd, Ed"); nextop=F8; @@ -1312,6 +1352,16 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni 
emit_shl32(dyn, ninst, rex, ed, x3, x5, x4, x6); WBACK; break; + case 5: + INST_NAME("SHR Ed, CL"); + SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + ANDI(x3, xRCX, rex.w?0x3f:0x1f); + GETED(0); + if(!rex.w && MODREG) {ZEROUP(ed);} + CBZ_NEXT(x3); + emit_shr32(dyn, ninst, rex, ed, x3, x5, x4); + WBACK; + break; case 7: INST_NAME("SAR Ed, CL"); SETFLAGS(X_ALL, SF_PENDING); @@ -1511,6 +1561,13 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni NOT(x1, x1); EBBACK(x5, 1); break; + case 3: + INST_NAME("NEG Eb"); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEB(x1, 0); + emit_neg8(dyn, ninst, x1, x2, x4); + EBBACK(x5, 0); + break; case 4: INST_NAME("MUL AL, Ed"); SETFLAGS(X_ALL, SF_PENDING); diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c index a2449465..543ffae8 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f.c +++ b/src/dynarec/rv64/dynarec_rv64_0f.c @@ -31,7 +31,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni uint8_t opcode = F8; uint8_t nextop, u8; uint8_t gd, ed; - uint8_t wback, wb2; + uint8_t wback, wb2, gback; uint8_t eb1, eb2; int32_t i32, i32_; int cacheupd = 0; @@ -44,6 +44,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni int64_t fixedaddress; int unscaled; MAYUSE(wb2); + MAYUSE(gback); MAYUSE(eb1); MAYUSE(eb2); MAYUSE(q0); @@ -111,6 +112,30 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; + case 0x10: + INST_NAME("MOVUPS Gx,Ex"); + nextop = F8; + GETGX(x1); + GETEX(x2, 0); + LD(x3, wback, fixedaddress+0); + LD(x4, wback, fixedaddress+8); + SD(x3, gback, 0); + SD(x4, gback, 8); + break; + case 0x11: + INST_NAME("MOVUPS Ex,Gx"); + nextop = F8; + GETGX(x1); + GETEX(x2, 0); + LD(x3, gback, 0); + LD(x4, gback, 8); + SD(x3, wback, fixedaddress+0); + SD(x4, wback, fixedaddress+8); + if(!MODREG) + SMWRITE2(); + break; + + case 0x18: nextop = F8; if((nextop&0xC0)==0xC0) { 
@@ -169,6 +194,22 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GOCOND(0x40, "CMOV", "Gd, Ed"); #undef GO + case 0x57: + INST_NAME("XORPS"); + nextop = F8; + //TODO: it might be possible to check if SS or SD are used and not purge them to optimize a bit + GETGX(x1); + if(MODREG && gd==(nextop&7)+(rex.b<<3)) + { + // just zero dest + SD(xZR, x1, 0); + SD(xZR, x1, 8); + } else { + GETEX(x2, 0); + SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4)); + } + break; + case 0x77: INST_NAME("EMMS"); // empty MMX, FPU now usable diff --git a/src/dynarec/rv64/dynarec_rv64_660f.c b/src/dynarec/rv64/dynarec_rv64_660f.c index 03e44fc8..5299f162 100644 --- a/src/dynarec/rv64/dynarec_rv64_660f.c +++ b/src/dynarec/rv64/dynarec_rv64_660f.c @@ -30,7 +30,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int uint8_t nextop, u8; int32_t i32; uint8_t gd, ed; - uint8_t wback, wb1, wb2; + uint8_t wback, wb1, wb2, gback; uint8_t eb1, eb2; int64_t j64; uint64_t tmp64u, tmp64u2; @@ -126,22 +126,14 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("PXOR Gx, Ex"); nextop = F8; GETGX(x1); - GETEX(x2, 0); - if(gd==ed) { + if(MODREG && gd==(nextop&7)+(rex.b<<3)) + { // just zero dest SD(xZR, x1, 0); SD(xZR, x1, 8); } else { - //1st - LD(x3, x1, 0); - LD(x4, x2, 0); - XOR(x3, x3, x4); - SD(x3, x1, 0); - // 2nd - LD(x3, x1, 8); - LD(x4, x2, 8); - XOR(x3, x3, x4); - SD(x3, x1, 8); + GETEX(x2, 0); + SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4)); } break; diff --git a/src/dynarec/rv64/dynarec_rv64_emit_math.c b/src/dynarec/rv64/dynarec_rv64_emit_math.c index bd2dbb96..e3125f31 100644 --- a/src/dynarec/rv64/dynarec_rv64_emit_math.c +++ b/src/dynarec/rv64/dynarec_rv64_emit_math.c @@ -759,6 +759,61 @@ void emit_neg32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s } } +// emit NEG8 instruction, from s1, store result in s1 using s2 and s3 as scratch +void emit_neg8(dynarec_rv64_t* dyn, int ninst, int s1, 
int s2, int s3) +{ + CLEAR_FLAGS(); + IFX(X_PEND) { + SB(s1, xEmu, offsetof(x64emu_t, op1)); + SET_DF(s3, d_neg8); + } else IFX(X_ALL) { + SET_DFNONE(); + } + IFX(X_AF | X_OF) { + MV(s3, s1); // s3 = op1 + } + + NEG(s1, s1); + ANDI(s1, s1, 0xff); + IFX(X_PEND) { + SB(s1, xEmu, offsetof(x64emu_t, res)); + } + + IFX(X_CF) { + BEQZ(s1, 8); + ORI(xFlags, xFlags, 1 << F_CF); + } + + IFX(X_AF | X_OF) { + OR(s3, s1, s3); // s3 = res | op1 + IFX(X_AF) { + /* af = bc & 0x8 */ + ANDI(s2, s3, 8); + BEQZ(s2, 8); + ORI(xFlags, xFlags, 1 << F_AF); + } + IFX(X_OF) { + /* of = ((bc >> (width-2)) ^ (bc >> (width-1))) & 0x1; */ + SRLI(s2, s3, 6); + SRLI(s3, s2, 1); + XOR(s2, s2, s3); + ANDI(s2, s2, 1); + BEQZ(s2, 8); + ORI(xFlags, xFlags, 1 << F_OF2); + } + } + IFX(X_SF) { + ANDI(s3, s1, 1 << F_SF); // 1<>3)+(rex.r<<3); \ sse_forget_reg(dyn, ninst, gd); \ + gback = a; \ ADDI(a, xEmu, offsetof(x64emu_t, xmm[gd])) -// Get Ex address in regenal register a, will purge SS or SD or it's reg and is loaded. May use x3 +// Get Ex address in regenal register a, will purge SS or SD or it's reg and is loaded. May use x3. Use wback as load adress! #define GETEX(a, D) \ if(MODREG) { \ ed = (nextop&7)+(rex.b<<3); \ sse_forget_reg(dyn, ninst, ed); \ fixedaddress = 0; \ ADDI(a, xEmu, offsetof(x64emu_t, xmm[ed])); \ + wback = a; \ } else { \ SMREAD(); \ ed=16; \ addr = geted(dyn, addr, ninst, nextop, &wback, a, x3, &fixedaddress, rex, NULL, 1, D); \ } +// Loop for SSE opcode that use 64bits value and write to GX. +#define SSE_LOOP_Q(GX1, EX1, F) \ + LD(GX1, gback, 0); \ + LD(EX1, wback, fixedaddress+0); \ + F; \ + SD(GX1, gback, 0); \ + LD(GX1, gback, 8); \ + LD(EX1, wback, fixedaddress+8); \ + F; \ + SD(GX1, gback, 8) + // CALL will use x6 for the call address. 
Return value can be put in ret (unless ret is -1) // R0 will not be pushed/popd if ret is -2 #define CALL(F, ret) call_c(dyn, ninst, F, x6, ret, 1, 0) @@ -830,10 +843,10 @@ void emit_sbb8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s //void emit_sbb16c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); void emit_neg32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3); //void emit_neg16(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4); -//void emit_neg8(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4); +void emit_neg8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3); void emit_shl32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5); void emit_shl32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4, int s5); -//void emit_shr32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); +void emit_shr32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); void emit_shr32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4); void emit_sar32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4); //void emit_rol32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4); -- cgit 1.4.1