From cddea1e4c199147f6f85c8e2779d0412e57f5d7e Mon Sep 17 00:00:00 2001 From: Yang Liu Date: Mon, 17 Apr 2023 21:44:45 +0800 Subject: [RV64_DYNAREC] Added more opcodes (#706) * Added 0F 5E DIVPS opcode * Added 0F 55 ANDNPS opcode * Added 0F 59 MULPS opcode * Added 66 A9 TEST opcode * Added 66 0F F4 PMULUDQ opcode * Added 0F 5C SUBPS opcode * Added 0F 17 MOVHPS opcode * Added 66 F7 /3 NEG opcode --- src/dynarec/rv64/dynarec_rv64_0f.c | 62 +++++++++++++++++++++++++++++++ src/dynarec/rv64/dynarec_rv64_66.c | 17 ++++++++- src/dynarec/rv64/dynarec_rv64_660f.c | 16 ++++++++ src/dynarec/rv64/dynarec_rv64_emit_math.c | 57 ++++++++++++++++++++++++++++ src/dynarec/rv64/dynarec_rv64_helper.h | 2 +- 5 files changed, 152 insertions(+), 2 deletions(-) diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c index 21895f48..86d1eab7 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f.c +++ b/src/dynarec/rv64/dynarec_rv64_0f.c @@ -190,6 +190,16 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni LD(x4, wback, fixedaddress+0); SD(x4, gback, 8); break; + case 0x17: + nextop = F8; + INST_NAME("MOVHPS Ex,Gx"); + GETGX(x1); + GETEX(x2, 0); + LD(x4, gback, 8); + SD(x4, wback, fixedaddress+0); + if(!MODREG) + SMWRITE2(); + break; case 0x18: nextop = F8; if((nextop&0xC0)==0xC0) { @@ -332,6 +342,13 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SSE_LOOP_Q(x3, x4, AND(x3, x3, x4)); } break; + case 0x55: + INST_NAME("ANDNPS Gx, Ex"); + nextop = F8; + GETGX(x1); + GETEX(x2, 0); + SSE_LOOP_Q(x3, x4, NOT(x3, x3); AND(x3, x3, x4)); + break; case 0x56: INST_NAME("ORPS Gx, Ex"); nextop = F8; @@ -372,6 +389,21 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FSW(s1, gback, i*4); } break; + case 0x59: + INST_NAME("MULPS Gx, Ex"); + nextop = F8; + GETGX(x1); + GETEX(x2, 0); + s0 = fpu_get_scratch(dyn); + s1 = fpu_get_scratch(dyn); + for(int i=0; i<4; ++i) { + // GX->f[i] *= 
EX->f[i]; + FLW(s0, wback, fixedaddress+i*4); + FLW(s1, gback, i*4); + FMULS(s1, s1, s0); + FSW(s1, gback, i*4); + } + break; case 0x5A: INST_NAME("CVTPS2PD Gx, Ex"); nextop = F8; @@ -398,6 +430,36 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FSW(s0, gback, i*4); } break; + case 0x5C: + INST_NAME("SUBPS Gx, Ex"); + nextop = F8; + GETGX(x1); + GETEX(x2, 0); + s0 = fpu_get_scratch(dyn); + s1 = fpu_get_scratch(dyn); + for(int i=0; i<4; ++i) { + // GX->f[i] -= EX->f[i]; + FLW(s0, wback, fixedaddress+i*4); + FLW(s1, gback, i*4); + FSUBS(s1, s1, s0); + FSW(s1, gback, i*4); + } + break; + case 0x5E: + INST_NAME("DIVPS Gx, Ex"); + nextop = F8; + GETGX(x1); + GETEX(x2, 0); + s0 = fpu_get_scratch(dyn); + s1 = fpu_get_scratch(dyn); + for(int i=0; i<4; ++i) { + // GX->f[i] /= EX->f[i]; + FLW(s0, wback, fixedaddress+i*4); + FLW(s1, gback, i*4); + FDIVS(s1, s1, s0); + FSW(s1, gback, i*4); + } + break; case 0x77: INST_NAME("EMMS"); // empty MMX, FPU now usable diff --git a/src/dynarec/rv64/dynarec_rv64_66.c b/src/dynarec/rv64/dynarec_rv64_66.c index cecb4313..0bcebef5 100644 --- a/src/dynarec/rv64/dynarec_rv64_66.c +++ b/src/dynarec/rv64/dynarec_rv64_66.c @@ -414,7 +414,15 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni AND(x1, x1, x2); OR(xRAX, xRAX, x1); break; - + case 0xA9: + INST_NAME("TEST AX,Iw"); + SETFLAGS(X_ALL, SF_SET_PENDING); + u16 = F16; + MOV32w(x2, u16); + SLLIW(x1, xRAX, 16); + SRLIW(x1, x1, 16); + emit_test16(dyn, ninst, x1, x2, x3, x4, x5); + break; case 0xB8: case 0xB9: case 0xBA: @@ -613,6 +621,13 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni MOV32w(x2, u16); emit_test16(dyn, ninst, x1, x2, x3, x4, x5); break; + case 3: + INST_NAME("NEG Ew"); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEW(x1, 0); + emit_neg16(dyn, ninst, ed, x2, x4); + EWBACK; + break; case 6: INST_NAME("DIV Ew"); SETFLAGS(X_ALL, SF_SET); diff --git 
a/src/dynarec/rv64/dynarec_rv64_660f.c b/src/dynarec/rv64/dynarec_rv64_660f.c index 3cd22575..98836ecf 100644 --- a/src/dynarec/rv64/dynarec_rv64_660f.c +++ b/src/dynarec/rv64/dynarec_rv64_660f.c @@ -1042,6 +1042,22 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int SD(x4, gback, 0); SD(x5, gback, 8); break; + case 0xF4: + INST_NAME("PMULUDQ Gx,Ex"); + nextop = F8; + GETGX(x1); + GETEX(x2, 0); + // GX->q[1] = (uint64_t)EX->ud[2]*GX->ud[2]; + LWU(x3, gback, 2*4); + LWU(x4, wback, fixedaddress+2*4); + MUL(x3, x3, x4); + SD(x3, gback, 8); + // GX->q[0] = (uint64_t)EX->ud[0]*GX->ud[0]; + LWU(x3, gback, 0*4); + LWU(x4, wback, fixedaddress+0*4); + MUL(x3, x3, x4); + SD(x3, gback, 0); + break; case 0xF8: INST_NAME("PSUBB Gx,Ex"); nextop = F8; diff --git a/src/dynarec/rv64/dynarec_rv64_emit_math.c b/src/dynarec/rv64/dynarec_rv64_emit_math.c index 3564d684..2f4138d6 100644 --- a/src/dynarec/rv64/dynarec_rv64_emit_math.c +++ b/src/dynarec/rv64/dynarec_rv64_emit_math.c @@ -816,6 +816,63 @@ void emit_neg32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s } } +// emit NEG16 instruction, from s1, store result in s1 using s2 and s3 as scratch +void emit_neg16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3) +{ + CLEAR_FLAGS(); + IFX(X_PEND) { + SH(s1, xEmu, offsetof(x64emu_t, op1)); + SET_DF(s3, d_neg16); + } else IFX(X_ALL) { + SET_DFNONE(); + } + IFX(X_AF | X_OF) { + MV(s3, s1); // s3 = op1 + } + + NEG(s1, s1); + SLLI(s1, s1, 48); + SRLI(s1, s1, 48); + IFX(X_PEND) { + SH(s1, xEmu, offsetof(x64emu_t, res)); + } + + IFX(X_CF) { + BEQZ(s1, 8); + ORI(xFlags, xFlags, 1 << F_CF); + } + + IFX(X_AF | X_OF) { + OR(s3, s1, s3); // s3 = res | op1 + IFX(X_AF) { + /* af = bc & 0x8 */ + ANDI(s2, s3, 8); + BEQZ(s2, 8); + ORI(xFlags, xFlags, 1 << F_AF); + } + IFX(X_OF) { + /* of = ((bc >> (width-2)) ^ (bc >> (width-1))) & 0x1; */ + SRLI(s2, s3, 14); + SRLI(s3, s2, 1); + XOR(s2, s2, s3); + ANDI(s2, s2, 1); + BEQZ(s2, 8); + ORI(xFlags, 
xFlags, 1 << F_OF2); + } + } + IFX(X_SF) { + ANDI(s3, s1, 1 << F_SF); // 1<