diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2023-04-07 17:34:05 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-04-07 11:34:05 +0200 |
| commit | 149414f233495d53de799c4e078c09e0861fb73d (patch) | |
| tree | 259f7cecec23e785342aa6fd909d29b82d6b16f2 /src | |
| parent | 14695faa43b34bc4d81a8fc0d3756a2f7f60b398 (diff) | |
| download | box64-149414f233495d53de799c4e078c09e0861fb73d.tar.gz box64-149414f233495d53de799c4e078c09e0861fb73d.zip | |
[RV64_DYNAREC] Added more SSE opcodes for Stardew Valley (#672)
* [RV64_DYNAREC] Fixed 66 0F 73 /7 PSLLDQ opcode
* [RV64_DYNAREC] Added 66 0F 6D PUNPCKHQDQ opcode
* [RV64_DYNAREC] Added F2 0F 2C CVTTSD2SI opcode
* [RV64_DYNAREC] Added 66 0F 7F MOVDQA opcode
* [RV64_DYNAREC] Added 0F C6 SHUFPS opcode
* [RV64_DYNAREC] Added 66 0F 72 /6 PSLLD opcode
* [RV64_DYNAREC] Added 66 0F 74 PCMPEQB opcode
* [RV64_DYNAREC] Added 66 0F FA PSUBD opcode
* [RV64_DYNAREC] Added F2 0F 5D MINSD opcode
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_0f.c | 21 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_660f.c | 55 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_f20f.c | 23 | ||||
| -rw-r--r-- | src/dynarec/rv64/rv64_emitter.h | 9 |
4 files changed, 107 insertions, 1 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c index 520b1817..ef988fd1 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f.c +++ b/src/dynarec/rv64/dynarec_rv64_0f.c @@ -564,7 +564,28 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni if(!rex.w) ZEROUP(gd); break; + case 0xC6: // TODO: Optimize this! + INST_NAME("SHUFPS Gx, Ex, Ib"); + nextop = F8; + GETGX(x1); + GETEX(x2, 1); + u8 = F8; + int32_t idx; + idx = (u8>>(0*2))&3; + LWU(x3, gback, idx*4); + idx = (u8>>(1*2))&3; + LWU(x4, gback, idx*4); + idx = (u8>>(2*2))&3; + LWU(x5, wback, fixedaddress+idx*4); + idx = (u8>>(3*2))&3; + LWU(x6, wback, fixedaddress+idx*4); + + SW(x3, gback, 0*4); + SW(x4, gback, 1*4); + SW(x5, gback, 2*4); + SW(x6, gback, 3*4); + break; default: DEFAULT; } diff --git a/src/dynarec/rv64/dynarec_rv64_660f.c b/src/dynarec/rv64/dynarec_rv64_660f.c index a5be39bf..246371f1 100644 --- a/src/dynarec/rv64/dynarec_rv64_660f.c +++ b/src/dynarec/rv64/dynarec_rv64_660f.c @@ -176,6 +176,16 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int SD(x3, gback, 8); } break; + case 0x6D: + INST_NAME("PUNPCKHQDQ Gx,Ex"); + nextop = F8; + GETGX(x1); + GETEX(x2, 0); + LD(x3, gback, 8); + SD(x3, gback, 0); + LD(x3, wback, fixedaddress+8); + SD(x3, gback, 8); + break; case 0x6E: INST_NAME("MOVD Gx, Ed"); nextop = F8; @@ -206,7 +216,6 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETGX(x1); GETEX(x2, 1); u8 = F8; - i32 = -1; int32_t idx; idx = (u8>>(0*2))&3; @@ -240,6 +249,20 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int } } break; + case 6: + INST_NAME("PSLLD Ex, Ib"); + GETEX(x1, 1); + u8 = F8; + if(u8) { + if (u8>31) { + // just zero dest + SD(xZR, x1, fixedaddress+0); + SD(xZR, x1, fixedaddress+8); + } else if(u8) { + SSE_LOOP_DS(x3, SLLI(x3, x3, u8)); + } + } + break; default: DEFAULT; } @@ -302,10 +325,25 @@ uintptr_t 
dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int SD(xZR, x1, fixedaddress+0); } } + break; default: DEFAULT; } break; + case 0x74: + INST_NAME("PCMPEQB Gx,Ex"); + nextop = F8; + GETGX(x1); + GETEX(x2, 0); + for (int i=0; i<16; ++i) { + LBU(x3, gback, i); + LBU(x4, wback, fixedaddress+i); + SUB(x3, x3, x4); + SEQZ(x3, x3); + NEG(x3, x3); + SB(x3, gback, i); + } + break; case 0x76: INST_NAME("PCMPEQD Gx,Ex"); nextop = F8; @@ -339,6 +377,14 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int } } break; + case 0x7F: + INST_NAME("MOVDQA Ex,Gx"); + nextop = F8; + GETGX(x1); + GETEX(x2, 0); + SSE_LOOP_MV_Q2(x3); + if(!MODREG) SMWRITE2(); + break; case 0xAF: INST_NAME("IMUL Gw,Ew"); SETFLAGS(X_ALL, SF_PENDING); @@ -422,6 +468,13 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4)); } break; + case 0xFA: + INST_NAME("PSUBD Gx,Ex"); + nextop = F8; + GETGX(x1); + GETEX(x2, 0); + SSE_LOOP_D(x3, x4, SUBW(x3, x3, x4)); + break; case 0xFD: INST_NAME("PADDW Gx,Ex"); nextop = F8; diff --git a/src/dynarec/rv64/dynarec_rv64_f20f.c b/src/dynarec/rv64/dynarec_rv64_f20f.c index 683917d5..6cb62e3c 100644 --- a/src/dynarec/rv64/dynarec_rv64_f20f.c +++ b/src/dynarec/rv64/dynarec_rv64_f20f.c @@ -90,6 +90,14 @@ uintptr_t dynarec64_F20F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int FCVTDW(v0, ed, RD_RNE); } break; + case 0x2C: + INST_NAME("CVTTSD2SI Gd, Ex"); + nextop = F8; + GETGD; + GETEXSD(v0, 0); + // TODO: fastnan handling + FCVTLDxw(gd, v0, RD_RTZ); + break; case 0x38: // these are some more SSSE4.2+ opcodes opcode = F8; switch(opcode) { @@ -149,6 +157,21 @@ uintptr_t dynarec64_F20F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETEXSD(v1, 0); FSUBD(v0, v0, v1); break; + case 0x5D: + INST_NAME("MINSD Gx, Ex"); + nextop = F8; + GETGXSD(v0); + GETEXSD(v1, 0); + FEQD(x2, v0, v0); + FEQD(x3, v1, v1); + AND(x2, x2, x3); + BEQ_MARK(x2, xZR); + FLTD(x2, v1, 
v0); + BEQ_MARK2(x2, xZR); + MARK; + FMVD(v0, v1); + MARK2; + break; case 0x5E: INST_NAME("DIVSD Gx, Ex"); nextop = F8; diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h index 73955b1c..af4cd430 100644 --- a/src/dynarec/rv64/rv64_emitter.h +++ b/src/dynarec/rv64/rv64_emitter.h @@ -456,6 +456,10 @@ f28–31 ft8–11 FP temporaries Caller #define FCVTSD(frd, frs1) EMIT(R_type(0b0100000, 0b00001, frs1, 0b000, frd, 0b1010011)) // Convert Single frs1 to Double frd #define FCVTDS(frd, frs1) EMIT(R_type(0b0100001, 0b00000, frs1, 0b000, frd, 0b1010011)) +// Convert from Double to signed 32bits +#define FCVTWD(rd, frs1, rm) EMIT(R_type(0b1100001, 0b00000, frs1, rm, rd, 0b1010011)) +// Convert from Double to unsigned 32bits +#define FCVTWUD(rd, frs1, rm) EMIT(R_type(0b1100001, 0b00001, frs1, rm, rd, 0b1010011)) // store rs1 with rs2 sign bit to rd #define FSGNJD(rd, rs1, rs2) EMIT(R_type(0b0010001, rs2, rs1, 0b000, rd, 0b1010011)) // move rs1 to rd @@ -497,4 +501,9 @@ f28–31 ft8–11 FP temporaries Caller // Convert from Double to unsigned 64bits #define FCVTLUD(rd, frs1, rm) EMIT(R_type(0b1100001, 0b00011, frs1, rm, rd, 0b1010011)) +// Convert from Double to signed integer +#define FCVTLDxw(rd, frs1, rm) EMIT(R_type(0b1100001, 0b00000+(rex.w?0b10:0b00), frs1, rm, rd, 0b1010011)) +// Convert from Double to unsigned integer +#define FCVTLUDxw(rd, frs1, rm) EMIT(R_type(0b1100001, 0b00001+(rex.w?0b10:0b00), frs1, rm, rd, 0b1010011)) + #endif //__RV64_EMITTER_H__ |