diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2023-04-07 14:08:14 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-04-07 08:08:14 +0200 |
| commit | 1159c0530bba39bd293c146b313bfd5968c9efa0 (patch) | |
| tree | f2d33266e5ab6102d522433f929b21383c4bdd18 /src | |
| parent | 053ecec70bda076cfd4910a850bfbd8971fd7501 (diff) | |
| download | box64-1159c0530bba39bd293c146b313bfd5968c9efa0.tar.gz box64-1159c0530bba39bd293c146b313bfd5968c9efa0.zip | |
[RV64_DYNAREC] Added more opcodes for Stardew Valley (#671)
* [RV64_DYNAREC] Fixed PADD opcode * [RV64_DYNAREC] Added 66 0F 73 /7 PSLLDQ opcode * [RV64_DYNAREC] Added F2 0F 58 ADDSD opcode * [RV64_DYNAREC] Added F2 0F 59 MULSD opcode * [RV64_DYNAREC] Added F3 0F 6F,7F MOVDQU opcode * [RV64_DYNAREC] Fixed emit_shl32* flag calculation * [RV64_DYNAREC] Fixed 8D LEA opcode * [RV64_DYNAREC] Fixed 66 0F FD PADDW opcode * [RV64_DYNAREC] Fixed PSHUFD opcode
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_00.c | 2 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_660f.c | 101 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_emit_shift.c | 18 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_f20f.c | 17 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_f30f.c | 15 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.h | 12 |
6 files changed, 113 insertions, 52 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_00.c b/src/dynarec/rv64/dynarec_rv64_00.c index 03843610..0dae504d 100644 --- a/src/dynarec/rv64/dynarec_rv64_00.c +++ b/src/dynarec/rv64/dynarec_rv64_00.c @@ -870,7 +870,7 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni if(gd!=ed) { // it's sometimes used as a 3 bytes NOP MV(gd, ed); } - else if(!rex.w) { + if(!rex.w) { ZEROUP(gd); //truncate the higher 32bits as asked } } diff --git a/src/dynarec/rv64/dynarec_rv64_660f.c b/src/dynarec/rv64/dynarec_rv64_660f.c index 05b13987..a5be39bf 100644 --- a/src/dynarec/rv64/dynarec_rv64_660f.c +++ b/src/dynarec/rv64/dynarec_rv64_660f.c @@ -198,10 +198,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETGX(x1); GETEX(x2, 0); - LD(x3, wback, fixedaddress+0); - LD(x4, wback, fixedaddress+8); - SD(x3, gback, 0); - SD(x4, gback, 8); + SSE_LOOP_MV_Q(x3); break; case 0x70: // TODO: Optimize this! INST_NAME("PSHUFD Gx,Ex,Ib"); @@ -210,14 +207,21 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETEX(x2, 1); u8 = F8; i32 = -1; - for (int i=0; i<4; ++i) { - int32_t idx = (u8>>(i*2))&3; - if (idx!=i32) { - LWU(x4, wback, fixedaddress+idx*4); - i32 = idx; - } - SW(x4, gback, i*4); - } + int32_t idx; + + idx = (u8>>(0*2))&3; + LWU(x3, wback, fixedaddress+idx*4); + idx = (u8>>(1*2))&3; + LWU(x4, wback, fixedaddress+idx*4); + idx = (u8>>(2*2))&3; + LWU(x5, wback, fixedaddress+idx*4); + idx = (u8>>(3*2))&3; + LWU(x6, wback, fixedaddress+idx*4); + + SW(x3, gback, 0*4); + SW(x4, gback, 1*4); + SW(x5, gback, 2*4); + SW(x6, gback, 3*4); break; case 0x72: nextop = F8; @@ -247,31 +251,57 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("PSRLDQ Ex, Ib"); GETEX(x1, 1); u8 = F8; - if(u8) { - if(u8>15) { - // just zero dest - SD(xZR, x1, fixedaddress+0); - SD(xZR, x1, fixedaddress+8); + if(!u8) break; + if(u8>15) { + // just zero dest + SD(xZR, 
x1, fixedaddress+0); + SD(xZR, x1, fixedaddress+8); + } else { + u8*=8; + if (u8 < 64) { + LD(x3, x1, fixedaddress+0); + LD(x4, x1, fixedaddress+8); + SRLI(x3, x3, u8); + SLLI(x5, x4, 64-u8); + OR(x3, x3, x5); + SD(x3, x1, fixedaddress+0); + SRLI(x4, x4, u8); + SD(x4, x1, fixedaddress+8); } else { - u8*=8; - if (u8 < 64) { - LD(x3, x1, fixedaddress+0); - LD(x4, x1, fixedaddress+8); - SRLI(x3, x3, u8); - SLLI(x5, x4, 64-u8); - OR(x3, x3, x5); - SD(x3, x1, fixedaddress+0); - SRLI(x4, x4, u8); - SD(x4, x1, fixedaddress+8); - } else { - LD(x3, x1, fixedaddress+8); - if (u8-64 > 0) { SRLI(x3, x3, u8-64); } - SD(x3, x1, fixedaddress+0); - SD(xZR, x1, fixedaddress+8); - } + LD(x3, x1, fixedaddress+8); + if (u8-64 > 0) { SRLI(x3, x3, u8-64); } + SD(x3, x1, fixedaddress+0); + SD(xZR, x1, fixedaddress+8); } } break; + case 7: + INST_NAME("PSLLDQ Ex, Ib"); + GETEX(x1, 1); + u8 = F8; + if(!u8) break; + if(u8>15) { + // just zero dest + SD(xZR, x1, fixedaddress+0); + SD(xZR, x1, fixedaddress+8); + } else { + u8*=8; + if (u8 < 64) { + LD(x3, x1, fixedaddress+0); + LD(x4, x1, fixedaddress+8); + SLLI(x4, x4, u8); + SRLI(x5, x3, 64-u8); + OR(x4, x4, x5); + SD(x4, x1, fixedaddress+8); + SLLI(x3, x3, u8); + SD(x3, x1, fixedaddress+0); + } else { + LD(x3, x1, fixedaddress+0); + if (u8-64 > 0) { SLLI(x3, x3, u8-64); } + SD(x3, x1, fixedaddress+8); + SD(xZR, x1, fixedaddress+0); + } + } default: DEFAULT; } @@ -395,17 +425,16 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0xFD: INST_NAME("PADDW Gx,Ex"); nextop = F8; - nextop = F8; GETGX(x1); GETEX(x2, 0); - SSE_LOOP_WQ(x3, x4, ADDW(x3, x3, x4)); + SSE_LOOP_W(x3, x4, ADDW(x3, x3, x4)); break; case 0xFE: INST_NAME("PADDD Gx,Ex"); nextop = F8; GETGX(x1); GETEX(x2, 0); - SSE_LOOP_DQ(x3, x4, ADDW(x3, x3, x4)); + SSE_LOOP_D(x3, x4, ADDW(x3, x3, x4)); break; default: DEFAULT; diff --git a/src/dynarec/rv64/dynarec_rv64_emit_shift.c b/src/dynarec/rv64/dynarec_rv64_emit_shift.c index 1ecb57c6..f0245994 
100644 --- a/src/dynarec/rv64/dynarec_rv64_emit_shift.c +++ b/src/dynarec/rv64/dynarec_rv64_emit_shift.c @@ -36,13 +36,14 @@ void emit_shl32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SET_DFNONE(); } - IFX(X_CF) { + IFX(X_CF|X_OF) { SUBI(s5, s2, rex.w?64:32); NEG(s5, s5); SRL(s3, s1, s5); - ANDI(s5, s3, 1); // LSB - BEQZ(s5, 8); - ORI(xFlags, xFlags, 1 << F_CF); + ANDI(s5, s3, 1); // F_CF + IFX(X_CF) { + OR(xFlags, xFlags, s5); + } } SLL(s1, s1, s2); @@ -92,12 +93,13 @@ void emit_shl32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, } return; } - IFX(X_CF) { + IFX(X_CF|X_OF) { if (c > 0) { SRLI(s3, s1, (rex.w?64:32)-c); - ANDI(s5, s3, 1); // LSB - BEQZ(s5, 8); - ORI(xFlags, xFlags, 1 << F_CF); + ANDI(s5, s3, 1); // F_CF + IFX(X_CF) { + OR(xFlags, xFlags, s5); + } } else { IFX(X_OF) MOV64x(s5, 0); } diff --git a/src/dynarec/rv64/dynarec_rv64_f20f.c b/src/dynarec/rv64/dynarec_rv64_f20f.c index ef181e97..683917d5 100644 --- a/src/dynarec/rv64/dynarec_rv64_f20f.c +++ b/src/dynarec/rv64/dynarec_rv64_f20f.c @@ -125,7 +125,22 @@ uintptr_t dynarec64_F20F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int DEFAULT; } break; - + case 0x58: + INST_NAME("ADDSD Gx, Ex"); + nextop = F8; + // TODO: fastnan handling + GETGXSD(v0); + GETEXSD(v1, 0); + FADDD(v0, v0, v1); + break; + case 0x59: + INST_NAME("MULSD Gx, Ex"); + nextop = F8; + //TODO: fastnan handling + GETGXSD(v0); + GETEXSD(v1, 0); + FMULD(v0, v0, v1); + break; case 0x5C: INST_NAME("SUBSD Gx, Ex"); nextop = F8; diff --git a/src/dynarec/rv64/dynarec_rv64_f30f.c b/src/dynarec/rv64/dynarec_rv64_f30f.c index 9c11eec8..f90861bf 100644 --- a/src/dynarec/rv64/dynarec_rv64_f30f.c +++ b/src/dynarec/rv64/dynarec_rv64_f30f.c @@ -186,6 +186,13 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int FMVS(d0, d1); MARK2; break; + case 0x6F: + INST_NAME("MOVDQU Gx,Ex"); + nextop = F8; + GETGX(x1); + GETEX(x2, 0); + SSE_LOOP_MV_Q(x3); + break; case 0x7E: 
INST_NAME("MOVQ Gx, Ex"); nextop = F8; @@ -202,6 +209,14 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int } SD(xZR, xEmu, offsetof(x64emu_t, xmm[gd])+8); break; + case 0x7F: + INST_NAME("MOVDQU Ex,Gx"); + nextop = F8; + GETGX(x1); + GETEX(x2, 0); + SSE_LOOP_MV_Q2(x3); + if(!MODREG) SMWRITE2(); + break; case 0xC2: INST_NAME("CMPSS Gx, Ex, Ib"); nextop = F8; diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h index 565fe018..62aaf79d 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.h +++ b/src/dynarec/rv64/dynarec_rv64_helper.h @@ -348,15 +348,15 @@ SSE_LOOP_D_ITEM(GX1, EX1, F, 2) \ SSE_LOOP_D_ITEM(GX1, EX1, F, 3) -#define SSE_LOOP_DQ(GX1, EX1, F) \ - SSE_LOOP_D_ITEM(GX1, EX1, F, 0) \ - SSE_LOOP_D_ITEM(GX1, EX1, F, 1) - -#define SSE_LOOP_WQ(GX1, EX1, F) \ +#define SSE_LOOP_W(GX1, EX1, F) \ SSE_LOOP_W_ITEM(GX1, EX1, F, 0) \ SSE_LOOP_W_ITEM(GX1, EX1, F, 1) \ SSE_LOOP_W_ITEM(GX1, EX1, F, 2) \ - SSE_LOOP_W_ITEM(GX1, EX1, F, 3) + SSE_LOOP_W_ITEM(GX1, EX1, F, 3) \ + SSE_LOOP_W_ITEM(GX1, EX1, F, 4) \ + SSE_LOOP_W_ITEM(GX1, EX1, F, 5) \ + SSE_LOOP_W_ITEM(GX1, EX1, F, 6) \ + SSE_LOOP_W_ITEM(GX1, EX1, F, 7) #define SSE_LOOP_DS_ITEM(EX1, F, i) \ |