diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2023-04-14 01:07:10 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-04-13 19:07:10 +0200 |
| commit | 9246f4925bbf0835f052e9aa0b4e167857e7ced9 (patch) | |
| tree | 590dfbe8c55a25e5169b22497cc45356069084c0 | |
| parent | 128fb82c1c40b05ed63409b7d79f486ada6f16ed (diff) | |
| download | box64-9246f4925bbf0835f052e9aa0b4e167857e7ced9.tar.gz box64-9246f4925bbf0835f052e9aa0b4e167857e7ced9.zip | |
[RV64_DYNAREC] Added more 66 0F opcodes for SV and some fixes (#697)
* [RV64_DYNAREC] Added 66 0F 38 00 PSHUFB opcode
* [RV64_DYNAREC] Added 66 0F 38 17 PTEST opcode
* [RV64_DYNAREC] Added 66 0F 38 3A PMINUW opcode
* [RV64_DYNAREC] Added 66 0F C4 PINSRW opcode
* [RV64_DYNAREC] Fixed F2 0F 70 PSHUFLW opcode
* [RV64_DYNAREC] Added 66 0F DD PADDUSW opcode
* [RV64_DYNAREC] Added 66 0F FC PADDB opcode
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_660f.c | 118 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_f20f.c | 8 |
2 files changed, 122 insertions, 4 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_660f.c b/src/dynarec/rv64/dynarec_rv64_660f.c index 198c1a85..f3e6d2cc 100644 --- a/src/dynarec/rv64/dynarec_rv64_660f.c +++ b/src/dynarec/rv64/dynarec_rv64_660f.c @@ -162,6 +162,86 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ORI(xFlags, xFlags, 1<<F_ZF); } break; + case 0x38: // SSSE3 opcodes + nextop = F8; + switch(nextop) { + case 0x00: + INST_NAME("PSHUFB Gx, Ex"); + nextop = F8; + GETGX(x1); + GETEX(x2, 0); + sse_forget_reg(dyn, ninst, x5); + ADDI(x5, xEmu, offsetof(x64emu_t, xmm[x5])); + + // perserve gd + LD(x3, gback, 0); + LD(x4, gback, 8); + SD(x3, x5, 0); + SD(x4, x5, 8); + + for (int i=0; i<16; ++i) { + LBU(x3, wback, fixedaddress+i); + ANDI(x4, x3, 128); + BEQZ(x4, 12); + SB(xZR, gback, i); + BEQZ(xZR, 20); // continue + ANDI(x4, x3, 15); + ADD(x4, x4, x5); + LBU(x4, x4, 0); + SB(x4, gback, i); + } + break; + case 0x17: + INST_NAME("PTEST Gx, Ex"); + nextop = F8; + SETFLAGS(X_ALL, SF_SET); + GETGX(x1); + GETEX(x2, 0); + CLEAR_FLAGS(); + SET_DFNONE(); + IFX(X_ZF|X_CF) { + LD(x5, wback, fixedaddress+0); + LD(x6, wback, fixedaddress+8); + + IFX(X_ZF) { + LD(x3, gback, 0); + LD(x4, gback, 8); + AND(x3, x3, x5); + AND(x4, x4, x6); + OR(x3, x3, x4); + BNEZ(x3, 8); + ORI(xFlags, xFlags, 1<<F_ZF); + } + IFX(X_CF) { + LD(x3, gback, 0); + NOT(x3, x3); + LD(x4, gback, 8); + NOT(x4, x4); + AND(x3, x3, x5); + AND(x4, x4, x6); + OR(x3, x3, x4); + BNEZ(x3, 8); + ORI(xFlags, xFlags, 1<<F_ZF); + } + } + break; + case 0x3A: + INST_NAME("PMINUW Gx, Ex"); // SSE4 opcode! 
+ nextop = F8; + GETGX(x1); + GETEX(x2, 0); + for(int i=0; i<8; ++i) { + // if(GX->uw[i]>EX->uw[i]) GX->uw[i] = EX->uw[i]; + LHU(x3, gback, i*2); + LHU(x4, wback, fixedaddress+i*2); + BLTU(x3, x4, 8); + SH(x4, gback, i*2); + } + break; + default: + DEFAULT; + } + break; case 0x54: INST_NAME("ANDPD Gx, Ex"); nextop = F8; @@ -776,6 +856,14 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int AND(x1, x1, x5); OR(gd, gd, x1); break; + case 0xC4: + INST_NAME("PINSRW Gx,Ed,Ib"); + nextop = F8; + GETED(1); + GETGX(x3); + u8 = (F8)&7; + SH(ed, gback, u8*2); + break; case 0xC5: INST_NAME("PEXTRW Gd,Ex,Ib"); nextop = F8; @@ -830,6 +918,23 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETEX(x2, 0); SSE_LOOP_Q(x3, x4, AND(x3, x3, x4)); break; + case 0xDD: + INST_NAME("PADDUSW Gx,Ex"); + nextop = F8; + GETGX(x1); + GETEX(x2, 0); + for(int i=0; i<8; ++i) { + // tmp32s = (int32_t)GX->uw[i] + EX->uw[i]; + // GX->uw[i] = (tmp32s>65535)?65535:tmp32s; + LHU(x3, gback, i*2); + LHU(x4, wback, fixedaddress+i*2); + ADDW(x3, x3, x4); + MOV32w(x4, 65536); + BLT(x3, x4, 8); + ADDIW(x3, x4, -1); + SH(x3, gback, i*2); + } + break; case 0xDF: INST_NAME("PANDN Gx,Ex"); nextop = F8; @@ -892,6 +997,19 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETEX(x2, 0); SSE_LOOP_D(x3, x4, SUBW(x3, x3, x4)); break; + case 0xFC: + INST_NAME("PADDB Gx,Ex"); + nextop = F8; + GETGX(x1); + GETEX(x2, 0); + for(int i=0; i<16; ++i) { + // GX->sb[i] += EX->sb[i]; + LB(x3, gback, i); + LB(x4, wback, fixedaddress+i); + ADDW(x3, x3, x4); + SB(x3, gback, i); + } + break; case 0xFD: INST_NAME("PADDW Gx,Ex"); nextop = F8; diff --git a/src/dynarec/rv64/dynarec_rv64_f20f.c b/src/dynarec/rv64/dynarec_rv64_f20f.c index 711927ef..ee1e14ca 100644 --- a/src/dynarec/rv64/dynarec_rv64_f20f.c +++ b/src/dynarec/rv64/dynarec_rv64_f20f.c @@ -222,10 +222,10 @@ uintptr_t dynarec64_F20F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t 
ip, int idx = (u8>>(3*2))&3; LHU(x6, wback, fixedaddress+idx*2); - SW(x3, gback, 0*2); - SW(x4, gback, 1*2); - SW(x5, gback, 2*2); - SW(x6, gback, 3*2); + SH(x3, gback, 0*2); + SH(x4, gback, 1*2); + SH(x5, gback, 2*2); + SH(x6, gback, 3*2); if (!(MODREG && (gd==ed))) { LD(x3, wback, fixedaddress+8); |