diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2023-04-11 21:05:37 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-04-11 15:05:37 +0200 |
| commit | 75592bec01a0c5c1775561ba68bfd6c1878071ca (patch) | |
| tree | 109d4c4a1a103c59ee10d17642fb822456c724eb /src | |
| parent | e96b1c810672a2493050a36d32f4961b3158901c (diff) | |
| download | box64-75592bec01a0c5c1775561ba68bfd6c1878071ca.tar.gz box64-75592bec01a0c5c1775561ba68bfd6c1878071ca.zip | |
[RV64_DYNAREC] Added more opcodes for SV, some fixes & optims also (#686)
* [RV64_DYNAREC] Added F0 09 LOCK OR opcode
* [RV64_DYNAREC] Added 66 0F 59 MULPD opcode
* [RV64_DYNAREC] Added 66 0F 59 MULPD opcode
* [RV64_DYNAREC] Added 28 SUB opcode
* [RV64_DYNAREC] Added 66 0F 73 /6 PSLLQ opcode
* [RV64_DYNAREC] Added 66 0F 60 PUNPCKLBW opcode
* [RV64_DYNAREC] Added 0F 56 ORPS opcode & optims
* [RV64_DYNAREC] Added 0F 50 MOVMSKPS opcode
* [RV64_DYNAREC] Fixed typos (thanks to cosim!)
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_00.c | 9 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_0f.c | 32 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_660f.c | 80 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_f0.c | 22 |
4 files changed, 131 insertions, 12 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_00.c b/src/dynarec/rv64/dynarec_rv64_00.c index 34c3fedb..97defd9a 100644 --- a/src/dynarec/rv64/dynarec_rv64_00.c +++ b/src/dynarec/rv64/dynarec_rv64_00.c @@ -250,6 +250,15 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni i64 = F32S; emit_and32c(dyn, ninst, rex, xRAX, i64, x3, x4); break; + case 0x28: + INST_NAME("SUB Eb, Gb"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETEB(x1, 0); + GETGB(x2); + emit_sub8(dyn, ninst, x1, x2, x4, x5, x6); + EBBACK(x5, 0); + break; case 0x29: INST_NAME("SUB Ed, Gd"); SETFLAGS(X_ALL, SF_SET_PENDING); diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c index 9b267061..bb8e7834 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f.c +++ b/src/dynarec/rv64/dynarec_rv64_0f.c @@ -268,12 +268,38 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GOCOND(0x40, "CMOV", "Gd, Ed"); #undef GO + case 0x50: + INST_NAME("MOVMSKPS Gd, Ex"); + nextop = F8; + GETGD; + GETEX(x1, 0); + XOR(gd, gd, gd); + for(int i=0; i<4; ++i) { + LWU(x2, wback, fixedaddress+i*4); + SRLI(x2, x2, 31-i); + if (i>0) ANDI(x2, x2, 1<<i); + OR(gd, gd, x2); + } + break; case 0x54: INST_NAME("ANDPS Gx, Ex"); nextop = F8; - GETEX(x1, 0); - GETGX(x2); - SSE_LOOP_Q(x3, x4, AND(x3, x3, x4)); + gd = ((nextop&0x38)>>3)+(rex.r<<3); + if(!(MODREG && gd==(nextop&7)+(rex.b<<3))) { + GETGX(x1); + GETEX(x2, 0); + SSE_LOOP_Q(x3, x4, AND(x3, x3, x4)); + } + break; + case 0x56: + INST_NAME("ORPS Gx, Ex"); + nextop = F8; + gd = ((nextop&0x38)>>3)+(rex.r<<3); + if(!(MODREG && gd==(nextop&7)+(rex.b<<3))) { + GETGX(x1); + GETEX(x2, 0); + SSE_LOOP_Q(x3, x4, OR(x3, x3, x4)); + } break; case 0x57: INST_NAME("XORPS Gx, Ex"); diff --git a/src/dynarec/rv64/dynarec_rv64_660f.c b/src/dynarec/rv64/dynarec_rv64_660f.c index efaa4a14..85ad9b38 100644 --- a/src/dynarec/rv64/dynarec_rv64_660f.c +++ b/src/dynarec/rv64/dynarec_rv64_660f.c @@ -198,6 +198,26 @@ 
uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETGX(x2); SSE_LOOP_FQ(x3, x4, FADDD(v0, v0, v1)); break; + case 0x59: + INST_NAME("MULPD Gx, Ex"); + nextop = F8; + GETEX(x1, 0); + GETGX(x2); + SSE_LOOP_FQ(x3, x4, { + if(!box64_dynarec_fastnan) { + FEQD(x3, v0, v0); + FEQD(x4, v1, v1); + } + FMULD(v0, v0, v1); + if(!box64_dynarec_fastnan) { + AND(x3, x3, x4); + BEQZ(x3, 16); + FEQD(x3, v0, v0); + BNEZ(x3, 8); + FNEGD(v0, v0); + } + }); + break; case 0x5C: INST_NAME("SUBPD Gx, Ex"); nextop = F8; @@ -206,23 +226,47 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETGX(x2); SSE_LOOP_FQ(x3, x4, FSUBD(v0, v0, v1)); break; + case 0x60: + INST_NAME("PUNPCKLBW Gx,Ex"); + nextop = F8; + GETGX(x2); + for(int i=7; i>0; --i) { // 0 is untouched + // GX->ub[2 * i] = GX->ub[i]; + LBU(x3, gback, i); + SB(x3, gback, 2*i); + } + if (MODREG && gd==(nextop&7)+(rex.b<<3)) { + for(int i=0; i<8; ++i) { + // GX->ub[2 * i + 1] = GX->ub[2 * i]; + LBU(x3, gback, 2*i); + SB(x3, gback, 2*i+1); + } + } else { + GETEX(x1, 0); + for(int i=0; i<8; ++i) { + // GX->ub[2 * i + 1] = EX->ub[i]; + LBU(x3, wback, fixedaddress+i); + SB(x3, gback, 2*i+1); + } + } + break; case 0x61: INST_NAME("PUNPCKLWD Gx,Ex"); nextop = F8; - GETEX(x1, 0); GETGX(x2); for(int i=3; i>0; --i) { // GX->uw[2 * i] = GX->uw[i]; LHU(x3, gback, i*2); SH(x3, gback, 2*i*2); } - if (MODREG && (ed==gd)) { + if (MODREG && gd==(nextop&7)+(rex.b<<3)) { for(int i=0; i<4; ++i) { // GX->uw[2 * i + 1] = GX->uw[2 * i]; LHU(x3, gback, 2*i*2); SH(x3, gback, (2*i+1)*2); } } else { + GETEX(x1, 0); for(int i=0; i<4; ++i) { // GX->uw[2 * i + 1] = EX->uw[i]; LHU(x3, wback, fixedaddress+i*2); @@ -255,7 +299,6 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0x67: INST_NAME("PACKUSWB Gx, Ex"); nextop = F8; - GETEX(x1, 0); GETGX(x2); ADDI(x5, xZR, 0xFF); for(int i=0; i<8; ++i) { @@ -268,11 +311,12 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, 
uintptr_t addr, uintptr_t ip, int AND(x3, x3, x4); SB(x3, gback, i); } - if (MODREG && (ed==gd)) { + if (MODREG && gd==(nextop&7)+(rex.b<<3)) { // GX->q[1] = GX->q[0]; LD(x3, gback, 0*8); SD(x3, gback, 1*8); } else { + GETEX(x1, 0); for(int i=0; i<8; ++i) { // GX->ub[8+i] = (EX->sw[i]<0)?0:((EX->sw[i]>0xff)?0xff:EX->sw[i]); LH(x3, wback, fixedaddress+i*2); @@ -288,20 +332,20 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0x69: INST_NAME("PUNPCKHWD Gx,Ex"); nextop = F8; - GETEX(x1, 0); GETGX(x2); for(int i=0; i<4; ++i) { // GX->uw[2 * i] = GX->uw[i + 4]; LHU(x3, gback, (i+4)*2); SH(x3, gback, 2*i*2); } - if (MODREG && (ed==gd)) { + if (MODREG && gd==(nextop&7)+(rex.b<<3)) { for(int i=0; i<4; ++i) { // GX->uw[2 * i + 1] = GX->uw[2 * i]; LHU(x3, gback, 2*i*2); SH(x3, gback, (2*i+1)*2); } } else { + GETEX(x1, 0); for(int i=0; i<4; ++i) { // GX->uw[2 * i + 1] = EX->uw[i + 4]; LHU(x3, wback, fixedaddress+(i+4)*2); @@ -332,7 +376,6 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0x6B: INST_NAME("PACKSSDW Gx,Ex"); nextop = F8; - GETEX(x1, 0); GETGX(x2); MOV64x(x5, 32768); NEG(x6, x5); @@ -345,11 +388,12 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int MV(x3, x6); SH(x3, gback, i*2); } - if (MODREG && (ed==gd)) { + if (MODREG && gd==(nextop&7)+(rex.b<<3)) { // GX->q[1] = GX->q[0]; LD(x3, gback, 0*8); SD(x3, gback, 1*8); } else { + GETEX(x1, 0); for(int i=0; i<4; ++i) { // GX->sw[4+i] = (EX->sd[i]<-32768)?-32768:((EX->sd[i]>32767)?32767:EX->sd[i]); LW(x3, wback, fixedaddress+i*4); @@ -491,7 +535,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int LD(x4, wback, fixedaddress+8); SRLI(x3, x3, u8); SRLI(x4, x4, u8); - SD(x3, wback, fixedaddress+8); + SD(x3, wback, fixedaddress+0); SD(x4, wback, fixedaddress+8); } break; @@ -523,6 +567,24 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int } } break; + 
case 6: + INST_NAME("PSLLQ Ex, Ib"); + GETEX(x1, 1); + u8 = F8; + if(!u8) break; + if(u8>63) { + // just zero dest + SD(xZR, x1, fixedaddress+0); + SD(xZR, x1, fixedaddress+8); + } else { + LD(x3, wback, fixedaddress+0); + LD(x4, wback, fixedaddress+8); + SLLI(x3, x3, u8); + SLLI(x4, x4, u8); + SD(x3, wback, fixedaddress+0); + SD(x4, wback, fixedaddress+8); + } + break; case 7: INST_NAME("PSLLDQ Ex, Ib"); GETEX(x1, 1); diff --git a/src/dynarec/rv64/dynarec_rv64_f0.c b/src/dynarec/rv64/dynarec_rv64_f0.c index 4696c70c..ef46e9eb 100644 --- a/src/dynarec/rv64/dynarec_rv64_f0.c +++ b/src/dynarec/rv64/dynarec_rv64_f0.c @@ -57,6 +57,28 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni // TODO: Take care of unligned memory access for all the LOCK ones. // https://github.com/ptitSeb/box64/pull/604 switch(opcode) { + case 0x09: + INST_NAME("LOCK OR Ed, Gd"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGD; + SMDMB(); + if (MODREG) { + ed = xRAX+(nextop&7)+(rex.b<<3); + emit_or32(dyn, ninst, rex, ed, gd, x3, x4); + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, 0); + MARKLOCK; + LRxw(x1, wback, 1, 1); + OR(x1, x1, gd); + SCxw(x3, x1, wback, 1, 1); + BNEZ_MARKLOCK(x3); + IFX(X_ALL|X_PEND) { + emit_or32(dyn, ninst, rex, x1, gd, x3, x4); + } + } + SMDMB(); + break; case 0x0F: nextop = F8; switch(nextop) { |