diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2023-04-11 00:28:40 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-04-10 18:28:40 +0200 |
| commit | 5eb54321e8010cbb1991c9f583e63e0e8f4699cd (patch) | |
| tree | 43583c5ee88a2ec540f2411cfbac508f62c47b80 /src | |
| parent | 9ce44af20405a6a00744500d9f1778894e945297 (diff) | |
| download | box64-5eb54321e8010cbb1991c9f583e63e0e8f4699cd.tar.gz box64-5eb54321e8010cbb1991c9f583e63e0e8f4699cd.zip | |
[RV64_DYNAREC] Added more opcodes for SV (#684)
* [RV64_DYNAREC] Added 0F AB BTS opcode
* [RV64_DYNAREC] Added F0 81,83 /4 LOCK AND opcode
* [RV64_DYNAREC] Added F0 81,83 /1 LOCK OR opcode
* [RV64_DYNAREC] Added 66 0F 58 ADDPD opcode
* [RV64_DYNAREC] Added F2 0F 70 PSHUFLW opcode
* [RV64_DYNAREC] Added 0F AC SHRD opcode
* [RV64_DYNAREC] Fixed a copy-paste typo
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_0f.c | 48 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_660f.c | 8 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_emit_shift.c | 67 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_f0.c | 58 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_f20f.c | 29 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.h | 2 |
6 files changed, 209 insertions, 3 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c index b2bf4df4..e0ec682f 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f.c +++ b/src/dynarec/rv64/dynarec_rv64_0f.c @@ -390,7 +390,53 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni ANDI(xFlags, xFlags, ~1); //F_CF is 1 OR(xFlags, xFlags, x4); break; - + case 0xAB: + INST_NAME("BTS Ed, Gd"); + SETFLAGS(X_CF, SF_SUBSET); + SET_DFNONE(); + nextop = F8; + GETGD; + if (MODREG) { + ed = xRAX+(nextop&7)+(rex.b<<3); + wback = 0; + } else { + SMREAD(); + addr = geted(dyn, addr, ninst, nextop, &wback, x3, x1, &fixedaddress, rex, NULL, 1, 0); + SRAI(x1, gd, 5+rex.w); + SLLI(x1, x1, 2+rex.w); + ADD(x3, wback, x1); + LDxw(x1, x3, fixedaddress); + ed = x1; + wback = x3; + } + if (rex.w) { + ANDI(x2, gd, 0x3f); + } else { + ANDI(x2, gd, 0x1f); + } + SRL(x4, ed, x2); + ANDI(x4, x4, 1); // F_CF is 1 + ANDI(xFlags, xFlags, ~1); + OR(xFlags, xFlags, x4); + ADDI(x3, xZR, 1); + SLL(x3, x3, x2); + OR(ed, ed, x3); + if(wback) { + SDxw(ed, wback, fixedaddress); + SMWRITE(); + } + break; + case 0xAC: + nextop = F8; + INST_NAME("SHRD Ed, Gd, Ib"); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETED(1); + GETGD; + u8 = F8; + u8&=(rex.w?0x3f:0x1f); + emit_shrd32c(dyn, ninst, rex, ed, gd, u8, x3, x4); + WBACK; + break; case 0xAE: nextop = F8; if((nextop&0xF8)==0xE8) { diff --git a/src/dynarec/rv64/dynarec_rv64_660f.c b/src/dynarec/rv64/dynarec_rv64_660f.c index a2212e9a..efaa4a14 100644 --- a/src/dynarec/rv64/dynarec_rv64_660f.c +++ b/src/dynarec/rv64/dynarec_rv64_660f.c @@ -190,6 +190,14 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETGX(x2); SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4)); break; + case 0x58: + INST_NAME("ADDPD Gx, Ex"); + nextop = F8; + //TODO: fastnan handling + GETEX(x1, 0); + GETGX(x2); + SSE_LOOP_FQ(x3, x4, FADDD(v0, v0, v1)); + break; case 0x5C: INST_NAME("SUBPD Gx, Ex"); nextop = F8; diff --git 
a/src/dynarec/rv64/dynarec_rv64_emit_shift.c b/src/dynarec/rv64/dynarec_rv64_emit_shift.c index f0245994..6af7aad7 100644 --- a/src/dynarec/rv64/dynarec_rv64_emit_shift.c +++ b/src/dynarec/rv64/dynarec_rv64_emit_shift.c @@ -306,3 +306,70 @@ void emit_sar32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, emit_pf(dyn, ninst, s1, s3, s4); } } + +// emit SHRD32 instruction, from s1, fill s2 , constant c, store result in s1 using s3 and s4 as scratch +void emit_shrd32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4) +{ + CLEAR_FLAGS(); + + IFX(X_PEND) { + if (c) { + MOV64x(s3, c); + SDxw(s3, xEmu, offsetof(x64emu_t, op2)); + } else SDxw(xZR, xEmu, offsetof(x64emu_t, op2)); + SDxw(s1, xEmu, offsetof(x64emu_t, op1)); + SET_DF(s4, rex.w?d_shr64:d_shr32); + } else IFX(X_ALL) { + SET_DFNONE(); + } + if(!c) { + IFX(X_PEND) { + SDxw(s1, xEmu, offsetof(x64emu_t, res)); + } + return; + } + IFX(X_CF) { + if (c > 1) { + SRAI(s3, s1, c-1); + ANDI(s3, s3, 1); // LSB + BEQZ(s3, 8); + } else { + // no need to shift + ANDI(s3, s1, 1); + BEQZ(s3, 8); + } + ORI(xFlags, xFlags, 1 << F_CF); + } + + SRLIxw(s3, s1, c); + SLLIxw(s1, s2, (rex.w?64:32)-c); + OR(s1, s1, s3); + + IFX(X_SF) { + BGE(s1, xZR, 8); + ORI(xFlags, xFlags, 1 << F_SF); + } + if (!rex.w) { + ZEROUP(s1); + } + IFX(X_PEND) { + SDxw(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_ZF) { + BNEZ(s1, 8); + ORI(xFlags, xFlags, 1 << F_ZF); + } + IFX(X_OF) { + if(c==1) { + SRLI(s3, s1, rex.w?62:30); + SRLI(s4, s1, rex.w?63:31); + XOR(s3, s3, s4); + ANDI(s3, s3, 1); + BEQZ(s3, 8); + ORI(xFlags, xFlags, 1 << F_OF2); + } + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} diff --git a/src/dynarec/rv64/dynarec_rv64_f0.c b/src/dynarec/rv64/dynarec_rv64_f0.c index ec3fb824..4696c70c 100644 --- a/src/dynarec/rv64/dynarec_rv64_f0.c +++ b/src/dynarec/rv64/dynarec_rv64_f0.c @@ -168,6 +168,64 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni 
emit_add32c(dyn, ninst, rex, x1, i64, x3, x4, x5, x6); } break; + case 1: // OR + if(opcode==0x81) { + INST_NAME("LOCK OR Ed, Id"); + } else { + INST_NAME("LOCK OR Ed, Ib"); + } + SETFLAGS(X_ALL, SF_SET_PENDING); + if(MODREG) { + if(opcode==0x81) i64 = F32S; else i64 = F8S; + ed = xRAX+(nextop&7)+(rex.b<<3); + emit_or32c(dyn, ninst, rex, ed, i64, x3, x4); + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, (opcode==0x81)?4:1); + if(opcode==0x81) i64 = F32S; else i64 = F8S; + MARKLOCK; + LRxw(x1, wback, 1, 1); + if (i64>=-2048 && i64<2048) { + ORI(x4, x1, i64); + } else { + MOV64xw(x4, i64); + OR(x4, x1, x4); + } + if (!rex.w) ZEROUP(x4); + SCxw(x3, x4, wback, 1, 1); + BNEZ_MARKLOCK(x3); + IFX(X_ALL|X_PEND) + emit_or32c(dyn, ninst, rex, x1, i64, x3, x4); + } + break; + case 4: // AND + if(opcode==0x81) { + INST_NAME("LOCK AND Ed, Id"); + } else { + INST_NAME("LOCK AND Ed, Ib"); + } + SETFLAGS(X_ALL, SF_SET_PENDING); + if(MODREG) { + if(opcode==0x81) i64 = F32S; else i64 = F8S; + ed = xRAX+(nextop&7)+(rex.b<<3); + emit_and32c(dyn, ninst, rex, ed, i64, x3, x4); + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, (opcode==0x81)?4:1); + if(opcode==0x81) i64 = F32S; else i64 = F8S; + MARKLOCK; + LRxw(x1, wback, 1, 1); + if (i64>=-2048 && i64<2048) { + ANDI(x4, x1, i64); + } else { + MOV64xw(x4, i64); + AND(x4, x1, x4); + } + if (!rex.w) ZEROUP(x4); + SCxw(x3, x4, wback, 1, 1); + BNEZ_MARKLOCK(x3); + IFX(X_ALL|X_PEND) + emit_and32c(dyn, ninst, rex, x1, i64, x3, x4); + } + break; case 5: // SUB if(opcode==0x81) { INST_NAME("LOCK SUB Ed, Id"); diff --git a/src/dynarec/rv64/dynarec_rv64_f20f.c b/src/dynarec/rv64/dynarec_rv64_f20f.c index 8057345f..ec8912fa 100644 --- a/src/dynarec/rv64/dynarec_rv64_f20f.c +++ b/src/dynarec/rv64/dynarec_rv64_f20f.c @@ -29,7 +29,7 @@ uintptr_t dynarec64_F20F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int uint8_t opcode = F8; uint8_t 
nextop; uint8_t gd, ed; - uint8_t wback; + uint8_t wback, gback; uint8_t u8; uint64_t u64, j64; int v0, v1; @@ -190,6 +190,33 @@ uintptr_t dynarec64_F20F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int FNEGD(v0, v0); } break; + case 0x70: // TODO: Optimize this! + INST_NAME("PSHUFLW Gx, Ex, Ib"); + nextop = F8; + GETGX(x1); + GETEX(x2, 1); + u8 = F8; + int32_t idx; + + idx = (u8>>(0*2))&3; + LHU(x3, wback, fixedaddress+idx*2); + idx = (u8>>(1*2))&3; + LHU(x4, wback, fixedaddress+idx*2); + idx = (u8>>(2*2))&3; + LHU(x5, wback, fixedaddress+idx*2); + idx = (u8>>(3*2))&3; + LHU(x6, wback, fixedaddress+idx*2); + + SW(x3, gback, 0*2); + SW(x4, gback, 1*2); + SW(x5, gback, 2*2); + SW(x6, gback, 3*2); + + if (!(MODREG && (gd==ed))) { + LD(x3, wback, fixedaddress+8); + SD(x3, gback, 8); + } + break; case 0xC2: INST_NAME("CMPSD Gx, Ex, Ib"); nextop = F8; diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h index 004a6dac..40985d8c 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.h +++ b/src/dynarec/rv64/dynarec_rv64_helper.h @@ -956,7 +956,7 @@ void emit_shr32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, void emit_sar32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4); //void emit_rol32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4); //void emit_ror32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4); -//void emit_shrd32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4); +void emit_shrd32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4); //void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4); void emit_pf(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4); |