diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2023-03-29 23:49:50 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-03-29 17:49:50 +0200 |
| commit | 0f11b55ba3c785ebf3bcb6998012621dae69d873 (patch) | |
| tree | 24986c745658659e1f4d179d5b3f957891e305f6 /src | |
| parent | a665d2f62cb6f841954962005b73fbe371e031b5 (diff) | |
| download | box64-0f11b55ba3c785ebf3bcb6998012621dae69d873.tar.gz box64-0f11b55ba3c785ebf3bcb6998012621dae69d873.zip | |
[RV64_DYNAREC] Added more opcodes (#651)
* [RV64_DYNAREC] Added 66 81,83 /2 ADC opcode
* [RV64_DYNAREC] Added 66 0F 73 /3 PSRLDQ opcode
* [RV64_DYNAREC] Added 66 0F 7E MOVD opcode
* [RV64_DYNAREC] Added F3 0F 5C SUBSS opcode
* [RV64_DYNAREC] Added 0F 54 ANDPS opcode
* [RV64_DYNAREC] Added 66 0F 6C PUNPCKLQDQ opcode
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_0f.c | 10 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_66.c | 10 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_660f.c | 76 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_emit_math.c | 76 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_f30f.c | 8 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.h | 2 |
6 files changed, 171 insertions, 11 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c index ed473df9..9defa364 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f.c +++ b/src/dynarec/rv64/dynarec_rv64_0f.c @@ -228,9 +228,15 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GOCOND(0x40, "CMOV", "Gd, Ed"); #undef GO - + case 0x54: + INST_NAME("ANDPS Gx, Ex"); + nextop = F8; + GETEX(x1, 0); + GETGX(x2); + SSE_LOOP_Q(x3, x4, AND(x3, x3, x4)); + break; case 0x57: - INST_NAME("XORPS"); + INST_NAME("XORPS Gx, Ex"); nextop = F8; //TODO: it might be possible to check if SS or SD are used and not purge them to optimize a bit GETGX(x1); diff --git a/src/dynarec/rv64/dynarec_rv64_66.c b/src/dynarec/rv64/dynarec_rv64_66.c index df0c09c5..94d3b511 100644 --- a/src/dynarec/rv64/dynarec_rv64_66.c +++ b/src/dynarec/rv64/dynarec_rv64_66.c @@ -295,6 +295,16 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni emit_or16(dyn, ninst, x1, x5, x2, x4); EWBACK; break; + case 2: // ADC + if(opcode==0x81) {INST_NAME("ADC Ew, Iw");} else {INST_NAME("ADC Ew, Ib");} + READFLAGS(X_CF); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEW(x1, (opcode==0x81)?2:1); + if(opcode==0x81) i16 = F16S; else i16 = F8S; + MOV64x(x5, i16); + emit_adc16(dyn, ninst, x1, x5, x2, x4, x6); + EWBACK; + break; case 4: // AND if(opcode==0x81) {INST_NAME("AND Ew, Iw");} else {INST_NAME("AND Ew, Ib");} SETFLAGS(X_ALL, SF_SET_PENDING); diff --git a/src/dynarec/rv64/dynarec_rv64_660f.c b/src/dynarec/rv64/dynarec_rv64_660f.c index 7a32c44c..bea17833 100644 --- a/src/dynarec/rv64/dynarec_rv64_660f.c +++ b/src/dynarec/rv64/dynarec_rv64_660f.c @@ -79,7 +79,19 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GOCOND(0x40, "CMOV", "Gw, Ew"); #undef GO - + case 0x6C: + INST_NAME("PUNPCKLQDQ Gx,Ex"); + nextop = F8; + GETGX(x1); + if(MODREG) { + v1 = sse_get_reg(dyn, ninst, x2, (nextop&7)+(rex.b<<3), 0); + FSD(v1, gback, 8); + } else { + addr = 
geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 0, 0); + LD(x3, ed, fixedaddress+0); + SD(x3, gback, 8); + } + break; case 0x6E: INST_NAME("MOVD Gx, Ed"); nextop = F8; @@ -128,6 +140,42 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int DEFAULT; } break; + case 0x73: + nextop = F8; + switch((nextop>>3)&7) { + case 3: + INST_NAME("PSRLDQ Ex, Ib"); + GETEX(x1, 1); + u8 = F8; + if(u8) { + if(u8>15) { + // just zero dest + SD(xZR, x1, fixedaddress+0); + SD(xZR, x1, fixedaddress+8); + } else { + u8*=8; + if (u8 < 64) { + LD(x3, x1, fixedaddress+0); + LD(x4, x1, fixedaddress+8); + SRLI(x3, x3, u8); + SLLI(x5, x4, 64-u8); + OR(x3, x3, x5); + SD(x3, x1, fixedaddress+0); + SRLI(x4, x4, u8); + SD(x4, x1, fixedaddress+8); + } else { + LD(x3, x1, fixedaddress+8); + if (u8-64 > 0) { SRLI(x3, x3, u8-64); } + SD(x3, x1, fixedaddress+0); + SD(xZR, x1, fixedaddress+8); + } + } + } + break; + default: + DEFAULT; + } + break; case 0x76: INST_NAME("PCMPEQD Gx,Ex"); nextop = F8; @@ -135,6 +183,32 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETEX(x2, 0); SSE_LOOP_D(x3, x4, XOR(x3, x3, x4); SNEZ(x3, x3); ADDI(x3, x3, -1)); break; + case 0x7E: + INST_NAME("MOVD Ed,Gx"); + nextop = F8; + GETGX(x1); + if(rex.w) { + if(MODREG) { + ed = xRAX + (nextop&7) + (rex.b<<3); + LD(ed, x1, 0); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 0, 0); + LD(x3, x1, 0); + SD(x3, ed, fixedaddress); + SMWRITE2(); + } + } else { + if(MODREG) { + ed = xRAX + (nextop&7) + (rex.b<<3); + LWU(ed, x1, 0); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 0, 0); + LWU(x3, x1, 0); + SW(x3, ed, fixedaddress); + SMWRITE2(); + } + } + break; case 0xAF: INST_NAME("IMUL Gw,Ew"); SETFLAGS(X_ALL, SF_PENDING); diff --git a/src/dynarec/rv64/dynarec_rv64_emit_math.c b/src/dynarec/rv64/dynarec_rv64_emit_math.c index 5828fd47..65680410 100644 --- 
a/src/dynarec/rv64/dynarec_rv64_emit_math.c +++ b/src/dynarec/rv64/dynarec_rv64_emit_math.c @@ -871,6 +871,70 @@ void emit_neg8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3) } } +// emit ADC16 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch +void emit_adc16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5) +{ + CLEAR_FLAGS(); + IFX(X_PEND) { + SH(s1, xEmu, offsetof(x64emu_t, op1)); + SH(s2, xEmu, offsetof(x64emu_t, op2)); + SET_DF(s3, d_adc16); + } else IFX(X_ALL) { + SET_DFNONE(); + } + IFX(X_AF | X_OF) { + OR(s4, s1, s2); // s3 = op1 | op2 + AND(s5, s1, s2); // s4 = op1 & op2 + } + + ADD(s1, s1, s2); + ANDI(s3, xFlags, 1 << F_CF); + ADD(s1, s1, s3); + + IFX(X_PEND) { + SW(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_AF | X_OF) { + NOT(s2, s1); // s2 = ~res + AND(s3, s2, s4); // s3 = ~res & (op1 | op2) + OR(s3, s3, s5); // cc = (~res & (op1 | op2)) | (op1 & op2) + IFX(X_AF) { + ANDI(s4, s3, 0x08); // AF: cc & 0x08 + BEQZ(s4, 8); + ORI(xFlags, xFlags, 1 << F_AF); + } + IFX(X_OF) { + SRLI(s3, s3, 14); + SRLI(s4, s3, 1); + XOR(s3, s3, s4); + ANDI(s3, s3, 1); // OF: xor of two MSB's of cc + BEQZ(s3, 8); + ORI(xFlags, xFlags, 1 << F_OF2); + } + } + IFX(X_CF) { + SRLI(s3, s1, 16); + BEQZ(s3, 8); + ORI(xFlags, xFlags, 1 << F_CF); + } + + SLLI(s1, s1, 48); + SRLI(s1, s1, 48); + + IFX(X_ZF) { + BNEZ(s1, 8); + ORI(xFlags, xFlags, 1 << F_ZF); + } + IFX(X_SF) { + SRLI(s3, s1, 15); + BEQZ(s3, 8); + ORI(xFlags, xFlags, 1 << F_SF); + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} + // emit ADC32 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch void emit_adc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5) { @@ -878,7 +942,7 @@ void emit_adc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s IFX(X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, op1)); SDxw(s2, xEmu, offsetof(x64emu_t, op2)); - SET_DF(s3, rex.w?d_add64:d_add32b); + SET_DF(s3, 
rex.w?d_adc64:d_adc32b); } else IFX(X_ALL) { SET_DFNONE(); } @@ -903,8 +967,8 @@ void emit_adc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s } } IFX(X_AF | X_OF) { - OR(s3, s1, s2); // s3 = op1 | op2 - AND(s4, s1, s2); // s4 = op1 & op2 + OR(s4, s1, s2); // s3 = op1 | op2 + AND(s5, s1, s2); // s4 = op1 & op2 } ADDxw(s1, s1, s2); @@ -915,9 +979,9 @@ void emit_adc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SDxw(s1, xEmu, offsetof(x64emu_t, res)); } IFX(X_AF | X_OF) { - NOT(s2, s1); // s2 = ~res - AND(s3, s2, s3); // s3 = ~res & (op1 | op2) - OR(s3, s3, s4); // cc = (~res & (op1 | op2)) | (op1 & op2) + NOT(s2, s1); // s2 = ~res + AND(s3, s2, s4); // s3 = ~res & (op1 | op2) + OR(s3, s3, s5); // cc = (~res & (op1 | op2)) | (op1 & op2) IFX(X_AF) { ANDI(s4, s3, 0x08); // AF: cc & 0x08 BEQZ(s4, 8); diff --git a/src/dynarec/rv64/dynarec_rv64_f30f.c b/src/dynarec/rv64/dynarec_rv64_f30f.c index 99ac53ae..31f19c64 100644 --- a/src/dynarec/rv64/dynarec_rv64_f30f.c +++ b/src/dynarec/rv64/dynarec_rv64_f30f.c @@ -119,7 +119,13 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETGXSD_empty(v0); FCVTDS(v0, v1); break; - + case 0x5C: + INST_NAME("SUBSS Gx, Ex"); + nextop = F8; + GETGXSS(v0); + GETEXSS(d0, 0); + FSUBS(v0, v0, d0); + break; case 0x7E: INST_NAME("MOVQ Gx, Ex"); nextop = F8; diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h index b79a1a17..17c7187e 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.h +++ b/src/dynarec/rv64/dynarec_rv64_helper.h @@ -876,7 +876,7 @@ void emit_adc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s //void emit_adc32c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); //void emit_adc8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4); //void emit_adc8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4, int s5); -//void emit_adc16(dynarec_rv64_t* dyn, int ninst, 
int s1, int s2, int s3, int s4); +void emit_adc16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5); //void emit_adc16c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); void emit_sbb32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5); //void emit_sbb32c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); |