diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2023-03-26 12:28:53 +0000 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2023-03-26 12:28:53 +0000 |
| commit | 85f6d8308a0dccda7880fd973f0d3c0bc61d4a31 (patch) | |
| tree | 110d6587a5e1c5beeae909222b62852e9417b917 /src | |
| parent | 06f2750eefeedfeb465a56ea69a9978774017ad7 (diff) | |
| download | box64-85f6d8308a0dccda7880fd973f0d3c0bc61d4a31.tar.gz box64-85f6d8308a0dccda7880fd973f0d3c0bc61d4a31.zip | |
[RV64_DYNAREC] Added a bunch of opcodes, plus some improvements/fixes to SSE macros
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_00.c | 59 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_0f.c | 43 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_660f.c | 18 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_emit_math.c | 55 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_emit_shift.c | 54 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_f30f.c | 17 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.h | 21 |
7 files changed, 248 insertions, 19 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_00.c b/src/dynarec/rv64/dynarec_rv64_00.c index 4c7d0247..3d393fdc 100644 --- a/src/dynarec/rv64/dynarec_rv64_00.c +++ b/src/dynarec/rv64/dynarec_rv64_00.c @@ -751,7 +751,47 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SMWRITELOCK(lock); } break; - + case 0x8A: + INST_NAME("MOV Gb, Eb"); + nextop = F8; + gd = ((nextop&0x38)>>3)+(rex.r<<3); + if(rex.rex) { + gb2 = 0; + gb1 = xRAX + gd; + } else { + gb2 = ((gd&4)>>2); + gb1 = xRAX+(gd&3); + } + gd = x4; + if(MODREG) { + ed = (nextop&7) + (rex.b<<3); + if(rex.rex) { + eb1 = xRAX+ed; + eb2 = 0; + } else { + eb1 = xRAX+(ed&3); // Ax, Cx, Dx or Bx + eb2 = ((ed&4)>>2); // L or H + } + if(eb2) { + SRLI(x1, eb1, 8); + ANDI(x1, x1, 0xff); + } else { + ANDI(x1, eb1, 0xff); + } + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, &lock, 1, 0); + SMREADLOCK(lock); + LB(x1, ed, fixedaddress); + } + if(gb2) { + MOV64x(x4, ~0xff00); + AND(gb1, gb1, x4); + SLLI(x1, x1, 8); + } else { + ANDI(gb1, gb1, ~0xff); + } + OR(gb1, gb1, x1); + break; case 0x8B: INST_NAME("MOV Gd, Ed"); nextop=F8; @@ -1312,6 +1352,16 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni emit_shl32(dyn, ninst, rex, ed, x3, x5, x4, x6); WBACK; break; + case 5: + INST_NAME("SHR Ed, CL"); + SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + ANDI(x3, xRCX, rex.w?0x3f:0x1f); + GETED(0); + if(!rex.w && MODREG) {ZEROUP(ed);} + CBZ_NEXT(x3); + emit_shr32(dyn, ninst, rex, ed, x3, x5, x4); + WBACK; + break; case 7: INST_NAME("SAR Ed, CL"); SETFLAGS(X_ALL, SF_PENDING); @@ -1511,6 +1561,13 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni NOT(x1, x1); EBBACK(x5, 1); break; + case 3: + INST_NAME("NEG Eb"); + SETFLAGS(X_ALL, SF_SET_PENDING); + GETEB(x1, 0); + emit_neg8(dyn, ninst, x1, x2, x4); + EBBACK(x5, 0); + break; case 4: INST_NAME("MUL AL, Ed"); SETFLAGS(X_ALL, SF_PENDING); 
diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c index a2449465..543ffae8 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f.c +++ b/src/dynarec/rv64/dynarec_rv64_0f.c @@ -31,7 +31,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni uint8_t opcode = F8; uint8_t nextop, u8; uint8_t gd, ed; - uint8_t wback, wb2; + uint8_t wback, wb2, gback; uint8_t eb1, eb2; int32_t i32, i32_; int cacheupd = 0; @@ -44,6 +44,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni int64_t fixedaddress; int unscaled; MAYUSE(wb2); + MAYUSE(gback); MAYUSE(eb1); MAYUSE(eb2); MAYUSE(q0); @@ -111,6 +112,30 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; + case 0x10: + INST_NAME("MOVUPS Gx,Ex"); + nextop = F8; + GETGX(x1); + GETEX(x2, 0); + LD(x3, wback, fixedaddress+0); + LD(x4, wback, fixedaddress+8); + SD(x3, gback, 0); + SD(x4, gback, 8); + break; + case 0x11: + INST_NAME("MOVUPS Ex,Gx"); + nextop = F8; + GETGX(x1); + GETEX(x2, 0); + LD(x3, gback, 0); + LD(x4, gback, 8); + SD(x3, wback, fixedaddress+0); + SD(x4, wback, fixedaddress+8); + if(!MODREG) + SMWRITE2(); + break; + + case 0x18: nextop = F8; if((nextop&0xC0)==0xC0) { @@ -169,6 +194,22 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GOCOND(0x40, "CMOV", "Gd, Ed"); #undef GO + case 0x57: + INST_NAME("XORPS"); + nextop = F8; + //TODO: it might be possible to check if SS or SD are used and not purge them to optimize a bit + GETGX(x1); + if(MODREG && gd==(nextop&7)+(rex.b<<3)) + { + // just zero dest + SD(xZR, x1, 0); + SD(xZR, x1, 8); + } else { + GETEX(x2, 0); + SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4)); + } + break; + case 0x77: INST_NAME("EMMS"); // empty MMX, FPU now usable diff --git a/src/dynarec/rv64/dynarec_rv64_660f.c b/src/dynarec/rv64/dynarec_rv64_660f.c index 03e44fc8..5299f162 100644 --- a/src/dynarec/rv64/dynarec_rv64_660f.c +++ 
b/src/dynarec/rv64/dynarec_rv64_660f.c @@ -30,7 +30,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int uint8_t nextop, u8; int32_t i32; uint8_t gd, ed; - uint8_t wback, wb1, wb2; + uint8_t wback, wb1, wb2, gback; uint8_t eb1, eb2; int64_t j64; uint64_t tmp64u, tmp64u2; @@ -126,22 +126,14 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("PXOR Gx, Ex"); nextop = F8; GETGX(x1); - GETEX(x2, 0); - if(gd==ed) { + if(MODREG && gd==(nextop&7)+(rex.b<<3)) + { // just zero dest SD(xZR, x1, 0); SD(xZR, x1, 8); } else { - //1st - LD(x3, x1, 0); - LD(x4, x2, 0); - XOR(x3, x3, x4); - SD(x3, x1, 0); - // 2nd - LD(x3, x1, 8); - LD(x4, x2, 8); - XOR(x3, x3, x4); - SD(x3, x1, 8); + GETEX(x2, 0); + SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4)); } break; diff --git a/src/dynarec/rv64/dynarec_rv64_emit_math.c b/src/dynarec/rv64/dynarec_rv64_emit_math.c index bd2dbb96..e3125f31 100644 --- a/src/dynarec/rv64/dynarec_rv64_emit_math.c +++ b/src/dynarec/rv64/dynarec_rv64_emit_math.c @@ -759,6 +759,61 @@ void emit_neg32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s } } +// emit NEG8 instruction, from s1, store result in s1 using s2 and s3 as scratch +void emit_neg8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3) +{ + CLEAR_FLAGS(); + IFX(X_PEND) { + SB(s1, xEmu, offsetof(x64emu_t, op1)); + SET_DF(s3, d_neg8); + } else IFX(X_ALL) { + SET_DFNONE(); + } + IFX(X_AF | X_OF) { + MV(s3, s1); // s3 = op1 + } + + NEG(s1, s1); + ANDI(s1, s1, 0xff); + IFX(X_PEND) { + SB(s1, xEmu, offsetof(x64emu_t, res)); + } + + IFX(X_CF) { + BEQZ(s1, 8); + ORI(xFlags, xFlags, 1 << F_CF); + } + + IFX(X_AF | X_OF) { + OR(s3, s1, s3); // s3 = res | op1 + IFX(X_AF) { + /* af = bc & 0x8 */ + ANDI(s2, s3, 8); + BEQZ(s2, 8); + ORI(xFlags, xFlags, 1 << F_AF); + } + IFX(X_OF) { + /* of = ((bc >> (width-2)) ^ (bc >> (width-1))) & 0x1; */ + SRLI(s2, s3, 6); + SRLI(s3, s2, 1); + XOR(s2, s2, s3); + ANDI(s2, s2, 1); + BEQZ(s2, 8); + 
ORI(xFlags, xFlags, 1 << F_OF2); + } + } + IFX(X_SF) { + ANDI(s3, s1, 1 << F_SF); // 1<<F_SF is sign bit, so just mask + OR(xFlags, xFlags, s3); + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s2); + } + IFX(X_ZF) { + BNEZ(s1, 8); + ORI(xFlags, xFlags, 1 << F_ZF); + } +} // emit ADC32 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch void emit_adc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5) diff --git a/src/dynarec/rv64/dynarec_rv64_emit_shift.c b/src/dynarec/rv64/dynarec_rv64_emit_shift.c index a9e3a2b9..1ecb57c6 100644 --- a/src/dynarec/rv64/dynarec_rv64_emit_shift.c +++ b/src/dynarec/rv64/dynarec_rv64_emit_shift.c @@ -130,6 +130,60 @@ void emit_shl32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, } } +// emit SHR32 instruction, from s1 , shift s2 (!0 and and'd already), store result in s1 using s3 and s4 as scratch +void emit_shr32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4) +{ + int64_t j64; + + CLEAR_FLAGS(); + + IFX(X_PEND) { + SDxw(s2, xEmu, offsetof(x64emu_t, op2)); + SDxw(s1, xEmu, offsetof(x64emu_t, op1)); + SET_DF(s4, rex.w?d_shr64:d_shr32); + } else IFX(X_ALL) { + SET_DFNONE(); + } + + IFX(X_CF) { + SUBI(s3, s2, 1); + SRA(s3, s1, s3); + ANDI(s3, s3, 1); // LSB + BEQZ(s3, 8); + ORI(xFlags, xFlags, 1 << F_CF); + } + + SRL(s1, s1, s2); + + IFX(X_SF) { + BGE(s1, xZR, 8); + ORI(xFlags, xFlags, 1 << F_SF); + } + if (!rex.w) { + ZEROUP(s1); + } + IFX(X_PEND) { + SDxw(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_ZF) { + BNEZ(s1, 8); + ORI(xFlags, xFlags, 1 << F_ZF); + } + IFX(X_OF) { + ADDI(s3, xZR, 1); + BEQ(s2, s3, 4+6*4); + SRLI(s3, s1, rex.w?62:30); + SRLI(s4, s1, rex.w?63:31); + XOR(s3, s3, s4); + ANDI(s3, s3, 1); + BEQZ(s3, 8); + ORI(xFlags, xFlags, 1 << F_OF2); + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} + // emit SHR32 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch void 
emit_shr32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4) { diff --git a/src/dynarec/rv64/dynarec_rv64_f30f.c b/src/dynarec/rv64/dynarec_rv64_f30f.c index c50196e0..99ac53ae 100644 --- a/src/dynarec/rv64/dynarec_rv64_f30f.c +++ b/src/dynarec/rv64/dynarec_rv64_f30f.c @@ -120,6 +120,23 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int FCVTDS(v0, v1); break; + case 0x7E: + INST_NAME("MOVQ Gx, Ex"); + nextop = F8; + // Will load Gx as SD. Is that a good choice? + if(MODREG) { + v1 = sse_get_reg(dyn, ninst, x1, (nextop&7) + (rex.b<<3), 0); + GETGXSD_empty(v0); + FMVD(v0, v1); + } else { + GETGXSD_empty(v0); + SMREAD(); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0); + FLD(v0, ed, fixedaddress); + } + SD(xZR, xEmu, offsetof(x64emu_t, xmm[gd])+8); + break; + default: DEFAULT; } diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h index 223fc5a3..cb46cf2b 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.h +++ b/src/dynarec/rv64/dynarec_rv64_helper.h @@ -292,25 +292,38 @@ FLW(a, ed, fixedaddress); \ } -// Will get pointer to GX in general register a, will purge SS or SD if loaded +// Will get pointer to GX in general register a, will purge SS or SD if loaded. can use gback as load address #define GETGX(a) \ gd = ((nextop&0x38)>>3)+(rex.r<<3); \ sse_forget_reg(dyn, ninst, gd); \ + gback = a; \ ADDI(a, xEmu, offsetof(x64emu_t, xmm[gd])) -// Get Ex address in regenal register a, will purge SS or SD or it's reg and is loaded. May use x3 +// Get Ex address in regenal register a, will purge SS or SD or it's reg and is loaded. May use x3. Use wback as load adress! 
#define GETEX(a, D) \ if(MODREG) { \ ed = (nextop&7)+(rex.b<<3); \ sse_forget_reg(dyn, ninst, ed); \ fixedaddress = 0; \ ADDI(a, xEmu, offsetof(x64emu_t, xmm[ed])); \ + wback = a; \ } else { \ SMREAD(); \ ed=16; \ addr = geted(dyn, addr, ninst, nextop, &wback, a, x3, &fixedaddress, rex, NULL, 1, D); \ } +// Loop for SSE opcode that use 64bits value and write to GX. +#define SSE_LOOP_Q(GX1, EX1, F) \ + LD(GX1, gback, 0); \ + LD(EX1, wback, fixedaddress+0); \ + F; \ + SD(GX1, gback, 0); \ + LD(GX1, gback, 8); \ + LD(EX1, wback, fixedaddress+8); \ + F; \ + SD(GX1, gback, 8) + // CALL will use x6 for the call address. Return value can be put in ret (unless ret is -1) // R0 will not be pushed/popd if ret is -2 #define CALL(F, ret) call_c(dyn, ninst, F, x6, ret, 1, 0) @@ -830,10 +843,10 @@ void emit_sbb8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s //void emit_sbb16c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); void emit_neg32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3); //void emit_neg16(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4); -//void emit_neg8(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4); +void emit_neg8(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4); void emit_shl32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5); void emit_shl32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4, int s5); -//void emit_shr32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); +void emit_shr32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); void emit_shr32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4); void emit_sar32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4); //void emit_rol32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4); |