diff options
| author | Yang Liu <numbksco@gmail.com> | 2024-04-02 19:44:40 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-04-02 13:44:40 +0200 |
| commit | 10cec3de2f04c48b60c0e0f91614244b4515e6ef (patch) | |
| tree | c5bd2c034ca376bd04db7be7ac51abc57ea58122 | |
| parent | 76c0992d02be79bf15ce9975cfbbc824b593d50d (diff) | |
| download | box64-10cec3de2f04c48b60c0e0f91614244b4515e6ef.tar.gz box64-10cec3de2f04c48b60c0e0f91614244b4515e6ef.zip | |
[LA64_DYNAREC] Added more SSE/SSE2 instructions (#1400)
* [LA64_DYNAREC] Added more SSE/SSE2 instructions
* Thank you test16
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_0f.c | 47 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_660f.c | 56 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_helper.c | 18 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_helper.h | 23 | ||||
| -rw-r--r-- | src/dynarec/la64/la64_emitter.h | 14 |
5 files changed, 154 insertions, 4 deletions
diff --git a/src/dynarec/la64/dynarec_la64_0f.c b/src/dynarec/la64/dynarec_la64_0f.c index 5566183d..cbd5f0e1 100644 --- a/src/dynarec/la64/dynarec_la64_0f.c +++ b/src/dynarec/la64/dynarec_la64_0f.c @@ -103,12 +103,57 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni LOAD_XEMU_REM(); jump_to_epilog(dyn, 0, xRIP, ninst); break; + case 0x11: + INST_NAME("MOVUPS Ex,Gx"); + nextop = F8; + GETG; + v0 = sse_get_reg(dyn, ninst, x1, gd, 0); + if (MODREG) { + ed = (nextop & 7) + (rex.b << 3); + v1 = sse_get_reg_empty(dyn, ninst, x1, ed); + VOR_V(v1, v0, v0); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0); + VST(v0, ed, fixedaddress); + SMWRITE2(); + } + break; + case 0x16: + nextop = F8; + if (MODREG) { + INST_NAME("MOVLHPS Gx,Ex"); + GETGX(v0, 1); + v1 = sse_get_reg(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0); + } else { + INST_NAME("MOVHPS Gx,Ex"); + SMREAD(); + GETGX(v0, 1); + v1 = fpu_get_scratch(dyn); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0); + FLD_D(v1, ed, fixedaddress); + } + VILVL_D(v0, v1, v0); // v0[127:64] = v1[63:0] + break; case 0x1F: INST_NAME("NOP (multibyte)"); nextop = F8; FAKEED; break; - + case 0x29: + INST_NAME("MOVAPS Ex,Gx"); + nextop = F8; + GETG; + v0 = sse_get_reg(dyn, ninst, x1, gd, 0); + if (MODREG) { + ed = (nextop & 7) + (rex.b << 3); + v1 = sse_get_reg_empty(dyn, ninst, x1, ed); + VOR_V(v1, v0, v0); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0); + VST(v0, ed, fixedaddress); + SMWRITE2(); + } + break; #define GO(GETFLAGS, NO, YES, F, I) \ READFLAGS(F); \ if (la64_lbt) { \ diff --git a/src/dynarec/la64/dynarec_la64_660f.c b/src/dynarec/la64/dynarec_la64_660f.c index effaf5bf..8a8a06fa 100644 --- a/src/dynarec/la64/dynarec_la64_660f.c +++ b/src/dynarec/la64/dynarec_la64_660f.c @@ -55,21 +55,71 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, 
uintptr_t ip, int nextop = F8; FAKEED; break; + case 0x6C: + INST_NAME("PUNPCKLQDQ Gx,Ex"); + nextop = F8; + GETGX(v0, 1); + if (MODREG) { + v1 = sse_get_reg(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0); + } else { + v1 = fpu_get_scratch(dyn); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0); + FLD_D(v1, ed, fixedaddress); + } + VILVL_D(v0, v1, v0); // v0[127:64] = v1[63:0] + break; case 0x6E: INST_NAME("MOVD Gx, Ed"); nextop = F8; GETGX_empty(v0); v1 = fpu_get_scratch(dyn); - GETED(0); + if (MODREG) { + ed = TO_LA64((nextop & 7) + (rex.b << 3)); + if (rex.w) { + MOVGR2FR_D(v1, ed); + } else { + MOVGR2FR_W(v1, ed); + } + } else { + SMREAD(); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); + FLDxw(v1, ed, fixedaddress); + } VXOR_V(v0, v0, v0); if (rex.w) { - MOVGR2FR_D(v1, ed); VEXTRINS_D(v0, v1, 0); // v0[63:0] = v1[63:0] } else { - MOVGR2FR_W(v1, ed); VEXTRINS_W(v0, v1, 0); // v0[31:0] = v1[31:0] } break; + case 0x6F: + INST_NAME("MOVDQA Gx,Ex"); + nextop = F8; + if (MODREG) { + v1 = sse_get_reg(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0); + GETGX_empty(v0); + VOR_V(v0, v1, v1); + } else { + GETGX_empty(v0); + SMREAD(); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); + VLD(v0, ed, fixedaddress); + } + break; + case 0xEF: + INST_NAME("PXOR Gx,Ex"); + nextop = F8; + GETG; + if (MODREG && ((nextop & 7) + (rex.b << 3) == gd)) { + // special case for PXOR Gx, Gx + q0 = sse_get_reg_empty(dyn, ninst, x1, gd); + VXOR_V(q0, q0, q0); + } else { + q0 = sse_get_reg(dyn, ninst, x1, gd, 1); + GETEX(q1, 0, 0); + VXOR_V(q0, q0, q1); + } + break; default: DEFAULT; } diff --git a/src/dynarec/la64/dynarec_la64_helper.c b/src/dynarec/la64/dynarec_la64_helper.c index 9ad715e6..7d8b9961 100644 --- a/src/dynarec/la64/dynarec_la64_helper.c +++ b/src/dynarec/la64/dynarec_la64_helper.c @@ -511,6 +511,24 @@ void x87_forget(dynarec_la64_t* dyn, int ninst, int s1, int s2, 
int st) // TODO } +// SSE / SSE2 helpers +// get lsx register for a SSE reg, create the entry if needed +int sse_get_reg(dynarec_la64_t* dyn, int ninst, int s1, int a, int forwrite) +{ + if (dyn->lsx.ssecache[a].v != -1) { + if (forwrite) { + dyn->lsx.ssecache[a].write = 1; // update only if forwrite + dyn->lsx.lsxcache[dyn->lsx.ssecache[a].reg].t = LSX_CACHE_XMMW; + } + return dyn->lsx.ssecache[a].reg; + } + dyn->lsx.ssecache[a].reg = fpu_get_reg_xmm(dyn, forwrite ? LSX_CACHE_XMMW : LSX_CACHE_XMMR, a); + int ret = dyn->lsx.ssecache[a].reg; + dyn->lsx.ssecache[a].write = forwrite; + VLD(ret, xEmu, offsetof(x64emu_t, xmm[a])); + return ret; +} + // get lsx register for an SSE reg, but don't try to synch it if it needed to be created int sse_get_reg_empty(dynarec_la64_t* dyn, int ninst, int s1, int a) { diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h index 74470b68..4d8a8d8c 100644 --- a/src/dynarec/la64/dynarec_la64_helper.h +++ b/src/dynarec/la64/dynarec_la64_helper.h @@ -174,13 +174,33 @@ gd = i; \ BSTRPICK_D(gd, gb1, gb2 + 7, gb2); +// Get GX as a quad (might use x1) +#define GETGX(a, w) \ + gd = ((nextop & 0x38) >> 3) + (rex.r << 3); \ + a = sse_get_reg(dyn, ninst, x1, gd, w) + + #define GETGX_empty(a) \ gd = ((nextop & 0x38) >> 3) + (rex.r << 3); \ a = sse_get_reg_empty(dyn, ninst, x1, gd) +// Get EX as a quad, (x1 is used) +#define GETEX(a, w, D) \ + if (MODREG) { \ + a = sse_get_reg(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), w); \ + } else { \ + SMREAD(); \ + addr = geted(dyn, addr, ninst, nextop, &ed, x3, x2, &fixedaddress, rex, NULL, 1, D); \ + a = fpu_get_scratch(dyn); \ + VLD(a, ed, fixedaddress); \ + } + // Write gb (gd) back to original register / memory, using s1 as scratch #define GBBACK() BSTRINS_D(gb1, gd, gb2 + 7, gb2); +// Generic get GD, but reg value in gd (R_RAX is not added) +#define GETG gd = ((nextop & 0x38) >> 3) + (rex.r << 3) + // Write eb (ed) back to original register / memory, using s1 as 
scratch #define EBBACK() \ if (wb1) { \ @@ -527,6 +547,7 @@ void* la64_next(x64emu_t* emu, uintptr_t addr); #define x87_forget STEPNAME(x87_forget) #define sse_purge07cache STEPNAME(sse_purge07cache) +#define sse_get_reg STEPNAME(sse_get_reg) #define sse_get_reg_empty STEPNAME(sse_get_reg_empty) #define fpu_pushcache STEPNAME(fpu_pushcache) @@ -600,6 +621,8 @@ void x87_forget(dynarec_la64_t* dyn, int ninst, int s1, int s2, int st); // SSE/SSE2 helpers // purge the XMM0..XMM7 cache (before function call) void sse_purge07cache(dynarec_la64_t* dyn, int ninst, int s1); +// get lsx register for a SSE reg, create the entry if needed +int sse_get_reg(dynarec_la64_t* dyn, int ninst, int s1, int a, int forwrite); // get lsx register for an SSE reg, but don't try to synch it if it needed to be created int sse_get_reg_empty(dynarec_la64_t* dyn, int ninst, int s1, int a); diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h index 117bb99e..9e584062 100644 --- a/src/dynarec/la64/la64_emitter.h +++ b/src/dynarec/la64/la64_emitter.h @@ -524,6 +524,11 @@ f24-f31 fs0-fs7 Static registers Callee // MemoryStore(GR[rd][63:0], paddr, DOUBLEWORD) #define ST_D(rd, rj, imm12) EMIT(type_2RI12(0b0010100111, imm12, rj, rd)) +#define FLD_D(fd, rj, imm12) EMIT(type_2RI12(0b0010101110, imm12, rj, fd)) +#define FLD_S(fd, rj, imm12) EMIT(type_2RI12(0b0010101100, imm12, rj, fd)) +#define FST_D(fd, rj, imm12) EMIT(type_2RI12(0b0010101111, imm12, rj, fd)) +#define FST_S(fd, rj, imm12) EMIT(type_2RI12(0b0010101101, imm12, rj, fd)) + #define FADD_S(fd, fj, fk) EMIT(type_3R(0b00000001000000001, fk, fj, fd)) #define FADD_D(fd, fj, fk) EMIT(type_3R(0b00000001000000010, fk, fj, fd)) #define FSUB_S(fd, fj, fk) EMIT(type_3R(0b00000001000000101, fk, fj, fd)) @@ -1671,6 +1676,15 @@ LSX instruction starts with V, LASX instruction starts with XV. 
LD_D(rd, rj, imm12); \ } while (0) +#define FLDxw(rd, rj, imm12) \ + do { \ + if (rex.w) \ + FLD_D(rd, rj, imm12); \ + else \ + FLD_S(rd, rj, imm12); \ + } while (0) + + #define SDxw(rd, rj, imm12) \ do { \ if (rex.w) \ |