diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2023-04-09 01:22:00 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-04-08 19:22:00 +0200 |
| commit | d9c30c8942888d609d89df5b8ea29071d2663b46 (patch) | |
| tree | 2e0275ef7f49151fd2d286e8bc98c16d493d3d11 /src | |
| parent | 6f3d70c69f2b3ebd013a36bded8efe61e9dbc463 (diff) | |
| download | box64-d9c30c8942888d609d89df5b8ea29071d2663b46.tar.gz box64-d9c30c8942888d609d89df5b8ea29071d2663b46.zip | |
[RV64_DYNAREC] Added more opcodes for SV and some fixes (#676)
* [RV64_DYNAREC] Added 66 0F 69 PUNPCKHWD opcode
* [RV64_DYNAREC] Added 66 0F D7 PMOVMSKB opcode
* [RV64_DYNAREC] Added 00 ADD opcode
* [RV64_DYNAREC] Fixed emit_xor32
* [RV64_DYNAREC] Added 66 0F 5C SUBPD opcode
* [RV64_DYNAREC] Added 66 0F EE PMAXSW opcode
* [RV64_DYNAREC] Added 66 0F 11 MOVUPD opcode
* [RV64_DYNAREC] Added 66 0F 10 MOVUPD opcode
* [RV64_DYNAREC] Added 66 0F 29 MOVAPD opcode
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_00.c | 9 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_660f.c | 75 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_emit_logic.c | 2 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.h | 43 |
4 files changed, 114 insertions, 15 deletions
```diff
diff --git a/src/dynarec/rv64/dynarec_rv64_00.c b/src/dynarec/rv64/dynarec_rv64_00.c
index 2454662d..34c3fedb 100644
--- a/src/dynarec/rv64/dynarec_rv64_00.c
+++ b/src/dynarec/rv64/dynarec_rv64_00.c
@@ -53,6 +53,15 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
     MAYUSE(cacheupd);
 
     switch(opcode) {
+        case 0x00:
+            INST_NAME("ADD Eb, Gb");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            nextop = F8;
+            GETEB(x1, 0);
+            GETGB(x2);
+            emit_add8(dyn, ninst, x1, x2, x4, x5);
+            EBBACK(x5, 0);
+            break;
         case 0x01:
             INST_NAME("ADD Ed, Gd");
             SETFLAGS(X_ALL, SF_SET_PENDING);
diff --git a/src/dynarec/rv64/dynarec_rv64_660f.c b/src/dynarec/rv64/dynarec_rv64_660f.c
index cbffe1c1..2a19e9ec 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f.c
@@ -49,6 +49,21 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
     MAYUSE(j64);
 
     switch(opcode) {
+        case 0x10:
+            INST_NAME("MOVUPD Gx,Ex");
+            nextop = F8;
+            GETEX(x1, 0);
+            GETGX(x2);
+            SSE_LOOP_MV_Q(x3);
+            break;
+        case 0x11:
+            INST_NAME("MOVUPD Ex,Gx");
+            nextop = F8;
+            GETEX(x1, 0);
+            GETGX(x2);
+            SSE_LOOP_MV_Q2(x3);
+            if(!MODREG) SMWRITE2();
+            break;
         case 0x14:
             INST_NAME("UNPCKLPD Gx, Ex");
             nextop = F8;
@@ -92,6 +107,14 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             GETGX(x2);
             SSE_LOOP_MV_Q(x3);
             break;
+        case 0x29:
+            INST_NAME("MOVAPD Ex,Gx");
+            nextop = F8;
+            GETEX(x1, 0);
+            GETGX(x2);
+            SSE_LOOP_MV_Q2(x3);
+            if(!MODREG) SMWRITE2();
+            break;
         case 0x2E:
             // no special check...
         case 0x2F:
@@ -155,6 +178,14 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             GETGX(x2);
             SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4));
             break;
+        case 0x5C:
+            INST_NAME("SUBPD Gx, Ex");
+            nextop = F8;
+            //TODO: fastnan handling
+            GETEX(x1, 0);
+            GETGX(x2);
+            SSE_LOOP_FQ(x3, x4, FSUBD(v0, v0, v1));
+            break;
         case 0x61:
             INST_NAME("PUNPCKLWD Gx,Ex");
             nextop = F8;
@@ -194,6 +225,30 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             LWU(x3, x1, fixedaddress+0*4);
             SW(x3, x2, 1*4);
             break;
+        case 0x69:
+            INST_NAME("PUNPCKHWD Gx,Ex");
+            nextop = F8;
+            GETEX(x1, 0);
+            GETGX(x2);
+            for(int i=0; i<4; ++i) {
+                // GX->uw[2 * i] = GX->uw[i + 4];
+                LHU(x3, gback, (i+4)*2);
+                SH(x3, gback, 2*i*2);
+            }
+            if (MODREG && (ed==gd)) {
+                for(int i=0; i<4; ++i) {
+                    // GX->uw[2 * i + 1] = GX->uw[2 * i];
+                    LHU(x3, gback, 2*i*2);
+                    SH(x3, gback, (2*i+1)*2);
+                }
+            } else {
+                for(int i=0; i<4; ++i) {
+                    // GX->uw[2 * i + 1] = EX->uw[i + 4];
+                    LHU(x3, wback, fixedaddress+(i+4)*2);
+                    SH(x3, gback, (2*i+1)*2);
+                }
+            }
+            break;
         case 0x6C:
             INST_NAME("PUNPCKLQDQ Gx,Ex");
             nextop = F8;
@@ -485,6 +540,19 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 SMWRITE2();
             }
             break;
+        case 0xD7:
+            INST_NAME("PMOVMSKB Gd, Ex");
+            nextop = F8;
+            GETEX(x2, 0);
+            GETGD;
+            MV(gd, xZR);
+            for (int i=0; i<16; ++i) {
+                LB(x1, wback, fixedaddress+i);
+                SLT(x3, x1, xZR);
+                if (i > 0) SLLI(x3, x3, i);
+                OR(gd, gd, x3);
+            }
+            break;
         case 0xDB:
             INST_NAME("PAND Gx,Ex");
             nextop = F8;
@@ -506,6 +574,13 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             GETEX(x2, 0);
             SSE_LOOP_Q(x3, x4, OR(x3, x3, x4));
             break;
+        case 0xEE:
+            INST_NAME("PMAXSW Gx,Ex");
+            nextop = F8;
+            GETGX(x1);
+            GETEX(x2, 0);
+            SSE_LOOP_WS(x3, x4, BGE(x3, x4, 8); MV(x3, x4));
+            break;
         case 0xEF:
             INST_NAME("PXOR Gx, Ex");
             nextop = F8;
diff --git a/src/dynarec/rv64/dynarec_rv64_emit_logic.c b/src/dynarec/rv64/dynarec_rv64_emit_logic.c
index 693a15c1..6d17895f 100644
--- a/src/dynarec/rv64/dynarec_rv64_emit_logic.c
+++ b/src/dynarec/rv64/dynarec_rv64_emit_logic.c
@@ -93,6 +93,7 @@ void emit_xor32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
 
     // test sign bit before zeroup.
     IFX(X_SF) {
+        if (!rex.w) SEXT_W(s1, s1);
         BGE(s1, xZR, 8);
         ORI(xFlags, xFlags, 1 << F_SF);
     }
@@ -132,6 +133,7 @@ void emit_xor32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i
 
     // test sign bit before zeroup.
     IFX(X_SF) {
+        if (!rex.w) SEXT_W(s1, s1);
         BGE(s1, xZR, 8);
         ORI(xFlags, xFlags, 1 << F_SF);
     }
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h
index 62aaf79d..b1947e7f 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.h
+++ b/src/dynarec/rv64/dynarec_rv64_helper.h
@@ -335,12 +335,6 @@
     F; \
     SW(GX1, gback, i*4);
 
-#define SSE_LOOP_W_ITEM(GX1, EX1, F, i) \
-    LHU(GX1, gback, i*2); \
-    LHU(EX1, wback, fixedaddress+i*2); \
-    F; \
-    SH(GX1, gback, i*2);
-
 // Loop for SSE opcode that use 32bits value and write to GX.
 #define SSE_LOOP_D(GX1, EX1, F) \
     SSE_LOOP_D_ITEM(GX1, EX1, F, 0) \
@@ -348,16 +342,21 @@
     SSE_LOOP_D_ITEM(GX1, EX1, F, 2) \
     SSE_LOOP_D_ITEM(GX1, EX1, F, 3)
 
-#define SSE_LOOP_W(GX1, EX1, F) \
-    SSE_LOOP_W_ITEM(GX1, EX1, F, 0) \
-    SSE_LOOP_W_ITEM(GX1, EX1, F, 1) \
-    SSE_LOOP_W_ITEM(GX1, EX1, F, 2) \
-    SSE_LOOP_W_ITEM(GX1, EX1, F, 3) \
-    SSE_LOOP_W_ITEM(GX1, EX1, F, 4) \
-    SSE_LOOP_W_ITEM(GX1, EX1, F, 5) \
-    SSE_LOOP_W_ITEM(GX1, EX1, F, 6) \
-    SSE_LOOP_W_ITEM(GX1, EX1, F, 7)
+#define SSE_LOOP_W(GX1, EX1, F) \
+    for (int i=0; i<8; ++i) { \
+        LHU(GX1, gback, i*2); \
+        LHU(EX1, wback, fixedaddress+i*2); \
+        F; \
+        SH(GX1, gback, i*2); \
+    }
 
+#define SSE_LOOP_WS(GX1, EX1, F) \
+    for (int i=0; i<8; ++i) { \
+        LH(GX1, gback, i*2); \
+        LH(EX1, wback, fixedaddress+i*2); \
+        F; \
+        SH(GX1, gback, i*2); \
+    }
 
 #define SSE_LOOP_DS_ITEM(EX1, F, i) \
     LWU(EX1, wback, fixedaddress+i*4); \
@@ -382,6 +381,20 @@
     SSE_LOOP_Q_ITEM(GX1, EX1, F, 0) \
     SSE_LOOP_Q_ITEM(GX1, EX1, F, 1)
 
+
+#define SSE_LOOP_FQ_ITEM(GX1, EX1, F, i) \
+    v0 = sse_get_reg_empty(dyn, ninst, x5, GX1, 0); \
+    FLD(v0, gback, i*8); \
+    v1 = sse_get_reg_empty(dyn, ninst, x5, EX1, 0); \
+    FLD(v1, wback, fixedaddress+i*8); \
+    F; \
+    FSD(v0, gback, i*8);
+
+#define SSE_LOOP_FQ(GX1, EX1, F) \
+    SSE_LOOP_FQ_ITEM(GX1, EX1, F, 0) \
+    SSE_LOOP_FQ_ITEM(GX1, EX1, F, 1)
+
+
 #define SSE_LOOP_MV_Q_ITEM(s, i) \
     LD(s, wback, fixedaddress+i*8); \
     SD(s, gback, i*8);
```