diff options
| author | xctan <xctan@cirno.icu> | 2024-05-28 21:04:41 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-05-28 15:04:41 +0200 |
| commit | 7c5bf62fc0747bdda15d58798d6a5f59a18f2a41 (patch) | |
| tree | c636a4c78abb43f42b9d5cf38594b87a3ede16ff /src/dynarec | |
| parent | b79f86b8d1c864cc58d79730a628e72c56ea960d (diff) | |
| download | box64-7c5bf62fc0747bdda15d58798d6a5f59a18f2a41.tar.gz box64-7c5bf62fc0747bdda15d58798d6a5f59a18f2a41.zip | |
[RV64_DYNAREC] Added more MMX opcodes and some optimizations too (#1535)
* [RV64_DYNAREC] Added 0F F7 MASKMOVQ opcode
* [RV64_DYNAREC] Added 0F 38 1C PABSB opcode
* [RV64_DYNAREC] Added 0F 38 1E PABSD opcode
* [RV64_DYNAREC] Added 0F 38 1D PABSW opcode
* [RV64_DYNAREC] Added 0F 63 PACKSSWB opcode
* [RV64_DYNAREC] Added 0F FC PADDB opcode
* [RV64_DYNAREC] Added 0F D4 PADDQ opcode
* [RV64_DYNAREC] Added 0F EC PADDSB opcode and optimized 66 0F EC PADDSB opcode
* [RV64_DYNAREC] Added 0F DC PADDUSB opcode and optimized 66 0F DC PADDUSB opcode
Diffstat (limited to 'src/dynarec')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_0f.c | 156 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_660f.c | 35 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.h | 7 | ||||
| -rw-r--r-- | src/dynarec/rv64/rv64_emitter.h | 10 |
4 files changed, 187 insertions, 21 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c index 7c5dfe01..8fb37279 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f.c +++ b/src/dynarec/rv64/dynarec_rv64_0f.c @@ -444,6 +444,42 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SB(x3, gback, gdoffset + i); } break; + case 0x1C: + INST_NAME("PABSB Gm,Em"); + nextop = F8; + GETGM(); + GETEM(x2, 0); + for (int i = 0; i < 8; ++i) { + LB(x4, wback, fixedaddress + i); + BGE(x4, xZR, 4 + 4); + NEG(x4, x4); + SB(x4, gback, gdoffset + i); + } + break; + case 0x1D: + INST_NAME("PABSW Gm,Em"); + nextop = F8; + GETGM(); + GETEM(x2, 0); + for (int i = 0; i < 4; ++i) { + LH(x4, wback, fixedaddress + i * 2); + BGE(x4, xZR, 4 + 4); + NEG(x4, x4); + SH(x4, gback, gdoffset + i * 2); + } + break; + case 0x1E: + INST_NAME("PABSD Gm,Em"); + nextop = F8; + GETGM(); + GETEM(x2, 0); + for (int i = 0; i < 2; ++i) { + LW(x4, wback, fixedaddress + i * 4); + BGE(x4, xZR, 4 + 4); + NEG(x4, x4); + SW(x4, gback, gdoffset + i * 4); + } + break; case 0xC8 ... 
0xCD: u8 = nextop; switch (u8) { @@ -866,6 +902,44 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni LWU(x3, wback, fixedaddress); SW(x3, gback, gdoffset + 4 * 1); break; + case 0x63: + INST_NAME("PACKSSWB Gm,Em"); + nextop = F8; + GETGM(); + GETEM(x2, 0); + MOV64x(x5, 127); + MOV64x(x6, -128); + for (int i = 0; i < 4; ++i) { + LH(x3, gback, gdoffset + i * 2); + if (rv64_zbb) { + MIN(x3, x3, x5); + MAX(x3, x3, x6); + } else { + BLT(x3, x5, 4 + 4); + MV(x3, x5); + BGE(x3, x6, 4 + 4); + MV(x3, x6); + } + SB(x3, gback, gdoffset + i); + } + if (MODREG && gd == ed) { + LW(x3, gback, gdoffset + 0); + SW(x3, gback, gdoffset + 4); + } else + for (int i = 0; i < 4; ++i) { + LH(x3, wback, fixedaddress + i * 2); + if (rv64_zbb) { + MIN(x3, x3, x5); + MAX(x3, x3, x6); + } else { + BLT(x3, x5, 4 + 4); + MV(x3, x5); + BGE(x3, x6, 4 + 4); + MV(x3, x6); + } + SB(x3, gback, gdoffset + 4 + i); + } + break; case 0x67: INST_NAME("PACKUSWB Gm, Em"); nextop = F8; @@ -1909,6 +1983,16 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni gd = xRAX + (opcode & 7) + (rex.b << 3); REV8xw(gd, gd, x1, x2, x3, x4); break; + case 0xD4: + INST_NAME("PADDQ Gm,Em"); + nextop = F8; + GETGM(); + GETEM(x2, 0); + LD(x1, wback, fixedaddress); + LD(x2, gback, gdoffset); + ADD(x1, x1, x2); + SD(x1, gback, gdoffset); + break; case 0xD5: INST_NAME("PMULLW Gm, Em"); nextop = F8; @@ -1933,6 +2017,25 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni AND(x1, x1, x2); SD(x1, gback, gdoffset); break; + case 0xDC: + INST_NAME("PADDUSB Gm,Em"); + nextop = F8; + GETGM(); + GETEM(x2, 0); + ADDI(x5, xZR, 0xFF); + for (int i = 0; i < 8; ++i) { + LBU(x3, gback, gdoffset + i); + LBU(x4, wback, fixedaddress + i); + ADD(x3, x3, x4); + if (rv64_zbb) { + MINU(x3, x3, x5); + } else { + BLT(x3, x5, 8); + ADDI(x3, xZR, 0xFF); + } + SB(x3, gback, gdoffset + i); + } + break; case 0xE2: INST_NAME("PSRAD Gm, Em"); nextop = F8; @@ 
-2008,6 +2111,34 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni OR(x3, x3, x4); SD(x3, gback, gdoffset); break; + case 0xEC: + INST_NAME("PADDSB Gm,Em"); + nextop = F8; + GETGM(); + GETEM(x2, 0); + ADDI(x5, xZR, 0x7f); + ADDI(x6, xZR, 0xf80); + for (int i = 0; i < 8; ++i) { + // tmp16s = (int16_t)GX->sb[i] + EX->sb[i]; + // GX->sb[i] = (tmp16s>127)?127:((tmp16s<-128)?-128:tmp16s); + LB(x3, gback, gdoffset + i); + LB(x4, wback, fixedaddress + i); + ADDW(x3, x3, x4); + if (rv64_zbb) { + MIN(x3, x3, x5); + MAX(x3, x3, x6); + SB(x3, gback, gdoffset + i); + } else { + BLT(x3, x5, 12); // tmp16s>127? + SB(x5, gback, gdoffset + i); + J(20); // continue + BLT(x6, x3, 12); // tmp16s<-128? + SB(x6, gback, gdoffset + i); + J(8); // continue + SB(x3, gback, gdoffset + i); + } + } + break; case 0xED: INST_NAME("PADDSW Gm,Em"); nextop = F8; @@ -2060,6 +2191,18 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SW(x1, gback, gdoffset + i * 4); } break; + case 0xF7: + INST_NAME("MASKMOVQ Gm, Em"); + nextop = F8; + GETGM(); + GETEM(x5, 0); + for (int i = 0; i < 8; i++) { + LB(x1, wback, fixedaddress + i); + BLT(xZR, x1, 4 * 3); + LB(x2, gback, gdoffset + i); + SB(x2, xRDI, i); + } + break; case 0xF9: INST_NAME("PSUBW Gm, Em"); nextop = F8; @@ -2067,6 +2210,19 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETEM(x2, 0); MMX_LOOP_W(x3, x4, SUBW(x3, x3, x4)); break; + case 0xFC: + INST_NAME("PADDB Gm, Em"); + nextop = F8; + GETGM(); + GETEM(x2, 0); + for (int i = 0; i < 8; ++i) { + // GM->sb[i] += EM->sb[i]; + LB(x3, gback, gdoffset + i); + LB(x4, wback, fixedaddress + i); + ADDW(x3, x3, x4); + SB(x3, gback, gdoffset + i); + } + break; case 0xFD: INST_NAME("PADDW Gm, Em"); nextop = F8; diff --git a/src/dynarec/rv64/dynarec_rv64_660f.c b/src/dynarec/rv64/dynarec_rv64_660f.c index f66bb2aa..bc1ab239 100644 --- a/src/dynarec/rv64/dynarec_rv64_660f.c +++ 
b/src/dynarec/rv64/dynarec_rv64_660f.c @@ -582,7 +582,6 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETGX(); GETEX(x2, 0); - MOV64x(x5, ~(1 << 31)); for (int i = 0; i < 4; ++i) { LW(x4, wback, fixedaddress + i * 4); BGE(x4, xZR, 4 + 4); @@ -2763,8 +2762,12 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int LBU(x3, gback, gdoffset + i); LBU(x4, wback, fixedaddress + i); ADD(x3, x3, x4); - BLT(x3, x5, 8); - ADDI(x3, xZR, 0xFF); + if (rv64_zbb) { + MINU(x3, x3, x5); + } else { + BLT(x3, x5, 8); + ADDI(x3, xZR, 0xFF); + } SB(x3, gback, gdoffset + i); } break; @@ -2993,23 +2996,27 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETGX(); GETEX(x2, 0); + ADDI(x5, xZR, 0x7f); + ADDI(x6, xZR, 0xf80); for (int i = 0; i < 16; ++i) { // tmp16s = (int16_t)GX->sb[i] + EX->sb[i]; // GX->sb[i] = (tmp16s>127)?127:((tmp16s<-128)?-128:tmp16s); LB(x3, gback, gdoffset + i); LB(x4, wback, fixedaddress + i); ADDW(x3, x3, x4); - SLLIW(x3, x3, 16); - SRAIW(x3, x3, 16); - ADDI(x4, xZR, 0x7f); - BLT(x3, x4, 12); // tmp16s>127? - SB(x4, gback, gdoffset + i); - J(24); // continue - ADDI(x4, xZR, 0xf80); - BLT(x4, x3, 12); // tmp16s<-128? - SB(x4, gback, gdoffset + i); - J(8); // continue - SB(x3, gback, gdoffset + i); + if (rv64_zbb) { + MIN(x3, x3, x5); + MAX(x3, x3, x6); + SB(x3, gback, gdoffset + i); + } else { + BLT(x3, x5, 12); // tmp16s>127? + SB(x5, gback, gdoffset + i); + J(20); // continue + BLT(x6, x3, 12); // tmp16s<-128? 
+ SB(x6, gback, gdoffset + i); + J(8); // continue + SB(x3, gback, gdoffset + i); + } } break; case 0xED: diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h index 6d7f63b1..6a374499 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.h +++ b/src/dynarec/rv64/dynarec_rv64_helper.h @@ -174,12 +174,7 @@ #define GETSEW(i, D) \ if (MODREG) { \ wback = xRAX + (nextop & 7) + (rex.b << 3); \ - if (rv64_zbb) \ - SEXTH(i, wback); \ - else { \ - SLLI(i, wback, 48); \ - SRAI(i, i, 48); \ - } \ + SEXTH(i, wback); \ ed = i; \ wb1 = 0; \ } else { \ diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h index 8bdca605..fecfef4d 100644 --- a/src/dynarec/rv64/rv64_emitter.h +++ b/src/dynarec/rv64/rv64_emitter.h @@ -821,7 +821,15 @@ f28–31 ft8–11 FP temporaries Caller // Sign-extend byte #define SEXTB(rd, rs) EMIT(R_type(0b0110000, 0b00100, rs, 0b001, rd, 0b0010011)) // Sign-extend half-word -#define SEXTH(rd, rs) EMIT(R_type(0b0110000, 0b00101, rs, 0b001, rd, 0b0010011)) +#define SEXTH_(rd, rs) EMIT(R_type(0b0110000, 0b00101, rs, 0b001, rd, 0b0010011)) +// Sign-extend half-word +#define SEXTH(rd, rs) \ + if (rv64_zbb) \ + SEXTH_(rd, rs); \ + else { \ + SLLI(rd, rs, 48); \ + SRAI(rd, rd, 48); \ + } // Zero-extend half-word #define ZEXTH_(rd, rs) EMIT(R_type(0b0000100, 0b00000, rs, 0b100, rd, 0b0111011)) // Zero-extend half-word |