diff options
Diffstat (limited to 'src')
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_660f38.c      | 55
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_avx_66_0f38.c | 65
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_helper.h      | 28
3 files changed, 83 insertions, 65 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_660f38.c b/src/dynarec/rv64/dynarec_rv64_660f38.c index b3088fd2..4e33ca33 100644 --- a/src/dynarec/rv64/dynarec_rv64_660f38.c +++ b/src/dynarec/rv64/dynarec_rv64_660f38.c @@ -141,23 +141,13 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode, INST_NAME("PHADDSW Gx, Ex"); nextop = F8; GETGX(); - MOV64x(x5, 32767); - MOV64x(x6, -32768); for (int i = 0; i < 4; ++i) { // tmp32s = GX->sw[i*2+0]+GX->sw[i*2+1]; // GX->sw[i] = sat(tmp32s); LH(x3, gback, gdoffset + 2 * (i * 2 + 0)); LH(x4, gback, gdoffset + 2 * (i * 2 + 1)); ADDW(x3, x3, x4); - if (cpuext.zbb) { - MIN(x3, x3, x5); - MAX(x3, x3, x6); - } else { - BLT(x3, x5, 4 + 4); - MV(x3, x5); - BLT(x6, x3, 4 + 4); - MV(x3, x6); - } + SAT16(x3, x6); SH(x3, gback, gdoffset + i * 2); } if (MODREG && gd == (nextop & 7) + (rex.b << 3)) { @@ -172,15 +162,7 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode, LH(x3, wback, fixedaddress + 2 * (i * 2 + 0)); LH(x4, wback, fixedaddress + 2 * (i * 2 + 1)); ADDW(x3, x3, x4); - if (cpuext.zbb) { - MIN(x3, x3, x5); - MAX(x3, x3, x6); - } else { - BLT(x3, x5, 4 + 4); - MV(x3, x5); - BLT(x6, x3, 4 + 4); - MV(x3, x6); - } + SAT16(x3, x6); SH(x3, gback, gdoffset + 2 * (4 + i)); } } @@ -190,8 +172,6 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode, nextop = F8; GETGX(); GETEX(x2, 0, 15); - MOV64x(x5, 32767); - MOV64x(x6, -32768); for (int i = 0; i < 8; ++i) { LBU(x3, gback, gdoffset + i * 2); LB(x4, wback, fixedaddress + i * 2); @@ -200,15 +180,7 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode, LB(x4, wback, fixedaddress + i * 2 + 1); MUL(x3, x3, x4); ADD(x3, x3, x7); - if (cpuext.zbb) { - MIN(x3, x3, x5); - MAX(x3, x3, x6); - } else { - BLT(x3, x5, 4 + 4); - MV(x3, x5); - BLT(x6, x3, 4 + 4); - MV(x3, x6); - } + SAT16(x3, x6); SH(x3, gback, gdoffset + i * 2); } break; @@ -497,18 +469,9 @@ uintptr_t 
dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode, nextop = F8; GETGX(); GETEX(x2, 0, 12); - MOV64x(x5, 65535); for (int i = 0; i < 4; ++i) { LW(x3, gback, gdoffset + i * 4); - if (cpuext.zbb) { - MIN(x3, x3, x5); - MAX(x3, x3, xZR); - } else { - BGE(x3, xZR, 4 + 4); - MV(x3, xZR); - BLT(x3, x5, 4 + 4); - MV(x3, x5); - } + SATU16(x3, x5); SH(x3, gback, gdoffset + i * 2); } if (MODREG && gd == ed) { @@ -517,15 +480,7 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode, } else for (int i = 0; i < 4; ++i) { LW(x3, wback, fixedaddress + i * 4); - if (cpuext.zbb) { - MIN(x3, x3, x5); - MAX(x3, x3, xZR); - } else { - BGE(x3, xZR, 4 + 4); - MV(x3, xZR); - BLT(x3, x5, 4 + 4); - MV(x3, x5); - } + SATU16(x3, x5); SH(x3, gback, gdoffset + 8 + i * 2); } break; diff --git a/src/dynarec/rv64/dynarec_rv64_avx_66_0f38.c b/src/dynarec/rv64/dynarec_rv64_avx_66_0f38.c index bb41fbdf..4e70bc8c 100644 --- a/src/dynarec/rv64/dynarec_rv64_avx_66_0f38.c +++ b/src/dynarec/rv64/dynarec_rv64_avx_66_0f38.c @@ -104,9 +104,13 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i } break; case 0x01: - INST_NAME("VPHADDW Gx, Vx, Ex"); + case 0x03: + if (opcode == 0x01) + INST_NAME("VPHADDW Gx, Vx, Ex"); + else + INST_NAME("VPHADDSW Gx, Vx, Ex"); nextop = F8; - GETEX(x1, 0, vex.l ? 46 : 14); + GETEX(x1, 0, vex.l ? 
30 : 14); GETGX(); GETVX(); GETGY(); @@ -125,6 +129,7 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i LH(x3, vback, vxoffset + 2 * (i * 2 + 0)); LH(x4, vback, vxoffset + 2 * (i * 2 + 1)); ADDW(x3, x3, x4); + if (opcode == 0x03) SAT16(x3, x6); SH(x3, gback, gdoffset + 2 * i); } if (MODREG && ed == vex.v) { @@ -137,6 +142,7 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i LH(x3, wback, fixedaddress + 2 * (i * 2 + 0)); LH(x4, wback, fixedaddress + 2 * (i * 2 + 1)); ADDW(x3, x3, x4); + if (opcode == 0x03) SAT16(x3, x6); SH(x3, gback, gdoffset + 2 * (4 + i)); } } @@ -156,6 +162,7 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i LH(x3, vback, vyoffset + 2 * (i * 2 + 0)); LH(x4, vback, vyoffset + 2 * (i * 2 + 1)); ADDW(x3, x3, x4); + if (opcode == 0x03) SAT16(x3, x6); SH(x3, gback, gyoffset + 2 * i); } if (MODREG && ed == vex.v) { @@ -168,6 +175,7 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i LH(x3, wback, fixedaddress + 2 * (i * 2 + 0)); LH(x4, wback, fixedaddress + 2 * (i * 2 + 1)); ADDW(x3, x3, x4); + if (opcode == 0x03) SAT16(x3, x6); SH(x3, gback, gyoffset + 2 * (4 + i)); } } @@ -179,7 +187,7 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i case 0x02: INST_NAME("VPHADDD Gx, Vx, Ex"); nextop = F8; - GETEX(x1, 0, vex.l ? 44 : 12); + GETEX(x1, 0, vex.l ? 28 : 12); GETGX(); GETVX(); GETGY(); @@ -249,10 +257,51 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SD(xZR, gback, gyoffset + 8); } break; + case 0x04: + INST_NAME("VPMADDUBSW Gx, Vx, Ex"); + nextop = F8; + GETEX(x1, 0, vex.l ? 
31 : 15); + GETGX(); + GETVX(); + GETGY(); + GETVY(); + for (int i = 0; i < 8; ++i) { + LBU(x3, vback, vxoffset + i * 2); + LB(x4, wback, fixedaddress + i * 2); + MUL(x7, x3, x4); + LBU(x3, vback, vxoffset + i * 2 + 1); + LB(x4, wback, fixedaddress + i * 2 + 1); + MUL(x3, x3, x4); + ADD(x3, x3, x7); + SAT16(x3, x6); + SH(x3, gback, gdoffset + i * 2); + } + if (vex.l) { + GETEY(); + for (int i = 0; i < 8; ++i) { + LBU(x3, vback, vyoffset + i * 2); + LB(x4, wback, fixedaddress + i * 2); + MUL(x7, x3, x4); + LBU(x3, vback, vyoffset + i * 2 + 1); + LB(x4, wback, fixedaddress + i * 2 + 1); + MUL(x3, x3, x4); + ADD(x3, x3, x7); + SAT16(x3, x6); + SH(x3, gback, gyoffset + i * 2); + } + } else { + SD(xZR, gback, gyoffset + 0); + SD(xZR, gback, gyoffset + 8); + } + break; case 0x05: - INST_NAME("VPHSUBW Gx, Vx, Ex"); + case 0x07: + if (opcode == 0x05) + INST_NAME("VPHSUBW Gx, Vx, Ex"); + else + INST_NAME("VPHSUBSW Gx, Vx, Ex"); nextop = F8; - GETEX(x1, 0, vex.l ? 46 : 14); + GETEX(x1, 0, vex.l ? 
30 : 14); GETGX(); GETVX(); GETGY(); @@ -271,6 +320,7 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i LH(x3, vback, vxoffset + 2 * (i * 2 + 0)); LH(x4, vback, vxoffset + 2 * (i * 2 + 1)); SUBW(x3, x3, x4); + if (opcode == 0x07) SAT16(x3, x6); SH(x3, gback, gdoffset + 2 * i); } if (MODREG && ed == vex.v) { @@ -283,6 +333,7 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i LH(x3, wback, fixedaddress + 2 * (i * 2 + 0)); LH(x4, wback, fixedaddress + 2 * (i * 2 + 1)); SUBW(x3, x3, x4); + if (opcode == 0x07) SAT16(x3, x6); SH(x3, gback, gdoffset + 2 * (4 + i)); } } @@ -302,6 +353,7 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i LH(x3, vback, vyoffset + 2 * (i * 2 + 0)); LH(x4, vback, vyoffset + 2 * (i * 2 + 1)); SUBW(x3, x3, x4); + if (opcode == 0x07) SAT16(x3, x6); SH(x3, gback, gyoffset + 2 * i); } if (MODREG && ed == vex.v) { @@ -314,6 +366,7 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i LH(x3, wback, fixedaddress + 2 * (i * 2 + 0)); LH(x4, wback, fixedaddress + 2 * (i * 2 + 1)); SUBW(x3, x3, x4); + if (opcode == 0x07) SAT16(x3, x6); SH(x3, gback, gyoffset + 2 * (4 + i)); } } @@ -325,7 +378,7 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i case 0x06: INST_NAME("VPHSUBD Gx, Vx, Ex"); nextop = F8; - GETEX(x1, 0, vex.l ? 44 : 12); + GETEX(x1, 0, vex.l ? 28 : 12); GETGX(); GETVX(); GETGY(); diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h index 973f21fa..8f01750d 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.h +++ b/src/dynarec/rv64/dynarec_rv64_helper.h @@ -1950,15 +1950,25 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, #define PURGE_YMM() -// reg = (reg < -32768) ? -32768 : ((reg > 32767) ? 
32767 : reg) -#define SAT16(reg, s) \ - LUI(s, 0xFFFF8); /* -32768 */ \ - BGE(reg, s, 4 + 2 * 4); \ - MV(reg, s); \ - J(4 + 4 * 3); \ - LUI(s, 8); /* 32768 */ \ - BLT(reg, s, 4 + 4); \ - ADDIW(reg, s, -1); +// TODO: zbb? +#define SAT16(reg, s) \ + do { \ + LUI(s, 0xFFFF8); /* -32768 */ \ + BGE(reg, s, 4 + 4); \ + MV(reg, s); \ + LUI(s, 0x8); /* 32768 */ \ + BLT(reg, s, 4 + 4); \ + ADDIW(reg, s, -1); \ + } while (0) + +#define SATU16(reg, s) \ + do { \ + LUI(s, 0x10); /* 65536 */ \ + BGE(reg, xZR, 4 + 4); \ + MV(reg, xZR); \ + BLT(reg, s, 4 + 4); \ + ADDIW(reg, s, -1); \ + } while (0) #define FAST_8BIT_OPERATION(dst, src, s1, OP) \ if (MODREG && (cpuext.zbb || cpuext.xtheadbb) && !dyn->insts[ninst].x64.gen_flags) { \ |