diff options
| author | xctan <xctan@cirno.icu> | 2024-05-30 20:35:27 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-05-30 14:35:27 +0200 |
| commit | f3d733c3ff27c7127366c65f6ee898a2aeb50fbd (patch) | |
| tree | f0411aee9a272a57751bf33ae9cb94386cd3a200 /src | |
| parent | 19ca78769f4730f751d2baca34cc4358ef68553b (diff) | |
| download | box64-f3d733c3ff27c7127366c65f6ee898a2aeb50fbd.tar.gz box64-f3d733c3ff27c7127366c65f6ee898a2aeb50fbd.zip | |
[RV64_DYNAREC] Added more MMX opcodes and some optimizations too (#1539)
* [RV64_DYNAREC] Added 0F DF PANDN opcode * [RV64_DYNAREC] Added 0F E0 PAVGB opcode * [RV64_DYNAREC] Added 0F E3 PAVGW opcode * [RV64_DYNAREC] Added 0F 74 PCMPEQB opcode * [RV64_DYNAREC] Added 0F 76 PCMPEQD opcode * [RV64_DYNAREC] Added 0F 64 PCMPGTB opcode * [RV64_DYNAREC] Added 0F 66 PCMPGTD opcode and optimized 66 0F 66 PCMPGTD opcode * [RV64_DYNAREC] Added 0F 65 PCMPGTW opcode * [RV64_DYNAREC] Added 0F C5 PEXTRW opcode * [RV64_DYNAREC] Added 0F 38 02 PHADDD opcode * [RV64_DYNAREC] Optimized packed saturate add/sub * [RV64_DYNAREC] Added 0F 38 03 PHADDSW opcode * [RV64_DYNAREC] Added 0F 38 01 PHADDW opcode
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_0f.c | 232 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_660f.c | 116 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.h | 18 |
3 files changed, 306 insertions, 60 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c index 46313b6d..a7f96a32 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f.c +++ b/src/dynarec/rv64/dynarec_rv64_0f.c @@ -444,6 +444,103 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SB(x3, gback, gdoffset + i); } break; + case 0x01: + INST_NAME("PHADDW Gm, Em"); + nextop = F8; + GETGM(); + for (int i = 0; i < 2; ++i) { + // tmp32s = GX->sw[i*2+0]+GX->sw[i*2+1]; + // GX->sw[i] = sat(tmp32s); + LH(x3, gback, gdoffset + 2 * (i * 2 + 0)); + LH(x4, gback, gdoffset + 2 * (i * 2 + 1)); + ADDW(x3, x3, x4); + SH(x3, gback, gdoffset + i * 2); + } + if (MODREG && gd == (nextop & 7) + (rex.b << 3)) { + // GM->d[1] = GM->d[0]; + LW(x3, gback, gdoffset + 0); + SW(x3, gback, gdoffset + 4); + } else { + GETEM(x2, 0); + for (int i = 0; i < 2; ++i) { + // tmp32s = EX->sw[i*2+0] + EX->sw[i*2+1]; + // GX->sw[4+i] = sat(tmp32s); + LH(x3, wback, fixedaddress + 2 * (i * 2 + 0)); + LH(x4, wback, fixedaddress + 2 * (i * 2 + 1)); + ADDW(x3, x3, x4); + SH(x3, gback, gdoffset + 2 * (2 + i)); + } + } + break; + case 0x02: + INST_NAME("PHADDD Gm, Em"); + nextop = F8; + GETGM(); + // GM->sd[0] += GM->sd[1]; + LW(x3, gback, gdoffset + 0 * 4); + LW(x4, gback, gdoffset + 1 * 4); + ADDW(x3, x3, x4); + SW(x3, gback, gdoffset + 0 * 4); + if (MODREG && gd == (nextop & 7) + (rex.b << 3)) { + // GM->sd[1] = GM->sd[0]; + SW(x3, gback, gdoffset + 1 * 4); + } else { + GETEM(x2, 0); + // GM->sd[1] = EM->sd[0] + EM->sd[1]; + LW(x3, wback, fixedaddress + 0 * 4); + LW(x4, wback, fixedaddress + 1 * 4); + ADDW(x3, x3, x4); + SW(x3, gback, gdoffset + 1 * 4); + } + break; + case 0x03: + INST_NAME("PHADDSW Gm, Em"); + nextop = F8; + GETGM(); + MOV64x(x5, 32767); + MOV64x(x6, -32768); + for (int i = 0; i < 2; ++i) { + // tmp32s = GX->sw[i*2+0]+GX->sw[i*2+1]; + // GX->sw[i] = sat(tmp32s); + LH(x3, gback, gdoffset + 2 * (i * 2 + 0)); + LH(x4, gback, gdoffset + 2 * (i * 2 + 1)); + ADDW(x3, x3, x4); + if (rv64_zbb) { + MIN(x3, x3, x5); + MAX(x3, x3, x6); + } else { + BLT(x3, x5, 4 + 4); + MV(x3, x5); + BLT(x6, x3, 4 + 4); + MV(x3, x6); + } + SH(x3, gback, gdoffset + i * 2); + } + if (MODREG && gd == (nextop & 7) + (rex.b << 3)) { + // GM->d[1] = GM->d[0]; + LW(x3, gback, gdoffset + 0); + SW(x3, gback, gdoffset + 4); + } else { + GETEM(x2, 0); + for (int i = 0; i < 2; ++i) { + // tmp32s = EX->sw[i*2+0] + EX->sw[i*2+1]; + // GX->sw[4+i] = sat(tmp32s); + LH(x3, wback, fixedaddress + 2 * (i * 2 + 0)); + LH(x4, wback, fixedaddress + 2 * (i * 2 + 1)); + ADDW(x3, x3, x4); + if (rv64_zbb) { + MIN(x3, x3, x5); + MAX(x3, x3, x6); + } else { + BLT(x3, x5, 4 + 4); + MV(x3, x5); + BLT(x6, x3, 4 + 4); + MV(x3, x6); + } + SH(x3, gback, gdoffset + 2 * (2 + i)); + } + } + break; case 0x1C: INST_NAME("PABSB Gm,Em"); nextop = F8; @@ -968,6 +1065,34 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SB(x3, gback, gdoffset + 4 + i); } break; + case 0x64: + INST_NAME("PCMPGTB Gm,Em"); + nextop = F8; + GETGM(); + GETEM(x2, 0); + for (int i = 0; i < 8; ++i) { + // GX->ub[i] = (GX->sb[i]>EX->sb[i])?0xFF:0x00; + LB(x3, wback, fixedaddress + i); + LB(x4, gback, gdoffset + i); + SLT(x3, x3, x4); + NEG(x3, x3); + SB(x3, gback, gdoffset + i); + } + break; + case 0x65: + INST_NAME("PCMPGTW Gm,Em"); + nextop = F8; + GETGM(); + GETEM(x2, 0); + MMX_LOOP_WS(x3, x4, SLT(x3, x4, x3); NEG(x3, x3)); + break; + case 0x66: + INST_NAME("PCMPGTD Gm,Em"); + nextop = F8; + GETGM(); + GETEM(x2, 0); + MMX_LOOP_DS(x3, x4, SLT(x3, x4, x3); NEG(x3, x3)); + break; case 0x67: INST_NAME("PACKUSWB Gm, Em"); nextop = F8; @@ -1280,6 +1405,20 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni DEFAULT; } break; + case 0x74: + INST_NAME("PCMPEQB Gm,Em"); + nextop = F8; + GETGM(); + GETEM(x2, 0); + for (int i = 0; i < 8; ++i) { + LBU(x3, gback, gdoffset + i); + LBU(x4, wback, fixedaddress + i); + SUB(x3, x3, x4); + SEQZ(x3, x3); + NEG(x3, x3); + SB(x3, gback, gdoffset + i); + } + break; case 0x75: INST_NAME("PCMPEQW Gm,Em"); nextop = F8; @@ -1287,6 +1426,13 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETEM(x2, 0); MMX_LOOP_W(x3, x4, SUB(x3, x3, x4); SEQZ(x3, x3); NEG(x3, x3)); break; + case 0x76: + INST_NAME("PCMPEQD Gm,Em"); + nextop = F8; + GETGM(); + GETEM(x2, 0); + MMX_LOOP_D(x3, x4, SUB(x3, x3, x4); SEQZ(x3, x3); NEG(x3, x3)); + break; case 0x77: INST_NAME("EMMS"); // empty MMX, FPU now usable @@ -1976,6 +2122,14 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SDxw(gd, ed, fixedaddress); } break; + case 0xC5: + INST_NAME("PEXTRW Gd,Em,Ib"); + nextop = F8; + GETGD; + GETEM(x2, 0); + u8 = (F8)&3; + LHU(gd, wback, fixedaddress + u8 * 2); + break; case 0xC6: // TODO: Optimize this! INST_NAME("SHUFPS Gx, Ex, Ib"); nextop = F8; @@ -2085,6 +2239,35 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SH(x3, gback, gdoffset + i * 2); } break; + case 0xDF: + INST_NAME("PANDN Gm, Em"); + nextop = F8; + GETGM(); + GETEM(x2, 0); + LD(x1, gback, gdoffset); + LD(x3, wback, fixedaddress); + if (rv64_zbb) { + ANDN(x1, x3, x1); + } else { + NOT(x1, x1); + AND(x1, x1, x3); + } + SD(x1, gback, gdoffset); + break; + case 0xE0: + INST_NAME("PAVGB Gm, Em"); + nextop = F8; + GETGM(); + GETEM(x2, 0); + for (int i = 0; i < 8; ++i) { + LBU(x3, gback, gdoffset + i); + LBU(x4, wback, fixedaddress + i); + ADDW(x3, x3, x4); + ADDIW(x3, x3, 1); + SRAIW(x3, x3, 1); + SB(x3, gback, gdoffset + i); + } + break; case 0xE2: INST_NAME("PSRAD Gm, Em"); nextop = F8; @@ -2104,6 +2287,20 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SW(x3, gback, gdoffset + 4 * i); } break; + case 0xE3: + INST_NAME("PAVGW Gm,Em"); + nextop = F8; + GETGM(); + GETEM(x2, 0); + for (int i = 0; i < 4; ++i) { + LHU(x3, gback, gdoffset + 2 * i); + LHU(x4, wback, fixedaddress + 2 * i); + ADDW(x3, x3, x4); + ADDIW(x3, x3, 1); + SRAIW(x3, x3, 1); + SH(x3, gback, gdoffset + 2 * i); + } + break; case 0xE5: INST_NAME("PMULHW Gm,Em"); nextop = F8; @@ -2165,8 +2362,8 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni nextop = F8; GETGM(); GETEM(x2, 0); - ADDI(x5, xZR, 0x7f); - ADDI(x6, xZR, 0xf80); + MOV64x(x5, 127); + MOV64x(x6, -128); for (int i = 0; i < 8; ++i) { // tmp16s = (int16_t)GX->sb[i] + EX->sb[i]; // GX->sb[i] = (tmp16s>127)?127:((tmp16s<-128)?-128:tmp16s); @@ -2176,16 +2373,13 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni if (rv64_zbb) { MIN(x3, x3, x5); MAX(x3, x3, x6); - SB(x3, gback, gdoffset + i); } else { - BLT(x3, x5, 12); // tmp16s>127? - SB(x5, gback, gdoffset + i); - J(20); // continue - BLT(x6, x3, 12); // tmp16s<-128? - SB(x6, gback, gdoffset + i); - J(8); // continue - SB(x3, gback, gdoffset + i); + BLT(x3, x5, 4 + 4); + MV(x3, x5); + BLT(x6, x3, 4 + 4); + MV(x3, x6); } + SB(x3, gback, gdoffset + i); } break; case 0xED: @@ -2193,19 +2387,23 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni nextop = F8; GETGM(); GETEM(x2, 0); + MOV64x(x5, 32767); + MOV64x(x6, -32768); for (int i = 0; i < 4; ++i) { // tmp32s = (int32_t)GM->sw[i] + EM->sw[i]; // GM->sw[i] = (tmp32s>32767)?32767:((tmp32s<-32768)?-32768:tmp32s); LH(x3, gback, gdoffset + 2 * i); LH(x4, wback, fixedaddress + 2 * i); ADDW(x3, x3, x4); - LUI(x4, 0xFFFF8); // -32768 - BGE(x3, x4, 12); - SH(x4, gback, gdoffset + 2 * i); - J(20); // continue - LUI(x4, 8); // 32768 - BLT(x3, x4, 8); - ADDIW(x3, x4, -1); + if (rv64_zbb) { + MIN(x3, x3, x5); + MAX(x3, x3, x6); + } else { + BLT(x3, x5, 4 + 4); + MV(x3, x5); + BLT(x6, x3, 4 + 4); + MV(x3, x6); + } SH(x3, gback, gdoffset + 2 * i); } break; diff --git a/src/dynarec/rv64/dynarec_rv64_660f.c b/src/dynarec/rv64/dynarec_rv64_660f.c index 019c6cf6..cb6831ee 100644 --- a/src/dynarec/rv64/dynarec_rv64_660f.c +++ b/src/dynarec/rv64/dynarec_rv64_660f.c @@ -349,14 +349,24 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("PHADDSW Gx, Ex"); nextop = F8; GETGX(); + MOV64x(x5, 32767); + MOV64x(x6, -32768); for (int i = 0; i < 4; ++i) { // tmp32s = GX->sw[i*2+0]+GX->sw[i*2+1]; // GX->sw[i] = sat(tmp32s); LH(x3, gback, gdoffset + 2 * (i * 2 + 0)); LH(x4, gback, gdoffset + 2 * (i * 2 + 1)); ADDW(x3, x3, x4); - SAT16(x3, x4); - SH(x3, gback, gdoffset + 2 * i); + if (rv64_zbb) { + MIN(x3, x3, x5); + MAX(x3, x3, x6); + } else { + BLT(x3, x5, 4 + 4); + MV(x3, x5); + BLT(x6, x3, 4 + 4); + MV(x3, x6); + } + SH(x3, gback, gdoffset + i * 2); } if (MODREG && gd == (nextop & 7) + (rex.b << 3)) { // GX->q[1] = GX->q[0]; @@ -370,13 +380,21 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int LH(x3, wback, fixedaddress + 2 * (i * 2 + 0)); LH(x4, wback, fixedaddress + 2 * (i * 2 + 1)); ADDW(x3, x3, x4); - SAT16(x3, x4); + if (rv64_zbb) { + MIN(x3, x3, x5); + MAX(x3, x3, x6); + } else { + BLT(x3, x5, 4 + 4); + MV(x3, x5); + BLT(x6, x3, 4 + 4); + MV(x3, x6); + } SH(x3, gback, gdoffset + 2 * (4 + i)); } } break; case 0x04: - INST_NAME("PADDUBSW Gx, Ex"); + INST_NAME("PMADDUBSW Gx, Ex"); nextop = F8; GETGX(); GETEX(x2, 0); @@ -1732,7 +1750,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETEX(x1, 0); GETGX(); - SSE_LOOP_DS(x3, x4, SLT(x4, x4, x3); SLLI(x3, x4, 63); SRAI(x3, x3, 63)); + SSE_LOOP_DS(x3, x4, SLT(x4, x4, x3); NEG(x3, x4)); break; case 0x67: INST_NAME("PACKUSWB Gx, Ex"); @@ -2754,9 +2772,13 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int LBU(x3, gback, gdoffset + i); LBU(x4, wback, fixedaddress + i); SUB(x3, x3, x4); - NOT(x4, x3); - SRAI(x4, x4, 63); - AND(x3, x3, x4); + if (rv64_zbb) { + MAX(x3, x3, xZR); + } else { + NOT(x4, x3); + SRAI(x4, x4, 63); + AND(x3, x3, x4); + } SB(x3, gback, gdoffset + i); } break; @@ -2765,7 +2787,17 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETGX(); GETEX(x2, 0); - SSE_LOOP_W(x3, x4, SUB(x3, x3, x4); NOT(x4, x3); SRAI(x4, x4, 63); AND(x3, x3, x4)); + SSE_LOOP_W(x3, x4, + SUB(x3, x3, x4); + if (rv64_zbb) { + MAX(x3, x3, xZR); + } else { + NOT(x4, x3); + SRAI(x4, x4, 63); + AND(x3, x3, x4); + } + SH(x3, gback, gdoffset + i * 2); + ); break; case 0xDA: INST_NAME("PMINUB Gx, Ex"); @@ -2976,22 +3008,23 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETGX(); GETEX(x2, 0); + ADDI(x5, xZR, 0x7f); + ADDI(x6, xZR, 0xf80); for (int i = 0; i < 16; ++i) { // tmp16s = (int16_t)GX->sb[i] - EX->sb[i]; // GX->sb[i] = (tmp16s<-128)?-128:((tmp16s>127)?127:tmp16s); LB(x3, gback, gdoffset + i); LB(x4, wback, fixedaddress + i); SUBW(x3, x3, x4); - SLLIW(x3, x3, 16); - SRAIW(x3, x3, 16); - ADDI(x4, xZR, 0x7f); - BLT(x3, x4, 12); // tmp16s>127? - SB(x4, gback, gdoffset + i); - J(24); // continue - ADDI(x4, xZR, 0xf80); - BLT(x4, x3, 12); // tmp16s<-128? - SB(x4, gback, gdoffset + i); - J(8); // continue + if (rv64_zbb) { + MIN(x3, x3, x5); + MAX(x3, x3, x6); + } else { + BLT(x3, x5, 4 + 4); + MV(x3, x5); + BLT(x6, x3, 4 + 4); + MV(x3, x6); + } SB(x3, gback, gdoffset + i); } break; @@ -3000,14 +3033,24 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETGX(); GETEX(x2, 0); + MOV64x(x5, 32767); + MOV64x(x6, -32768); for (int i = 0; i < 8; ++i) { // tmp32s = (int32_t)GX->sw[i] - EX->sw[i]; // GX->sw[i] = sat16(tmp32s); LH(x3, gback, gdoffset + 2 * i); LH(x4, wback, fixedaddress + 2 * i); SUBW(x3, x3, x4); - SAT16(x3, x4); - SH(x3, gback, gdoffset + 2 * i); + if (rv64_zbb) { + MIN(x3, x3, x5); + MAX(x3, x3, x6); + } else { + BLT(x3, x5, 4 + 4); + MV(x3, x5); + BLT(x6, x3, 4 + 4); + MV(x3, x6); + } + SH(x3, gback, gdoffset + i * 2); } break; case 0xEA: @@ -3035,8 +3078,8 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETGX(); GETEX(x2, 0); - ADDI(x5, xZR, 0x7f); - ADDI(x6, xZR, 0xf80); + MOV64x(x5, 127); + MOV64x(x6, -128); for (int i = 0; i < 16; ++i) { // tmp16s = (int16_t)GX->sb[i] + EX->sb[i]; // GX->sb[i] = (tmp16s>127)?127:((tmp16s<-128)?-128:tmp16s); @@ -3046,16 +3089,13 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int if (rv64_zbb) { MIN(x3, x3, x5); MAX(x3, x3, x6); - SB(x3, gback, gdoffset + i); } else { - BLT(x3, x5, 12); // tmp16s>127? - SB(x5, gback, gdoffset + i); - J(20); // continue - BLT(x6, x3, 12); // tmp16s<-128? - SB(x6, gback, gdoffset + i); - J(8); // continue - SB(x3, gback, gdoffset + i); + BLT(x3, x5, 4 + 4); + MV(x3, x5); + BLT(x6, x3, 4 + 4); + MV(x3, x6); } + SB(x3, gback, gdoffset + i); } break; case 0xED: @@ -3063,14 +3103,24 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETGX(); GETEX(x2, 0); + MOV64x(x5, 32767); + MOV64x(x6, -32768); for (int i = 0; i < 8; ++i) { // tmp32s = (int32_t)GX->sw[i] + EX->sw[i]; // GX->sw[i] = sat16(tmp32s); LH(x3, gback, gdoffset + 2 * i); LH(x4, wback, fixedaddress + 2 * i); ADDW(x3, x3, x4); - SAT16(x3, x4); - SH(x3, gback, gdoffset + 2 * i); + if (rv64_zbb) { + MIN(x3, x3, x5); + MAX(x3, x3, x6); + } else { + BLT(x3, x5, 4 + 4); + MV(x3, x5); + BLT(x6, x3, 4 + 4); + MV(x3, x6); + } + SH(x3, gback, gdoffset + i * 2); } break; case 0xEE: diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h index fd680474..2471c71c 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.h +++ b/src/dynarec/rv64/dynarec_rv64_helper.h @@ -548,6 +548,14 @@ SW(GX1, gback, gdoffset + i * 4); \ } +#define MMX_LOOP_DS(GX1, EX1, F) \ + for (int i = 0; i < 2; ++i) { \ + LW(GX1, gback, gdoffset + i * 4); \ + LW(EX1, wback, fixedaddress + i * 4); \ + F; \ + SW(GX1, gback, gdoffset + i * 4); \ + } + #define MMX_LOOP_W(GX1, EX1, F) \ for (int i = 0; i < 4; ++i) { \ LHU(GX1, gback, gdoffset + i * 2); \ @@ -1661,16 +1669,6 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int #define FCOMIS(v1, v2, s1, s2, s3, s4, s5) FCOMI(S, v1, v2, s1, s2, s3, s4, s5) #define FCOMID(v1, v2, s1, s2, s3, s4, s5) FCOMI(D, v1, v2, s1, s2, s3, s4, s5) -// reg = (reg < -32768) ? -32768 : ((reg > 32767) ? 32767 : reg) -#define SAT16(reg, s) \ - LUI(s, 0xFFFF8); /* -32768 */ \ - BGE(reg, s, 4 + 2 * 4); \ - MV(reg, s); \ - J(4 + 4 * 3); \ - LUI(s, 8); /* 32768 */ \ - BLT(reg, s, 4 + 4); \ - ADDIW(reg, s, -1); - #define PURGE_YMM0() /* TODO */ #endif //__DYNAREC_RV64_HELPER_H__ |