| author | Yang Liu <liuyang22@iscas.ac.cn> | 2024-09-20 17:30:11 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-09-20 11:30:11 +0200 |
| commit | cd9d19a2bb0bbc411c3e32f9ec6b46df0ce5bfb1 (patch) | |
| tree | 40e3b22b1a432fcdf3ac2ff930eec0fb0eaf06c0 | |
| parent | ca9e43d0f0542d32d48f55a567121feb6666c6fa (diff) | |
| download | box64-cd9d19a2bb0bbc411c3e32f9ec6b46df0ce5bfb1.tar.gz box64-cd9d19a2bb0bbc411c3e32f9ec6b46df0ce5bfb1.zip | |
[RV64_DYNAREC] Added more opcodes and fixed more issues for vector (#1842)
* [RV64_DYNAREC] Added 66 0F 6D PUNPCKHQDQ opcode
* [RV64_DYNAREC] Added more opcode
* [RV64_DYNAREC] Added more opcodes for vector
* [RV64_DYNAREC] Added more opcodes
* [RV64_DYNAREC] Added more opcodes
* [RV64_DYNAREC] Added more opcode
* More fixes
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_0f_vector.c | 21 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_660f_vector.c | 130 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_functions.c | 11 |
3 files changed, 148 insertions, 14 deletions
```diff
diff --git a/src/dynarec/rv64/dynarec_rv64_0f_vector.c b/src/dynarec/rv64/dynarec_rv64_0f_vector.c
index 91d44c56..96f7e7ee 100644
--- a/src/dynarec/rv64/dynarec_rv64_0f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_0f_vector.c
@@ -140,6 +140,27 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
                 VSLIDEUP_VI(v0, 8, v1, VECTOR_UNMASKED);
             }
             break;
+        case 0x17:
+            INST_NAME("MOVHPS Ex, Gx");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+            GETGX_vector(v0, 1, VECTOR_SEW64);
+            // EX->q[0] = GX->q[1];
+            if (MODREG) {
+                v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64);
+                q0 = fpu_get_scratch(dyn);
+                VSLIDE1DOWN_VX(q0, xZR, v0, VECTOR_UNMASKED);
+                VMV_X_S(x4, q0);
+                VMV_S_X(v1, x4);
+            } else {
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0);
+                q0 = fpu_get_scratch(dyn);
+                VSLIDE1DOWN_VX(q0, xZR, v0, VECTOR_UNMASKED);
+                VMV_X_S(x4, q0);
+                SD(x4, ed, fixedaddress);
+                SMWRITE2();
+            }
+            break;
         case 0x28:
             INST_NAME("MOVAPS Gx, Ex");
             nextop = F8;
diff --git a/src/dynarec/rv64/dynarec_rv64_660f_vector.c b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
index 4f79aa9e..aad39907 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
@@ -815,6 +815,29 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
                 VLUXEI64_V(v0, ed, q0, VECTOR_MASKED, VECTOR_NFIELD1);
             }
             break;
+        case 0x6D:
+            INST_NAME("PUNPCKHQDQ Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+            // GX->q[0] = GX->q[1];
+            // GX->q[1] = EX->q[1];
+            GETGX_vector(v0, 1, VECTOR_SEW64);
+            if (MODREG) {
+                v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64);
+                q0 = fpu_get_scratch(dyn);
+                VSLIDE1DOWN_VX(q0, xZR, v0, VECTOR_UNMASKED);
+                VMV_X_S(x4, q0);
+                if (v0 != v1) { VMV_V_V(v0, v1); }
+                VMV_S_X(v0, x4);
+            } else {
+                q0 = fpu_get_scratch(dyn);
+                VMV_V_I(VMASK, 0b10);
+                VSLIDE1DOWN_VX(v0, xZR, v0, VECTOR_UNMASKED);
+                SMREAD();
+                addr = geted(dyn, addr, ninst, nextop, &ed, x3, x2, &fixedaddress, rex, NULL, 0, 0);
+                VLE64_V(v0, ed, VECTOR_MASKED, VECTOR_NFIELD1);
+            }
+            break;
         case 0x6E:
             INST_NAME("MOVD Gx, Ed");
             nextop = F8;
@@ -975,6 +998,38 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
                 VSE_V(v1, ed, dyn->vector_eew, VECTOR_UNMASKED, VECTOR_NFIELD1);
             }
             break;
+        case 0xD1:
+        case 0xD2:
+            if (opcode == 0xD1) {
+                INST_NAME("PSRLW Gx, Ex");
+                u8 = VECTOR_SEW16;
+                i32 = 16;
+            } else {
+                INST_NAME("PSRLD Gx, Ex");
+                u8 = VECTOR_SEW32;
+                i32 = 32;
+            }
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+            GETGX_vector(q0, 1, VECTOR_SEW64);
+            if (MODREG) {
+                q1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64);
+            } else {
+                VMV_V_I(VMASK, 0b01);
+                SMREAD();
+                addr = geted(dyn, addr, ninst, nextop, &ed, x3, x2, &fixedaddress, rex, NULL, 0, 0);
+                q1 = fpu_get_scratch(dyn);
+                VLE_V(q1, ed, VECTOR_SEW64, VECTOR_MASKED, VECTOR_NFIELD1);
+            }
+            VMV_X_S(x4, q1);
+            ADDI(x5, xZR, i32);
+            SET_ELEMENT_WIDTH(x1, u8, 1);
+            BLTU_MARK(x4, x5);
+            VXOR_VV(q0, q0, q0, VECTOR_UNMASKED);
+            B_NEXT_nocond;
+            MARK;
+            VSRL_VX(q0, x4, q0, VECTOR_UNMASKED);
+            break;
         case 0xD4:
            INST_NAME("PADDQ Gx, Ex");
            nextop = F8;
```
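The two new qword-shuffling opcodes are easiest to follow against their x86 semantics, already noted in the inline comments above. Here is a minimal scalar sketch in C (the `xmm_t` type and function names are illustrative only, not box64's actual definitions):

```c
#include <stdint.h>

// Illustrative stand-in for a 128-bit SSE register.
typedef struct { uint64_t q[2]; } xmm_t;

// MOVHPS Ex, Gx (0F 17): store Gx's high qword into Ex's low qword.
static void movhps_store(xmm_t* ex, const xmm_t* gx) {
    ex->q[0] = gx->q[1];
}

// PUNPCKHQDQ Gx, Ex (66 0F 6D): interleave the high qwords.
static void punpckhqdq(xmm_t* gx, const xmm_t* ex) {
    gx->q[0] = gx->q[1];
    gx->q[1] = ex->q[1];
}
```

In both emitters the high qword is extracted the same way: `VSLIDE1DOWN_VX` shifts element 1 down into element 0 of a scratch, then `VMV_X_S` moves it into a GPR. PUNPCKHQDQ has to do this before `VMV_V_V` overwrites Gx; its memory form instead sets `VMASK` to `0b10` and uses a masked `VLE64_V` so only the upper element is loaded from memory.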
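The branchy PSRLW/PSRLD sequence exists because x86 and RVV disagree on out-of-range shift counts: x86 packed shifts zero the destination once the 64-bit count reaches the element width, while RVV's `vsrl.vx` silently masks the scalar count to log2(SEW) bits. That mismatch is what the `BLTU_MARK`/`VXOR_VV`/`B_NEXT_nocond` scaffolding reproduces. A per-lane sketch of the rule (`psrlw_lane` is an illustrative name):

```c
#include <stdint.h>

// One 16-bit lane of PSRLW with a 64-bit shift count.
static uint16_t psrlw_lane(uint16_t v, uint64_t count) {
    if (count >= 16)
        return 0;                  // the VXOR_VV(q0, q0, q0) path
    return (uint16_t)(v >> count); // the VSRL_VX path after MARK
}
```

The remaining hunks in dynarec_rv64_660f_vector.c, covering saturating arithmetic, averaging, and multiply-add, continue below.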
```diff
@@ -1026,12 +1081,15 @@
             SET_ELEMENT_WIDTH(x1, u8, 1);
             GETGX_vector(q0, 1, u8);
             GETEX_vector(q1, 0, 0, u8);
-            v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2);
-            VWSUBU_VV(v0, q1, q0, VECTOR_UNMASKED);
-            vector_vsetvli(dyn, ninst, x1, u8 + 1, rv64_vlen == 128 ? VECTOR_LMUL2 : VECTOR_LMUL1, 2);
-            VMAX_VX(v0, xZR, v0, VECTOR_UNMASKED);
-            vector_vsetvli(dyn, ninst, x1, u8, VECTOR_LMUL1, 1);
-            VNSRL_WX(q0, xZR, v0, VECTOR_UNMASKED);
+            VSSUBU_VV(q0, q1, q0, VECTOR_UNMASKED);
+            break;
+        case 0xDA:
+            INST_NAME("PMINUB Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
+            GETGX_vector(q0, 1, VECTOR_SEW8);
+            GETEX_vector(q1, 0, 0, VECTOR_SEW8);
+            VMINU_VV(q0, q0, q1, VECTOR_UNMASKED);
             break;
         case 0xDB:
             INST_NAME("PAND Gx, Ex");
@@ -1041,6 +1099,21 @@
             GETEX_vector(q1, 0, 0, dyn->vector_eew);
             VAND_VV(q0, q0, q1, VECTOR_UNMASKED);
             break;
+        case 0xDC:
+        case 0xDD:
+            if (opcode == 0xDC) {
+                INST_NAME("PADDUSB Gx, Ex");
+                u8 = VECTOR_SEW8;
+            } else {
+                INST_NAME("PADDUSW Gx, Ex");
+                u8 = VECTOR_SEW16;
+            }
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, u8, 1);
+            GETGX_vector(q0, 1, u8);
+            GETEX_vector(q1, 0, 0, u8);
+            VSADDU_VV(q0, q1, q0, VECTOR_UNMASKED);
+            break;
         case 0xDF:
             INST_NAME("PANDN Gx, Ex");
             nextop = F8;
@@ -1050,6 +1123,15 @@
             VXOR_VI(q0, 0x1F, q0, VECTOR_UNMASKED);
             VAND_VV(q0, q0, q1, VECTOR_UNMASKED);
             break;
+        case 0xE0:
+            INST_NAME("PAVGB Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
+            GETGX_vector(q0, 1, VECTOR_SEW8);
+            GETEX_vector(q1, 0, 0, VECTOR_SEW8);
+            CSRRWI(xZR, 0b00 /* rnu */, 0x00A /* vxrm */);
+            VAADDU_VV(q0, q1, q0, VECTOR_UNMASKED);
+            break;
         case 0xE1:
             INST_NAME("PSRAW Gx,Ex");
             nextop = F8;
@@ -1141,12 +1223,44 @@
                 VXOR_VV(q0, q0, q1, VECTOR_UNMASKED);
             }
             break;
-        case 0xF9:
-            INST_NAME("PSUBW Gx, Ex");
+        case 0xF5:
+            INST_NAME("PMADDWD Gx, Ex");
             nextop = F8;
             SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
             GETGX_vector(q0, 1, VECTOR_SEW16);
             GETEX_vector(q1, 0, 0, VECTOR_SEW16);
+            v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2);
+            v1 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2);
+            VWMUL_VV(v0, q0, q1, VECTOR_UNMASKED);
+            d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); // warning, no more scratches!
+            ADDI(x4, xZR, 6);
+            VID_V(d0, VECTOR_UNMASKED);
+            VSLL_VI(d0, 1, d0, VECTOR_UNMASKED); // times 2
+            VMIN_VX(d0, x4, d0, VECTOR_UNMASKED);
+            VADD_VI(q0, 1, d0, VECTOR_UNMASKED);
+            vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL2, 2);
+            VRGATHEREI16_VV(v1, d0, v0, VECTOR_UNMASKED); // 6 4 2 0
+            VRGATHEREI16_VV(d0, q0, v0, VECTOR_UNMASKED); // 7 5 3 1
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+            VADD_VV(q0, d0, v1, VECTOR_UNMASKED);
+            break;
+        case 0xF8:
+        case 0xF9:
+        case 0xFA:
+            if (opcode == 0xF8) {
+                INST_NAME("PSUBB Gx, Ex");
+                u8 = VECTOR_SEW8;
+            } else if (opcode == 0xF9) {
+                INST_NAME("PSUBW Gx, Ex");
+                u8 = VECTOR_SEW16;
+            } else {
+                INST_NAME("PSUBD Gx, Ex");
+                u8 = VECTOR_SEW32;
+            }
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, u8, 1);
+            GETGX_vector(q0, 1, u8);
+            GETEX_vector(q1, 0, 0, u8);
             VSUB_VV(q0, q1, q0, VECTOR_UNMASKED);
             break;
         case 0xFC ... 0xFE:
```
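The PSUBUSB/PSUBUSW rewrite is the biggest simplification in the patch: the old six-instruction widen/clamp/narrow sequence is replaced by RVV's native saturating subtract, and PADDUSB/PADDUSW and PMINUB likewise map one-to-one onto `vsaddu.vv` and `vminu.vv`. Per lane, the byte variants compute the following (function names illustrative):

```c
#include <stdint.h>

// PSUBUSB: unsigned subtract, saturating at 0 (vssubu.vv).
static uint8_t psubusb_lane(uint8_t a, uint8_t b) {
    return (a > b) ? (uint8_t)(a - b) : 0;
}

// PADDUSB: unsigned add, saturating at 255 (vsaddu.vv).
static uint8_t paddusb_lane(uint8_t a, uint8_t b) {
    unsigned sum = (unsigned)a + b;
    return (sum > 0xFF) ? 0xFF : (uint8_t)sum;
}
```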
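PAVGB is a rounding average: each byte becomes `(a + b + 1) >> 1`. The `CSRRWI` writes 0b00 (rnu, round-to-nearest-up) into the vxrm CSR (0x00A) so that the averaging add `vaaddu.vv` rounds exactly the way x86 requires. A one-lane sketch:

```c
#include <stdint.h>

// One byte lane of PAVGB; with vxrm = rnu, vaaddu.vv matches this.
static uint8_t pavgb_lane(uint8_t a, uint8_t b) {
    return (uint8_t)(((unsigned)a + b + 1) >> 1);
}
```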
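PMADDWD builds each 32-bit result lane from two adjacent signed 16×16 products. The emitter widens with `VWMUL_VV`, then constructs gather indices with `VID_V`/`VSLL_VI`/`VMIN_VX`/`VADD_VI` so that the two `VRGATHEREI16_VV` pulls collect the even-position products (6 4 2 0) and the odd-position products (7 5 3 1) before the final add. A scalar reference of the result (`pmaddwd_ref` is an illustrative name):

```c
#include <stdint.h>

// PMADDWD: dst.i32[i] = a.i16[2i]*b.i16[2i] + a.i16[2i+1]*b.i16[2i+1]
static void pmaddwd_ref(int32_t dst[4], const int16_t a[8], const int16_t b[8]) {
    for (int i = 0; i < 4; ++i)
        dst[i] = (int32_t)a[2 * i] * b[2 * i]
               + (int32_t)a[2 * i + 1] * b[2 * i + 1];
}
```

The last file in the patch, dynarec_rv64_functions.c, follows.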
```diff
diff --git a/src/dynarec/rv64/dynarec_rv64_functions.c b/src/dynarec/rv64/dynarec_rv64_functions.c
index 9ff6b4ff..619041be 100644
--- a/src/dynarec/rv64/dynarec_rv64_functions.c
+++ b/src/dynarec/rv64/dynarec_rv64_functions.c
@@ -42,7 +42,7 @@ int fpu_get_scratch(dynarec_rv64_t* dyn)
 int fpu_get_scratch_lmul(dynarec_rv64_t* dyn, int lmul)
 {
     int reg = SCRATCH0 + dyn->e.fpu_scratch;
-    int skip = (1 << lmul) - (reg % (1 << lmul));
+    int skip = (reg % (1 << lmul)) ? (1 << lmul) - (reg % (1 << lmul)) : 0;
     dyn->e.fpu_scratch += skip + 1;
     return reg + skip;
 }
@@ -92,17 +92,18 @@ int fpu_get_reg_xmm(dynarec_rv64_t* dyn, int t, int xmm)
     return EXTREG(i);
 }
 // Reset fpu regs counter
-void fpu_reset_reg_extcache(extcache_t* e)
+void fpu_reset_reg_extcache(dynarec_rv64_t* dyn, extcache_t* e)
 {
     e->fpu_reg = 0;
     for (int i=0; i<24; ++i) {
         e->fpuused[i]=0;
         e->extcache[i].v = 0;
     }
+    dyn->vector_sew = VECTOR_SEWNA;
 }
 void fpu_reset_reg(dynarec_rv64_t* dyn)
 {
-    fpu_reset_reg_extcache(&dyn->e);
+    fpu_reset_reg_extcache(dyn, &dyn->e);
 }
 
 int extcache_no_i64(dynarec_rv64_t* dyn, int ninst, int st, int a)
@@ -732,7 +733,6 @@ void fpu_reset(dynarec_rv64_t* dyn)
     mmx_reset(&dyn->e);
     sse_reset(&dyn->e);
     fpu_reset_reg(dyn);
-    dyn->vector_sew = VECTOR_SEWNA;
 }
 
 void fpu_reset_ninst(dynarec_rv64_t* dyn, int ninst)
@@ -740,8 +740,7 @@ void fpu_reset_ninst(dynarec_rv64_t* dyn, int ninst)
     x87_reset(&dyn->insts[ninst].e);
     mmx_reset(&dyn->insts[ninst].e);
     sse_reset(&dyn->insts[ninst].e);
-    fpu_reset_reg_extcache(&dyn->insts[ninst].e);
-    dyn->vector_sew = VECTOR_SEWNA;
+    fpu_reset_reg_extcache(dyn, &dyn->insts[ninst].e);
 }
 
 int fpu_is_st_freed(dynarec_rv64_t* dyn, int ninst, int st)
```
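The `fpu_get_scratch_lmul` change fixes an off-by-a-group allocation bug: when the next scratch register was already aligned to the LMUL group size, the old formula still skipped a whole group. A standalone model of just the rounding (hypothetical helper, not box64 code):

```c
// Round reg up to the next multiple of the LMUL group size (1 << lmul).
// Old behaviour: an already-aligned reg was bumped by a full group,
// e.g. reg = 4, lmul = 1 (group of 2): skip = 2 - (4 % 2) = 2 -> reg 6.
// New behaviour: skip = 0 when reg % group == 0, so reg 4 is used as-is.
static int align_scratch(int reg, int lmul) {
    int group = 1 << lmul;
    int rem = reg % group;
    return reg + (rem ? group - rem : 0);
}
```

The rest of the hunk centralizes the `vector_sew` reset: `fpu_reset_reg_extcache` now takes the dynarec context and clears `dyn->vector_sew` itself, so both the `fpu_reset` and `fpu_reset_ninst` paths pick it up through one place instead of each resetting it by hand.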