diff options
| author | xctan <xctan@cirno.icu> | 2024-11-13 17:08:16 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-11-13 10:08:16 +0100 |
| commit | eb9e7b9fee7fcf408868c2161e2d5d539af06775 (patch) | |
| tree | 4b575fcd634ce2d85a116c995741df330ff83bc0 /src | |
| parent | b02942c0b0d8491c2d28128c4c948710f47f94f9 (diff) | |
| download | box64-eb9e7b9fee7fcf408868c2161e2d5d539af06775.tar.gz box64-eb9e7b9fee7fcf408868c2161e2d5d539af06775.zip | |
[RV64_DYNAREC] Added more MMX opcodes for vector (#2024)
* [RV64_DYNAREC] Added 0F D1-D3 PSRLW/PSRLD/PSRLQ opcode
* [RV64_DYNAREC] Added 0F EC PADDSB opcode
* [RV64_DYNAREC] Added 0F DC-DD PADDUSB/PADDUSW opcode
* [RV64_DYNAREC] Added 0F FC-FE PADDB/PADDW/PADDD opcodes
* [RV64_DYNAREC] Added 0F ED PADDSW opcode
* [RV64_DYNAREC] Added 0F 7F MOVQ opcode
* [RV64_DYNAREC] Fixed some typos
* [RV64_DYNAREC] Optimized RVV MMX PSRLW/D/Q to a mask-less version
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_0f_vector.c | 98 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_660f.c | 4 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_660f_vector.c | 6 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.h | 4 |
4 files changed, 103 insertions, 9 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_0f_vector.c b/src/dynarec/rv64/dynarec_rv64_0f_vector.c index 2eedcc22..7429c7eb 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f_vector.c +++ b/src/dynarec/rv64/dynarec_rv64_0f_vector.c @@ -499,6 +499,22 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, VMV_S_X(v0, x4); } break; + case 0x7F: + INST_NAME("MOVQ Em, Gm"); + nextop = F8; + SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); + GETG; + if (MODREG) { + v1 = mmx_get_reg_vector(dyn, ninst, x1, x2, x3, gd); + v0 = mmx_get_reg_empty_vector(dyn, ninst, x1, x2, x3, nextop & 7); + VMV_V_V(v0, v1); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0); + v1 = mmx_get_reg_vector(dyn, ninst, x1, x2, x3, gd); + VMV_X_S(x4, v1); + SD(x4, ed, fixedaddress); + } + break; case 0xC2: INST_NAME("CMPPS Gx, Ex, Ib"); nextop = F8; @@ -580,13 +596,91 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, VMV_V_V(v0, d0); VSLIDEUP_VI(v0, d1, 2, VECTOR_UNMASKED); break; - case 0xFC: - INST_NAME("PADDB Gm, Em"); + case 0xD1: + case 0xD2: + case 0xD3: + if (opcode == 0xD1) { + INST_NAME("PSRLW Gm, Em"); + u8 = VECTOR_SEW16; + i32 = 16; + } else if (opcode == 0xD2) { + INST_NAME("PSRLD Gm, Em"); + u8 = VECTOR_SEW32; + i32 = 32; + } else { + INST_NAME("PSRLQ Gm, Em"); + u8 = VECTOR_SEW64; + i32 = 64; + } nextop = F8; + q0 = fpu_get_scratch(dyn); + GETGM_vector(v0); + SET_ELEMENT_WIDTH(x1, u8, 1); + if (MODREG) { + v1 = mmx_get_reg_vector(dyn, ninst, x1, x2, x3, (nextop & 7)); + VMV_X_S(x4, v1); + } else { + SMREAD(); + addr = geted(dyn, addr, ninst, nextop, &wback, v1, x3, &fixedaddress, rex, NULL, 1, 0); + LD(x4, wback, fixedaddress); + } + SLTIU(x3, x4, i32); + SUB(x3, xZR, x3); + NOT(x3, x3); // mask + VSRL_VX(v0, v0, x4, VECTOR_UNMASKED); + VAND_VX(q0, v0, x3, VECTOR_UNMASKED); + VXOR_VV(v0, v0, q0, VECTOR_UNMASKED); + break; + case 0xDC: + case 0xDD: + if (opcode == 0xDC) { + INST_NAME("PADDUSB 
Gm, Em"); + u8 = VECTOR_SEW8; + } else { + INST_NAME("PADDUSW Gm, Em"); + u8 = VECTOR_SEW16; + } + nextop = F8; + GETGM_vector(v0); SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); + GETEM_vector(v1, 0); + SET_ELEMENT_WIDTH(x1, u8, 1); + VSADDU_VV(v0, v0, v1, VECTOR_UNMASKED); + break; + case 0xEC: + INST_NAME("PADDSB Gm, Em"); + nextop = F8; GETGM_vector(v0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); GETEM_vector(v1, 0); SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); + VSADD_VV(v0, v0, v1, VECTOR_UNMASKED); + break; + case 0xED: + INST_NAME("PADDSW Gm, Em"); + nextop = F8; + GETGM_vector(v0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); + GETEM_vector(v1, 0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); + VSADD_VV(v0, v0, v1, VECTOR_UNMASKED); + break; + case 0xFC ... 0xFE: + nextop = F8; + if (opcode == 0xFC) { + INST_NAME("PADDB Gm, Em"); + u8 = VECTOR_SEW8; + } else if (opcode == 0xFD) { + INST_NAME("PADDW Gm, Em"); + u8 = VECTOR_SEW16; + } else { + INST_NAME("PADDD Gm, Em"); + u8 = VECTOR_SEW32; + } + GETGM_vector(v0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); + GETEM_vector(v1, 0); + SET_ELEMENT_WIDTH(x1, u8, 1); VADD_VV(v0, v0, v1, VECTOR_UNMASKED); break; case 0x00 ... 
0x0F: diff --git a/src/dynarec/rv64/dynarec_rv64_660f.c b/src/dynarec/rv64/dynarec_rv64_660f.c index 9450f22d..06cf8400 100644 --- a/src/dynarec/rv64/dynarec_rv64_660f.c +++ b/src/dynarec/rv64/dynarec_rv64_660f.c @@ -2088,7 +2088,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int } break; case 0xF1: - INST_NAME("PSLLQ Gx,Ex"); + INST_NAME("PSLLW Gx,Ex"); nextop = F8; GETGX(); GETEX(x2, 0, 1); @@ -2107,7 +2107,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int } break; case 0xF2: - INST_NAME("PSLLQ Gx,Ex"); + INST_NAME("PSLLD Gx,Ex"); nextop = F8; GETGX(); GETEX(x2, 0, 1); diff --git a/src/dynarec/rv64/dynarec_rv64_660f_vector.c b/src/dynarec/rv64/dynarec_rv64_660f_vector.c index bbfc3c72..7cb244fe 100644 --- a/src/dynarec/rv64/dynarec_rv64_660f_vector.c +++ b/src/dynarec/rv64/dynarec_rv64_660f_vector.c @@ -2089,15 +2089,15 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i case 0xF2: case 0xF3: if (opcode == 0xF1) { - INST_NAME("PSRLW Gx, Ex"); + INST_NAME("PSLLW Gx, Ex"); u8 = VECTOR_SEW16; i32 = 16; } else if (opcode == 0xF2) { - INST_NAME("PSRLD Gx, Ex"); + INST_NAME("PSLLD Gx, Ex"); u8 = VECTOR_SEW32; i32 = 32; } else { - INST_NAME("PSRLQ Gx, Ex"); + INST_NAME("PSLLQ Gx, Ex"); u8 = VECTOR_SEW64; i32 = 64; } diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h index 2ae12cf3..4e168ab6 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.h +++ b/src/dynarec/rv64/dynarec_rv64_helper.h @@ -549,13 +549,13 @@ addr = geted(dyn, addr, ninst, nextop, &wback, a, x3, &fixedaddress, rex, NULL, I12, D); \ } -// Get EM as vector, might use x1, x2 and x3 +// Get EM as vector, might use x1, x2 and x3; requires SEW64 #define GETEM_vector(a, D) \ if (MODREG) { \ a = mmx_get_reg_vector(dyn, ninst, x1, x2, x3, (nextop & 7)); \ } else { \ SMREAD(); \ - addr = geted(dyn, addr, ninst, nextop, &wback, a, x3, &fixedaddress, rex, NULL, 1, D); \ + addr 
= geted(dyn, addr, ninst, nextop, &ed, a, x3, &fixedaddress, rex, NULL, 1, D); \ a = fpu_get_scratch(dyn); \ FLD(a, ed, fixedaddress); \ VFMV_S_F(a, a); \ |