| author | Yang Liu <liuyang22@iscas.ac.cn> | 2024-09-19 23:07:59 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-09-19 17:07:59 +0200 |
| commit | 3b7a448b07f01b83ce27d38061882232d4c48ae4 (patch) | |
| tree | 2bfcbfd7a90e6dc165ae12977becd89682b20e69 /src | |
| parent | 8f0c1efc6f7c8dacd32a2f5e6fd393a1769ea9ac (diff) | |
| download | box64-3b7a448b07f01b83ce27d38061882232d4c48ae4.tar.gz box64-3b7a448b07f01b83ce27d38061882232d4c48ae4.zip | |
[RV64_DYNAREC] Added more 66 0F opcodes for vector (#1838)
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_660f_vector.c | 168 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_pass3.h | 2 |
2 files changed, 156 insertions, 14 deletions
```diff
diff --git a/src/dynarec/rv64/dynarec_rv64_660f_vector.c b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
index 460abf23..180308ca 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
@@ -863,6 +863,52 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
                 VRGATHEREI16_VV(q0, v0, q1, VECTOR_UNMASKED);
             }
             break;
+        case 0x71:
+            nextop = F8;
+            switch ((nextop >> 3) & 7) {
+                case 2:
+                    INST_NAME("PSRLW Ex, Ib");
+                    SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
+                    GETEX_vector(q0, 1, 1, VECTOR_SEW16);
+                    u8 = F8;
+                    if (u8) {
+                        if (u8 > 15) {
+                            VXOR_VV(q0, q0, q0, VECTOR_UNMASKED);
+                        } else {
+                            VSRL_VI(q0, u8, q0, VECTOR_UNMASKED);
+                        }
+                        PUTEX_vector(q0, VECTOR_SEW16);
+                    }
+                    break;
+                case 4:
+                    INST_NAME("PSRAW Ex, Ib");
+                    SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
+                    GETEX_vector(q0, 1, 1, VECTOR_SEW16);
+                    u8 = F8;
+                    if (u8 > 15) u8 = 15;
+                    if (u8) {
+                        VSRA_VI(q0, u8, q0, VECTOR_UNMASKED);
+                    }
+                    PUTEX_vector(q0, VECTOR_SEW16);
+                    break;
+                case 6:
+                    INST_NAME("PSLLW Ex, Ib");
+                    SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
+                    GETEX_vector(q0, 1, 1, VECTOR_SEW16);
+                    u8 = F8;
+                    if (u8) {
+                        if (u8 > 15) {
+                            VXOR_VV(q0, q0, q0, VECTOR_UNMASKED);
+                        } else {
+                            VSLL_VI(q0, u8, q0, VECTOR_UNMASKED);
+                        }
+                        PUTEX_vector(q0, VECTOR_SEW16);
+                    }
+                    break;
+                default:
+                    DEFAULT_VECTOR;
+            }
+            break;
         case 0x73:
             nextop = F8;
             switch ((nextop >> 3) & 7) {
@@ -886,10 +932,10 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             break;
         case 0x74 ... 0x76:
             if (opcode == 0x74) {
-                INST_NAME("PCMPEQB Gx,Ex");
+                INST_NAME("PCMPEQB Gx, Ex");
                 u8 = VECTOR_SEW8;
             } else if (opcode == 0x75) {
-                INST_NAME("PCMPEQW Gx,Ex");
+                INST_NAME("PCMPEQW Gx, Ex");
                 u8 = VECTOR_SEW16;
             } else {
                 INST_NAME("PCMPEQD Gx, Ex");
@@ -906,20 +952,21 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             break;
         case 0x7E:
             return 0;
-        case 0xEF:
-            INST_NAME("PXOR Gx, Ex");
+        case 0x7F:
+            INST_NAME("MOVDQA Ex, Gx");
             nextop = F8;
             GETG;
-            if (MODREG && gd == (nextop & 7) + (rex.b << 3)) {
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEWANY, 1);
-                // special case
-                q0 = sse_get_reg_empty_vector(dyn, ninst, x1, gd);
-                VXOR_VV(q0, q0, q0, VECTOR_UNMASKED);
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEWANY, 1);
+            if (MODREG) {
+                ed = (nextop & 7) + (rex.b << 3);
+                v0 = sse_get_reg_empty_vector(dyn, ninst, x1, ed);
+                v1 = sse_get_reg_vector(dyn, ninst, x1, gd, 0, dyn->vector_eew);
+                VMV_V_V(v0, v1);
             } else {
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEWANY, 1);
-                q0 = sse_get_reg_vector(dyn, ninst, x1, gd, 1, dyn->vector_eew);
-                GETEX_vector(q1, 0, 0, dyn->vector_eew);
-                VXOR_VV(q0, q0, q1, VECTOR_UNMASKED);
+                SMREAD();
+                v1 = sse_get_reg_vector(dyn, ninst, x1, gd, 0, dyn->vector_eew);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 0, 0);
+                VSE_V(v1, ed, dyn->vector_eew, VECTOR_UNMASKED, VECTOR_NFIELD1);
             }
             break;
         case 0xD4:
@@ -947,6 +994,39 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
                 SMWRITE2();
             }
             break;
+        case 0xD7:
+            INST_NAME("PMOVMSKB Gd, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
+            GETGD;
+            GETEX_vector(q0, 0, 0, VECTOR_SEW8);
+            v0 = fpu_get_scratch(dyn);
+            VSRL_VI(v0, 7, q0, VECTOR_UNMASKED);
+            VMSNE_VX(VMASK, xZR, v0, VECTOR_UNMASKED);
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
+            VMV_X_S(gd, VMASK);
+            ZEXTH(gd, gd);
+            break;
+        case 0xD8:
+        case 0xD9:
+            if (opcode == 0xD8) {
+                INST_NAME("PSUBUSB Gx, Ex");
+                u8 = VECTOR_SEW8;
+            } else {
+                INST_NAME("PSUBUSW Gx, Ex");
+                u8 = VECTOR_SEW16;
+            }
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, u8, 1);
+            GETGX_vector(q0, 1, u8);
+            GETEX_vector(q1, 0, 0, u8);
+            v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2);
+            VWSUBU_VV(v0, q1, q0, VECTOR_UNMASKED);
+            vector_vsetvli(dyn, ninst, x1, u8 + 1, rv64_vlen == 128 ? VECTOR_LMUL2 : VECTOR_LMUL1, 2);
+            VMAX_VX(v0, xZR, v0, VECTOR_UNMASKED);
+            vector_vsetvli(dyn, ninst, x1, u8, VECTOR_LMUL1, 1);
+            VNSRL_WX(q0, xZR, v0, VECTOR_UNMASKED);
+            break;
         case 0xDB:
             INST_NAME("PAND Gx, Ex");
             nextop = F8;
@@ -964,6 +1044,27 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             VXOR_VI(q0, 0x1F, q0, VECTOR_UNMASKED);
             VAND_VV(q0, q0, q1, VECTOR_UNMASKED);
             break;
+        case 0xE1:
+            INST_NAME("PSRAW Gx,Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+            GETGX_vector(q0, 1, VECTOR_SEW64);
+            VMV_V_I(VMASK, 0b01);
+            if (MODREG) {
+                q1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64);
+            } else {
+                SMREAD();
+                addr = geted(dyn, addr, ninst, nextop, &ed, x3, x2, &fixedaddress, rex, NULL, 0, 0);
+                q1 = fpu_get_scratch(dyn);
+                VLE_V(q1, ed, VECTOR_SEW64, VECTOR_MASKED, VECTOR_NFIELD1);
+            }
+            v1 = fpu_get_scratch(dyn);
+            ADDI(x4, xZR, 15);
+            VMINU_VX(v1, x4, q1, VECTOR_MASKED);
+            VMV_X_S(x4, v1);
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
+            VSRA_VX(q0, x4, q0, VECTOR_UNMASKED);
+            break;
         case 0xE2:
             INST_NAME("PSRAD Gx, Ex");
             nextop = F8;
@@ -985,6 +1086,23 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
             VSRA_VX(q0, x4, q0, VECTOR_UNMASKED);
             break;
+        case 0xE3:
+            INST_NAME("PAVGW Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
+            GETGX_vector(q0, 1, VECTOR_SEW16);
+            GETEX_vector(q1, 0, 0, VECTOR_SEW16);
+            CSRRWI(xZR, 0b00 /* rnu */, 0x00A /* vxrm */);
+            VAADDU_VV(q0, q1, q0, VECTOR_UNMASKED);
+            break;
+        case 0xE4:
+            INST_NAME("PMULHUW Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
+            GETGX_vector(q0, 1, VECTOR_SEW16);
+            GETEX_vector(q1, 0, 0, VECTOR_SEW16);
+            VMULHU_VV(q0, q1, q0, VECTOR_UNMASKED);
+            break;
         case 0xEB:
             INST_NAME("POR Gx, Ex");
             nextop = F8;
@@ -1001,6 +1119,30 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             GETEX_vector(q1, 0, 0, VECTOR_SEW16);
             VMAX_VV(q0, q0, q1, VECTOR_UNMASKED);
             break;
+        case 0xEF:
+            INST_NAME("PXOR Gx, Ex");
+            nextop = F8;
+            GETG;
+            if (MODREG && gd == (nextop & 7) + (rex.b << 3)) {
+                SET_ELEMENT_WIDTH(x1, VECTOR_SEWANY, 1);
+                // special case
+                q0 = sse_get_reg_empty_vector(dyn, ninst, x1, gd);
+                VXOR_VV(q0, q0, q0, VECTOR_UNMASKED);
+            } else {
+                SET_ELEMENT_WIDTH(x1, VECTOR_SEWANY, 1);
+                q0 = sse_get_reg_vector(dyn, ninst, x1, gd, 1, dyn->vector_eew);
+                GETEX_vector(q1, 0, 0, dyn->vector_eew);
+                VXOR_VV(q0, q0, q1, VECTOR_UNMASKED);
+            }
+            break;
+        case 0xF9:
+            INST_NAME("PSUBW Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
+            GETGX_vector(q0, 1, VECTOR_SEW16);
+            GETEX_vector(q1, 0, 0, VECTOR_SEW16);
+            VSUB_VV(q0, q1, q0, VECTOR_UNMASKED);
+            break;
         case 0xFC ... 0xFE:
             nextop = F8;
             if (opcode == 0xFC) {
diff --git a/src/dynarec/rv64/dynarec_rv64_pass3.h b/src/dynarec/rv64/dynarec_rv64_pass3.h
index 556586f2..995b7f3a 100644
--- a/src/dynarec/rv64/dynarec_rv64_pass3.h
+++ b/src/dynarec/rv64/dynarec_rv64_pass3.h
@@ -27,7 +27,7 @@
 #define DEFAULT_VECTOR \
     if (box64_dynarec_log >= LOG_INFO || box64_dynarec_dump || box64_dynarec_missing) { \
-        dynarec_log(LOG_NONE, "%p: Dynarec fallback to scalar version because of %s Opcode" \
+        dynarec_log(LOG_NONE, "%p: Dynarec fallback to scalar version because of %sOpcode" \
                    " %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X", \
             (void*)ip, rex.is32bits ? "x86 " : "x64 ", \
             PKip(0), \
```
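For context on the less obvious lowerings above: the 0xD7 (PMOVMSKB) case shifts every byte right by 7 and compares the result against zero into VMASK, then reads the mask register back as a scalar, which is one way to gather the per-byte sign bits. A rough scalar model of the x86 semantics being emulated (illustrative sketch only, not box64 code; the helper name is made up):

```c
#include <stdint.h>

/* Scalar model of PMOVMSKB Gd, Ex for a 128-bit source, matching the
 * intent of the VSRL_VI(7) + VMSNE_VX + VMV_X_S sequence in the diff. */
uint32_t pmovmskb128(const uint8_t xmm[16])
{
    uint32_t mask = 0;
    for (int i = 0; i < 16; i++)
        mask |= (uint32_t)(xmm[i] >> 7) << i; /* bit i = sign bit of byte i */
    return mask; /* upper bits stay zero, like the ZEXTH in the diff */
}
```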
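The 0xD8/0xD9 (PSUBUSB/PSUBUSW) case emulates unsigned saturating subtraction by widening the operands, subtracting, clamping negative results to zero with a signed max against x0, and narrowing back. A minimal per-element sketch of the intended semantics, shown for the 16-bit case (hypothetical helper, not part of the diff):

```c
#include <stdint.h>

/* Per-element semantics that the VWSUBU_VV + VMAX_VX(zero) + VNSRL_WX
 * sequence is meant to reproduce for PSUBUSW. */
uint16_t psubusw_elem(uint16_t a, uint16_t b)
{
    int32_t wide = (int32_t)a - (int32_t)b; /* widening subtract */
    if (wide < 0) wide = 0;                 /* clamp at zero: unsigned saturation */
    return (uint16_t)wide;                  /* narrow back to 16 bits */
}
```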
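Finally, the 0xE3 (PAVGW) case writes vxrm to round-to-nearest-up before VAADDU so that the averaging add matches the x86 rounding rule of (a + b + 1) >> 1. A per-element sketch of that definition (illustration only):

```c
#include <stdint.h>

/* x86 PAVGW: unsigned average of two 16-bit elements, rounded up,
 * computed in a wider type so the intermediate sum cannot overflow. */
uint16_t pavgw_elem(uint16_t a, uint16_t b)
{
    return (uint16_t)(((uint32_t)a + (uint32_t)b + 1) >> 1);
}
```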