| field | value |
|---|---|
| author | Yang Liu <liuyang22@iscas.ac.cn>, 2024-11-01 00:38:16 +0800 |
| committer | GitHub <noreply@github.com>, 2024-10-31 17:38:16 +0100 |
| commit | 126cf279727818cf58b4d44bf84fb2633ccbb6c5 |
| tree | 4e8b2a3e2c5b19d1016856e00d278089316776d2 /src |
| parent | 0a677c7f83ef43219ee69f21702527ada1592437 |
[RV64_DYNAREC] Added more opcodes for vector (#1987)
Diffstat (limited to 'src')
| mode | file | lines |
|---|---|---|
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_660f_vector.c | 122 |
| -rw-r--r-- | src/dynarec/rv64/rv64_emitter.h | 3 |

2 files changed, 125 insertions, 0 deletions
```diff
diff --git a/src/dynarec/rv64/dynarec_rv64_660f_vector.c b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
index 28a3b7fd..ede57dc2 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
@@ -155,6 +155,24 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
                 VLE64_V(v0, ed, VECTOR_MASKED, VECTOR_NFIELD1);
             }
             break;
+        case 0x16:
+            INST_NAME("MOVHPD Gx, Eq");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+            GETGX_vector(v0, 1, VECTOR_SEW64);
+            if (MODREG) {
+                // access register instead of memory is bad opcode!
+                DEFAULT;
+                return addr;
+            }
+            SMREAD();
+            addr = geted(dyn, addr, ninst, nextop, &ed, x3, x2, &fixedaddress, rex, NULL, 0, 0);
+            LD(x4, ed, fixedaddress);
+            d0 = fpu_get_scratch(dyn);
+            VMV_X_S(x5, v0);
+            VMV_S_X(d0, x4);
+            VSLIDE1UP_VX(v0, d0, x5, VECTOR_UNMASKED);
+            break;
         case 0x1F:
             return 0;
         case 0x28:
@@ -190,6 +208,21 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
                 SMWRITE2();
             }
             break;
+        case 0x2B:
+            INST_NAME("MOVNTPD Ex, Gx");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEWANY, 1);
+            GETGX_vector(v0, 0, dyn->vector_eew);
+            if (MODREG) {
+                ed = (nextop & 7) + (rex.b << 3);
+                v1 = sse_get_reg_empty_vector(dyn, ninst, x1, ed);
+                VMV_V_V(v1, v0);
+            } else {
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 0, 0);
+                VSE_V(v0, ed, dyn->vector_eew, VECTOR_UNMASKED, VECTOR_NFIELD1);
+                SMWRITE2();
+            }
+            break;
         case 0x2E:
         case 0x2F:
             return 0;
@@ -1516,6 +1549,50 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             }
             break;
         case 0xA3 ... 0xC1:
             return 0;
+        case 0xC2:
+            INST_NAME("CMPPD Gx, Ex, Ib");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+            GETGX_vector(v0, 1, VECTOR_SEW64);
+            GETEX_vector(v1, 0, 1, VECTOR_SEW64);
+            u8 = F8;
+            if ((u8 & 7) == 0) { // Equal
+                VMFEQ_VV(VMASK, v0, v1, VECTOR_UNMASKED);
+            } else if ((u8 & 7) == 4) { // Not Equal or unordered
+                VMFEQ_VV(VMASK, v0, v1, VECTOR_UNMASKED);
+                VXOR_VI(VMASK, VMASK, 0x1F, VECTOR_UNMASKED);
+            } else {
+                d0 = fpu_get_scratch(dyn);
+                VMFEQ_VV(VMASK, v0, v0, VECTOR_UNMASKED);
+                VMFEQ_VV(d0, v1, v1, VECTOR_UNMASKED);
+                VMAND_MM(VMASK, VMASK, d0);
+                switch (u8 & 7) {
+                    case 1: // Less than
+                        VMFLT_VV(d0, v0, v1, VECTOR_UNMASKED);
+                        VMAND_MM(VMASK, VMASK, d0);
+                        break;
+                    case 2: // Less or equal
+                        VMFLE_VV(d0, v0, v1, VECTOR_UNMASKED);
+                        VMAND_MM(VMASK, VMASK, d0);
+                        break;
+                    case 3: // NaN
+                        VXOR_VI(VMASK, VMASK, 0x1F, VECTOR_UNMASKED);
+                        break;
+                    case 5: // Greater or equal or unordered
+                        VMFLE_VV(d0, v1, v0, VECTOR_UNMASKED);
+                        VMORN_MM(VMASK, d0, VMASK);
+                        break;
+                    case 6: // Greater or unordered, test inverted, N!=V so unordered or less than (inverted)
+                        VMFLT_VV(d0, v1, v0, VECTOR_UNMASKED);
+                        VMORN_MM(VMASK, d0, VMASK);
+                        break;
+                    case 7: // Not NaN
+                        break;
+                }
+            }
+            VXOR_VV(v0, v0, v0, VECTOR_UNMASKED);
+            VXOR_VI(v0, v0, 0x1F, VECTOR_MASKED);
+            break;
         case 0xC4:
             INST_NAME("PINSRW Gx, Ed, Ib");
             nextop = F8;
@@ -1555,6 +1632,36 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
                 LHU(gd, ed, u8 * 2);
             }
             break;
+        case 0xC6:
+            INST_NAME("SHUFPD Gx, Ex, Ib");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+            GETGX_vector(v0, 1, VECTOR_SEW64);
+            GETEX_vector(v1, 0, 1, VECTOR_SEW64);
+            u8 = F8;
+            d0 = fpu_get_scratch(dyn);
+            d1 = fpu_get_scratch(dyn);
+            if ((u8 >> 1) & 1) {
+                VSLIDE1DOWN_VX(d1, v1, xZR, VECTOR_UNMASKED);
+                v1 = d1;
+            } else if (v0 == v1) {
+                v1 = fpu_get_scratch(dyn);
+                VMV_V_V(v1, v0);
+            }
+            if (u8 & 1) {
+                if (rv64_xtheadvector) {
+                    ADDI(x5, xZR, 1);
+                    VEXT_X_V(x4, v0, x5);
+                } else {
+                    VSLIDE1DOWN_VX(d0, v0, xZR, VECTOR_UNMASKED);
+                    VMV_X_S(x4, d0);
+                }
+            } else {
+                d0 = v0;
+                VMV_X_S(x4, d0);
+            }
+            VSLIDE1UP_VX(v0, v1, x4, VECTOR_UNMASKED);
+            break;
         case 0xD1:
         case 0xD2:
         case 0xD3:
@@ -1824,6 +1931,21 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             }
             vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL1, 1);
             break;
+        case 0xE7:
+            INST_NAME("MOVNTDQ Ex, Gx");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEWANY, 1);
+            GETGX_vector(v0, 0, dyn->vector_eew);
+            if (MODREG) {
+                ed = (nextop & 7) + (rex.b << 3);
+                v1 = sse_get_reg_empty_vector(dyn, ninst, x1, ed);
+                VMV_V_V(v1, v0);
+            } else {
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 0, 0);
+                VSE_V(v0, ed, dyn->vector_eew, VECTOR_UNMASKED, VECTOR_NFIELD1);
+                SMWRITE2();
+            }
+            break;
         case 0xE8:
             INST_NAME("PSUBSB Gx, Ex");
             nextop = F8;
diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h
index 93fe178f..f56b68e7 100644
--- a/src/dynarec/rv64/rv64_emitter.h
+++ b/src/dynarec/rv64/rv64_emitter.h
@@ -1633,6 +1633,9 @@ f28–31 ft8–11 FP temporaries Caller
 // Warning: zero-extended on xtheadvector!
 #define VMV_X_S(rd, vs2) EMIT(R_type((rv64_xtheadvector ? 0b0011001 : 0b0100001), vs2, 0b00000, 0b010, rd, 0b1010111)) // 0100001.....00000010.....1010111
 
+// Warning: xtheadvector only
+#define VEXT_X_V(rd, vs2, rs1) EMIT(R_type((rv64_xtheadvector ? 0b0011001 : 0b0100001), vs2, rs1, 0b010, rd, 0b1010111))
+
 // Vector Integer Extension Instructions
 // https://github.com/riscv/riscv-v-spec/blob/e49574c92b072fd4d71e6cb20f7e8154de5b83fe/v-spec.adoc#123-vector-integer-extension
```
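For readers unfamiliar with the SSE2 semantics being emulated, here is a minimal scalar sketch of what the new CMPPD and SHUFPD cases compute per 64-bit lane. The helper names (`cmppd_lane`, `shufpd`) are illustrative only and are not part of box64:

```c
#include <math.h>
#include <stdint.h>

// Scalar model of one CMPPD lane (imm8 & 7), mirroring the RVV mask
// sequence above: the lane becomes all-ones when the predicate holds.
static uint64_t cmppd_lane(double a, double b, int imm8)
{
    int unordered = isnan(a) || isnan(b);
    int r;
    switch (imm8 & 7) {
        case 0: r = (a == b); break;               // EQ  (false on NaN)
        case 1: r = !unordered && (a < b); break;  // LT  (ordered)
        case 2: r = !unordered && (a <= b); break; // LE  (ordered)
        case 3: r = unordered; break;              // UNORD ("NaN")
        case 4: r = !(a == b); break;              // NEQ (true on NaN)
        case 5: r = unordered || (a >= b); break;  // NLT (unordered or >=)
        case 6: r = unordered || (a > b); break;   // NLE (unordered or >)
        default: r = !unordered; break;            // ORD ("Not NaN")
    }
    return r ? ~UINT64_C(0) : UINT64_C(0);
}

// Scalar model of SHUFPD Gx, Ex, imm8: the result's low lane comes
// from Gx, the high lane from Ex, each selected by one imm8 bit.
static void shufpd(double gx[2], const double ex[2], int imm8)
{
    double lo = gx[imm8 & 1];
    double hi = ex[(imm8 >> 1) & 1];
    gx[0] = lo;
    gx[1] = hi;
}
```

This also shows why the CMPPD path ANDs the two `VMFEQ_VV(x, x)` self-equality masks before the inner switch: that product is exactly the `!unordered` term in the sketch, since a lane compares equal to itself unless it holds a NaN.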