| author | Yang Liu <liuyang22@iscas.ac.cn> | 2024-10-12 18:46:08 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-10-12 12:46:08 +0200 |
| commit | 6a3a19da68b6d4d59d368172f2f3e411326258fd (patch) | |
| tree | e4fc7d09d4fe8682de6e415d66f147d5950546e7 /src | |
| parent | efd103004c770e8ec4646c11c24b92a5d8d49e54 (diff) | |
| download | box64-6a3a19da68b6d4d59d368172f2f3e411326258fd.tar.gz box64-6a3a19da68b6d4d59d368172f2f3e411326258fd.zip | |
[RV64_DYNAREC] Fixed more issues for vector (#1928)
* [RV64_DYNAREC] Fixed the emitter for xtheadvector
* MOVSD can be unaligned
* Fixed unaligned issues
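To illustrate the "MOVSD can be unaligned" point: x86 scalar SSE loads such as `MOVSD xmm, m64` carry no alignment requirement, while a unit-stride RVV load at SEW=64 may trap on an address that is not 8-byte aligned on some implementations (XTheadVector in particular). The patch therefore loads the memory operand as eight SEW=8 elements under a 0xFF mask and only then switches back to the target element width. The stand-alone C sketch below (not box64 code; the buffer and helper are made up for illustration) shows the same idea at data level: a byte-granular copy reads a 64-bit value from any address, whereas a direct 64-bit access assumes an 8-byte-aligned pointer.

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Stand-alone illustration, not box64 code: a byte-granular copy (the
 * software analogue of the SEW=8, mask=0xFF vector load used in this patch)
 * works at any alignment. */
static uint64_t load_u64_unaligned(const void* p)
{
    uint64_t v;
    memcpy(&v, p, sizeof(v)); /* reads 8 bytes regardless of alignment */
    return v;
}

int main(void)
{
    uint8_t buf[16];
    for (int i = 0; i < 16; i++)
        buf[i] = (uint8_t)i;
    /* buf + 3 is deliberately misaligned for a 64-bit access. */
    printf("value at unaligned offset: 0x%016llx\n",
           (unsigned long long)load_u64_unaligned(buf + 3));
    return 0;
}
```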
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_f20f_vector.c | 58 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_f30f_vector.c | 25 |
| -rw-r--r-- | src/dynarec/rv64/rv64_emitter.h | 4 |
3 files changed, 55 insertions, 32 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_f20f_vector.c b/src/dynarec/rv64/dynarec_rv64_f20f_vector.c
index 90e3c21b..040cc313 100644
--- a/src/dynarec/rv64/dynarec_rv64_f20f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_f20f_vector.c
@@ -50,8 +50,8 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             INST_NAME("MOVSD Gx, Ex");
             nextop = F8;
             GETG;
-            SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
             if (MODREG) {
+                SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
                 ed = (nextop & 7) + (rex.b << 3);
                 v0 = sse_get_reg_vector(dyn, ninst, x1, gd, 1, VECTOR_SEW64);
                 v1 = sse_get_reg_vector(dyn, ninst, x1, ed, 0, VECTOR_SEW64);
@@ -64,11 +64,12 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
                 }
             } else {
                 SMREAD();
+                SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); // unaligned
                 v0 = sse_get_reg_empty_vector(dyn, ninst, x1, gd);
                 d0 = fpu_get_scratch(dyn);
                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
-                vector_loadmask(dyn, ninst, VMASK, 0b01, x4, 1);
-                VLE64_V(d0, ed, VECTOR_MASKED, VECTOR_NFIELD1);
+                vector_loadmask(dyn, ninst, VMASK, 0xFF, x4, 1);
+                VLE8_V(d0, ed, VECTOR_MASKED, VECTOR_NFIELD1);
                 VXOR_VV(v0, v0, v0, VECTOR_UNMASKED);
                 VMERGE_VVM(v0, v0, d0); // implies VMASK
             }
@@ -124,15 +125,17 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             INST_NAME("CVTTSD2SI Gd, Ex");
             nextop = F8;
             GETGD;
-            SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
             if (MODREG) {
+                SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
                 v0 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, dyn->vector_eew);
             } else {
                 SMREAD();
                 v0 = fpu_get_scratch(dyn);
-                vector_loadmask(dyn, ninst, VMASK, 0b01, x4, 1);
+                SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
-                VLE_V(v0, ed, dyn->vector_eew, VECTOR_MASKED, VECTOR_NFIELD1);
+                vector_loadmask(dyn, ninst, VMASK, 0xFF, x4, 1);
+                VLE8_V(v0, ed, VECTOR_MASKED, VECTOR_NFIELD1);
+                SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
             }
             if (box64_dynarec_fastround) {
                 VFMV_F_S(v0, v0);
@@ -157,15 +160,17 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             INST_NAME("CVTSD2SI Gd, Ex");
             nextop = F8;
             GETGD;
-            SET_ELEMENT_WIDTH(x1, (rex.w ? VECTOR_SEW64 : VECTOR_SEW32), 1);
-            vector_loadmask(dyn, ninst, VMASK, 0b01, x4, 1);
             if (MODREG) {
+                SET_ELEMENT_WIDTH(x1, (rex.w ? VECTOR_SEW64 : VECTOR_SEW32), 1);
                 v0 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, dyn->vector_eew);
             } else {
                 SMREAD();
                 v0 = fpu_get_scratch(dyn);
+                SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
-                VLE_V(v0, ed, dyn->vector_eew, VECTOR_MASKED, VECTOR_NFIELD1);
+                vector_loadmask(dyn, ninst, VMASK, 0xFF, x4, 1);
+                VLE8_V(v0, ed, VECTOR_MASKED, VECTOR_NFIELD1);
+                SET_ELEMENT_WIDTH(x1, (rex.w ? VECTOR_SEW64 : VECTOR_SEW32), 1);
             }
             if (box64_dynarec_fastround) {
                 VFMV_F_S(v0, v0);
@@ -193,18 +198,22 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
         case 0x59:
             INST_NAME("MULSD Gx, Ex");
             nextop = F8;
-            SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
-            GETGX_vector(v0, 1, VECTOR_SEW64);
-            vector_loadmask(dyn, ninst, VMASK, 0b01, x4, 1);
             if (MODREG) {
+                SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+                GETGX_vector(v0, 1, VECTOR_SEW64);
                 v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64);
             } else {
                 SMREAD();
                 v1 = fpu_get_scratch(dyn);
+                SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
-                VLE64_V(v1, ed, VECTOR_MASKED, VECTOR_NFIELD1);
+                vector_loadmask(dyn, ninst, VMASK, 0xFF, x4, 1);
+                VLE8_V(v1, ed, VECTOR_MASKED, VECTOR_NFIELD1);
+                SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+                GETGX_vector(v0, 1, VECTOR_SEW64);
             }
             if (box64_dynarec_fastnan) {
+                vector_loadmask(dyn, ninst, VMASK, 0b01, x4, 1);
                 VFMUL_VV(v0, v0, v1, VECTOR_MASKED);
             } else {
                 VFMV_F_S(v0, v0);
@@ -230,16 +239,19 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
         case 0x5E:
             INST_NAME("DIVSD Gx, Ex");
             nextop = F8;
-            SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
-            GETGX_vector(v0, 1, VECTOR_SEW64);
-            vector_loadmask(dyn, ninst, VMASK, 0b01, x4, 1);
             if (MODREG) {
+                SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+                GETGX_vector(v0, 1, VECTOR_SEW64);
                 v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64);
             } else {
                 SMREAD();
                 v1 = fpu_get_scratch(dyn);
+                SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
-                VLE64_V(v1, ed, VECTOR_MASKED, VECTOR_NFIELD1);
+                vector_loadmask(dyn, ninst, VMASK, 0xFF, x4, 1);
+                VLE8_V(v1, ed, VECTOR_MASKED, VECTOR_NFIELD1);
+                SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+                GETGX_vector(v0, 1, VECTOR_SEW64);
             }
             if (!box64_dynarec_fastnan) {
                 VFMV_F_S(v0, v0);
@@ -262,22 +274,26 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
                     VFMV_S_F(v0, v0);
                 }
             } else {
+                vector_loadmask(dyn, ninst, VMASK, 0b01, x4, 1);
                 VFDIV_VV(v0, v0, v1, VECTOR_MASKED);
             }
             break;
         case 0xC2:
             INST_NAME("CMPSD Gx, Ex, Ib");
             nextop = F8;
-            SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
-            GETGX_vector(d0, 1, VECTOR_SEW64);
             if (MODREG) {
+                SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+                GETGX_vector(d0, 1, VECTOR_SEW64);
                 d1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64);
             } else {
                 SMREAD();
-                vector_loadmask(dyn, ninst, VMASK, 0b01, x4, 1);
                 d1 = fpu_get_scratch(dyn);
+                SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 1);
-                VLE64_V(d1, ed, VECTOR_MASKED, VECTOR_NFIELD1);
+                vector_loadmask(dyn, ninst, VMASK, 0xFF, x4, 1);
+                VLE8_V(d1, ed, VECTOR_MASKED, VECTOR_NFIELD1);
+                SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+                GETGX_vector(d0, 1, VECTOR_SEW64);
             }
             u8 = F8;
             VFMV_F_S(d0, d0);
diff --git a/src/dynarec/rv64/dynarec_rv64_f30f_vector.c b/src/dynarec/rv64/dynarec_rv64_f30f_vector.c
index 11975ae8..43b5de83 100644
--- a/src/dynarec/rv64/dynarec_rv64_f30f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_f30f_vector.c
@@ -130,18 +130,22 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
         case 0x59:
             INST_NAME("MULSS Gx, Ex");
             nextop = F8;
-            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
-            GETGX_vector(v0, 1, VECTOR_SEW32);
-            vector_loadmask(dyn, ninst, VMASK, 0b0001, x4, 1);
             if (MODREG) {
+                SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+                GETGX_vector(v0, 1, VECTOR_SEW32);
                 v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW32);
             } else {
                 SMREAD();
                 v1 = fpu_get_scratch(dyn);
+                SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
-                VLE32_V(v1, ed, VECTOR_MASKED, VECTOR_NFIELD1);
+                vector_loadmask(dyn, ninst, VMASK, 0xFF, x4, 1);
+                VLE8_V(v1, ed, VECTOR_MASKED, VECTOR_NFIELD1);
+                SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+                GETGX_vector(v0, 1, VECTOR_SEW32);
             }
             if (box64_dynarec_fastnan) {
+                vector_loadmask(dyn, ninst, VMASK, 0b0001, x4, 1);
                 VFMUL_VV(v0, v0, v1, VECTOR_MASKED);
             } else {
                 VFMV_F_S(v0, v0);
@@ -167,22 +171,25 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
         case 0x5A:
             INST_NAME("CVTSS2SD Gx, Ex");
             nextop = F8;
-            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
-            GETGX_vector(v0, 1, VECTOR_SEW32);
-            vector_loadmask(dyn, ninst, VMASK, 0b0001, x4, 1);
             if (MODREG) {
+                SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+                GETGX_vector(v0, 1, VECTOR_SEW32);
                 v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW32);
             } else {
                 SMREAD();
                 v1 = fpu_get_scratch(dyn);
+                SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
-                VLE32_V(v1, ed, VECTOR_MASKED, VECTOR_NFIELD1);
+                vector_loadmask(dyn, ninst, VMASK, 0xFF, x4, 1);
+                VLE8_V(v1, ed, VECTOR_MASKED, VECTOR_NFIELD1);
+                SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+                GETGX_vector(v0, 1, VECTOR_SEW32);
             }
             d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2);
+            vector_loadmask(dyn, ninst, VMASK, 0b0001, x4, 1);
             VFWCVT_F_F_V(d0, v1, VECTOR_MASKED);
             SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
             if (rv64_xtheadvector) {
-                vector_loadmask(dyn, ninst, VMASK, 0b01, x4, 1);
                 VMERGE_VVM(v0, v0, d0); // implies VMASK
             } else {
                 VMV_X_S(x4, d0);
diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h
index 342bf818..93fe178f 100644
--- a/src/dynarec/rv64/rv64_emitter.h
+++ b/src/dynarec/rv64/rv64_emitter.h
@@ -1378,7 +1378,7 @@ f28–31 ft8–11 FP temporaries Caller
 #define VFSLIDE1UP_VF(vd, vs2, rs1, vm) EMIT(R_type(0b0011100 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 001110...........101.....1010111
 #define VFSLIDE1DOWN_VF(vd, vs2, rs1, vm) EMIT(R_type(0b0011110 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 001111...........101.....1010111
 
-#define VFMV_S_F(vd, rs1) EMIT(I_type(0b010000100000, rs1, 0b101, vd, 0b1010111)) // 010000100000.....101.....1010111
+#define VFMV_S_F(vd, rs1) EMIT(I_type((rv64_xtheadvector ? 0b001101100000 : 0b010000100000), rs1, 0b101, vd, 0b1010111)) // 010000100000.....101.....1010111
 #define VFMV_V_F(vd, rs1) EMIT(I_type(0b010111100000, rs1, 0b101, vd, 0b1010111)) // 010111100000.....101.....1010111
 
 #define VFMERGE_VFM(vd, vs2, rs1) EMIT(R_type(0b0101110, vs2, rs1, 0b101, vd, 0b1010111)) // 0101110..........101.....1010111
@@ -1424,7 +1424,7 @@ f28–31 ft8–11 FP temporaries Caller
 #define VFSGNJN_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0010010 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 001001...........001.....1010111
 #define VFSGNJX_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0010100 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 001010...........001.....1010111
 
-#define VFMV_F_S(rd, vs2) EMIT(R_type(0b0100001, vs2, 0b00000, 0b001, rd, 0b1010111)) // 0100001.....00000001.....1010111
+#define VFMV_F_S(rd, vs2) EMIT(R_type((rv64_xtheadvector ? 0b0011001 : 0b0100001), vs2, 0b00000, 0b001, rd, 0b1010111)) // 0100001.....00000001.....1010111
 
 #define VMFEQ_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0110000 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 011000...........001.....1010111
 #define VMFLE_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0110010 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 011001...........001.....1010111
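The rv64_emitter.h hunks cover the "Fixed the emitter for xtheadvector" item: XTheadVector encodes vfmv.s.f and vfmv.f.s with different funct bits than RVV 1.0, so the macros now pick the bits at emission time from rv64_xtheadvector. Below is a minimal, self-contained sketch of that selection; the R_type/I_type helpers and the demo main are stand-ins assuming standard RISC-V field layouts, not the box64 macros.

```c
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-ins for box64's instruction encoders (assumed here; the real macros
 * live in rv64_emitter.h). They follow the standard RISC-V R-type/I-type
 * field layout. */
static uint32_t R_type(uint32_t funct7, uint32_t rs2, uint32_t rs1,
                       uint32_t funct3, uint32_t rd, uint32_t opcode)
{
    return (funct7 << 25) | (rs2 << 20) | (rs1 << 15) | (funct3 << 12) | (rd << 7) | opcode;
}

static uint32_t I_type(uint32_t imm12, uint32_t rs1, uint32_t funct3,
                       uint32_t rd, uint32_t opcode)
{
    return (imm12 << 20) | (rs1 << 15) | (funct3 << 12) | (rd << 7) | opcode;
}

/* Hypothetical flag standing in for box64's runtime XTheadVector detection. */
static int rv64_xtheadvector = 1;

/* Same selection the patched VFMV_S_F / VFMV_F_S macros make: XTheadVector
 * uses different funct bits for these moves than RVV 1.0. */
static uint32_t enc_vfmv_s_f(uint32_t vd, uint32_t rs1)
{
    return I_type(rv64_xtheadvector ? 0b001101100000 : 0b010000100000,
                  rs1, 0b101, vd, 0b1010111);
}

static uint32_t enc_vfmv_f_s(uint32_t rd, uint32_t vs2)
{
    return R_type(rv64_xtheadvector ? 0b0011001 : 0b0100001,
                  vs2, 0b00000, 0b001, rd, 0b1010111);
}

int main(void)
{
    printf("vfmv.s.f v1, fa0 -> 0x%08" PRIx32 "\n", enc_vfmv_s_f(1, 10));
    printf("vfmv.f.s fa0, v1 -> 0x%08" PRIx32 "\n", enc_vfmv_f_s(10, 1));
    return 0;
}
```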