| author | Yang Liu <liuyang22@iscas.ac.cn> | 2024-09-29 16:38:49 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-09-29 10:38:49 +0200 |
| commit | 2a7eabfb1c4f883e20b3415cf8cfe87840a35e71 (patch) | |
| tree | 97c4e34235072e857467f313a113d56abf705de8 /src | |
| parent | b4a0ae20c9c9c6644d6c5993498f5d173b335928 (diff) | |
| download | box64-2a7eabfb1c4f883e20b3415cf8cfe87840a35e71.tar.gz box64-2a7eabfb1c4f883e20b3415cf8cfe87840a35e71.zip | |
[RV64_DYNAREC] Refined RISC-V vector emitter (#1884)
Diffstat (limited to 'src')
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_0f_vector.c | 18 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_660f_vector.c | 416 |
| -rw-r--r-- | src/dynarec/rv64/rv64_emitter.h | 588 |
3 files changed, 510 insertions, 512 deletions
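
The change is almost entirely mechanical: the vector emitter macros in rv64_emitter.h keep their encodings but reorder their parameters so that the vector source (vs2) comes right after the destination and the scalar register or immediate operand follows it, presumably to match the vs2-first field order of the encodings (and, for most instructions, RVV assembly syntax). Every call site in the two dynarec files is updated to the new order, and the old VECTOR_MASKREG alias for v0 is dropped in favour of VMASK. Below is a minimal sketch of the reorder using the VLUXEI64_V indexed-load macro and one of its call sites from the patch; EMIT, R_type, the VECTOR_* values and the _OLD name are simplified stand-ins for illustration only, not box64's real definitions.

```c
#include <stdint.h>
#include <stdio.h>

/* Stand-ins for box64's helpers: the real R_type/EMIT live in rv64_emitter.h
 * and write into the dynarec block; here R_type just packs a standard RISC-V
 * R-type word and EMIT prints it, so the demo is self-contained. */
static uint32_t R_type(uint32_t funct7, uint32_t rs2, uint32_t rs1,
                       uint32_t funct3, uint32_t rd, uint32_t opcode)
{
    return (funct7 << 25) | (rs2 << 20) | (rs1 << 15) | (funct3 << 12) | (rd << 7) | opcode;
}
#define EMIT(ins)      printf("0x%08x\n", (unsigned)(ins))
#define VECTOR_MASKED  0b0   /* vm bit, demo value only */
#define VECTOR_NFIELD1 0b000 /* nf field, demo value only */

/* Old signature: the base register rs1 came before the index vector vs2. */
#define VLUXEI64_V_OLD(vd, rs1, vs2, vm, nf) \
    EMIT(R_type(((nf) << 4) | (vm) | 0b0010, vs2, rs1, 0b111, vd, 0b0000111))

/* New signature after the patch: (vd, vs2, rs1, vm, nf). The expansion is
 * byte-for-byte the same; only the parameter order changes. */
#define VLUXEI64_V(vd, vs2, rs1, vm, nf) \
    EMIT(R_type(((nf) << 4) | (vm) | 0b0010, vs2, rs1, 0b111, vd, 0b0000111))

int main(void)
{
    int v0 = 1, q0 = 2, ed = 10; /* arbitrary register numbers for the demo */

    /* The MOVLPD call site from dynarec_rv64_660f_vector.c, before and after
     * the patch; both lines emit the identical instruction word. */
    VLUXEI64_V_OLD(v0, ed, q0, VECTOR_MASKED, VECTOR_NFIELD1);
    VLUXEI64_V(v0, q0, ed, VECTOR_MASKED, VECTOR_NFIELD1);
    return 0;
}
```

Since the macro expansions themselves are untouched, the patch changes no generated code; it only makes the dynarec call sites read in the same operand order as the underlying encodings.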
diff --git a/src/dynarec/rv64/dynarec_rv64_0f_vector.c b/src/dynarec/rv64/dynarec_rv64_0f_vector.c index 104db227..454fa02a 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f_vector.c +++ b/src/dynarec/rv64/dynarec_rv64_0f_vector.c @@ -102,7 +102,7 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, GETGX_vector(v0, 1, dyn->vector_eew); GETEX_vector(v1, 0, 0, VECTOR_SEW64); q0 = fpu_get_scratch(dyn); - VSLIDEDOWN_VI(q0, 1, v1, VECTOR_UNMASKED); + VSLIDEDOWN_VI(q0, v1, 1, VECTOR_UNMASKED); VMV_X_S(x4, q0); VMV_S_X(v0, x4); } else { @@ -126,7 +126,7 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, v1 = fpu_get_scratch(dyn); VMV_V_V(v1, v0); } - VSLIDEUP_VI(v0, 1, v1, VECTOR_UNMASKED); + VSLIDEUP_VI(v0, v1, 1, VECTOR_UNMASKED); } else { INST_NAME("MOVHPS Gx, Ex"); SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); @@ -137,7 +137,7 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, MOV64x(x4, 0xFF); VMV_S_X(VMASK, x4); VLE8_V(v1, ed, VECTOR_MASKED, VECTOR_NFIELD1); - VSLIDEUP_VI(v0, 8, v1, VECTOR_UNMASKED); + VSLIDEUP_VI(v0, v1, 8, VECTOR_UNMASKED); } break; case 0x17: @@ -149,13 +149,13 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, if (MODREG) { v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64); q0 = fpu_get_scratch(dyn); - VSLIDE1DOWN_VX(q0, xZR, v0, VECTOR_UNMASKED); + VSLIDE1DOWN_VX(q0, v0, xZR, VECTOR_UNMASKED); VMV_X_S(x4, q0); VMV_S_X(v1, x4); } else { addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0); q0 = fpu_get_scratch(dyn); - VSLIDE1DOWN_VX(q0, xZR, v0, VECTOR_UNMASKED); + VSLIDE1DOWN_VX(q0, v0, xZR, VECTOR_UNMASKED); VMV_X_S(x4, q0); SD(x4, ed, fixedaddress); SMWRITE2(); @@ -205,7 +205,7 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, } else { q0 = sse_get_reg_vector(dyn, ninst, x1, gd, 1, dyn->vector_eew); GETEX_vector(q1, 0, 0, dyn->vector_eew); - VXOR_VV(q0, q0, q1, VECTOR_UNMASKED); + VXOR_VV(q0, q1, q0, VECTOR_UNMASKED); } break; case 0xC6: @@ -220,14 +220,14 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, d1 = fpu_get_scratch(dyn); tmp64u0 = (((u8 >> 2) & 3) << 16) | (u8 & 3); VECTOR_SPLAT_IMM(q0, tmp64u0, x4); - VRGATHEREI16_VV(d0, q0, v0, VECTOR_UNMASKED); + VRGATHEREI16_VV(d0, v0, q0, VECTOR_UNMASKED); tmp64u1 = (((u8 >> 6) & 3) << 16) | ((u8 >> 4) & 3); if (tmp64u1 != tmp64u0) { VECTOR_SPLAT_IMM(q0, tmp64u1, x4); } - VRGATHEREI16_VV(d1, q0, v1, VECTOR_UNMASKED); + VRGATHEREI16_VV(d1, v1, q0, VECTOR_UNMASKED); VMV_V_V(v0, d0); - VSLIDEUP_VI(v0, 2, d1, VECTOR_UNMASKED); + VSLIDEUP_VI(v0, d1, 2, VECTOR_UNMASKED); break; case 0x00 ... 
0x0F: case 0x18: diff --git a/src/dynarec/rv64/dynarec_rv64_660f_vector.c b/src/dynarec/rv64/dynarec_rv64_660f_vector.c index 81afe063..d43297d3 100644 --- a/src/dynarec/rv64/dynarec_rv64_660f_vector.c +++ b/src/dynarec/rv64/dynarec_rv64_660f_vector.c @@ -95,14 +95,14 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i v1 = fpu_get_scratch(dyn); VMV_V_V(v1, v0); } - VSLIDEUP_VI(v0, 1, v1, VECTOR_UNMASKED); + VSLIDEUP_VI(v0, v1, 1, VECTOR_UNMASKED); } else { q0 = fpu_get_scratch(dyn); VXOR_VV(q0, q0, q0, VECTOR_UNMASKED); VMV_V_I(VMASK, 0b10); SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x3, x2, &fixedaddress, rex, NULL, 0, 0); - VLUXEI64_V(v0, ed, q0, VECTOR_MASKED, VECTOR_NFIELD1); + VLUXEI64_V(v0, q0, ed, VECTOR_MASKED, VECTOR_NFIELD1); } break; case 0x15: @@ -115,14 +115,14 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i if (MODREG) { v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64); q0 == fpu_get_scratch(dyn); - VSLIDE1DOWN_VX(q0, xZR, v0, VECTOR_UNMASKED); + VSLIDE1DOWN_VX(q0, v0, xZR, VECTOR_UNMASKED); VMV_X_S(x4, q0); if (v0 != v1) { VMV_V_V(v0, v1); } VMV_S_X(v0, x4); } else { q0 = fpu_get_scratch(dyn); VMV_V_I(VMASK, 0b10); - VSLIDE1DOWN_VX(v0, xZR, v0, VECTOR_UNMASKED); + VSLIDE1DOWN_VX(v0, v0, xZR, VECTOR_UNMASKED); SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x3, x2, &fixedaddress, rex, NULL, 0, 0); VLE64_V(v0, ed, VECTOR_MASKED, VECTOR_NFIELD1); @@ -179,8 +179,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i v1 = fpu_get_scratch(dyn); ADDI(x4, xZR, 0b000010001111); VMV_V_X(v0, x4); // broadcast the mask - VAND_VV(v0, v0, q1, VECTOR_UNMASKED); - VRGATHER_VV(v1, v0, q0, VECTOR_UNMASKED); // registers cannot be overlapped!! + VAND_VV(v0, q1, v0, VECTOR_UNMASKED); + VRGATHER_VV(v1, q0, v0, VECTOR_UNMASKED); // registers cannot be overlapped!! VMV_V_V(q0, v1); break; case 0x01: @@ -196,14 +196,14 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i VMV_V_V(v0, q0); if (q1 & 1) VMV_V_V(d1, q1); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL2, 2); - VSLIDEUP_VI(v0, 8, (q1 & 1) ? d1 : q1, VECTOR_UNMASKED); + VSLIDEUP_VI(v0, (q1 & 1) ? d1 : q1, 8, VECTOR_UNMASKED); MOV64x(x4, 0b0101010101010101); VMV_S_X(VMASK, x4); - VCOMPRESS_VM(d0, VMASK, v0); - VXOR_VI(VMASK, 0x1F, VMASK, VECTOR_UNMASKED); - VCOMPRESS_VM(d1, VMASK, v0); + VCOMPRESS_VM(d0, v0, VMASK); + VXOR_VI(VMASK, VMASK, 0x1F, VECTOR_UNMASKED); + VCOMPRESS_VM(d1, v0, VMASK); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL1, 1); - VADD_VV(q0, d0, d1, VECTOR_UNMASKED); + VADD_VV(q0, d1, d0, VECTOR_UNMASKED); break; case 0x02: INST_NAME("PHADDD Gx, Ex"); @@ -218,14 +218,14 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i VMV_V_V(v0, q0); if (q1 & 1) VMV_V_V(d1, q1); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL2, 2); - VSLIDEUP_VI(v0, 4, (q1 & 1) ? d1 : q1, VECTOR_UNMASKED); + VSLIDEUP_VI(v0, (q1 & 1) ? 
d1 : q1, 4, VECTOR_UNMASKED); MOV64x(x4, 0b01010101); VMV_S_X(VMASK, x4); - VCOMPRESS_VM(d0, VMASK, v0); - VXOR_VI(VMASK, 0x1F, VMASK, VECTOR_UNMASKED); - VCOMPRESS_VM(d1, VMASK, v0); + VCOMPRESS_VM(d0, v0, VMASK); + VXOR_VI(VMASK, VMASK, 0x1F, VECTOR_UNMASKED); + VCOMPRESS_VM(d1, v0, VMASK); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL1, 1); - VADD_VV(q0, d0, d1, VECTOR_UNMASKED); + VADD_VV(q0, d1, d0, VECTOR_UNMASKED); break; case 0x04: INST_NAME("PMADDUBSW Gx, Ex"); @@ -236,15 +236,15 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); d1 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); // no more scratches! - VWMULSU_VV(v0, q0, q1, VECTOR_UNMASKED); + VWMULSU_VV(v0, q1, q0, VECTOR_UNMASKED); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL2, 2); MOV64x(x4, 0b0101010101010101); VMV_S_X(VMASK, x4); - VCOMPRESS_VM(d0, VMASK, v0); - VXOR_VI(VMASK, 0x1F, VMASK, VECTOR_UNMASKED); - VCOMPRESS_VM(d1, VMASK, v0); + VCOMPRESS_VM(d0, v0, VMASK); + VXOR_VI(VMASK, VMASK, 0x1F, VECTOR_UNMASKED); + VCOMPRESS_VM(d1, v0, VMASK); SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); - VSADD_VV(q0, d0, d1, VECTOR_UNMASKED); + VSADD_VV(q0, d1, d0, VECTOR_UNMASKED); break; case 0x08 ... 0x0A: if (nextop == 0x08) { @@ -272,14 +272,14 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i v0 = fpu_get_scratch(dyn); v1 = fpu_get_scratch(dyn); // absolute - VSRA_VI(v0, i32, q1, VECTOR_UNMASKED); - VXOR_VV(v1, v0, q0, VECTOR_UNMASKED); - VSUB_VV(v1, v0, v1, VECTOR_UNMASKED); + VSRA_VI(v0, q1, i32, VECTOR_UNMASKED); + VXOR_VV(v1, q0, v0, VECTOR_UNMASKED); + VSUB_VV(v1, v1, v0, VECTOR_UNMASKED); // handle zeroing - VMSEQ_VI(VECTOR_MASKREG, 0, q1, VECTOR_UNMASKED); + VMSEQ_VI(VMASK, q1, 0, VECTOR_UNMASKED); VXOR_VV(v0, v0, v0, VECTOR_UNMASKED); - VADC_VIM(v0, 0x1f, v0); // implies VECTOR_MASKREG - VAND_VV(q0, v1, v0, VECTOR_UNMASKED); + VADC_VIM(v0, v0, 0x1f); // implies VMASK + VAND_VV(q0, v0, v1, VECTOR_UNMASKED); break; case 0x0B: INST_NAME("PMULHRSW Gx, Ex"); @@ -289,12 +289,12 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i GETEX_vector(q1, 0, 0, VECTOR_SEW16); v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); fpu_get_scratch(dyn); - VWMUL_VV(v0, q0, q1, VECTOR_UNMASKED); + VWMUL_VV(v0, q1, q0, VECTOR_UNMASKED); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL2, 2); - VSRL_VI(v0, 14, v0, VECTOR_UNMASKED); - VADD_VI(v0, 1, v0, VECTOR_UNMASKED); + VSRL_VI(v0, v0, 14, VECTOR_UNMASKED); + VADD_VI(v0, v0, 1, VECTOR_UNMASKED); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL1, 1); - VNSRL_WI(q0, 1, v0, VECTOR_UNMASKED); + VNSRL_WI(q0, v0, 1, VECTOR_UNMASKED); break; case 0x14: INST_NAME("PBLENDVPS Gx, Ex"); @@ -303,8 +303,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i GETGX_vector(q0, 1, VECTOR_SEW32); GETEX_vector(q1, 0, 0, VECTOR_SEW32); v0 = sse_get_reg_vector(dyn, ninst, x4, 0, 0, VECTOR_SEW32); - VMSLT_VX(VMASK, xZR, v0, VECTOR_UNMASKED); - VADD_VX(q0, xZR, q1, VECTOR_MASKED); + VMSLT_VX(VMASK, v0, xZR, VECTOR_UNMASKED); + VADD_VX(q0, q1, xZR, VECTOR_MASKED); break; case 0x17: INST_NAME("PTEST Gx, Ex"); @@ -317,17 +317,17 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SET_DFNONE(); v0 = fpu_get_scratch(dyn); IFX (X_ZF) { - VAND_VV(v0, q0, q1, VECTOR_MASKED); - VMSGT_VX(VMASK, xZR, v0, VECTOR_UNMASKED); + VAND_VV(v0, q1, q0, 
VECTOR_MASKED); + VMSGT_VX(VMASK, v0, xZR, VECTOR_UNMASKED); VMV_X_S(x4, VMASK); ANDI(x4, x4, 0b11); BNEZ(x3, 8); ORI(xFlags, xFlags, 1 << F_ZF); } IFX (X_CF) { - VXOR_VI(v0, 0x1F, q0, VECTOR_UNMASKED); - VAND_VV(v0, v0, q1, VECTOR_MASKED); - VMSGT_VX(VMASK, xZR, v0, VECTOR_UNMASKED); + VXOR_VI(v0, q0, 0x1F, VECTOR_UNMASKED); + VAND_VV(v0, q1, v0, VECTOR_MASKED); + VMSGT_VX(VMASK, v0, xZR, VECTOR_UNMASKED); VMV_X_S(x4, VMASK); ANDI(x4, x4, 0b11); BNEZ(x3, 8); @@ -350,9 +350,9 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i GETEX_vector(q1, 0, 0, u8); GETGX_empty_vector(q0); v0 = fpu_get_scratch(dyn); - VSRA_VI(v0, 0x1F, q1, VECTOR_UNMASKED); - VXOR_VV(q0, q1, v0, VECTOR_UNMASKED); - VSUB_VV(q0, v0, q0, VECTOR_UNMASKED); + VSRA_VI(v0, q1, 0x1F, VECTOR_UNMASKED); + VXOR_VV(q0, v0, q1, VECTOR_UNMASKED); + VSUB_VV(q0, q0, v0, VECTOR_UNMASKED); break; case 0x20: INST_NAME("PMOVSXBW Gx, Ex"); @@ -362,7 +362,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i GETGX_empty_vector(q0); v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW8, VECTOR_LMUL1, 0.5); - VWADD_VX(v0, xZR, q1, VECTOR_UNMASKED); + VWADD_VX(v0, q1, xZR, VECTOR_UNMASKED); SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); VMV_V_V(q0, v0); break; @@ -376,9 +376,9 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i fpu_get_scratch(dyn); v1 = fpu_get_scratch(dyn); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW8, VECTOR_LMUL1, 0.25); - VWADD_VX(v0, xZR, q1, VECTOR_UNMASKED); + VWADD_VX(v0, q1, xZR, VECTOR_UNMASKED); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL1, 0.5); - VWADD_VX(v1, xZR, v0, VECTOR_UNMASKED); + VWADD_VX(v1, v0, xZR, VECTOR_UNMASKED); SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1); VMV_V_V(q0, v1); break; @@ -392,11 +392,11 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i fpu_get_scratch(dyn); v1 = fpu_get_scratch(dyn); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW8, VECTOR_LMUL1, 0.125); - VWADD_VX(v0, xZR, q1, VECTOR_UNMASKED); + VWADD_VX(v0, q1, xZR, VECTOR_UNMASKED); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL1, 0.25); - VWADD_VX(v1, xZR, v0, VECTOR_UNMASKED); + VWADD_VX(v1, v0, xZR, VECTOR_UNMASKED); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL1, 0.5); - VWADD_VX(v0, xZR, v1, VECTOR_UNMASKED); + VWADD_VX(v0, v1, xZR, VECTOR_UNMASKED); SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); VMV_V_V(q0, v0); break; @@ -408,7 +408,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i GETGX_empty_vector(q0); v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL1, 0.5); - VWADD_VX(v0, xZR, q1, VECTOR_UNMASKED); + VWADD_VX(v0, q1, xZR, VECTOR_UNMASKED); SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1); VMV_V_V(q0, v0); break; @@ -422,9 +422,9 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i fpu_get_scratch(dyn); v1 = fpu_get_scratch(dyn); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL1, 0.25); - VWADD_VX(v0, xZR, q1, VECTOR_UNMASKED); + VWADD_VX(v0, q1, xZR, VECTOR_UNMASKED); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL1, 0.5); - VWADD_VX(v1, xZR, v0, VECTOR_UNMASKED); + VWADD_VX(v1, v0, xZR, VECTOR_UNMASKED); SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); VMV_V_V(q0, v1); break; @@ -436,7 +436,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i GETGX_empty_vector(q0); v0 = fpu_get_scratch_lmul(dyn, 
VECTOR_LMUL2); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL1, 0.5); - VWADD_VX(v0, xZR, q1, VECTOR_UNMASKED); + VWADD_VX(v0, q1, xZR, VECTOR_UNMASKED); SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); VMV_V_V(q0, v0); break; @@ -454,9 +454,9 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i if (v0 != q0) VMV_V_V(v0, q0); if (v1 != q1) VMV_V_V(v1, q1); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL1, 0.5); - VNSRL_WX(d0, xZR, v0, VECTOR_UNMASKED); - VNSRL_WX(d1, xZR, v1, VECTOR_UNMASKED); - VWMUL_VV(v0, d0, d1, VECTOR_UNMASKED); + VNSRL_WX(d0, v0, xZR, VECTOR_UNMASKED); + VNSRL_WX(d1, v1, xZR, VECTOR_UNMASKED); + VWMUL_VV(v0, d1, d0, VECTOR_UNMASKED); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW64, VECTOR_LMUL1, 1); if (v0 != q0) VMV_V_V(q0, v0); break; @@ -470,14 +470,14 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i VXOR_VV(d0, d0, d0, VECTOR_UNMASKED); d1 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); - VMAX_VX(d0, xZR, q0, VECTOR_UNMASKED); - if (q0 != q1) VMAX_VX(d1, xZR, q1, VECTOR_UNMASKED); + VMAX_VX(d0, q0, xZR, VECTOR_UNMASKED); + if (q0 != q1) VMAX_VX(d1, q1, xZR, VECTOR_UNMASKED); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL1, 0.5); - VNCLIPU_WX(q0, xZR, d0, VECTOR_UNMASKED); - if (q0 != q1) VNCLIPU_WX(v0, xZR, d1, VECTOR_UNMASKED); + VNCLIPU_WX(q0, d0, xZR, VECTOR_UNMASKED); + if (q0 != q1) VNCLIPU_WX(v0, d1, xZR, VECTOR_UNMASKED); SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); if (q0 == q1) VMV_V_V(v0, q0); - VSLIDEUP_VI(q0, 4, v0, VECTOR_UNMASKED); + VSLIDEUP_VI(q0, v0, 4, VECTOR_UNMASKED); break; case 0x30: INST_NAME("PMOVZXBW Gx, Ex"); @@ -487,7 +487,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i GETGX_empty_vector(q0); v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW8, VECTOR_LMUL1, 0.5); - VWADDU_VX(v0, xZR, q1, VECTOR_UNMASKED); + VWADDU_VX(v0, q1, xZR, VECTOR_UNMASKED); SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); VMV_V_V(q0, v0); break; @@ -501,9 +501,9 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i fpu_get_scratch(dyn); v1 = fpu_get_scratch(dyn); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW8, VECTOR_LMUL1, 0.25); - VWADDU_VX(v0, xZR, q1, VECTOR_UNMASKED); + VWADDU_VX(v0, q1, xZR, VECTOR_UNMASKED); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL1, 0.5); - VWADDU_VX(v1, xZR, v0, VECTOR_UNMASKED); + VWADDU_VX(v1, v0, xZR, VECTOR_UNMASKED); SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1); VMV_V_V(q0, v1); break; @@ -517,11 +517,11 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i fpu_get_scratch(dyn); v1 = fpu_get_scratch(dyn); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW8, VECTOR_LMUL1, 0.125); - VWADDU_VX(v0, xZR, q1, VECTOR_UNMASKED); + VWADDU_VX(v0, q1, xZR, VECTOR_UNMASKED); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL1, 0.25); - VWADDU_VX(v1, xZR, v0, VECTOR_UNMASKED); + VWADDU_VX(v1, v0, xZR, VECTOR_UNMASKED); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL1, 0.5); - VWADDU_VX(v0, xZR, v1, VECTOR_UNMASKED); + VWADDU_VX(v0, v1, xZR, VECTOR_UNMASKED); SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); VMV_V_V(q0, v0); break; @@ -533,7 +533,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i GETGX_empty_vector(q0); v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL1, 0.5); - VWADDU_VX(v0, xZR, q1, 
VECTOR_UNMASKED); + VWADDU_VX(v0, q1, xZR, VECTOR_UNMASKED); SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1); VMV_V_V(q0, v0); break; @@ -547,9 +547,9 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i fpu_get_scratch(dyn); v1 = fpu_get_scratch(dyn); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL1, 0.25); - VWADDU_VX(v0, xZR, q1, VECTOR_UNMASKED); + VWADDU_VX(v0, q1, xZR, VECTOR_UNMASKED); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL1, 0.5); - VWADDU_VX(v1, xZR, v0, VECTOR_UNMASKED); + VWADDU_VX(v1, v0, xZR, VECTOR_UNMASKED); SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); VMV_V_V(q0, v1); break; @@ -561,7 +561,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i GETGX_empty_vector(q0); v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL1, 0.5); - VWADDU_VX(v0, xZR, q1, VECTOR_UNMASKED); + VWADDU_VX(v0, q1, xZR, VECTOR_UNMASKED); SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); VMV_V_V(q0, v0); break; @@ -571,7 +571,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1); GETGX_vector(q0, 1, VECTOR_SEW32); GETEX_vector(q1, 0, 0, VECTOR_SEW32); - VMIN_VV(q0, q0, q1, VECTOR_UNMASKED); + VMIN_VV(q0, q1, q0, VECTOR_UNMASKED); break; case 0x3A: INST_NAME("PMINUW Gx, Ex"); @@ -579,7 +579,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); GETEX_vector(q1, 0, 0, VECTOR_SEW16); GETGX_vector(q0, 1, VECTOR_SEW16); - VMINU_VV(q0, q0, q1, VECTOR_UNMASKED); + VMINU_VV(q0, q1, q0, VECTOR_UNMASKED); break; case 0x3B: INST_NAME("PMINUD Gx, Ex"); @@ -587,7 +587,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1); GETEX_vector(q1, 0, 0, VECTOR_SEW32); GETGX_vector(q0, 1, VECTOR_SEW32); - VMINU_VV(q0, q0, q1, VECTOR_UNMASKED); + VMINU_VV(q0, q1, q0, VECTOR_UNMASKED); break; case 0x3D: INST_NAME("PMAXSD Gx, Ex"); @@ -595,7 +595,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1); GETGX_vector(q0, 1, VECTOR_SEW32); GETEX_vector(q1, 0, 0, VECTOR_SEW32); - VMAX_VV(q0, q0, q1, VECTOR_UNMASKED); + VMAX_VV(q0, q1, q0, VECTOR_UNMASKED); break; case 0x40: INST_NAME("PMULLD Gx, Ex"); @@ -603,7 +603,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1); GETEX_vector(q1, 0, 0, VECTOR_SEW32); GETGX_vector(q0, 1, VECTOR_SEW32); - VMUL_VV(q0, q0, q1, VECTOR_UNMASKED); + VMUL_VV(q0, q1, q0, VECTOR_UNMASKED); break; default: DEFAULT_VECTOR; @@ -621,7 +621,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i u8 = F8; ADDI(x4, xZR, u8); VMV_V_X(VMASK, x4); - VADD_VI(q0, 0, q1, VECTOR_MASKED); + VADD_VI(q0, q1, 0, VECTOR_MASKED); break; case 0x0F: INST_NAME("PALIGNR Gx, Ex, Ib"); @@ -641,14 +641,14 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i vector_vsetvli(dyn, ninst, x1, VECTOR_SEW8, VECTOR_LMUL1, 1); } VMV_V_V(d0, q0); - VSLIDEDOWN_VI(q0, u8 - 16, d0, VECTOR_UNMASKED); + VSLIDEDOWN_VI(q0, d0, u8 - 16, VECTOR_UNMASKED); } else if (u8 == 16) { // nop } else if (u8 > 0) { v0 = fpu_get_scratch(dyn); v1 = fpu_get_scratch(dyn); VXOR_VV(v0, v0, v0, VECTOR_UNMASKED); - VSLIDEUP_VI(v0, 16 - u8, q0, VECTOR_UNMASKED); + VSLIDEUP_VI(v0, q0, 16 - u8, VECTOR_UNMASKED); if (rv64_vlen >= 256) { // clear 
high bits before slidedown! d0 = fpu_get_scratch(dyn); @@ -658,8 +658,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i VMV_V_V(d0, q1); q1 = d0; } - VSLIDEDOWN_VI(v1, u8, q1, VECTOR_UNMASKED); - VOR_VV(q0, v0, v1, VECTOR_UNMASKED); + VSLIDEDOWN_VI(v1, q1, u8, VECTOR_UNMASKED); + VOR_VV(q0, v1, v0, VECTOR_UNMASKED); } else { if (q0 != q1) VMV_V_V(q0, q1); } @@ -675,8 +675,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i GETEX_vector(q0, 0, 0, VECTOR_SEW64); v0 = fpu_get_scratch(dyn); ADDI(x4, xZR, 63); - VSRL_VX(v0, x4, q0, VECTOR_UNMASKED); - VMSNE_VX(VMASK, xZR, v0, VECTOR_UNMASKED); + VSRL_VX(v0, q0, x4, VECTOR_UNMASKED); + VMSNE_VX(VMASK, v0, xZR, VECTOR_UNMASKED); VMV_X_S(gd, VMASK); ANDI(gd, gd, 0b11); break; @@ -689,7 +689,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i if (!box64_dynarec_fastnan) { v0 = fpu_get_scratch(dyn); VXOR_VV(v0, v0, v0, VECTOR_UNMASKED); - VMFLT_VV(VMASK, v0, q1, VECTOR_UNMASKED); + VMFLT_VV(VMASK, q1, v0, VECTOR_UNMASKED); } VFSQRT_V(q0, q1, VECTOR_UNMASKED); if (!box64_dynarec_fastnan) { @@ -702,7 +702,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SET_ELEMENT_WIDTH(x1, VECTOR_SEWANY, 1); GETGX_vector(q0, 1, dyn->vector_eew); GETEX_vector(q1, 0, 0, dyn->vector_eew); - VAND_VV(q0, q0, q1, VECTOR_UNMASKED); + VAND_VV(q0, q1, q0, VECTOR_UNMASKED); break; case 0x55: INST_NAME("ANDNPD Gx, Ex"); @@ -710,8 +710,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SET_ELEMENT_WIDTH(x1, VECTOR_SEWANY, 1); GETGX_vector(q0, 1, dyn->vector_eew); GETEX_vector(q1, 0, 0, dyn->vector_eew); - VXOR_VI(q0, 0x1F, q0, VECTOR_UNMASKED); - VAND_VV(q0, q0, q1, VECTOR_UNMASKED); + VXOR_VI(q0, q0, 0x1F, VECTOR_UNMASKED); + VAND_VV(q0, q1, q0, VECTOR_UNMASKED); break; case 0x56: INST_NAME("ORPD Gx, Ex"); @@ -725,7 +725,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i } else { GETGX_vector(q0, 1, dyn->vector_eew); GETEX_vector(q1, 0, 0, dyn->vector_eew); - VOR_VV(q0, q0, q1, VECTOR_UNMASKED); + VOR_VV(q0, q1, q0, VECTOR_UNMASKED); } break; case 0x57: @@ -734,7 +734,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SET_ELEMENT_WIDTH(x1, VECTOR_SEWANY, 1); GETGX_vector(q0, 1, dyn->vector_eew); GETEX_vector(q1, 0, 0, dyn->vector_eew); - VXOR_VV(q0, q0, q1, VECTOR_UNMASKED); + VXOR_VV(q0, q1, q0, VECTOR_UNMASKED); break; case 0x58: INST_NAME("ADDPD Gx, Ex"); @@ -747,12 +747,12 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i VMFEQ_VV(VMASK, q0, q0, VECTOR_UNMASKED); VMFEQ_VV(v0, q1, q1, VECTOR_UNMASKED); } - VFADD_VV(q0, q1, q0, VECTOR_UNMASKED); + VFADD_VV(q0, q0, q1, VECTOR_UNMASKED); if (!box64_dynarec_fastnan) { - VMAND_MM(VMASK, VMASK, v0); + VMAND_MM(VMASK, v0, VMASK); VMFEQ_VV(v0, q0, q0, VECTOR_UNMASKED); - VXOR_VI(v0, 0x1F, v0, VECTOR_UNMASKED); - VMAND_MM(VMASK, VMASK, v0); + VXOR_VI(v0, v0, 0x1F, VECTOR_UNMASKED); + VMAND_MM(VMASK, v0, VMASK); VFSGNJN_VV(q0, q0, q0, VECTOR_MASKED); } break; @@ -767,12 +767,12 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i VMFEQ_VV(VMASK, q0, q0, VECTOR_UNMASKED); VMFEQ_VV(v0, q1, q1, VECTOR_UNMASKED); } - VFMUL_VV(q0, q1, q0, VECTOR_UNMASKED); + VFMUL_VV(q0, q0, q1, VECTOR_UNMASKED); if (!box64_dynarec_fastnan) { - VMAND_MM(VMASK, VMASK, v0); + VMAND_MM(VMASK, v0, VMASK); VMFEQ_VV(v0, q0, q0, VECTOR_UNMASKED); - 
VXOR_VI(v0, 0x1F, v0, VECTOR_UNMASKED); - VMAND_MM(VMASK, VMASK, v0); + VXOR_VI(v0, v0, 0x1F, VECTOR_UNMASKED); + VMAND_MM(VMASK, v0, VMASK); VFSGNJN_VV(q0, q0, q0, VECTOR_MASKED); } break; @@ -787,12 +787,12 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i VMFEQ_VV(VMASK, q0, q0, VECTOR_UNMASKED); VMFEQ_VV(v0, q1, q1, VECTOR_UNMASKED); } - VFSUB_VV(q0, q1, q0, VECTOR_UNMASKED); + VFSUB_VV(q0, q0, q1, VECTOR_UNMASKED); if (!box64_dynarec_fastnan) { - VMAND_MM(VMASK, VMASK, v0); + VMAND_MM(VMASK, v0, VMASK); VMFEQ_VV(v0, q0, q0, VECTOR_UNMASKED); - VXOR_VI(v0, 0x1F, v0, VECTOR_UNMASKED); - VMAND_MM(VMASK, VMASK, v0); + VXOR_VI(v0, v0, 0x1F, VECTOR_UNMASKED); + VMAND_MM(VMASK, v0, VMASK); VFSGNJN_VV(q0, q0, q0, VECTOR_MASKED); } break; @@ -805,10 +805,10 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i v0 = fpu_get_scratch(dyn); VMFEQ_VV(VMASK, q0, q0, VECTOR_UNMASKED); VMFEQ_VV(v0, q1, q1, VECTOR_UNMASKED); - VFMIN_VV(q0, q1, q0, VECTOR_UNMASKED); - VMAND_MM(VMASK, VMASK, v0); - VXOR_VI(VMASK, 0x1F, VMASK, VECTOR_UNMASKED); - VADD_VX(q0, xZR, q1, VECTOR_MASKED); + VFMIN_VV(q0, q0, q1, VECTOR_UNMASKED); + VMAND_MM(VMASK, v0, VMASK); + VXOR_VI(VMASK, VMASK, 0x1F, VECTOR_UNMASKED); + VADD_VX(q0, q1, xZR, VECTOR_MASKED); break; case 0x5E: INST_NAME("DIVPD Gx, Ex"); @@ -821,12 +821,12 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i VMFEQ_VV(VMASK, q0, q0, VECTOR_UNMASKED); VMFEQ_VV(v0, q1, q1, VECTOR_UNMASKED); } - VFDIV_VV(q0, q1, q0, VECTOR_UNMASKED); + VFDIV_VV(q0, q0, q1, VECTOR_UNMASKED); if (!box64_dynarec_fastnan) { - VMAND_MM(VMASK, VMASK, v0); + VMAND_MM(VMASK, v0, VMASK); VMFEQ_VV(v0, q0, q0, VECTOR_UNMASKED); - VXOR_VI(v0, 0x1F, v0, VECTOR_UNMASKED); - VMAND_MM(VMASK, VMASK, v0); + VXOR_VI(v0, v0, 0x1F, VECTOR_UNMASKED); + VMAND_MM(VMASK, v0, VMASK); VFSGNJN_VV(q0, q0, q0, VECTOR_MASKED); } break; @@ -839,10 +839,10 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i v0 = fpu_get_scratch(dyn); VMFEQ_VV(VMASK, q0, q0, VECTOR_UNMASKED); VMFEQ_VV(v0, q1, q1, VECTOR_UNMASKED); - VFMAX_VV(q0, q1, q0, VECTOR_UNMASKED); - VMAND_MM(VMASK, VMASK, v0); - VXOR_VI(VMASK, 0x1F, VMASK, VECTOR_UNMASKED); - VADD_VX(q0, xZR, q1, VECTOR_MASKED); + VFMAX_VV(q0, q0, q1, VECTOR_UNMASKED); + VMAND_MM(VMASK, v0, VMASK); + VXOR_VI(VMASK, VMASK, 0x1F, VECTOR_UNMASKED); + VADD_VX(q0, q1, xZR, VECTOR_MASKED); break; case 0x60: INST_NAME("PUNPCKLBW Gx, Ex"); @@ -856,9 +856,9 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i GETEX_vector(q1, 0, 0, VECTOR_SEW8); d0 = fpu_get_scratch(dyn); d1 = fpu_get_scratch(dyn); - VRGATHER_VV(d0, v0, q0, VECTOR_UNMASKED); - VRGATHER_VV(d1, v0, q1, VECTOR_UNMASKED); - VMERGE_VVM(q0, d1, d0); + VRGATHER_VV(d0, q0, v0, VECTOR_UNMASKED); + VRGATHER_VV(d1, q1, v0, VECTOR_UNMASKED); + VMERGE_VVM(q0, d0, d1); break; case 0x61: INST_NAME("PUNPCKLWD Gx, Ex"); @@ -872,9 +872,9 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i GETEX_vector(q1, 0, 0, VECTOR_SEW16); d0 = fpu_get_scratch(dyn); d1 = fpu_get_scratch(dyn); - VRGATHER_VV(d0, v0, q0, VECTOR_UNMASKED); - VRGATHER_VV(d1, v0, q1, VECTOR_UNMASKED); - VMERGE_VVM(q0, d1, d0); + VRGATHER_VV(d0, q0, v0, VECTOR_UNMASKED); + VRGATHER_VV(d1, q1, v0, VECTOR_UNMASKED); + VMERGE_VVM(q0, d0, d1); break; case 0x62: INST_NAME("PUNPCKLDQ Gx, Ex"); @@ -888,9 +888,9 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t 
i GETEX_vector(q1, 0, 0, VECTOR_SEW32); d0 = fpu_get_scratch(dyn); d1 = fpu_get_scratch(dyn); - VRGATHER_VV(d0, v0, q0, VECTOR_UNMASKED); - VRGATHER_VV(d1, v0, q1, VECTOR_UNMASKED); - VMERGE_VVM(q0, d1, d0); + VRGATHER_VV(d0, q0, v0, VECTOR_UNMASKED); + VRGATHER_VV(d1, q1, v0, VECTOR_UNMASKED); + VMERGE_VVM(q0, d0, d1); break; case 0x63: INST_NAME("PACKSSWB Gx, Ex"); @@ -903,13 +903,13 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i if (rv64_vlen >= 256) { vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL1, 2); // double the vl for slideup. VMV_V_V(d0, q0); - VSLIDEUP_VI(d0, 8, q1, VECTOR_UNMASKED); // splice q0 and q1 here! + VSLIDEUP_VI(d0, q1, 8, VECTOR_UNMASKED); // splice q0 and q1 here! } else { VMV_V_V(d0, q0); VMV_V_V(d1, q1); } SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); - VNCLIP_WI(q0, 0, d0, VECTOR_UNMASKED); + VNCLIP_WI(q0, d0, 0, VECTOR_UNMASKED); break; case 0x64 ... 0x66: if (opcode == 0x64) { @@ -926,10 +926,10 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SET_ELEMENT_WIDTH(x1, u8, 1); GETGX_vector(q0, 1, dyn->vector_eew); GETEX_vector(q1, 0, 0, dyn->vector_eew); - VMSLT_VV(VMASK, q0, q1, VECTOR_UNMASKED); + VMSLT_VV(VMASK, q1, q0, VECTOR_UNMASKED); VXOR_VV(q0, q0, q0, VECTOR_UNMASKED); - VMERGE_VIM(q0, 1, q0); // implies vmask and widened it - VRSUB_VX(q0, xZR, q0, VECTOR_UNMASKED); + VMERGE_VIM(q0, q0, 1); // implies vmask and widened it + VRSUB_VX(q0, q0, xZR, VECTOR_UNMASKED); break; case 0x67: INST_NAME("PACKUSWB Gx, Ex"); @@ -943,18 +943,18 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL1, 2); // double the vl for slideup. if (q0 == q1) { VMV_V_V(d0, q0); - VSLIDEUP_VI(d0, 8, q1, VECTOR_UNMASKED); // splice q0 and q1 here! - VMAX_VX(d0, xZR, d0, VECTOR_UNMASKED); + VSLIDEUP_VI(d0, q1, 8, VECTOR_UNMASKED); // splice q0 and q1 here! + VMAX_VX(d0, d0, xZR, VECTOR_UNMASKED); } else { - VSLIDEUP_VI(q0, 8, q1, VECTOR_UNMASKED); // splice q0 and q1 here! - VMAX_VX(d0, xZR, q0, VECTOR_UNMASKED); + VSLIDEUP_VI(q0, q1, 8, VECTOR_UNMASKED); // splice q0 and q1 here! + VMAX_VX(d0, q0, xZR, VECTOR_UNMASKED); } } else { - VMAX_VX(d0, xZR, q0, VECTOR_UNMASKED); - VMAX_VX(d1, xZR, q1, VECTOR_UNMASKED); + VMAX_VX(d0, q0, xZR, VECTOR_UNMASKED); + VMAX_VX(d1, q1, xZR, VECTOR_UNMASKED); } SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); - VNCLIPU_WI(q0, 0, d0, VECTOR_UNMASKED); + VNCLIPU_WI(q0, d0, 0, VECTOR_UNMASKED); break; case 0x68 ... 
0x6A: if (opcode == 0x68) { @@ -965,7 +965,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i VMV_V_X(VMASK, x1); // VMASK = 0b1010101010101010 v0 = fpu_get_scratch(dyn); VIOTA_M(v0, VMASK, VECTOR_UNMASKED); - VADD_VI(v0, 8, v0, VECTOR_UNMASKED); // v0 = 15 15 14 14 13 13 12 12 11 11 10 10 9 9 8 8 + VADD_VI(v0, v0, 8, VECTOR_UNMASKED); // v0 = 15 15 14 14 13 13 12 12 11 11 10 10 9 9 8 8 } else if (opcode == 0x69) { INST_NAME("PUNPCKHWD Gx, Ex"); nextop = F8; @@ -974,7 +974,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i VMV_V_X(VMASK, x1); // VMASK = 0b10101010 v0 = fpu_get_scratch(dyn); VIOTA_M(v0, VMASK, VECTOR_UNMASKED); - VADD_VI(v0, 4, v0, VECTOR_UNMASKED); // v0 = 7 7 6 6 5 5 4 4 + VADD_VI(v0, v0, 4, VECTOR_UNMASKED); // v0 = 7 7 6 6 5 5 4 4 } else { INST_NAME("PUNPCKHDQ Gx, Ex"); nextop = F8; @@ -982,15 +982,15 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i VMV_V_I(VMASK, 0b1010); v0 = fpu_get_scratch(dyn); VIOTA_M(v0, VMASK, VECTOR_UNMASKED); - VADD_VI(v0, 2, v0, VECTOR_UNMASKED); // v0 = 3 3 2 2 + VADD_VI(v0, v0, 2, VECTOR_UNMASKED); // v0 = 3 3 2 2 } GETGX_vector(q0, 1, dyn->vector_eew); GETEX_vector(q1, 0, 0, dyn->vector_eew); d0 = fpu_get_scratch(dyn); d1 = fpu_get_scratch(dyn); - VRGATHER_VV(d0, v0, q0, VECTOR_UNMASKED); - VRGATHER_VV(d1, v0, q1, VECTOR_UNMASKED); - VMERGE_VVM(q0, d1, d0); + VRGATHER_VV(d0, q0, v0, VECTOR_UNMASKED); + VRGATHER_VV(d1, q1, v0, VECTOR_UNMASKED); + VMERGE_VVM(q0, d0, d1); break; case 0x6B: INST_NAME("PACKSSDW Gx, Ex"); @@ -1003,13 +1003,13 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i if (rv64_vlen >= 256) { vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL1, 2); // double the vl for slideup. VMV_V_V(d0, q0); - VSLIDEUP_VI(d0, 4, q1, VECTOR_UNMASKED); // splice q0 and q1 here! + VSLIDEUP_VI(d0, q1, 4, VECTOR_UNMASKED); // splice q0 and q1 here! 
} else { VMV_V_V(d0, q0); VMV_V_V(d1, q1); } SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); - VNCLIP_WI(q0, 0, d0, VECTOR_UNMASKED); + VNCLIP_WI(q0, d0, 0, VECTOR_UNMASKED); break; case 0x6C: INST_NAME("PUNPCKLQDQ Gx, Ex"); @@ -1025,14 +1025,14 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i v1 = fpu_get_scratch(dyn); VMV_V_V(v1, v0); } - VSLIDEUP_VI(v0, 1, v1, VECTOR_UNMASKED); + VSLIDEUP_VI(v0, v1, 1, VECTOR_UNMASKED); } else { q0 = fpu_get_scratch(dyn); VXOR_VV(q0, q0, q0, VECTOR_UNMASKED); VMV_V_I(VMASK, 0b10); SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x3, x2, &fixedaddress, rex, NULL, 0, 0); - VLUXEI64_V(v0, ed, q0, VECTOR_MASKED, VECTOR_NFIELD1); + VLUXEI64_V(v0, q0, ed, VECTOR_MASKED, VECTOR_NFIELD1); } break; case 0x6D: @@ -1045,14 +1045,14 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i if (MODREG) { v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64); q0 == fpu_get_scratch(dyn); - VSLIDE1DOWN_VX(q0, xZR, v0, VECTOR_UNMASKED); + VSLIDE1DOWN_VX(q0, v0, xZR, VECTOR_UNMASKED); VMV_X_S(x4, q0); if (v0 != v1) { VMV_V_V(v0, v1); } VMV_S_X(v0, x4); } else { q0 = fpu_get_scratch(dyn); VMV_V_I(VMASK, 0b10); - VSLIDE1DOWN_VX(v0, xZR, v0, VECTOR_UNMASKED); + VSLIDE1DOWN_VX(v0, v0, xZR, VECTOR_UNMASKED); SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x3, x2, &fixedaddress, rex, NULL, 0, 0); VLE64_V(v0, ed, VECTOR_MASKED, VECTOR_NFIELD1); @@ -1070,7 +1070,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i } VXOR_VV(v0, v0, v0, VECTOR_UNMASKED); VMV_V_I(VMASK, 1); - VMERGE_VXM(v0, ed, v0); + VMERGE_VXM(v0, v0, ed); break; case 0x6F: INST_NAME("MOVDQA Gx, Ex"); @@ -1100,10 +1100,10 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1); if (q0 == q1) { v1 = fpu_get_scratch(dyn); - VRGATHEREI16_VV(v1, v0, q1, VECTOR_UNMASKED); + VRGATHEREI16_VV(v1, q1, v0, VECTOR_UNMASKED); VMV_V_V(q0, v1); } else { - VRGATHEREI16_VV(q0, v0, q1, VECTOR_UNMASKED); + VRGATHEREI16_VV(q0, q1, v0, VECTOR_UNMASKED); } break; case 0x71: @@ -1118,7 +1118,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i if (u8 > 15) { VXOR_VV(q0, q0, q0, VECTOR_UNMASKED); } else { - VSRL_VI(q0, u8, q0, VECTOR_UNMASKED); + VSRL_VI(q0, q0, u8, VECTOR_UNMASKED); } PUTEX_vector(q0, VECTOR_SEW16); } @@ -1130,7 +1130,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i u8 = F8; if (u8 > 15) u8 = 15; if (u8) { - VSRA_VI(q0, u8, q0, VECTOR_UNMASKED); + VSRA_VI(q0, q0, u8, VECTOR_UNMASKED); } PUTEX_vector(q0, VECTOR_SEW16); break; @@ -1143,7 +1143,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i if (u8 > 15) { VXOR_VV(q0, q0, q0, VECTOR_UNMASKED); } else { - VSLL_VI(q0, u8, q0, VECTOR_UNMASKED); + VSLL_VI(q0, q0, u8, VECTOR_UNMASKED); } PUTEX_vector(q0, VECTOR_SEW16); } @@ -1164,7 +1164,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i if (u8 > 31) { VXOR_VV(q0, q0, q0, VECTOR_UNMASKED); } else if (u8) { - VSRL_VI(q0, u8, q0, VECTOR_UNMASKED); + VSRL_VI(q0, q0, u8, VECTOR_UNMASKED); } PUTEX_vector(q0, VECTOR_SEW32); } @@ -1176,7 +1176,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i u8 = F8; if (u8 > 31) u8 = 31; if (u8) { - VSRA_VI(q0, u8, q0, VECTOR_UNMASKED); + VSRA_VI(q0, q0, u8, VECTOR_UNMASKED); } PUTEX_vector(q0, VECTOR_SEW32); break; @@ -1189,7 
+1189,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i if (u8 > 31) { VXOR_VV(q0, q0, q0, VECTOR_UNMASKED); } else { - VSLL_VI(q0, u8, q0, VECTOR_UNMASKED); + VSLL_VI(q0, q0, u8, VECTOR_UNMASKED); } PUTEX_vector(q0, VECTOR_SEW32); } @@ -1211,7 +1211,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i VXOR_VV(q0, q0, q0, VECTOR_UNMASKED); } else { MOV64x(x4, u8); - VSRL_VX(q0, x4, q0, VECTOR_UNMASKED); + VSRL_VX(q0, q0, x4, VECTOR_UNMASKED); } PUTEX_vector(q0, VECTOR_SEW64); } @@ -1233,7 +1233,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i vector_vsetvli(dyn, ninst, x1, VECTOR_SEW8, VECTOR_LMUL1, 1); } VMV_V_V(d0, q0); - VSLIDEDOWN_VI(q0, u8, d0, VECTOR_UNMASKED); + VSLIDEDOWN_VI(q0, d0, u8, VECTOR_UNMASKED); } PUTEX_vector(q0, VECTOR_SEW8); break; @@ -1247,7 +1247,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i VXOR_VV(q0, q0, q0, VECTOR_UNMASKED); } else { MOV64x(x4, u8); - VSLL_VX(q0, x4, q0, VECTOR_UNMASKED); + VSLL_VX(q0, q0, x4, VECTOR_UNMASKED); } PUTEX_vector(q0, VECTOR_SEW64); } @@ -1264,7 +1264,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i } else { d0 = fpu_get_scratch(dyn); VXOR_VV(d0, d0, d0, VECTOR_UNMASKED); - VSLIDEUP_VI(d0, u8, q0, VECTOR_UNMASKED); + VSLIDEUP_VI(d0, q0, u8, VECTOR_UNMASKED); if (MODREG) { VMV_V_V(q0, d0); } else { @@ -1290,10 +1290,10 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SET_ELEMENT_WIDTH(x1, u8, 1); GETGX_vector(q0, 1, dyn->vector_eew); GETEX_vector(q1, 0, 0, dyn->vector_eew); - VMSEQ_VV(VMASK, q0, q1, VECTOR_UNMASKED); + VMSEQ_VV(VMASK, q1, q0, VECTOR_UNMASKED); VXOR_VV(q0, q0, q0, VECTOR_UNMASKED); - VMERGE_VIM(q0, 1, q0); // implies vmask and widened it - VRSUB_VX(q0, xZR, q0, VECTOR_UNMASKED); + VMERGE_VIM(q0, q0, 1); // implies vmask and widened it + VRSUB_VX(q0, q0, xZR, VECTOR_UNMASKED); break; case 0x7E: return 0; @@ -1333,7 +1333,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i ADDI(x5, xZR, 1 << u8); VMV_S_X(VMASK, x5); v0 = fpu_get_scratch(dyn); - VMERGE_VXM(v0, ed, q0); // uses VMASK + VMERGE_VXM(v0, q0, ed); // uses VMASK VMV_V_V(q0, v0); break; case 0xC5: @@ -1345,7 +1345,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i GETEX_vector(q0, 0, 1, VECTOR_SEW16); u8 = (F8) & 7; v0 = fpu_get_scratch(dyn); - VSLIDEDOWN_VI(v0, u8, q0, VECTOR_UNMASKED); + VSLIDEDOWN_VI(v0, q0, u8, VECTOR_UNMASKED); VMV_X_S(gd, v0); ZEXTH(gd, gd); } else { @@ -1390,7 +1390,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i VXOR_VV(q0, q0, q0, VECTOR_UNMASKED); B_NEXT_nocond; MARK; - VSRL_VX(q0, x4, q0, VECTOR_UNMASKED); + VSRL_VX(q0, q0, x4, VECTOR_UNMASKED); break; case 0xD4: INST_NAME("PADDQ Gx, Ex"); @@ -1398,7 +1398,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); GETGX_vector(q0, 1, VECTOR_SEW64); GETEX_vector(q1, 0, 0, VECTOR_SEW64); - VADD_VV(q0, q0, q1, VECTOR_UNMASKED); + VADD_VV(q0, q1, q0, VECTOR_UNMASKED); break; case 0xD5: INST_NAME("PMULLW Gx, Ex"); @@ -1407,7 +1407,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i GETGX_vector(q0, 1, VECTOR_SEW16); GETEX_vector(q1, 0, 0, VECTOR_SEW16); v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); - VMUL_VV(q0, q0, q1, VECTOR_UNMASKED); + VMUL_VV(q0, q1, q0, 
VECTOR_UNMASKED); break; case 0xD6: INST_NAME("MOVQ Ex, Gx"); @@ -1433,8 +1433,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i GETGD; GETEX_vector(q0, 0, 0, VECTOR_SEW8); v0 = fpu_get_scratch(dyn); - VSRL_VI(v0, 7, q0, VECTOR_UNMASKED); - VMSNE_VX(VMASK, xZR, v0, VECTOR_UNMASKED); + VSRL_VI(v0, q0, 7, VECTOR_UNMASKED); + VMSNE_VX(VMASK, v0, xZR, VECTOR_UNMASKED); SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); VMV_X_S(gd, VMASK); ZEXTH(gd, gd); @@ -1452,7 +1452,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SET_ELEMENT_WIDTH(x1, u8, 1); GETGX_vector(q0, 1, u8); GETEX_vector(q1, 0, 0, u8); - VSSUBU_VV(q0, q1, q0, VECTOR_UNMASKED); + VSSUBU_VV(q0, q0, q1, VECTOR_UNMASKED); break; case 0xDA: INST_NAME("PMINUB Gx, Ex"); @@ -1460,7 +1460,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); GETGX_vector(q0, 1, VECTOR_SEW8); GETEX_vector(q1, 0, 0, VECTOR_SEW8); - VMINU_VV(q0, q0, q1, VECTOR_UNMASKED); + VMINU_VV(q0, q1, q0, VECTOR_UNMASKED); break; case 0xDB: INST_NAME("PAND Gx, Ex"); @@ -1468,7 +1468,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SET_ELEMENT_WIDTH(x1, VECTOR_SEWANY, 1); GETGX_vector(q0, 1, dyn->vector_eew); GETEX_vector(q1, 0, 0, dyn->vector_eew); - VAND_VV(q0, q0, q1, VECTOR_UNMASKED); + VAND_VV(q0, q1, q0, VECTOR_UNMASKED); break; case 0xDC: case 0xDD: @@ -1483,7 +1483,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SET_ELEMENT_WIDTH(x1, u8, 1); GETGX_vector(q0, 1, u8); GETEX_vector(q1, 0, 0, u8); - VSADDU_VV(q0, q1, q0, VECTOR_UNMASKED); + VSADDU_VV(q0, q0, q1, VECTOR_UNMASKED); break; case 0xDE: INST_NAME("PMAXUB Gx, Ex"); @@ -1491,7 +1491,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); GETGX_vector(q0, 1, VECTOR_SEW8); GETEX_vector(q1, 0, 0, VECTOR_SEW8); - VMAXU_VV(q0, q0, q1, VECTOR_UNMASKED); + VMAXU_VV(q0, q1, q0, VECTOR_UNMASKED); break; case 0xDF: INST_NAME("PANDN Gx, Ex"); @@ -1499,8 +1499,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SET_ELEMENT_WIDTH(x1, VECTOR_SEWANY, 1); GETGX_vector(q0, 1, dyn->vector_eew); GETEX_vector(q1, 0, 0, dyn->vector_eew); - VXOR_VI(q0, 0x1F, q0, VECTOR_UNMASKED); - VAND_VV(q0, q0, q1, VECTOR_UNMASKED); + VXOR_VI(q0, q0, 0x1F, VECTOR_UNMASKED); + VAND_VV(q0, q1, q0, VECTOR_UNMASKED); break; case 0xE0: INST_NAME("PAVGB Gx, Ex"); @@ -1509,7 +1509,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i GETGX_vector(q0, 1, VECTOR_SEW8); GETEX_vector(q1, 0, 0, VECTOR_SEW8); CSRRWI(xZR, 0b00 /* rnu */, 0x00A /* vxrm */); - VAADDU_VV(q0, q1, q0, VECTOR_UNMASKED); + VAADDU_VV(q0, q0, q1, VECTOR_UNMASKED); break; case 0xE1: INST_NAME("PSRAW Gx, Ex"); @@ -1527,10 +1527,10 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i } v1 = fpu_get_scratch(dyn); ADDI(x4, xZR, 15); - VMINU_VX(v1, x4, q1, VECTOR_MASKED); + VMINU_VX(v1, q1, x4, VECTOR_MASKED); VMV_X_S(x4, v1); SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); - VSRA_VX(q0, x4, q0, VECTOR_UNMASKED); + VSRA_VX(q0, q0, x4, VECTOR_UNMASKED); break; case 0xE2: INST_NAME("PSRAD Gx, Ex"); @@ -1548,10 +1548,10 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i } v1 = fpu_get_scratch(dyn); ADDI(x4, xZR, 31); - VMINU_VX(v1, x4, q1, VECTOR_MASKED); + VMINU_VX(v1, q1, x4, VECTOR_MASKED); 
VMV_X_S(x4, v1); SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1); - VSRA_VX(q0, x4, q0, VECTOR_UNMASKED); + VSRA_VX(q0, q0, x4, VECTOR_UNMASKED); break; case 0xE3: INST_NAME("PAVGW Gx, Ex"); @@ -1560,7 +1560,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i GETGX_vector(q0, 1, VECTOR_SEW16); GETEX_vector(q1, 0, 0, VECTOR_SEW16); CSRRWI(xZR, 0b00 /* rnu */, 0x00A /* vxrm */); - VAADDU_VV(q0, q1, q0, VECTOR_UNMASKED); + VAADDU_VV(q0, q0, q1, VECTOR_UNMASKED); break; case 0xE4: INST_NAME("PMULHUW Gx, Ex"); @@ -1568,7 +1568,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); GETGX_vector(q0, 1, VECTOR_SEW16); GETEX_vector(q1, 0, 0, VECTOR_SEW16); - VMULHU_VV(q0, q1, q0, VECTOR_UNMASKED); + VMULHU_VV(q0, q0, q1, VECTOR_UNMASKED); break; case 0xE5: INST_NAME("PMULHW Gx, Ex"); @@ -1576,7 +1576,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); GETGX_vector(q0, 1, VECTOR_SEW16); GETEX_vector(q1, 0, 0, VECTOR_SEW16); - VMULH_VV(q0, q1, q0, VECTOR_UNMASKED); + VMULH_VV(q0, q0, q1, VECTOR_UNMASKED); break; case 0xE8: INST_NAME("PSUBSB Gx, Ex"); @@ -1584,7 +1584,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); GETGX_vector(q0, 1, VECTOR_SEW8); GETEX_vector(q1, 0, 0, VECTOR_SEW8); - VSSUB_VV(q0, q1, q0, VECTOR_UNMASKED); + VSSUB_VV(q0, q0, q1, VECTOR_UNMASKED); break; case 0xE9: INST_NAME("PSUBSW Gx, Ex"); @@ -1592,7 +1592,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); GETGX_vector(q0, 1, VECTOR_SEW16); GETEX_vector(q1, 0, 0, VECTOR_SEW16); - VSSUB_VV(q0, q1, q0, VECTOR_UNMASKED); + VSSUB_VV(q0, q0, q1, VECTOR_UNMASKED); break; case 0xEA: INST_NAME("PMINSW Gx, Ex"); @@ -1600,7 +1600,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); GETGX_vector(q0, 1, VECTOR_SEW16); GETEX_vector(q1, 0, 0, VECTOR_SEW16); - VMIN_VV(q0, q0, q1, VECTOR_UNMASKED); + VMIN_VV(q0, q1, q0, VECTOR_UNMASKED); break; case 0xEB: INST_NAME("POR Gx, Ex"); @@ -1608,7 +1608,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SET_ELEMENT_WIDTH(x1, VECTOR_SEWANY, 1); GETGX_vector(q0, 1, dyn->vector_eew); GETEX_vector(q1, 0, 0, dyn->vector_eew); - VOR_VV(q0, q0, q1, VECTOR_UNMASKED); + VOR_VV(q0, q1, q0, VECTOR_UNMASKED); break; case 0xEC: INST_NAME("PADDSB Gx, Ex"); @@ -1616,7 +1616,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); GETGX_vector(q0, 1, VECTOR_SEW8); GETEX_vector(q1, 0, 0, VECTOR_SEW8); - VSADD_VV(q0, q1, q0, VECTOR_UNMASKED); + VSADD_VV(q0, q0, q1, VECTOR_UNMASKED); break; case 0xED: INST_NAME("PADDSW Gx, Ex"); @@ -1624,7 +1624,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); GETGX_vector(q0, 1, VECTOR_SEW16); GETEX_vector(q1, 0, 0, VECTOR_SEW16); - VSADD_VV(q0, q1, q0, VECTOR_UNMASKED); + VSADD_VV(q0, q0, q1, VECTOR_UNMASKED); break; case 0xEE: INST_NAME("PMAXSW Gx, Ex"); @@ -1632,7 +1632,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); GETGX_vector(q0, 1, VECTOR_SEW16); GETEX_vector(q1, 0, 0, VECTOR_SEW16); - VMAX_VV(q0, q0, q1, VECTOR_UNMASKED); + VMAX_VV(q0, 
q1, q0, VECTOR_UNMASKED); break; case 0xEF: INST_NAME("PXOR Gx, Ex"); @@ -1647,7 +1647,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SET_ELEMENT_WIDTH(x1, VECTOR_SEWANY, 1); q0 = sse_get_reg_vector(dyn, ninst, x1, gd, 1, dyn->vector_eew); GETEX_vector(q1, 0, 0, dyn->vector_eew); - VXOR_VV(q0, q0, q1, VECTOR_UNMASKED); + VXOR_VV(q0, q1, q0, VECTOR_UNMASKED); } break; case 0xF1: @@ -1685,7 +1685,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i VXOR_VV(q0, q0, q0, VECTOR_UNMASKED); B_NEXT_nocond; MARK; - VSLL_VX(q0, x4, q0, VECTOR_UNMASKED); + VSLL_VX(q0, q0, x4, VECTOR_UNMASKED); break; case 0xF5: INST_NAME("PMADDWD Gx, Ex"); @@ -1695,18 +1695,18 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i GETEX_vector(q1, 0, 0, VECTOR_SEW16); v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); v1 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); - VWMUL_VV(v0, q0, q1, VECTOR_UNMASKED); + VWMUL_VV(v0, q1, q0, VECTOR_UNMASKED); d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); // warning, no more scratches! ADDI(x4, xZR, 6); VID_V(d0, VECTOR_UNMASKED); - VSLL_VI(d0, 1, d0, VECTOR_UNMASKED); // times 2 - VMIN_VX(d0, x4, d0, VECTOR_UNMASKED); - VADD_VI(q0, 1, d0, VECTOR_UNMASKED); + VSLL_VI(d0, d0, 1, VECTOR_UNMASKED); // times 2 + VMIN_VX(d0, d0, x4, VECTOR_UNMASKED); + VADD_VI(q0, d0, 1, VECTOR_UNMASKED); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL2, 2); - VRGATHEREI16_VV(v1, d0, v0, VECTOR_UNMASKED); // 6 4 2 0 - VRGATHEREI16_VV(d0, q0, v0, VECTOR_UNMASKED); // 7 5 3 1 + VRGATHEREI16_VV(v1, v0, d0, VECTOR_UNMASKED); // 6 4 2 0 + VRGATHEREI16_VV(d0, v0, q0, VECTOR_UNMASKED); // 7 5 3 1 SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1); - VADD_VV(q0, d0, v1, VECTOR_UNMASKED); + VADD_VV(q0, v1, d0, VECTOR_UNMASKED); break; case 0xF6: INST_NAME("PSADBW Gx, Ex"); @@ -1717,20 +1717,20 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); v1 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); // no more scratches! - VWSUBU_VV(v0, q1, q0, VECTOR_UNMASKED); + VWSUBU_VV(v0, q0, q1, VECTOR_UNMASKED); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL2, 2); - VSRA_VI(v1, 15, v0, VECTOR_UNMASKED); - VXOR_VV(v0, v0, v1, VECTOR_UNMASKED); - VSUB_VV(v1, v1, v0, VECTOR_UNMASKED); + VSRA_VI(v1, v0, 15, VECTOR_UNMASKED); + VXOR_VV(v0, v1, v0, VECTOR_UNMASKED); + VSUB_VV(v1, v0, v1, VECTOR_UNMASKED); ADDI(x4, xZR, 0xFF); VXOR_VV(VMASK, VMASK, VMASK, VECTOR_UNMASKED); VMV_S_X(VMASK, x4); VXOR_VV(v0, v0, v0, VECTOR_UNMASKED); - VREDSUM_VS(v0, v0, v1, VECTOR_MASKED); // sum low 64 - VSLIDEDOWN_VI(d0, 8, v1, VECTOR_UNMASKED); + VREDSUM_VS(v0, v1, v0, VECTOR_MASKED); // sum low 64 + VSLIDEDOWN_VI(d0, v1, 8, VECTOR_UNMASKED); VXOR_VV(v1, v1, v1, VECTOR_UNMASKED); - VREDSUM_VS(v1, v1, d0, VECTOR_MASKED); // sum high 64 - VSLIDEUP_VI(v0, 4, v1, VECTOR_UNMASKED); + VREDSUM_VS(v1, d0, v1, VECTOR_MASKED); // sum high 64 + VSLIDEUP_VI(v0, v1, 4, VECTOR_UNMASKED); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW8, VECTOR_LMUL1, 1); VMV_V_V(q0, v0); break; @@ -1752,7 +1752,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SET_ELEMENT_WIDTH(x1, u8, 1); GETGX_vector(q0, 1, u8); GETEX_vector(q1, 0, 0, u8); - VSUB_VV(q0, q1, q0, VECTOR_UNMASKED); + VSUB_VV(q0, q0, q1, VECTOR_UNMASKED); break; case 0xFC ... 
0xFE: nextop = F8; @@ -1769,7 +1769,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SET_ELEMENT_WIDTH(x1, u8, 1); GETGX_vector(q0, 1, dyn->vector_eew); GETEX_vector(q1, 0, 0, dyn->vector_eew); - VADD_VV(q0, q0, q1, VECTOR_UNMASKED); + VADD_VV(q0, q1, q0, VECTOR_UNMASKED); break; default: DEFAULT_VECTOR; diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h index 7c927e06..e34c6487 100644 --- a/src/dynarec/rv64/rv64_emitter.h +++ b/src/dynarec/rv64/rv64_emitter.h @@ -1248,8 +1248,6 @@ f28–31 ft8–11 FP temporaries Caller #define VECTOR_NFIELD7 0b110 #define VECTOR_NFIELD8 0b111 -#define VECTOR_MASKREG 0 // fixed to v0 - // configuration setting // https://github.com/riscv/riscv-v-spec/blob/master/vcfg-format.adoc #define VSETIVLI(rd, zimm, zimm10) EMIT(I_type(0b110000000000 | (zimm10), zimm, 0b111, rd, 0b1010111)) // 11...............111.....1010111 @@ -1280,38 +1278,38 @@ f28–31 ft8–11 FP temporaries Caller // Vector Indexed-Unordered Instructions (including segment part) // https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#76-vector-indexed-instructions -#define VLUXEI8_V(vd, rs1, vs2, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0010, vs2, rs1, 0b000, vd, 0b0000111)) // ...001...........000.....0000111 -#define VLUXEI16_V(vd, rs1, vs2, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0010, vs2, rs1, 0b101, vd, 0b0000111)) // ...001...........101.....0000111 -#define VLUXEI32_V(vd, rs1, vs2, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0010, vs2, rs1, 0b110, vd, 0b0000111)) // ...001...........110.....0000111 -#define VLUXEI64_V(vd, rs1, vs2, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0010, vs2, rs1, 0b111, vd, 0b0000111)) // ...001...........111.....0000111 -#define VSUXEI8_V(vs3, rs1, vs2, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0010, vs2, rs1, 0b000, vs3, 0b0100111)) // ...001...........000.....0100111 -#define VSUXEI16_V(vs3, rs1, vs2, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0010, vs2, rs1, 0b101, vs3, 0b0100111)) // ...001...........101.....0100111 -#define VSUXEI32_V(vs3, rs1, vs2, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0010, vs2, rs1, 0b110, vs3, 0b0100111)) // ...001...........110.....0100111 -#define VSUXEI64_V(vs3, rs1, vs2, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0010, vs2, rs1, 0b111, vs3, 0b0100111)) // ...001...........111.....0100111 +#define VLUXEI8_V(vd, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0010, vs2, rs1, 0b000, vd, 0b0000111)) // ...001...........000.....0000111 +#define VLUXEI16_V(vd, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0010, vs2, rs1, 0b101, vd, 0b0000111)) // ...001...........101.....0000111 +#define VLUXEI32_V(vd, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0010, vs2, rs1, 0b110, vd, 0b0000111)) // ...001...........110.....0000111 +#define VLUXEI64_V(vd, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0010, vs2, rs1, 0b111, vd, 0b0000111)) // ...001...........111.....0000111 +#define VSUXEI8_V(vs3, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0010, vs2, rs1, 0b000, vs3, 0b0100111)) // ...001...........000.....0100111 +#define VSUXEI16_V(vs3, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0010, vs2, rs1, 0b101, vs3, 0b0100111)) // ...001...........101.....0100111 +#define VSUXEI32_V(vs3, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0010, vs2, rs1, 0b110, vs3, 0b0100111)) // ...001...........110.....0100111 +#define VSUXEI64_V(vs3, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0010, vs2, rs1, 0b111, vs3, 0b0100111)) 
// ...001...........111.....0100111 // Vector Strided Instructions (including segment part) // https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#75-vector-strided-instructions -#define VLSE8_V(vd, rs1, rs2, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0100, rs2, rs1, 0b000, vd, 0b0000111)) // ...010...........000.....0000111 -#define VLSE16_V(vd, rs1, rs2, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0100, rs2, rs1, 0b101, vd, 0b0000111)) // ...010...........101.....0000111 -#define VLSE32_V(vd, rs1, rs2, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0100, rs2, rs1, 0b110, vd, 0b0000111)) // ...010...........110.....0000111 -#define VLSE64_V(vd, rs1, rs2, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0100, rs2, rs1, 0b111, vd, 0b0000111)) // ...010...........111.....0000111 -#define VSSE8_V(vs3, rs1, rs2, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0100, rs2, rs1, 0b000, vs3, 0b0100111)) // ...010...........000.....0100111 -#define VSSE16_V(vs3, rs1, rs2, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0100, rs2, rs1, 0b101, vs3, 0b0100111)) // ...010...........101.....0100111 -#define VSSE32_V(vs3, rs1, rs2, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0100, rs2, rs1, 0b110, vs3, 0b0100111)) // ...010...........110.....0100111 -#define VSSE64_V(vs3, rs1, rs2, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0100, rs2, rs1, 0b111, vs3, 0b0100111)) // ...010...........111.....0100111 +#define VLSE8_V(vd, rs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0100, rs2, rs1, 0b000, vd, 0b0000111)) // ...010...........000.....0000111 +#define VLSE16_V(vd, rs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0100, rs2, rs1, 0b101, vd, 0b0000111)) // ...010...........101.....0000111 +#define VLSE32_V(vd, rs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0100, rs2, rs1, 0b110, vd, 0b0000111)) // ...010...........110.....0000111 +#define VLSE64_V(vd, rs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0100, rs2, rs1, 0b111, vd, 0b0000111)) // ...010...........111.....0000111 +#define VSSE8_V(vs3, rs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0100, rs2, rs1, 0b000, vs3, 0b0100111)) // ...010...........000.....0100111 +#define VSSE16_V(vs3, rs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0100, rs2, rs1, 0b101, vs3, 0b0100111)) // ...010...........101.....0100111 +#define VSSE32_V(vs3, rs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0100, rs2, rs1, 0b110, vs3, 0b0100111)) // ...010...........110.....0100111 +#define VSSE64_V(vs3, rs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0100, rs2, rs1, 0b111, vs3, 0b0100111)) // ...010...........111.....0100111 // Vector Indexed-Ordered Instructions (including segment part) // https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#76-vector-indexed-instructions -#define VLOXEI8_V(vd, rs1, vs2, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0110, vs2, rs1, 0b000, vd, 0b0000111)) // ...011...........000.....0000111 -#define VLOXEI16_V(vd, rs1, vs2, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0110, vs2, rs1, 0b101, vd, 0b0000111)) // ...011...........101.....0000111 -#define VLOXEI32_V(vd, rs1, vs2, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0110, vs2, rs1, 0b110, vd, 0b0000111)) // ...011...........110.....0000111 -#define VLOXEI64_V(vd, rs1, vs2, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0110, vs2, rs1, 0b111, vd, 0b0000111)) // ...011...........111.....0000111 -#define VSOXEI8_V(vs3, rs1, vs2, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0110, vs2, rs1, 0b000, vs3, 0b0100111)) // ...011...........000.....0100111 -#define VSOXEI16_V(vs3, rs1, vs2, 
vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0110, vs2, rs1, 0b101, vs3, 0b0100111)) // ...011...........101.....0100111 -#define VSOXEI32_V(vs3, rs1, vs2, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0110, vs2, rs1, 0b110, vs3, 0b0100111)) // ...011...........110.....0100111 -#define VSOXEI64_V(vs3, rs1, vs2, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0110, vs2, rs1, 0b111, vs3, 0b0100111)) // ...011...........111.....0100111 +#define VLOXEI8_V(vd, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0110, vs2, rs1, 0b000, vd, 0b0000111)) // ...011...........000.....0000111 +#define VLOXEI16_V(vd, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0110, vs2, rs1, 0b101, vd, 0b0000111)) // ...011...........101.....0000111 +#define VLOXEI32_V(vd, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0110, vs2, rs1, 0b110, vd, 0b0000111)) // ...011...........110.....0000111 +#define VLOXEI64_V(vd, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0110, vs2, rs1, 0b111, vd, 0b0000111)) // ...011...........111.....0000111 +#define VSOXEI8_V(vs3, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0110, vs2, rs1, 0b000, vs3, 0b0100111)) // ...011...........000.....0100111 +#define VSOXEI16_V(vs3, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0110, vs2, rs1, 0b101, vs3, 0b0100111)) // ...011...........101.....0100111 +#define VSOXEI32_V(vs3, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0110, vs2, rs1, 0b110, vs3, 0b0100111)) // ...011...........110.....0100111 +#define VSOXEI64_V(vs3, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | 0b0110, vs2, rs1, 0b111, vs3, 0b0100111)) // ...011...........111.....0100111 // Unit-stride F31..29=0ault-Only-First Loads // https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#77-unit-stride-fault-only-first-loads @@ -1349,78 +1347,78 @@ f28–31 ft8–11 FP temporaries Caller // https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#14-vector-floating-point-instructions // OPFVF -#define VFADD_VF(vd, rs1, vs2, vm) EMIT(R_type(0b0000000 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 000000...........101.....1010111 -#define VFSUB_VF(vd, rs1, vs2, vm) EMIT(R_type(0b0000100 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 000010...........101.....1010111 -#define VFMIN_VF(vd, rs1, vs2, vm) EMIT(R_type(0b0001000 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 000100...........101.....1010111 -#define VFMAX_VF(vd, rs1, vs2, vm) EMIT(R_type(0b0001100 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 000110...........101.....1010111 -#define VFSGNJ_VF(vd, rs1, vs2, vm) EMIT(R_type(0b0010000 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 001000...........101.....1010111 -#define VFSGNJN_VF(vd, rs1, vs2, vm) EMIT(R_type(0b0010010 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 001001...........101.....1010111 -#define VFSGNJX_VF(vd, rs1, vs2, vm) EMIT(R_type(0b0010100 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 001010...........101.....1010111 -#define VFSLIDE1UP_VF(vd, rs1, vs2, vm) EMIT(R_type(0b0011100 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 001110...........101.....1010111 -#define VFSLIDE1DOWN_VF(vd, rs1, vs2, vm) EMIT(R_type(0b0011110 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 001111...........101.....1010111 +#define VFADD_VF(vd, vs2, rs1, vm) EMIT(R_type(0b0000000 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 000000...........101.....1010111 +#define VFSUB_VF(vd, vs2, rs1, vm) EMIT(R_type(0b0000100 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 000010...........101.....1010111 +#define VFMIN_VF(vd, vs2, rs1, vm) EMIT(R_type(0b0001000 | (vm), vs2, rs1, 0b101, 
vd, 0b1010111)) // 000100...........101.....1010111 +#define VFMAX_VF(vd, vs2, rs1, vm) EMIT(R_type(0b0001100 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 000110...........101.....1010111 +#define VFSGNJ_VF(vd, vs2, rs1, vm) EMIT(R_type(0b0010000 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 001000...........101.....1010111 +#define VFSGNJN_VF(vd, vs2, rs1, vm) EMIT(R_type(0b0010010 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 001001...........101.....1010111 +#define VFSGNJX_VF(vd, vs2, rs1, vm) EMIT(R_type(0b0010100 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 001010...........101.....1010111 +#define VFSLIDE1UP_VF(vd, vs2, rs1, vm) EMIT(R_type(0b0011100 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 001110...........101.....1010111 +#define VFSLIDE1DOWN_VF(vd, vs2, rs1, vm) EMIT(R_type(0b0011110 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 001111...........101.....1010111 #define VFMV_S_F(vd, rs1) EMIT(I_type(0b010000100000, rs1, 0b101, vd, 0b1010111)) // 010000100000.....101.....1010111 #define VFMV_V_F(vd, rs1) EMIT(I_type(0b010111100000, rs1, 0b101, vd, 0b1010111)) // 010111100000.....101.....1010111 -#define VFMERGE_VFM(vd, rs1, vs2) EMIT(R_type(0b0101110, vs2, rs1, 0b101, vd, 0b1010111)) // 0101110..........101.....1010111 - -#define VMFEQ_VF(vd, rs1, vs2, vm) EMIT(R_type(0b0110000 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 011000...........101.....1010111 -#define VMFLE_VF(vd, rs1, vs2, vm) EMIT(R_type(0b0110010 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 011001...........101.....1010111 -#define VMFLT_VF(vd, rs1, vs2, vm) EMIT(R_type(0b0110110 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 011011...........101.....1010111 -#define VMFNE_VF(vd, rs1, vs2, vm) EMIT(R_type(0b0111000 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 011100...........101.....1010111 -#define VMFGT_VF(vd, rs1, vs2, vm) EMIT(R_type(0b0111010 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 011101...........101.....1010111 -#define VMFGE_VF(vd, rs1, vs2, vm) EMIT(R_type(0b0111110 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 011111...........101.....1010111 -#define VFDIV_VF(vd, rs1, vs2, vm) EMIT(R_type(0b1000000 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 100000...........101.....1010111 -#define VFRDIV_VF(vd, rs1, vs2, vm) EMIT(R_type(0b1000010 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 100001...........101.....1010111 -#define VFMUL_VF(vd, rs1, vs2, vm) EMIT(R_type(0b1001000 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 100100...........101.....1010111 -#define VFRSUB_VF(vd, rs1, vs2, vm) EMIT(R_type(0b1001110 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 100111...........101.....1010111 -#define VFMADD_VF(vd, rs1, vs2, vm) EMIT(R_type(0b1010000 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 101000...........101.....1010111 -#define VFNMADD_VF(vd, rs1, vs2, vm) EMIT(R_type(0b1010010 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 101001...........101.....1010111 -#define VFMSUB_VF(vd, rs1, vs2, vm) EMIT(R_type(0b1010100 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 101010...........101.....1010111 -#define VFNMSUB_VF(vd, rs1, vs2, vm) EMIT(R_type(0b1010110 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 101011...........101.....1010111 -#define VFMACC_VF(vd, rs1, vs2, vm) EMIT(R_type(0b1011000 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 101100...........101.....1010111 -#define VFNMACC_VF(vd, rs1, vs2, vm) EMIT(R_type(0b1011010 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 101101...........101.....1010111 -#define VFMSAC_VF(vd, rs1, vs2, vm) EMIT(R_type(0b1011100 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 101110...........101.....1010111 
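Note on the OPFVF encodings in this hunk: the scalar FP operand always travels in the rs1 field and the vector operand in vs2, with the mask flag OR-ed into the low bit of funct7, so the reordered argument lists (vd, vs2, rs1, vm) read in the same order as the assembly forms (e.g. vfadd.vf vd, vs2, rs1). The following standalone sketch is not box64 code; it assumes R_type() packs the standard RISC-V R-type fields (funct7, rs2, rs1, funct3, rd, opcode) and only illustrates how one of these words lines up with the bit-pattern comments.

// Illustrative only: r_type() mirrors the assumed layout of R_type().
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t r_type(uint32_t funct7, uint32_t rs2, uint32_t rs1,
                       uint32_t funct3, uint32_t rd, uint32_t opcode)
{
    return (funct7 << 25) | (rs2 << 20) | (rs1 << 15) |
           (funct3 << 12) | (rd << 7) | opcode;
}

int main(void)
{
    // Shape of VFADD_VF(vd = 2, vs2 = 4, rs1 = 10, vm = 1), i.e.
    // vfadd.vf v2, v4, fa0 with no mask.
    uint32_t w = r_type(0b0000000 | 1, 4, 10, 0b101, 2, 0b1010111);
    assert((w >> 26) == 0b000000);    // funct6 of vfadd.vf
    assert(((w >> 12) & 7) == 0b101); // funct3 = OPFVF
    assert((w & 0x7f) == 0b1010111);  // vector opcode
    printf("vfadd.vf v2, v4, fa0 -> 0x%08x\n", w);
    return 0;
}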
-#define VFNMSAC_VF(vd, rs1, vs2, vm) EMIT(R_type(0b1011110 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 101111...........101.....1010111 -#define VFWADD_VF(vd, rs1, vs2, vm) EMIT(R_type(0b1100000 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 110000...........101.....1010111 -#define VFWSUB_VF(vd, rs1, vs2, vm) EMIT(R_type(0b1100100 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 110010...........101.....1010111 -#define VFWADD_WF(vd, rs1, vs2, vm) EMIT(R_type(0b1101000 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 110100...........101.....1010111 -#define VFWSUB_WF(vd, rs1, vs2, vm) EMIT(R_type(0b1101100 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 110110...........101.....1010111 -#define VFWMUL_VF(vd, rs1, vs2, vm) EMIT(R_type(0b1110000 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 111000...........101.....1010111 -#define VFWMACC_VF(vd, rs1, vs2, vm) EMIT(R_type(0b1111000 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 111100...........101.....1010111 -#define VFWNMACC_VF(vd, rs1, vs2, vm) EMIT(R_type(0b1111010 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 111101...........101.....1010111 -#define VFWMSAC_VF(vd, rs1, vs2, vm) EMIT(R_type(0b1111100 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 111110...........101.....1010111 -#define VFWNMSAC_VF(vd, rs1, vs2, vm) EMIT(R_type(0b1111110 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 111111...........101.....1010111 +#define VFMERGE_VFM(vd, vs2, rs1) EMIT(R_type(0b0101110, vs2, rs1, 0b101, vd, 0b1010111)) // 0101110..........101.....1010111 + +#define VMFEQ_VF(vd, vs2, rs1, vm) EMIT(R_type(0b0110000 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 011000...........101.....1010111 +#define VMFLE_VF(vd, vs2, rs1, vm) EMIT(R_type(0b0110010 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 011001...........101.....1010111 +#define VMFLT_VF(vd, vs2, rs1, vm) EMIT(R_type(0b0110110 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 011011...........101.....1010111 +#define VMFNE_VF(vd, vs2, rs1, vm) EMIT(R_type(0b0111000 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 011100...........101.....1010111 +#define VMFGT_VF(vd, vs2, rs1, vm) EMIT(R_type(0b0111010 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 011101...........101.....1010111 +#define VMFGE_VF(vd, vs2, rs1, vm) EMIT(R_type(0b0111110 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 011111...........101.....1010111 +#define VFDIV_VF(vd, vs2, rs1, vm) EMIT(R_type(0b1000000 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 100000...........101.....1010111 +#define VFRDIV_VF(vd, vs2, rs1, vm) EMIT(R_type(0b1000010 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 100001...........101.....1010111 +#define VFMUL_VF(vd, vs2, rs1, vm) EMIT(R_type(0b1001000 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 100100...........101.....1010111 +#define VFRSUB_VF(vd, vs2, rs1, vm) EMIT(R_type(0b1001110 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 100111...........101.....1010111 +#define VFMADD_VF(vd, vs2, rs1, vm) EMIT(R_type(0b1010000 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 101000...........101.....1010111 +#define VFNMADD_VF(vd, vs2, rs1, vm) EMIT(R_type(0b1010010 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 101001...........101.....1010111 +#define VFMSUB_VF(vd, vs2, rs1, vm) EMIT(R_type(0b1010100 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 101010...........101.....1010111 +#define VFNMSUB_VF(vd, vs2, rs1, vm) EMIT(R_type(0b1010110 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 101011...........101.....1010111 +#define VFMACC_VF(vd, vs2, rs1, vm) EMIT(R_type(0b1011000 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 101100...........101.....1010111 +#define VFNMACC_VF(vd, 
vs2, rs1, vm) EMIT(R_type(0b1011010 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 101101...........101.....1010111 +#define VFMSAC_VF(vd, vs2, rs1, vm) EMIT(R_type(0b1011100 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 101110...........101.....1010111 +#define VFNMSAC_VF(vd, vs2, rs1, vm) EMIT(R_type(0b1011110 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 101111...........101.....1010111 +#define VFWADD_VF(vd, vs2, rs1, vm) EMIT(R_type(0b1100000 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 110000...........101.....1010111 +#define VFWSUB_VF(vd, vs2, rs1, vm) EMIT(R_type(0b1100100 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 110010...........101.....1010111 +#define VFWADD_WF(vd, vs2, rs1, vm) EMIT(R_type(0b1101000 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 110100...........101.....1010111 +#define VFWSUB_WF(vd, vs2, rs1, vm) EMIT(R_type(0b1101100 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 110110...........101.....1010111 +#define VFWMUL_VF(vd, vs2, rs1, vm) EMIT(R_type(0b1110000 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 111000...........101.....1010111 +#define VFWMACC_VF(vd, vs2, rs1, vm) EMIT(R_type(0b1111000 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 111100...........101.....1010111 +#define VFWNMACC_VF(vd, vs2, rs1, vm) EMIT(R_type(0b1111010 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 111101...........101.....1010111 +#define VFWMSAC_VF(vd, vs2, rs1, vm) EMIT(R_type(0b1111100 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 111110...........101.....1010111 +#define VFWNMSAC_VF(vd, vs2, rs1, vm) EMIT(R_type(0b1111110 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 111111...........101.....1010111 // OPFVV -#define VFADD_VV(vd, vs1, vs2, vm) EMIT(R_type(0b0000000 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 000000...........001.....1010111 -#define VFREDUSUM_VS(vd, vs1, vs2, vm) EMIT(R_type(0b0000010 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 000001...........001.....1010111 -#define VFSUB_VV(vd, vs1, vs2, vm) EMIT(R_type(0b0000100 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 000010...........001.....1010111 -#define VFREDOSUM_VS(vd, vs1, vs2, vm) EMIT(R_type(0b0000110 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 000011...........001.....1010111 -#define VFMIN_VV(vd, vs1, vs2, vm) EMIT(R_type(0b0001000 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 000100...........001.....1010111 -#define VFREDMIN_VS(vd, vs1, vs2, vm) EMIT(R_type(0b0001010 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 000101...........001.....1010111 -#define VFMAX_VV(vd, vs1, vs2, vm) EMIT(R_type(0b0001100 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 000110...........001.....1010111 -#define VFREDMAX_VS(vd, vs1, vs2, vm) EMIT(R_type(0b0001110 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 000111...........001.....1010111 -#define VFSGNJ_VV(vd, vs1, vs2, vm) EMIT(R_type(0b0010000 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 001000...........001.....1010111 -#define VFSGNJN_VV(vd, vs1, vs2, vm) EMIT(R_type(0b0010010 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 001001...........001.....1010111 -#define VFSGNJX_VV(vd, vs1, vs2, vm) EMIT(R_type(0b0010100 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 001010...........001.....1010111 +#define VFADD_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0000000 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 000000...........001.....1010111 +#define VFREDUSUM_VS(vd, vs2, vs1, vm) EMIT(R_type(0b0000010 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 000001...........001.....1010111 +#define VFSUB_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0000100 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 000010...........001.....1010111 +#define 
VFREDOSUM_VS(vd, vs2, vs1, vm) EMIT(R_type(0b0000110 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 000011...........001.....1010111 +#define VFMIN_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0001000 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 000100...........001.....1010111 +#define VFREDMIN_VS(vd, vs2, vs1, vm) EMIT(R_type(0b0001010 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 000101...........001.....1010111 +#define VFMAX_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0001100 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 000110...........001.....1010111 +#define VFREDMAX_VS(vd, vs2, vs1, vm) EMIT(R_type(0b0001110 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 000111...........001.....1010111 +#define VFSGNJ_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0010000 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 001000...........001.....1010111 +#define VFSGNJN_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0010010 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 001001...........001.....1010111 +#define VFSGNJX_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0010100 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 001010...........001.....1010111 #define VFMV_F_S(rd, vs2) EMIT(R_type(0b0100001, vs2, 0b00000, 0b001, rd, 0b1010111)) // 0100001.....00000001.....1010111 -#define VMFEQ_VV(vd, vs1, vs2, vm) EMIT(R_type(0b0110000 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 011000...........001.....1010111 -#define VMFLE_VV(vd, vs1, vs2, vm) EMIT(R_type(0b0110010 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 011001...........001.....1010111 -#define VMFLT_VV(vd, vs1, vs2, vm) EMIT(R_type(0b0110110 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 011011...........001.....1010111 -#define VMFNE_VV(vd, vs1, vs2, vm) EMIT(R_type(0b0111000 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 011100...........001.....1010111 -#define VFDIV_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1000000 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 100000...........001.....1010111 -#define VFMUL_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1001000 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 100100...........001.....1010111 -#define VFMADD_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1010000 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 101000...........001.....1010111 -#define VFNMADD_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1010010 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 101001...........001.....1010111 -#define VFMSUB_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1010100 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 101010...........001.....1010111 -#define VFNMSUB_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1010110 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 101011...........001.....1010111 -#define VFMACC_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1011000 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 101100...........001.....1010111 -#define VFNMACC_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1011010 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 101101...........001.....1010111 -#define VFMSAC_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1011100 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 101110...........001.....1010111 -#define VFNMSAC_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1011110 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 101111...........001.....1010111 +#define VMFEQ_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0110000 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 011000...........001.....1010111 +#define VMFLE_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0110010 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 011001...........001.....1010111 +#define VMFLT_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0110110 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 011011...........001.....1010111 +#define VMFNE_VV(vd, vs2, vs1, vm) 
EMIT(R_type(0b0111000 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 011100...........001.....1010111 +#define VFDIV_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1000000 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 100000...........001.....1010111 +#define VFMUL_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1001000 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 100100...........001.....1010111 +#define VFMADD_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1010000 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 101000...........001.....1010111 +#define VFNMADD_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1010010 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 101001...........001.....1010111 +#define VFMSUB_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1010100 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 101010...........001.....1010111 +#define VFNMSUB_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1010110 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 101011...........001.....1010111 +#define VFMACC_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1011000 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 101100...........001.....1010111 +#define VFNMACC_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1011010 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 101101...........001.....1010111 +#define VFMSAC_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1011100 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 101110...........001.....1010111 +#define VFNMSAC_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1011110 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 101111...........001.....1010111 #define VFCVT_XU_F_V(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b00000, 0b001, vd, 0b1010111)) // 010010......00000001.....1010111 #define VFCVT_X_F_V(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b00001, 0b001, vd, 0b1010111)) // 010010......00001001.....1010111 @@ -1448,147 +1446,147 @@ f28–31 ft8–11 FP temporaries Caller #define VFREC7_V(vd, vs2, vm) EMIT(R_type(0b0100110 | (vm), vs2, 0b00101, 0b001, vd, 0b1010111)) // 010011......00101001.....1010111 #define VFCLASS_V(vd, vs2, vm) EMIT(R_type(0b0100110 | (vm), vs2, 0b10000, 0b001, vd, 0b1010111)) // 010011......10000001.....1010111 -#define VFWADD_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1100000 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 110000...........001.....1010111 -#define VFWREDUSUM_VS(vd, vs1, vs2, vm) EMIT(R_type(0b1100010 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 110001...........001.....1010111 -#define VFWSUB_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1100100 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 110010...........001.....1010111 -#define VFWREDOSUM_VS(vd, vs1, vs2, vm) EMIT(R_type(0b1100110 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 110011...........001.....1010111 -#define VFWADD_WV(vd, vs1, vs2, vm) EMIT(R_type(0b1101000 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 110100...........001.....1010111 -#define VFWSUB_WV(vd, vs1, vs2, vm) EMIT(R_type(0b1101100 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 110110...........001.....1010111 -#define VFWMUL_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1110000 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 111000...........001.....1010111 -#define VFWMACC_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1111000 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 111100...........001.....1010111 -#define VFWNMACC_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1111010 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 111101...........001.....1010111 -#define VFWMSAC_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1111100 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 111110...........001.....1010111 -#define VFWNMSAC_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1111110 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 
111111...........001.....1010111 +#define VFWADD_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1100000 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 110000...........001.....1010111 +#define VFWREDUSUM_VS(vd, vs2, vs1, vm) EMIT(R_type(0b1100010 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 110001...........001.....1010111 +#define VFWSUB_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1100100 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 110010...........001.....1010111 +#define VFWREDOSUM_VS(vd, vs2, vs1, vm) EMIT(R_type(0b1100110 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 110011...........001.....1010111 +#define VFWADD_WV(vd, vs2, vs1, vm) EMIT(R_type(0b1101000 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 110100...........001.....1010111 +#define VFWSUB_WV(vd, vs2, vs1, vm) EMIT(R_type(0b1101100 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 110110...........001.....1010111 +#define VFWMUL_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1110000 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 111000...........001.....1010111 +#define VFWMACC_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1111000 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 111100...........001.....1010111 +#define VFWNMACC_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1111010 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 111101...........001.....1010111 +#define VFWMSAC_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1111100 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 111110...........001.....1010111 +#define VFWNMSAC_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1111110 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 111111...........001.....1010111 // OPIVX -#define VADD_VX(vd, rs1, vs2, vm) EMIT(R_type(0b0000000 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 000000...........100.....1010111 -#define VSUB_VX(vd, rs1, vs2, vm) EMIT(R_type(0b0000100 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 000010...........100.....1010111 -#define VRSUB_VX(vd, rs1, vs2, vm) EMIT(R_type(0b0000110 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 000011...........100.....1010111 -#define VMINU_VX(vd, rs1, vs2, vm) EMIT(R_type(0b0001000 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 000100...........100.....1010111 -#define VMIN_VX(vd, rs1, vs2, vm) EMIT(R_type(0b0001010 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 000101...........100.....1010111 -#define VMAXU_VX(vd, rs1, vs2, vm) EMIT(R_type(0b0001100 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 000110...........100.....1010111 -#define VMAX_VX(vd, rs1, vs2, vm) EMIT(R_type(0b0001110 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 000111...........100.....1010111 -#define VAND_VX(vd, rs1, vs2, vm) EMIT(R_type(0b0010010 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 001001...........100.....1010111 -#define VOR_VX(vd, rs1, vs2, vm) EMIT(R_type(0b0010100 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 001010...........100.....1010111 -#define VXOR_VX(vd, rs1, vs2, vm) EMIT(R_type(0b0010110 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 001011...........100.....1010111 -#define VRGATHER_VX(vd, rs1, vs2, vm) EMIT(R_type(0b0011000 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 001100...........100.....1010111 -#define VSLIDEUP_VX(vd, rs1, vs2, vm) EMIT(R_type(0b0011100 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 001110...........100.....1010111 -#define VSLIDEDOWN_VX(vd, rs1, vs2, vm) EMIT(R_type(0b0011110 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 001111...........100.....1010111 - -#define VADC_VXM(vd, rs1, vs2) EMIT(R_type(0b0100000, vs2, rs1, 0b100, vd, 0b1010111)) // 0100000..........100.....1010111 -#define VMADC_VXM(vd, rs1, vs2) EMIT(R_type(0b0100010, vs2, rs1, 0b100, vd, 0b1010111)) // 
0100010..........100.....1010111 -#define VMADC_VX(vd, rs1, vs2) EMIT(R_type(0b0100011, vs2, rs1, 0b100, vd, 0b1010111)) // 0100011..........100.....1010111 -#define VSBC_VXM(vd, rs1, vs2) EMIT(R_type(0b0100100, vs2, rs1, 0b100, vd, 0b1010111)) // 0100100..........100.....1010111 -#define VMSBC_VXM(vd, rs1, vs2) EMIT(R_type(0b0100110, vs2, rs1, 0b100, vd, 0b1010111)) // 0100110..........100.....1010111 -#define VMSBC_VX(vd, rs1, vs2) EMIT(R_type(0b0100111, vs2, rs1, 0b100, vd, 0b1010111)) // 0100111..........100.....1010111 -#define VMERGE_VXM(vd, rs1, vs2) EMIT(R_type(0b0101110, vs2, rs1, 0b100, vd, 0b1010111)) // 0101110..........100.....1010111 +#define VADD_VX(vd, vs2, rs1, vm) EMIT(R_type(0b0000000 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 000000...........100.....1010111 +#define VSUB_VX(vd, vs2, rs1, vm) EMIT(R_type(0b0000100 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 000010...........100.....1010111 +#define VRSUB_VX(vd, vs2, rs1, vm) EMIT(R_type(0b0000110 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 000011...........100.....1010111 +#define VMINU_VX(vd, vs2, rs1, vm) EMIT(R_type(0b0001000 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 000100...........100.....1010111 +#define VMIN_VX(vd, vs2, rs1, vm) EMIT(R_type(0b0001010 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 000101...........100.....1010111 +#define VMAXU_VX(vd, vs2, rs1, vm) EMIT(R_type(0b0001100 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 000110...........100.....1010111 +#define VMAX_VX(vd, vs2, rs1, vm) EMIT(R_type(0b0001110 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 000111...........100.....1010111 +#define VAND_VX(vd, vs2, rs1, vm) EMIT(R_type(0b0010010 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 001001...........100.....1010111 +#define VOR_VX(vd, vs2, rs1, vm) EMIT(R_type(0b0010100 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 001010...........100.....1010111 +#define VXOR_VX(vd, vs2, rs1, vm) EMIT(R_type(0b0010110 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 001011...........100.....1010111 +#define VRGATHER_VX(vd, vs2, rs1, vm) EMIT(R_type(0b0011000 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 001100...........100.....1010111 +#define VSLIDEUP_VX(vd, vs2, rs1, vm) EMIT(R_type(0b0011100 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 001110...........100.....1010111 +#define VSLIDEDOWN_VX(vd, vs2, rs1, vm) EMIT(R_type(0b0011110 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 001111...........100.....1010111 + +#define VADC_VXM(vd, vs2, rs1) EMIT(R_type(0b0100000, vs2, rs1, 0b100, vd, 0b1010111)) // 0100000..........100.....1010111 +#define VMADC_VXM(vd, vs2, rs1) EMIT(R_type(0b0100010, vs2, rs1, 0b100, vd, 0b1010111)) // 0100010..........100.....1010111 +#define VMADC_VX(vd, vs2, rs1) EMIT(R_type(0b0100011, vs2, rs1, 0b100, vd, 0b1010111)) // 0100011..........100.....1010111 +#define VSBC_VXM(vd, vs2, rs1) EMIT(R_type(0b0100100, vs2, rs1, 0b100, vd, 0b1010111)) // 0100100..........100.....1010111 +#define VMSBC_VXM(vd, vs2, rs1) EMIT(R_type(0b0100110, vs2, rs1, 0b100, vd, 0b1010111)) // 0100110..........100.....1010111 +#define VMSBC_VX(vd, vs2, rs1) EMIT(R_type(0b0100111, vs2, rs1, 0b100, vd, 0b1010111)) // 0100111..........100.....1010111 +#define VMERGE_VXM(vd, vs2, rs1) EMIT(R_type(0b0101110, vs2, rs1, 0b100, vd, 0b1010111)) // 0101110..........100.....1010111 #define VMV_V_X(vd, rs1) EMIT(I_type(0b010111100000, rs1, 0b100, vd, 0b1010111)) // 010111100000.....100.....1010111 -#define VMSEQ_VX(vd, rs1, vs2, vm) EMIT(R_type(0b0110000 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 011000...........100.....1010111 -#define 
VMSNE_VX(vd, rs1, vs2, vm) EMIT(R_type(0b0110010 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 011001...........100.....1010111 -#define VMSLTU_VX(vd, rs1, vs2, vm) EMIT(R_type(0b0110100 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 011010...........100.....1010111 -#define VMSLT_VX(vd, rs1, vs2, vm) EMIT(R_type(0b0110110 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 011011...........100.....1010111 -#define VMSLEU_VX(vd, rs1, vs2, vm) EMIT(R_type(0b0111000 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 011100...........100.....1010111 -#define VMSLE_VX(vd, rs1, vs2, vm) EMIT(R_type(0b0111010 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 011101...........100.....1010111 -#define VMSGTU_VX(vd, rs1, vs2, vm) EMIT(R_type(0b0111100 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 011110...........100.....1010111 -#define VMSGT_VX(vd, rs1, vs2, vm) EMIT(R_type(0b0111110 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 011111...........100.....1010111 -#define VSADDU_VX(vd, rs1, vs2, vm) EMIT(R_type(0b1000000 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 100000...........100.....1010111 -#define VSADD_VX(vd, rs1, vs2, vm) EMIT(R_type(0b1000010 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 100001...........100.....1010111 -#define VSSUBU_VX(vd, rs1, vs2, vm) EMIT(R_type(0b1000100 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 100010...........100.....1010111 -#define VSSUB_VX(vd, rs1, vs2, vm) EMIT(R_type(0b1000110 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 100011...........100.....1010111 -#define VSLL_VX(vd, rs1, vs2, vm) EMIT(R_type(0b1001010 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 100101...........100.....1010111 -#define VSMUL_VX(vd, rs1, vs2, vm) EMIT(R_type(0b1001110 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 100111...........100.....1010111 -#define VSRL_VX(vd, rs1, vs2, vm) EMIT(R_type(0b1010000 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 101000...........100.....1010111 -#define VSRA_VX(vd, rs1, vs2, vm) EMIT(R_type(0b1010010 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 101001...........100.....1010111 -#define VSSRL_VX(vd, rs1, vs2, vm) EMIT(R_type(0b1010100 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 101010...........100.....1010111 -#define VSSRA_VX(vd, rs1, vs2, vm) EMIT(R_type(0b1010110 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 101011...........100.....1010111 -#define VNSRL_WX(vd, rs1, vs2, vm) EMIT(R_type(0b1011000 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 101100...........100.....1010111 -#define VNSRA_WX(vd, rs1, vs2, vm) EMIT(R_type(0b1011010 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 101101...........100.....1010111 -#define VNCLIPU_WX(vd, rs1, vs2, vm) EMIT(R_type(0b1011100 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 101110...........100.....1010111 -#define VNCLIP_WX(vd, rs1, vs2, vm) EMIT(R_type(0b1011110 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 101111...........100.....1010111 +#define VMSEQ_VX(vd, vs2, rs1, vm) EMIT(R_type(0b0110000 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 011000...........100.....1010111 +#define VMSNE_VX(vd, vs2, rs1, vm) EMIT(R_type(0b0110010 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 011001...........100.....1010111 +#define VMSLTU_VX(vd, vs2, rs1, vm) EMIT(R_type(0b0110100 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 011010...........100.....1010111 +#define VMSLT_VX(vd, vs2, rs1, vm) EMIT(R_type(0b0110110 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 011011...........100.....1010111 +#define VMSLEU_VX(vd, vs2, rs1, vm) EMIT(R_type(0b0111000 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 011100...........100.....1010111 +#define VMSLE_VX(vd, vs2, rs1, vm) 
EMIT(R_type(0b0111010 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 011101...........100.....1010111 +#define VMSGTU_VX(vd, vs2, rs1, vm) EMIT(R_type(0b0111100 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 011110...........100.....1010111 +#define VMSGT_VX(vd, vs2, rs1, vm) EMIT(R_type(0b0111110 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 011111...........100.....1010111 +#define VSADDU_VX(vd, vs2, rs1, vm) EMIT(R_type(0b1000000 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 100000...........100.....1010111 +#define VSADD_VX(vd, vs2, rs1, vm) EMIT(R_type(0b1000010 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 100001...........100.....1010111 +#define VSSUBU_VX(vd, vs2, rs1, vm) EMIT(R_type(0b1000100 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 100010...........100.....1010111 +#define VSSUB_VX(vd, vs2, rs1, vm) EMIT(R_type(0b1000110 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 100011...........100.....1010111 +#define VSLL_VX(vd, vs2, rs1, vm) EMIT(R_type(0b1001010 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 100101...........100.....1010111 +#define VSMUL_VX(vd, vs2, rs1, vm) EMIT(R_type(0b1001110 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 100111...........100.....1010111 +#define VSRL_VX(vd, vs2, rs1, vm) EMIT(R_type(0b1010000 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 101000...........100.....1010111 +#define VSRA_VX(vd, vs2, rs1, vm) EMIT(R_type(0b1010010 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 101001...........100.....1010111 +#define VSSRL_VX(vd, vs2, rs1, vm) EMIT(R_type(0b1010100 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 101010...........100.....1010111 +#define VSSRA_VX(vd, vs2, rs1, vm) EMIT(R_type(0b1010110 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 101011...........100.....1010111 +#define VNSRL_WX(vd, vs2, rs1, vm) EMIT(R_type(0b1011000 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 101100...........100.....1010111 +#define VNSRA_WX(vd, vs2, rs1, vm) EMIT(R_type(0b1011010 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 101101...........100.....1010111 +#define VNCLIPU_WX(vd, vs2, rs1, vm) EMIT(R_type(0b1011100 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 101110...........100.....1010111 +#define VNCLIP_WX(vd, vs2, rs1, vm) EMIT(R_type(0b1011110 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 101111...........100.....1010111 // OPIVV -#define VADD_VV(vd, vs1, vs2, vm) EMIT(R_type(0b0000000 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 000000...........000.....1010111 -#define VSUB_VV(vd, vs1, vs2, vm) EMIT(R_type(0b0000100 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 000010...........000.....1010111 -#define VMINU_VV(vd, vs1, vs2, vm) EMIT(R_type(0b0001000 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 000100...........000.....1010111 -#define VMIN_VV(vd, vs1, vs2, vm) EMIT(R_type(0b0001010 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 000101...........000.....1010111 -#define VMAXU_VV(vd, vs1, vs2, vm) EMIT(R_type(0b0001100 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 000110...........000.....1010111 -#define VMAX_VV(vd, vs1, vs2, vm) EMIT(R_type(0b0001110 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 000111...........000.....1010111 -#define VAND_VV(vd, vs1, vs2, vm) EMIT(R_type(0b0010010 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 001001...........000.....1010111 -#define VOR_VV(vd, vs1, vs2, vm) EMIT(R_type(0b0010100 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 001010...........000.....1010111 -#define VXOR_VV(vd, vs1, vs2, vm) EMIT(R_type(0b0010110 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 001011...........000.....1010111 -#define VRGATHER_VV(vd, vs1, vs2, vm) EMIT(R_type(0b0011000 | (vm), vs2, 
vs1, 0b000, vd, 0b1010111)) // 001100...........000.....1010111 -#define VRGATHEREI16_VV(vd, vs1, vs2, vm) EMIT(R_type(0b0011100 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 001110...........000.....1010111 - -#define VADC_VVM(vd, vs1, vs2) EMIT(R_type(0b0100000, vs2, vs1, 0b000, vd, 0b1010111)) // 0100000..........000.....1010111 -#define VMADC_VVM(vd, vs1, vs2) EMIT(R_type(0b0100010, vs2, vs1, 0b000, vd, 0b1010111)) // 0100010..........000.....1010111 -#define VMADC_VV(vd, vs1, vs2) EMIT(R_type(0b0100011, vs2, vs1, 0b000, vd, 0b1010111)) // 0100011..........000.....1010111 -#define VSBC_VVM(vd, vs1, vs2) EMIT(R_type(0b0100100, vs2, vs1, 0b000, vd, 0b1010111)) // 0100100..........000.....1010111 -#define VMSBC_VVM(vd, vs1, vs2) EMIT(R_type(0b0100110, vs2, vs1, 0b000, vd, 0b1010111)) // 0100110..........000.....1010111 -#define VMSBC_VV(vd, vs1, vs2) EMIT(R_type(0b0100111, vs2, vs1, 0b000, vd, 0b1010111)) // 0100111..........000.....1010111 -#define VMERGE_VVM(vd, vs1, vs2) EMIT(R_type(0b0101110, vs2, vs1, 0b000, vd, 0b1010111)) // 0101110..........000.....1010111 +#define VADD_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0000000 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 000000...........000.....1010111 +#define VSUB_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0000100 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 000010...........000.....1010111 +#define VMINU_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0001000 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 000100...........000.....1010111 +#define VMIN_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0001010 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 000101...........000.....1010111 +#define VMAXU_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0001100 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 000110...........000.....1010111 +#define VMAX_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0001110 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 000111...........000.....1010111 +#define VAND_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0010010 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 001001...........000.....1010111 +#define VOR_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0010100 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 001010...........000.....1010111 +#define VXOR_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0010110 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 001011...........000.....1010111 +#define VRGATHER_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0011000 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 001100...........000.....1010111 +#define VRGATHEREI16_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0011100 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 001110...........000.....1010111 + +#define VADC_VVM(vd, vs2, vs1) EMIT(R_type(0b0100000, vs2, vs1, 0b000, vd, 0b1010111)) // 0100000..........000.....1010111 +#define VMADC_VVM(vd, vs2, vs1) EMIT(R_type(0b0100010, vs2, vs1, 0b000, vd, 0b1010111)) // 0100010..........000.....1010111 +#define VMADC_VV(vd, vs2, vs1) EMIT(R_type(0b0100011, vs2, vs1, 0b000, vd, 0b1010111)) // 0100011..........000.....1010111 +#define VSBC_VVM(vd, vs2, vs1) EMIT(R_type(0b0100100, vs2, vs1, 0b000, vd, 0b1010111)) // 0100100..........000.....1010111 +#define VMSBC_VVM(vd, vs2, vs1) EMIT(R_type(0b0100110, vs2, vs1, 0b000, vd, 0b1010111)) // 0100110..........000.....1010111 +#define VMSBC_VV(vd, vs2, vs1) EMIT(R_type(0b0100111, vs2, vs1, 0b000, vd, 0b1010111)) // 0100111..........000.....1010111 +#define VMERGE_VVM(vd, vs2, vs1) EMIT(R_type(0b0101110, vs2, vs1, 0b000, vd, 0b1010111)) // 0101110..........000.....1010111 #define VMV_V_V(vd, vs1) EMIT(I_type(0b010111100000, vs1, 0b000, vd, 0b1010111)) // 
010111100000.....000.....1010111 -#define VMSEQ_VV(vd, vs1, vs2, vm) EMIT(R_type(0b0110000 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 011000...........000.....1010111 -#define VMSNE_VV(vd, vs1, vs2, vm) EMIT(R_type(0b0110010 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 011001...........000.....1010111 -#define VMSLTU_VV(vd, vs1, vs2, vm) EMIT(R_type(0b0110100 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 011010...........000.....1010111 -#define VMSLT_VV(vd, vs1, vs2, vm) EMIT(R_type(0b0110110 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 011011...........000.....1010111 -#define VMSLEU_VV(vd, vs1, vs2, vm) EMIT(R_type(0b0111000 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 011100...........000.....1010111 -#define VMSLE_VV(vd, vs1, vs2, vm) EMIT(R_type(0b0111010 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 011101...........000.....1010111 -#define VSADDU_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1000000 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 100000...........000.....1010111 -#define VSADD_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1000010 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 100001...........000.....1010111 -#define VSSUBU_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1000100 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 100010...........000.....1010111 -#define VSSUB_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1000110 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 100011...........000.....1010111 -#define VSLL_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1001010 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 100101...........000.....1010111 -#define VSMUL_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1001110 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 100111...........000.....1010111 -#define VSRL_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1010000 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 101000...........000.....1010111 -#define VSRA_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1010010 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 101001...........000.....1010111 -#define VSSRL_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1010100 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 101010...........000.....1010111 -#define VSSRA_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1010110 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 101011...........000.....1010111 -#define VNSRL_WV(vd, vs1, vs2, vm) EMIT(R_type(0b1011000 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 101100...........000.....1010111 -#define VNSRA_WV(vd, vs1, vs2, vm) EMIT(R_type(0b1011010 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 101101...........000.....1010111 -#define VNCLIPU_WV(vd, vs1, vs2, vm) EMIT(R_type(0b1011100 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 101110...........000.....1010111 -#define VNCLIP_WV(vd, vs1, vs2, vm) EMIT(R_type(0b1011110 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 101111...........000.....1010111 -#define VWREDSUMU_VS(vd, vs1, vs2, vm) EMIT(R_type(0b1100000 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 110000...........000.....1010111 -#define VWREDSUM_VS(vd, vs1, vs2, vm) EMIT(R_type(0b1100010 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 110001...........000.....1010111 +#define VMSEQ_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0110000 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 011000...........000.....1010111 +#define VMSNE_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0110010 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 011001...........000.....1010111 +#define VMSLTU_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0110100 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 011010...........000.....1010111 +#define VMSLT_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0110110 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 011011...........000.....1010111 
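Aside on the integer forms: the OPIVX group above, the OPIVV group here and the OPIVI group further below reuse the same funct6 per operation and differ only in funct3 (100, 000 and 011 respectively) and in what the second source field carries (rs1, vs1 or simm5), so with the reordered arguments a call such as VADD_VX(vd, vs2, rs1, vm) reads like the assembly vadd.vx vd, vs2, rs1. A minimal standalone sketch, under the same assumption about R_type() as above, shows that the three vadd encodings differ only in funct3:

#include <assert.h>
#include <stdint.h>

static uint32_t r_type(uint32_t funct7, uint32_t rs2, uint32_t rs1,
                       uint32_t funct3, uint32_t rd, uint32_t opcode)
{
    return (funct7 << 25) | (rs2 << 20) | (rs1 << 15) |
           (funct3 << 12) | (rd << 7) | opcode;
}

int main(void)
{
    // vadd.vv v1, v2, v3 / vadd.vx v1, v2, x3 / vadd.vi v1, v2, 3,
    // all unmasked (vm = 1); the vs1/rs1/simm5 field is 3 in each case.
    uint32_t vv = r_type(0b0000000 | 1, 2, 3, 0b000, 1, 0b1010111);
    uint32_t vx = r_type(0b0000000 | 1, 2, 3, 0b100, 1, 0b1010111);
    uint32_t vi = r_type(0b0000000 | 1, 2, 3, 0b011, 1, 0b1010111);
    assert((vv ^ vx) == (0b100u << 12)); // only funct3 changes
    assert((vv ^ vi) == (0b011u << 12));
    return 0;
}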
+#define VMSLEU_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0111000 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 011100...........000.....1010111 +#define VMSLE_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0111010 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 011101...........000.....1010111 +#define VSADDU_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1000000 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 100000...........000.....1010111 +#define VSADD_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1000010 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 100001...........000.....1010111 +#define VSSUBU_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1000100 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 100010...........000.....1010111 +#define VSSUB_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1000110 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 100011...........000.....1010111 +#define VSLL_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1001010 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 100101...........000.....1010111 +#define VSMUL_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1001110 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 100111...........000.....1010111 +#define VSRL_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1010000 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 101000...........000.....1010111 +#define VSRA_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1010010 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 101001...........000.....1010111 +#define VSSRL_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1010100 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 101010...........000.....1010111 +#define VSSRA_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1010110 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 101011...........000.....1010111 +#define VNSRL_WV(vd, vs2, vs1, vm) EMIT(R_type(0b1011000 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 101100...........000.....1010111 +#define VNSRA_WV(vd, vs2, vs1, vm) EMIT(R_type(0b1011010 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 101101...........000.....1010111 +#define VNCLIPU_WV(vd, vs2, vs1, vm) EMIT(R_type(0b1011100 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 101110...........000.....1010111 +#define VNCLIP_WV(vd, vs2, vs1, vm) EMIT(R_type(0b1011110 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 101111...........000.....1010111 +#define VWREDSUMU_VS(vd, vs2, vs1, vm) EMIT(R_type(0b1100000 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 110000...........000.....1010111 +#define VWREDSUM_VS(vd, vs2, vs1, vm) EMIT(R_type(0b1100010 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 110001...........000.....1010111 // OPIVI -#define VADD_VI(vd, simm5, vs2, vm) EMIT(R_type(0b0000000 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 000000...........011.....1010111 -#define VRSUB_VI(vd, simm5, vs2, vm) EMIT(R_type(0b0000110 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 000011...........011.....1010111 -#define VAND_VI(vd, simm5, vs2, vm) EMIT(R_type(0b0010010 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 001001...........011.....1010111 -#define VOR_VI(vd, simm5, vs2, vm) EMIT(R_type(0b0010100 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 001010...........011.....1010111 -#define VXOR_VI(vd, simm5, vs2, vm) EMIT(R_type(0b0010110 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 001011...........011.....1010111 -#define VRGATHER_VI(vd, simm5, vs2, vm) EMIT(R_type(0b0011000 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 001100...........011.....1010111 -#define VSLIDEUP_VI(vd, simm5, vs2, vm) EMIT(R_type(0b0011100 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 001110...........011.....1010111 -#define VSLIDEDOWN_VI(vd, simm5, vs2, vm) EMIT(R_type(0b0011110 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 
001111...........011.....1010111 - -#define VADC_VIM(vd, simm5, vs2) EMIT(R_type(0b0100000, vs2, simm5, 0b011, vd, 0b1010111)) // 0100000..........011.....1010111 -#define VMADC_VIM(vd, simm5, vs2) EMIT(R_type(0b0100010, vs2, simm5, 0b011, vd, 0b1010111)) // 0100010..........011.....1010111 -#define VMADC_VI(vd, simm5, vs2) EMIT(R_type(0b0100011, vs2, simm5, 0b011, vd, 0b1010111)) // 0100011..........011.....1010111 -#define VMERGE_VIM(vd, simm5, vs2) EMIT(R_type(0b0101110, vs2, simm5, 0b011, vd, 0b1010111)) // 0101110..........011.....1010111 +#define VADD_VI(vd, vs2, simm5, vm) EMIT(R_type(0b0000000 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 000000...........011.....1010111 +#define VRSUB_VI(vd, vs2, simm5, vm) EMIT(R_type(0b0000110 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 000011...........011.....1010111 +#define VAND_VI(vd, vs2, simm5, vm) EMIT(R_type(0b0010010 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 001001...........011.....1010111 +#define VOR_VI(vd, vs2, simm5, vm) EMIT(R_type(0b0010100 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 001010...........011.....1010111 +#define VXOR_VI(vd, vs2, simm5, vm) EMIT(R_type(0b0010110 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 001011...........011.....1010111 +#define VRGATHER_VI(vd, vs2, simm5, vm) EMIT(R_type(0b0011000 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 001100...........011.....1010111 +#define VSLIDEUP_VI(vd, vs2, simm5, vm) EMIT(R_type(0b0011100 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 001110...........011.....1010111 +#define VSLIDEDOWN_VI(vd, vs2, simm5, vm) EMIT(R_type(0b0011110 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 001111...........011.....1010111 + +#define VADC_VIM(vd, vs2, simm5) EMIT(R_type(0b0100000, vs2, simm5, 0b011, vd, 0b1010111)) // 0100000..........011.....1010111 +#define VMADC_VIM(vd, vs2, simm5) EMIT(R_type(0b0100010, vs2, simm5, 0b011, vd, 0b1010111)) // 0100010..........011.....1010111 +#define VMADC_VI(vd, vs2, simm5) EMIT(R_type(0b0100011, vs2, simm5, 0b011, vd, 0b1010111)) // 0100011..........011.....1010111 +#define VMERGE_VIM(vd, vs2, simm5) EMIT(R_type(0b0101110, vs2, simm5, 0b011, vd, 0b1010111)) // 0101110..........011.....1010111 #define VMV_V_I(vd, simm5) EMIT(I_type(0b010111100000, simm5, 0b011, vd, 0b1010111)) // 010111100000.....011.....1010111 -#define VMSEQ_VI(vd, simm5, vs2, vm) EMIT(R_type(0b0110000 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 011000...........011.....1010111 -#define VMSNE_VI(vd, simm5, vs2, vm) EMIT(R_type(0b0110010 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 011001...........011.....1010111 -#define VMSLEU_VI(vd, simm5, vs2, vm) EMIT(R_type(0b0111000 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 011100...........011.....1010111 -#define VMSLE_VI(vd, simm5, vs2, vm) EMIT(R_type(0b0111010 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 011101...........011.....1010111 -#define VMSGTU_VI(vd, simm5, vs2, vm) EMIT(R_type(0b0111100 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 011110...........011.....1010111 -#define VMSGT_VI(vd, simm5, vs2, vm) EMIT(R_type(0b0111110 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 011111...........011.....1010111 - -#define VSADDU_VI(vd, simm5, vs2, vm) EMIT(R_type(0b1000000 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 100000...........011.....1010111 -#define VSADD_VI(vd, simm5, vs2, vm) EMIT(R_type(0b1000010 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 100001...........011.....1010111 -#define VSLL_VI(vd, simm5, vs2, vm) EMIT(R_type(0b1001010 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 
100101...........011.....1010111 -#define VSRL_VI(vd, simm5, vs2, vm) EMIT(R_type(0b1010000 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 101000...........011.....1010111 -#define VSRA_VI(vd, simm5, vs2, vm) EMIT(R_type(0b1010010 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 101001...........011.....1010111 -#define VSSRL_VI(vd, simm5, vs2, vm) EMIT(R_type(0b1010100 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 101010...........011.....1010111 -#define VSSRA_VI(vd, simm5, vs2, vm) EMIT(R_type(0b1010110 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 101011...........011.....1010111 -#define VNSRL_WI(vd, simm5, vs2, vm) EMIT(R_type(0b1011000 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 101100...........011.....1010111 -#define VNSRA_WI(vd, simm5, vs2, vm) EMIT(R_type(0b1011010 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 101101...........011.....1010111 -#define VNCLIPU_WI(vd, simm5, vs2, vm) EMIT(R_type(0b1011100 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 101110...........011.....1010111 -#define VNCLIP_WI(vd, simm5, vs2, vm) EMIT(R_type(0b1011110 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 101111...........011.....1010111 +#define VMSEQ_VI(vd, vs2, simm5, vm) EMIT(R_type(0b0110000 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 011000...........011.....1010111 +#define VMSNE_VI(vd, vs2, simm5, vm) EMIT(R_type(0b0110010 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 011001...........011.....1010111 +#define VMSLEU_VI(vd, vs2, simm5, vm) EMIT(R_type(0b0111000 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 011100...........011.....1010111 +#define VMSLE_VI(vd, vs2, simm5, vm) EMIT(R_type(0b0111010 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 011101...........011.....1010111 +#define VMSGTU_VI(vd, vs2, simm5, vm) EMIT(R_type(0b0111100 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 011110...........011.....1010111 +#define VMSGT_VI(vd, vs2, simm5, vm) EMIT(R_type(0b0111110 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 011111...........011.....1010111 + +#define VSADDU_VI(vd, vs2, simm5, vm) EMIT(R_type(0b1000000 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 100000...........011.....1010111 +#define VSADD_VI(vd, vs2, simm5, vm) EMIT(R_type(0b1000010 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 100001...........011.....1010111 +#define VSLL_VI(vd, vs2, simm5, vm) EMIT(R_type(0b1001010 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 100101...........011.....1010111 +#define VSRL_VI(vd, vs2, simm5, vm) EMIT(R_type(0b1010000 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 101000...........011.....1010111 +#define VSRA_VI(vd, vs2, simm5, vm) EMIT(R_type(0b1010010 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 101001...........011.....1010111 +#define VSSRL_VI(vd, vs2, simm5, vm) EMIT(R_type(0b1010100 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 101010...........011.....1010111 +#define VSSRA_VI(vd, vs2, simm5, vm) EMIT(R_type(0b1010110 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 101011...........011.....1010111 +#define VNSRL_WI(vd, vs2, simm5, vm) EMIT(R_type(0b1011000 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 101100...........011.....1010111 +#define VNSRA_WI(vd, vs2, simm5, vm) EMIT(R_type(0b1011010 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 101101...........011.....1010111 +#define VNCLIPU_WI(vd, vs2, simm5, vm) EMIT(R_type(0b1011100 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 101110...........011.....1010111 +#define VNCLIP_WI(vd, vs2, simm5, vm) EMIT(R_type(0b1011110 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 101111...........011.....1010111 #define VMV1R_V(vd, vs2) 
EMIT(R_type(0b1001111, vs2, 0b00000, 0b011, vd, 0b1010111)) // 1001111.....00000011.....1010111 #define VMV2R_V(vd, vs2) EMIT(R_type(0b1001111, vs2, 0b00001, 0b011, vd, 0b1010111)) // 1001111.....00001011.....1010111 @@ -1596,18 +1594,18 @@ f28–31 ft8–11 FP temporaries Caller #define VMV8R_V(vd, vs2) EMIT(R_type(0b1001111, vs2, 0b00111, 0b011, vd, 0b1010111)) // 1001111.....00111011.....1010111 // OPMVV -#define VREDSUM_VS(vd, vs1, vs2, vm) EMIT(R_type(0b0000000 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 000000...........010.....1010111 -#define VREDAND_VS(vd, vs1, vs2, vm) EMIT(R_type(0b0000010 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 000001...........010.....1010111 -#define VREDOR_VS(vd, vs1, vs2, vm) EMIT(R_type(0b0000100 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 000010...........010.....1010111 -#define VREDXOR_VS(vd, vs1, vs2, vm) EMIT(R_type(0b0000110 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 000011...........010.....1010111 -#define VREDMINU_VS(vd, vs1, vs2, vm) EMIT(R_type(0b0001000 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 000100...........010.....1010111 -#define VREDMIN_VS(vd, vs1, vs2, vm) EMIT(R_type(0b0001010 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 000101...........010.....1010111 -#define VREDMAXU_VS(vd, vs1, vs2, vm) EMIT(R_type(0b0001100 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 000110...........010.....1010111 -#define VREDMAX_VS(vd, vs1, vs2, vm) EMIT(R_type(0b0001110 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 000111...........010.....1010111 -#define VAADDU_VV(vd, vs1, vs2, vm) EMIT(R_type(0b0010000 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 001000...........010.....1010111 -#define VAADD_VV(vd, vs1, vs2, vm) EMIT(R_type(0b0010010 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 001001...........010.....1010111 -#define VASUBU_VV(vd, vs1, vs2, vm) EMIT(R_type(0b0010100 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 001010...........010.....1010111 -#define VASUB_VV(vd, vs1, vs2, vm) EMIT(R_type(0b0010110 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 001011...........010.....1010111 +#define VREDSUM_VS(vd, vs2, vs1, vm) EMIT(R_type(0b0000000 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 000000...........010.....1010111 +#define VREDAND_VS(vd, vs2, vs1, vm) EMIT(R_type(0b0000010 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 000001...........010.....1010111 +#define VREDOR_VS(vd, vs2, vs1, vm) EMIT(R_type(0b0000100 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 000010...........010.....1010111 +#define VREDXOR_VS(vd, vs2, vs1, vm) EMIT(R_type(0b0000110 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 000011...........010.....1010111 +#define VREDMINU_VS(vd, vs2, vs1, vm) EMIT(R_type(0b0001000 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 000100...........010.....1010111 +#define VREDMIN_VS(vd, vs2, vs1, vm) EMIT(R_type(0b0001010 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 000101...........010.....1010111 +#define VREDMAXU_VS(vd, vs2, vs1, vm) EMIT(R_type(0b0001100 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 000110...........010.....1010111 +#define VREDMAX_VS(vd, vs2, vs1, vm) EMIT(R_type(0b0001110 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 000111...........010.....1010111 +#define VAADDU_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0010000 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 001000...........010.....1010111 +#define VAADD_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0010010 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 001001...........010.....1010111 +#define VASUBU_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0010100 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 001010...........010.....1010111 
+#define VASUB_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0010110 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 001011...........010.....1010111 #define VMV_X_S(rd, vs2) EMIT(R_type(0b0100001, vs2, 0b00000, 0b010, rd, 0b1010111)) // 0100001.....00000010.....1010111 @@ -1621,15 +1619,15 @@ f28–31 ft8–11 FP temporaries Caller #define VZEXT_VF2(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b00110, 0b010, vd, 0b1010111)) // 010010......00110010.....1010111 #define VSEXT_VF2(vd, vs2, vm) EMIT(R_type(0b0100100 | (vm), vs2, 0b00111, 0b010, vd, 0b1010111)) // 010010......00111010.....1010111 -#define VCOMPRESS_VM(vd, vs1, vs2) EMIT(R_type(0b0101111, vs2, vs1, 0b010, vd, 0b1010111)) // 0101111..........010.....1010111 -#define VMANDN_MM(vd, vs1, vs2) EMIT(R_type(0b0110001, vs2, vs1, 0b010, vd, 0b1010111)) // 0110001..........010.....1010111 -#define VMAND_MM(vd, vs1, vs2) EMIT(R_type(0b0110011, vs2, vs1, 0b010, vd, 0b1010111)) // 0110011..........010.....1010111 -#define VMOR_MM(vd, vs1, vs2) EMIT(R_type(0b0110101, vs2, vs1, 0b010, vd, 0b1010111)) // 0110101..........010.....1010111 -#define VMXOR_MM(vd, vs1, vs2) EMIT(R_type(0b0110111, vs2, vs1, 0b010, vd, 0b1010111)) // 0110111..........010.....1010111 -#define VMORN_MM(vd, vs1, vs2) EMIT(R_type(0b0111001, vs2, vs1, 0b010, vd, 0b1010111)) // 0111001..........010.....1010111 -#define VMNAND_MM(vd, vs1, vs2) EMIT(R_type(0b0111011, vs2, vs1, 0b010, vd, 0b1010111)) // 0111011..........010.....1010111 -#define VMNOR_MM(vd, vs1, vs2) EMIT(R_type(0b0111101, vs2, vs1, 0b010, vd, 0b1010111)) // 0111101..........010.....1010111 -#define VMXNOR_MM(vd, vs1, vs2) EMIT(R_type(0b0111111, vs2, vs1, 0b010, vd, 0b1010111)) // 0111111..........010.....1010111 +#define VCOMPRESS_VM(vd, vs2, vs1) EMIT(R_type(0b0101111, vs2, vs1, 0b010, vd, 0b1010111)) // 0101111..........010.....1010111 +#define VMANDN_MM(vd, vs2, vs1) EMIT(R_type(0b0110001, vs2, vs1, 0b010, vd, 0b1010111)) // 0110001..........010.....1010111 +#define VMAND_MM(vd, vs2, vs1) EMIT(R_type(0b0110011, vs2, vs1, 0b010, vd, 0b1010111)) // 0110011..........010.....1010111 +#define VMOR_MM(vd, vs2, vs1) EMIT(R_type(0b0110101, vs2, vs1, 0b010, vd, 0b1010111)) // 0110101..........010.....1010111 +#define VMXOR_MM(vd, vs2, vs1) EMIT(R_type(0b0110111, vs2, vs1, 0b010, vd, 0b1010111)) // 0110111..........010.....1010111 +#define VMORN_MM(vd, vs2, vs1) EMIT(R_type(0b0111001, vs2, vs1, 0b010, vd, 0b1010111)) // 0111001..........010.....1010111 +#define VMNAND_MM(vd, vs2, vs1) EMIT(R_type(0b0111011, vs2, vs1, 0b010, vd, 0b1010111)) // 0111011..........010.....1010111 +#define VMNOR_MM(vd, vs2, vs1) EMIT(R_type(0b0111101, vs2, vs1, 0b010, vd, 0b1010111)) // 0111101..........010.....1010111 +#define VMXNOR_MM(vd, vs2, vs1) EMIT(R_type(0b0111111, vs2, vs1, 0b010, vd, 0b1010111)) // 0111111..........010.....1010111 #define VMSBF_M(vd, vs2, vm) EMIT(R_type(0b0101000 | (vm), vs2, 0b00001, 0b010, vd, 0b1010111)) // 010100......00001010.....1010111 #define VMSOF_M(vd, vs2, vm) EMIT(R_type(0b0101000 | (vm), vs2, 0b00010, 0b010, vd, 0b1010111)) // 010100......00010010.....1010111 @@ -1640,69 +1638,69 @@ f28–31 ft8–11 FP temporaries Caller #define VID_V(vd, vm) EMIT(R_type(0b0101000 | (vm), 0b00000, 0b10001, 0b010, vd, 0b1010111)) // 010100.0000010001010.....1010111 -#define VDIVU_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1000000 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 100000...........010.....1010111 -#define VDIV_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1000010 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 100001...........010.....1010111 
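One more observation that applies to the maskable emitters above: vm is OR-ed into the low bit of funct7, which is instruction bit 25 under the assumed R_type() layout, so the masked (vm = 0) and unmasked (vm = 1) encodings of an operation differ in exactly that bit; the mask-register logicals such as VMAND_MM take no vm argument because their funct7 already has that bit set. A short standalone sketch, same assumption as above:

#include <assert.h>
#include <stdint.h>

static uint32_t r_type(uint32_t funct7, uint32_t rs2, uint32_t rs1,
                       uint32_t funct3, uint32_t rd, uint32_t opcode)
{
    return (funct7 << 25) | (rs2 << 20) | (rs1 << 15) |
           (funct3 << 12) | (rd << 7) | opcode;
}

int main(void)
{
    // vdivu.vv v1, v2, v3 with and without masking (funct3 010 = OPMVV).
    uint32_t masked   = r_type(0b1000000 | 0, 2, 3, 0b010, 1, 0b1010111);
    uint32_t unmasked = r_type(0b1000000 | 1, 2, 3, 0b010, 1, 0b1010111);
    assert((masked ^ unmasked) == (1u << 25)); // only the vm bit differs

    // vmand.mm v1, v2, v3: funct7 0b0110011 already carries the unmasked bit.
    uint32_t vmand = r_type(0b0110011, 2, 3, 0b010, 1, 0b1010111);
    assert(vmand & (1u << 25));
    return 0;
}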
-#define VREMU_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1000100 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 100010...........010.....1010111 -#define VREM_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1000110 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 100011...........010.....1010111 -#define VMULHU_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1001000 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 100100...........010.....1010111 -#define VMUL_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1001010 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 100101...........010.....1010111 -#define VMULHSU_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1001100 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 100110...........010.....1010111 -#define VMULH_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1001110 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 100111...........010.....1010111 -#define VMADD_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1010010 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 101001...........010.....1010111 -#define VNMSUB_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1010110 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 101011...........010.....1010111 -#define VMACC_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1011010 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 101101...........010.....1010111 -#define VNMSAC_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1011110 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 101111...........010.....1010111 -#define VWADDU_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1100000 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 110000...........010.....1010111 -#define VWADD_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1100010 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 110001...........010.....1010111 -#define VWSUBU_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1100100 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 110010...........010.....1010111 -#define VWSUB_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1100110 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 110011...........010.....1010111 -#define VWADDU_WV(vd, vs1, vs2, vm) EMIT(R_type(0b1101000 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 110100...........010.....1010111 -#define VWADD_WV(vd, vs1, vs2, vm) EMIT(R_type(0b1101010 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 110101...........010.....1010111 -#define VWSUBU_WV(vd, vs1, vs2, vm) EMIT(R_type(0b1101100 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 110110...........010.....1010111 -#define VWSUB_WV(vd, vs1, vs2, vm) EMIT(R_type(0b1101110 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 110111...........010.....1010111 -#define VWMULU_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1110000 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 111000...........010.....1010111 -#define VWMULSU_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1110100 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 111010...........010.....1010111 -#define VWMUL_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1110110 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 111011...........010.....1010111 -#define VWMACCU_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1111000 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 111100...........010.....1010111 -#define VWMACC_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1111010 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 111101...........010.....1010111 -#define VWMACCSU_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1111110 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 111111...........010.....1010111 +#define VDIVU_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1000000 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 100000...........010.....1010111 +#define VDIV_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1000010 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 100001...........010.....1010111 +#define VREMU_VV(vd, vs2, 
vs1, vm) EMIT(R_type(0b1000100 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 100010...........010.....1010111 +#define VREM_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1000110 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 100011...........010.....1010111 +#define VMULHU_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1001000 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 100100...........010.....1010111 +#define VMUL_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1001010 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 100101...........010.....1010111 +#define VMULHSU_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1001100 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 100110...........010.....1010111 +#define VMULH_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1001110 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 100111...........010.....1010111 +#define VMADD_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1010010 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 101001...........010.....1010111 +#define VNMSUB_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1010110 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 101011...........010.....1010111 +#define VMACC_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1011010 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 101101...........010.....1010111 +#define VNMSAC_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1011110 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 101111...........010.....1010111 +#define VWADDU_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1100000 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 110000...........010.....1010111 +#define VWADD_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1100010 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 110001...........010.....1010111 +#define VWSUBU_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1100100 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 110010...........010.....1010111 +#define VWSUB_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1100110 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 110011...........010.....1010111 +#define VWADDU_WV(vd, vs2, vs1, vm) EMIT(R_type(0b1101000 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 110100...........010.....1010111 +#define VWADD_WV(vd, vs2, vs1, vm) EMIT(R_type(0b1101010 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 110101...........010.....1010111 +#define VWSUBU_WV(vd, vs2, vs1, vm) EMIT(R_type(0b1101100 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 110110...........010.....1010111 +#define VWSUB_WV(vd, vs2, vs1, vm) EMIT(R_type(0b1101110 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 110111...........010.....1010111 +#define VWMULU_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1110000 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 111000...........010.....1010111 +#define VWMULSU_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1110100 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 111010...........010.....1010111 +#define VWMUL_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1110110 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 111011...........010.....1010111 +#define VWMACCU_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1111000 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 111100...........010.....1010111 +#define VWMACC_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1111010 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 111101...........010.....1010111 +#define VWMACCSU_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1111110 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 111111...........010.....1010111 // OPMVX -#define VAADDU_VX(vd, rs1, vs2, vm) EMIT(R_type(0b0010000 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 001000...........110.....1010111 -#define VAADD_VX(vd, rs1, vs2, vm) EMIT(R_type(0b0010010 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 001001...........110.....1010111 -#define VASUBU_VX(vd, rs1, vs2, vm) 
EMIT(R_type(0b0010100 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 001010...........110.....1010111 -#define VASUB_VX(vd, rs1, vs2, vm) EMIT(R_type(0b0010110 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 001011...........110.....1010111 -#define VSLIDE1UP_VX(vd, rs1, vs2, vm) EMIT(R_type(0b0011100 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 001110...........110.....1010111 -#define VSLIDE1DOWN_VX(vd, rs1, vs2, vm) EMIT(R_type(0b0011110 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 001111...........110.....1010111 +#define VAADDU_VX(vd, vs2, rs1, vm) EMIT(R_type(0b0010000 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 001000...........110.....1010111 +#define VAADD_VX(vd, vs2, rs1, vm) EMIT(R_type(0b0010010 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 001001...........110.....1010111 +#define VASUBU_VX(vd, vs2, rs1, vm) EMIT(R_type(0b0010100 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 001010...........110.....1010111 +#define VASUB_VX(vd, vs2, rs1, vm) EMIT(R_type(0b0010110 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 001011...........110.....1010111 +#define VSLIDE1UP_VX(vd, vs2, rs1, vm) EMIT(R_type(0b0011100 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 001110...........110.....1010111 +#define VSLIDE1DOWN_VX(vd, vs2, rs1, vm) EMIT(R_type(0b0011110 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 001111...........110.....1010111 #define VMV_S_X(vd, rs1) EMIT(I_type(0b010000100000, rs1, 0b110, vd, 0b1010111)) // 010000100000.....110.....1010111 -#define VDIVU_VX(vd, rs1, vs2, vm) EMIT(R_type(0b1000000 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 100000...........110.....1010111 -#define VDIV_VX(vd, rs1, vs2, vm) EMIT(R_type(0b1000010 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 100001...........110.....1010111 -#define VREMU_VX(vd, rs1, vs2, vm) EMIT(R_type(0b1000100 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 100010...........110.....1010111 -#define VREM_VX(vd, rs1, vs2, vm) EMIT(R_type(0b1000110 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 100011...........110.....1010111 -#define VMULHU_VX(vd, rs1, vs2, vm) EMIT(R_type(0b1001000 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 100100...........110.....1010111 -#define VMUL_VX(vd, rs1, vs2, vm) EMIT(R_type(0b1001010 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 100101...........110.....1010111 -#define VMULHSU_VX(vd, rs1, vs2, vm) EMIT(R_type(0b1001100 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 100110...........110.....1010111 -#define VMULH_VX(vd, rs1, vs2, vm) EMIT(R_type(0b1001110 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 100111...........110.....1010111 -#define VMADD_VX(vd, rs1, vs2, vm) EMIT(R_type(0b1010010 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 101001...........110.....1010111 -#define VNMSUB_VX(vd, rs1, vs2, vm) EMIT(R_type(0b1010110 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 101011...........110.....1010111 -#define VMACC_VX(vd, rs1, vs2, vm) EMIT(R_type(0b1011010 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 101101...........110.....1010111 -#define VNMSAC_VX(vd, rs1, vs2, vm) EMIT(R_type(0b1011110 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 101111...........110.....1010111 -#define VWADDU_VX(vd, rs1, vs2, vm) EMIT(R_type(0b1100000 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 110000...........110.....1010111 -#define VWADD_VX(vd, rs1, vs2, vm) EMIT(R_type(0b1100010 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 110001...........110.....1010111 -#define VWSUBU_VX(vd, rs1, vs2, vm) EMIT(R_type(0b1100100 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 110010...........110.....1010111 -#define VWSUB_VX(vd, rs1, vs2, vm) EMIT(R_type(0b1100110 | (vm), vs2, 
rs1, 0b110, vd, 0b1010111)) // 110011...........110.....1010111 -#define VWADDU_WX(vd, rs1, vs2, vm) EMIT(R_type(0b1101000 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 110100...........110.....1010111 -#define VWADD_WX(vd, rs1, vs2, vm) EMIT(R_type(0b1101010 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 110101...........110.....1010111 -#define VWSUBU_WX(vd, rs1, vs2, vm) EMIT(R_type(0b1101100 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 110110...........110.....1010111 -#define VWSUB_WX(vd, rs1, vs2, vm) EMIT(R_type(0b1101110 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 110111...........110.....1010111 -#define VWMULU_VX(vd, rs1, vs2, vm) EMIT(R_type(0b1110000 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 111000...........110.....1010111 -#define VWMULSU_VX(vd, rs1, vs2, vm) EMIT(R_type(0b1110100 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 111010...........110.....1010111 -#define VWMUL_VX(vd, rs1, vs2, vm) EMIT(R_type(0b1110110 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 111011...........110.....1010111 -#define VWMACCU_VX(vd, rs1, vs2, vm) EMIT(R_type(0b1111000 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 111100...........110.....1010111 -#define VWMACC_VX(vd, rs1, vs2, vm) EMIT(R_type(0b1111010 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 111101...........110.....1010111 -#define VWMACCUS_VX(vd, rs1, vs2, vm) EMIT(R_type(0b1111100 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 111110...........110.....1010111 -#define VWMACCSU_VX(vd, rs1, vs2, vm) EMIT(R_type(0b1111110 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 111111...........110.....1010111 +#define VDIVU_VX(vd, vs2, rs1, vm) EMIT(R_type(0b1000000 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 100000...........110.....1010111 +#define VDIV_VX(vd, vs2, rs1, vm) EMIT(R_type(0b1000010 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 100001...........110.....1010111 +#define VREMU_VX(vd, vs2, rs1, vm) EMIT(R_type(0b1000100 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 100010...........110.....1010111 +#define VREM_VX(vd, vs2, rs1, vm) EMIT(R_type(0b1000110 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 100011...........110.....1010111 +#define VMULHU_VX(vd, vs2, rs1, vm) EMIT(R_type(0b1001000 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 100100...........110.....1010111 +#define VMUL_VX(vd, vs2, rs1, vm) EMIT(R_type(0b1001010 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 100101...........110.....1010111 +#define VMULHSU_VX(vd, vs2, rs1, vm) EMIT(R_type(0b1001100 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 100110...........110.....1010111 +#define VMULH_VX(vd, vs2, rs1, vm) EMIT(R_type(0b1001110 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 100111...........110.....1010111 +#define VMADD_VX(vd, vs2, rs1, vm) EMIT(R_type(0b1010010 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 101001...........110.....1010111 +#define VNMSUB_VX(vd, vs2, rs1, vm) EMIT(R_type(0b1010110 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 101011...........110.....1010111 +#define VMACC_VX(vd, vs2, rs1, vm) EMIT(R_type(0b1011010 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 101101...........110.....1010111 +#define VNMSAC_VX(vd, vs2, rs1, vm) EMIT(R_type(0b1011110 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 101111...........110.....1010111 +#define VWADDU_VX(vd, vs2, rs1, vm) EMIT(R_type(0b1100000 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 110000...........110.....1010111 +#define VWADD_VX(vd, vs2, rs1, vm) EMIT(R_type(0b1100010 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 110001...........110.....1010111 +#define VWSUBU_VX(vd, vs2, rs1, vm) EMIT(R_type(0b1100100 | (vm), vs2, rs1, 0b110, vd, 
0b1010111)) // 110010...........110.....1010111 +#define VWSUB_VX(vd, vs2, rs1, vm) EMIT(R_type(0b1100110 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 110011...........110.....1010111 +#define VWADDU_WX(vd, vs2, rs1, vm) EMIT(R_type(0b1101000 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 110100...........110.....1010111 +#define VWADD_WX(vd, vs2, rs1, vm) EMIT(R_type(0b1101010 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 110101...........110.....1010111 +#define VWSUBU_WX(vd, vs2, rs1, vm) EMIT(R_type(0b1101100 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 110110...........110.....1010111 +#define VWSUB_WX(vd, vs2, rs1, vm) EMIT(R_type(0b1101110 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 110111...........110.....1010111 +#define VWMULU_VX(vd, vs2, rs1, vm) EMIT(R_type(0b1110000 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 111000...........110.....1010111 +#define VWMULSU_VX(vd, vs2, rs1, vm) EMIT(R_type(0b1110100 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 111010...........110.....1010111 +#define VWMUL_VX(vd, vs2, rs1, vm) EMIT(R_type(0b1110110 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 111011...........110.....1010111 +#define VWMACCU_VX(vd, vs2, rs1, vm) EMIT(R_type(0b1111000 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 111100...........110.....1010111 +#define VWMACC_VX(vd, vs2, rs1, vm) EMIT(R_type(0b1111010 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 111101...........110.....1010111 +#define VWMACCUS_VX(vd, vs2, rs1, vm) EMIT(R_type(0b1111100 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 111110...........110.....1010111 +#define VWMACCSU_VX(vd, vs2, rs1, vm) EMIT(R_type(0b1111110 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 111111...........110.....1010111 #endif //__RV64_EMITTER_H__ |
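
The patch above changes the emitter macros so their operands are passed in the same order as RVV assembly syntax (vd, vs2, vs1/rs1, vm) instead of (vd, vs1/rs1, vs2, vm), which is why the call sites in dynarec_rv64_0f_vector.c had their second and third arguments swapped. A minimal standalone sketch of the resulting encoding, assuming R_type packs the standard RISC-V R-type fields (funct7, rs2, rs1, funct3, rd, opcode, high to low bits) as the bit comments above indicate; the register numbers and the stand-in r_type helper are illustrative, not from the patch:

    #include <stdint.h>
    #include <stdio.h>

    /* Stand-in for the emitter's R_type packing (assumed standard layout):
       funct7[31:25] rs2[24:20] rs1[19:15] funct3[14:12] rd[11:7] opcode[6:0]. */
    static uint32_t r_type(uint32_t funct7, uint32_t rs2, uint32_t rs1,
                           uint32_t funct3, uint32_t rd, uint32_t opcode)
    {
        return (funct7 << 25) | (rs2 << 20) | (rs1 << 15) |
               (funct3 << 12) | (rd << 7) | opcode;
    }

    int main(void)
    {
        /* VMUL_VV(vd, vs2, vs1, vm) now reads like `vmul.vv vd, vs2, vs1`:
           funct6=100101, vm=1 (unmasked), funct3=OPMVV(010), opcode=1010111. */
        uint32_t vmul_vv = r_type(0b1001010 | 1, /*vs2=*/4, /*vs1=*/2, 0b010, /*vd=*/6, 0b1010111);
        /* Likewise VMUL_VX(vd, vs2, rs1, vm) mirrors `vmul.vx vd, vs2, rs1`
           with the scalar register in the rs1 slot and funct3=OPMVX(110). */
        uint32_t vmul_vx = r_type(0b1001010 | 1, /*vs2=*/4, /*rs1=*/5, 0b110, /*vd=*/6, 0b1010111);
        printf("vmul.vv v6, v4, v2 -> 0x%08x\n", vmul_vv);
        printf("vmul.vx v6, v4, x5 -> 0x%08x\n", vmul_vx);
        return 0;
    }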