author    Yang Liu <liuyang22@iscas.ac.cn>  2024-09-13 18:16:09 +0800
committer GitHub <noreply@github.com>       2024-09-13 12:16:09 +0200
commit    0acd849f18bf12b4d170b0ab0d80d2f76d02af93 (patch)
tree      1f528aba3901ea335ce7b8419f91138785ca4a0d /src
parent    ed4ff15682222f79f2bfbd2263345a2974bfa6c2 (diff)
[RV64_DYNAREC] Added 1 more vector opcode, plus some refactoring and fixes (#1824)
Diffstat (limited to 'src')
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_0f_vector.c    1
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_660f_vector.c  25
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_helper.c       12
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_helper.h       24
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_pass2.h        1
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_pass3.h        1
-rw-r--r--  src/dynarec/rv64/rv64_emitter.h              57
7 files changed, 74 insertions, 47 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_0f_vector.c b/src/dynarec/rv64/dynarec_rv64_0f_vector.c
index b3e3a7d6..da14a381 100644
--- a/src/dynarec/rv64/dynarec_rv64_0f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_0f_vector.c
@@ -172,6 +172,7 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
         case 0x00 ... 0x0F:
         case 0x18:
         case 0x1F:
+        case 0x2C ... 0x2F:
         case 0x31:
         case 0x40 ... 0x4F:
         case 0x80 ... 0xBF:
diff --git a/src/dynarec/rv64/dynarec_rv64_660f_vector.c b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
index 44e4b3c5..3de36a14 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
@@ -65,6 +65,9 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
                 VLE_V(v0, ed, dyn->vector_eew, VECTOR_UNMASKED, VECTOR_NFIELD1);
             }
             break;
+        case 0x2E:
+        case 0x2F:
+            return 0;
         case 0x38: // SSSE3 opcodes
             nextop = F8;
             switch (nextop) {
@@ -85,7 +88,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
                 case 0x01 ... 0x07:
                     // pairwise opcodes are complicated, fallback to scalar.
                     return 0;
-                case 0x08 ... 0x0a:
+                case 0x08 ... 0x0A:
                     if (nextop == 0x08) {
                         INST_NAME("PSIGNB Gx, Ex");
                         SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
@@ -120,6 +123,22 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
                     VADC_VIM(v0, 0x1f, v0); // implies VECTOR_MASKREG
                     VAND_VV(q0, v1, v0, VECTOR_UNMASKED);
                     break;
+                case 0x0B:
+                    INST_NAME("PMULHRSW Gx, Ex");
+                    nextop = F8;
+                    SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
+                    GETGX_vector(q0, 1, VECTOR_SEW16);
+                    GETEX_vector(q1, 0, 0, VECTOR_SEW16);
+                    fpu_get_scratch(dyn); // HACK: skip v3, for vector register group alignment!
+                    v0 = fpu_get_scratch(dyn);
+                    fpu_get_scratch(dyn);
+                    VWMUL_VV(v0, q0, q1, VECTOR_UNMASKED);
+                    vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL2, 2);
+                    VSRL_VI(v0, 14, v0, VECTOR_UNMASKED);
+                    VADD_VI(v0, 1, v0, VECTOR_UNMASKED);
+                    vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL1, 1);
+                    VNSRL_WI(q0, 1, v0, VECTOR_UNMASKED);
+                    break;
                 default:
                     DEFAULT_VECTOR;
             }
@@ -150,8 +169,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             d0 = fpu_get_scratch(dyn);
             d1 = fpu_get_scratch(dyn);
             if (rv64_vlen >= 256) {
-                vector_vsetvl_emul1(dyn, ninst, x1, VECTOR_SEW16, 2); // double the vl for slideup.
-                VSLIDEUP_VI(q0, 8, q1, VECTOR_UNMASKED);              // splice q0 and q1 here!
+                vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL1, 2); // double the vl for slideup.
+                VSLIDEUP_VI(q0, 8, q1, VECTOR_UNMASKED);                       // splice q0 and q1 here!
                 VMAX_VX(d0, xZR, q0, VECTOR_UNMASKED);
             } else {
                 VMAX_VX(d0, xZR, q0, VECTOR_UNMASKED);
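
The new 0x2E/0x2F cases (UCOMISD/COMISD) simply return 0 so those opcodes fall back to the scalar path. The new PMULHRSW case is the interesting part: it maps the SSSE3 rounding multiply-high onto RVV as a widening 16x16->32 multiply (VWMUL, landing at SEW32/LMUL2), a logical shift right by 14, a +1 rounding increment, and a narrowing shift right by 1. As a sanity check, here is a minimal per-lane reference of the x86 semantics in plain C (not box64 code):

    #include <stdint.h>

    /* PMULHRSW per 16-bit lane: take bits [30:15] of the signed 32-bit
     * product, rounded to nearest. Mirrors the RVV sequence above:
     * VWMUL -> VSRL 14 -> VADD 1 -> VNSRL 1. */
    static int16_t pmulhrsw_lane(int16_t a, int16_t b)
    {
        int32_t prod = (int32_t)a * (int32_t)b;    /* widening multiply */
        return (int16_t)(((prod >> 14) + 1) >> 1); /* round, keep high half */
    }

Per the HACK comment in the code, the extra fpu_get_scratch() calls around v0 keep the LMUL2 widening destination on an even-numbered register and reserve its pair, as register groups require.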
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c
index 489646d5..4be3f8ae 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.c
+++ b/src/dynarec/rv64/dynarec_rv64_helper.c
@@ -2434,7 +2434,7 @@ static void sewTransform(dynarec_rv64_t* dyn, int ninst, int s1)
     if (jmp < 0) return;
     if (dyn->insts[jmp].vector_sew_entry == VECTOR_SEWNA) return;
     MESSAGE(LOG_DUMP, "\tSEW changed to %d ---- ninst=%d -> %d\n", dyn->insts[jmp].vector_sew_entry, ninst, jmp);
-    vector_vsetvl_emul1(dyn, ninst, s1, dyn->insts[jmp].vector_sew_entry, 1);
+    vector_vsetvli(dyn, ninst, s1, dyn->insts[jmp].vector_sew_entry, VECTOR_LMUL1, 1);
 }
 
 void CacheTransform(dynarec_rv64_t* dyn, int ninst, int cacheupd, int s1, int s2, int s3)
@@ -2528,7 +2528,7 @@ void fpu_reset_cache(dynarec_rv64_t* dyn, int ninst, int reset_n)
     #if STEP > 1
     // for STEP 2 & 3, just need to refresh with current, and undo the changes (push & swap)
     dyn->e = dyn->insts[ninst].e;
-    dyn->vector_sew = dyn->insts[ninst].vector_sew_exit;
+    dyn->vector_sew = dyn->insts[ninst].vector_sew_entry;
     #else
     dyn->e = dyn->insts[reset_n].e;
     dyn->vector_sew = dyn->insts[reset_n].vector_sew_exit;
@@ -2591,7 +2591,7 @@ void fpu_propagate_stack(dynarec_rv64_t* dyn, int ninst)
 }
 
 // Simple wrapper for vsetvli
-int vector_vsetvl_emul1(dynarec_rv64_t* dyn, int ninst, int s1, int sew, int multiple)
+int vector_vsetvli(dynarec_rv64_t* dyn, int ninst, int s1, int sew, int vlmul, int multiple)
 {
     if (sew == VECTOR_SEWNA) return VECTOR_SEW8;
     if (sew == VECTOR_SEWANY) sew = VECTOR_SEW8;
@@ -2600,9 +2600,9 @@ int vector_vsetvl_emul1(dynarec_rv64_t* dyn, int ninst, int s1, int sew, int mul
      * sew:  selected element width
      * lmul: vector register group multiplier
      *
-     *                    mu            tu          sew      lmul=1 */
-    uint32_t vtypei = (0b0 << 7) | (0b0 << 6) | (sew << 3) | 0b000;
-    ADDI(s1, xZR, (16 >> sew) * multiple);
+     *                    mu            tu          sew      lmul */
+    uint32_t vtypei = (0b0 << 7) | (0b0 << 6) | (sew << 3) | vlmul;
+    ADDI(s1, xZR, (16 >> sew) * multiple); // TODO: it's possible to reuse s1 sometimes
     VSETVLI(xZR, s1, vtypei);
     return sew;
 }
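
The rename from vector_vsetvl_emul1 to vector_vsetvli goes with the new vlmul parameter: LMUL is now caller-selectable instead of hard-wired to 1, which the PMULHRSW sequence above needs for its LMUL2 widening step. The vtype immediate follows the RVV 1.0 layout (vma at bit 7, vta at bit 6, vsew at bits 5:3, vlmul at bits 2:0), and the requested AVL is derived from the 128-bit SSE width. A standalone sketch of both computations (hypothetical helpers mirroring the code above):

    #include <stdint.h>

    /* vtype immediate, RVV 1.0 layout. Both agnostic bits stay 0 here,
     * i.e. mask-undisturbed / tail-undisturbed, as in vector_vsetvli. */
    static uint32_t rvv_vtypei(uint32_t sew, uint32_t vlmul)
    {
        return (0u << 7) | (0u << 6) | (sew << 3) | vlmul;
    }

    /* AVL for one 128-bit SSE register: (16 >> sew) elements
     * (16/8/4/2 for SEW8/16/32/64), doubled via `multiple` when an
     * operation spans two registers, as in the slideup case above. */
    static int rvv_avl(int sew, int multiple)
    {
        return (16 >> sew) * multiple;
    }

Also fixed in this file: fpu_reset_cache now restores vector_sew from vector_sew_entry rather than vector_sew_exit, matching the per-instruction re-seeding added to NEW_INST in pass 2 and pass 3 below.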
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h
index 2acd1810..b12c1e00 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.h
+++ b/src/dynarec/rv64/dynarec_rv64_helper.h
@@ -1081,16 +1081,16 @@
 #define MODREG ((nextop & 0xC0) == 0xC0)
 
 #ifndef SET_ELEMENT_WIDTH
-#define SET_ELEMENT_WIDTH(s1, sew, set)                                    \
-    do {                                                                   \
-        if (sew == VECTOR_SEWANY && dyn->vector_sew != VECTOR_SEWNA) {     \
-            dyn->vector_eew = dyn->vector_sew;                             \
-        } else if (sew == dyn->vector_sew) {                               \
-            dyn->vector_eew = dyn->vector_sew;                             \
-        } else {                                                           \
-            dyn->vector_eew = vector_vsetvl_emul1(dyn, ninst, s1, sew, 1); \
-        }                                                                  \
-        if (set) dyn->vector_sew = dyn->vector_eew;                        \
+#define SET_ELEMENT_WIDTH(s1, sew, set)                                             \
+    do {                                                                            \
+        if (sew == VECTOR_SEWANY && dyn->vector_sew != VECTOR_SEWNA) {              \
+            dyn->vector_eew = dyn->vector_sew;                                      \
+        } else if (sew == dyn->vector_sew) {                                        \
+            dyn->vector_eew = dyn->vector_sew;                                      \
+        } else {                                                                    \
+            dyn->vector_eew = vector_vsetvli(dyn, ninst, s1, sew, VECTOR_LMUL1, 1); \
+        }                                                                           \
+        if (set) dyn->vector_sew = dyn->vector_eew;                                 \
     } while (0)
 #endif
 
@@ -1286,7 +1286,7 @@ void* rv64_next(x64emu_t* emu, uintptr_t addr);
 #define rv64_move64    STEPNAME(rv64_move64)
 #define rv64_move32    STEPNAME(rv64_move32)
 
-#define vector_vsetvl_emul1 STEPNAME(vector_vsetvl_emul1)
+#define vector_vsetvli STEPNAME(vector_vsetvli)
 
 /* setup r2 to address pointed by */
 uintptr_t geted(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, uint8_t scratch, int64_t* fixaddress, rex_t rex, int* l, int i12, int delta);
@@ -1442,7 +1442,7 @@ void CacheTransform(dynarec_rv64_t* dyn, int ninst, int cacheupd, int s1, int s2
 void rv64_move64(dynarec_rv64_t* dyn, int ninst, int reg, int64_t val);
 void rv64_move32(dynarec_rv64_t* dyn, int ninst, int reg, int32_t val, int zeroup);
 
-int vector_vsetvl_emul1(dynarec_rv64_t* dyn, int ninst, int s1, int sew, int multiple);
+int vector_vsetvli(dynarec_rv64_t* dyn, int ninst, int s1, int sew, int vlmul, int multiple);
 
 #if STEP < 2
 #define CHECK_CACHE() 0
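
SET_ELEMENT_WIDTH is the SEW cache in front of vector_vsetvli: it only emits a vsetvli when the requested element width is neither "any" nor the one already active. The same logic as a plain-C sketch (hypothetical standalone form; the real macro mutates dyn->vector_eew and dyn->vector_sew in place):

    #define VECTOR_SEWNA  0b111   /* values from rv64_emitter.h */
    #define VECTOR_SEWANY 0b1000

    extern int emit_vsetvli(int sew); /* hypothetical stand-in for vector_vsetvli */

    static int set_element_width(int requested, int* active, int set)
    {
        int eew;
        if ((requested == VECTOR_SEWANY && *active != VECTOR_SEWNA)
            || requested == *active)
            eew = *active;                 /* SEW already in effect: no vsetvli */
        else
            eew = emit_vsetvli(requested); /* switch element width, LMUL1 */
        if (set) *active = eew;            /* optionally commit the new SEW */
        return eew;
    }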
diff --git a/src/dynarec/rv64/dynarec_rv64_pass2.h b/src/dynarec/rv64/dynarec_rv64_pass2.h
index 909522e3..4f41f628 100644
--- a/src/dynarec/rv64/dynarec_rv64_pass2.h
+++ b/src/dynarec/rv64/dynarec_rv64_pass2.h
@@ -8,6 +8,7 @@
 #define MESSAGE(A, ...) do {} while (0)
 #define EMIT(A)     do {dyn->insts[ninst].size+=4; dyn->native_size+=4;}while(0)
 #define NEW_INST                                                                                                                                                               \
+    dyn->vector_sew = dyn->insts[ninst].vector_sew_entry;                                                                                                                      \
     if (ninst) {                                                                                                                                                               \
         dyn->insts[ninst].address = (dyn->insts[ninst - 1].address + dyn->insts[ninst - 1].size);                                                                              \
         dyn->insts_size += 1 + ((dyn->insts[ninst - 1].x64.size > (dyn->insts[ninst - 1].size / 4)) ? dyn->insts[ninst - 1].x64.size : (dyn->insts[ninst - 1].size / 4)) / 15; \
diff --git a/src/dynarec/rv64/dynarec_rv64_pass3.h b/src/dynarec/rv64/dynarec_rv64_pass3.h
index 1254dc4a..556586f2 100644
--- a/src/dynarec/rv64/dynarec_rv64_pass3.h
+++ b/src/dynarec/rv64/dynarec_rv64_pass3.h
@@ -13,6 +13,7 @@
 
 #define MESSAGE(A, ...)  if(box64_dynarec_dump) dynarec_log(LOG_NONE, __VA_ARGS__)
 #define NEW_INST                                                                                                  \
+    dyn->vector_sew = dyn->insts[ninst].vector_sew_entry;                                                         \
     if (box64_dynarec_dump) print_newinst(dyn, ninst);                                                            \
     if (ninst) {                                                                                                  \
         addInst(dyn->instsize, &dyn->insts_size, dyn->insts[ninst - 1].x64.size, dyn->insts[ninst - 1].size / 4); \
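
Both pass 2 and pass 3 now re-seed dyn->vector_sew from the vector_sew_entry that pass 1 recorded for each instruction, so every pass makes the same vsetvli decisions regardless of what state the previous instruction left behind. A minimal model of the invariant (hypothetical types, not box64's):

    /* Each pass walks the same instruction list; per-pass running state
     * is reset from the per-instruction record instead of carried over,
     * so SET_ELEMENT_WIDTH elides or emits vsetvli consistently. */
    typedef struct { int vector_sew_entry; } inst_rec;

    static void run_pass(const inst_rec* insts, int n)
    {
        int vector_sew;
        for (int ninst = 0; ninst < n; ninst++) {
            vector_sew = insts[ninst].vector_sew_entry; /* NEW_INST re-seed */
            (void)vector_sew; /* ... decode the x86 op and emit code ... */
        }
    }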
diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h
index c17bb165..af302ce0 100644
--- a/src/dynarec/rv64/rv64_emitter.h
+++ b/src/dynarec/rv64/rv64_emitter.h
@@ -1226,6 +1226,11 @@ f28–31  ft8–11  FP temporaries                  Caller
 #define VECTOR_SEWNA  0b111  // N/A
 #define VECTOR_SEWANY 0b1000 // any sew would be ok, but not N/A.
 
+#define VECTOR_LMUL1 0b000
+#define VECTOR_LMUL2 0b001
+#define VECTOR_LMUL4 0b010
+#define VECTOR_LMUL8 0b011
+
 #define VECTOR_MASKED   0
 #define VECTOR_UNMASKED 1
 
@@ -1630,32 +1635,32 @@ f28–31  ft8–11  FP temporaries                  Caller
 
 #define VID_V(vd, vm) EMIT(R_type(0b0101000 | (vm), 0b00000, 0b10001, 0b010, vd, 0b1010111)) // 010100.0000010001010.....1010111
 
-#define VDIVU_VV(vd, vs1, vs2, vm)    EMIT(R_type(0b1000000 | (vm), vs2, vs1, 010, vd, 0b1010111)) // 100000...........010.....1010111
-#define VDIV_VV(vd, vs1, vs2, vm)     EMIT(R_type(0b1000010 | (vm), vs2, vs1, 010, vd, 0b1010111)) // 100001...........010.....1010111
-#define VREMU_VV(vd, vs1, vs2, vm)    EMIT(R_type(0b1000100 | (vm), vs2, vs1, 010, vd, 0b1010111)) // 100010...........010.....1010111
-#define VREM_VV(vd, vs1, vs2, vm)     EMIT(R_type(0b1000110 | (vm), vs2, vs1, 010, vd, 0b1010111)) // 100011...........010.....1010111
-#define VMULHU_VV(vd, vs1, vs2, vm)   EMIT(R_type(0b1001000 | (vm), vs2, vs1, 010, vd, 0b1010111)) // 100100...........010.....1010111
-#define VMUL_VV(vd, vs1, vs2, vm)     EMIT(R_type(0b1001010 | (vm), vs2, vs1, 010, vd, 0b1010111)) // 100101...........010.....1010111
-#define VMULHSU_VV(vd, vs1, vs2, vm)  EMIT(R_type(0b1001100 | (vm), vs2, vs1, 010, vd, 0b1010111)) // 100110...........010.....1010111
-#define VMULH_VV(vd, vs1, vs2, vm)    EMIT(R_type(0b1001110 | (vm), vs2, vs1, 010, vd, 0b1010111)) // 100111...........010.....1010111
-#define VMADD_VV(vd, vs1, vs2, vm)    EMIT(R_type(0b1010010 | (vm), vs2, vs1, 010, vd, 0b1010111)) // 101001...........010.....1010111
-#define VNMSUB_VV(vd, vs1, vs2, vm)   EMIT(R_type(0b1010110 | (vm), vs2, vs1, 010, vd, 0b1010111)) // 101011...........010.....1010111
-#define VMACC_VV(vd, vs1, vs2, vm)    EMIT(R_type(0b1011010 | (vm), vs2, vs1, 010, vd, 0b1010111)) // 101101...........010.....1010111
-#define VNMSAC_VV(vd, vs1, vs2, vm)   EMIT(R_type(0b1011110 | (vm), vs2, vs1, 010, vd, 0b1010111)) // 101111...........010.....1010111
-#define VWADDU_VV(vd, vs1, vs2, vm)   EMIT(R_type(0b1100000 | (vm), vs2, vs1, 010, vd, 0b1010111)) // 110000...........010.....1010111
-#define VWADD_VV(vd, vs1, vs2, vm)    EMIT(R_type(0b1100010 | (vm), vs2, vs1, 010, vd, 0b1010111)) // 110001...........010.....1010111
-#define VWSUBU_VV(vd, vs1, vs2, vm)   EMIT(R_type(0b1100100 | (vm), vs2, vs1, 010, vd, 0b1010111)) // 110010...........010.....1010111
-#define VWSUB_VV(vd, vs1, vs2, vm)    EMIT(R_type(0b1100110 | (vm), vs2, vs1, 010, vd, 0b1010111)) // 110011...........010.....1010111
-#define VWADDU_WV(vd, vs1, vs2, vm)   EMIT(R_type(0b1101000 | (vm), vs2, vs1, 010, vd, 0b1010111)) // 110100...........010.....1010111
-#define VWADD_WV(vd, vs1, vs2, vm)    EMIT(R_type(0b1101010 | (vm), vs2, vs1, 010, vd, 0b1010111)) // 110101...........010.....1010111
-#define VWSUBU_WV(vd, vs1, vs2, vm)   EMIT(R_type(0b1101100 | (vm), vs2, vs1, 010, vd, 0b1010111)) // 110110...........010.....1010111
-#define VWSUB_WV(vd, vs1, vs2, vm)    EMIT(R_type(0b1101110 | (vm), vs2, vs1, 010, vd, 0b1010111)) // 110111...........010.....1010111
-#define VWMULU_VV(vd, vs1, vs2, vm)   EMIT(R_type(0b1110000 | (vm), vs2, vs1, 010, vd, 0b1010111)) // 111000...........010.....1010111
-#define VWMULSU_VV(vd, vs1, vs2, vm)  EMIT(R_type(0b1110100 | (vm), vs2, vs1, 010, vd, 0b1010111)) // 111010...........010.....1010111
-#define VWMUL_VV(vd, vs1, vs2, vm)    EMIT(R_type(0b1110110 | (vm), vs2, vs1, 010, vd, 0b1010111)) // 111011...........010.....1010111
-#define VWMACCU_VV(vd, vs1, vs2, vm)  EMIT(R_type(0b1111000 | (vm), vs2, vs1, 010, vd, 0b1010111)) // 111100...........010.....1010111
-#define VWMACC_VV(vd, vs1, vs2, vm)   EMIT(R_type(0b1111010 | (vm), vs2, vs1, 010, vd, 0b1010111)) // 111101...........010.....1010111
-#define VWMACCSU_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1111110 | (vm), vs2, vs1, 010, vd, 0b1010111)) // 111111...........010.....1010111
+#define VDIVU_VV(vd, vs1, vs2, vm)    EMIT(R_type(0b1000000 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 100000...........010.....1010111
+#define VDIV_VV(vd, vs1, vs2, vm)     EMIT(R_type(0b1000010 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 100001...........010.....1010111
+#define VREMU_VV(vd, vs1, vs2, vm)    EMIT(R_type(0b1000100 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 100010...........010.....1010111
+#define VREM_VV(vd, vs1, vs2, vm)     EMIT(R_type(0b1000110 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 100011...........010.....1010111
+#define VMULHU_VV(vd, vs1, vs2, vm)   EMIT(R_type(0b1001000 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 100100...........010.....1010111
+#define VMUL_VV(vd, vs1, vs2, vm)     EMIT(R_type(0b1001010 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 100101...........010.....1010111
+#define VMULHSU_VV(vd, vs1, vs2, vm)  EMIT(R_type(0b1001100 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 100110...........010.....1010111
+#define VMULH_VV(vd, vs1, vs2, vm)    EMIT(R_type(0b1001110 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 100111...........010.....1010111
+#define VMADD_VV(vd, vs1, vs2, vm)    EMIT(R_type(0b1010010 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 101001...........010.....1010111
+#define VNMSUB_VV(vd, vs1, vs2, vm)   EMIT(R_type(0b1010110 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 101011...........010.....1010111
+#define VMACC_VV(vd, vs1, vs2, vm)    EMIT(R_type(0b1011010 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 101101...........010.....1010111
+#define VNMSAC_VV(vd, vs1, vs2, vm)   EMIT(R_type(0b1011110 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 101111...........010.....1010111
+#define VWADDU_VV(vd, vs1, vs2, vm)   EMIT(R_type(0b1100000 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 110000...........010.....1010111
+#define VWADD_VV(vd, vs1, vs2, vm)    EMIT(R_type(0b1100010 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 110001...........010.....1010111
+#define VWSUBU_VV(vd, vs1, vs2, vm)   EMIT(R_type(0b1100100 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 110010...........010.....1010111
+#define VWSUB_VV(vd, vs1, vs2, vm)    EMIT(R_type(0b1100110 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 110011...........010.....1010111
+#define VWADDU_WV(vd, vs1, vs2, vm)   EMIT(R_type(0b1101000 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 110100...........010.....1010111
+#define VWADD_WV(vd, vs1, vs2, vm)    EMIT(R_type(0b1101010 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 110101...........010.....1010111
+#define VWSUBU_WV(vd, vs1, vs2, vm)   EMIT(R_type(0b1101100 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 110110...........010.....1010111
+#define VWSUB_WV(vd, vs1, vs2, vm)    EMIT(R_type(0b1101110 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 110111...........010.....1010111
+#define VWMULU_VV(vd, vs1, vs2, vm)   EMIT(R_type(0b1110000 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 111000...........010.....1010111
+#define VWMULSU_VV(vd, vs1, vs2, vm)  EMIT(R_type(0b1110100 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 111010...........010.....1010111
+#define VWMUL_VV(vd, vs1, vs2, vm)    EMIT(R_type(0b1110110 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 111011...........010.....1010111
+#define VWMACCU_VV(vd, vs1, vs2, vm)  EMIT(R_type(0b1111000 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 111100...........010.....1010111
+#define VWMACC_VV(vd, vs1, vs2, vm)   EMIT(R_type(0b1111010 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 111101...........010.....1010111
+#define VWMACCSU_VV(vd, vs1, vs2, vm) EMIT(R_type(0b1111110 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 111111...........010.....1010111
 
 //  OPMVX
 #define VAADDU_VX(vd, rs1, vs2, vm)      EMIT(R_type(0b0010000 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 001000...........110.....1010111
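
One subtlety in the rv64_emitter.h change above: the old funct3 literal 010 is an octal constant in C, i.e. decimal 8, not the 3-bit value 0b010 (= 2) that the OPMVV encoding needs. Assuming R_type() does not mask its funct3 argument, the stray high bit would leak into a neighboring instruction field, so rewriting the literals as 0b010 fixes the emitted bits as well as the readability. A two-line demonstration:

    #include <assert.h>

    int main(void)
    {
        assert(010 == 8);   /* leading zero means octal in C */
        assert(0b010 == 2); /* the intended OPMVV funct3 */
        return 0;
    }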