about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author xctan <xctan@cirno.icu> 2024-11-13 17:08:16 +0800
committer GitHub <noreply@github.com> 2024-11-13 10:08:16 +0100
commit eb9e7b9fee7fcf408868c2161e2d5d539af06775 (patch)
tree 4b575fcd634ce2d85a116c995741df330ff83bc0 /src
parent b02942c0b0d8491c2d28128c4c948710f47f94f9 (diff)
download box64-eb9e7b9fee7fcf408868c2161e2d5d539af06775.tar.gz
box64-eb9e7b9fee7fcf408868c2161e2d5d539af06775.zip
[RV64_DYNAREC] Added more MMX opcodes for vector (#2024)
* [RV64_DYNAREC] Added 0F D1-D3 PSRLW/PSRLD/PSRLQ opcodes

* [RV64_DYNAREC] Added 0F EC PADDSB opcode

* [RV64_DYNAREC] Added 0F DC-DD PADDUSB/PADDUSW opcodes

* [RV64_DYNAREC] Added 0F FC-FE PADDB/PADDW/PADDD opcodes

* [RV64_DYNAREC] Added 0F ED PADDSW opcode

* [RV64_DYNAREC] Added 0F 7F MOVQ opcode

* [RV64_DYNAREC] Fixed some typos

* [RV64_DYNAREC] Optimized RVV MMX PSRLW/D/Q to a mask-less version
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_0f_vector.c 98
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_660f.c 4
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_660f_vector.c 6
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_helper.h 4
4 files changed, 103 insertions, 9 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_0f_vector.c b/src/dynarec/rv64/dynarec_rv64_0f_vector.c
index 2eedcc22..7429c7eb 100644
--- a/src/dynarec/rv64/dynarec_rv64_0f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_0f_vector.c
@@ -499,6 +499,22 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
                 VMV_S_X(v0, x4);
             }
             break;
+        case 0x7F:
+            INST_NAME("MOVQ Em, Gm");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+            GETG;
+            if (MODREG) {
+                v1 = mmx_get_reg_vector(dyn, ninst, x1, x2, x3, gd);
+                v0 = mmx_get_reg_empty_vector(dyn, ninst, x1, x2, x3, nextop & 7);
+                VMV_V_V(v0, v1);
+            } else {
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0);
+                v1 = mmx_get_reg_vector(dyn, ninst, x1, x2, x3, gd);
+                VMV_X_S(x4, v1);
+                SD(x4, ed, fixedaddress);
+            }
+            break;
         case 0xC2:
             INST_NAME("CMPPS Gx, Ex, Ib");
             nextop = F8;
@@ -580,13 +596,91 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
             VMV_V_V(v0, d0);
             VSLIDEUP_VI(v0, d1, 2, VECTOR_UNMASKED);
             break;
-        case 0xFC:
-            INST_NAME("PADDB Gm, Em");
+        case 0xD1:
+        case 0xD2:
+        case 0xD3:
+            if (opcode == 0xD1) {
+                INST_NAME("PSRLW Gm, Em");
+                u8 = VECTOR_SEW16;
+                i32 = 16;
+            } else if (opcode == 0xD2) {
+                INST_NAME("PSRLD Gm, Em");
+                u8 = VECTOR_SEW32;
+                i32 = 32;
+            } else {
+                INST_NAME("PSRLQ Gm, Em");
+                u8 = VECTOR_SEW64;
+                i32 = 64;
+            }
             nextop = F8;
+            q0 = fpu_get_scratch(dyn);
+            GETGM_vector(v0);
+            SET_ELEMENT_WIDTH(x1, u8, 1);
+            if (MODREG) {
+                v1 = mmx_get_reg_vector(dyn, ninst, x1, x2, x3, (nextop & 7));
+                VMV_X_S(x4, v1);
+            } else {
+                SMREAD();
+                addr = geted(dyn, addr, ninst, nextop, &wback, v1, x3, &fixedaddress, rex, NULL, 1, 0);
+                LD(x4, wback, fixedaddress);
+            }
+            SLTIU(x3, x4, i32);
+            SUB(x3, xZR, x3);
+            NOT(x3, x3); // mask
+            VSRL_VX(v0, v0, x4, VECTOR_UNMASKED);
+            VAND_VX(q0, v0, x3, VECTOR_UNMASKED);
+            VXOR_VV(v0, v0, q0, VECTOR_UNMASKED);
+            break;
+        case 0xDC:
+        case 0xDD:
+            if (opcode == 0xDC) {
+                INST_NAME("PADDUSB Gm, Em");
+                u8 = VECTOR_SEW8;
+            } else {
+                INST_NAME("PADDUSW Gm, Em");
+                u8 = VECTOR_SEW16;
+            }
+            nextop = F8;
+            GETGM_vector(v0);
             SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+            GETEM_vector(v1, 0);
+            SET_ELEMENT_WIDTH(x1, u8, 1);
+            VSADDU_VV(v0, v0, v1, VECTOR_UNMASKED);
+            break;
+        case 0xEC:
+            INST_NAME("PADDSB Gm, Em");
+            nextop = F8;
             GETGM_vector(v0);
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
             GETEM_vector(v1, 0);
             SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
+            VSADD_VV(v0, v0, v1, VECTOR_UNMASKED);
+            break;
+        case 0xED:
+            INST_NAME("PADDSW Gm, Em");
+            nextop = F8;
+            GETGM_vector(v0);
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+            GETEM_vector(v1, 0);
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
+            VSADD_VV(v0, v0, v1, VECTOR_UNMASKED);
+            break;
+        case 0xFC ... 0xFE:
+            nextop = F8;
+            if (opcode == 0xFC) {
+                INST_NAME("PADDB Gm, Em");
+                u8 = VECTOR_SEW8;
+            } else if (opcode == 0xFD) {
+                INST_NAME("PADDW Gm, Em");
+                u8 = VECTOR_SEW16;
+            } else {
+                INST_NAME("PADDD Gm, Em");
+                u8 = VECTOR_SEW32;
+            }
+            GETGM_vector(v0);
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+            GETEM_vector(v1, 0);
+            SET_ELEMENT_WIDTH(x1, u8, 1);
             VADD_VV(v0, v0, v1, VECTOR_UNMASKED);
             break;
         case 0x00 ... 0x0F:
diff --git a/src/dynarec/rv64/dynarec_rv64_660f.c b/src/dynarec/rv64/dynarec_rv64_660f.c
index 9450f22d..06cf8400 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f.c
@@ -2088,7 +2088,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             }
             break;
         case 0xF1:
-            INST_NAME("PSLLQ Gx,Ex");
+            INST_NAME("PSLLW Gx,Ex");
             nextop = F8;
             GETGX();
             GETEX(x2, 0, 1);
@@ -2107,7 +2107,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             }
             break;
         case 0xF2:
-            INST_NAME("PSLLQ Gx,Ex");
+            INST_NAME("PSLLD Gx,Ex");
             nextop = F8;
             GETGX();
             GETEX(x2, 0, 1);
diff --git a/src/dynarec/rv64/dynarec_rv64_660f_vector.c b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
index bbfc3c72..7cb244fe 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
@@ -2089,15 +2089,15 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
         case 0xF2:
         case 0xF3:
             if (opcode == 0xF1) {
-                INST_NAME("PSRLW Gx, Ex");
+                INST_NAME("PSLLW Gx, Ex");
                 u8 = VECTOR_SEW16;
                 i32 = 16;
             } else if (opcode == 0xF2) {
-                INST_NAME("PSRLD Gx, Ex");
+                INST_NAME("PSLLD Gx, Ex");
                 u8 = VECTOR_SEW32;
                 i32 = 32;
             } else {
-                INST_NAME("PSRLQ Gx, Ex");
+                INST_NAME("PSLLQ Gx, Ex");
                 u8 = VECTOR_SEW64;
                 i32 = 64;
             }
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h
index 2ae12cf3..4e168ab6 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.h
+++ b/src/dynarec/rv64/dynarec_rv64_helper.h
@@ -549,13 +549,13 @@
         addr = geted(dyn, addr, ninst, nextop, &wback, a, x3, &fixedaddress, rex, NULL, I12, D); \
     }
 
-// Get EM as vector, might use x1, x2 and x3
+// Get EM as vector, might use x1, x2 and x3; requires SEW64
 #define GETEM_vector(a, D)                                                                     \
     if (MODREG) {                                                                              \
         a = mmx_get_reg_vector(dyn, ninst, x1, x2, x3, (nextop & 7));                          \
     } else {                                                                                   \
         SMREAD();                                                                              \
-        addr = geted(dyn, addr, ninst, nextop, &wback, a, x3, &fixedaddress, rex, NULL, 1, D); \
+        addr = geted(dyn, addr, ninst, nextop, &ed, a, x3, &fixedaddress, rex, NULL, 1, D);    \
         a = fpu_get_scratch(dyn);                                                              \
         FLD(a, ed, fixedaddress);                                                              \
         VFMV_S_F(a, a);                                                                        \