about summary refs log tree commit diff stats
path: root/src/dynarec
diff options
context:
space:
mode:
author: xctan <xctan@cirno.icu> 2024-05-28 21:04:41 +0800
committer: GitHub <noreply@github.com> 2024-05-28 15:04:41 +0200
commit: 7c5bf62fc0747bdda15d58798d6a5f59a18f2a41 (patch)
tree: c636a4c78abb43f42b9d5cf38594b87a3ede16ff /src/dynarec
parent: b79f86b8d1c864cc58d79730a628e72c56ea960d (diff)
download: box64-7c5bf62fc0747bdda15d58798d6a5f59a18f2a41.tar.gz
          box64-7c5bf62fc0747bdda15d58798d6a5f59a18f2a41.zip
[RV64_DYNAREC] Added more MMX opcodes and some optimizations too (#1535)
* [RV64_DYNAREC] Added 0F F7 MASKMOVQ opcode

* [RV64_DYNAREC] Added 0F 38 1C PABSB opcode

* [RV64_DYNAREC] Added 0F 38 1E PABSD opcode

* [RV64_DYNAREC] Added 0F 38 1D PABSW opcode

* [RV64_DYNAREC] Added 0F 63 PACKSSWB opcode

* [RV64_DYNAREC] Added 0F FC PADDB opcode

* [RV64_DYNAREC] Added 0F D4 PADDQ opcode

* [RV64_DYNAREC] Added 0F EC PADDSB opcode and optimized 66 0F EC PADDSB opcode

* [RV64_DYNAREC] Added 0F DC PADDUSB opcode and optimized 66 0F DC PADDUSB opcode
Diffstat (limited to 'src/dynarec')
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_0f.c 156
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_660f.c 35
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_helper.h 7
-rw-r--r-- src/dynarec/rv64/rv64_emitter.h 10
4 files changed, 187 insertions, 21 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c
index 7c5dfe01..8fb37279 100644
--- a/src/dynarec/rv64/dynarec_rv64_0f.c
+++ b/src/dynarec/rv64/dynarec_rv64_0f.c
@@ -444,6 +444,42 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                         SB(x3, gback, gdoffset + i);
                     }
                     break;
+                case 0x1C:
+                    INST_NAME("PABSB Gm,Em");
+                    nextop = F8;
+                    GETGM();
+                    GETEM(x2, 0);
+                    for (int i = 0; i < 8; ++i) {
+                        LB(x4, wback, fixedaddress + i);
+                        BGE(x4, xZR, 4 + 4);
+                        NEG(x4, x4);
+                        SB(x4, gback, gdoffset + i);
+                    }
+                    break;
+                case 0x1D:
+                    INST_NAME("PABSW Gm,Em");
+                    nextop = F8;
+                    GETGM();
+                    GETEM(x2, 0);
+                    for (int i = 0; i < 4; ++i) {
+                        LH(x4, wback, fixedaddress + i * 2);
+                        BGE(x4, xZR, 4 + 4);
+                        NEG(x4, x4);
+                        SH(x4, gback, gdoffset + i * 2);
+                    }
+                    break;
+                case 0x1E:
+                    INST_NAME("PABSD Gm,Em");
+                    nextop = F8;
+                    GETGM();
+                    GETEM(x2, 0);
+                    for (int i = 0; i < 2; ++i) {
+                        LW(x4, wback, fixedaddress + i * 4);
+                        BGE(x4, xZR, 4 + 4);
+                        NEG(x4, x4);
+                        SW(x4, gback, gdoffset + i * 4);
+                    }
+                    break;
                 case 0xC8 ... 0xCD:
                     u8 = nextop;
                     switch (u8) {
@@ -866,6 +902,44 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             LWU(x3, wback, fixedaddress);
             SW(x3, gback, gdoffset + 4 * 1);
             break;
+        case 0x63:
+            INST_NAME("PACKSSWB Gm,Em");
+            nextop = F8;
+            GETGM();
+            GETEM(x2, 0);
+            MOV64x(x5, 127);
+            MOV64x(x6, -128);
+            for (int i = 0; i < 4; ++i) {
+                LH(x3, gback, gdoffset + i * 2);
+                if (rv64_zbb) {
+                    MIN(x3, x3, x5);
+                    MAX(x3, x3, x6);
+                } else {
+                    BLT(x3, x5, 4 + 4);
+                    MV(x3, x5);
+                    BGE(x3, x6, 4 + 4);
+                    MV(x3, x6);
+                }
+                SB(x3, gback, gdoffset + i);
+            }
+            if (MODREG && gd == ed) {
+                LW(x3, gback, gdoffset + 0);
+                SW(x3, gback, gdoffset + 4);
+            } else
+                for (int i = 0; i < 4; ++i) {
+                    LH(x3, wback, fixedaddress + i * 2);
+                    if (rv64_zbb) {
+                        MIN(x3, x3, x5);
+                        MAX(x3, x3, x6);
+                    } else {
+                        BLT(x3, x5, 4 + 4);
+                        MV(x3, x5);
+                        BGE(x3, x6, 4 + 4);
+                        MV(x3, x6);
+                    }
+                    SB(x3, gback, gdoffset + 4 + i);
+                }
+            break;
         case 0x67:
             INST_NAME("PACKUSWB Gm, Em");
             nextop = F8;
@@ -1909,6 +1983,16 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             gd = xRAX + (opcode & 7) + (rex.b << 3);
             REV8xw(gd, gd, x1, x2, x3, x4);
             break;
+        case 0xD4:
+            INST_NAME("PADDQ Gm,Em");
+            nextop = F8;
+            GETGM();
+            GETEM(x2, 0);
+            LD(x1, wback, fixedaddress);
+            LD(x2, gback, gdoffset);
+            ADD(x1, x1, x2);
+            SD(x1, gback, gdoffset);
+            break;
         case 0xD5:
             INST_NAME("PMULLW Gm, Em");
             nextop = F8;
@@ -1933,6 +2017,25 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             AND(x1, x1, x2);
             SD(x1, gback, gdoffset);
             break;
+        case 0xDC:
+            INST_NAME("PADDUSB Gm,Em");
+            nextop = F8;
+            GETGM();
+            GETEM(x2, 0);
+            ADDI(x5, xZR, 0xFF);
+            for (int i = 0; i < 8; ++i) {
+                LBU(x3, gback, gdoffset + i);
+                LBU(x4, wback, fixedaddress + i);
+                ADD(x3, x3, x4);
+                if (rv64_zbb) {
+                    MINU(x3, x3, x5);
+                } else {
+                    BLT(x3, x5, 8);
+                    ADDI(x3, xZR, 0xFF);
+                }
+                SB(x3, gback, gdoffset + i);
+            }
+            break;
         case 0xE2:
             INST_NAME("PSRAD Gm, Em");
             nextop = F8;
@@ -2008,6 +2111,34 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             OR(x3, x3, x4);
             SD(x3, gback, gdoffset);
             break;
+        case 0xEC:
+            INST_NAME("PADDSB Gm,Em");
+            nextop = F8;
+            GETGM();
+            GETEM(x2, 0);
+            ADDI(x5, xZR, 0x7f);
+            ADDI(x6, xZR, 0xf80);
+            for (int i = 0; i < 8; ++i) {
+                // tmp16s = (int16_t)GX->sb[i] + EX->sb[i];
+                // GX->sb[i] = (tmp16s>127)?127:((tmp16s<-128)?-128:tmp16s);
+                LB(x3, gback, gdoffset + i);
+                LB(x4, wback, fixedaddress + i);
+                ADDW(x3, x3, x4);
+                if (rv64_zbb) {
+                    MIN(x3, x3, x5);
+                    MAX(x3, x3, x6);
+                    SB(x3, gback, gdoffset + i);
+                } else {
+                    BLT(x3, x5, 12); // tmp16s>127?
+                    SB(x5, gback, gdoffset + i);
+                    J(20); // continue
+                    BLT(x6, x3, 12); // tmp16s<-128?
+                    SB(x6, gback, gdoffset + i);
+                    J(8); // continue
+                    SB(x3, gback, gdoffset + i);
+                }
+            }
+            break;
         case 0xED:
             INST_NAME("PADDSW Gm,Em");
             nextop = F8;
@@ -2060,6 +2191,18 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 SW(x1, gback, gdoffset + i * 4);
             }
             break;
+        case 0xF7:
+            INST_NAME("MASKMOVQ Gm, Em"); // byte-wise masked store of Gm to [RDI + i]; Em supplies the mask bytes
+            nextop = F8;
+            GETGM();
+            GETEM(x5, 0);
+            for (int i = 0; i < 8; i++) {
+                LB(x1, wback, fixedaddress + i); // mask byte, sign-extended so bit 7 becomes the sign
+                BGE(x1, xZR, 4 * 3);             // bit 7 clear (byte >= 0, zero included): skip the LB + SB pair
+                LB(x2, gback, gdoffset + i);     // source byte from Gm
+                SB(x2, xRDI, i);
+            }
+            break;
         case 0xF9:
             INST_NAME("PSUBW Gm, Em");
             nextop = F8;
@@ -2067,6 +2210,19 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             GETEM(x2, 0);
             MMX_LOOP_W(x3, x4, SUBW(x3, x3, x4));
             break;
+        case 0xFC:
+            INST_NAME("PADDB Gm, Em");
+            nextop = F8;
+            GETGM();
+            GETEM(x2, 0);
+            for (int i = 0; i < 8; ++i) {
+                // GM->sb[i] += EM->sb[i];
+                LB(x3, gback, gdoffset + i);
+                LB(x4, wback, fixedaddress + i);
+                ADDW(x3, x3, x4);
+                SB(x3, gback, gdoffset + i);
+            }
+            break;
         case 0xFD:
             INST_NAME("PADDW Gm, Em");
             nextop = F8;
diff --git a/src/dynarec/rv64/dynarec_rv64_660f.c b/src/dynarec/rv64/dynarec_rv64_660f.c
index f66bb2aa..bc1ab239 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f.c
@@ -582,7 +582,6 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     nextop = F8;
                     GETGX();
                     GETEX(x2, 0);
-                    MOV64x(x5, ~(1 << 31));
                     for (int i = 0; i < 4; ++i) {
                         LW(x4, wback, fixedaddress + i * 4);
                         BGE(x4, xZR, 4 + 4);
@@ -2763,8 +2762,12 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 LBU(x3, gback, gdoffset + i);
                 LBU(x4, wback, fixedaddress + i);
                 ADD(x3, x3, x4);
-                BLT(x3, x5, 8);
-                ADDI(x3, xZR, 0xFF);
+                if (rv64_zbb) {
+                    MINU(x3, x3, x5);
+                } else {
+                    BLT(x3, x5, 8);
+                    ADDI(x3, xZR, 0xFF);
+                }
                 SB(x3, gback, gdoffset + i);
             }
             break;
@@ -2993,23 +2996,27 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             nextop = F8;
             GETGX();
             GETEX(x2, 0);
+            ADDI(x5, xZR, 0x7f);
+            ADDI(x6, xZR, 0xf80);
             for (int i = 0; i < 16; ++i) {
                 // tmp16s = (int16_t)GX->sb[i] + EX->sb[i];
                 // GX->sb[i] = (tmp16s>127)?127:((tmp16s<-128)?-128:tmp16s);
                 LB(x3, gback, gdoffset + i);
                 LB(x4, wback, fixedaddress + i);
                 ADDW(x3, x3, x4);
-                SLLIW(x3, x3, 16);
-                SRAIW(x3, x3, 16);
-                ADDI(x4, xZR, 0x7f);
-                BLT(x3, x4, 12); // tmp16s>127?
-                SB(x4, gback, gdoffset + i);
-                J(24); // continue
-                ADDI(x4, xZR, 0xf80);
-                BLT(x4, x3, 12); // tmp16s<-128?
-                SB(x4, gback, gdoffset + i);
-                J(8); // continue
-                SB(x3, gback, gdoffset + i);
+                if (rv64_zbb) {
+                    MIN(x3, x3, x5);
+                    MAX(x3, x3, x6);
+                    SB(x3, gback, gdoffset + i);
+                } else {
+                    BLT(x3, x5, 12); // tmp16s>127?
+                    SB(x5, gback, gdoffset + i);
+                    J(20); // continue
+                    BLT(x6, x3, 12); // tmp16s<-128?
+                    SB(x6, gback, gdoffset + i);
+                    J(8); // continue
+                    SB(x3, gback, gdoffset + i);
+                }
             }
             break;
         case 0xED:
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h
index 6d7f63b1..6a374499 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.h
+++ b/src/dynarec/rv64/dynarec_rv64_helper.h
@@ -174,12 +174,7 @@
 #define GETSEW(i, D)                                                                           \
     if (MODREG) {                                                                              \
         wback = xRAX + (nextop & 7) + (rex.b << 3);                                            \
-        if (rv64_zbb)                                                                          \
-            SEXTH(i, wback);                                                                   \
-        else {                                                                                 \
-            SLLI(i, wback, 48);                                                                \
-            SRAI(i, i, 48);                                                                    \
-        }                                                                                      \
+        SEXTH(i, wback);                                                                       \
         ed = i;                                                                                \
         wb1 = 0;                                                                               \
     } else {                                                                                   \
diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h
index 8bdca605..fecfef4d 100644
--- a/src/dynarec/rv64/rv64_emitter.h
+++ b/src/dynarec/rv64/rv64_emitter.h
@@ -821,7 +821,15 @@ f28–31  ft8–11  FP temporaries                  Caller
 // Sign-extend byte
 #define SEXTB(rd, rs) EMIT(R_type(0b0110000, 0b00100, rs, 0b001, rd, 0b0010011))
 // Sign-extend half-word
-#define SEXTH(rd, rs) EMIT(R_type(0b0110000, 0b00101, rs, 0b001, rd, 0b0010011))
+#define SEXTH_(rd, rs) EMIT(R_type(0b0110000, 0b00101, rs, 0b001, rd, 0b0010011))
+// Sign-extend half-word
+#define SEXTH(rd, rs)     \
+    if (rv64_zbb)         \
+        SEXTH_(rd, rs);   \
+    else {                \
+        SLLI(rd, rs, 48); \
+        SRAI(rd, rd, 48); \
+    }
 // Zero-extend half-word
 #define ZEXTH_(rd, rs) EMIT(R_type(0b0000100, 0b00000, rs, 0b100, rd, 0b0111011))
 // Zero-extend half-word