about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Yang Liu <liuyang22@iscas.ac.cn> 2023-04-14 01:07:10 +0800
committer GitHub <noreply@github.com> 2023-04-13 19:07:10 +0200
commit 9246f4925bbf0835f052e9aa0b4e167857e7ced9 (patch)
tree 590dfbe8c55a25e5169b22497cc45356069084c0
parent 128fb82c1c40b05ed63409b7d79f486ada6f16ed (diff)
download box64-9246f4925bbf0835f052e9aa0b4e167857e7ced9.tar.gz
box64-9246f4925bbf0835f052e9aa0b4e167857e7ced9.zip
[RV64_DYNAREC] Added more 66 0F opcodes for SV and some fixes (#697)
* [RV64_DYNAREC] Added 66 0F 38 00 PSHUFB opcode

* [RV64_DYNAREC] Added 66 0F 38 17 PTEST opcode

* [RV64_DYNAREC] Added 66 0F 38 3A PMINUW opcode

* [RV64_DYNAREC] Added 66 0F C4 PINSRW opcode

* [RV64_DYNAREC] Fixed F2 0F 70 PSHUFLW opcode

* [RV64_DYNAREC] Added 66 0F DD PADDUSW opcode

* [RV64_DYNAREC] Added 66 0F FC PADDB opcode
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_660f.c 118
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_f20f.c 8
2 files changed, 122 insertions, 4 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_660f.c b/src/dynarec/rv64/dynarec_rv64_660f.c
index 198c1a85..f3e6d2cc 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f.c
@@ -162,6 +162,86 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 ORI(xFlags, xFlags, 1<<F_ZF);
             }
             break;
+        case 0x38:  // SSSE3 opcodes
+            nextop = F8;
+            switch(nextop) {
+                case 0x00:
+                    INST_NAME("PSHUFB Gx, Ex"); // per byte: Gx[i] = (Ex[i] & 128) ? 0 : saved_Gx[Ex[i] & 15]
+                    nextop = F8;
+                    GETGX(x1);
+                    GETEX(x2, 0);
+                    sse_forget_reg(dyn, ninst, x5);
+                    ADDI(x5, xEmu, offsetof(x64emu_t, xmm[x5])); // x5 = address of the xmm[x5] slot, used below as a 16-byte buffer. NOTE(review): x5 is both the array index and the destination register - confirm this slot is safe to overwrite
+                    // Gx is overwritten byte by byte in the loop, so lookups must read from a copy
+                    // preserve Gx: copy its 16 bytes into the buffer at x5
+                    LD(x3, gback, 0);
+                    LD(x4, gback, 8);
+                    SD(x3, x5, 0);
+                    SD(x4, x5, 8);
+                    // one emitted sequence per result byte; the branch offsets below depend on this exact sequence
+                    for (int i=0; i<16; ++i) {
+                        LBU(x3, wback, fixedaddress+i); // x3 = selector byte i read through wback
+                        ANDI(x4, x3, 128); // isolate bit 7 of the selector
+                        BEQZ(x4, 12); // bit 7 clear: jump to the lookup path (presumably past the next two instructions - confirm offset unit)
+                        SB(xZR, gback, i); // bit 7 set: result byte is zero
+                        BEQZ(xZR, 20); // continue (always taken: xZR is compared against zero)
+                        ANDI(x4, x3, 15); // low 4 bits select the source byte
+                        ADD(x4, x4, x5); // address of that byte inside the saved copy
+                        LBU(x4, x4, 0);
+                        SB(x4, gback, i);
+                    }
+                    break;
+                case 0x17:
+                    INST_NAME("PTEST Gx, Ex"); // ZF = ((Gx & Ex) == 0), CF = ((~Gx & Ex) == 0)
+                    nextop = F8;
+                    SETFLAGS(X_ALL, SF_SET);
+                    GETGX(x1);
+                    GETEX(x2, 0);
+                    CLEAR_FLAGS();
+                    SET_DFNONE();
+                    IFX(X_ZF|X_CF) {
+                        LD(x5, wback, fixedaddress+0); // x5 = low 64 bits read through wback (Ex)
+                        LD(x6, wback, fixedaddress+8); // x6 = high 64 bits
+                        // each test below branches over its ORI when the combined 128-bit result is non-zero
+                        IFX(X_ZF) {
+                            LD(x3, gback, 0);
+                            LD(x4, gback, 8);
+                            AND(x3, x3, x5);
+                            AND(x4, x4, x6);
+                            OR(x3, x3, x4); // non-zero if any bit of Gx & Ex is set
+                            BNEZ(x3, 8);
+                            ORI(xFlags, xFlags, 1<<F_ZF);
+                        }
+                        IFX(X_CF) {
+                            LD(x3, gback, 0);
+                            NOT(x3, x3);
+                            LD(x4, gback, 8);
+                            NOT(x4, x4);
+                            AND(x3, x3, x5);
+                            AND(x4, x4, x6);
+                            OR(x3, x3, x4); // non-zero if any bit of ~Gx & Ex is set
+                            BNEZ(x3, 8);
+                            ORI(xFlags, xFlags, 1<<F_CF); // fixed: this branch computes CF; it previously set F_ZF
+                        }
+                    }
+                    break;
+                case 0x3A:
+                    INST_NAME("PMINUW Gx, Ex");  // SSE4 opcode!
+                    nextop = F8;
+                    GETGX(x1);
+                    GETEX(x2, 0);
+                    for(int i=0; i<8; ++i) {
+                        // per 16-bit lane: if(GX->uw[i]>EX->uw[i]) GX->uw[i] = EX->uw[i];
+                        LHU(x3, gback, i*2); // x3 = GX->uw[i]
+                        LHU(x4, wback, fixedaddress+i*2); // x4 = EX->uw[i]
+                        BLTU(x3, x4, 8); // GX < EX (unsigned): keep GX and skip the store
+                        SH(x4, gback, i*2); // otherwise store EX (on equality this rewrites the same value)
+                    }
+                    break;
+                default:
+                    DEFAULT;
+            }
+            break;
         case 0x54:
             INST_NAME("ANDPD Gx, Ex");
             nextop = F8;
@@ -776,6 +856,14 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             AND(x1, x1, x5);
             OR(gd, gd, x1);
             break;
+        case 0xC4:
+            INST_NAME("PINSRW Gx,Ed,Ib"); // insert a 16-bit value from Ed into the word lane of Gx chosen by the immediate
+            nextop = F8;
+            GETED(1); // NOTE(review): the argument presumably accounts for the immediate byte that follows - confirm
+            GETGX(x3);
+            u8 = (F8)&7; // low 3 bits of the immediate pick one of 8 lanes
+            SH(ed, gback, u8*2); // halfword store at byte offset lane*2
+            break;
         case 0xC5:
             INST_NAME("PEXTRW Gd,Ex,Ib");
             nextop = F8;
@@ -830,6 +918,23 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             GETEX(x2, 0);
             SSE_LOOP_Q(x3, x4, AND(x3, x3, x4));
             break;
+        case 0xDD:
+            INST_NAME("PADDUSW Gx,Ex"); // per 16-bit lane: unsigned add, saturated at 65535
+            nextop = F8;
+            GETGX(x1);
+            GETEX(x2, 0);
+            for(int i=0; i<8; ++i) {
+                // tmp32s = (int32_t)GX->uw[i] + EX->uw[i];
+                // GX->uw[i] = (tmp32s>65535)?65535:tmp32s;
+                LHU(x3, gback, i*2); // x3 = GX->uw[i]
+                LHU(x4, wback, fixedaddress+i*2); // x4 = EX->uw[i]
+                ADDW(x3, x3, x4); // tmp32s
+                MOV32w(x4, 65536); // x4 is reused as the saturation bound, so it is reloaded every lane
+                BLT(x3, x4, 8); // sum < 65536: skip the clamp
+                ADDIW(x3, x4, -1); // clamp: x3 = 65536 - 1 = 65535
+                SH(x3, gback, i*2);
+            }
+            break;
         case 0xDF:
             INST_NAME("PANDN Gx,Ex");
             nextop = F8;
@@ -892,6 +997,19 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             GETEX(x2, 0);
             SSE_LOOP_D(x3, x4, SUBW(x3, x3, x4));
             break;
+        case 0xFC:
+            INST_NAME("PADDB Gx,Ex"); // per byte lane: Gx[i] += Ex[i]
+            nextop = F8;
+            GETGX(x1);
+            GETEX(x2, 0);
+            for(int i=0; i<16; ++i) {
+                // GX->sb[i] += EX->sb[i];
+                LB(x3, gback, i); // x3 = GX->sb[i]
+                LB(x4, wback, fixedaddress+i); // x4 = EX->sb[i]
+                ADDW(x3, x3, x4);
+                SB(x3, gback, i); // byte store writes the sum back to lane i
+            }
+            break;
         case 0xFD:
             INST_NAME("PADDW Gx,Ex");
             nextop = F8;
diff --git a/src/dynarec/rv64/dynarec_rv64_f20f.c b/src/dynarec/rv64/dynarec_rv64_f20f.c
index 711927ef..ee1e14ca 100644
--- a/src/dynarec/rv64/dynarec_rv64_f20f.c
+++ b/src/dynarec/rv64/dynarec_rv64_f20f.c
@@ -222,10 +222,10 @@ uintptr_t dynarec64_F20F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             idx = (u8>>(3*2))&3;
             LHU(x6, wback, fixedaddress+idx*2);
 
-            SW(x3, gback, 0*2);
-            SW(x4, gback, 1*2);
-            SW(x5, gback, 2*2);
-            SW(x6, gback, 3*2);
+            SH(x3, gback, 0*2);
+            SH(x4, gback, 1*2);
+            SH(x5, gback, 2*2);
+            SH(x6, gback, 3*2);
 
             if (!(MODREG && (gd==ed))) {
                 LD(x3, wback, fixedaddress+8);