about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author: xctan <xctan@cirno.icu> 2023-04-19 16:11:22 +0800
committer: GitHub <noreply@github.com> 2023-04-19 10:11:22 +0200
commit: 6f29d2a5d8208d2f0a8c1e1f992de19ca36b74cc (patch)
tree: 3b80823cf3b800353193491950d4f44e905985fb
parent: 008ef41261b9723c05d223b5732d4574879118b2 (diff)
download: box64-6f29d2a5d8208d2f0a8c1e1f992de19ca36b74cc.tar.gz
          box64-6f29d2a5d8208d2f0a8c1e1f992de19ca36b74cc.zip
[RV64_DYNAREC] Added more opcodes (#712)
* [RV64_DYNAREC] Added 64 33 XOR opcode

* [RV64_DYNAREC] Added 0F C8-CF BSWAP opcode

* [RV64_DYNAREC] Added 66 0F 3A 0B ROUNDSD opcode

* [RV64_DYNAREC] Added F3 0F BC TZCNT opcode

* [RV64_DYNAREC] Added F3 0F E6 CVTDQ2PD opcode

* [RV64_DYNAREC] Added F3 0F 5B CVTTPS2DQ opcode

* [RV64_DYNAREC] Fixed CVTTPS2DQ, CVTDQ2PD and printer
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_0f.c | 62
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_64.c | 12
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_660f.c | 28
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_f30f.c | 69
-rw-r--r-- src/dynarec/rv64/rv64_emitter.h | 3
-rw-r--r-- src/dynarec/rv64/rv64_printer.c | 2
6 files changed, 173 insertions, 3 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c
index 86d1eab7..6f356ff4 100644
--- a/src/dynarec/rv64/dynarec_rv64_0f.c
+++ b/src/dynarec/rv64/dynarec_rv64_0f.c
@@ -1003,6 +1003,68 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             SW(x5, gback, 2*4);
             SW(x6, gback, 3*4);
             break;
+
+        case 0xC8:
+        case 0xC9:
+        case 0xCA:
+        case 0xCB:
+        case 0xCC:
+        case 0xCD:
+        case 0xCE:
+        case 0xCF:                  /* BSWAP reg: reverse the byte order of gd using shift/mask/or steps */
+            INST_NAME("BSWAP Reg");
+            gd = xRAX+(opcode&7)+(rex.b<<3);    // target register: low 3 opcode bits, extended by REX.B
+            MOV_U12(x1, 0xff);  // mask 0xff
+            SLLI(x4, x1, 8); // mask 0xff00
+            if (rex.w) {
+                SLLI(x5, x1, 16); // mask 0xff0000
+                SLLI(x6, x1, 24); // mask 0xff000000
+                // 64-bit swap: x2 accumulates the result, x3 carries one relocated byte at a time
+                SRLI(x2, gd, 56);   // byte 7 -> byte 0
+                // byte 6 -> byte 1
+                SRLI(x3, gd, 40);
+                AND(x3, x3, x4);
+                OR(x2, x2, x3);
+                // byte 5 -> byte 2
+                SRLI(x3, gd, 24);
+                AND(x3, x3, x5);
+                OR(x2, x2, x3);
+                // byte 4 -> byte 3
+                SRLI(x3, gd, 8);
+                AND(x3, x3, x6);
+                OR(x2, x2, x3);
+                // byte 3 -> byte 4
+                AND(x3, gd, x6);
+                SLLI(x3, x3, 8);
+                OR(x2, x2, x3);
+                // byte 2 -> byte 5
+                AND(x3, gd, x5);
+                SLLI(x3, x3, 24);
+                OR(x2, x2, x3);
+                // byte 1 -> byte 6
+                AND(x3, gd, x4);
+                SLLI(x3, x3, 40);
+                OR(x2, x2, x3);
+                // byte 0 -> byte 7: must shift gd itself; x3 still holds byte 1 at bits 48-55 and would shift out to 0
+                SLLI(x3, gd, 56);
+                OR(gd, x2, x3);
+            } else {
+                SRLIW(x2, gd, 24);  // 32-bit swap: byte 3 -> byte 0, x2 accumulates the result
+                // byte 2 -> byte 1
+                SRLIW(x3, gd, 8);
+                AND(x3, x3, x4);
+                OR(x2, x2, x3);
+                // byte 1 -> byte 2
+                AND(x3, gd, x4);
+                SLLI(x3, x3, 8);
+                OR(x2, x2, x3);
+                // byte 0 -> byte 3
+                AND(x3, gd, x1);
+                SLLI(x3, x3, 24);
+                OR(gd, x2, x3);
+            }
+            break;
+
         default:
             DEFAULT;
     }
diff --git a/src/dynarec/rv64/dynarec_rv64_64.c b/src/dynarec/rv64/dynarec_rv64_64.c
index b1fcc589..0f5c9087 100644
--- a/src/dynarec/rv64/dynarec_rv64_64.c
+++ b/src/dynarec/rv64/dynarec_rv64_64.c
@@ -73,6 +73,17 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             GETEDO(x4, 0, x5);
             emit_sub32(dyn, ninst, rex, gd, ed, x3, x4, x5);
             break;
+
+        // case 0x33:
+        //     INST_NAME("XOR Gd, Seg:Ed");
+        //     SETFLAGS(X_ALL, SF_SET_PENDING);
+        //     grab_segdata(dyn, addr, ninst, x4, seg);
+        //     nextop = F8;
+        //     GETGD;
+        //     GETEDO(x4, 0, x5);
+        //     emit_xor32(dyn, ninst, rex, gd, ed, x3, x4);
+        //     break;
+
         case 0x88:
             INST_NAME("MOV Seg:Eb, Gb");
             grab_segdata(dyn, addr, ninst, x4, seg);
@@ -130,6 +141,7 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 SMWRITE2();
             }
             break;
+
         case 0x8B:
             INST_NAME("MOV Gd, Seg:Ed");
             grab_segdata(dyn, addr, ninst, x4, seg);
diff --git a/src/dynarec/rv64/dynarec_rv64_660f.c b/src/dynarec/rv64/dynarec_rv64_660f.c
index 98836ecf..16e84d9a 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f.c
@@ -47,6 +47,8 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
     MAYUSE(eb1);
     MAYUSE(eb2);
     MAYUSE(j64);
+
+    static const int8_t round_round[] = { RD_RNE, RD_RDN, RD_RUP, RD_RTZ };    // rm operand for the FCVT macros, indexed by the low two bits (u8&3) of the ROUNDSD immediate
     
     switch(opcode) {
         case 0x10:
@@ -242,6 +244,32 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     DEFAULT;
             }
             break;
+        case 0x3A:  // these are some more SSSE3+ opcodes (66 0F 3A xx: the next byte selects the instruction)
+            opcode = F8;    // fetch the sub-opcode byte that follows 0x3A
+            switch(opcode) {
+                case 0x0B:
+                    INST_NAME("ROUNDSD Gx, Ex, Ib");
+                    nextop = F8;
+                    GETGX(x1);  // NOTE(review): presumably points gback at Gx through x1 - confirm GETEXSD below cannot clobber x1
+                    GETEXSD(d0, 0); // NOTE(review): an imm8 still follows the operand - confirm whether the 2nd argument must account for it (e.g. RIP-relative Ex)
+                    u8 = F8;    // imm8 rounding control
+                    v1 = fpu_get_scratch(dyn);
+                    if(u8&4) {  // bit 2 set: ignore imm8[1:0] and convert with the dynamic rounding mode
+                        u8 = sse_setround(dyn, ninst, x4, x2);  // u8 is reused to carry the value handed back to x87_restoreround
+                        FCVTLD(x5, d0, RD_DYN); // round by converting to an integer register...
+                        FCVTDL(v1, x5, RD_DYN); // ...and back to double
+                        x87_restoreround(dyn, ninst, u8);
+                    } else {    // bit 2 clear: rounding mode comes from imm8[1:0] via round_round[]
+                        FCVTLD(x5, d0, round_round[u8&3]);  // NOTE(review): NaN/Inf and magnitudes beyond int64 presumably do not survive this integer round-trip
+                        FCVTDL(v1, x5, round_round[u8&3]);
+                    }
+                    FSD(v1, gback, 0);  // store the rounded double at offset 0 of gback; offset 8 is not written
+                    break;
+                default:
+                    DEFAULT;
+            }
+            break;
+
         case 0x54:
             INST_NAME("ANDPD Gx, Ex");
             nextop = F8;
diff --git a/src/dynarec/rv64/dynarec_rv64_f30f.c b/src/dynarec/rv64/dynarec_rv64_f30f.c
index d9700507..945a62d6 100644
--- a/src/dynarec/rv64/dynarec_rv64_f30f.c
+++ b/src/dynarec/rv64/dynarec_rv64_f30f.c
@@ -224,7 +224,58 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             SSE_LOOP_MV_Q2(x3);
             if(!MODREG) SMWRITE2();
             break;
-
+        
+        case 0x5B:  // F3 0F 5B: convert four packed singles to four signed 32-bit integers, truncating
+            INST_NAME("CVTTPS2DQ Gx, Ex");
+            nextop = F8;
+            GETEX(x5, 0) ;  // x5 is used below as the base address of the source (Ex)
+            GETGX(x6);  // x6 is used below as the base address of the destination (Gx)
+            v0 = fpu_get_scratch(dyn);
+            v1 = fpu_get_scratch(dyn);
+            q0 = fpu_get_scratch(dyn);
+            q1 = fpu_get_scratch(dyn);
+            FLW(v0, x5, 0); // load all four source lanes first, so the stores below cannot overwrite unread input
+            FLW(v1, x5, 4);
+            FLW(q0, x5, 8);
+            FLW(q1, x5, 12);
+            FCVTWS(x1, v0, RD_RTZ); // single -> signed 32-bit, rounding toward zero
+            FCVTWS(x2, v1, RD_RTZ); // NOTE(review): x86 returns 0x80000000 for NaN/out-of-range lanes - confirm FCVTWS results are acceptable there
+            FCVTWS(x3, q0, RD_RTZ);
+            FCVTWS(x4, q1, RD_RTZ);
+            SW(x1, x6, 0);  // store the four integer lanes to the destination
+            SW(x2, x6, 4);
+            SW(x3, x6, 8);
+            SW(x4, x6, 12);
+            break;
+        case 0xBC:  // F3 0F BC: count trailing zero bits of Ed into Gd
+            INST_NAME("TZCNT Gd, Ed");
+            SETFLAGS(X_ZF, SF_SUBSET);  // NOTE(review): F_CF is also written below and LZCNT declares X_ZF|X_CF - confirm X_CF is not needed here
+            SET_DFNONE();
+            nextop = F8;
+            GETED(0);
+            GETGD;
+            if(!rex.w && MODREG) {  // 32-bit register source: work on a masked copy in x4 (presumably xMASK keeps the low 32 bits)
+                AND(x4, ed, xMASK);
+                ed = x4;
+            }
+            BNE_MARK(ed, xZR);  // non-zero source: continue at MARK
+            ANDI(xFlags, xFlags, ~((1<<F_ZF) | (1<<F_CF))); // zero source: clear ZF and CF...
+            ORI(xFlags, xFlags, 1<<F_CF);   // ...then set CF
+            MOV32w(gd, rex.w?64:32);    // result is the operand size in bits
+            B_NEXT_nocond;  // done for the zero case (presumably branches to the next instruction)
+            MARK;
+            NEG(x2, ed);
+            AND(x2, x2, ed);    // x2 = ed & -ed, i.e. only the lowest set bit
+            TABLE64(x3, 0x03f79d71b4ca8b09ULL); // multiplier paired with deBruijn64tab
+            MUL(x2, x2, x3);
+            SRLI(x2, x2, 64-6); // top 6 bits of the product form the table index
+            TABLE64(x1, (uintptr_t)&deBruijn64tab);
+            ADD(x1, x1, x2);
+            LBU(gd, x1, 0); // gd = deBruijn64tab[index], one byte per entry
+            ANDI(xFlags, xFlags, ~((1<<F_ZF) | (1<<F_CF))); // non-zero source: clear ZF and CF
+            BNE(gd, xZR, 4+4);  // branch over the ORI below when the result is non-zero
+            ORI(xFlags, xFlags, 1<<F_ZF);   // result 0: set ZF
+            break;
         case 0xBD:
             INST_NAME("LZCNT Gd, Ed");
             SETFLAGS(X_ZF|X_CF, SF_SUBSET);
@@ -326,6 +377,22 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             NEG(x2, x2);
             FMVWX(d0, x2);
             break;
+
+        case 0xE6:  // F3 0F E6: convert the two low 32-bit integers of Ex to two doubles in Gx
+            INST_NAME("CVTDQ2PD Gx, Ex");
+            nextop = F8;
+            GETEX(x1, 0);   // x1 is used below as the base address of the source (Ex)
+            GETGX(x2);  // x2 is used below as the base address of the destination (Gx)
+            q0 = fpu_get_scratch(dyn);
+            q1 = fpu_get_scratch(dyn);
+            LW(x3, x1, 0);  // read both source lanes before any store, so an overlapping destination cannot corrupt the input
+            LW(x4, x1, 4);
+            FCVTDW(q0, x3, RD_DYN); // presumably signed 32-bit -> double; every int32 is exactly representable, so the rounding mode should not change the result
+            FCVTDW(q1, x4, RD_DYN);
+            FSD(q0, x2, 0); // write the two doubles at offsets 0 and 8 of the destination
+            FSD(q1, x2, 8);
+            break;
+
         default:
             DEFAULT;
     }
diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h
index 5ffcc875..29336895 100644
--- a/src/dynarec/rv64/rv64_emitter.h
+++ b/src/dynarec/rv64/rv64_emitter.h
@@ -398,6 +398,7 @@ f28–31  ft8–11  FP temporaries                  Caller
 #define RD_RMM      0b100
 // In instruction’s rm field, selects dynamic rounding mode;
 #define RD_RM       0b111
+#define RD_DYN      RD_RM   // alias of RD_RM: dynamic rounding mode selected in the rm field
 
 // load single precision from rs1+imm12 to frd
 #define FLW(frd, rs1, imm12)        EMIT(I_type(imm12, rs1, 0b010, frd, 0b0000111))
@@ -422,7 +423,7 @@ f28–31  ft8–11  FP temporaries                  Caller
 // Convert from signed 32bits to Single
 #define FCVTSW(frd, rs1, rm)        EMIT(R_type(0b1101000, 0b00000, rs1, rm, frd, 0b1010011))
 // Convert from Single to signed 32bits (trucated)
-#define FCVTWS(rd, frs1, tm)        EMIT(R_type(0b1100000, 0b00000, frs1, rm, rd, 0b1010011))
+#define FCVTWS(rd, frs1, rm)        EMIT(R_type(0b1100000, 0b00000, frs1, rm, rd, 0b1010011))
 
 #define FADDS(frd, frs1, frs2)      EMIT(R_type(0b0000000, frs2, frs1, 0b000, frd, 0b1010011))
 #define FSUBS(frd, frs1, frs2)      EMIT(R_type(0b0000100, frs2, frs1, 0b000, frd, 0b1010011))
diff --git a/src/dynarec/rv64/rv64_printer.c b/src/dynarec/rv64/rv64_printer.c
index ed6167ba..bdc424c1 100644
--- a/src/dynarec/rv64/rv64_printer.c
+++ b/src/dynarec/rv64/rv64_printer.c
@@ -1315,7 +1315,7 @@ const char* rv64_print(uint32_t data, uintptr_t addr)
                     insn.name = "fcvt.d.lu";
                     break;
                 }
-                PRINT_xd_fs1();
+                PRINT_fd_xs1();
             }
             case 0x70: {
                 assert(RS2(data) == 0);