about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Yang Liu <liuyang22@iscas.ac.cn> 2024-02-20 19:06:38 +0800
committer GitHub <noreply@github.com> 2024-02-20 12:06:38 +0100
commit 4f3b9f19a73df1e614e310341792ef8440b3df3b (patch)
tree ae69a14970a4fc628946e1a82c7a48a27f2b1241 /src
parent e562baf30b07e2241770dd4d69ab47d597c93ad6 (diff)
download box64-4f3b9f19a73df1e614e310341792ef8440b3df3b.tar.gz
box64-4f3b9f19a73df1e614e310341792ef8440b3df3b.zip
[DYNAREC_RV64] Added more opcodes and some minor optimizations (#1272)
* Added DD /1 FISTTP i64 opcode

* Some small optimizations

* Added 0F AD SHRD opcode and some minor optimizations on the CF flag computation
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_0f.c 14
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_660f.c 3
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_db.c 7
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_dd.c 22
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_emit_shift.c 111
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_f20f.c 3
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_helper.h 5
7 files changed, 126 insertions, 39 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c
index 83bfa752..3cd3e92f 100644
--- a/src/dynarec/rv64/dynarec_rv64_0f.c
+++ b/src/dynarec/rv64/dynarec_rv64_0f.c
@@ -1269,6 +1269,20 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             emit_shrd32c(dyn, ninst, rex, ed, gd, u8, x3, x4);
             WBACK;
             break;
+        case 0xAD:
+            nextop = F8;
+            INST_NAME("SHRD Ed, Gd, CL");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            if (box64_dynarec_safeflags > 1)
+                MAYSETFLAGS();
+            GETGD;
+            GETED(0);
+            if (!rex.w && !rex.is32bits && MODREG) { ZEROUP(ed); }
+            ANDI(x3, xRCX, rex.w ? 0x3f : 0x1f);
+            BEQ_NEXT(x3, xZR);
+            emit_shrd32(dyn, ninst, rex, ed, gd, x3, x5, x4);
+            WBACK;
+            break;
         case 0xAE:
             nextop = F8;
             if ((nextop & 0xF8) == 0xE8) {
diff --git a/src/dynarec/rv64/dynarec_rv64_660f.c b/src/dynarec/rv64/dynarec_rv64_660f.c
index dd4ee93a..adc7855e 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f.c
@@ -1288,8 +1288,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 FLW(d0, wback, fixedaddress + 4 * i);
                 FCVTLS(x3, d0, RD_DYN);
                 SEXT_W(x5, x3);
-                SUB(x5, x5, x3);
-                BEQZ(x5, 8);
+                BEQ(x5, x3, 8);
                 LUI(x3, 0x80000); // INT32_MIN
                 SW(x3, gback, gdoffset + 4 * i);
             }
diff --git a/src/dynarec/rv64/dynarec_rv64_db.c b/src/dynarec/rv64/dynarec_rv64_db.c
index 71d77451..a647ee11 100644
--- a/src/dynarec/rv64/dynarec_rv64_db.c
+++ b/src/dynarec/rv64/dynarec_rv64_db.c
@@ -236,13 +236,10 @@ uintptr_t dynarec64_DB(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     if (!box64_dynarec_fastround) {
                         FRFLAGS(x5); // get back FPSR to check the IOC bit
                         ANDI(x5, x5, 1 << FR_NV);
-                        BNEZ_MARK(x5);
-                        SEXT_W(x5, x4);
-                        BEQ_MARK2(x5, x4);
-                        MARK;
+                        BEQZ_MARK(x5);
                         MOV32w(x4, 0x80000000);
+                        MARK;
                     }
-                    MARK2;
                     SW(x4, wback, fixedaddress);
                     X87_POP_OR_FAIL(dyn, ninst, x3);
                     break;
diff --git a/src/dynarec/rv64/dynarec_rv64_dd.c b/src/dynarec/rv64/dynarec_rv64_dd.c
index 35273745..d1255655 100644
--- a/src/dynarec/rv64/dynarec_rv64_dd.c
+++ b/src/dynarec/rv64/dynarec_rv64_dd.c
@@ -160,6 +160,28 @@ uintptr_t dynarec64_DD(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0);
                     FLD(v1, wback, fixedaddress);
                     break;
+                case 1:
+                    INST_NAME("FISTTP i64, ST0");
+                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_I64);
+                    addr = geted(dyn, addr, ninst, nextop, &wback, x3, x4, &fixedaddress, rex, NULL, 1, 0);
+                    if (ST_IS_I64(0)) {
+                        FSD(v1, wback, fixedaddress);
+                    } else {
+                        if (!box64_dynarec_fastround) {
+                            FSFLAGSI(0); // reset all bits
+                        }
+                        FCVTLD(x4, v1, RD_RTZ);
+                        if (!box64_dynarec_fastround) {
+                            FRFLAGS(x5); // get back FPSR to check the IOC bit
+                            ANDI(x5, x5, 1 << FR_NV);
+                            BEQZ_MARK(x5);
+                            MOV64x(x4, 0x8000000000000000);
+                            MARK;
+                        }
+                        SD(x4, wback, fixedaddress);
+                    }
+                    X87_POP_OR_FAIL(dyn, ninst, x3);
+                    break;
                 case 2:
                     INST_NAME("FST double");
                     v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D);
diff --git a/src/dynarec/rv64/dynarec_rv64_emit_shift.c b/src/dynarec/rv64/dynarec_rv64_emit_shift.c
index f5d5ade1..23bf1097 100644
--- a/src/dynarec/rv64/dynarec_rv64_emit_shift.c
+++ b/src/dynarec/rv64/dynarec_rv64_emit_shift.c
@@ -148,9 +148,8 @@ void emit_shr32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
     IFX(X_CF) {
         SUBI(s3, s2, 1);
         SRA(s3, s1, s3);
-        ANDI(s3, s3, 1); // LSB
-        BEQZ(s3, 8);
-        ORI(xFlags, xFlags, 1 << F_CF);
+        ANDI(s3, s3, 1); // LSB == F_CF
+        OR(xFlags, xFlags, s3);
     }
 
     SRL(s1, s1, s2);
@@ -208,14 +207,12 @@ void emit_shr32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
     IFX(X_CF) {
         if (c > 1) {
             SRAI(s3, s1, c-1);
-            ANDI(s3, s3, 1); // LSB
-            BEQZ(s3, 8);
+            ANDI(s3, s3, 1); // LSB == F_CF
         } else {
             // no need to shift
-            ANDI(s3, s1, 1);
-            BEQZ(s3, 8);
+            ANDI(s3, s1, 1); // LSB == F_CF
         }
-        ORI(xFlags, xFlags, 1 << F_CF);
+        OR(xFlags, xFlags, s3);
     }
 
     SRLIxw(s1, s1, c);
@@ -273,14 +270,12 @@ void emit_sar32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
     IFX(X_CF) {
         if (c > 1) {
             SRAI(s3, s1, c-1);
-            ANDI(s3, s3, 1); // LSB
-            BEQZ(s3, 8);
+            ANDI(s3, s3, 1); // LSB == F_CF
         } else {
             // no need to shift
-            ANDI(s3, s1, 1);
-            BEQZ(s3, 8);
+            ANDI(s3, s1, 1); // LSB == F_CF
         }
-        ORI(xFlags, xFlags, 1 << F_CF);
+        OR(xFlags, xFlags, s3);
     }
 
     SRAIxw(s1, s1, c);
@@ -519,14 +514,11 @@ void emit_shrd32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin
     IFX(X_CF) {
         if (c > 1) {
             SRAI(s3, s1, c-1);
-            ANDI(s3, s3, 1); // LSB
-            BEQZ(s3, 8);
+            ANDI(s3, s3, 1); // LSB == F_CF
         } else {
-            // no need to shift
-            ANDI(s3, s1, 1);
-            BEQZ(s3, 8);
+            ANDI(s3, s1, 1); // LSB == F_CF
         }
-        ORI(xFlags, xFlags, 1 << F_CF);
+        OR(xFlags, xFlags, s3);
     }
 
     SRLIxw(s3, s1, c);
@@ -586,14 +578,12 @@ void emit_shrd16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin
     IFX(X_CF) {
         if (c > 1) {
             SRAI(s3, s1, c-1);
-            ANDI(s3, s3, 1); // LSB
-            BEQZ(s3, 8);
+            ANDI(s3, s3, 1); // LSB == F_CF
         } else {
             // no need to shift
-            ANDI(s3, s1, 1);
-            BEQZ(s3, 8);
+            ANDI(s3, s1, 1); // LSB == F_CF
         }
-        ORI(xFlags, xFlags, 1 << F_CF);
+        OR(xFlags, xFlags, s3);
     }
 
     SRLIxw(s3, s1, c);
@@ -628,7 +618,8 @@ void emit_shrd16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin
     }
 }
 
-void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4, int s5) {
+void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4, int s5)
+{
     c&=(rex.w?0x3f:0x1f);
     CLEAR_FLAGS();
     IFX(X_PEND) {
@@ -689,7 +680,67 @@ void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin
     }
 }
 
-void emit_shld32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s5, int s4, int s3) {
+// emit SHRD32/64 (width from rex.w) with a register count: s1 = dest (in/out), s2 = source, s5 = count (clobbered for OF), s3/s4 = scratch
+void emit_shrd32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s5, int s3, int s4)
+{
+    int64_t j64; // scratch used by the *_MARK branch macros
+    CLEAR_FLAGS();
+    IFX(X_PEND) {
+        SDxw(s1, xEmu, offsetof(x64emu_t, op1)); // deferred flags: op1 = value being shifted
+        SDxw(s5, xEmu, offsetof(x64emu_t, op2)); // deferred flags: op2 = shift count
+        SET_DF(s4, rex.w ? d_shrd64 : d_shrd32);
+    } else IFX(X_ALL) {
+        SET_DFNONE();
+    }
+    IFX(X_CF) {
+        SUBI(s3, s5, 1); // count-1 needs the immediate form (SUB takes a register operand), same as emit_shr32
+        SRA(s3, s1, s3); // bring the last bit shifted out down to bit 0
+        ANDI(s3, s3, 1); // LSB == F_CF
+        OR(xFlags, xFlags, s3);
+    }
+    IFX(X_OF) {
+        SRLIxw(s4, s1, rex.w ? 63 : 31); // immediate shift: top bit of the original value
+        BEQZ(s4, 8); // skip the ORI when that bit is clear
+        ORI(xFlags, xFlags, 1 << F_OF2); // remember it in F_OF2 until the result is known
+    }
+    ADDI(s4, xZR, (rex.w ? 64 : 32));
+    SUB(s4, s4, s5); // s4 = width - count
+    SRLxw(s3, s1, s5); // low part: dest >> count
+    SLLxw(s4, s2, s4); // high part: source << (width - count)
+    OR(s1, s4, s3);
+
+    IFX(X_PEND) {
+        SDxw(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_SF) {
+        BGE(s1, xZR, 8); // skip the ORI when the result is non-negative
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+    if (!rex.w) {
+        ZEROUP(s1);
+    }
+    IFX(X_ZF) {
+        BNEZ(s1, 8); // skip the ORI when the result is non-zero
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX(X_OF) {
+        ADDI(s5, s5, -1); // s5 becomes 0 only when count == 1
+        BNEZ_MARK(s5); // count != 1: leave F_OF2 as saved above
+        SRLIxw(s3, s1, rex.w?63:31); // top bit of the result
+        BEXTI(s4, xFlags, F_OF2); // top bit of the original value
+        XOR(s3, s3, s4); // 1 when the top bit changed
+        ANDI(xFlags, xFlags, ~(1<<F_OF2));
+        BEQZ(s3, 8);
+        ORI(xFlags, xFlags, 1 << F_OF2);
+        MARK;
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
+
+void emit_shld32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s5, int s3, int s4)
+{
     int64_t j64;
     CLEAR_FLAGS();
     IFX(X_PEND) {
@@ -703,9 +754,8 @@ void emit_shld32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int
     SUB(s3, s3, s5);
     IFX(X_CF) {
         SRL(s4, s1, s3);
-        ANDI(s4, s4, 1);
-        BEQZ(s4, 8);
-        ORI(xFlags, xFlags, 1 << F_CF);
+        ANDI(s4, s4, 1); // LSB == F_CF
+        OR(xFlags, xFlags, s4);
     }
     IFX(X_OF) {
         SRLxw(s4, s1, rex.w?63:31);
@@ -746,7 +796,8 @@ void emit_shld32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int
     }
 }
 
-void emit_shld16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4, int s5) {
+void emit_shld16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4, int s5)
+{
     c&=15;
     CLEAR_FLAGS();
     IFX(X_PEND) {
diff --git a/src/dynarec/rv64/dynarec_rv64_f20f.c b/src/dynarec/rv64/dynarec_rv64_f20f.c
index 04f23a70..2d063aba 100644
--- a/src/dynarec/rv64/dynarec_rv64_f20f.c
+++ b/src/dynarec/rv64/dynarec_rv64_f20f.c
@@ -365,8 +365,7 @@ uintptr_t dynarec64_F20F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 FLD(d0, wback, fixedaddress+8*i);
                 FCVTLD(x3, d0, RD_DYN);
                 SEXT_W(x5, x3);
-                SUB(x5, x5, x3);
-                BEQZ(x5, 8);
+                BEQ(x5, x3, 8);
                 LUI(x3, 0x80000); // INT32_MIN
                 SW(x3, gback, gdoffset+4*i);
             }
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h
index 85ffd5e3..e1048260 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.h
+++ b/src/dynarec/rv64/dynarec_rv64_helper.h
@@ -685,6 +685,9 @@
 // Branch to MARK if reg1==reg2 (use j64)
 #define BEQ_MARK(reg1, reg2)     Bxx_gen(EQ, MARK, reg1, reg2)
 #define BEQ_MARKi(reg1, reg2, i) Bxx_geni(EQ, MARK, reg1, reg2, i)
+// Branch to MARK if reg==0 (use j64)
+#define BEQZ_MARK(reg)     BEQ_MARK(reg, xZR)
+#define BEQZ_MARKi(reg, i) BEQ_MARKi(reg, xZR, i)
 // Branch to MARK if reg1!=reg2 (use j64)
 #define BNE_MARK(reg1, reg2)     Bxx_gen(NE, MARK, reg1, reg2)
 #define BNE_MARKi(reg1, reg2, i) Bxx_geni(NE, MARK, reg1, reg2, i)
@@ -1161,6 +1164,7 @@ void* rv64_next(x64emu_t* emu, uintptr_t addr);
 #define emit_ror32c         STEPNAME(emit_ror32c)
 #define emit_shrd32c        STEPNAME(emit_shrd32c)
 #define emit_shld32c        STEPNAME(emit_shld32c)
+#define emit_shrd32         STEPNAME(emit_shrd32)
 #define emit_shld32         STEPNAME(emit_shld32)
 #define emit_shld16c        STEPNAME(emit_shld16c)
 #define emit_shrd16c        STEPNAME(emit_shrd16c)
@@ -1299,6 +1303,7 @@ void emit_rol32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
 void emit_ror32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
 void emit_shrd32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4);
 void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4, int s5);
+void emit_shrd32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s5, int s3, int s4);
 void emit_shld32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s5, int s3, int s4);
 void emit_shrd16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4);
 void emit_shld16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4, int s5);