about summary refs log tree commit diff stats
path: root/src/dynarec/rv64/dynarec_rv64_emit_math.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/dynarec/rv64/dynarec_rv64_emit_math.c')
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_emit_math.c 212
1 files changed, 161 insertions, 51 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_emit_math.c b/src/dynarec/rv64/dynarec_rv64_emit_math.c
index 01579ea3..5d6f7e0e 100644
--- a/src/dynarec/rv64/dynarec_rv64_emit_math.c
+++ b/src/dynarec/rv64/dynarec_rv64_emit_math.c
@@ -1,7 +1,6 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <stddef.h>
-#include <pthread.h>
 #include <errno.h>
 
 #include "debug.h"
@@ -16,7 +15,6 @@
 #include "emu/x64run_private.h"
 #include "x64trace.h"
 #include "dynarec_native.h"
-#include "../tools/bridge_private.h"
 
 #include "rv64_printer.h"
 #include "dynarec_rv64_private.h"
@@ -37,8 +35,7 @@ void emit_add32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
     IFX(X_CF) {
         if (rex.w) {
             AND(s5, xMASK, s1);
-            AND(s4, xMASK, s2);
-            ADD(s5, s5, s4); // lo
+            if(rv64_zba) ADDUW(s5, s2, s5); else {AND(s4, xMASK, s2); ADD(s5, s5, s4);} // lo
             SRLI(s3, s1, 0x20);
             SRLI(s4, s2, 0x20);
             ADD(s4, s4, s3);
@@ -65,8 +62,12 @@ void emit_add32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
         SDxw(s1, xEmu, offsetof(x64emu_t, res));
     }
     IFX(X_AF | X_OF) {
-        NOT(s5, s1);   // s5 = ~res
-        AND(s3, s5, s3);   // s3 = ~res & (op1 | op2)
+        if(rv64_zbb) {
+            ANDN(s3, s1, s3);   // s3 = ~res & (op1 | op2)
+        } else {
+            NOT(s5, s1);   // s5 = ~res
+            AND(s3, s5, s3);   // s3 = ~res & (op1 | op2)
+        }
         OR(s3, s3, s4);   // cc = (~res & (op1 | op2)) | (op1 & op2)
         IFX(X_AF) {
             ANDI(s4, s3, 0x08); // AF: cc & 0x08
@@ -126,8 +127,7 @@ void emit_add32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i
     IFX(X_CF) {
         if (rex.w) {
             AND(s5, xMASK, s1);
-            AND(s4, xMASK, s2);
-            ADD(s5, s5, s4); // lo
+            if(rv64_zba) ADDUW(s5, s2, s5); else {AND(s4, xMASK, s2); ADD(s5, s5, s4);} // lo
             SRLI(s3, s1, 0x20);
             SRLI(s4, s2, 0x20);
             ADD(s4, s4, s3);
@@ -159,8 +159,12 @@ void emit_add32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i
         SDxw(s1, xEmu, offsetof(x64emu_t, res));
     }
     IFX(X_AF | X_OF) {
-        NOT(s2, s1);   // s2 = ~res
-        AND(s3, s2, s3);   // s3 = ~res & (op1 | op2)
+        if(rv64_zbb) {
+            ANDN(s3, s1, s3);   // s3 = ~res & (op1 | op2)
+        } else {
+            NOT(s2, s1);   // s2 = ~res
+            AND(s3, s2, s3);   // s3 = ~res & (op1 | op2)
+        }
         OR(s3, s3, s4);   // cc = (~res & (op1 | op2)) | (op1 & op2)
         IFX(X_AF) {
             ANDI(s4, s3, 0x08); // AF: cc & 0x08
@@ -213,8 +217,12 @@ void emit_add16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
         SW(s1, xEmu, offsetof(x64emu_t, res));
     }
     IFX(X_AF | X_OF) {
-        NOT(s5, s1);   // s5 = ~res
-        AND(s3, s5, s3);   // s3 = ~res & (op1 | op2)
+        if(rv64_zbb) {
+            ANDN(s3, s1, s3);    // s3 = ~res & (op1 | op2)
+        } else {
+            NOT(s5, s1);   // s5 = ~res
+            AND(s3, s5, s3);   // s3 = ~res & (op1 | op2)
+        }
         OR(s3, s3, s4);   // cc = (~res & (op1 | op2)) | (op1 & op2)
         IFX(X_AF) {
             ANDI(s4, s3, 0x08); // AF: cc & 0x08
@@ -237,8 +245,7 @@ void emit_add16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
         ORI(xFlags, xFlags, 1 << F_CF);
     }
 
-    SLLI(s1, s1, 48);
-    SRLI(s1, s1, 48);
+    ZEXTH(s1, s1);
 
     IFX(X_ZF) {
         BNEZ(s1, 8);
@@ -272,8 +279,12 @@ void emit_add8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
     ADD(s1, s1, s2);
 
     IFX(X_AF|X_OF) {
-        NOT(s4, s1);   // s4 = ~res
-        AND(s3, s4, s3);   // s3 = ~res & (op1 | op2)
+        if(rv64_zbb) {
+            ANDN(s3, s1, s3);   // s3 = ~res & (op1 | op2)
+        } else {
+            NOT(s4, s1);   // s4 = ~res
+            AND(s3, s4, s3);   // s3 = ~res & (op1 | op2)
+        }
         OR(s3, s3, s2);   // cc = (~res & (op1 | op2)) | (op1 & op2)
         IFX(X_AF) {
             ANDI(s4, s3, 0x08); // AF: cc & 0x08
@@ -332,8 +343,12 @@ void emit_add8c(dynarec_rv64_t* dyn, int ninst, int s1, int c, int s2, int s3, i
     ADDI(s1, s1, c);
 
     IFX(X_AF|X_OF) {
-        NOT(s2, s1);   // s2 = ~res
-        AND(s3, s2, s3);   // s3 = ~res & (op1 | op2)
+        if(rv64_zbb) {
+            ANDN(s3, s1, s3);   // s3 = ~res & (op1 | op2)
+        } else {
+            NOT(s2, s1);   // s2 = ~res
+            AND(s3, s2, s3);   // s3 = ~res & (op1 | op2)
+        }
         OR(s3, s3, s4);   // cc = (~res & (op1 | op2)) | (op1 & op2)
         IFX(X_AF) {
             ANDI(s4, s3, 0x08); // AF: cc & 0x08
@@ -580,8 +595,12 @@ void emit_inc8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
         SB(s1, xEmu, offsetof(x64emu_t, res));
     }
     IFX(X_AF | X_OF) {
-        NOT(s2, s1);   // s2 = ~res
-        AND(s3, s2, s3);   // s3 = ~res & (op1 | op2)
+        if(rv64_zbb) {
+            ANDN(s3, s1, s3);   // s3 = ~res & (op1 | op2)
+        } else {
+            NOT(s2, s1);   // s2 = ~res
+            AND(s3, s2, s3);   // s3 = ~res & (op1 | op2)
+        }
         OR(s3, s3, s4);   // cc = (~res & (op1 | op2)) | (op1 & op2)
         IFX(X_AF) {
             ANDI(s2, s3, 0x08); // AF: cc & 0x08
@@ -625,8 +644,9 @@ void emit_dec8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
         SET_DFNONE();
     }
     IFX(X_AF | X_OF) {
-        ORI(s3, s1, 1);      // s3 = op1 | op2
-        ANDI(s4, s1, 1);      // s4 = op1 & op2
+        NOT(s4, s1);        // s4 = ~op1
+        ORI(s3, s4, 1);      // s3 = ~op1 | op2
+        ANDI(s4, s4, 1);      // s4 = ~op1 & op2
     }
 
     ADDIW(s1, s1, -1);
@@ -635,9 +655,8 @@ void emit_dec8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
         SB(s1, xEmu, offsetof(x64emu_t, res));
     }
     IFX(X_AF | X_OF) {
-        NOT(s2, s1);   // s2 = ~res
-        AND(s3, s2, s3);   // s3 = ~res & (op1 | op2)
-        OR(s3, s3, s4);   // cc = (~res & (op1 | op2)) | (op1 & op2)
+        AND(s3, s1, s3);   // s3 = res & (~op1 | op2)
+        OR(s3, s3, s4);   // cc = (res & (~op1 | op2)) | (~op1 & op2)
         IFX(X_AF) {
             ANDI(s2, s3, 0x08); // AF: cc & 0x08
             BEQZ(s2, 8);
@@ -689,8 +708,12 @@ void emit_inc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
         SDxw(s1, xEmu, offsetof(x64emu_t, res));
     }
     IFX(X_AF | X_OF) {
-        NOT(s2, s1);   // s2 = ~res
-        AND(s3, s2, s3);   // s3 = ~res & (op1 | op2)
+        if(rv64_zbb) {
+            ANDN(s3, s1, s3);    // s3 = ~res & (op1 | op2)
+        } else {
+            NOT(s2, s1);   // s2 = ~res
+            AND(s3, s2, s3);   // s3 = ~res & (op1 | op2)
+        }
         OR(s3, s3, s5);   // cc = (~res & (op1 | op2)) | (op1 & op2)
         IFX(X_AF) {
             ANDI(s2, s3, 0x08); // AF: cc & 0x08
@@ -781,6 +804,9 @@ void emit_dec32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
 // emit INC16 instruction, from s1, store result in s1 using s3 and s4 as scratch
 void emit_inc16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
 {
+    IFX(X_ALL) {
+        ANDI(xFlags, xFlags, ~((1UL<<F_AF) | (1UL<<F_OF2) | (1UL<<F_ZF) | (1UL<<F_SF) | (1UL<<F_PF)));
+    }
     IFX(X_PEND) {
         SH(s1, xEmu, offsetof(x64emu_t, op1));
         SET_DF(s3, d_inc16);
@@ -798,8 +824,12 @@ void emit_inc16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
         SH(s1, xEmu, offsetof(x64emu_t, res));
     }
     IFX(X_AF | X_OF) {
-        NOT(s2, s1);   // s2 = ~res
-        AND(s3, s2, s3);   // s3 = ~res & (op1 | op2)
+        if(rv64_zbb) {
+            ANDN(s3, s1, s3);   // s3 = ~res & (op1 | op2)
+        } else {
+            NOT(s2, s1);   // s2 = ~res
+            AND(s3, s2, s3);   // s3 = ~res & (op1 | op2)
+        }
         OR(s3, s3, s4);   // cc = (~res & (op1 | op2)) | (op1 & op2)
         IFX(X_AF) {
             ANDI(s4, s3, 0x08); // AF: cc & 0x08
@@ -816,8 +846,7 @@ void emit_inc16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
         }
     }
 
-    SLLI(s1, s1, 48);
-    SRLI(s1, s1, 48);
+    ZEXTH(s1, s1);
 
     IFX(X_ZF) {
         BNEZ(s1, 8);
@@ -909,6 +938,7 @@ void emit_sbb8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i
     SUBW(s1, s1, s3);
     ANDI(s1, s1, 0xff);
 
+    CLEAR_FLAGS();
     IFX(X_PEND) {
         SB(s1, xEmu, offsetof(x64emu_t, res));
     }
@@ -928,6 +958,78 @@ void emit_sbb8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i
     }
 }
 
+// emit ADC8 instruction (s1 + s2 + carry-in), store result in s1 using s3, s4 and s5 as scratch; s2 is also overwritten on the non-Zbb AF/OF path
+void emit_adc8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5) {
+    IFX(X_PEND) {    // record both operands and the operation kind in the emu state (presumably for deferred flag evaluation)
+        SH(s1, xEmu, offsetof(x64emu_t, op1));    // NOTE(review): halfword store for an 8-bit operand - confirm the width is intended
+        SH(s2, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s3, d_adc8);    // s3 is handed to SET_DF as its scratch register
+    } else IFX(X_ALL) {
+        SET_DFNONE();
+    }
+    IFX(X_AF | X_OF) {    // capture operand combinations now, before s1 is overwritten by the sum
+        OR(s4, s1, s2);  // s4 = op1 | op2
+        AND(s5, s1, s2); // s5 = op1 & op2
+    }
+
+    ADD(s1, s1, s2);    // s1 = op1 + op2
+    ANDI(s3, xFlags, 1 << F_CF);    // s3 = incoming carry, isolated from xFlags
+    ADD(s1, s1, s3);    // s1 += carry-in; NOTE(review): adds the masked bit unshifted, so this relies on F_CF being bit 0 - confirm
+
+    CLEAR_FLAGS();    // placed after the carry-in read above; presumably resets the flag bits that are recomputed below
+    IFX(X_PEND) {
+        SW(s1, xEmu, offsetof(x64emu_t, res));    // stored before the 0xff mask below, so the carry-out bit is still present in s1
+    }
+    IFX(X_AF | X_OF) {
+        if(rv64_zbb) {
+            ANDN(s3, s1, s4);   // intended: s3 = ~res & (op1 | op2); NOTE(review): RISC-V andn is rs1 & ~rs2 - confirm the ANDN macro operand order gives this
+        } else {
+            NOT(s2, s1);     // s2 = ~res (op2 is no longer needed at this point)
+            AND(s3, s2, s4); // s3 = ~res & (op1 | op2)
+        }
+        OR(s3, s3, s5);  // cc = (~res & (op1 | op2)) | (op1 & op2)
+        IFX(X_AF) {
+            ANDI(s4, s3, 0x08); // AF: cc & 0x08
+            BEQZ(s4, 8);    // branch offset 8: skips the ORI below when the bit is clear
+            ORI(xFlags, xFlags, 1 << F_AF);
+        }
+        IFX(X_OF) {
+            SRLI(s3, s3, 6);    // bring cc bits 6 and 7 down to bits 0 and 1
+            SRLI(s4, s3, 1);    // s4 bit 0 = cc bit 7
+            XOR(s3, s3, s4);
+            ANDI(s3, s3, 1); // OF: xor of two MSB's of cc
+            BEQZ(s3, 8);    // skip the ORI below when no overflow
+            ORI(xFlags, xFlags, 1 << F_OF2);
+        }
+    }
+    IFX(X_CF) {
+        SRLI(s3, s1, 8);    // anything above bit 7 of the unmasked sum is the carry out
+        BEQZ(s3, 8);    // skip the ORI below when there is no carry out
+        ORI(xFlags, xFlags, 1 << F_CF);
+    }
+
+    ANDI(s1, s1, 0xff);    // truncate the result to 8 bits; ZF/SF/PF below use the truncated value
+
+    IFX(X_ZF) {
+        BNEZ(s1, 8);    // skip the ORI below when the result is non-zero
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX(X_SF) {
+        SRLI(s3, s1, 7);    // s3 = bit 7 of the 8-bit result
+        BEQZ(s3, 8);    // skip the ORI below when the sign bit is clear
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);    // parity is delegated to emit_pf; s3 and s4 are passed along (presumably as its scratch registers)
+    }
+}
+
+// emit ADC8 instruction, from s1, const c, store result in s1; s5 receives the constant, s3, s4 and s6 are passed on as scratch
+void emit_adc8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4, int s5, int s6) {
+    MOV32w(s5, c&0xff);    // load the low 8 bits of c into s5 so it can act as the second operand
+    emit_adc8(dyn, ninst, s1, s5, s3, s4, s6);    // reuse the register form: its s2 is our s5, its s5 is our s6
+}
+
 // emit SBB8 instruction, from s1, constant c, store result in s1 using s3, s4, s5 and s6 as scratch
 void emit_sbb8c(dynarec_rv64_t* dyn, int ninst, int s1, int c, int s3, int s4, int s5, int s6)
 {
@@ -955,6 +1057,7 @@ void emit_sbb16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
     ANDI(s3, xFlags, 1 << F_CF);
     SUBW(s1, s1, s3);
 
+    CLEAR_FLAGS();
     SLLIW(s1, s1, 16);
     IFX(X_SF) {
         BGE(s1, xZR, 8);
@@ -996,6 +1099,7 @@ void emit_sbb32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
     ANDI(s3, xFlags, 1 << F_CF);
     SUBxw(s1, s1, s3);
 
+    CLEAR_FLAGS();
     IFX(X_SF) {
         BGE(s1, xZR, 8);
         ORI(xFlags, xFlags, 1 << F_SF);
@@ -1091,8 +1195,7 @@ void emit_neg16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3)
     }
 
     NEG(s1, s1);
-    SLLI(s1, s1, 48);
-    SRLI(s1, s1, 48);
+    ZEXTH(s1, s1);
     IFX(X_PEND) {
         SH(s1, xEmu, offsetof(x64emu_t, res));
     }
@@ -1121,7 +1224,8 @@ void emit_neg16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3)
         }    
     }
     IFX(X_SF) {
-        ANDI(s3, s1, 1 << F_SF);    // 1<<F_SF is sign bit, so just mask
+        SRLI(s3, s1, 15-F_SF);    // put sign bit in place
+        ANDI(s3, s3, 1 << F_SF);    // 1<<F_SF is sign bit, so just mask
         OR(xFlags, xFlags, s3);
     }
     IFX(X_PF) {
@@ -1192,7 +1296,6 @@ void emit_neg8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3)
 // emit ADC16 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch
 void emit_adc16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
 {
-    CLEAR_FLAGS();
     IFX(X_PEND) {
         SH(s1, xEmu, offsetof(x64emu_t, op1));
         SH(s2, xEmu, offsetof(x64emu_t, op2));
@@ -1209,12 +1312,17 @@ void emit_adc16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
     ANDI(s3, xFlags, 1 << F_CF);
     ADD(s1, s1, s3);
 
+    CLEAR_FLAGS();
     IFX(X_PEND) {
         SW(s1, xEmu, offsetof(x64emu_t, res));
     }
     IFX(X_AF | X_OF) {
-        NOT(s2, s1);     // s2 = ~res
-        AND(s3, s2, s4); // s3 = ~res & (op1 | op2)
+        if(rv64_zbb) {
+            ANDN(s3, s1, s4);   // s3 = ~res & (op1 | op2)
+        } else {
+            NOT(s2, s1);     // s2 = ~res
+            AND(s3, s2, s4); // s3 = ~res & (op1 | op2)
+        }
         OR(s3, s3, s5);  // cc = (~res & (op1 | op2)) | (op1 & op2)
         IFX(X_AF) {
             ANDI(s4, s3, 0x08); // AF: cc & 0x08
@@ -1236,8 +1344,7 @@ void emit_adc16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
         ORI(xFlags, xFlags, 1 << F_CF);
     }
 
-    SLLI(s1, s1, 48);
-    SRLI(s1, s1, 48);
+    ZEXTH(s1, s1);
 
     IFX(X_ZF) {
         BNEZ(s1, 8);
@@ -1254,9 +1361,8 @@ void emit_adc16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
 }
 
 // emit ADC32 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch
-void emit_adc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5)
+void emit_adc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5, int s6)
 {
-    CLEAR_FLAGS();
     IFX(X_PEND) {
         SDxw(s1, xEmu, offsetof(x64emu_t, op1));
         SDxw(s2, xEmu, offsetof(x64emu_t, op2));
@@ -1267,21 +1373,16 @@ void emit_adc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
     IFX(X_CF) {
         if (rex.w) {
             AND(s5, xMASK, s1);
-            AND(s4, xMASK, s2);
-            ADD(s5, s5, s4); // lo
+            if(rv64_zba) ADDUW(s5, s2, s5); else {AND(s4, xMASK, s2); ADD(s5, s5, s4);} // lo
             SRLI(s3, s1, 0x20);
             SRLI(s4, s2, 0x20);
             ADD(s4, s4, s3);
             SRLI(s5, s5, 0x20);
             ADD(s5, s5, s4); // hi
-            SRAI(s5, s5, 0x20);
-            BEQZ(s5, 8);
-            ORI(xFlags, xFlags, 1 << F_CF);
+            SRAI(s6, s5, 0x20);
         } else {
             ADD(s5, s1, s2);
-            SRLI(s5, s5, 0x20);
-            BEQZ(s5, 8);
-            ORI(xFlags, xFlags, 1 << F_CF);
+            SRLI(s6, s5, 0x20);
         }
     }
     IFX(X_AF | X_OF) {
@@ -1293,12 +1394,21 @@ void emit_adc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
     ANDI(s3, xFlags, 1 << F_CF);
     ADDxw(s1, s1, s3);
 
+    CLEAR_FLAGS();
     IFX(X_PEND) {
         SDxw(s1, xEmu, offsetof(x64emu_t, res));
     }
+    IFX(X_CF) {
+        BEQZ(s6, 8);
+        ORI(xFlags, xFlags, 1 << F_CF);
+    }
     IFX(X_AF | X_OF) {
-        NOT(s2, s1);     // s2 = ~res
-        AND(s3, s2, s4); // s3 = ~res & (op1 | op2)
+        if(rv64_zbb) {
+            ANDN(s3, s1, s4);   // s3 = ~res & (op1 | op2)
+        } else {
+            NOT(s2, s1);     // s2 = ~res
+            AND(s3, s2, s4); // s3 = ~res & (op1 | op2)
+        }
         OR(s3, s3, s5);  // cc = (~res & (op1 | op2)) | (op1 & op2)
         IFX(X_AF) {
             ANDI(s4, s3, 0x08); // AF: cc & 0x08