about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2024-05-21 13:34:31 +0200
committer ptitSeb <sebastien.chev@gmail.com> 2024-05-21 13:34:31 +0200
commit 706ca3649e830bd52529096092496e358ee53085 (patch)
tree fd1594a45fb4823679e986d4831e366f22ce66b6
parent bb537e4dd5fe07b08c7d2fe01fe7798869aa959e (diff)
download box64-706ca3649e830bd52529096092496e358ee53085.tar.gz
box64-706ca3649e830bd52529096092496e358ee53085.zip
[RV64_DYNAREC] Fixed a bunch of x87 opcodes
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_d9.c 94
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_db.c 46
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_df.c 2
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_helper.c 40
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_helper.h 43
5 files changed, 179 insertions, 46 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_d9.c b/src/dynarec/rv64/dynarec_rv64_d9.c
index d5f2ad6d..ed75f0b5 100644
--- a/src/dynarec/rv64/dynarec_rv64_d9.c
+++ b/src/dynarec/rv64/dynarec_rv64_d9.c
@@ -128,9 +128,79 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             break;
         case 0xE5:
             INST_NAME("FXAM");
+            #if 1
+            i1 = x87_get_current_cache(dyn, ninst, 0, EXT_CACHE_ST_D);
+            // value put in x14
+            if(i1==-1) {
+                if(fpu_is_st_freed(dyn, ninst, 0)) {
+                    MOV32w(x4, 0b100000100000000);
+                    B_MARK3_nocond;
+                } else {
+                    // not in cache, so check Empty status and load it
+                    i2 = -dyn->e.x87stack;
+                    LWU(x3, xEmu, offsetof(x64emu_t, fpu_stack));
+                    if(i2) {
+                        ADDI(x3, x3, i2);
+                    }
+                    MOV32w(x4, 0b100000100000000);
+                    BGE_MARK3(xZR, x3);
+                    // x5 will be the actual top
+                    LWU(x5, xEmu, offsetof(x64emu_t, top));
+                    if(i2) {
+                        ADDI(x5, x5, i2);
+                        ANDI(x5, x5, 7);    // (emu->top + i)&7
+                    }
+                    // load tag
+                    LHU(x3, xEmu, offsetof(x64emu_t, fpu_tags));
+                    MOV32w(x4, 0b100000100000000);
+                    ANDI(x2, x3, 0b11);
+                    BNEZ_MARK3(x2); // empty: C3,C2,C0 = 101
+                    // load x2 with ST0 for sign extraction: index the x87 array with the actual top (x5); x2 is always 0 here (tag check above fell through)
+                    if(rv64_zba) SH3ADD(x1, x5, xEmu); else {SLLI(x5, x5, 3); ADD(x1, xEmu, x5);}
+                    LD(x2, x1, offsetof(x64emu_t, x87));
+                }
+            } else {
+                // simply move from cache reg to x2
+                v1 = dyn->e.x87reg[i1];
+                FMVXD(x2, v1);
+            }
+            // get exponent in x1
+            SRLI(x1, x2, 20+32);
+            ANDI(x1, x1, 0x7ff); // 0x7ff
+            BNEZ_MARK(x1);  // not zero or denormal
+            MOV64x(x3, 0x7fffffffffffffff);
+            AND(x1, x2, x3);
+            MOV32w(x4, 0b100000000000000); // Zero: C3,C2,C0 = 100
+            BEQZ_MARK3(x1);
+            MOV32w(x4, 0b100010000000000); // Denormal: C3,C2,C0 = 110
+            B_MARK3_nocond;
+            MARK;
+            ADDI(x3, xZR, 0x7ff);   // infinite/NaN?
+            MOV32w(x4, 0b000010000000000); // normal: C3,C2,C0 = 010
+            BNE_MARK3(x1, x3);
+            SLLI(x3, x2, 12);
+            SRLI(x3, x3, 12);   // and 0x000fffffffffffff
+            MOV32w(x4, 0b000010100000000); // infinity: C3,C2,C0 = 011
+            BEQZ_MARK3(x3);
+            MOV32w(x4, 0b000000100000000); // NaN: C3,C2,C0 = 001
+            MARK3;
+            // Extract sign & Update SW
+            SRLI(x1, x2, 63);
+            ANDI(x4, x4, ~(1<<9));
+            SLLI(x1, x1, 9);
+            OR(x4, x4, x1); //C1
+            LHU(x1, xEmu, offsetof(x64emu_t, sw));
+            MOV32w(x2, ~0b0100011100000000);
+            AND(x1, x1, x2);
+            OR(x4, x4, x1);
+            SH(x4, xEmu, offsetof(x64emu_t, sw));
+            #else
             MESSAGE(LOG_DUMP, "Need Optimization\n");
             x87_refresh(dyn, ninst, x1, x2, 0);
+            s0 = x87_stackcount(dyn, ninst, x1);
             CALL(fpu_fxam, -1);  // should be possible inline, but is it worth it?
+            x87_unstackcount(dyn, ninst, x1, s0);
+            #endif
             break;
 
         case 0xE8:
@@ -183,21 +253,27 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("F2XM1");
             MESSAGE(LOG_DUMP, "Need Optimization\n");
             x87_forget(dyn, ninst, x1, x2, 0);
+            s0 = x87_stackcount(dyn, ninst, x3);
             CALL(native_f2xm1, -1);
+            x87_unstackcount(dyn, ninst, x3, s0);
             break;
         case 0xF1:
             INST_NAME("FYL2X");
             MESSAGE(LOG_DUMP, "Need Optimization\n");
             x87_forget(dyn, ninst, x1, x2, 0);
             x87_forget(dyn, ninst, x1, x2, 1);
+            s0 = x87_stackcount(dyn, ninst, x3);
             CALL(native_fyl2x, -1);
+            x87_unstackcount(dyn, ninst, x3, s0);
             X87_POP_OR_FAIL(dyn, ninst, x3);
             break;
         case 0xF2:
             INST_NAME("FPTAN");
             MESSAGE(LOG_DUMP, "Need Optimization\n");
             x87_forget(dyn, ninst, x1, x2, 0);
+            s0 = x87_stackcount(dyn, ninst, x3);
             CALL(native_ftan, -1);
+            x87_unstackcount(dyn, ninst, x3, s0);
             X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_F);
             if(ST_IS_F(0)) {
                 MOV32w(x1, 0x3f800000);
@@ -212,7 +288,9 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             MESSAGE(LOG_DUMP, "Need Optimization\n");
             x87_forget(dyn, ninst, x1, x2, 0);
             x87_forget(dyn, ninst, x1, x2, 1);
+            s0 = x87_stackcount(dyn, ninst, x3);
             CALL(native_fpatan, -1);
+            x87_unstackcount(dyn, ninst, x3, s0);
             X87_POP_OR_FAIL(dyn, ninst, x3);
             break;
         case 0xF4:
@@ -220,14 +298,18 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             MESSAGE(LOG_DUMP, "Need Optimization\n");
             X87_PUSH_EMPTY_OR_FAIL(dyn, ninst, 0);
             x87_forget(dyn, ninst, x1, x2, 1);
+            s0 = x87_stackcount(dyn, ninst, x3);
             CALL(native_fxtract, -1);
+            x87_unstackcount(dyn, ninst, x3, s0);
             break;
         case 0xF5:
             INST_NAME("FPREM1");
             MESSAGE(LOG_DUMP, "Need Optimization\n");
             x87_forget(dyn, ninst, x1, x2, 0);
             x87_forget(dyn, ninst, x1, x2, 1);
+            s0 = x87_stackcount(dyn, ninst, x3);
             CALL(native_fprem1, -1);
+            x87_unstackcount(dyn, ninst, x3, s0);
             break;
         case 0xF6:
             INST_NAME("FDECSTP");
@@ -250,14 +332,18 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             MESSAGE(LOG_DUMP, "Need Optimization\n");
             x87_forget(dyn, ninst, x1, x2, 0);
             x87_forget(dyn, ninst, x1, x2, 1);
+            s0 = x87_stackcount(dyn, ninst, x3);
             CALL(native_fprem, -1);
+            x87_unstackcount(dyn, ninst, x3, s0);
             break;
         case 0xF9:
             INST_NAME("FYL2XP1");
             MESSAGE(LOG_DUMP, "Need Optimization\n");
             x87_forget(dyn, ninst, x1, x2, 0);
             x87_forget(dyn, ninst, x1, x2, 1);
+            s0 = x87_stackcount(dyn, ninst, x3);
             CALL(native_fyl2xp1, -1);
+            x87_unstackcount(dyn, ninst, x3, s0);
             X87_POP_OR_FAIL(dyn, ninst, x3);
             break;
         case 0xFA:
@@ -274,7 +360,9 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             MESSAGE(LOG_DUMP, "Need Optimization\n");
             X87_PUSH_EMPTY_OR_FAIL(dyn, ninst, 0);
             x87_forget(dyn, ninst, x1, x2, 1);
+            s0 = x87_stackcount(dyn, ninst, x3);
             CALL(native_fsincos, -1);
+            x87_unstackcount(dyn, ninst, x3, s0);
             break;
         case 0xFC:
             INST_NAME("FRNDINT");
@@ -321,19 +409,25 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             MESSAGE(LOG_DUMP, "Need Optimization\n");
             x87_forget(dyn, ninst, x1, x2, 0);
             x87_forget(dyn, ninst, x1, x2, 1);
+            s0 = x87_stackcount(dyn, ninst, x3);
             CALL(native_fscale, -1);
+            x87_unstackcount(dyn, ninst, x3, s0);
             break;
         case 0xFE:
             INST_NAME("FSIN");
             MESSAGE(LOG_DUMP, "Need Optimization\n");
             x87_forget(dyn, ninst, x1, x2, 0);
+            s0 = x87_stackcount(dyn, ninst, x3);
             CALL(native_fsin, -1);
+            x87_unstackcount(dyn, ninst, x3, s0);
             break;
         case 0xFF:
             INST_NAME("FCOS");
             MESSAGE(LOG_DUMP, "Need Optimization\n");
             x87_forget(dyn, ninst, x1, x2, 0);
+            s0 = x87_stackcount(dyn, ninst, x3);
             CALL(native_fcos, -1);
+            x87_unstackcount(dyn, ninst, x3, s0);
             break;
 
 
diff --git a/src/dynarec/rv64/dynarec_rv64_db.c b/src/dynarec/rv64/dynarec_rv64_db.c
index a647ee11..80e99666 100644
--- a/src/dynarec/rv64/dynarec_rv64_db.c
+++ b/src/dynarec/rv64/dynarec_rv64_db.c
@@ -150,44 +150,14 @@ uintptr_t dynarec64_DB(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
         case 0xEF:
             INST_NAME("FUCOMI ST0, STx");
             SETFLAGS(X_ALL, SF_SET);
-            SET_DFNONE();
             v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7));
             v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7));
-            IFX(X_ZF | X_PF | X_CF) {
-                if(ST_IS_F(0)) {
-                    FEQS(x5, v1, v1);
-                    FEQS(x4, v2, v2);
-                    AND(x5, x5, x4);
-                    BEQZ(x5, 24); // undefined/NaN
-                    FEQS(x5, v1, v2);
-                    BNEZ(x5, 24); // equal
-                    FLTS(x3, v1, v2); // x3 = (v1<v2)?1:0
-                    OR(xFlags, xFlags, x3); // CF is the least significant bit
-                    J(16); // end
-                    // NaN
-                    ORI(xFlags, xFlags, (1<<F_ZF) | (1<<F_PF) | (1<<F_CF));
-                    J(8); // end
-                    // equal
-                    ORI(xFlags, xFlags, 1<<F_ZF);
-                    // end
-                } else {
-                    FEQD(x5, v1, v1);
-                    FEQD(x4, v2, v2);
-                    AND(x5, x5, x4);
-                    BEQZ(x5, 24); // undefined/NaN
-                    FEQD(x5, v1, v2);
-                    BNEZ(x5, 24); // equal
-                    FLTD(x3, v1, v2); // x3 = (v1<v2)?1:0
-                    OR(xFlags, xFlags, x3); // CF is the least significant bit
-                    J(16); // end
-                    // NaN
-                    ORI(xFlags, xFlags, (1<<F_ZF) | (1<<F_PF) | (1<<F_CF));
-                    J(8); // end
-                    // equal
-                    ORI(xFlags, xFlags, 1<<F_ZF);
-                    // end
-                }
+            if (ST_IS_F(0)) {
+                FCOMIS(v1, v2, x1, x2, x3, x4, x5);
+            } else {
+                FCOMID(v1, v2, x1, x2, x3, x4, x5);
             }
+
             break;
         case 0xF0:
         case 0xF1:
@@ -202,9 +172,9 @@ uintptr_t dynarec64_DB(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
             v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
             if (ST_IS_F(0)) {
-                FCOMS(v1, v2, x1, x2, x3, x4, x5);
+                FCOMIS(v1, v2, x1, x2, x3, x4, x5);
             } else {
-                FCOMS(v1, v2, x1, x2, x3, x4, x5);
+                FCOMID(v1, v2, x1, x2, x3, x4, x5);
             }
             break;
 
@@ -312,7 +282,9 @@ uintptr_t dynarec64_DB(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                         if(ed!=x1) {
                             MV(x1, ed);
                         }
+                        s0 = x87_stackcount(dyn, ninst, x3);
                         CALL(native_fstp, -1);
+                        x87_unstackcount(dyn, ninst, x3, s0);
                     }
                     X87_POP_OR_FAIL(dyn, ninst, x3);
                     break;
diff --git a/src/dynarec/rv64/dynarec_rv64_df.c b/src/dynarec/rv64/dynarec_rv64_df.c
index 2a2884c5..c0bc6ae8 100644
--- a/src/dynarec/rv64/dynarec_rv64_df.c
+++ b/src/dynarec/rv64/dynarec_rv64_df.c
@@ -212,7 +212,9 @@ uintptr_t dynarec64_DF(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     x87_forget(dyn, ninst, x1, x2, 0);
                     addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
                     if (ed != x1) { MV(x1, ed); }
+                    s0 = x87_stackcount(dyn, ninst, x3);
                     CALL(fpu_fbst, -1);
+                    x87_unstackcount(dyn, ninst, x3, s0);
                     X87_POP_OR_FAIL(dyn, ninst, x3);
                     break;
                 case 7:
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c
index 7aa51a74..c87cd4f7 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.c
+++ b/src/dynarec/rv64/dynarec_rv64_helper.c
@@ -827,11 +827,11 @@ void grab_segdata(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, int reg, int s
     MESSAGE(LOG_DUMP, "----%s Offset\n", (segment==_FS)?"FS":"GS");
 }
 
-void x87_stackcount(dynarec_rv64_t* dyn, int ninst, int scratch)
+int x87_stackcount(dynarec_rv64_t* dyn, int ninst, int scratch)
 {
     MAYUSE(scratch);
     if(!dyn->e.x87stack)
-        return;
+        return 0;
     if(dyn->e.mmxcount)
         mmx_purgecache(dyn, ninst, 0, scratch);
     MESSAGE(LOG_DUMP, "\tSynch x87 Stackcount (%d)\n", dyn->e.x87stack);
@@ -848,10 +848,35 @@ void x87_stackcount(dynarec_rv64_t* dyn, int ninst, int scratch)
     // reset x87stack, but not the stack count of extcache
     dyn->e.x87stack = 0;
     dyn->e.stack_next -= dyn->e.stack;
+    int ret = dyn->e.stack;
     dyn->e.stack = 0;
     MESSAGE(LOG_DUMP, "\t------x87 Stackcount\n");
+    return ret;
+}
+void x87_unstackcount(dynarec_rv64_t* dyn, int ninst, int scratch, int count)
+{ // emits code that takes count back off emu fpu_stack and adds it back to emu top, then restores the local counters to count
+    MAYUSE(scratch);
+    if(!count) // zero count: nothing to restore, emit nothing
+        return;
+    if(dyn->e.mmxcount)
+        mmx_purgecache(dyn, ninst, 0, scratch);
+    MESSAGE(LOG_DUMP, "\tSynch x87 Unstackcount (%d)\n", count);
+    int a = -count; // negated count, used as the immediate below
+    // Subtract count from emu fpu_stack (adds a, which is -count)
+    LW(scratch, xEmu, offsetof(x64emu_t, fpu_stack));
+    ADDI(scratch, scratch, a);
+    SW(scratch, xEmu, offsetof(x64emu_t, fpu_stack));
+    // Add count back to top (subtracts a, which is -count), with and 7
+    LW(scratch, xEmu, offsetof(x64emu_t, top));
+    SUBI(scratch, scratch, a);
+    ANDI(scratch, scratch, 7);
+    SW(scratch, xEmu, offsetof(x64emu_t, top));
+    // restore the local x87stack and stack counters to count, and add it back to stack_next
+    dyn->e.x87stack = count;
+    dyn->e.stack = count;
+    dyn->e.stack_next += dyn->e.stack;
+    MESSAGE(LOG_DUMP, "\t------x87 Unstackcount\n");
 }
-
 int extcache_st_coherency(dynarec_rv64_t* dyn, int ninst, int a, int b)
 {
     int i1 = extcache_get_st(dyn, ninst, a);
@@ -1217,7 +1242,6 @@ int x87_get_st_empty(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int a, int
 
 void x87_refresh(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int st)
 {
-    x87_stackcount(dyn, ninst, s1);
     int ret = -1;
     for (int i=0; (i<8) && (ret==-1); ++i)
         if(dyn->e.x87cache[i] == st)
@@ -1230,11 +1254,12 @@ void x87_refresh(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int st)
     // Get top
     LW(s2, xEmu, offsetof(x64emu_t, top));
     // Update
-    if(st) {
-        ADDI(s2, s2, st);
+    int a = st - dyn->e.x87stack;
+    if(a) {
+        ADDI(s2, s2, a);
         ANDI(s2, s2, 7);    // (emu->top + i)&7
     }
-    ADD(s1, xEmu, s2);
+    if(rv64_zba) SH3ADD(s1, s2, xEmu); else {SLLI(s2, s2, 3); ADD(s1, xEmu, s2);}
     if (dyn->e.extcache[EXTIDX(reg)].t == EXT_CACHE_ST_F) {
         FCVTDS(SCRATCH0, reg);
         FSD(SCRATCH0, s1, offsetof(x64emu_t, x87));
@@ -1250,7 +1275,6 @@ void x87_refresh(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int st)
 
 void x87_forget(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int st)
 {
-    x87_stackcount(dyn, ninst, s1);
     int ret = -1;
     for (int i=0; (i<8) && (ret==-1); ++i)
         if(dyn->e.x87cache[i] == st)
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h
index 1731de5a..6d7f63b1 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.h
+++ b/src/dynarec/rv64/dynarec_rv64_helper.h
@@ -717,8 +717,12 @@
 #define BEQ_MARK3(reg1, reg2) Bxx_gen(EQ, MARK3, reg1, reg2)
 // Branch to MARK3 if reg1!=reg2 (use j64)
 #define BNE_MARK3(reg1, reg2) Bxx_gen(NE, MARK3, reg1, reg2)
+// Branch to MARK3 if reg1>=reg2 (use j64)
+#define BGE_MARK3(reg1, reg2) Bxx_gen(GE, MARK3, reg1, reg2)
 // Branch to MARK3 if reg1!=0 (use j64)
 #define BNEZ_MARK3(reg) BNE_MARK3(reg, xZR)
+// Branch to MARK3 if reg1==0 (use j64)
+#define BEQZ_MARK3(reg) BEQ_MARK3(reg, xZR)
 // Branch to MARK3 instruction unconditionnal (use j64)
 #define B_MARK3_nocond Bxx_gen(__, MARK3, 0, 0)
 // Branch to MARKLOCK if reg1!=reg2 (use j64)
@@ -1199,6 +1203,7 @@ void* rv64_next(x64emu_t* emu, uintptr_t addr);
 #define x87_forget            STEPNAME(x87_forget)
 #define x87_reget_st          STEPNAME(x87_reget_st)
 #define x87_stackcount        STEPNAME(x87_stackcount)
+#define x87_unstackcount      STEPNAME(x87_unstackcount)
 #define x87_swapreg           STEPNAME(x87_swapreg)
 #define x87_setround          STEPNAME(x87_setround)
 #define x87_restoreround      STEPNAME(x87_restoreround)
@@ -1340,7 +1345,9 @@ void emit_pf(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4);
 
 // x87 helper
 // cache of the local stack counter, to avoid upadte at every call
-void x87_stackcount(dynarec_rv64_t* dyn, int ninst, int scratch);
+int x87_stackcount(dynarec_rv64_t* dyn, int ninst, int scratch);
+// restore local stack counter
+void x87_unstackcount(dynarec_rv64_t* dyn, int ninst, int scratch, int count);
 // fpu push. Return the Dd value to be used
 int x87_do_push(dynarec_rv64_t* dyn, int ninst, int s1, int t);
 // fpu push. Do not allocate a cache register. Needs a scratch register to do x87stack synch (or 0 to not do it)
@@ -1625,6 +1632,40 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
 #define FCOMS(v1, v2, s1, s2, s3, s4, s5) FCOM(S, v1, v2, s1, s2, s3, s4, s5)
 #define FCOMD(v1, v2, s1, s2, s3, s4, s5) FCOM(D, v1, v2, s1, s2, s3, s4, s5)
 
+#define FCOMI(w, v1, v2, s1, s2, s3, s4, s5) /* w: S or D */    \
+    IFX(X_OF|X_AF|X_SF|X_PEND) {                                \
+        MOV64x(s2, ~((1<<F_OF2)|(1<<F_AF)|(1<<F_SF)));          \
+        AND(xFlags, xFlags, s2); /* clear OF2, AF, SF bits */   \
+    }                                                           \
+    IFX(X_CF|X_PF|X_ZF|X_PEND) {                                \
+        MOV32w(s2, 0b01000101); /* bits 0,2,6: CF|PF|ZF */      \
+        if(rv64_zbb) {                                          \
+            ANDN(xFlags, xFlags, s2); /* clear masked bits */   \
+        } else {                                                \
+            NOT(s3, s2);                                        \
+            AND(xFlags, xFlags, s3); /* clear masked bits */    \
+        }                                                       \
+        FEQ##w(s5, v1, v1); /* 0 when v1 is NaN */              \
+        FEQ##w(s4, v2, v2); /* 0 when v2 is NaN */              \
+        AND(s5, s5, s4);                                        \
+        BEQZ(s5, 5*4); /* undefined/NaN */                      \
+        FEQ##w(s5, v1, v2);                                     \
+        BNEZ(s5, 5*4);       /* equal */                        \
+        FLT##w(s1, v1, v2); /* s1 = (v1<v2)?1:0 */              \
+        J(4*4); /* end */                                       \
+        /* undefined/NaN */                                     \
+        MV(s1, s2); /* s2 still holds the 3-bit mask */         \
+        J(2*4); /* end */                                       \
+        /* equal */                                             \
+        ADDI(s1, xZR, 0b01000000); /* bit 6 only */             \
+        /* end */                                               \
+        OR(xFlags, xFlags, s1); /* merge computed bits */       \
+    }                                                           \
+    SET_DFNONE()
+
+#define FCOMIS(v1, v2, s1, s2, s3, s4, s5) FCOMI(S, v1, v2, s1, s2, s3, s4, s5)
+#define FCOMID(v1, v2, s1, s2, s3, s4, s5) FCOMI(D, v1, v2, s1, s2, s3, s4, s5)
+
 // reg = (reg < -32768) ? -32768 : ((reg > 32767) ? 32767 : reg)
 #define SAT16(reg, s)             \
     LUI(s, 0xFFFF8); /* -32768 */ \