about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Yang Liu <liuyang22@iscas.ac.cn> 2025-07-29 23:28:34 +0800
committer GitHub <noreply@github.com> 2025-07-29 17:28:34 +0200
commit 8180cb321818abcec2ffe41d908c3095380e79b1 (patch)
tree 045b72e814099235caf6d80484c803c1dc88ef18
parent 68e16f515472576fcb7589d3bda5185ead05e3a9 (diff)
download box64-8180cb321818abcec2ffe41d908c3095380e79b1.tar.gz
box64-8180cb321818abcec2ffe41d908c3095380e79b1.zip
[RV64][LA64] Added partial FLUSHTO0 support (#2855)
* [RV64][LA64] Removed obsolete TODOs

* more

* more

* more

* more
-rw-r--r-- src/dynarec/la64/dynarec_la64_0f.c 36
-rw-r--r-- src/dynarec/la64/dynarec_la64_avx_0f.c 39
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_0f.c 55
-rw-r--r-- src/emu/x64emu.c 2
4 files changed, 125 insertions, 7 deletions
diff --git a/src/dynarec/la64/dynarec_la64_0f.c b/src/dynarec/la64/dynarec_la64_0f.c
index 81eac050..c52eb714 100644
--- a/src/dynarec/la64/dynarec_la64_0f.c
+++ b/src/dynarec/la64/dynarec_la64_0f.c
@@ -1506,13 +1506,47 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                         GETED(0);
                         ST_W(ed, xEmu, offsetof(x64emu_t, mxcsr));
                         if (BOX64ENV(sse_flushto0)) {
-                            // TODO
+                            /* LA <-> x86
+                            16/24 <-> 5    inexact
+                            17/25 <-> 4    underflow
+                            18/26 <-> 3    overflow
+                            19/27 <-> 2    divide by zero
+                                x <-> 1    denormal
+                            20/28 <-> 0    invalid operation
+                            */
+                            // Doing x86 -> LA here, ignore denormal
+                            XOR(x4, x4, x4);
+                            BSTRPICK_W(x3, ed, 5, 5);
+                            BSTRINS_W(x4, x3, 16, 16);
+                            BSTRPICK_W(x3, ed, 4, 4);
+                            BSTRINS_W(x4, x3, 17, 17);
+                            BSTRPICK_W(x3, ed, 3, 3);
+                            BSTRINS_W(x4, x3, 18, 18);
+                            BSTRPICK_W(x3, ed, 2, 2);
+                            BSTRINS_W(x4, x3, 19, 19);
+                            BSTRPICK_W(x3, ed, 0, 0);
+                            BSTRINS_W(x4, x3, 20, 20);
+                            MOVGR2FCSR(FCSR2, x4);
                         }
                         break;
                     case 3:
                         INST_NAME("STMXCSR Md");
                         addr = geted(dyn, addr, ninst, nextop, &wback, x1, x2, &fixedaddress, rex, NULL, 0, 0);
                         LD_WU(x4, xEmu, offsetof(x64emu_t, mxcsr));
+                        if (BOX64ENV(sse_flushto0)) {
+                            MOVFCSR2GR(x5, FCSR2);
+                            // Doing LA -> x86 here, ignore denormal
+                            BSTRPICK_W(x3, x5, 16, 16);
+                            BSTRINS_W(x4, x3, 5, 5);
+                            BSTRPICK_W(x3, x5, 17, 17);
+                            BSTRINS_W(x4, x3, 4, 4);
+                            BSTRPICK_W(x3, x5, 18, 18);
+                            BSTRINS_W(x4, x3, 3, 3);
+                            BSTRPICK_W(x3, x5, 19, 19);
+                            BSTRINS_W(x4, x3, 2, 2);
+                            BSTRPICK_W(x3, x5, 20, 20);
+                            BSTRINS_W(x4, x3, 0, 0);
+                        }
                         ST_W(x4, wback, fixedaddress);
                         break;
                     case 4:
diff --git a/src/dynarec/la64/dynarec_la64_avx_0f.c b/src/dynarec/la64/dynarec_la64_avx_0f.c
index c0302d45..cb1921a5 100644
--- a/src/dynarec/la64/dynarec_la64_avx_0f.c
+++ b/src/dynarec/la64/dynarec_la64_avx_0f.c
@@ -454,19 +454,48 @@ uintptr_t dynarec64_AVX_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, in
                         GETED(0);
                         ST_W(ed, xEmu, offsetof(x64emu_t, mxcsr));
                         if (BOX64ENV(sse_flushto0)) {
-                            // sync with fpsr, with mask from mxcsr
-                            // TODO
+                            /* LA <-> x86
+                            16/24 <-> 5    inexact
+                            17/25 <-> 4    underflow
+                            18/26 <-> 3    overflow
+                            19/27 <-> 2    divide by zero
+                                x <-> 1    denormal
+                            20/28 <-> 0    invalid operation
+                            */
+                            // Doing x86 -> LA here, ignore denormal
+                            XOR(x4, x4, x4);
+                            BSTRPICK_W(x3, ed, 5, 5);
+                            BSTRINS_W(x4, x3, 16, 16);
+                            BSTRPICK_W(x3, ed, 4, 4);
+                            BSTRINS_W(x4, x3, 17, 17);
+                            BSTRPICK_W(x3, ed, 3, 3);
+                            BSTRINS_W(x4, x3, 18, 18);
+                            BSTRPICK_W(x3, ed, 2, 2);
+                            BSTRINS_W(x4, x3, 19, 19);
+                            BSTRPICK_W(x3, ed, 0, 0);
+                            BSTRINS_W(x4, x3, 20, 20);
+                            MOVGR2FCSR(FCSR2, x4);
                         }
                         break;
                     case 3:
                         INST_NAME("VSTMXCSR Md");
                         addr = geted(dyn, addr, ninst, nextop, &wback, x1, x2, &fixedaddress, rex, NULL, 0, 0);
                         LD_WU(x4, xEmu, offsetof(x64emu_t, mxcsr));
-                        ST_W(x4, wback, fixedaddress);
                         if (BOX64ENV(sse_flushto0)) {
-                            // sync with fpsr, with mask from mxcsr
-                            // TODO
+                            MOVFCSR2GR(x5, FCSR2);
+                            // Doing LA -> x86 here, ignore denormal
+                            BSTRPICK_W(x3, x5, 16, 16);
+                            BSTRINS_W(x4, x3, 5, 5);
+                            BSTRPICK_W(x3, x5, 17, 17);
+                            BSTRINS_W(x4, x3, 4, 4);
+                            BSTRPICK_W(x3, x5, 18, 18);
+                            BSTRINS_W(x4, x3, 3, 3);
+                            BSTRPICK_W(x3, x5, 19, 19);
+                            BSTRINS_W(x4, x3, 2, 2);
+                            BSTRPICK_W(x3, x5, 20, 20);
+                            BSTRINS_W(x4, x3, 0, 0);
                         }
+                        ST_W(x4, wback, fixedaddress);
                         break;
                     default:
                         DEFAULT;
diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c
index d1745b7d..1ced055c 100644
--- a/src/dynarec/rv64/dynarec_rv64_0f.c
+++ b/src/dynarec/rv64/dynarec_rv64_0f.c
@@ -1978,13 +1978,66 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                         GETED(0);
                         SW(ed, xEmu, offsetof(x64emu_t, mxcsr));
                         if (BOX64ENV(sse_flushto0)) {
-                            // TODO: applyFlushTo0 also needs to add RISC-V support.
+                            /* RV <-> x86
+                               0  <-> 5    inexact
+                               1  <-> 4    underflow
+                               2  <-> 3    overflow
+                               3  <-> 2    divide by zero
+                               x  <-> 1    denormal
+                               4  <-> 0    invalid operation
+                            */
+                            // Doing x86 -> RV here, 543210 => 0123x4, ignore denormal
+                            // x5 = (ed & 0b1) << 4
+                            SLLIW(x5, ed, 4);
+                            ANDI(x5, x5, 16);
+                            // x3 = x5 | ((ed & 0b100) << 1);
+                            SLLIW(x3, ed, 1);
+                            ANDI(x3, x3, 8);
+                            OR(x3, x3, x5);
+                            // x3 = x3 | (ed & 0b1000) >> 1;
+                            SRLIW(x4, ed, 1);
+                            ANDI(x4, x4, 4);
+                            OR(x3, x3, x4);
+                            // x3 = x3 | (ed & 0b10000) >> 3;
+                            SRLIW(x5, ed, 3);
+                            ANDI(x5, x5, 2);
+                            OR(x3, x3, x5);
+                            // x3 = x3 | (ed & 0b100000) >> 5;
+                            SRLIW(x5, ed, 5);
+                            ANDI(x5, x5, 1);
+                            OR(x3, x3, x5);
+                            CSRRW(xZR, x3, /* fflags */ 0x001);
                         }
                         break;
                     case 3:
                         INST_NAME("STMXCSR Md");
                         addr = geted(dyn, addr, ninst, nextop, &wback, x1, x2, &fixedaddress, rex, NULL, 0, 0);
                         LWU(x4, xEmu, offsetof(x64emu_t, mxcsr));
+                        if (BOX64ENV(sse_flushto0)) {
+                            // Doing RV -> x86, 43210 => 02345, ignore denormal
+                            ANDI(x4, x4, 0xfc0);
+                            CSRRS(x3, xZR, /* fflags */ 0x001);
+                            // x4 = x4 | (x3 & 0b1) << 5;
+                            SLLIW(x5, x3, 5);
+                            ANDI(x5, x5, 32);
+                            OR(x4, x4, x5);
+                            // x4 = x4 | (x3 & 0b10) << 3;
+                            SLLIW(x6, x3, 3);
+                            ANDI(x6, x6, 16);
+                            OR(x4, x4, x6);
+                            // x4 = x4 | (x3 & 0b100) << 1;
+                            SLLIW(x6, x3, 1);
+                            ANDI(x6, x6, 8);
+                            OR(x4, x4, x6);
+                            // x4 = x4 | (x3 & 0b1000) >> 1;
+                            SRLIW(x5, x3, 1);
+                            ANDI(x5, x5, 4);
+                            OR(x4, x4, x5);
+                            // x4 = x4 | (x3 & 0b10000) >> 4; invalid operation lands in bit 0, so mask with 1
+                            SRLIW(x5, x3, 4);
+                            ANDI(x5, x5, 1);
+                            OR(x4, x4, x5);
+                        }
                         SW(x4, wback, fixedaddress);
                         break;
                     case 4:
diff --git a/src/emu/x64emu.c b/src/emu/x64emu.c
index 9ce8c180..7627d4bb 100644
--- a/src/emu/x64emu.c
+++ b/src/emu/x64emu.c
@@ -607,6 +607,8 @@ void applyFlushTo0(x64emu_t* emu)
     #else
     __builtin_aarch64_set_fpcr(fpcr);
     #endif
+    #else
+    // This does not apply to RISC-V and LoongArch, as they don't have flush to zero
     #endif
 }