about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Yang Liu <liuyang22@iscas.ac.cn> 2024-08-29 01:48:22 +0800
committer GitHub <noreply@github.com> 2024-08-28 19:48:22 +0200
commit abbaf9b593725d5f00d0482d0fc28310046cfd27 (patch)
tree 7585473bdefc5d122acbaa95cfec3ab9851aa38e
parent 126c59b4a41b3f17b4fdea6e1fd94bd15d1638d7 (diff)
download box64-abbaf9b593725d5f00d0482d0fc28310046cfd27.tar.gz
box64-abbaf9b593725d5f00d0482d0fc28310046cfd27.zip
[BOX32][RV64_DYNAREC] Fixed more issues and enable CI for box32 (#1767)
* [BOX32][RV64_DYNAREC] Fixed more issues

* fix

* fix

* enable Ci for box32

* add some comments
-rw-r--r-- .github/workflows/release.yml 10
-rw-r--r-- CMakeLists.txt 52
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_00_2.c 5
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_64.c 2
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_emit_math.c 9
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_helper.c 17
-rw-r--r-- src/dynarec/rv64/rv64_emitter.h 10
7 files changed, 62 insertions, 43 deletions
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index ca809f9f..93aa2383 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -177,17 +177,17 @@ jobs:
 
           cd build
           if [[ ${{ matrix.platform }} == 'RISCV' ]]; then
-            if [[ ${{ env.BOX64_BOX32 }} != 1 ]]; then
             INTERPRETER=qemu-riscv64-static QEMU_LD_PREFIX=/usr/riscv64-linux-gnu/ ctest -j$(nproc) --output-on-failure
-            INTERPRETER=qemu-riscv64-static QEMU_LD_PREFIX=/usr/riscv64-linux-gnu/ BOX64_DYNAREC_TEST=2 ctest -j$(nproc) --repeat until-pass:20 --output-on-failure
             INTERPRETER=qemu-riscv64-static QEMU_LD_PREFIX=/usr/riscv64-linux-gnu/ QEMU_CPU=rv64,v=false BOX64_DYNAREC=0 ctest -j$(nproc) --output-on-failure
             INTERPRETER=qemu-riscv64-static QEMU_LD_PREFIX=/usr/riscv64-linux-gnu/ QEMU_CPU=rv64,v=false,zba=true,zbb=true,zbc=true,zbs=true ctest -j$(nproc) --output-on-failure
-            INTERPRETER=qemu-riscv64-static QEMU_LD_PREFIX=/usr/riscv64-linux-gnu/ BOX64_DYNAREC_TEST=2 QEMU_CPU=rv64,v=false,zba=true,zbb=true,zbc=true,zbs=true ctest -j$(nproc) --repeat until-pass:20 --output-on-failure
             INTERPRETER=qemu-riscv64-static QEMU_LD_PREFIX=/usr/riscv64-linux-gnu/ QEMU_CPU=rv64,v=true,vlen=128,vext_spec=v1.0 ctest -j$(nproc) --output-on-failure
-            INTERPRETER=qemu-riscv64-static QEMU_LD_PREFIX=/usr/riscv64-linux-gnu/ BOX64_DYNAREC_TEST=2 QEMU_CPU=rv64,v=true,vlen=128,vext_spec=v1.0 ctest -j$(nproc) --repeat until-pass:20 --output-on-failure
             INTERPRETER=qemu-riscv64-static QEMU_LD_PREFIX=/usr/riscv64-linux-gnu/ QEMU_CPU=rv64,v=true,vlen=256,vext_spec=v1.0 ctest -j$(nproc) --output-on-failure
-            INTERPRETER=qemu-riscv64-static QEMU_LD_PREFIX=/usr/riscv64-linux-gnu/ BOX64_DYNAREC_TEST=2 QEMU_CPU=rv64,v=true,vlen=256,vext_spec=v1.0 ctest -j$(nproc) --repeat until-pass:20 --output-on-failure
             INTERPRETER=qemu-riscv64-static QEMU_LD_PREFIX=/usr/riscv64-linux-gnu/ QEMU_CPU=rv64,v=false,xtheadba=true,xtheadba=true,xtheadbb=true,xtheadbs=true,xtheadcondmov=true,xtheadmemidx=true,xtheadmempair=true,xtheadfmemidx=true,xtheadmac=true,xtheadfmv=true ctest -j$(nproc) --output-on-failure
+            if [[ ${{ env.BOX64_BOX32 }} != 1 ]]; then
+            INTERPRETER=qemu-riscv64-static QEMU_LD_PREFIX=/usr/riscv64-linux-gnu/ BOX64_DYNAREC_TEST=2 ctest -j$(nproc) --repeat until-pass:20 --output-on-failure
+            INTERPRETER=qemu-riscv64-static QEMU_LD_PREFIX=/usr/riscv64-linux-gnu/ BOX64_DYNAREC_TEST=2 QEMU_CPU=rv64,v=false,zba=true,zbb=true,zbc=true,zbs=true ctest -j$(nproc) --repeat until-pass:20 --output-on-failure
+            INTERPRETER=qemu-riscv64-static QEMU_LD_PREFIX=/usr/riscv64-linux-gnu/ BOX64_DYNAREC_TEST=2 QEMU_CPU=rv64,v=true,vlen=128,vext_spec=v1.0 ctest -j$(nproc) --repeat until-pass:20 --output-on-failure
+            INTERPRETER=qemu-riscv64-static QEMU_LD_PREFIX=/usr/riscv64-linux-gnu/ BOX64_DYNAREC_TEST=2 QEMU_CPU=rv64,v=true,vlen=256,vext_spec=v1.0 ctest -j$(nproc) --repeat until-pass:20 --output-on-failure
             INTERPRETER=qemu-riscv64-static QEMU_LD_PREFIX=/usr/riscv64-linux-gnu/ BOX64_DYNAREC_TEST=2 QEMU_CPU=rv64,v=false,xtheadba=true,xtheadba=true,xtheadbb=true,xtheadbs=true,xtheadcondmov=true,xtheadmemidx=true,xtheadmempair=true,xtheadfmemidx=true,xtheadmac=true,xtheadfmv=true ctest -j$(nproc) --repeat until-pass:20 --output-on-failure
             fi
           elif [[ ${{ matrix.platform }} == 'LARCH64' ]]; then
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 31a39121..13b8d0bf 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1531,53 +1531,53 @@ endif()
 
 if(BOX32)
     add_test(NAME bootSyscall_32bits COMMAND ${CMAKE_COMMAND} -D TEST_PROGRAM=${CMAKE_BINARY_DIR}/${BOX64}
-        -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests32/test01 -D TEST_OUTPUT=tmpfile01.txt
+        -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests32/test01 -D TEST_OUTPUT=tmpfile32_01.txt
         -D TEST_REFERENCE=${CMAKE_SOURCE_DIR}/tests32/ref01.txt
         -P ${CMAKE_SOURCE_DIR}/runTest.cmake )
 
     add_test(NAME bootSyscallC_32bits COMMAND ${CMAKE_COMMAND} -D TEST_PROGRAM=${CMAKE_BINARY_DIR}/${BOX64}
-        -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests32/test02 -D TEST_OUTPUT=tmpfile02.txt
+        -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests32/test02 -D TEST_OUTPUT=tmpfile32_02.txt
         -D TEST_REFERENCE=${CMAKE_SOURCE_DIR}/tests32/ref02.txt
         -P ${CMAKE_SOURCE_DIR}/runTest.cmake )
 
     add_test(NAME printf_32bits COMMAND ${CMAKE_COMMAND} -D TEST_PROGRAM=${CMAKE_BINARY_DIR}/${BOX64}
-        -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests32/test03 -D TEST_OUTPUT=tmpfile03.txt
+        -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests32/test03 -D TEST_OUTPUT=tmpfile32_03.txt
         -D TEST_REFERENCE=${CMAKE_SOURCE_DIR}/tests32/ref03.txt
         -P ${CMAKE_SOURCE_DIR}/runTest.cmake )
 
     add_test(NAME args_32bits COMMAND ${CMAKE_COMMAND} -D TEST_PROGRAM=${CMAKE_BINARY_DIR}/${BOX64}
-        -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests32/test04 -D TEST_ARGS2=yeah -D TEST_OUTPUT=tmpfile04.txt
+        -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests32/test04 -D TEST_ARGS2=yeah -D TEST_OUTPUT=tmpfile32_04.txt
         -D TEST_REFERENCE=${CMAKE_SOURCE_DIR}/tests32/ref04.txt
         -P ${CMAKE_SOURCE_DIR}/runTest.cmake )
 
     add_test(NAME maths1_32bits COMMAND ${CMAKE_COMMAND} -D TEST_PROGRAM=${CMAKE_BINARY_DIR}/${BOX64}
-        -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests32/test05 -D TEST_ARGS2=7 -D TEST_OUTPUT=tmpfile05.txt
+        -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests32/test05 -D TEST_ARGS2=7 -D TEST_OUTPUT=tmpfile32_05.txt
         -D TEST_REFERENCE=${CMAKE_SOURCE_DIR}/tests32/ref05.txt
         -P ${CMAKE_SOURCE_DIR}/runTest.cmake )
 
     add_test(NAME threadsStart_32bits COMMAND ${CMAKE_COMMAND} -D TEST_PROGRAM=${CMAKE_BINARY_DIR}/${BOX64}
-        -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests32/test06 -D TEST_OUTPUT=tmpfile06.txt
+        -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests32/test06 -D TEST_OUTPUT=tmpfile32_06.txt
         -D TEST_REFERENCE=${CMAKE_SOURCE_DIR}/tests32/ref06.txt
         -P ${CMAKE_SOURCE_DIR}/runTest.cmake )
 
     add_test(NAME trig_32bits COMMAND ${CMAKE_COMMAND} -D TEST_PROGRAM=${CMAKE_BINARY_DIR}/${BOX64}
-        -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests32/test07 -D TEST_OUTPUT=tmpfile07.txt
+        -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests32/test07 -D TEST_OUTPUT=tmpfile32_07.txt
         -D TEST_REFERENCE=${CMAKE_SOURCE_DIR}/tests32/ref07.txt
         -P ${CMAKE_SOURCE_DIR}/runTest.cmake )
 
     add_test(NAME pi_32bits COMMAND ${CMAKE_COMMAND} -D TEST_PROGRAM=${CMAKE_BINARY_DIR}/${BOX64}
-        -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests32/test08 -D TEST_OUTPUT=tmpfile08.txt
+        -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests32/test08 -D TEST_OUTPUT=tmpfile32_08.txt
         -D TEST_REFERENCE=${CMAKE_SOURCE_DIR}/tests32/ref08.txt
         -P ${CMAKE_SOURCE_DIR}/runTest.cmake )
 
     add_test(NAME fork_32bits COMMAND ${CMAKE_COMMAND} -D TEST_PROGRAM=${CMAKE_BINARY_DIR}/${BOX64}
-        -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests32/test09 -D TEST_OUTPUT=tmpfile09.txt
+        -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests32/test09 -D TEST_OUTPUT=tmpfile32_09.txt
         -D TEST_REFERENCE=${CMAKE_SOURCE_DIR}/tests32/ref09.txt
         -P ${CMAKE_SOURCE_DIR}/runTest.cmake )
 
     if(NOT CI)
     add_test(NAME cppThreads_32bits COMMAND ${CMAKE_COMMAND} -D TEST_PROGRAM=${CMAKE_BINARY_DIR}/${BOX64}
-        -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests32/test10 -D TEST_OUTPUT=tmpfile10.txt
+        -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests32/test10 -D TEST_OUTPUT=tmpfile32_10.txt
         -D TEST_REFERENCE=${CMAKE_SOURCE_DIR}/tests32/ref10.txt
         -P ${CMAKE_SOURCE_DIR}/runTest.cmake )
     
@@ -1585,61 +1585,63 @@ if(BOX32)
     endif()
 
     add_test(NAME tlsData_32bits COMMAND ${CMAKE_COMMAND} -D TEST_PROGRAM=${CMAKE_BINARY_DIR}/${BOX64}
-        -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests32/test11 -D TEST_OUTPUT=tmpfile11.txt
+        -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests32/test11 -D TEST_OUTPUT=tmpfile32_11.txt
         -D TEST_REFERENCE=${CMAKE_SOURCE_DIR}/tests32/ref11.txt
         -P ${CMAKE_SOURCE_DIR}/runTest.cmake )
 
     add_test(NAME fpu_32bits COMMAND ${CMAKE_COMMAND} -D TEST_PROGRAM=${CMAKE_BINARY_DIR}/${BOX64}
-        -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests32/test12 -D TEST_OUTPUT=tmpfile12.txt
+        -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests32/test12 -D TEST_OUTPUT=tmpfile32_12.txt
         -D TEST_REFERENCE=${CMAKE_SOURCE_DIR}/tests32/ref12.txt
         -P ${CMAKE_SOURCE_DIR}/runTest.cmake )
 
     add_test(NAME contexts_32bits COMMAND ${CMAKE_COMMAND} -D TEST_PROGRAM=${CMAKE_BINARY_DIR}/${BOX64}
-        -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests32/test13 -D TEST_OUTPUT=tmpfile13.txt
+        -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests32/test13 -D TEST_OUTPUT=tmpfile32_13.txt
         -D TEST_REFERENCE=${CMAKE_SOURCE_DIR}/tests32/ref13.txt
         -P ${CMAKE_SOURCE_DIR}/runTest.cmake )
 
     if(NOT LD80BITS)
         add_test(NAME conditionalThreads_32bits COMMAND ${CMAKE_COMMAND} -D TEST_PROGRAM=${CMAKE_BINARY_DIR}/${BOX64}
-            -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests32/test14 -D TEST_OUTPUT=tmpfile14.txt
+            -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests32/test14 -D TEST_OUTPUT=tmpfile32_14.txt
             -D TEST_REFERENCE=${CMAKE_SOURCE_DIR}/tests32/ref14.txt
             -P ${CMAKE_SOURCE_DIR}/runTest.cmake )
     endif()
 
     add_test(NAME linkingIndirectNoversion_32bits COMMAND ${CMAKE_COMMAND} -D TEST_PROGRAM=${CMAKE_BINARY_DIR}/${BOX64}
-        -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests32/test15 -D TEST_OUTPUT=tmpfile15.txt
+        -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests32/test15 -D TEST_OUTPUT=tmpfile32_15.txt
         -D TEST_REFERENCE=${CMAKE_SOURCE_DIR}/tests32/ref15.txt
         -P ${CMAKE_SOURCE_DIR}/runTest.cmake )
 
     add_test(NAME linkingIndirectVersion_32bits COMMAND ${CMAKE_COMMAND} -D TEST_PROGRAM=${CMAKE_BINARY_DIR}/${BOX64}
-        -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests32/test16 -D TEST_OUTPUT=tmpfile16.txt
+        -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests32/test16 -D TEST_OUTPUT=tmpfile32_16.txt
         -D TEST_REFERENCE=${CMAKE_SOURCE_DIR}/tests32/ref16.txt
         -P ${CMAKE_SOURCE_DIR}/runTest.cmake )
 
     add_test(NAME sse_32bits COMMAND ${CMAKE_COMMAND} -D TEST_PROGRAM=${CMAKE_BINARY_DIR}/${BOX64}
-        -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests32/test17 -D TEST_OUTPUT=tmpfile17.txt
+        -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests32/test17 -D TEST_OUTPUT=tmpfile32_17.txt
         -D TEST_REFERENCE=${CMAKE_SOURCE_DIR}/tests32/ref17.txt
         -P ${CMAKE_SOURCE_DIR}/runTest.cmake )
 
     set_tests_properties(sse_32bits PROPERTIES ENVIRONMENT "BOX64_DYNAREC_FASTNAN=0;BOX64_DYNAREC_FASTROUND=0")
 
     add_test(NAME longjumpInSignals_32bits COMMAND ${CMAKE_COMMAND} -D TEST_PROGRAM=${CMAKE_BINARY_DIR}/${BOX64}
-        -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests32/test18 -D TEST_OUTPUT=tmpfile18.txt
+        -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests32/test18 -D TEST_OUTPUT=tmpfile32_18.txt
         -D TEST_REFERENCE=${CMAKE_SOURCE_DIR}/tests32/ref18.txt
         -P ${CMAKE_SOURCE_DIR}/runTest.cmake )
 
     add_test(NAME x87_32bits COMMAND ${CMAKE_COMMAND} -D TEST_PROGRAM=${CMAKE_BINARY_DIR}/${BOX64}
-        -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests32/test19 -D TEST_OUTPUT=tmpfile19.txt
+        -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests32/test19 -D TEST_OUTPUT=tmpfile32_19.txt
         -D TEST_REFERENCE=${CMAKE_SOURCE_DIR}/tests32/ref19.txt
         -P ${CMAKE_SOURCE_DIR}/runTest.cmake )
 
+    set_tests_properties(x87_32bits PROPERTIES ENVIRONMENT "BOX64_DYNAREC_FASTROUND=0")
+
     add_test(NAME idiv_32bits COMMAND ${CMAKE_COMMAND} -D TEST_PROGRAM=${CMAKE_BINARY_DIR}/${BOX64}
-        -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests32/test20 -D TEST_OUTPUT=tmpfile20.txt
+        -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests32/test20 -D TEST_OUTPUT=tmpfile32_20.txt
         -D TEST_REFERENCE=${CMAKE_SOURCE_DIR}/tests32/ref20.txt
         -P ${CMAKE_SOURCE_DIR}/runTest.cmake )
 
     add_test(NAME multiple_dlopen_32bits COMMAND ${CMAKE_COMMAND} -D TEST_PROGRAM=${CMAKE_BINARY_DIR}/${BOX64} 
-        -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests32/test21 -D TEST_OUTPUT=tmpfile21.txt 
+        -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests32/test21 -D TEST_OUTPUT=tmpfile32_21.txt 
         -D TEST_REFERENCE=${CMAKE_SOURCE_DIR}/tests32/ref21.txt
         -P ${CMAKE_SOURCE_DIR}/runTest.cmake )
 
@@ -1647,20 +1649,20 @@ if(BOX32)
     foreach(file ${extension_tests})
         get_filename_component(testname "${file}" NAME_WE)
         add_test(NAME "${testname}_32bits" COMMAND ${CMAKE_COMMAND} -D TEST_PROGRAM=${CMAKE_BINARY_DIR}/${BOX64}
-            -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests32/extensions/${testname} -D TEST_OUTPUT=tmpfile-${testname}.txt
+            -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests32/extensions/${testname} -D TEST_OUTPUT=tmpfile32_-${testname}.txt
             -D TEST_REFERENCE=${CMAKE_SOURCE_DIR}/tests32/extensions/${testname}.txt
             -P ${CMAKE_SOURCE_DIR}/runTest.cmake)
     endforeach()
 
     #add_test(NAME sse_optimized_32bits COMMAND ${CMAKE_COMMAND} -D TEST_PROGRAM=${CMAKE_BINARY_DIR}/${BOX64} 
-    #    -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests32/test17_o2 -D TEST_OUTPUT=tmpfile17_o2.txt 
+    #    -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests32/test17_o2 -D TEST_OUTPUT=tmpfile32_17_o2.txt 
     #    -D TEST_REFERENCE=${CMAKE_SOURCE_DIR}/tests32/ref17_o2.txt
     #    -P ${CMAKE_SOURCE_DIR}/runTest.cmake )
 
     #set_tests_properties(sse_optimized_32bits PROPERTIES ENVIRONMENT "BOX64_DYNAREC_FASTNAN=0;BOX64_DYNAREC_FASTROUND=0")    
         
     add_test(bswap_32bits ${CMAKE_COMMAND} -D TEST_PROGRAM=${CMAKE_BINARY_DIR}/${BOX64}
-            -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests32/test23 -D TEST_OUTPUT=tmpfile23.txt
+            -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests32/test23 -D TEST_OUTPUT=tmpfile32_23.txt
             -D TEST_REFERENCE=${CMAKE_SOURCE_DIR}/tests32/ref23.txt
             -P ${CMAKE_SOURCE_DIR}/runTest.cmake )
-endif()
\ No newline at end of file
+endif()
diff --git a/src/dynarec/rv64/dynarec_rv64_00_2.c b/src/dynarec/rv64/dynarec_rv64_00_2.c
index e0a841ca..e4eb7aa4 100644
--- a/src/dynarec/rv64/dynarec_rv64_00_2.c
+++ b/src/dynarec/rv64/dynarec_rv64_00_2.c
@@ -423,10 +423,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 DEFAULT;
             } else {     // mem <= reg
                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 0, 0);
-                MV(gd, ed);
-                if(!rex.w || rex.is32bits) {
-                    ZEROUP(gd); // truncate the higher 32bits as asked
-                }
+                if (gd != ed) MV(gd, ed);
             }
             break;
         case 0x8E:
diff --git a/src/dynarec/rv64/dynarec_rv64_64.c b/src/dynarec/rv64/dynarec_rv64_64.c
index c1ff69df..2ad48874 100644
--- a/src/dynarec/rv64/dynarec_rv64_64.c
+++ b/src/dynarec/rv64/dynarec_rv64_64.c
@@ -529,10 +529,10 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
 
         case 0xFF:
             nextop = F8;
-            grab_segdata(dyn, addr, ninst, x6, seg);
             switch((nextop>>3)&7) {
                 case 6: // Push Ed
                     INST_NAME("PUSH Ed");
+                    grab_segdata(dyn, addr, ninst, x6, seg);
                     GETEDOz(x6, 0, x3);
                     PUSH1z(ed);
                     break;
diff --git a/src/dynarec/rv64/dynarec_rv64_emit_math.c b/src/dynarec/rv64/dynarec_rv64_emit_math.c
index e19acba0..a8557853 100644
--- a/src/dynarec/rv64/dynarec_rv64_emit_math.c
+++ b/src/dynarec/rv64/dynarec_rv64_emit_math.c
@@ -113,6 +113,9 @@ void emit_add32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i
             MOV64x(s2, c);
             ADD(s1, s1, s2);
         }
+        if (!rex.w) {
+            ZEROUP(s1);
+        }
         return;
     }
     IFX(X_PEND | X_AF | X_CF | X_OF) {
@@ -516,8 +519,7 @@ void emit_sub32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
 void emit_sub32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s2, int s3, int s4, int s5)
 {
     CLEAR_FLAGS();
-    if(s1==xRSP && (!dyn->insts || dyn->insts[ninst].x64.gen_flags==X_PEND))
-    {
+    if (s1 == xRSP && (!dyn->insts || dyn->insts[ninst].x64.gen_flags == X_PEND)) {
         // special case when doing math on RSP and only PEND is needed: ignoring it!
         if (c > -2048 && c <= 2048) {
             SUBI(s1, s1, c);
@@ -525,6 +527,9 @@ void emit_sub32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i
             MOV64xw(s2, c);
             SUBxw(s1, s1, s2);
         }
+        if (!rex.w) {
+            ZEROUP(s1);
+        }
         return;
     }
 
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c
index 8c9f0adc..9810cf64 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.c
+++ b/src/dynarec/rv64/dynarec_rv64_helper.c
@@ -190,12 +190,14 @@ static uintptr_t geted_32(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_
                 int64_t tmp = F32S;
                 if (sib_reg!=4) {
                     if(tmp && ((tmp<-2048) || (tmp>maxval) || !i12)) {
-                        MOV32w(scratch, tmp);
+                        // no need to zero up, as we did it below
+                        rv64_move32(dyn, ninst, scratch, tmp, 0);
                         if((sib>>6)) {
                             SLLI(ret, xRAX + sib_reg, sib >> 6);
                             ADDW(ret, ret, scratch);
                         } else
                             ADDW(ret, xRAX+sib_reg, scratch);
+                        ZEROUP(ret);
                     } else {
                         if(sib>>6)
                             SLLI(ret, xRAX+sib_reg, (sib>>6));
@@ -217,6 +219,7 @@ static uintptr_t geted_32(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_
                         ADDW(ret, ret, xRAX + sib_reg2);
                     } else
                         ADDW(ret, xRAX+sib_reg2, xRAX+sib_reg);
+                    ZEROUP(ret);
                 } else {
                     ret = xRAX+sib_reg2;
                 }
@@ -256,6 +259,7 @@ static uintptr_t geted_32(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_
                         ADDW(ret, ret, xRAX + sib_reg2);
                     } else
                         ADDW(ret, xRAX+sib_reg2, xRAX+sib_reg);
+                    ZEROUP(ret);
                 } else {
                     ret = xRAX+sib_reg2;
                 }
@@ -279,7 +283,8 @@ static uintptr_t geted_32(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_
                 ADDIW(ret, scratch, i32);
                 ZEROUP(ret);
             } else {
-                MOV32w(scratch, i32);
+                // no need to zero up, as we did it below
+                rv64_move32(dyn, ninst, scratch, i32, 0);
                 if((nextop&7)==4) {
                     if (sib_reg!=4) {
                         ADDW(scratch, scratch, xRAX+sib_reg2);
@@ -334,6 +339,7 @@ uintptr_t geted32(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop
                             ADDW(ret, ret, scratch);
                         } else
                             ADDW(ret, xRAX+sib_reg, scratch);
+                        ZEROUP(ret);
                     } else {
                         if(sib>>6)
                             SLLI(ret, xRAX+sib_reg, (sib>>6));
@@ -355,6 +361,7 @@ uintptr_t geted32(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop
                         ADDW(ret, ret, xRAX + sib_reg2);
                     } else
                         ADDW(ret, xRAX+sib_reg2, xRAX+sib_reg);
+                    ZEROUP(ret);
                 } else {
                     ret = xRAX+sib_reg2;
                 }
@@ -364,6 +371,7 @@ uintptr_t geted32(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop
             MOV32w(ret, tmp);
             GETIP(addr+delta);
             ADDW(ret, ret, xRIP);
+            ZEROUP(ret);
             switch(lock) {
                 case 1: addLockAddress(addr+delta+tmp); break;
                 case 2: if(isLockAddress(addr+delta+tmp)) *l=1; break;
@@ -396,6 +404,7 @@ uintptr_t geted32(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop
                         ADDW(ret, ret, xRAX + sib_reg2);
                     } else
                         ADDW(ret, xRAX+sib_reg2, xRAX+sib_reg);
+                    ZEROUP(ret);
                 } else {
                     ret = xRAX+sib_reg2;
                 }
@@ -2418,10 +2427,8 @@ void rv64_move32(dynarec_rv64_t* dyn, int ninst, int reg, int32_t val, int zerou
         src = reg;
     }
     if (lo12 || !hi20) ADDIW(reg, src, lo12);
-    if((zeroup && ((hi20&0x80000) || (!hi20 && (lo12&0x800))))
-    || (!zeroup && !(val&0x80000000) && ((hi20&0x80000) || (!hi20 && (lo12&0x800))))) {
+    if (zeroup && (val & 0x80000000))
         ZEROUP(reg);
-    }
 }
 
 void rv64_move64(dynarec_rv64_t* dyn, int ninst, int reg, int64_t val)
diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h
index d4a95a31..3f76135a 100644
--- a/src/dynarec/rv64/rv64_emitter.h
+++ b/src/dynarec/rv64/rv64_emitter.h
@@ -190,7 +190,15 @@ f28–31  ft8–11  FP temporaries                  Caller
 // rd = rs1 + rs2
 #define ADDxw(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b000, rd, rex.w ? 0b0110011 : 0b0111011))
 // rd = rs1 + rs2
-#define ADDz(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b000, rd, rex.is32bits ? 0b0111011 : 0b0110011))
+#define ADDz(rd, rs1, rs2)      \
+    do {                        \
+        if (!rex.is32bits) {    \
+            ADD(rd, rs1, rs2);  \
+        } else {                \
+            ADDW(rd, rs1, rs2); \
+            ZEROUP(rd);         \
+        }                       \
+    } while (0)
 // rd = rs1 - rs2
 #define SUB(rd, rs1, rs2) EMIT(R_type(0b0100000, rs2, rs1, 0b000, rd, 0b0110011))
 // rd = rs1 - rs2