about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author: xctan <xctan@cirno.icu> 2024-12-13 02:54:08 +0800
committer: GitHub <noreply@github.com> 2024-12-12 19:54:08 +0100
commit: 9f7d0c44043bb6e1c63d904c269db8307e94dfa7 (patch)
tree: f1b6bcc865fbe39efead09037c21d19e22912540 /src
parent: 7e5aa3e7c6bd3cf2b998be55c3668e36ffb1d7d1 (diff)
download: box64-9f7d0c44043bb6e1c63d904c269db8307e94dfa7.tar.gz
download: box64-9f7d0c44043bb6e1c63d904c269db8307e94dfa7.zip
[RV64_DYNAREC] Reworked ZEROUP and freed t0 (#2147)
* [RV64_DYNAREC] Reworked ZEROUP and freed t0

* [RV64_DYNAREC] Removed the definition of xMASK
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_00_1.c 4
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_00_2.c 4
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_00_3.c 18
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_0f.c 12
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_67.c 14
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_emit_math.c 24
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_emit_tests.c 6
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_f0.c 6
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_f30f.c 6
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_helper.c 19
-rw-r--r-- src/dynarec/rv64/rv64_emitter.h 25
-rw-r--r-- src/dynarec/rv64/rv64_mapping.h 8
-rw-r--r-- src/dynarec/rv64/rv64_next.S 4
-rw-r--r-- src/dynarec/rv64/rv64_prolog.S 4
14 files changed, 74 insertions, 80 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_00_1.c b/src/dynarec/rv64/dynarec_rv64_00_1.c
index bee9fa33..85c56ba2 100644
--- a/src/dynarec/rv64/dynarec_rv64_00_1.c
+++ b/src/dynarec/rv64/dynarec_rv64_00_1.c
@@ -107,7 +107,7 @@ uintptr_t dynarec64_00_1(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
         case 0x60:
             if (rex.is32bits) {
                 INST_NAME("PUSHAD");
-                AND(x1, xRSP, xMASK);
+                ZEXTW2(x1, xRSP);
                 PUSH1_32(xRAX);
                 PUSH1_32(xRCX);
                 PUSH1_32(xRDX);
@@ -163,7 +163,7 @@ uintptr_t dynarec64_00_1(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     }
                 } else {
                     if (MODREG) { // reg <= reg
-                        AND(gd, TO_NAT((nextop & 7) + (rex.b << 3)), xMASK);
+                        ZEXTW2(gd, TO_NAT((nextop & 7) + (rex.b << 3)));
                     } else { // mem <= reg
                         SMREAD();
                         addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
diff --git a/src/dynarec/rv64/dynarec_rv64_00_2.c b/src/dynarec/rv64/dynarec_rv64_00_2.c
index 24968c74..72588467 100644
--- a/src/dynarec/rv64/dynarec_rv64_00_2.c
+++ b/src/dynarec/rv64/dynarec_rv64_00_2.c
@@ -257,7 +257,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                         emit_cmp32(dyn, ninst, rex, ed, x2, x3, x4, x5, x6);
                     } else {
                         if (!rex.w && MODREG) {
-                            AND(x1, ed, xMASK);
+                            ZEXTW2(x1, ed);
                             ed = x1;
                         }
                         emit_cmp32_0(dyn, ninst, rex, ed, x3, x4);
@@ -967,7 +967,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     if (rex.w) {
                         MV(x1, xRAX);
                     } else {
-                        AND(x1, xRAX, xMASK);
+                        ZEXTW2(x1, xRAX);
                     }
                     ANDI(x2, xFlags, 1 << F_DF);
                     BNEZ_MARK2(x2);
diff --git a/src/dynarec/rv64/dynarec_rv64_00_3.c b/src/dynarec/rv64/dynarec_rv64_00_3.c
index 8afa8442..efdbfba2 100644
--- a/src/dynarec/rv64/dynarec_rv64_00_3.c
+++ b/src/dynarec/rv64/dynarec_rv64_00_3.c
@@ -1199,13 +1199,13 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                         MUL(xRAX, xRAX, ed);
                         if (gd != xRDX) { MV(xRDX, gd); }
                     } else {
-                        AND(x3, xRAX, xMASK);
+                        ZEXTW2(x3, xRAX);
                         if (MODREG) {
-                            AND(x4, ed, xMASK);
+                            ZEXTW2(x4, ed);
                             ed = x4;
                         }
                         MUL(xRDX, x3, ed); // 64 <- 32x32
-                        AND(xRAX, xRDX, xMASK);
+                        ZEXTW2(xRAX, xRDX);
                         SRLI(xRDX, xRDX, 32);
                     }
                     IFX (X_CF | X_OF) {
@@ -1237,7 +1237,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     } else {
                         ADDIW(x3, xRAX, 0); // sign extend 32bits-> 64bits
                         MUL(xRDX, x3, ed);  // 64 <- 32x32
-                        AND(xRAX, xRDX, xMASK);
+                        ZEXTW2(xRAX, xRDX);
                         SRLI(xRDX, xRDX, 32);
                     }
                     IFX (X_CF | X_OF) {
@@ -1282,15 +1282,15 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                                 MARK3;
                             }
                             SLLI(x3, xRDX, 32);
-                            AND(x2, xRAX, xMASK);
+                            ZEXTW2(x2, xRAX);
                             OR(x3, x3, x2);
                             if (MODREG) {
-                                AND(x4, ed, xMASK);
+                                ZEXTW2(x4, ed);
                                 ed = x4;
                             }
                             DIVU(x2, x3, ed);
                             REMU(xRDX, x3, ed);
-                            AND(xRAX, x2, xMASK);
+                            ZEXTW2(xRAX, x2);
                             ZEROUP(xRDX);
                         }
                     } else {
@@ -1352,11 +1352,11 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                             MARK3;
                         }
                         SLLI(x3, xRDX, 32);
-                        AND(x2, xRAX, xMASK);
+                        ZEXTW2(x2, xRAX);
                         OR(x3, x3, x2);
                         DIV(x2, x3, ed);
                         REM(xRDX, x3, ed);
-                        AND(xRAX, x2, xMASK);
+                        ZEXTW2(xRAX, x2);
                         ZEROUP(xRDX);
                     } else {
                         if (ninst && dyn->insts
diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c
index 12f7fb03..28b8fc96 100644
--- a/src/dynarec/rv64/dynarec_rv64_0f.c
+++ b/src/dynarec/rv64/dynarec_rv64_0f.c
@@ -68,7 +68,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 switch (nextop) {
                     case 0xD0:
                         INST_NAME("XGETBV");
-                        AND(x1, xRCX, xMASK);
+                        ZEXTW2(x1, xRCX);
                         BEQZ_MARK(x1);
                         UDF();
                         MARK;
@@ -99,7 +99,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                             SRLI(x3, x3, box64_rdtsc_shift);
                         }
                         SRLI(xRDX, x3, 32);
-                        AND(xRAX, x3, xMASK); // wipe upper part
+                        ZEXTW2(xRAX, x3); // wipe upper part
                         MV(xRCX, xZR);        // IA32_TSC, 0 for now
                         break;
                     default:
@@ -424,7 +424,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 SRLI(x3, x3, box64_rdtsc_shift);
             }
             SRLI(xRDX, x3, 32);
-            AND(xRAX, x3, xMASK); // wipe upper part
+            ZEXTW2(xRAX, x3); // wipe upper part
             break;
         case 0x38:
             // SSE3
@@ -1425,7 +1425,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             if (MODREG) {
                 ed = TO_NAT((nextop & 7) + (rex.b << 3));
                 if (!rex.w) {
-                    AND(x4, ed, xMASK);
+                    ZEXTW2(x4, ed);
                     ed = x4;
                 }
             } else {
@@ -2181,7 +2181,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             GETED(0);
             GETGD;
             if (!rex.w && MODREG) {
-                AND(x4, ed, xMASK);
+                ZEXTW2(x4, ed);
                 ed = x4;
             }
             BNE_MARK(ed, xZR);
@@ -2200,7 +2200,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             GETED(0);
             GETGD;
             if (!rex.w && MODREG) {
-                AND(x4, ed, xMASK);
+                ZEXTW2(x4, ed);
                 ed = x4;
             }
             BNE_MARK(ed, xZR);
diff --git a/src/dynarec/rv64/dynarec_rv64_67.c b/src/dynarec/rv64/dynarec_rv64_67.c
index c3d6ef18..06f17f9b 100644
--- a/src/dynarec/rv64/dynarec_rv64_67.c
+++ b/src/dynarec/rv64/dynarec_rv64_67.c
@@ -477,7 +477,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 }
             } else {
                 if (MODREG) { // reg <= reg
-                    AND(gd, TO_NAT((nextop & 7) + (rex.b << 3)), xMASK);
+                    ZEXTW2(gd, TO_NAT((nextop & 7) + (rex.b << 3)));
                 } else { // mem <= reg
                     SMREAD();
                     addr = geted32(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, &lock, 1, 0);
@@ -719,7 +719,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 DEFAULT;
             } else { // mem <= reg
                 addr = geted32(dyn, addr, ninst, nextop, &ed, gd, x1, &fixedaddress, rex, NULL, 0, 0);
-                AND(gd, ed, xMASK);
+                ZEXTW2(gd, ed);
             }
             break;
 
@@ -804,7 +804,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             SUBI(xRCX, xRCX, 1);
             ANDI(x1, xFlags, 1 << F_ZF);
             CBNZ_NEXT(x1);
-            AND(x1, xRCX, xMASK);
+            ZEXTW2(x1, xRCX);
             GO(0);
             break;
         case 0xE1:
@@ -814,20 +814,20 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             SUBI(xRCX, xRCX, 1);
             ANDI(x1, xFlags, 1 << F_ZF);
             CBZ_NEXT(x1);
-            AND(x1, xRCX, xMASK);
+            ZEXTW2(x1, xRCX);
             GO(0);
             break;
         case 0xE2:
             INST_NAME("LOOP (32bits)");
             i8 = F8S;
             SUBI(xRCX, xRCX, 1);
-            AND(x1, xRCX, xMASK);
+            ZEXTW2(x1, xRCX);
             GO(0);
             break;
         case 0xE3:
             INST_NAME("JECXZ (32bits)");
             i8 = F8S;
-            AND(x1, xRCX, xMASK);
+            ZEXTW2(x1, xRCX);
             GO(1);
             break;
 #undef GO
@@ -852,7 +852,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                         if (gd != xRDX) MV(xRDX, gd);
                     } else {
                         MUL(xRDX, xRAX, ed); // 64 <- 32x32
-                        AND(xRAX, xRDX, xMASK);
+                        ZEXTW2(xRAX, xRDX);
                         SRLIW(xRDX, xRDX, 32);
                     }
                     UFLAG_RES(xRAX);
diff --git a/src/dynarec/rv64/dynarec_rv64_emit_math.c b/src/dynarec/rv64/dynarec_rv64_emit_math.c
index cfa08eac..f33b0970 100644
--- a/src/dynarec/rv64/dynarec_rv64_emit_math.c
+++ b/src/dynarec/rv64/dynarec_rv64_emit_math.c
@@ -34,11 +34,11 @@ void emit_add32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
     }
     IFX (X_CF) {
         if (rex.w) {
-            AND(s5, xMASK, s1);
+            ZEXTW2(s5, s1);
             if (rv64_zba) // lo
                 ADDUW(s5, s2, s5);
             else {
-                AND(s4, xMASK, s2);
+                ZEXTW2(s4, s2);
                 ADD(s5, s5, s4);
             }
             SRLI(s3, s1, 0x20);
@@ -48,8 +48,8 @@ void emit_add32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
             ADD(s5, s5, s4); // hi
             SRAI(s5, s5, 0x20);
         } else {
-            AND(s3, s1, xMASK);
-            AND(s4, s2, xMASK);
+            ZEXTW2(s3, s1);
+            ZEXTW2(s4, s2);
             ADD(s5, s3, s4);
             SRLI(s5, s5, 0x20);
         }
@@ -130,11 +130,11 @@ void emit_add32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i
     }
     IFX (X_CF) {
         if (rex.w) {
-            AND(s5, xMASK, s1);
+            ZEXTW2(s5, s1);
             if (rv64_zba) // lo
                 ADDUW(s5, s2, s5);
             else {
-                AND(s4, xMASK, s2);
+                ZEXTW2(s4, s2);
                 ADD(s5, s5, s4);
             }
             SRLI(s3, s1, 0x20);
@@ -144,8 +144,8 @@ void emit_add32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i
             ADD(s5, s5, s4); // hi
             SRAI(s5, s5, 0x20);
         } else {
-            AND(s3, s1, xMASK);
-            AND(s4, s2, xMASK);
+            ZEXTW2(s3, s1);
+            ZEXTW2(s4, s2);
             ADD(s5, s3, s4);
             SRLI(s5, s5, 0x20);
         }
@@ -1353,11 +1353,11 @@ void emit_adc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
     }
     IFX (X_CF) {
         if (rex.w) {
-            AND(s5, xMASK, s1);
+            ZEXTW2(s5, s1);
             if (rv64_zba)
                 ADDUW(s5, s2, s5);
             else {
-                AND(s4, xMASK, s2);
+                ZEXTW2(s4, s2);
                 ADD(s5, s5, s4);
             } // lo
             ANDI(s3, xFlags, 1);
@@ -1369,8 +1369,8 @@ void emit_adc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
             ADD(s5, s5, s4); // hi
             SRAI(s6, s5, 0x20);
         } else {
-            AND(s3, s1, xMASK);
-            AND(s4, s2, xMASK);
+            ZEXTW2(s3, s1);
+            ZEXTW2(s4, s2);
             ADD(s5, s3, s4);
             ANDI(s3, xFlags, 1);
             ADD(s5, s5, s3); // add carry
diff --git a/src/dynarec/rv64/dynarec_rv64_emit_tests.c b/src/dynarec/rv64/dynarec_rv64_emit_tests.c
index 7c276f7d..6783079b 100644
--- a/src/dynarec/rv64/dynarec_rv64_emit_tests.c
+++ b/src/dynarec/rv64/dynarec_rv64_emit_tests.c
@@ -246,8 +246,8 @@ void emit_cmp32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
                 SEXT_W(s3, s1);
                 SEXT_W(s4, s2);
             } else {
-                AND(s3, s1, xMASK);
-                AND(s4, s2, xMASK);
+                ZEXTW2(s3, s1);
+                ZEXTW2(s4, s2);
             }
             NAT_FLAGS_OPS(s3, s4);
         }
@@ -290,7 +290,7 @@ void emit_cmp32_0(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s3, int
             if (dyn->insts[ninst].nat_flags_needsign) {
                 SEXT_W(s3, s1);
             } else {
-                AND(s3, s1, xMASK);
+                ZEXTW2(s3, s1);
             }
             NAT_FLAGS_OPS(s3, xZR);
         }
diff --git a/src/dynarec/rv64/dynarec_rv64_f0.c b/src/dynarec/rv64/dynarec_rv64_f0.c
index b4e529a7..93b7cca3 100644
--- a/src/dynarec/rv64/dynarec_rv64_f0.c
+++ b/src/dynarec/rv64/dynarec_rv64_f0.c
@@ -312,10 +312,10 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                                 AMOSWAP_W(xZR, xZR, x7, 1, 1);
                             } else {
                                 SMDMB();
-                                AND(x3, xRAX, xMASK);
+                                ZEXTW2(x3, xRAX);
                                 SLLI(x2, xRDX, 32);
                                 OR(x3, x3, x2);
-                                AND(x4, xRBX, xMASK);
+                                ZEXTW2(x4, xRBX);
                                 SLLI(x2, xRCX, 32);
                                 OR(x4, x4, x2);
                                 MARKLOCK;
@@ -327,7 +327,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                                 B_MARK3_nocond;
                                 MARK;
                                 SLLI(xRDX, x2, 32);
-                                AND(xRAX, x2, xMASK);
+                                ZEXTW2(xRAX, x2);
                                 MARK3;
                                 SMDMB();
                             }
diff --git a/src/dynarec/rv64/dynarec_rv64_f30f.c b/src/dynarec/rv64/dynarec_rv64_f30f.c
index 42a67929..58daf454 100644
--- a/src/dynarec/rv64/dynarec_rv64_f30f.c
+++ b/src/dynarec/rv64/dynarec_rv64_f30f.c
@@ -437,7 +437,7 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             GETED(0);
             GETGD;
             if (!rex.w && MODREG) {
-                AND(x4, ed, xMASK);
+                ZEXTW2(x4, ed);
                 ed = x4;
             }
             CLEAR_FLAGS();
@@ -479,7 +479,7 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             GETED(0);
             GETGD;
             if (!rex.w && MODREG) {
-                AND(x4, ed, xMASK);
+                ZEXTW2(x4, ed);
                 ed = x4;
             }
             ANDI(xFlags, xFlags, ~((1 << F_ZF) | (1 << F_CF)));
@@ -500,7 +500,7 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             GETED(0);
             GETGD;
             if (!rex.w && MODREG) {
-                AND(x4, ed, xMASK);
+                ZEXTW2(x4, ed);
                 ed = x4;
             }
             BNE_MARK(ed, xZR);
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c
index af55ce5e..a3df47d9 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.c
+++ b/src/dynarec/rv64/dynarec_rv64_helper.c
@@ -247,7 +247,7 @@ static uintptr_t geted_32(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_
         } else {
             ret = TO_NAT(nextop & 7);
             if (ret == hint) {
-                AND(hint, ret, xMASK); // to clear upper part
+                ZEXTW2(hint, ret); // to clear upper part
             }
         }
     } else {
@@ -402,7 +402,7 @@ uintptr_t geted32(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop
         } else {
             ret = TO_NAT((nextop & 7) + (rex.b << 3));
             if (ret == hint) {
-                AND(hint, ret, xMASK); // to clear upper part
+                ZEXTW2(hint, ret); // to clear upper part
             }
         }
     } else {
@@ -827,10 +827,6 @@ void call_c(dynarec_rv64_t* dyn, int ninst, void* fnc, int reg, int ret, int sav
     if (ret != xRIP)
         LD(xRIP, xEmu, offsetof(x64emu_t, ip));
 
-    // regenerate mask
-    XORI(xMASK, xZR, -1);
-    SRLI(xMASK, xMASK, 32);
-
     // reinitialize sew
     if (dyn->vector_sew != VECTOR_SEWNA)
         vector_vsetvli(dyn, ninst, x3, dyn->vector_sew, VECTOR_LMUL1, 1);
@@ -857,9 +853,6 @@ void call_n(dynarec_rv64_t* dyn, int ninst, void* fnc, int w)
         MV(xRDX, A1);
     }
     // all done, restore all regs
-    // regenerate mask
-    XORI(xMASK, xZR, -1);
-    SRLI(xMASK, xMASK, 32);
 
     // reinitialize sew
     if (dyn->vector_sew != VECTOR_SEWNA)
@@ -1091,7 +1084,7 @@ void x87_purgecache(dynarec_rv64_t* dyn, int ninst, int next, int s1, int s2, in
         if (a > 0) {
             SLLI(s1, s1, a * 2);
         } else {
-            SLLI(s3, xMASK, 16); // 0xffff0000 (plus some unused hipart)
+            MOV32w(s3, 0xffff0000);
             OR(s1, s1, s3);
             SRLI(s1, s1, -a * 2);
         }
@@ -1187,7 +1180,7 @@ static void x87_reflectcache(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int
         if (a > 0) {
             SLLI(s1, s1, a * 2);
         } else {
-            SLLI(s3, xMASK, 16); // 0xffff0000
+            MOV32w(s3, 0xffff0000);
             OR(s1, s1, s3);
             SRLI(s1, s1, -a * 2);
         }
@@ -1240,7 +1233,7 @@ static void x87_unreflectcache(dynarec_rv64_t* dyn, int ninst, int s1, int s2, i
         // update tags
         LH(s1, xEmu, offsetof(x64emu_t, fpu_tags));
         if (a > 0) {
-            SLLI(s3, xMASK, 16); // 0xffff0000
+            MOV32w(s3, 0xffff0000);
             OR(s1, s1, s3);
             SRLI(s1, s1, a * 2);
         } else {
@@ -2726,7 +2719,7 @@ static void fpuCacheTransform(dynarec_rv64_t* dyn, int ninst, int s1, int s2, in
         if (a > 0) {
             SLLI(s2, s2, a * 2);
         } else {
-            SLLI(s3, xMASK, 16); // 0xffff0000
+            MOV32w(s3, 0xffff0000);
             OR(s2, s2, s3);
             SRLI(s2, s2, -a * 2);
         }
diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h
index ecac8a46..9200b358 100644
--- a/src/dynarec/rv64/rv64_emitter.h
+++ b/src/dynarec/rv64/rv64_emitter.h
@@ -32,8 +32,19 @@
         }                   \
     } while (0)
 
-// ZERO the upper part
-#define ZEROUP(r) AND(r, r, xMASK)
+// Zero-extend the low 32 bits of rs1 into rd (upper 32 bits cleared): uses ZEXTW with zba, TH_EXTU with xtheadbb, else an SLLI/SRLI-by-32 pair
+#define ZEXTW2(rd, rs1)              \
+    do {                             \
+        if (rv64_zba) {              \
+            ZEXTW(rd, rs1);          \
+        } else if (rv64_xtheadbb) {  \
+            TH_EXTU(rd, rs1, 31, 0); \
+        } else {                     \
+            SLLI(rd, rs1, 32);       \
+            SRLI(rd, rd, 32);        \
+        }                            \
+    } while (0)
+#define ZEROUP(r) ZEXTW2(r, r) // in-place form: r is both source and destination
 
 #define R_type(funct7, rs2, rs1, funct3, rd, opcode) ((funct7) << 25 | (rs2) << 20 | (rs1) << 15 | (funct3) << 12 | (rd) << 7 | (opcode))
 #define I_type(imm12, rs1, funct3, rd, opcode)       ((imm12) << 20 | (rs1) << 15 | (funct3) << 12 | (rd) << 7 | (opcode))
@@ -145,14 +156,14 @@
         if (rex.w) {             \
             MV(rd, rs1);         \
         } else {                 \
-            AND(rd, rs1, xMASK); \
+            ZEXTW2(rd, rs1);     \
         }                        \
     } while (0)
 // rd = rs1 (pseudo instruction)
 #define MVz(rd, rs1)             \
     do {                         \
         if (rex.is32bits) {      \
-            AND(rd, rs1, xMASK); \
+            ZEXTW2(rd, rs1);     \
         } else {                 \
             MV(rd, rs1);         \
         }                        \
@@ -760,7 +771,7 @@
             SUBI(u8, u8, 32);                \
             MV(s2, s3);                      \
         } else {                             \
-            AND(s2, rs, xMASK);              \
+            ZEXTW2(s2, rs);                  \
         }                                    \
         SRLI(s3, s2, 16);                    \
         BEQZ(s3, 4 + 2 * 4);                 \
@@ -856,7 +867,7 @@
 // Insert low 16bits in rs to low 16bits of rd
 #define INSHz(rd, rs, s1, s2, init_s1, zexth_rs) \
     INSH(rd, rs, s1, s2, init_s1, zexth_rs)      \
-    if (rex.is32bits) AND(rd, rd, xMASK);
+    if (rex.is32bits) ZEXTW2(rd, rd);
 
 // Rotate left (register)
 #define ROL(rd, rs1, rs2) EMIT(R_type(0b0110000, rs2, rs1, 0b001, rd, 0b0110011))
@@ -939,7 +950,7 @@
         }                              \
     }                                  \
     if (!rex.w)                        \
-        AND(rd, rd, xMASK);
+        ZEXTW2(rd, rd);
 
 
 // Zbc
diff --git a/src/dynarec/rv64/rv64_mapping.h b/src/dynarec/rv64/rv64_mapping.h
index bc40184d..93ecdf23 100644
--- a/src/dynarec/rv64/rv64_mapping.h
+++ b/src/dynarec/rv64/rv64_mapping.h
@@ -10,7 +10,7 @@ x1      ra     native ra    Return address                  N/A
 x2      sp     native sp    Stack pointer                   N/A                     Callee
 x3      gp     native gp    Global pointer                  N/A                     —
 x4      tp     native tp    Thread pointer                  N/A                     —
-x5      t0     xMask        Temporary                       Always 0xFFFFFFFF       Caller
+x5      t0     -            Temporary                       Unused                  Caller
 x6      t1     x1           Temporary                       Scratch                 Caller
 x7      t2     x2           Temporary                       Scratch                 Caller
 x8      s0/fp  RBP          Saved register/frame pointer    -                       Callee
@@ -22,7 +22,7 @@ x13     a3     RCX          Function argument               -
 x14     a4     R8           Function argument               -                       Caller
 x15     a5     R9           Function argument               -                       Caller
 x16     a6     RAX          Function argument               -                       Caller
-x17     a7     x7           Function argument               The Emu struct          Caller
+x17     a7     x7           Function argument               Scratch                 Caller
 x18     s2     R12          Saved register                  -                       Callee
 x19     s3     R13          Saved register                  -                       Callee
 x20     s4     R14          Saved register                  -                       Callee
@@ -30,7 +30,7 @@ x21     s5     R15          Saved register                  -
 x22     s6     RIP          Saved register                  -                       Callee
 x23     s7     FLAGS        Saved register                  -                       Callee
 x24     s8     RBX          Saved register                  -                       Callee
-x25     s9     xEmu         Saved register                  Scratch                 Callee
+x25     s9     xEmu         Saved register                  The Emu struct          Callee
 x26     s10    R10          Saved register                  -                       Callee
 x27     s11    R11          Saved register                  -                       Callee
 x28     t3     x3           Temporary                       Scratch                 Caller
@@ -74,7 +74,6 @@ x31     t6     x6           Temporary                       Scratch
 #define x6   31
 #define xEmu 25
 
-#define xMASK 5
 #define x7    17
 
 #define xRA 1
@@ -112,7 +111,6 @@ x31     t6     x6           Temporary                       Scratch
 #define RIP   x22
 #define Flags x23
 #define Emu   x25
-#define MASK  x5
 
 #endif
 
diff --git a/src/dynarec/rv64/rv64_next.S b/src/dynarec/rv64/rv64_next.S
index a631aac3..5d4daa0a 100644
--- a/src/dynarec/rv64/rv64_next.S
+++ b/src/dynarec/rv64/rv64_next.S
@@ -55,10 +55,6 @@ rv64_next:
 
     addi    sp,  sp,  (8 * 10)
 
-    // setup MASK
-    xori    MASK, zero, -1
-    srli    MASK, MASK, 32
-
     // return offset is jump address
     jr      t3
 
diff --git a/src/dynarec/rv64/rv64_prolog.S b/src/dynarec/rv64/rv64_prolog.S
index 7440b905..86f21ce2 100644
--- a/src/dynarec/rv64/rv64_prolog.S
+++ b/src/dynarec/rv64/rv64_prolog.S
@@ -78,9 +78,5 @@ rv64_prolog:
     sd      zero, -8(sp)
     addi    sp, sp, -16
 
-    // setup MASK
-    xori    MASK, zero, -1
-    srli    MASK, MASK, 32
-
     // jump to block
     jr      t6