about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_00_2.c | 2
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_00_3.c | 2
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_0f.c | 11
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_66.c | 42
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_660f.c | 25
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_66f0.c | 9
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_emit_logic.c | 2
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_emit_shift.c | 128
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_helper.h | 2
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_private.h | 4
10 files changed, 130 insertions, 97 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_00_2.c b/src/dynarec/rv64/dynarec_rv64_00_2.c
index fbabc49a..1232a973 100644
--- a/src/dynarec/rv64/dynarec_rv64_00_2.c
+++ b/src/dynarec/rv64/dynarec_rv64_00_2.c
@@ -547,7 +547,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
         case 0x9F:
             INST_NAME("LAHF");
             READFLAGS(X_CF | X_PF | X_AF | X_ZF | X_SF);
-            ANDI(x1, xFlags, 0xFF);
+            ANDI(x1, xFlags, 0b11010111); // leave reserved bits out (we are using one as OF2)
             SLLI(x1, x1, 8);
             MOV64x(x2, 0xffffffffffff00ffLL);
             AND(xRAX, xRAX, x2);
diff --git a/src/dynarec/rv64/dynarec_rv64_00_3.c b/src/dynarec/rv64/dynarec_rv64_00_3.c
index 3cf4bcd9..790d00bb 100644
--- a/src/dynarec/rv64/dynarec_rv64_00_3.c
+++ b/src/dynarec/rv64/dynarec_rv64_00_3.c
@@ -198,6 +198,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     GETEDW(x4, x1, 0);
                     CALL_(rex.w?((void*)rcl64):((void*)rcl32), ed, x4);
                     WBACK;
+                    if(!wback && !rex.w) ZEROUP(ed);
                     break;
                 case 3:
                     INST_NAME("RCR Ed, Ib");
@@ -209,6 +210,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     GETEDW(x4, x1, 0);
                     CALL_(rex.w?((void*)rcr64):((void*)rcr32), ed, x4);
                     WBACK;
+                    if(!wback && !rex.w) ZEROUP(ed);
                     break;
                 case 4:
                 case 6:
diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c
index eb80de3d..7b3c26ae 100644
--- a/src/dynarec/rv64/dynarec_rv64_0f.c
+++ b/src/dynarec/rv64/dynarec_rv64_0f.c
@@ -2068,13 +2068,13 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                         ORI(xFlags, xFlags, 1 << F_CF);
                         XOR(ed, ed, x6);
                     }
+                    MARK;
                     if (wback) {
                         SDxw(ed, wback, fixedaddress);
                         SMWRITE();
                     } else if(!rex.w) {
                         ZEROUP(ed);
                     }
-                    MARK;
                     break;
                 case 7:
                     INST_NAME("BTC Ed, Ib");
@@ -2219,11 +2219,14 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             nextop = F8;
             GETEB(x1, 0);
             GETGB(x2);
-            MV(x9, ed);
+            if (!(MODREG && wback == gb1 && !!(wb2) == !!(gb2)))
+                MV(x9, ed);
             emit_add8(dyn, ninst, ed, gd, x4, x5);
-            MV(gd, x9);
+            if (!(MODREG && wback == gb1 && !!(wb2) == !!(gb2)))
+                MV(gd, x9);
             EBBACK(x5, 0);
-            GBBACK(x5);
+            if (!(MODREG && wback == gb1 && !!(wb2) == !!(gb2)))
+                GBBACK(x5);
             break;
         case 0xC1:
             INST_NAME("XADD Ed, Gd");
diff --git a/src/dynarec/rv64/dynarec_rv64_66.c b/src/dynarec/rv64/dynarec_rv64_66.c
index b68ae692..18474468 100644
--- a/src/dynarec/rv64/dynarec_rv64_66.c
+++ b/src/dynarec/rv64/dynarec_rv64_66.c
@@ -154,9 +154,9 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("ADC AX, Iw");
             READFLAGS(X_CF);
             SETFLAGS(X_ALL, SF_SET_PENDING);
-            i16 = F16;
+            u64 = F16;
             ZEXTH(x1, xRAX);
-            MOV32w(x2, i16);
+            MOV64x(x2, u64);
             emit_adc16(dyn, ninst, x1, x2, x3, x4, x5);
             INSH(xRAX, x1, x3, x4, 1, 0);
             break;
@@ -185,8 +185,8 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             READFLAGS(X_CF);
             SETFLAGS(X_ALL, SF_SET_PENDING);
             ZEXTH(x1, xRAX);
-            i16 = F16;
-            MOV64xw(x2, i16);
+            u64 = F16;
+            MOV64x(x2, u64);
             emit_sbb16(dyn, ninst, x1, x2, x3, x4, x5);
             INSH(xRAX, x1, x3, x4, 1, 0);
             break;
@@ -455,8 +455,8 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     if(opcode==0x81) {INST_NAME("ADD Ew, Iw");} else {INST_NAME("ADD Ew, Ib");}
                     SETFLAGS(X_ALL, SF_SET_PENDING);
                     GETEW(x1, (opcode==0x81)?2:1);
-                    if(opcode==0x81) i16 = F16S; else i16 = F8S;
-                    MOV64x(x5, i16);
+                    if(opcode==0x81) u64 = F16; else u64 = F8;
+                    MOV64x(x5, u64);
                     emit_add16(dyn, ninst, ed, x5, x2, x4, x6);
                     EWBACK;
                     break;
@@ -464,8 +464,8 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     if(opcode==0x81) {INST_NAME("OR Ew, Iw");} else {INST_NAME("OR Ew, Ib");}
                     SETFLAGS(X_ALL, SF_SET_PENDING);
                     GETEW(x1, (opcode==0x81)?2:1);
-                    if(opcode==0x81) i16 = F16S; else i16 = F8S;
-                    MOV64x(x5, i16);
+                    if(opcode==0x81) u64 = F16; else u64 = F8;
+                    MOV64x(x5, u64);
                     emit_or16(dyn, ninst, x1, x5, x2, x4);
                     EWBACK;
                     break;
@@ -474,8 +474,8 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     READFLAGS(X_CF);
                     SETFLAGS(X_ALL, SF_SET_PENDING);
                     GETEW(x1, (opcode==0x81)?2:1);
-                    if(opcode==0x81) i16 = F16S; else i16 = F8S;
-                    MOV64x(x5, i16);
+                    if(opcode==0x81) u64 = F16; else u64 = F8;
+                    MOV64x(x5, u64);
                     emit_adc16(dyn, ninst, x1, x5, x2, x4, x6);
                     EWBACK;
                     break;
@@ -484,8 +484,8 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     READFLAGS(X_CF);
                     SETFLAGS(X_ALL, SF_SET_PENDING);
                     GETEW(x1, (opcode==0x81)?2:1);
-                    if(opcode==0x81) i16 = F16S; else i16 = F8S;
-                    MOV64x(x5, i16);
+                    if(opcode==0x81) u64 = F16; else u64 = F8;
+                    MOV64x(x5, u64);
                     emit_sbb16(dyn, ninst, x1, x5, x2, x4, x6);
                     EWBACK;
                     break;
@@ -493,8 +493,8 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     if(opcode==0x81) {INST_NAME("AND Ew, Iw");} else {INST_NAME("AND Ew, Ib");}
                     SETFLAGS(X_ALL, SF_SET_PENDING);
                     GETEW(x1, (opcode==0x81)?2:1);
-                    if(opcode==0x81) i16 = F16S; else i16 = F8S;
-                    MOV64x(x5, i16);
+                    if(opcode==0x81) u64 = F16; else u64 = F8;
+                    MOV64x(x5, u64);
                     emit_and16(dyn, ninst, x1, x5, x2, x4);
                     EWBACK;
                     break;
@@ -502,8 +502,8 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     if(opcode==0x81) {INST_NAME("SUB Ew, Iw");} else {INST_NAME("SUB Ew, Ib");}
                     SETFLAGS(X_ALL, SF_SET_PENDING);
                     GETEW(x1, (opcode==0x81)?2:1);
-                    if(opcode==0x81) i16 = F16S; else i16 = F8S;
-                    MOV32w(x5, i16);
+                    if(opcode==0x81) u64 = F16; else u64 = F8;
+                    MOV64x(x5, u64);
                     emit_sub16(dyn, ninst, x1, x5, x2, x4, x6);
                     EWBACK;
                     break;
@@ -511,8 +511,8 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     if(opcode==0x81) {INST_NAME("XOR Ew, Iw");} else {INST_NAME("XOR Ew, Ib");}
                     SETFLAGS(X_ALL, SF_SET_PENDING);
                     GETEW(x1, (opcode==0x81)?2:1);
-                    if(opcode==0x81) i16 = F16S; else i16 = F8S;
-                    MOV32w(x5, i16);
+                    if(opcode==0x81) u64 = F16; else u64 = F8;
+                    MOV64x(x5, u64);
                     emit_xor16(dyn, ninst, x1, x5, x2, x4, x6);
                     EWBACK;
                     break;
@@ -520,9 +520,9 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     if(opcode==0x81) {INST_NAME("CMP Ew, Iw");} else {INST_NAME("CMP Ew, Ib");}
                     SETFLAGS(X_ALL, SF_SET_PENDING);
                     GETEW(x1, (opcode==0x81)?2:1);
-                    if(opcode==0x81) i16 = F16S; else i16 = F8S;
-                    if(i16) {
-                        MOV64x(x2, i16);
+                    if(opcode==0x81) u64 = F16; else u64 = F8;
+                    if(u64) {
+                        MOV64x(x2, u64);
                         emit_cmp16(dyn, ninst, x1, x2, x3, x4, x5, x6);
                     } else
                         emit_cmp16_0(dyn, ninst, x1, x3, x4);
diff --git a/src/dynarec/rv64/dynarec_rv64_660f.c b/src/dynarec/rv64/dynarec_rv64_660f.c
index a3c57441..ae8a7522 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f.c
@@ -2302,7 +2302,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             GETEW(x1, 0);
             GETGW(x2);
             u8 = F8;
-            emit_shrd16c(dyn, ninst, rex, ed, gd, u8, x3, x4);
+            emit_shrd16c(dyn, ninst, rex, ed, gd, u8, x3, x4, x5);
             EWBACK;
             break;
         case 0xAF:
@@ -2420,8 +2420,8 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                         ORI(xFlags, xFlags, 1 << F_CF);
                         XOR(ed, ed, x6);
                     }
-                    EWBACK;
                     MARK;
+                    EWBACK;
                     break;
                 case 7:
                     INST_NAME("BTC Ew, Ib");
@@ -2433,7 +2433,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     BEXTI(x3, ed, u8); // F_CF is 1
                     ANDI(xFlags, xFlags, ~1);
                     OR(xFlags, xFlags, x3);
-                    if (u8 <= 0x10) {
+                    if (u8 <= 10) {
                         XORI(ed, ed, (1LL << u8));
                     } else {
                         MOV64xw(x3, (1LL << u8));
@@ -2547,11 +2547,14 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             nextop = F8;
             GETGW(x1);
             GETEW(x2, 0);
-            MV(x9, ed);
+            if (!(MODREG && wback == xRAX + ((nextop & 0x38) >> 3) + (rex.r << 3)))
+                MV(x9, ed);
             emit_add16(dyn, ninst, ed, gd, x4, x5, x6);
-            MV(gd, x9);
+            if (!(MODREG && wback == xRAX + ((nextop & 0x38) >> 3) + (rex.r << 3)))
+                MV(gd, x9);
             EWBACK;
-            GWBACK;
+            if (!(MODREG && wback == xRAX + ((nextop & 0x38) >> 3) + (rex.r << 3)))
+                GWBACK;
             break;
         case 0xC2:
             INST_NAME("CMPPD Gx, Ex, Ib");
@@ -2656,13 +2659,13 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 REV8xw(gd, gd, x1, x2, x3, x4);
             } else {
                 ANDI(x1, gd, 0xff);
-                SLLI(x1, x1, 8);
                 SRLI(x2, gd, 8);
+                SLLI(x1, x1, 8);
                 ANDI(x2, x2, 0xff);
-                SRLI(x3, gd, 16);
-                SLLI(x4, x3, 16);
-                AND(x1, x4, x1);
-                AND(gd, x1, x2);
+                SRLI(gd, gd, 16);
+                OR(x1, x1, x2);
+                SLLI(gd, gd, 16);
+                OR(gd, gd, x1);
             }
             break;
         case 0xD1:
diff --git a/src/dynarec/rv64/dynarec_rv64_66f0.c b/src/dynarec/rv64/dynarec_rv64_66f0.c
index 84164227..6e51ebc9 100644
--- a/src/dynarec/rv64/dynarec_rv64_66f0.c
+++ b/src/dynarec/rv64/dynarec_rv64_66f0.c
@@ -33,6 +33,7 @@ uintptr_t dynarec64_66F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
     uint8_t wback, wb1, wb2, gb1, gb2;
     int32_t i32;
     int64_t i64, j64;
+    uint64_t u64;
     int64_t fixedaddress;
     int unscaled;
     MAYUSE(gb1);
@@ -127,9 +128,9 @@ uintptr_t dynarec64_66F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     }
                     SETFLAGS(X_ALL, SF_SET_PENDING);
                     if(MODREG) {
-                        if(opcode==0x81) i32 = F16S; else i32 = F8S;
+                        if(opcode==0x81) u64 = F16; else u64 = F8;
                         ed = xRAX+(nextop&7)+(rex.b<<3);
-                        MOV32w(x5, i32);
+                        MOV64x(x5, u64);
                         ZEXTH(x6, ed);
                         emit_add16(dyn, ninst, x6, x5, x3, x4, x2);
                         SRLI(ed, ed, 16);
@@ -137,8 +138,8 @@ uintptr_t dynarec64_66F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                         OR(ed, ed, x6);
                     } else {
                         addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, (opcode==0x81)?2:1);
-                        if(opcode==0x81) i32 = F16S; else i32 = F8S;
-                        MOV32w(x5, i32);
+                        if(opcode==0x81) u64 = F16; else u64 = F8;
+                        MOV64x(x5, u64);
 
                         ANDI(x3, wback, 0b10);
                         BNEZ_MARK(x3);
diff --git a/src/dynarec/rv64/dynarec_rv64_emit_logic.c b/src/dynarec/rv64/dynarec_rv64_emit_logic.c
index 1352868b..e954b32e 100644
--- a/src/dynarec/rv64/dynarec_rv64_emit_logic.c
+++ b/src/dynarec/rv64/dynarec_rv64_emit_logic.c
@@ -422,7 +422,7 @@ void emit_and32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i
         MOV64xw(s3, c);
         AND(s1, s1, s3); // res = s1 & s2
     }
-    if (!rex.w && c<0 && c>=-2048) ZEROUP(s1);
+    if (!rex.w) ZEROUP(s1);
 
     IFX(X_PEND) {
         SDxw(s1, xEmu, offsetof(x64emu_t, res));
diff --git a/src/dynarec/rv64/dynarec_rv64_emit_shift.c b/src/dynarec/rv64/dynarec_rv64_emit_shift.c
index 8bda1867..62fc485b 100644
--- a/src/dynarec/rv64/dynarec_rv64_emit_shift.c
+++ b/src/dynarec/rv64/dynarec_rv64_emit_shift.c
@@ -710,14 +710,15 @@ void emit_shl32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
 // emit SHL32 instruction, from s1 , constant c, store result in s1 using s3, s4 and s5 as scratch
 void emit_shl32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4, int s5)
 {
-    if (!c) return;
+    if (!c) {
+        if (!rex.w) ZEROUP(s1);
+        return;
+    }
 
     CLEAR_FLAGS();
     IFX(X_PEND) {
-        if (c) {
-            MOV64x(s3, c);
-            SDxw(s3, xEmu, offsetof(x64emu_t, op2));
-        } else SDxw(xZR, xEmu, offsetof(x64emu_t, op2));
+        MOV64x(s3, c);
+        SDxw(s3, xEmu, offsetof(x64emu_t, op2));
         SDxw(s1, xEmu, offsetof(x64emu_t, op1));
         SET_DF(s4, rex.w?d_shl64:d_shl32);
     } else IFX(X_ALL) {
@@ -725,12 +726,10 @@ void emit_shl32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
     }
 
     IFX(X_CF|X_OF) {
-        if (c > 0) {
-            SRLI(s3, s1, (rex.w?64:32)-c);
-            ANDI(s5, s3, 1); // LSB == F_CF
-            IFX(X_CF) {
-                OR(xFlags, xFlags, s5);
-            }
+        SRLI(s3, s1, (rex.w?64:32)-c);
+        ANDI(s5, s3, 1); // LSB == F_CF
+        IFX(X_CF) {
+            OR(xFlags, xFlags, s5);
         }
     }
 
@@ -823,7 +822,10 @@ void emit_shr32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
 // emit SHR32 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch
 void emit_shr32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4)
 {
-    if (!c) return;
+    if (!c) {
+        if (!rex.w) ZEROUP(s1);
+        return;
+    }
 
     CLEAR_FLAGS();
 
@@ -837,12 +839,6 @@ void emit_shr32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
     } else IFX(X_ALL) {
         SET_DFNONE();
     }
-    if(!c) {
-        IFX(X_PEND) {
-            SDxw(s1, xEmu, offsetof(x64emu_t, res));
-        }
-        return;
-    }
     IFX(X_CF) {
         if (c > 1) {
             SRAI(s3, s1, c-1);
@@ -891,7 +887,10 @@ void emit_shr32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
 // emit SAR32 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch
 void emit_sar32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4)
 {
-    if (!c) return;
+    if (!c) {
+        if (!rex.w) ZEROUP(s1);
+        return;
+    }
 
     CLEAR_FLAGS();
 
@@ -905,12 +904,6 @@ void emit_sar32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
     } else IFX(X_ALL) {
         SET_DFNONE();
     }
-    if(!c) {
-        IFX(X_PEND) {
-            SDxw(s1, xEmu, offsetof(x64emu_t, res));
-        }
-        return;
-    }
     IFX(X_CF) {
         if (c > 1) {
             SRAI(s3, s1, c-1);
@@ -970,7 +963,12 @@ void emit_rol32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
     }
 
     if(rv64_zbb) {
-        ROLxw(s1, s1, s4);
+        if (rex.w) {
+            ROL(s1, s1, s4);
+        } else {
+            ROLW(s1, s1, s4);
+            ZEROUP(s1);
+        }
     } else {
         SLLxw(s3, s1, s4);
         NEG(s4, s4);
@@ -1021,7 +1019,12 @@ void emit_ror32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
     }
 
     if(rv64_zbb) {
-        RORxw(s1, s1, s4);
+        if (rex.w) {
+            ROR(s1, s1, s4);
+        } else {
+            RORW(s1, s1, s4);
+            ZEROUP(s1);
+        }
     } else {
         SRLxw(s3, s1, s4);
         NEG(s4, s4);
@@ -1055,7 +1058,10 @@ void emit_ror32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
 // emit ROL32 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch
 void emit_rol32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4)
 {
-    if (!c) return;
+    if (!c) {
+        if (!rex.w) ZEROUP(s1);
+        return;
+    }
 
     IFX(X_CF | X_OF) {
         ANDI(xFlags, xFlags, ~(1UL<<F_CF | 1UL<<F_OF2));
@@ -1068,12 +1074,6 @@ void emit_rol32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
     } else IFX(X_ALL) {
         SET_DFNONE();
     }
-    if(!c) {
-        IFX(X_PEND) {
-            SDxw(s1, xEmu, offsetof(x64emu_t, res));
-        }
-        return;
-    }
     if(rv64_zbb) {
         RORIxw(s1, s1, (rex.w?64:32)-c);
     } else if (rv64_xtheadbb) {
@@ -1107,7 +1107,10 @@ void emit_rol32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
 // emit ROR32 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch
 void emit_ror32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4)
 {
-    if (!c) return;
+    if (!c) {
+        if (!rex.w) ZEROUP(s1);
+        return;
+    }
 
     IFX(X_CF | X_OF) {
         ANDI(xFlags, xFlags, ~(1UL<<F_CF | 1UL<<F_OF2));
@@ -1120,12 +1123,6 @@ void emit_ror32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
     } else IFX(X_ALL) {
         SET_DFNONE();
     }
-    if(!c) {
-        IFX(X_PEND) {
-            SDxw(s1, xEmu, offsetof(x64emu_t, res));
-        }
-        return;
-    }
     if(rv64_zbb) {
         RORIxw(s1, s1, c);
     } else if (rv64_xtheadbb) {
@@ -1175,6 +1172,7 @@ void emit_shrd32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin
         SET_DFNONE();
     }
     if(!c) {
+        if (!rex.w) ZEROUP(s1);
         IFX(X_PEND) {
             SDxw(s1, xEmu, offsetof(x64emu_t, res));
         }
@@ -1227,9 +1225,9 @@ void emit_shrd32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin
     }
 }
 
-void emit_shrd16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4)
+void emit_shrd16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4, int s5)
 {
-    c&=15;
+    c&=0x1f;
     CLEAR_FLAGS();
 
     IFX(X_PEND) {
@@ -1248,6 +1246,11 @@ void emit_shrd16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin
         }
         return;
     }
+
+    // create concat first
+    SLLI(s5, s2, 16);
+    OR(s1, s1, s5);
+
     IFX(X_CF) {
         if (c > 1) {
             SRAI(s3, s1, c-1);
@@ -1263,9 +1266,13 @@ void emit_shrd16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin
         if (c == 1) SRLI(s4, s1, 15);
     }
 
-    SRLIxw(s3, s1, c);
-    SLLIxw(s1, s2, 16-c);
-    OR(s1, s1, s3);
+    if (rv64_zbb) {
+        RORIW(s1, s1, c);
+    } else {
+        SRLI(s5, s1, c);
+        SLLI(s1, s1, 32 - c);
+        OR(s1, s1, s5);
+    }
     ZEXTH(s1, s1);
 
     IFX(X_SF) {
@@ -1311,6 +1318,9 @@ void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin
     }
 
     if(!c) {
+        if (!rex.w) {
+            ZEROUP(s1);
+        }
         IFX(X_PEND) {
             SDxw(s1, xEmu, offsetof(x64emu_t, res));
         }
@@ -1429,6 +1439,7 @@ void emit_shld32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int
     } else IFX(X_ALL) {
         SET_DFNONE();
     }
+
     MOV32w(s3, (rex.w?64:32));
     SUB(s3, s3, s5);
     IFX(X_CF) {
@@ -1475,7 +1486,7 @@ void emit_shld32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int
 
 void emit_shld16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4, int s5)
 {
-    c&=15;
+    c&=0x1f;
     CLEAR_FLAGS();
     IFX(X_PEND) {
         if (c) {
@@ -1494,21 +1505,32 @@ void emit_shld16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin
         }
         return;
     }
+
+    // create concat first
+    SLLI(s5, s2, 16);
+    OR(s1, s1, s5);
+
     IFX(X_CF) {
-        if (c > 0) {
+        if (c < 16) {
             SRLI(s3, s1, 16-c);
-            ANDI(s5, s3, 1); // LSB == F_CF
-            OR(xFlags, xFlags, s5);
+        } else {
+            SRLI(s3, s2, 32-c);
         }
+        ANDI(s5, s3, 1); // LSB == F_CF
+        OR(xFlags, xFlags, s5);
     }
     IFX(X_OF) {
         // Store sign for later use.
         if (c == 1) SRLI(s5, s1, 15);
     }
 
-    SLLIxw(s3, s1, c);
-    SRLIxw(s1, s2, 16-c);
-    OR(s1, s1, s3);
+    if (rv64_zbb) {
+        RORIW(s1, s1, 32 - c);
+    } else {
+        SLLI(s3, s1, c);
+        SRLI(s1, s1, 32 - c);
+        OR(s1, s1, s3);
+    }
     ZEXTH(s1, s1);
 
     IFX(X_SF) {
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h
index 9c36d4fe..93bd0566 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.h
+++ b/src/dynarec/rv64/dynarec_rv64_helper.h
@@ -1341,7 +1341,7 @@ void emit_shrd32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin
 void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4);
 void emit_shrd32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s5, int s3, int s4, int s6);
 void emit_shld32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s5, int s3, int s4, int s6);
-void emit_shrd16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4);
+void emit_shrd16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4, int s5);
 void emit_shld16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4, int s5);
 
 void emit_pf(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4);
diff --git a/src/dynarec/rv64/dynarec_rv64_private.h b/src/dynarec/rv64/dynarec_rv64_private.h
index aeda741c..9d3240fc 100644
--- a/src/dynarec/rv64/dynarec_rv64_private.h
+++ b/src/dynarec/rv64/dynarec_rv64_private.h
@@ -158,9 +158,11 @@ void CreateJmpNext(void* addr, void* next);
 #define GO_TRACE(A, B, s0)  \
     GETIP(addr);            \
     MV(A1, xRIP);           \
+    FLAGS_ADJUST_TO11(xFlags, xFlags, s0); \
     STORE_XEMU_CALL(s0);    \
     MOV64x(A2, B);          \
     CALL(A, -1);            \
-    LOAD_XEMU_CALL()
+    LOAD_XEMU_CALL();       \
+    FLAGS_ADJUST_FROM11(xFlags, xFlags, s0);
 
 #endif //__DYNAREC_RV64_PRIVATE_H_