author     xctan <xctan@cirno.icu>        2024-04-07 16:39:58 +0800
committer  GitHub <noreply@github.com>    2024-04-07 10:39:58 +0200
commit     1ca68a58fde0ac8b7389379f0862fb69a86ba2e4 (patch)
tree       5b0674adb4e623f66cc6253cfb7bc46cfd8d79d3 /src
parent     b96139274fcb83be3e9085a1a06084364c938bc5 (diff)
[RV64_DYNAREC] Fixed various bugs in shift instructions (#1426)
* [RV64_DYNAREC] Optimized 8-bit constant shifts

* [RV64_DYNAREC] Fixed shl8c when c > 8

* [RV64_DYNAREC] Optimized 16-bit constant shifts

* [RV64_DYNAREC] Optimized 8-bit CL shifts

* [RV64_DYNAREC] Fixed SF generation of 32-bit SHL Ed, CL

* [RV64_DYNAREC] Optimized 16-bit CL shifts

* [RV64_DYNAREC] Fixed typo in 8-bit CL SHL and SHR

* [RV64_DYNAREC] Fixed the wrong mask in 8-bit SHL Eb, CL

* [RV64_DYNAREC] Fixed typo in SAR Ew, CL
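
For context, the new constant-shift helpers (emit_shl8c and friends) bake the x86 flag rules for a fixed count directly into the generated RISC-V code. Below is a minimal, stand-alone C sketch of the 8-bit SHL semantics being specialized; it is not box64 code, and the name shl8_ref plus the use of the standard EFLAGS bit positions are only for illustration.

#include <stdint.h>
#include <stdio.h>

enum { F_CF = 1 << 0, F_PF = 1 << 2, F_ZF = 1 << 6, F_SF = 1 << 7, F_OF = 1 << 11 };

/* Reference model of SHL r/m8, imm8: the count is masked to 5 bits, a zero
 * count changes nothing, and counts of 8..31 shift the whole byte out. */
static uint8_t shl8_ref(uint8_t v, uint8_t count, uint32_t *flags)
{
    count &= 0x1f;
    if (!count)
        return v;                                  /* value and flags untouched */
    uint8_t res = (count < 8) ? (uint8_t)(v << count) : 0;
    uint32_t f = *flags & ~(F_CF | F_PF | F_ZF | F_SF | F_OF);
    if (count <= 8 && ((v >> (8 - count)) & 1))
        f |= F_CF;                                 /* last bit shifted out; undefined past 8 */
    if (count == 1 && (((res >> 7) ^ f) & 1))
        f |= F_OF;                                 /* OF = MSB(result) ^ CF, 1-bit shifts only */
    if (res & 0x80) f |= F_SF;
    if (!res)       f |= F_ZF;
    if (!(__builtin_popcount(res) & 1))
        f |= F_PF;                                 /* PF: even parity of the result byte */
    *flags = f;
    return res;
}

int main(void)
{
    uint32_t fl = 0;
    printf("%#x flags=%#x\n", shl8_ref(0x81, 1, &fl), fl);   /* 0x2 flags=0x801 (CF|OF) */
    return 0;
}
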
Diffstat (limited to 'src')
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_00_3.c        143
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_66.c          109
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_emit_shift.c  634
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_helper.h       24
4 files changed, 764 insertions, 146 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_00_3.c b/src/dynarec/rv64/dynarec_rv64_00_3.c
index a3a6680e..df22e054 100644
--- a/src/dynarec/rv64/dynarec_rv64_00_3.c
+++ b/src/dynarec/rv64/dynarec_rv64_00_3.c
@@ -99,60 +99,44 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 case 4:
                 case 6:
                     INST_NAME("SHL Eb, Ib");
-                    GETEB(x1, 1);
-                    u8 = (F8)&0x1f;
-                    if(u8) {
-                        SETFLAGS(X_ALL, SF_PENDING);
-                        UFLAG_IF{
-                            MOV32w(x4, u8); UFLAG_OP2(x4);
-                        };
-                        UFLAG_OP1(ed);
-                        SLLIW(ed, ed, u8);
-                        EBBACK(x5, 1);
-                        UFLAG_RES(ed);
-                        UFLAG_DF(x3, d_shl8);
+                    u8 = geted_ib(dyn, addr, ninst, nextop) & 0x1f;
+                    if (u8) {
+                        SETFLAGS(X_ALL, SF_SET_PENDING);
+                        GETEB(x1, 1);
+                        u8 = (F8) & 0x1f;
+                        emit_shl8c(dyn, ninst, ed, u8, x4, x5, x6);
+                        EBBACK(x5, 0);
                     } else {
-                        NOP();
+                        FAKEED;
+                        F8;
                     }
                     break;
                 case 5:
                     INST_NAME("SHR Eb, Ib");
-                    GETEB(x1, 1);
-                    u8 = (F8)&0x1f;
-                    if(u8) {
-                        SETFLAGS(X_ALL, SF_PENDING);
-                        UFLAG_IF{
-                            MOV32w(x4, u8); UFLAG_OP2(x4);
-                        };
-                        UFLAG_OP1(ed);
-                        if(u8) {
-                            SRLIW(ed, ed, u8);
-                            EBBACK(x5, 1);
-                        }
-                        UFLAG_RES(ed);
-                        UFLAG_DF(x3, d_shr8);
+                    u8 = geted_ib(dyn, addr, ninst, nextop) & 0x1f;
+                    if (u8) {
+                        SETFLAGS(X_ALL, SF_SET_PENDING);
+                        GETEB(x1, 1);
+                        u8 = (F8) & 0x1f;
+                        emit_shr8c(dyn, ninst, ed, u8, x4, x5, x6);
+                        EBBACK(x5, 0);
                     } else {
-                        NOP();
+                        FAKEED;
+                        F8;
                     }
                     break;
                 case 7:
                     INST_NAME("SAR Eb, Ib");
-                    GETSEB(x1, 1);
-                    u8 = (F8)&0x1f;
-                    if(u8) {
-                        SETFLAGS(X_ALL, SF_PENDING);
-                        UFLAG_IF{
-                            MOV32w(x4, u8); UFLAG_OP2(x4);
-                        };
-                        UFLAG_OP1(ed);
-                        if(u8) {
-                            SRAIW(ed, ed, u8);
-                            EBBACK(x5, 1);
-                        }
-                        UFLAG_RES(ed);
-                        UFLAG_DF(x3, d_sar8);
+                    u8 = geted_ib(dyn, addr, ninst, nextop) & 0x1f;
+                    if (u8) {
+                        SETFLAGS(X_ALL, SF_SET_PENDING);
+                        GETSEB(x1, 1);
+                        u8 = (F8) & 0x1f;
+                        emit_sar8c(dyn, ninst, ed, u8, x4, x5, x6);
+                        EBBACK(x5, 0);
                     } else {
-                        NOP();
+                        FAKEED;
+                        F8;
                     }
                     break;
                 default:
@@ -506,15 +490,12 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                         INST_NAME("ROL Eb, CL");
                         ANDI(x2, xRCX, 7);
                     }
-                    SETFLAGS(X_OF|X_CF, SF_PENDING);
+                    MESSAGE(LOG_DUMP, "Need Optimization\n");
+                    READFLAGS(X_CF);
+                    SETFLAGS(X_OF|X_CF, SF_SET);
                     GETEB(x1, 0);
-                    UFLAG_OP12(ed, x2);
-                    SLL(x3, ed, x2);
-                    SRLI(x4, x3, 8);
-                    OR(ed, x3, x4);
-                    EBBACK(x5, 1);
-                    UFLAG_RES(ed);
-                    UFLAG_DF(x3, d_rol8);
+                    CALL_(rol8, ed, x3);
+                    EBBACK(x5, 0);
                     break;
                 case 1:
                     if(opcode==0xD0) {
@@ -524,16 +505,12 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                         INST_NAME("ROR Eb, CL");
                         ANDI(x2, xRCX, 7);
                     }
-                    SETFLAGS(X_OF|X_CF, SF_PENDING);
+                    MESSAGE(LOG_DUMP, "Need Optimization\n");
+                    READFLAGS(X_CF);
+                    SETFLAGS(X_OF|X_CF, SF_SET);
                     GETEB(x1, 0);
-                    UFLAG_OP12(ed, x2);
-                    SRL(x3, ed, x2);
-                    SLLI(x4, ed, 8);
-                    SRL(x4, x4, x2);
-                    OR(ed, x3, x4);
-                    EBBACK(x5, 1);
-                    UFLAG_RES(ed);
-                    UFLAG_DF(x3, d_ror8);
+                    CALL_(ror8, ed, x3);
+                    EBBACK(x5, 0);
                     break;
                 case 2:
                     if(opcode==0xD0) {
@@ -572,47 +549,47 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                         MOV32w(x2, 1);
                     } else {
                         INST_NAME("SHL Eb, CL");
-                        ANDI(x2, xRCX, 7);
+                        ANDI(x2, xRCX, 0x1f);
+                        BEQ_NEXT(x2, xZR);
                     }
-                    SETFLAGS(X_ALL, SF_PENDING);
+                    SETFLAGS(X_ALL, SF_SET_PENDING);    // some flags are left undefined
+                    if(box64_dynarec_safeflags>1)
+                        MAYSETFLAGS();
                     GETEB(x1, 0);
-                    UFLAG_OP12(ed, x2)
-                    SLL(ed, ed, x2);
-                    EBBACK(x5, 1);
-                    UFLAG_RES(ed);
-                    UFLAG_DF(x3, d_shl8);
+                    emit_shl8(dyn, ninst, x1, x2, x5, x4, x6);
+                    EBBACK(x5, 0);
                     break;
                 case 5:
                     if(opcode==0xD0) {
                         INST_NAME("SHR Eb, 1");
-                        MOV32w(x4, 1);
+                        MOV32w(x2, 1);
                     } else {
                         INST_NAME("SHR Eb, CL");
-                        ANDI(x4, xRCX, 0x1F);
+                        ANDI(x2, xRCX, 0x1F);
+                        BEQ_NEXT(x2, xZR);
                     }
-                    SETFLAGS(X_ALL, SF_PENDING);
+                    SETFLAGS(X_ALL, SF_SET_PENDING);    // some flags are left undefined
+                    if(box64_dynarec_safeflags>1)
+                        MAYSETFLAGS();
                     GETEB(x1, 0);
-                    UFLAG_OP12(ed, x4);
-                    SRLW(ed, ed, x4);
-                    EBBACK(x5, 1);
-                    UFLAG_RES(ed);
-                    UFLAG_DF(x3, d_shr8);
+                    emit_shr8(dyn, ninst, x1, x2, x5, x4, x6);
+                    EBBACK(x5, 0);
                     break;
                 case 7:
                     if(opcode==0xD0) {
                         INST_NAME("SAR Eb, 1");
-                        MOV32w(x4, 1);
+                        MOV32w(x2, 1);
                     } else {
                         INST_NAME("SAR Eb, CL");
-                        ANDI(x4, xRCX, 0x1f);
+                        ANDI(x2, xRCX, 0x1f);
+                        BEQ_NEXT(x2, xZR);
                     }
-                    SETFLAGS(X_ALL, SF_PENDING);
+                    SETFLAGS(X_ALL, SF_SET_PENDING);    // some flags are left undefined
+                    if(box64_dynarec_safeflags>1)
+                        MAYSETFLAGS();
                     GETSEB(x1, 0);
-                    UFLAG_OP12(ed, x4)
-                    SRA(ed, ed, x4);
-                    EBBACK(x3, 1);
-                    UFLAG_RES(ed);
-                    UFLAG_DF(x3, d_sar8);
+                    emit_sar8(dyn, ninst, x1, x2, x5, x4, x6);
+                    EBBACK(x5, 0);
                     break;
                 default:
                     DEFAULT;
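
The SHL/SHR/SAR Eb, CL paths above now mask the shift count with 0x1f (SHL previously used 7) and BEQ_NEXT skips the whole operation when the masked count is zero: x86 masks even 8-bit shift counts to 5 bits rather than to the operand width, so counts of 8..31 shift the byte out entirely, and a zero count must leave both the value and the flags untouched. A hedged illustration of the observable difference, with invented names (not box64 code):

#include <stdint.h>
#include <stdio.h>

static uint8_t shl8_wrapped(uint8_t v, uint8_t cl)   /* what a 3-bit mask would compute */
{
    return (uint8_t)(v << (cl & 7));
}

static uint8_t shl8_x86(uint8_t v, uint8_t cl)       /* actual x86 behaviour */
{
    cl &= 0x1f;
    if (!cl) return v;
    return (cl < 8) ? (uint8_t)(v << cl) : 0;
}

int main(void)
{
    /* SHL AL, CL with CL = 8: x86 clears AL, a 3-bit mask would leave it unchanged */
    printf("%#x vs %#x\n", shl8_wrapped(0x5a, 8), shl8_x86(0x5a, 8));   /* 0x5a vs 0 */
    return 0;
}
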
diff --git a/src/dynarec/rv64/dynarec_rv64_66.c b/src/dynarec/rv64/dynarec_rv64_66.c
index a17aae3b..edce9940 100644
--- a/src/dynarec/rv64/dynarec_rv64_66.c
+++ b/src/dynarec/rv64/dynarec_rv64_66.c
@@ -985,50 +985,42 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 case 4:
                 case 6:
                     INST_NAME("SHL Ew, Ib");
-                    UFLAG_IF {MESSAGE(LOG_DUMP, "Need Optimization for flags\n");}
-                    SETFLAGS(X_ALL, SF_PENDING);
-                    GETEW(x1, 1);
-                    u8 = F8;
-                    UFLAG_IF {MOV32w(x2, (u8&15));}
-                    UFLAG_OP12(ed, x2)
-                    if(MODREG) {
-                        SLLI(ed, ed, 48+(u8&15));
-                        SRLI(ed, ed, 48);
+                    if (geted_ib(dyn, addr, ninst, nextop) & 0x1f) {
+                        SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined
+                        GETEW(x1, 0);
+                        u8 = (F8)&0x1f;
+                        emit_shl16c(dyn, ninst, x1, u8, x5, x4, x6);
+                        EWBACK;
                     } else {
-                        SLLI(ed, ed, u8&15);
+                        FAKEED;
+                        F8;
                     }
-                    EWBACK;
-                    UFLAG_RES(ed);
-                    UFLAG_DF(x3, d_shl16);
                     break;
                 case 5:
                     INST_NAME("SHR Ew, Ib");
-                    UFLAG_IF {MESSAGE(LOG_DUMP, "Need Optimization for flags\n");}
-                    SETFLAGS(X_ALL, SF_PENDING);
-                    GETEW(x1, 1);
-                    u8 = F8;
-                    UFLAG_IF {MOV32w(x2, (u8&15));}
-                    UFLAG_OP12(ed, x2)
-                    SRLI(ed, ed, u8&15);
-                    EWBACK;
-                    UFLAG_RES(ed);
-                    UFLAG_DF(x3, d_shr16);
+                    if (geted_ib(dyn, addr, ninst, nextop) & 0x1f) {
+                        SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined
+                        GETEW(x1, 0);
+                        u8 = (F8)&0x1f;
+                        emit_shr16c(dyn, ninst, x1, u8, x5, x4, x6);
+                        EWBACK;
+                    } else {
+                        FAKEED;
+                        F8;
+                    }
                     break;
                 case 7:
                     INST_NAME("SAR Ew, Ib");
-                    SETFLAGS(X_ALL, SF_PENDING);
-                    UFLAG_IF {MESSAGE(LOG_DUMP, "Need Optimization for flags\n");}
-                    GETSEW(x1, 1);
-                    u8 = F8;
-                    UFLAG_IF {MOV32w(x2, (u8&15));}
-                    UFLAG_OP12(ed, x2)
-                    SRAI(ed, ed, u8&15);
-                    if(MODREG) {
-                        ZEXTH(ed, ed);
+                    if (geted_ib(dyn, addr, ninst, nextop) & 0x1f) {
+                        SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined
+                        GETSEW(x1, 0);
+                        u8 = (F8)&0x1f;
+                        emit_sar16c(dyn, ninst, x1, u8, x5, x4, x6);
+                        EWBACK;
+                    } else {
+                        FAKEED;
+                        F8;
                     }
-                    EWBACK;
-                    UFLAG_RES(ed);
-                    UFLAG_DF(x3, d_sar16);
                     break;
             }
             break;
@@ -1115,56 +1107,51 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 case 5:
                     if(opcode==0xD1) {
                         INST_NAME("SHR Ew, 1");
-                        MOV32w(x4, 1);
+                        MOV32w(x2, 1);
                     } else {
                         INST_NAME("SHR Ew, CL");
-                        ANDI(x4, xRCX, 15);
+                        ANDI(x2, xRCX, 0x1f);
+                        BEQ_NEXT(x2, xZR);
                     }
-                    UFLAG_IF {MESSAGE(LOG_DUMP, "Need Optimization for flags\n");}
-                    SETFLAGS(X_ALL, SF_PENDING);
+                    SETFLAGS(X_ALL, SF_SET_PENDING);    // some flags are left undefined
+                    if(box64_dynarec_safeflags>1)
+                        MAYSETFLAGS();
                     GETEW(x1, 0);
-                    UFLAG_OP12(ed, x4)
-                    SRL(ed, ed, x4);
+                    emit_shr16(dyn, ninst, x1, x2, x5, x4, x6);
                     EWBACK;
-                    UFLAG_RES(ed);
-                    UFLAG_DF(x3, d_shr16);
                     break;
                 case 4:
                 case 6:
                     if(opcode==0xD1) {
                         INST_NAME("SHL Ew, 1");
-                        MOV32w(x4, 1);
+                        MOV32w(x2, 1);
                     } else {
                         INST_NAME("SHL Ew, CL");
-                        ANDI(x4, xRCX, 15);
+                        ANDI(x2, xRCX, 0x1f);
+                        BEQ_NEXT(x2, xZR);
                     }
-                    UFLAG_IF {MESSAGE(LOG_DUMP, "Need Optimization for flags\n");}
-                    SETFLAGS(X_ALL, SF_PENDING);
+                    SETFLAGS(X_ALL, SF_SET_PENDING);    // some flags are left undefined
+                    if(box64_dynarec_safeflags>1)
+                        MAYSETFLAGS();
                     GETEW(x1, 0);
-                    UFLAG_OP12(ed, x4)
-                    SLL(ed, ed, x4);
-                    ZEXTH(ed, ed);
+                    emit_shl16(dyn, ninst, x1, x2, x5, x4, x6);
                     EWBACK;
-                    UFLAG_RES(ed);
-                    UFLAG_DF(x3, d_shl16);
                     break;
                 case 7:
                     if(opcode==0xD1) {
                         INST_NAME("SAR Ew, 1");
-                        MOV32w(x4, 1);
+                        MOV32w(x2, 1);
                     } else {
                         INST_NAME("SAR Ew, CL");
-                        ANDI(x4, xRCX, 15);
+                        ANDI(x2, xRCX, 0x1f);
+                        BEQ_NEXT(x2, xZR);
                     }
-                    UFLAG_IF {MESSAGE(LOG_DUMP, "Need Optimization for flags\n");}
-                    SETFLAGS(X_ALL, SF_PENDING);
+                    SETFLAGS(X_ALL, SF_SET_PENDING);    // some flags are left undefined
+                    if(box64_dynarec_safeflags>1)
+                        MAYSETFLAGS();
                     GETSEW(x1, 0);
-                    UFLAG_OP12(ed, x4);
-                    SRA(ed, ed, x4);
-                    ZEXTH(ed, ed);
+                    emit_sar16(dyn, ninst, x1, x2, x5, x4, x6);
                     EWBACK;
-                    UFLAG_RES(ed);
-                    UFLAG_DF(x3, d_sar16);
                     break;
                 default:
                     DEFAULT;
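
The 16-bit CL paths get the same treatment: the count mask widens from 15 to 0x1f and a zero count branches straight to the next instruction. A hedged reference for the 16..31 range, again with invented names and assuming arithmetic right shift on signed values as GCC/Clang provide:

#include <stdint.h>
#include <stdio.h>

static uint16_t shr16_x86(uint16_t v, uint8_t cl)
{
    cl &= 0x1f;                                    /* 5-bit mask, not cl & 15 */
    if (!cl) return v;                             /* zero count: value and flags untouched */
    return (cl < 16) ? (uint16_t)(v >> cl) : 0;
}

static uint16_t sar16_x86(int16_t v, uint8_t cl)
{
    cl &= 0x1f;
    if (!cl) return (uint16_t)v;
    return (uint16_t)(v >> (cl < 16 ? cl : 15));   /* counts >= 16 leave only sign bits */
}

int main(void)
{
    printf("%#x %#x\n", shr16_x86(0x8000, 16), sar16_x86((int16_t)0x8000, 16));   /* 0 0xffff */
    return 0;
}
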
diff --git a/src/dynarec/rv64/dynarec_rv64_emit_shift.c b/src/dynarec/rv64/dynarec_rv64_emit_shift.c
index 69e6a08f..a4bd4e19 100644
--- a/src/dynarec/rv64/dynarec_rv64_emit_shift.c
+++ b/src/dynarec/rv64/dynarec_rv64_emit_shift.c
@@ -21,6 +21,636 @@
 #include "dynarec_rv64_functions.h"
 #include "dynarec_rv64_helper.h"
 
+// emit SHL8 instruction, from s1 , constant c, store result in s1 using s3, s4 and s5 as scratch
+void emit_shl8c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5)
+{
+    if (!c) return;
+    // c != 0
+
+    CLEAR_FLAGS();
+    IFX(X_PEND) {
+        MOV64x(s3, c);
+        SB(s3, xEmu, offsetof(x64emu_t, op2));
+        SB(s1, xEmu, offsetof(x64emu_t, op1));
+        SET_DF(s4, d_shl8);
+    } else IFX(X_ALL) {
+        SET_DFNONE();
+    }
+
+    if (c < 8) {
+        IFX(X_CF|X_OF) {
+            SRLI(s3, s1, 8-c);
+            ANDI(s5, s3, 1); // LSB == F_CF
+            IFX(X_CF) {
+                OR(xFlags, xFlags, s5);
+            }
+        }
+
+        SLLI(s1, s1, c+56);
+        IFX(X_SF) {
+            BGE(s1, xZR, 8);
+            ORI(xFlags, xFlags, 1 << F_SF);
+        }
+        SRLI(s1, s1, 56);
+
+        IFX(X_PEND) {
+            SB(s1, xEmu, offsetof(x64emu_t, res));
+        }
+        IFX(X_ZF) {
+            BNEZ(s1, 8);
+            ORI(xFlags, xFlags, 1 << F_ZF);
+        }
+        IFX(X_OF) {
+            // OF flag is affected only on 1-bit shifts
+            if (c == 1) {
+                SRLI(s3, s1, 7);
+                XOR(s3, s3, s5);
+                SLLI(s3, s3, F_OF2);
+                OR(xFlags, xFlags, s3);
+            }
+        }
+        IFX(X_PF) {
+            emit_pf(dyn, ninst, s1, s3, s4);
+        }
+    } else {
+        IFX(X_CF) {
+            if (c == 8) {
+                ANDI(s3, s1, 1);
+                OR(xFlags, xFlags, s3); // F_CF == 0
+            }
+        }
+        MV(s1, xZR);
+        // OF nop
+        // SF nop
+        // AF nop
+        IFX(X_PF | X_ZF) {
+            IFX(X_ZF) {
+                ORI(xFlags, xFlags, 1 << F_ZF);
+            }
+            IFX(X_PF) {
+                ORI(xFlags, xFlags, 1 << F_PF);
+            }
+        }
+    }
+}
+
+// emit SHR8 instruction, from s1 , constant c, store result in s1 using s3, s4 and s5 as scratch
+void emit_shr8c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5)
+{
+    if (!c) return;
+    // c != 0
+    CLEAR_FLAGS();
+    IFX(X_PEND) {
+        MOV64x(s3, c);
+        SB(s3, xEmu, offsetof(x64emu_t, op2));
+        SB(s1, xEmu, offsetof(x64emu_t, op1));
+        SET_DF(s4, d_shr8);
+    } else IFX(X_ALL) {
+        SET_DFNONE();
+    }
+    IFX(X_CF) {
+        if (c > 1) {
+            SRAI(s3, s1, c-1);
+            ANDI(s3, s3, 1); // LSB == F_CF
+        } else {
+            // no need to shift
+            ANDI(s3, s1, 1); // LSB == F_CF
+        }
+        OR(xFlags, xFlags, s3);
+    }
+    IFX(X_OF) {
+        // OF flag is affected only on 1-bit shifts
+        // OF flag is set to the most-significant bit of the original operand
+        if (c == 1) {
+            SRLI(s3, s1, 7);
+            SLLI(s3, s3, F_OF2);
+            OR(xFlags, xFlags, s3);
+        }
+    }
+
+    SRLI(s1, s1, c);
+    ANDI(s1, s1, 0xff);
+
+    // SF should be unset
+    IFX(X_PEND) {
+        SB(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_ZF) {
+        BNEZ(s1, 8);
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
+
+// emit SAR8 instruction, from s1 , constant c, store result in s1 using s3, s4 and s5 as scratch
+void emit_sar8c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5)
+{
+    if (!c) return;
+    // c != 0
+    CLEAR_FLAGS();
+    IFX(X_PEND) {
+        MOV64x(s3, c);
+        SB(s3, xEmu, offsetof(x64emu_t, op2));
+        SB(s1, xEmu, offsetof(x64emu_t, op1));
+        SET_DF(s4, d_sar8);
+    } else IFX(X_ALL) {
+        SET_DFNONE();
+    }
+    IFX(X_CF) {
+        if (c > 1) {
+            SRAI(s3, s1, c-1);
+            ANDI(s3, s3, 1); // LSB == F_CF
+        } else {
+            // no need to shift
+            ANDI(s3, s1, 1); // LSB == F_CF
+        }
+        OR(xFlags, xFlags, s3);
+    }
+    // For the SAR instruction, the OF flag is cleared for all 1-bit shifts.
+    // OF nop
+    IFX(X_SF) {
+        // SF is the same as the original operand
+        BGE(s1, xZR, 8);
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+
+    SRLI(s1, s1, c);
+    ANDI(s1, s1, 0xff);
+
+    IFX(X_PEND) {
+        SB(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_ZF) {
+        BNEZ(s1, 8);
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
+
+// emit SHL8 instruction, from s1 , shift s2, store result in s1 using s3, s4 and s5 as scratch
+void emit_shl8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
+{
+    // s2 is not 0 here and is 1..1f/3f
+    CLEAR_FLAGS();
+    IFX(X_PEND) {
+        SB(s1, xEmu, offsetof(x64emu_t, op1));
+        SB(s2, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s4, d_shl8);
+    } else IFX(X_ALL) {
+        SET_DFNONE();
+    }
+
+    SLL(s1, s1, s2);
+
+    IFX(X_CF | X_OF) {
+        SRLI(s5, s1, 8);
+        ANDI(s5, s5, 1); // LSB == F_CF
+        IFX(X_CF) {
+            OR(xFlags, xFlags, s5);
+        }
+    }
+
+    SLLI(s1, s1, 56);
+    IFX(X_SF) {
+        BGE(s1, xZR, 8);
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+    SRLI(s1, s1, 56);
+
+    IFX(X_PEND) {
+        SB(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_ZF) {
+        BNEZ(s1, 8);
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX(X_OF) {
+        // OF flag is affected only on 1-bit shifts
+        ADDI(s3, s2, -1);
+        BNEZ(s3, 4 + 4 * 4);
+        SRLI(s3, s1, 7);
+        XOR(s3, s3, s5);
+        SLLI(s3, s3, F_OF2);
+        OR(xFlags, xFlags, s3);
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
+
+// emit SHR8 instruction, from s1 , shift s2 (!0 and and'd already), store result in s1 using s3 and s4 as scratch
+void emit_shr8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
+{
+    int64_t j64;
+
+    CLEAR_FLAGS();
+
+    IFX(X_PEND) {
+        SB(s2, xEmu, offsetof(x64emu_t, op2));
+        SB(s1, xEmu, offsetof(x64emu_t, op1));
+        SET_DF(s4, d_shr8);
+    } else IFX(X_ALL) {
+        SET_DFNONE();
+    }
+
+    IFX(X_CF) {
+        SUBI(s3, s2, 1);
+        SRA(s3, s1, s3);
+        ANDI(s3, s3, 1); // LSB == F_CF
+        OR(xFlags, xFlags, s3);
+    }
+    IFX(X_OF) {
+        // OF flag is affected only on 1-bit shifts
+        // OF flag is set to the most-significant bit of the original operand
+        ADDI(s3, xZR, 1);
+        BEQ(s2, s3, 4+3*4);
+        SRLI(s3, s1, 7);
+        SLLI(s3, s3, F_OF2);
+        OR(xFlags, xFlags, s3);
+    }
+
+    SRL(s1, s1, s2);
+    ANDI(s1, s1, 0xff);
+
+    // SF should be unset
+    IFX(X_PEND) {
+        SB(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_ZF) {
+        BNEZ(s1, 8);
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
+
+// emit SAR8 instruction, from s1 , shift s2 (!0 and and'd already), store result in s1 using s3, s4 and s5 as scratch
+void emit_sar8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
+{
+    int64_t j64;
+
+    CLEAR_FLAGS();
+
+    IFX(X_PEND) {
+        SB(s2, xEmu, offsetof(x64emu_t, op2));
+        SB(s1, xEmu, offsetof(x64emu_t, op1));
+        SET_DF(s4, d_sar8);
+    } else IFX(X_ALL) {
+        SET_DFNONE();
+    }
+    IFX(X_CF) {
+        SUBI(s3, s2, 1);
+        SRA(s3, s1, s3);
+        ANDI(s3, s3, 1); // LSB == F_CF
+        OR(xFlags, xFlags, s3);
+    }
+    // For the SAR instruction, the OF flag is cleared for all 1-bit shifts.
+    // OF nop
+    IFX(X_SF) {
+        // SF is the same as the original operand
+        BGE(s1, xZR, 8);
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+
+    SRL(s1, s1, s2);
+    ANDI(s1, s1, 0xff);
+
+    IFX(X_PEND) {
+        SB(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_ZF) {
+        BNEZ(s1, 8);
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
+
+// emit SHL16 instruction, from s1 , constant c, store result in s1 using s3, s4 and s5 as scratch
+void emit_shl16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5)
+{
+    if (!c) return;
+    // c != 0
+
+    CLEAR_FLAGS();
+    IFX(X_PEND) {
+        MOV64x(s3, c);
+        SH(s3, xEmu, offsetof(x64emu_t, op2));
+        SH(s1, xEmu, offsetof(x64emu_t, op1));
+        SET_DF(s4, d_shl16);
+    } else IFX(X_ALL) {
+        SET_DFNONE();
+    }
+
+    if (c < 16) {
+        IFX(X_CF|X_OF) {
+            SRLI(s3, s1, 16-c);
+            ANDI(s5, s3, 1); // LSB == F_CF
+            IFX(X_CF) {
+                OR(xFlags, xFlags, s5);
+            }
+        }
+
+        SLLI(s1, s1, c+48);
+        IFX(X_SF) {
+            BGE(s1, xZR, 8);
+            ORI(xFlags, xFlags, 1 << F_SF);
+        }
+        SRLI(s1, s1, 48);
+
+        IFX(X_PEND) {
+            SH(s1, xEmu, offsetof(x64emu_t, res));
+        }
+        IFX(X_ZF) {
+            BNEZ(s1, 8);
+            ORI(xFlags, xFlags, 1 << F_ZF);
+        }
+        IFX(X_OF) {
+            // OF flag is affected only on 1-bit shifts
+            if (c == 1) {
+                SRLI(s3, s1, 15);
+                XOR(s3, s3, s5);
+                SLLI(s3, s3, F_OF2);
+                OR(xFlags, xFlags, s3);
+            }
+        }
+        IFX(X_PF) {
+            emit_pf(dyn, ninst, s1, s3, s4);
+        }
+    } else {
+        IFX(X_CF) {
+            if (c == 16) {
+                ANDI(s3, s1, 1);
+                OR(xFlags, xFlags, s3); // F_CF == 0
+            }
+        }
+        MV(s1, xZR);
+        // OF nop
+        // SF nop
+        // AF nop
+        IFX(X_PF | X_ZF) {
+            IFX(X_ZF) {
+                ORI(xFlags, xFlags, 1 << F_ZF);
+            }
+            IFX(X_PF) {
+                ORI(xFlags, xFlags, 1 << F_PF);
+            }
+        }
+    }
+}
+
+// emit SHR16 instruction, from s1 , constant c, store result in s1 using s3, s4 and s5 as scratch
+void emit_shr16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5)
+{
+    if (!c) return;
+    // c != 0
+    CLEAR_FLAGS();
+    IFX(X_PEND) {
+        MOV64x(s3, c);
+        SH(s3, xEmu, offsetof(x64emu_t, op2));
+        SH(s1, xEmu, offsetof(x64emu_t, op1));
+        SET_DF(s4, d_shr16);
+    } else IFX(X_ALL) {
+        SET_DFNONE();
+    }
+    IFX(X_CF) {
+        if (c > 1) {
+            SRAI(s3, s1, c-1);
+            ANDI(s3, s3, 1); // LSB == F_CF
+        } else {
+            // no need to shift
+            ANDI(s3, s1, 1); // LSB == F_CF
+        }
+        OR(xFlags, xFlags, s3);
+    }
+    IFX(X_OF) {
+        // OF flag is affected only on 1-bit shifts
+        // OF flag is set to the most-significant bit of the original operand
+        if (c == 1) {
+            SRLI(s3, s1, 15);
+            SLLI(s3, s3, F_OF2);
+            OR(xFlags, xFlags, s3);
+        }
+    }
+
+    SRLI(s1, s1, c);
+    // SF should be unset
+
+    IFX(X_PEND) {
+        SH(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_ZF) {
+        BNEZ(s1, 8);
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
+
+// emit SAR16 instruction, from s1 , constant c, store result in s1 using s3, s4 and s5 as scratch
+void emit_sar16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5)
+{
+    if (!c) return;
+    // c != 0
+    CLEAR_FLAGS();
+    IFX(X_PEND) {
+        MOV64x(s3, c);
+        SH(s3, xEmu, offsetof(x64emu_t, op2));
+        SH(s1, xEmu, offsetof(x64emu_t, op1));
+        SET_DF(s4, d_sar16);
+    } else IFX(X_ALL) {
+        SET_DFNONE();
+    }
+    IFX(X_CF) {
+        if (c > 1) {
+            SRAI(s3, s1, c-1);
+            ANDI(s3, s3, 1); // LSB == F_CF
+        } else {
+            // no need to shift
+            ANDI(s3, s1, 1); // LSB == F_CF
+        }
+        OR(xFlags, xFlags, s3);
+    }
+    IFX(X_OF) {
+        // OF flag is affected only on 1-bit shifts
+        // OF flag is set to the most-significant bit of the original operand
+        if (c == 1) {
+            SRLI(s3, s1, 15);
+            ANDI(s3, s3, 1);
+            SLLI(s3, s3, F_OF2);
+            OR(xFlags, xFlags, s3);
+        }
+    }
+    IFX(X_SF) {
+        // SF is the same as the original operand
+        BGE(s1, xZR, 8);
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+
+    SRLI(s1, s1, c);
+    ZEXTH(s1, s1);
+
+    IFX(X_PEND) {
+        SH(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_ZF) {
+        BNEZ(s1, 8);
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
+
+
+// emit SHL16 instruction, from s1 , shift s2, store result in s1 using s3, s4 and s5 as scratch
+void emit_shl16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
+{
+    // s2 is not 0 here and is 1..1f/3f
+    CLEAR_FLAGS();
+    IFX(X_PEND) {
+        SH(s1, xEmu, offsetof(x64emu_t, op1));
+        SH(s2, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s4, d_shl16);
+    } else IFX(X_ALL) {
+        SET_DFNONE();
+    }
+
+    SLL(s1, s1, s2);
+
+    IFX(X_CF | X_OF) {
+        SRLI(s5, s1, 16);
+        ANDI(s5, s5, 1); // LSB == F_CF
+        IFX(X_CF) {
+            OR(xFlags, xFlags, s5);
+        }
+    }
+
+    SLLI(s1, s1, 48);
+    IFX(X_SF) {
+        BGE(s1, xZR, 8);
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+    SRLI(s1, s1, 48);
+
+    IFX(X_PEND) {
+        SH(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_ZF) {
+        BNEZ(s1, 8);
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX(X_OF) {
+        // OF flag is affected only on 1-bit shifts
+        ADDI(s3, s2, -1);
+        BNEZ(s3, 4 + 4 * 4);
+        SRLI(s3, s1, 15);
+        XOR(s3, s3, s5);
+        SLLI(s3, s3, F_OF2);
+        OR(xFlags, xFlags, s3);
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
+
+// emit SHR16 instruction, from s1 , shift s2 (!0 and and'd already), store result in s1 using s3 and s4 as scratch
+void emit_shr16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
+{
+    int64_t j64;
+
+    CLEAR_FLAGS();
+
+    IFX(X_PEND) {
+        SH(s2, xEmu, offsetof(x64emu_t, op2));
+        SH(s1, xEmu, offsetof(x64emu_t, op1));
+        SET_DF(s4, d_shr16);
+    } else IFX(X_ALL) {
+        SET_DFNONE();
+    }
+
+    IFX(X_CF) {
+        SUBI(s3, s2, 1);
+        SRA(s3, s1, s3);
+        ANDI(s3, s3, 1); // LSB == F_CF
+        OR(xFlags, xFlags, s3);
+    }
+    IFX(X_OF) {
+        // OF flag is affected only on 1-bit shifts
+        // OF flag is set to the most-significant bit of the original operand
+        ADDI(s3, xZR, 1);
+        BEQ(s2, s3, 4+3*4);
+        SRLI(s3, s1, 15);
+        SLLI(s3, s3, F_OF2);
+        OR(xFlags, xFlags, s3);
+    }
+
+    SRL(s1, s1, s2);
+    ZEXTH(s1, s1);
+
+    // SF should be unset
+    IFX(X_PEND) {
+        SH(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_ZF) {
+        BNEZ(s1, 8);
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
+
+// emit SAR16 instruction, from s1 , shift s2 (!0 and and'd already), store result in s1 using s3, s4 and s5 as scratch
+void emit_sar16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
+{
+    int64_t j64;
+
+    CLEAR_FLAGS();
+
+    IFX(X_PEND) {
+        SH(s2, xEmu, offsetof(x64emu_t, op2));
+        SH(s1, xEmu, offsetof(x64emu_t, op1));
+        SET_DF(s4, d_sar8);
+    } else IFX(X_ALL) {
+        SET_DFNONE();
+    }
+    IFX(X_CF) {
+        SUBI(s3, s2, 1);
+        SRA(s3, s1, s3);
+        ANDI(s3, s3, 1); // LSB == F_CF
+        OR(xFlags, xFlags, s3);
+    }
+    // For the SAR instruction, the OF flag is cleared for all 1-bit shifts.
+    // OF nop
+    IFX(X_SF) {
+        // SF is the same as the original operand
+        BGE(s1, xZR, 8);
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+
+    SRL(s1, s1, s2);
+    ZEXTH(s1, s1);
+
+    IFX(X_PEND) {
+        SH(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_ZF) {
+        BNEZ(s1, 8);
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
+
 // emit SHL32 instruction, from s1 , shift s2, store result in s1 using s3, s4 and s5 as scratch
 void emit_shl32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5)
 {
@@ -44,7 +674,7 @@ void emit_shl32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
         }
     }
 
-    SLL(s1, s1, s2);
+    SLLxw(s1, s1, s2);
 
     IFX(X_SF) {
         BGE(s1, xZR, 8);
@@ -155,7 +785,7 @@ void emit_shr32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
         // OF flag is affected only on 1-bit shifts
         // OF flag is set to the most-significant bit of the original operand
         ADDI(s3, xZR, 1);
-        BEQ(s2, s3, 4+4*4);
+        BEQ(s2, s3, 4+3*4);
         SRLIxw(s3, s1, rex.w?63:31);
         SLLI(s3, s3, F_OF2);
         OR(xFlags, xFlags, s3);
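
The one-line fix in emit_shr32 above (4+4*4 becoming 4+3*4) corrects an off-by-one in a hand-written branch offset. Assuming the BEQ macro takes a raw byte offset relative to the branch itself, as the ubiquitous BNEZ(s1, 8) single-instruction skips suggest, jumping over the N 4-byte instructions that follow needs 4 + 4*N, and exactly three instructions (SRLIxw, SLLI, OR) sit between this branch and its intended target. A sketch of that arithmetic, with an invented macro name:

/* Invented for illustration; box64 writes these offsets out by hand. */
#define SKIP_NEXT_INSNS(n)  (4 + 4 * (n))   /* the branch itself plus n 4-byte instructions */

/*
 * BEQ(s2, s3, SKIP_NEXT_INSNS(3));   // == 4 + 3*4, lands just past the OR below
 * SRLIxw(s3, s1, rex.w ? 63 : 31);
 * SLLI(s3, s3, F_OF2);
 * OR(xFlags, xFlags, s3);
 * ...                                // execution resumes here when the branch is taken
 */
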
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h
index 1f2f43c5..3292ea2f 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.h
+++ b/src/dynarec/rv64/dynarec_rv64_helper.h
@@ -1158,6 +1158,18 @@ void* rv64_next(x64emu_t* emu, uintptr_t addr);
 #define emit_neg32          STEPNAME(emit_neg32)
 #define emit_neg16          STEPNAME(emit_neg16)
 #define emit_neg8           STEPNAME(emit_neg8)
+#define emit_shl8c          STEPNAME(emit_shl8c)
+#define emit_shr8c          STEPNAME(emit_shr8c)
+#define emit_sar8c          STEPNAME(emit_sar8c)
+#define emit_shl8           STEPNAME(emit_shl8)
+#define emit_shr8           STEPNAME(emit_shr8)
+#define emit_sar8           STEPNAME(emit_sar8)
+#define emit_shl16c         STEPNAME(emit_shl16c)
+#define emit_shr16c         STEPNAME(emit_shr16c)
+#define emit_sar16c         STEPNAME(emit_sar16c)
+#define emit_shl16          STEPNAME(emit_shl16)
+#define emit_shr16          STEPNAME(emit_shr16)
+#define emit_sar16          STEPNAME(emit_sar16)
 #define emit_shl32          STEPNAME(emit_shl32)
 #define emit_shl32c         STEPNAME(emit_shl32c)
 #define emit_shr32          STEPNAME(emit_shr32)
@@ -1297,6 +1309,18 @@ void emit_sbb16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
 void emit_neg32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3);
 void emit_neg16(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4);
 void emit_neg8(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4);
+void emit_shl8c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5);
+void emit_shr8c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5);
+void emit_sar8c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5);
+void emit_shl8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
+void emit_shr8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
+void emit_sar8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
+void emit_shl16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5);
+void emit_shr16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5);
+void emit_sar16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5);
+void emit_shl16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
+void emit_shr16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
+void emit_sar16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
 void emit_shl32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5);
 void emit_shl32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4, int s5);
 void emit_shr32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
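
Every new emitter funnels PF through emit_pf. As a reminder of the semantics being delegated: x86 sets PF when the low byte of the result contains an even number of 1 bits. A stand-alone check (not the box64 implementation; the helper name is invented):

#include <stdint.h>

static inline int x86_parity_flag(uint64_t res)
{
    /* PF looks only at the least-significant byte of the result */
    return (__builtin_popcount((unsigned)(res & 0xff)) & 1) == 0;
}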