author     xctan <xctan@cirno.icu>        2023-12-25 20:54:42 +0000
committer  GitHub <noreply@github.com>    2023-12-25 21:54:42 +0100
commit     cb63cdcd421e33449c5edde04a04a3833c950cfe (patch)
tree       bf0d81af7d1d61d01a330ac77017c78e45cb8d50
parent     1f28b9f98d4cb26fb464ca56c042c227cfe4f3af (diff)
[DYNAREC_RV64] Added more opcodes for VMP-protected GI (#1164)
* [DYNAREC_RV64] Added 66 0F C8-CF BSWAP opcode

* [DYNAREC_RV64] Added 66 0F BB BTC opcode

* [DYNAREC_RV64] Added D2 /3 RCR opcode

* [DYNAREC_RV64] Added 66 0F BA /4 BT opcode

* [DYNAREC_RV64] Added 66 D3 /1 ROR opcode

* [DYNAREC_RV64] Added D2 /2 RCL opcode

* [DYNAREC_RV64] Added 66 D3 /0 ROL opcode

* [DYNAREC_RV64] Added 66 0F BC BSF opcode

* [DYNAREC_RV64] Added 66 0F A4 SHLD opcode

* [DYNAREC_RV64] Added 66 0F AB BTS opcode

* [DYNAREC_RV64] Added 36 SS prefix

* [DYNAREC_RV64] Added D3 /3 RCR opcode

* [DYNAREC_RV64] Added C1 /3 RCR opcode

* [DYNAREC_RV64] Added 66 0F AC SHRD opcode

* [DYNAREC_RV64] Fixed emit_shrd16c and emit_shld16c declarations

* [DYNAREC_RV64] Added C1 /2 RCL opcode

* [DYNAREC_RV64] Added 66 0F BD BSR opcode

* [DYNAREC_RV64] Added 66 0F A3 BT opcode

* [DYNAREC_RV64] Added 66 0F C1 XADD opcode

* [DYNAREC_RV64] Added 0F A5 SHLD opcode

* [DYNAREC_RV64] Added C0 /2 RCL opcode

* [DYNAREC_RV64] Added C0 /3 RCR opcode

* [DYNAREC_RV64] Added 66 99 CWD opcode

* [DYNAREC_RV64] Fixed 66 99 CWD opcode

* [DYNAREC_RV64] Reverted 66 0F BD BSR opcode

The VMP anti-debugger message breaks when this opcode is enabled.
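
For reference, the RCL/RCR variants added above rotate through the carry flag, which is why they fall back to helper calls (rcl8, rcr8, rcl32/rcl64, rcr32/rcr64) instead of plain RISC-V shifts. A minimal C sketch of the 8-bit x86 semantics being emulated (an illustration only, not box64's actual helper code; flag updates other than CF are omitted):

    #include <stdint.h>

    /* RCL rotates the 9-bit value {CF, op} left; for an 8-bit operand the
     * effective count is the 5-bit-masked count taken modulo 9. */
    static uint8_t rcl8_ref(uint8_t op, uint8_t count, int *cf)
    {
        count = (count & 0x1f) % 9;
        for (unsigned i = 0; i < count; i++) {
            int out = (op >> 7) & 1;          /* bit rotated out of the top */
            op = (uint8_t)((op << 1) | *cf);  /* old CF enters at bit 0 */
            *cf = out;
        }
        return op;
    }

    /* RCR is the mirror image: rotate {op, CF} right, old CF enters at bit 7. */
    static uint8_t rcr8_ref(uint8_t op, uint8_t count, int *cf)
    {
        count = (count & 0x1f) % 9;
        for (unsigned i = 0; i < count; i++) {
            int out = op & 1;                        /* bit rotated out of the bottom */
            op = (uint8_t)((op >> 1) | (*cf << 7));  /* old CF enters at bit 7 */
            *cf = out;
        }
        return op;
    }
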
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_00_0.c          3
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_00_3.c         82
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_0f.c           14
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_66.c           44
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_660f.c        194
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_emit_shift.c  187
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_helper.h        6
7 files changed, 524 insertions, 6 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_00_0.c b/src/dynarec/rv64/dynarec_rv64_00_0.c
index 8674eca1..75ebb3ec 100644
--- a/src/dynarec/rv64/dynarec_rv64_00_0.c
+++ b/src/dynarec/rv64/dynarec_rv64_00_0.c
@@ -434,6 +434,9 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             i64 = F32S;
             emit_xor32c(dyn, ninst, rex, xRAX, i64, x3, x4);
             break;
+        case 0x36:
+            INST_NAME("SS:");
+            break;
         case 0x38:
             INST_NAME("CMP Eb, Gb");
             SETFLAGS(X_ALL, SF_SET_PENDING);
diff --git a/src/dynarec/rv64/dynarec_rv64_00_3.c b/src/dynarec/rv64/dynarec_rv64_00_3.c
index 670de262..eddb2ead 100644
--- a/src/dynarec/rv64/dynarec_rv64_00_3.c
+++ b/src/dynarec/rv64/dynarec_rv64_00_3.c
@@ -76,6 +76,26 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     CALL_(ror8, ed, x3);
                     EBBACK(x5, 0);
                     break;
+                case 2:
+                    INST_NAME("RCL Eb, Ib");
+                    MESSAGE(LOG_DUMP, "Need Optimization\n");
+                    SETFLAGS(X_OF|X_CF, SF_SET);
+                    GETEB(x1, 1);
+                    u8 = F8;
+                    MOV32w(x2, u8);
+                    CALL_(rcl8, ed, x3);
+                    EBBACK(x5, 0);
+                    break;
+                case 3:
+                    INST_NAME("RCR Eb, Ib");
+                    MESSAGE(LOG_DUMP, "Need Optimization\n");
+                    SETFLAGS(X_OF|X_CF, SF_SET);
+                    GETEB(x1, 1);
+                    u8 = F8;
+                    MOV32w(x2, u8);
+                    CALL_(rcr8, ed, x3);
+                    EBBACK(x5, 0);
+                    break;
                 case 4:
                 case 6:
                     INST_NAME("SHL Eb, Ib");
@@ -160,6 +180,28 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     if(u8) { WBACK; }
                     if(!wback && !rex.w) ZEROUP(ed);
                     break;
+                case 2:
+                    INST_NAME("RCL Ed, Ib");
+                    MESSAGE(LOG_DUMP, "Need Optimization\n");
+                    READFLAGS(X_CF);
+                    SETFLAGS(X_OF|X_CF, SF_SET);
+                    u8 = (F8)&(rex.w?0x3f:0x1f);
+                    MOV32w(x2, u8);
+                    GETEDW(x4, x1, 0);
+                    CALL_(rex.w?((void*)rcl64):((void*)rcl32), ed, x4);
+                    WBACK;
+                    break;
+                case 3:
+                    INST_NAME("RCR Ed, Ib");
+                    MESSAGE(LOG_DUMP, "Need Optimization\n");
+                    READFLAGS(X_CF);
+                    SETFLAGS(X_OF|X_CF, SF_SET);
+                    u8 = (F8)&(rex.w?0x3f:0x1f);
+                    MOV32w(x2, u8);
+                    GETEDW(x4, x1, 0);
+                    CALL_(rex.w?((void*)rcr64):((void*)rcr32), ed, x4);
+                    WBACK;
+                    break;
                 case 4:
                 case 6:
                     INST_NAME("SHL Ed, Ib");
@@ -438,6 +480,36 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     UFLAG_RES(ed);
                     UFLAG_DF(x3, d_ror8);
                     break;
+                case 2:
+                    if(opcode==0xD0) {
+                        INST_NAME("RCL Eb, 1");
+                        MOV32w(x2, 1);
+                    } else {
+                        INST_NAME("RCL Eb, CL");
+                        ANDI(x2, xRCX, 7);
+                    }
+                    MESSAGE(LOG_DUMP, "Need Optimization\n");
+                    READFLAGS(X_CF);
+                    SETFLAGS(X_OF|X_CF, SF_SET);
+                    GETEB(x1, 0);
+                    CALL_(rcl8, ed, x3);
+                    EBBACK(x5, 0);
+                    break;
+                case 3:
+                    if(opcode==0xD0) {
+                        INST_NAME("RCR Eb, 1");
+                        MOV32w(x2, 1);
+                    } else {
+                        INST_NAME("RCR Eb, CL");
+                        ANDI(x2, xRCX, 7);
+                    }
+                    MESSAGE(LOG_DUMP, "Need Optimization\n");
+                    READFLAGS(X_CF);
+                    SETFLAGS(X_OF|X_CF, SF_SET);
+                    GETEB(x1, 0);
+                    CALL_(rcr8, ed, x3);
+                    EBBACK(x5, 0);
+                    break;
                 case 4:
                 case 6:
                     if(opcode==0xD0) {
@@ -576,6 +648,16 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     WBACK;
                     if(!wback && !rex.w) ZEROUP(ed);
                     break;
+                case 3:
+                    INST_NAME("RCR Ed, CL");
+                    MESSAGE(LOG_DUMP, "Need Optimization\n");
+                    READFLAGS(X_CF);
+                    SETFLAGS(X_OF|X_CF, SF_SET);
+                    ANDI(x2, xRCX, rex.w?0x3f:0x1f);
+                    GETEDW(x4, x1, 0);
+                    CALL_(rex.w?((void*)rcr64):((void*)rcr32), ed, x4);
+                    WBACK;
+                    break;
                 case 4:
                 case 6:
                     INST_NAME("SHL Ed, CL");
diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c
index 22b21694..86a5a3e7 100644
--- a/src/dynarec/rv64/dynarec_rv64_0f.c
+++ b/src/dynarec/rv64/dynarec_rv64_0f.c
@@ -1213,6 +1213,20 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             emit_shld32c(dyn, ninst, rex, ed, gd, u8, x3, x4, x5);
             WBACK;
             break;
+        case 0xA5:
+            nextop = F8;
+            INST_NAME("SHLD Ed, Gd, CL");
+            SETFLAGS(X_ALL, SF_SET_PENDING);    // some flags are left undefined
+            if(box64_dynarec_safeflags>1)
+                MAYSETFLAGS();
+            GETGD;
+            GETED(0);
+            if(!rex.w && !rex.is32bits && MODREG) { ZEROUP(ed); }
+            ANDI(x3, xRCX, rex.w ? 0x3f : 0x1f);
+            BEQ_NEXT(x3, xZR);
+            emit_shld32(dyn, ninst, rex, ed, gd, x3, x4, x5);
+            WBACK;
+            break;
         case 0xAB:
             INST_NAME("BTS Ed, Gd");
             SETFLAGS(X_CF, SF_SUBSET);
diff --git a/src/dynarec/rv64/dynarec_rv64_66.c b/src/dynarec/rv64/dynarec_rv64_66.c
index 4f58ff98..afa34d48 100644
--- a/src/dynarec/rv64/dynarec_rv64_66.c
+++ b/src/dynarec/rv64/dynarec_rv64_66.c
@@ -608,7 +608,6 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     OR(gd, gd, x2);
                 }
             break;
-
         case 0x98:
             INST_NAME("CBW");
             SLLI(x1, xRAX, 56);
@@ -619,6 +618,15 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             AND(x1, x1, x2);
             OR(xRAX, xRAX, x1);
             break;
+        case 0x99:
+            INST_NAME("CWD");
+            SLLI(x1, xRAX, 48);
+            SRAI(x1, x1, 48);
+            SRLI(x1, x1, 48);
+            SRLI(xRDX, xRDX, 16);
+            SLLI(xRDX, xRDX, 16);
+            OR(xRDX, xRDX, x1);
+            break;
         case 0xA1:
             INST_NAME("MOV EAX,Od");
             if (rex.is32bits) u64 = F32; else u64 = F64;
@@ -954,13 +962,41 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
         case 0xD3:
             nextop = F8;
             switch((nextop>>3)&7) {
+                case 0:
+                    if(opcode==0xD1) {
+                        INST_NAME("ROL Ew, 1");
+                        MOV32w(x2, 1);
+                    } else {
+                        INST_NAME("ROL Ew, CL");
+                        ANDI(x2, xRCX, 15);
+                    }
+                    MESSAGE(LOG_DUMP, "Need Optimization\n");
+                    SETFLAGS(X_OF|X_CF, SF_SET);
+                    GETEW(x1, 1);
+                    CALL_(rol16, x1, x3);
+                    EWBACK;
+                    break;
+                case 1:
+                    if(opcode==0xD1) {
+                        INST_NAME("ROR Ew, 1");
+                        MOV32w(x2, 1);
+                    } else {
+                        INST_NAME("ROR Ew, CL");
+                        ANDI(x2, xRCX, 15);
+                    }
+                    MESSAGE(LOG_DUMP, "Need Optimization\n");
+                    SETFLAGS(X_OF|X_CF, SF_SET);
+                    GETEW(x1, 1);
+                    CALL_(ror16, x1, x3);
+                    EWBACK;
+                    break;
                 case 5:
                     if(opcode==0xD1) {
                         INST_NAME("SHR Ew, 1");
                         MOV32w(x4, 1);
                     } else {
                         INST_NAME("SHR Ew, CL");
-                        ANDI(x4, xRCX, 0x1f);
+                        ANDI(x4, xRCX, 15);
                     }
                     UFLAG_IF {MESSAGE(LOG_DUMP, "Need Optimization for flags\n");}
                     SETFLAGS(X_ALL, SF_PENDING);
@@ -978,7 +1014,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                         MOV32w(x4, 1);
                     } else {
                         INST_NAME("SHL Ew, CL");
-                        ANDI(x4, xRCX, 0x1f);
+                        ANDI(x4, xRCX, 15);
                     }
                     UFLAG_IF {MESSAGE(LOG_DUMP, "Need Optimization for flags\n");}
                     SETFLAGS(X_ALL, SF_PENDING);
@@ -996,7 +1032,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                         MOV32w(x4, 1);
                     } else {
                         INST_NAME("SAR Ew, CL");
-                        ANDI(x4, xRCX, 0x1f);
+                        ANDI(x4, xRCX, 15);
                     }
                     UFLAG_IF {MESSAGE(LOG_DUMP, "Need Optimization for flags\n");}
                     SETFLAGS(X_ALL, SF_PENDING);
diff --git a/src/dynarec/rv64/dynarec_rv64_660f.c b/src/dynarec/rv64/dynarec_rv64_660f.c
index b3b92a49..e13e775d 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f.c
@@ -15,6 +15,7 @@
 #include "emu/x64run_private.h"
 #include "x64trace.h"
 #include "dynarec_native.h"
+#include "bitutils.h"
 
 #include "rv64_printer.h"
 #include "dynarec_rv64_private.h"
@@ -2013,6 +2014,54 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             SSE_LOOP_MV_Q2(x3);
             if (!MODREG) SMWRITE2();
             break;
+        case 0xA3:
+            INST_NAME("BT Ew, Gw");
+            SETFLAGS(X_CF, SF_SUBSET);
+            SET_DFNONE();
+            nextop = F8;
+            GETEW(x1, 0);
+            GETGW(x2);
+            ANDI(gd, gd, 15);
+            BEXT(x4, ed, gd, x6);
+            ANDI(xFlags, xFlags, ~1);
+            OR(xFlags, xFlags, x4);
+            break;
+        case 0xA4:
+            nextop = F8;
+            INST_NAME("SHLD Ew, Gw, Ib");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            GETEW(x1, 0);
+            GETGW(x2);
+            u8 = F8;
+            emit_shld16c(dyn, ninst, rex, ed, gd, u8, x3, x4, x5);
+            EWBACK;
+            break;
+        case 0xAB:
+            INST_NAME("BTS Ew, Gw");
+            SETFLAGS(X_CF, SF_SUBSET);
+            SET_DFNONE();
+            nextop = F8;
+            GETEW(x1, 0);
+            GETGW(x2);
+            ANDI(gd, gd, 15);
+            BEXT(x4, ed, gd, x6);
+            ANDI(xFlags, xFlags, ~1);
+            OR(xFlags, xFlags, x4);
+            ADDI(x4, xZR, 1);
+            SLL(x4, x4, gd);
+            OR(ed, ed, x4);
+            EWBACK;
+            break;
+        case 0xAC:
+            nextop = F8;
+            INST_NAME("SHRD Ew, Gw, Ib");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            GETEW(x1, 0);
+            GETGW(x2);
+            u8 = F8;
+            emit_shrd16c(dyn, ninst, rex, ed, gd, u8, x3, x4);
+            EWBACK;
+            break;
         case 0xAF:
             INST_NAME("IMUL Gw,Ew");
             SETFLAGS(X_ALL, SF_PENDING);
@@ -2051,6 +2100,116 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             AND(gd, gd, x5);
             OR(gd, gd, x1);
             break;
+        case 0xBA:
+            nextop = F8;
+            switch ((nextop >> 3) & 7) {
+                case 4:
+                    INST_NAME("BT Ew, Ib");
+                    SETFLAGS(X_CF, SF_SUBSET);
+                    SET_DFNONE();
+                    GETED(1);
+                    u8 = F8;
+                    u8 &= rex.w ? 0x3f : 15;
+                    BEXTI(x3, ed, u8); // F_CF is 1
+                    ANDI(xFlags, xFlags, ~1);
+                    OR(xFlags, xFlags, x3);
+                    break;
+                default:
+                    DEFAULT;
+            }
+            break;
+        case 0xBB:
+            INST_NAME("BTC Ew, Gw");
+            SETFLAGS(X_CF, SF_SUBSET);
+            SET_DFNONE();
+            nextop = F8;
+            GETGD;
+            if (MODREG) {
+                ed = xRAX + (nextop & 7) + (rex.b << 3);
+                wback = 0;
+            } else {
+                SMREAD();
+                addr = geted(dyn, addr, ninst, nextop, &wback, x3, x1, &fixedaddress, rex, NULL, 1, 0);
+                SRAIxw(x1, gd, 5 + rex.w);
+                ADDSL(x3, wback, x1, 2 + rex.w, x1);
+                LDxw(x1, x3, fixedaddress);
+                ed = x1;
+                wback = x3;
+            }
+            BEXT(x4, ed, gd, x2); // F_CF is 1
+            ANDI(xFlags, xFlags, ~1);
+            OR(xFlags, xFlags, x4);
+            ADDI(x4, xZR, 1);
+            ANDI(x2, gd, rex.w ? 0x3f : 15);
+            SLL(x4, x4, x2);
+            XOR(ed, ed, x4);
+            if (wback) {
+                SDxw(ed, wback, fixedaddress);
+                SMWRITE();
+            }
+            break;
+        case 0xBC:
+            INST_NAME("BSF Gw, Ew");
+            SETFLAGS(X_ZF, SF_SUBSET);
+            SET_DFNONE();
+            nextop = F8;
+            GETEW(x5, 0);
+            GETGW(x4);
+            BNE_MARK(ed, xZR);
+            ORI(xFlags, xFlags, 1 << F_ZF);
+            B_NEXT_nocond;
+            MARK;
+            if (rv64_zbb) {
+                CTZxw(gd, ed);
+            } else {
+                NEG(x2, ed);
+                AND(x2, x2, ed);
+                TABLE64(x3, 0x03f79d71b4ca8b09ULL);
+                MUL(x2, x2, x3);
+                SRLI(x2, x2, 64 - 6);
+                TABLE64(x1, (uintptr_t)&deBruijn64tab);
+                ADD(x1, x1, x2);
+                LBU(gd, x1, 0);
+            }
+            ANDI(xFlags, xFlags, ~(1 << F_ZF));
+            GWBACK;
+            break;
+        // case 0xBD:
+        //     INST_NAME("BSR Gw, Ew");
+        //     SETFLAGS(X_ZF, SF_SUBSET);
+        //     SET_DFNONE();
+        //     nextop = F8;
+        //     GETEW(x5, 0);
+        //     GETGW(x4);
+        //     BNE_MARK(ed, xZR);
+        //     ORI(xFlags, xFlags, 1 << F_ZF);
+        //     B_NEXT_nocond;
+        //     MARK;
+        //     ANDI(xFlags, xFlags, ~(1 << F_ZF));
+        //     if (rv64_zbb) {
+        //         MOV32w(x1, 31);
+        //         CLZxw(gd, ed);
+        //         SUB(gd, x1, gd);
+        //     } else {
+        //         u8 = gd;
+        //         ADDI(u8, xZR, 0);
+        //         AND(x2, ed, xMASK);
+        //         SRLI(x3, x2, 8);
+        //         BEQZ(x3, 4 + 2 * 4);
+        //         ADDI(u8, u8, 8);
+        //         MV(x2, x3);
+        //         SRLI(x3, x2, 4);
+        //         BEQZ(x3, 4 + 2 * 4);
+        //         ADDI(u8, u8, 4);
+        //         MV(x2, x3);
+        //         ANDI(x2, x2, 0b1111);
+        //         TABLE64(x3, (uintptr_t)&lead0tab);
+        //         ADD(x3, x3, x2);
+        //         LBU(x2, x3, 0);
+        //         ADD(gd, u8, x2);
+        //     }
+        //     GWBACK;
+        //     break;
         case 0xBE:
             INST_NAME("MOVSX Gw, Eb");
             nextop = F8;
@@ -2078,6 +2237,18 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             AND(x1, x1, x5);
             OR(gd, gd, x1);
             break;
+        case 0xC1:
+            INST_NAME("XADD Ew, Gw");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            nextop = F8;
+            GETGW(x1);
+            GETEW(x2, 0);
+            MV(x9, ed);
+            emit_add16(dyn, ninst, ed, gd, x4, x5, x6);
+            MV(gd, x9);
+            EWBACK;
+            GWBACK;
+            break;
         case 0xC2:
             INST_NAME("CMPPD Gx, Ex, Ib");
             nextop = F8;
@@ -2167,6 +2338,29 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             SD(x3, gback, gdoffset + 0);
             SD(x4, gback, gdoffset + 8);
             break;
+        case 0xC8:
+        case 0xC9:
+        case 0xCA:
+        case 0xCB:
+        case 0xCC:
+        case 0xCD:
+        case 0xCE:
+        case 0xCF:                  /* BSWAP reg */
+            INST_NAME("BSWAP Reg");
+            gd = xRAX+(opcode&7)+(rex.b<<3);
+            if(rex.w) {
+                REV8xw(gd, gd, x1, x2, x3, x4);
+            } else {
+                ANDI(x1, gd, 0xff);
+                SLLI(x1, x1, 8);
+                SRLI(x2, gd, 8);
+                ANDI(x2, x2, 0xff);
+                SRLI(x3, gd, 16);
+                SLLI(x4, x3, 16);
+                AND(x1, x4, x1);
+                AND(gd, x1, x2);
+            }
+            break;
         case 0xD1:
             INST_NAME("PSRLW Gx,Ex");
             nextop = F8;
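
The BSF fallback in the hunk above (taken when the Zbb CTZ instruction is unavailable) isolates the lowest set bit with NEG+AND, hashes it with a de Bruijn multiply, and indexes deBruijn64tab from bitutils. A self-contained C sketch of the same trick (the table values here are the standard ones for this multiplier and are assumed, not copied from box64):

    #include <stdint.h>

    static const uint8_t debruijn64tab[64] = {
         0,  1, 56,  2, 57, 49, 28,  3, 61, 58, 42, 50, 38, 29, 17,  4,
        62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12,  5,
        63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
        54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19,  9, 13,  8,  7,  6,
    };

    /* Count trailing zeros; like BSF, the result is undefined for x == 0
     * (the emitted code branches on ed == 0 before reaching this path). */
    static int ctz64_ref(uint64_t x)
    {
        uint64_t lsb = x & -x;  /* isolate the lowest set bit (mirrors NEG+AND) */
        return debruijn64tab[(lsb * 0x03f79d71b4ca8b09ULL) >> 58];
    }
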
diff --git a/src/dynarec/rv64/dynarec_rv64_emit_shift.c b/src/dynarec/rv64/dynarec_rv64_emit_shift.c
index 0fdc10e3..f5d5ade1 100644
--- a/src/dynarec/rv64/dynarec_rv64_emit_shift.c
+++ b/src/dynarec/rv64/dynarec_rv64_emit_shift.c
@@ -506,7 +506,7 @@ void emit_shrd32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin
             SDxw(s3, xEmu, offsetof(x64emu_t, op2));
         } else SDxw(xZR, xEmu, offsetof(x64emu_t, op2));
         SDxw(s1, xEmu, offsetof(x64emu_t, op1));
-        SET_DF(s4, rex.w?d_shr64:d_shr32);
+        SET_DF(s4, rex.w?d_shrd64:d_shrd32);
     } else IFX(X_ALL) {
         SET_DFNONE();
     }
@@ -562,6 +562,72 @@ void emit_shrd32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin
     }
 }
 
+void emit_shrd16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4)
+{
+    c&=15;
+    CLEAR_FLAGS();
+
+    IFX(X_PEND) {
+        if (c) {
+            MOV64x(s3, c);
+            SH(s3, xEmu, offsetof(x64emu_t, op2));
+        } else SH(xZR, xEmu, offsetof(x64emu_t, op2));
+        SH(s1, xEmu, offsetof(x64emu_t, op1));
+        SET_DF(s4, d_shrd16);
+    } else IFX(X_ALL) {
+        SET_DFNONE();
+    }
+    if(!c) {
+        IFX(X_PEND) {
+            SH(s1, xEmu, offsetof(x64emu_t, res));
+        }
+        return;
+    }
+    IFX(X_CF) {
+        if (c > 1) {
+            SRAI(s3, s1, c-1);
+            ANDI(s3, s3, 1); // LSB
+            BEQZ(s3, 8);
+        } else {
+            // no need to shift
+            ANDI(s3, s1, 1);
+            BEQZ(s3, 8);
+        }
+        ORI(xFlags, xFlags, 1 << F_CF);
+    }
+
+    SRLIxw(s3, s1, c);
+    SLLIxw(s1, s2, 16-c);
+    OR(s1, s1, s3);
+    ZEXTH(s1, s1);
+
+    IFX(X_SF) {
+        SLLIW(s4, s1, 16);
+        BGE(s4, xZR, 8);
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+    IFX(X_PEND) {
+        SH(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_ZF) {
+        BNEZ(s1, 8);
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX(X_OF) {
+        if(c==1) {
+            SRLI(s3, s1, 14);
+            SRLI(s4, s1, 15);
+            XOR(s3, s3, s4);
+            ANDI(s3, s3, 1);
+            BEQZ(s3, 8);
+            ORI(xFlags, xFlags, 1 << F_OF2);
+        }
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
+
 void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4, int s5) {
     c&=(rex.w?0x3f:0x1f);
     CLEAR_FLAGS();
@@ -571,7 +637,7 @@ void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin
             SDxw(s3, xEmu, offsetof(x64emu_t, op2));
         } else SDxw(xZR, xEmu, offsetof(x64emu_t, op2));
         SDxw(s1, xEmu, offsetof(x64emu_t, op1));
-        SET_DF(s4, rex.w?d_shl64:d_shl32);
+        SET_DF(s4, rex.w?d_shld64:d_shld32);
     } else IFX(X_ALL) {
         SET_DFNONE();
     }
@@ -622,3 +688,120 @@ void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin
         emit_pf(dyn, ninst, s1, s3, s4);
     }
 }
+
+void emit_shld32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s5, int s4, int s3) {
+    int64_t j64;
+    CLEAR_FLAGS();
+    IFX(X_PEND) {
+        SDxw(s1, xEmu, offsetof(x64emu_t, op1));
+        SDxw(s5, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s4, rex.w?d_shld64:d_shld32);
+    } else IFX(X_ALL) {
+        SET_DFNONE();
+    }
+    MOV32w(s3, (rex.w?64:32));
+    SUB(s3, s3, s5);
+    IFX(X_CF) {
+        SRL(s4, s1, s3);
+        ANDI(s4, s4, 1);
+        BEQZ(s4, 8);
+        ORI(xFlags, xFlags, 1 << F_CF);
+    }
+    IFX(X_OF) {
+        SRLxw(s4, s1, rex.w?63:31);
+        BEQZ(s4, 8);
+        ORI(xFlags, xFlags, 1 << F_OF2);
+    }
+    SLLxw(s4, s1, s5);
+    SRLxw(s3, s2, s3);
+    OR(s1, s3, s4);
+
+    IFX(X_PEND) {
+        SDxw(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_SF) {
+        BGE(s1, xZR, 8);
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+    if (!rex.w) {
+        ZEROUP(s1);
+    }
+    IFX(X_ZF) {
+        BNEZ(s1, 8);
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX(X_OF) {
+        ADDI(s5, s5, -1);
+        BNEZ_MARK(s5);
+        SRLIxw(s3, s1, rex.w?63:31);
+        BEXTI(s4, xFlags, F_OF2);
+        XOR(s3, s3, s4);
+        ANDI(xFlags, xFlags, ~(1<<F_OF2));
+        BEQZ(s3, 8);
+        ORI(xFlags, xFlags, 1 << F_OF2);
+        MARK;
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
+
+void emit_shld16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4, int s5) {
+    c&=15;
+    CLEAR_FLAGS();
+    IFX(X_PEND) {
+        if (c) {
+            MOV64x(s3, c);
+            SH(s3, xEmu, offsetof(x64emu_t, op2));
+        } else SH(xZR, xEmu, offsetof(x64emu_t, op2));
+        SH(s1, xEmu, offsetof(x64emu_t, op1));
+        SET_DF(s4, d_shld16);
+    } else IFX(X_ALL) {
+        SET_DFNONE();
+    }
+
+    if(!c) {
+        IFX(X_PEND) {
+            SH(s1, xEmu, offsetof(x64emu_t, res));
+        }
+        return;
+    }
+    IFX(X_CF|X_OF) {
+        if (c > 0) {
+            SRLI(s3, s1, 16-c);
+            ANDI(s5, s3, 1); // F_CF
+            IFX(X_CF) {
+                OR(xFlags, xFlags, s5);
+            }
+        } else {
+            IFX(X_OF) MOV64x(s5, 0);
+        }
+    }
+
+    SLLIxw(s3, s1, c);
+    SRLIxw(s1, s2, 16-c);
+    OR(s1, s1, s3);
+    ZEXTH(s1, s1);
+
+    IFX(X_SF) {
+        SLLIW(s4, s1, 16);
+        BGE(s4, xZR, 8);
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+    IFX(X_PEND) {
+        SH(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_ZF) {
+        BNEZ(s1, 8);
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX(X_OF) {
+        SRLIxw(s3, s1, 15);
+        XOR(s3, s3, s5);
+        BEQZ(s3, 8);
+        ORI(xFlags, xFlags, 1 << F_OF2);
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
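
Ignoring the flag bookkeeping, the data path of the new emit_shld16c/emit_shrd16c above reduces to the usual 16-bit double shift, with the count masked to 0..15 exactly as in the emitters. A plain C sketch (illustrative only, not the emitter itself):

    #include <stdint.h>

    /* SHLD: shift dst left, filling the vacated low bits from the top of src. */
    static uint16_t shld16_ref(uint16_t dst, uint16_t src, unsigned c)
    {
        c &= 15;
        if (!c) return dst;  /* count 0: operand and flags unchanged */
        return (uint16_t)((dst << c) | (src >> (16 - c)));
    }

    /* SHRD: shift dst right, filling the vacated high bits from the bottom of src. */
    static uint16_t shrd16_ref(uint16_t dst, uint16_t src, unsigned c)
    {
        c &= 15;
        if (!c) return dst;
        return (uint16_t)((dst >> c) | (src << (16 - c)));
    }
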
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h
index 8b631a07..84119bb7 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.h
+++ b/src/dynarec/rv64/dynarec_rv64_helper.h
@@ -1151,6 +1151,9 @@ void* rv64_next(x64emu_t* emu, uintptr_t addr);
 #define emit_ror32c         STEPNAME(emit_ror32c)
 #define emit_shrd32c        STEPNAME(emit_shrd32c)
 #define emit_shld32c        STEPNAME(emit_shld32c)
+#define emit_shld32         STEPNAME(emit_shld32)
+#define emit_shld16c        STEPNAME(emit_shld16c)
+#define emit_shrd16c        STEPNAME(emit_shrd16c)
 
 #define emit_pf STEPNAME(emit_pf)
 
@@ -1287,6 +1290,9 @@ void emit_rol32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
 void emit_ror32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
 void emit_shrd32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4);
 void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4, int s5);
+void emit_shld32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s5, int s3, int s4);
+void emit_shrd16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4);
+void emit_shld16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4, int s5);
 
 void emit_pf(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4);