about summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorYang Liu <liuyang22@iscas.ac.cn>2024-02-21 18:34:44 +0800
committerGitHub <noreply@github.com>2024-02-21 11:34:44 +0100
commitb179bd24154f890fe28e44c94dd667ea99ccbf45 (patch)
treedf31e9a0d437444f866a259c49318547020ec921
parent64fbd3db46beff7091ede54fa96f245d324c6c9a (diff)
downloadbox64-b179bd24154f890fe28e44c94dd667ea99ccbf45.tar.gz
box64-b179bd24154f890fe28e44c94dd667ea99ccbf45.zip
[DYNAREC_RV64] Minor OF flag fixes and optimizations for emit shift utils (#1275)
* Minor OF flag fixes and optimizations for emit shift utils

* original operand
-rw-r--r--src/dynarec/rv64/dynarec_rv64_0f.c6
-rw-r--r--src/dynarec/rv64/dynarec_rv64_emit_shift.c201
-rw-r--r--src/dynarec/rv64/dynarec_rv64_helper.h11
3 files changed, 118 insertions, 100 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c
index 3cd3e92f..f048936c 100644
--- a/src/dynarec/rv64/dynarec_rv64_0f.c
+++ b/src/dynarec/rv64/dynarec_rv64_0f.c
@@ -1211,7 +1211,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             GETED(1);
             GETGD;
             u8 = F8;
-            emit_shld32c(dyn, ninst, rex, ed, gd, u8, x3, x4, x5);
+            emit_shld32c(dyn, ninst, rex, ed, gd, u8, x3, x4);
             WBACK;
             break;
         case 0xA5:
@@ -1225,7 +1225,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             if(!rex.w && !rex.is32bits && MODREG) { ZEROUP(ed); }
             ANDI(x3, xRCX, rex.w ? 0x3f : 0x1f);
             BEQ_NEXT(x3, xZR);
-            emit_shld32(dyn, ninst, rex, ed, gd, x3, x4, x5);
+            emit_shld32(dyn, ninst, rex, ed, gd, x3, x4, x5, x6);
             WBACK;
             break;
         case 0xAB:
@@ -1280,7 +1280,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             if (!rex.w && !rex.is32bits && MODREG) { ZEROUP(ed); }
             ANDI(x3, xRCX, rex.w ? 0x3f : 0x1f);
             BEQ_NEXT(x3, xZR);
-            emit_shrd32(dyn, ninst, rex, ed, gd, x3, x5, x4);
+            emit_shrd32(dyn, ninst, rex, ed, gd, x3, x5, x4, x6);
             WBACK;
             break;
         case 0xAE:
diff --git a/src/dynarec/rv64/dynarec_rv64_emit_shift.c b/src/dynarec/rv64/dynarec_rv64_emit_shift.c
index 23bf1097..9008ca19 100644
--- a/src/dynarec/rv64/dynarec_rv64_emit_shift.c
+++ b/src/dynarec/rv64/dynarec_rv64_emit_shift.c
@@ -38,7 +38,7 @@ void emit_shl32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
         SUBI(s5, s2, rex.w?64:32);
         NEG(s5, s5);
         SRL(s3, s1, s5);
-        ANDI(s5, s3, 1); // F_CF
+        ANDI(s5, s3, 1); // LSB == F_CF
         IFX(X_CF) {
             OR(xFlags, xFlags, s5);
         }
@@ -61,10 +61,13 @@ void emit_shl32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
         ORI(xFlags, xFlags, 1 << F_ZF);
     }
     IFX(X_OF) {
+        // OF flag is affected only on 1-bit shifts
+        ADDI(s3, s2, -1);
+        BNEZ(s3, 4 + 4 * 4);
         SRLIxw(s3, s1, rex.w?63:31);
         XOR(s3, s3, s5);
-        BEQZ(s3, 8);
-        ORI(xFlags, xFlags, 1 << F_OF2);
+        SLLI(s3, s3, F_OF2);
+        OR(xFlags, xFlags, s3);
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
@@ -94,12 +97,10 @@ void emit_shl32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
     IFX(X_CF|X_OF) {
         if (c > 0) {
             SRLI(s3, s1, (rex.w?64:32)-c);
-            ANDI(s5, s3, 1); // F_CF
+            ANDI(s5, s3, 1); // LSB == F_CF
             IFX(X_CF) {
                 OR(xFlags, xFlags, s5);
             }
-        } else {
-            IFX(X_OF) MOV64x(s5, 0);
         }
     }
 
@@ -120,10 +121,13 @@ void emit_shl32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
         ORI(xFlags, xFlags, 1 << F_ZF);
     }
     IFX(X_OF) {
-        SRLIxw(s3, s1, rex.w?63:31);
-        XOR(s3, s3, s5);
-        BEQZ(s3, 8);
-        ORI(xFlags, xFlags, 1 << F_OF2);
+        // OF flag is affected only on 1-bit shifts
+        if (c == 1) {
+            SRLIxw(s3, s1, rex.w?63:31);
+            XOR(s3, s3, s5);
+            SLLI(s3, s3, F_OF2);
+            OR(xFlags, xFlags, s3);
+        }
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
@@ -151,6 +155,15 @@ void emit_shr32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
         ANDI(s3, s3, 1); // LSB == F_CF
         OR(xFlags, xFlags, s3);
     }
+    IFX(X_OF) {
+        // OF flag is affected only on 1-bit shifts
+        // OF flag is set to the most-significant bit of the original operand
+        ADDI(s3, xZR, 1);
+        BEQ(s2, s3, 4+4*4);
+        SRLIxw(s3, s1, rex.w?63:31);
+        SLLI(s3, s3, F_OF2);
+        OR(xFlags, xFlags, s3);
+    }
 
     SRL(s1, s1, s2);
 
@@ -168,16 +181,6 @@ void emit_shr32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
         BNEZ(s1, 8);
         ORI(xFlags, xFlags, 1 << F_ZF);
     }
-    IFX(X_OF) {
-        ADDI(s3, xZR, 1);
-        BEQ(s2, s3, 4+6*4);
-            SRLI(s3, s1, rex.w?62:30);
-            SRLI(s4, s1, rex.w?63:31);
-            XOR(s3, s3, s4);
-            ANDI(s3, s3, 1);
-            BEQZ(s3, 8);
-            ORI(xFlags, xFlags, 1 << F_OF2);
-    }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
     }
@@ -214,6 +217,15 @@ void emit_shr32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
         }
         OR(xFlags, xFlags, s3);
     }
+    IFX(X_OF) {
+        // OF flag is affected only on 1-bit shifts
+        // OF flag is set to the most-significant bit of the original operand
+        if(c==1) {
+            SRLIxw(s3, s1, rex.w?63:31);
+            SLLI(s3, s3, F_OF2);
+            OR(xFlags, xFlags, s3);
+        }
+    }
 
     SRLIxw(s1, s1, c);
 
@@ -231,16 +243,6 @@ void emit_shr32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
         BNEZ(s1, 8);
         ORI(xFlags, xFlags, 1 << F_ZF);
     }
-    IFX(X_OF) {
-        if(c==1) {
-            SRLI(s3, s1, rex.w?62:30);
-            SRLI(s4, s1, rex.w?63:31);
-            XOR(s3, s3, s4);
-            ANDI(s3, s3, 1);
-            BEQZ(s3, 8);
-            ORI(xFlags, xFlags, 1 << F_OF2);
-        }
-    }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
     }
@@ -332,16 +334,16 @@ void emit_rol32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
     IFX(X_PEND) {
         SDxw(s1, xEmu, offsetof(x64emu_t, res));
     }
-    IFX(X_CF) {
-        ANDI(s4, s1, 1<<F_CF);
-        OR(xFlags, xFlags, s4);
+    IFX(X_CF | X_OF) {
+        ANDI(s4, s1, 1); // LSB == F_CF
+        IFX(X_CF) OR(xFlags, xFlags, s4);
     }
     IFX(X_OF) {
+        // the OF flag is set to the exclusive OR of the CF bit (after the rotate) and the most-significant bit of the result.
         ADDI(s3, xZR, 1);
-        BEQ_NEXT(s2, s3);
+        BNE_NEXT(s2, s3);
         SRLIxw(s3, s1, rex.w?63:31);
-        XOR(s3, s3, s1);
-        ANDI(s3, s3, 1);
+        XOR(s3, s3, s4); // s3: MSB, s4: CF bit
         SLLI(s3, s3, F_OF2);
         OR(xFlags, xFlags, s3);
     }
@@ -384,10 +386,12 @@ void emit_ror32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
         OR(xFlags, xFlags, s3);
     }
     IFX(X_OF) {
+        // the OF flag is set to the exclusive OR of the two most-significant bits of the result
         ADDI(s3, xZR, 1);
-        BEQ_NEXT(s2, s3);
+        BNE_NEXT(s2, s3);
         SRLIxw(s3, s1, rex.w?63:31);
-        XOR(s3, s3, s1);
+        SRLIxw(s4, s1, rex.w?62:30);
+        XOR(s3, s3, s4);
         ANDI(s3, s3, 1);
         SLLI(s3, s3, F_OF2);
         OR(xFlags, xFlags, s3);
@@ -426,15 +430,15 @@ void emit_rol32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
     IFX(X_PEND) {
         SDxw(s1, xEmu, offsetof(x64emu_t, res));
     }
-    IFX(X_CF) {
+    IFX(X_CF | X_OF) {
         ANDI(s4, s1, 1<<F_CF);
-        OR(xFlags, xFlags, s4);
+        IFX(X_CF) OR(xFlags, xFlags, s4);
     }
     IFX(X_OF) {
+        // the OF flag is set to the exclusive OR of the CF bit (after the rotate) and the most-significant bit of the result.
         if(c==1) {
             SRLIxw(s3, s1, rex.w?63:31);
-            XOR(s3, s3, s1);
-            ANDI(s3, s3, 1);
+            XOR(s3, s3, s4);
             SLLI(s3, s3, F_OF2);
             OR(xFlags, xFlags, s3);
         }
@@ -478,6 +482,7 @@ void emit_ror32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
         OR(xFlags, xFlags, s3);
     }
     IFX(X_OF) {
+        // the OF flag is set to the exclusive OR of the two most-significant bits of the result
         if(c==1) {
             SRLI(s3, s1, rex.w?62:30);
             SRLI(s4, s3, 1);
@@ -520,6 +525,10 @@ void emit_shrd32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin
         }
         OR(xFlags, xFlags, s3);
     }
+    IFX(X_OF) {
+        // Store sign for later use.
+        if (c == 1) SRLIxw(s4, s1, rex.w?63:31);
+    }
 
     SRLIxw(s3, s1, c);
     SLLIxw(s1, s2, (rex.w?64:32)-c);
@@ -540,13 +549,12 @@ void emit_shrd32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin
         ORI(xFlags, xFlags, 1 << F_ZF);
     }
     IFX(X_OF) {
+        // the OF flag is set if a sign change occurred
         if(c==1) {
-            SRLI(s3, s1, rex.w?62:30);
-            SRLI(s4, s1, rex.w?63:31);
+            SRLI(s3, s1, rex.w?63:31);
             XOR(s3, s3, s4);
-            ANDI(s3, s3, 1);
-            BEQZ(s3, 8);
-            ORI(xFlags, xFlags, 1 << F_OF2);
+            SLLI(s3, s3, F_OF2);
+            OR(xFlags, xFlags, s3);
         }
     }
     IFX(X_PF) {
@@ -585,6 +593,10 @@ void emit_shrd16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin
         }
         OR(xFlags, xFlags, s3);
     }
+    IFX(X_OF) {
+        // Store sign for later use.
+        if (c == 1) SRLI(s4, s1, 15);
+    }
 
     SRLIxw(s3, s1, c);
     SLLIxw(s1, s2, 16-c);
@@ -592,8 +604,8 @@ void emit_shrd16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin
     ZEXTH(s1, s1);
 
     IFX(X_SF) {
-        SLLIW(s4, s1, 16);
-        BGE(s4, xZR, 8);
+        SLLIW(s3, s1, 16);
+        BGE(s3, xZR, 8);
         ORI(xFlags, xFlags, 1 << F_SF);
     }
     IFX(X_PEND) {
@@ -604,13 +616,13 @@ void emit_shrd16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin
         ORI(xFlags, xFlags, 1 << F_ZF);
     }
     IFX(X_OF) {
+        // the OF flag is set if a sign change occurred
         if(c==1) {
-            SRLI(s3, s1, 14);
-            SRLI(s4, s1, 15);
+            SRLI(s3, s1, 15);
             XOR(s3, s3, s4);
             ANDI(s3, s3, 1);
-            BEQZ(s3, 8);
-            ORI(xFlags, xFlags, 1 << F_OF2);
+            SLLI(s3, s3, F_OF2);
+            OR(xFlags, xFlags, s3);
         }
     }
     IFX(X_PF) {
@@ -618,7 +630,7 @@ void emit_shrd16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin
     }
 }
 
-void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4, int s5)
+void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4)
 {
     c&=(rex.w?0x3f:0x1f);
     CLEAR_FLAGS();
@@ -639,17 +651,17 @@ void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin
         }
         return;
     }
-    IFX(X_CF|X_OF) {
+    IFX(X_CF) {
         if (c > 0) {
             SRLI(s3, s1, (rex.w?64:32)-c);
-            ANDI(s5, s3, 1); // F_CF
-            IFX(X_CF) {
-                OR(xFlags, xFlags, s5);
-            }
-        } else {
-            IFX(X_OF) MOV64x(s5, 0);
+            ANDI(s4, s3, 1); // F_CF
+            OR(xFlags, xFlags, s4);
         }
     }
+    IFX(X_OF) {
+        // Store sign for later use.
+        if (c == 1) SRLIxw(s4, s1, rex.w?63:31);
+    }
 
     SLLIxw(s3, s1, c);
     SRLIxw(s1, s2, (rex.w?64:32)-c);
@@ -670,10 +682,13 @@ void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin
         ORI(xFlags, xFlags, 1 << F_ZF);
     }
     IFX(X_OF) {
-        SRLIxw(s3, s1, rex.w?63:31);
-        XOR(s3, s3, s5);
-        BEQZ(s3, 8);
-        ORI(xFlags, xFlags, 1 << F_OF2);
+        // the OF flag is set if a sign change occurred
+        if (c == 1) {
+            SRLIxw(s3, s1, rex.w?63:31);
+            XOR(s3, s3, s4);
+            SLLI(s3, s3, F_OF2);
+            ORI(xFlags, xFlags, s3);
+        }
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
@@ -681,7 +696,7 @@ void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin
 }
 
 
-void emit_shrd32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s5, int s3, int s4)
+void emit_shrd32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s5, int s3, int s4, int s6)
 {
     int64_t j64;
     CLEAR_FLAGS();
@@ -699,9 +714,8 @@ void emit_shrd32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int
         OR(xFlags, xFlags, s3);
     }
     IFX(X_OF) {
-        SRLxw(s4, s1, rex.w ? 63 : 31);
-        BEQZ(s4, 8);
-        ORI(xFlags, xFlags, 1 << F_OF2);
+        // Store current sign for later use.
+        SRLxw(s6, s1, rex.w ? 63 : 31);
     }
     ADDI(s4, xZR, (rex.w ? 64 : 32));
     SUB(s4, s4, s5);
@@ -727,11 +741,9 @@ void emit_shrd32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int
         ADDI(s5, s5, -1);
         BNEZ_MARK(s5);
         SRLIxw(s3, s1, rex.w?63:31);
-        BEXTI(s4, xFlags, F_OF2);
-        XOR(s3, s3, s4);
-        ANDI(xFlags, xFlags, ~(1<<F_OF2));
-        BEQZ(s3, 8);
-        ORI(xFlags, xFlags, 1 << F_OF2);
+        XOR(s3, s3, s6);
+        SLLI(s3, s3, F_OF2);
+        OR(xFlags, xFlags, s3);
         MARK;
     }
     IFX(X_PF) {
@@ -739,7 +751,7 @@ void emit_shrd32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int
     }
 }
 
-void emit_shld32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s5, int s3, int s4)
+void emit_shld32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s5, int s3, int s4, int s6)
 {
     int64_t j64;
     CLEAR_FLAGS();
@@ -758,9 +770,8 @@ void emit_shld32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int
         OR(xFlags, xFlags, s4);
     }
     IFX(X_OF) {
-        SRLxw(s4, s1, rex.w?63:31);
-        BEQZ(s4, 8);
-        ORI(xFlags, xFlags, 1 << F_OF2);
+        // Store current sign for later use.
+        SRLxw(s6, s1, rex.w ? 63 : 31);
     }
     SLLxw(s4, s1, s5);
     SRLxw(s3, s2, s3);
@@ -784,11 +795,9 @@ void emit_shld32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int
         ADDI(s5, s5, -1);
         BNEZ_MARK(s5);
         SRLIxw(s3, s1, rex.w?63:31);
-        BEXTI(s4, xFlags, F_OF2);
-        XOR(s3, s3, s4);
-        ANDI(xFlags, xFlags, ~(1<<F_OF2));
-        BEQZ(s3, 8);
-        ORI(xFlags, xFlags, 1 << F_OF2);
+        XOR(s3, s3, s6);
+        SLLI(s3, s3, F_OF2);
+        OR(xFlags, xFlags, s3);
         MARK;
     }
     IFX(X_PF) {
@@ -817,17 +826,17 @@ void emit_shld16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin
         }
         return;
     }
-    IFX(X_CF|X_OF) {
+    IFX(X_CF) {
         if (c > 0) {
             SRLI(s3, s1, 16-c);
-            ANDI(s5, s3, 1); // F_CF
-            IFX(X_CF) {
-                OR(xFlags, xFlags, s5);
-            }
-        } else {
-            IFX(X_OF) MOV64x(s5, 0);
+            ANDI(s5, s3, 1); // LSB == F_CF
+            OR(xFlags, xFlags, s5);
         }
     }
+    IFX(X_OF) {
+        // Store sign for later use.
+        if (c == 1) SRLI(s5, s1, 15);
+    }
 
     SLLIxw(s3, s1, c);
     SRLIxw(s1, s2, 16-c);
@@ -847,10 +856,14 @@ void emit_shld16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin
         ORI(xFlags, xFlags, 1 << F_ZF);
     }
     IFX(X_OF) {
-        SRLIxw(s3, s1, 15);
-        XOR(s3, s3, s5);
-        BEQZ(s3, 8);
-        ORI(xFlags, xFlags, 1 << F_OF2);
+        // the OF flag is set if a sign change occurred
+        if(c==1) {
+            SRLI(s3, s1, 15);
+            XOR(s3, s3, s5);
+            ANDI(s3, s3, 1);
+            SLLI(s3, s3, F_OF2);
+            OR(xFlags, xFlags, s3);
+        }
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h
index e1048260..0a1dcfa6 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.h
+++ b/src/dynarec/rv64/dynarec_rv64_helper.h
@@ -734,6 +734,11 @@
     j64 = (dyn->insts) ? (dyn->insts[ninst].epilog - (dyn->native_size)) : 0; \
     BEQ(reg1, reg2, j64)
 
+// Branch to NEXT if reg1!=reg2 (use j64)
+#define BNE_NEXT(reg1, reg2)                                                  \
+    j64 = (dyn->insts) ? (dyn->insts[ninst].epilog - (dyn->native_size)) : 0; \
+    BNE(reg1, reg2, j64)
+
 // Branch to NEXT if reg1==0 (use j64)
 #define CBZ_NEXT(reg1)                                                        \
     j64 = (dyn->insts) ? (dyn->insts[ninst].epilog - (dyn->native_size)) : 0; \
@@ -1302,9 +1307,9 @@ void emit_ror32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
 void emit_rol32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
 void emit_ror32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
 void emit_shrd32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4);
-void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4, int s5);
-void emit_shrd32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s5, int s3, int s4);
-void emit_shld32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s5, int s3, int s4);
+void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4);
+void emit_shrd32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s5, int s3, int s4, int s6);
+void emit_shld32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s5, int s3, int s4, int s6);
 void emit_shrd16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4);
 void emit_shld16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4, int s5);