about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author: Yang Liu <liuyang22@iscas.ac.cn>, 2024-12-23 17:05:05 +0800
committer: GitHub <noreply@github.com>, 2024-12-23 10:05:05 +0100
commit: cad9450e16a0c145d49a66b275b091db4e8d1308 (patch)
tree: e724ff681abcaf7e81b043a5e27de60d91719494 /src
parent: 0c7cc657cc3ef0b11330f51e96ca851393acbcec (diff)
download: box64-cad9450e16a0c145d49a66b275b091db4e8d1308.tar.gz
download: box64-cad9450e16a0c145d49a66b275b091db4e8d1308.zip
[LA64_DYNAREC] Optimized some 16bit shift opcodes (#2192)
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/la64/dynarec_la64_66.c 61
-rw-r--r-- src/dynarec/la64/dynarec_la64_emit_shift.c 227
-rw-r--r-- src/dynarec/la64/dynarec_la64_helper.h 6
3 files changed, 261 insertions, 33 deletions
diff --git a/src/dynarec/la64/dynarec_la64_66.c b/src/dynarec/la64/dynarec_la64_66.c
index d5482728..ef8f5c8d 100644
--- a/src/dynarec/la64/dynarec_la64_66.c
+++ b/src/dynarec/la64/dynarec_la64_66.c
@@ -650,48 +650,43 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 case 4:
                 case 6:
                     INST_NAME("SHL Ew, Ib");
-                    UFLAG_IF { MESSAGE(LOG_DUMP, "Need Optimization for flags\n"); }
-                    SETFLAGS(X_ALL, SF_PENDING);
-                    GETEW(x1, 1);
-                    u8 = F8;
-                    UFLAG_IF { MOV32w(x2, (u8 & 15)); }
-                    UFLAG_OP12(ed, x2)
-                    if (MODREG) {
-                        SLLI_D(ed, ed, 48 + (u8 & 15));
-                        SRLI_D(ed, ed, 48);
+                    if (geted_ib(dyn, addr, ninst, nextop) & 0x1f) {
+                        SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined
+                        GETEW(x1, 0);
+                        u8 = (F8) & 0x1f;
+                        emit_shl16c(dyn, ninst, x1, u8, x5, x4, x6);
+                        EWBACK;
                     } else {
-                        SLLI_D(ed, ed, u8 & 15);
+                        FAKEED;
+                        F8;
                     }
-                    EWBACK;
-                    UFLAG_RES(ed);
-                    UFLAG_DF(x3, d_shl16);
                     break;
                 case 5:
                     INST_NAME("SHR Ew, Ib");
-                    UFLAG_IF { MESSAGE(LOG_DUMP, "Need Optimization for flags\n"); }
-                    SETFLAGS(X_ALL, SF_PENDING);
-                    GETEW(x1, 1);
-                    u8 = F8;
-                    UFLAG_IF { MOV32w(x2, (u8 & 15)); }
-                    UFLAG_OP12(ed, x2)
-                    SRLI_D(ed, ed, u8 & 15);
-                    EWBACK;
-                    UFLAG_RES(ed);
-                    UFLAG_DF(x3, d_shr16);
+                    if (geted_ib(dyn, addr, ninst, nextop) & 0x1f) {
+                        SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined
+                        GETEW(x1, 0);
+                        u8 = (F8) & 0x1f;
+                        emit_shr16c(dyn, ninst, x1, u8, x5, x4, x6);
+                        EWBACK;
+                    } else {
+                        FAKEED;
+                        F8;
+                    }
                     break;
                 case 7:
                     INST_NAME("SAR Ew, Ib");
                     SETFLAGS(X_ALL, SF_PENDING);
-                    UFLAG_IF { MESSAGE(LOG_DUMP, "Need Optimization for flags\n"); }
-                    GETSEW(x1, 1);
-                    u8 = F8;
-                    UFLAG_IF { MOV32w(x2, (u8 & 15)); }
-                    UFLAG_OP12(ed, x2)
-                    SRAI_D(ed, ed, u8 & 15);
-                    if (MODREG) BSTRPICK_D(ed, ed, 15, 0);
-                    EWBACK;
-                    UFLAG_RES(ed);
-                    UFLAG_DF(x3, d_sar16);
+                    if (geted_ib(dyn, addr, ninst, nextop) & 0x1f) {
+                        SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined
+                        GETSEW(x1, 0);
+                        u8 = (F8) & 0x1f;
+                        emit_sar16c(dyn, ninst, x1, u8, x5, x4, x6);
+                        EWBACK;
+                    } else {
+                        FAKEED;
+                        F8;
+                    }
                     break;
                 default:
                     DEFAULT;
diff --git a/src/dynarec/la64/dynarec_la64_emit_shift.c b/src/dynarec/la64/dynarec_la64_emit_shift.c
index 9d891bdd..a032be16 100644
--- a/src/dynarec/la64/dynarec_la64_emit_shift.c
+++ b/src/dynarec/la64/dynarec_la64_emit_shift.c
@@ -86,6 +86,101 @@ void emit_shl16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
     }
 }
 
+// emit SHL16 instruction, from s1 , constant c, store result in s1 using s3, s4 and s5 as scratch; emits nothing when c == 0
+void emit_shl16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5)
+{
+    if (!c) return;
+    // c != 0 from here on (the caller in this patch passes an immediate masked with 0x1f)
+
+    IFX (X_PEND) {
+        MOV64x(s3, c);
+        ST_H(s3, xEmu, offsetof(x64emu_t, op2)); // shift count goes to emu->op2 (halfword store)
+        ST_H(s1, xEmu, offsetof(x64emu_t, op1)); // unshifted operand goes to emu->op1
+        SET_DF(s4, d_shl16);
+    } else IFX (X_ALL) {
+        SET_DFNONE();
+    }
+
+    if (la64_lbt) { // NOTE(review): presumably the LBT-extension path, where X64_SLL_H is what produces the flags -- confirm
+        IFX (X_PEND) { // s3 was already loaded with c in the X_PEND block above
+        } else {
+            MOV64x(s3, c);
+        }
+        IFX (X_ALL) {
+            X64_SLL_H(s1, s3); // no other flag code is emitted on this path
+        }
+
+        SLLI_D(s1, s1, c);
+        BSTRPICK_D(s1, s1, 15, 0); // keep only bits 15..0 of the shifted value
+
+        IFX (X_PEND) {
+            ST_H(s1, xEmu, offsetof(x64emu_t, res));
+        }
+        return;
+    }
+
+    CLEAR_FLAGS(s3);
+    if (c < 16) {
+        IFX (X_CF | X_OF) {
+            SRLI_D(s3, s1, 16 - c); // moves bit (16 - c) of the operand, the last bit shifted out, down to bit 0
+            ANDI(s5, s3, 1); // LSB == F_CF; s5 keeps the carry for the OF computation below
+            IFX (X_CF) {
+                OR(xFlags, xFlags, s5);
+            }
+        }
+
+        SLLI_D(s1, s1, c + 48); // bit 15 of the 16-bit result lands in bit 63, so the register sign is the result sign
+        IFX (X_SF) {
+            BGE(s1, xZR, 8); // presumably branches over the ORI when s1 >= 0 -- confirm offset semantics
+            ORI(xFlags, xFlags, 1 << F_SF);
+        }
+        SRLI_D(s1, s1, 48); // bring the result back to bits 15..0 with zeros above
+
+        IFX (X_PEND) {
+            ST_H(s1, xEmu, offsetof(x64emu_t, res));
+        }
+        IFX (X_ZF) {
+            BNEZ(s1, 8); // presumably skips the ORI when the result is non-zero -- confirm offset semantics
+            ORI(xFlags, xFlags, 1 << F_ZF);
+        }
+        IFX (X_OF) {
+            // OF flag is affected only on 1-bit shifts
+            if (c == 1) {
+                SRLI_D(s3, s1, 15); // bit 15 of the result (s1 has nothing above bit 15 at this point)
+                XOR(s3, s3, s5); // OF = result MSB xor carry computed above
+                SLLI_D(s3, s3, F_OF);
+                OR(xFlags, xFlags, s3);
+            }
+        }
+        IFX (X_PF) {
+            emit_pf(dyn, ninst, s1, s3, s4);
+        }
+    } else { // c >= 16: every operand bit is shifted out of the 16-bit result
+        IFX (X_CF) {
+            if (c == 16) { // for c > 16 no CF code is emitted, so CF stays as CLEAR_FLAGS left it
+                ANDI(s3, s1, 1); // bit 0 of the operand is the last bit shifted out when c == 16
+                OR(xFlags, xFlags, s3); // F_CF == 0
+            }
+        }
+        MV(s1, xZR); // result is copied from xZR
+
+        IFX (X_PEND) {
+            ST_H(s1, xEmu, offsetof(x64emu_t, res));
+        }
+        // OF nop
+        // SF nop
+        // AF nop
+        IFX (X_PF | X_ZF) { // set without testing s1, since s1 was just loaded from xZR
+            IFX (X_ZF) {
+                ORI(xFlags, xFlags, 1 << F_ZF);
+            }
+            IFX (X_PF) {
+                ORI(xFlags, xFlags, 1 << F_PF);
+            }
+        }
+    }
+}
+
 // emit SHL32 instruction, from s1 , shift s2, store result in s1 using s3, s4 and s5 as scratch
 void emit_shl32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5)
 {
@@ -354,6 +449,72 @@ void emit_shr16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
     }
 }
 
+// emit SHR16 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch (s5 is accepted but not referenced in this body); emits nothing when c == 0
+void emit_shr16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5)
+{
+    if (!c) return;
+    // c != 0 from here on (the caller in this patch passes an immediate masked with 0x1f)
+
+    IFX (X_PEND) {
+        MOV64x(s3, c);
+        ST_H(s3, xEmu, offsetof(x64emu_t, op2)); // shift count goes to emu->op2 (halfword store)
+        ST_H(s1, xEmu, offsetof(x64emu_t, op1)); // unshifted operand goes to emu->op1
+        SET_DF(s4, d_shr16);
+    } else IFX (X_ALL) {
+        SET_DFNONE();
+    }
+
+    if (la64_lbt) { // NOTE(review): presumably the LBT-extension path, where X64_SRL_H is what produces the flags -- confirm
+        IFX (X_PEND) { // s3 was already loaded with c in the X_PEND block above
+        } else {
+            MOV64x(s3, c);
+        }
+        IFX (X_ALL) {
+            X64_SRL_H(s1, s3); // no other flag code is emitted on this path
+        }
+        SRLI_D(s1, s1, c); // no 16-bit re-masking follows; assumes s1 has no bits set above bit 15 -- TODO confirm
+        IFX (X_PEND) {
+            ST_H(s1, xEmu, offsetof(x64emu_t, res));
+        }
+        return;
+    }
+
+    CLEAR_FLAGS(s3);
+    IFX (X_CF) {
+        if (c > 1) {
+            SRAI_D(s3, s1, c - 1); // moves bit (c - 1) of the operand, the last bit shifted out, down to bit 0
+            ANDI(s3, s3, 1); // LSB == F_CF
+        } else {
+            // no need to shift: for c == 1 the last bit shifted out is bit 0
+            ANDI(s3, s1, 1); // LSB == F_CF
+        }
+        OR(xFlags, xFlags, s3);
+    }
+    IFX (X_OF) {
+        // OF flag is affected only on 1-bit shifts
+        // OF flag is set to the most-significant bit of the original operand
+        if (c == 1) {
+            SRLI_D(s3, s1, 15); // equals bit 15 only if s1 has no bits set above bit 15 -- presumably zero-extended by the caller, confirm
+            SLLI_D(s3, s3, F_OF);
+            OR(xFlags, xFlags, s3);
+        }
+    }
+
+    SRLI_D(s1, s1, c); // the actual shift; must come after the CF/OF code above, which reads the unshifted s1
+
+    // SF should be unset (no SF code is emitted; it stays as CLEAR_FLAGS left it)
+    IFX (X_PEND) {
+        ST_H(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX (X_ZF) {
+        BNEZ(s1, 8); // presumably skips the ORI when the result is non-zero -- confirm offset semantics
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX (X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
+
 // emit SHR32 instruction, from s1 , shift s2 (!0 and and'd already), store result in s1 using s3 and s4 as scratch
 void emit_shr32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4)
 {
@@ -563,6 +724,72 @@ void emit_sar16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
     }
 }
 
+
+// emit SAR16 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch (s5 is accepted but not referenced in this body); emits nothing when c == 0
+void emit_sar16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5)
+{
+    if (!c) return;
+    // c != 0 from here on (the caller in this patch passes an immediate masked with 0x1f)
+
+    IFX (X_PEND) {
+        MOV64x(s3, c);
+        ST_H(s3, xEmu, offsetof(x64emu_t, op2)); // shift count goes to emu->op2 (halfword store)
+        ST_H(s1, xEmu, offsetof(x64emu_t, op1)); // unshifted operand goes to emu->op1
+        SET_DF(s4, d_sar16);
+    } else IFX (X_ALL) {
+        SET_DFNONE();
+    }
+
+    if (la64_lbt) { // NOTE(review): presumably the LBT-extension path, where X64_SRA_H is what produces the flags -- confirm
+        IFX (X_PEND) { // s3 was already loaded with c in the X_PEND block above
+        } else {
+            MOV64x(s3, c);
+        }
+        IFX (X_ALL) {
+            X64_SRA_H(s1, s3); // no other flag code is emitted on this path
+        }
+        SRLI_D(s1, s1, c); // logical shift: the low 16 bits are arithmetic-correct only if s1 arrives sign-extended above bit 15 -- TODO confirm
+        BSTRPICK_D(s1, s1, 15, 0); // keep only bits 15..0 of the shifted value
+        IFX (X_PEND) {
+            ST_H(s1, xEmu, offsetof(x64emu_t, res));
+        }
+        return;
+    }
+
+    CLEAR_FLAGS(s3);
+    IFX (X_CF) {
+        if (c > 1) {
+            SRAI_D(s3, s1, c - 1); // moves bit (c - 1) of the operand, the last bit shifted out, down to bit 0
+            ANDI(s3, s3, 1); // LSB == F_CF
+        } else {
+            // no need to shift: for c == 1 the last bit shifted out is bit 0
+            ANDI(s3, s1, 1); // LSB == F_CF
+        }
+        OR(xFlags, xFlags, s3);
+    }
+    // For the SAR instruction, the OF flag is cleared for all 1-bit shifts.
+    // OF nop (no OF code is emitted; it stays as CLEAR_FLAGS left it)
+    IFX (X_SF) {
+        // SF is the same as the original operand; the test uses the full register sign, so it presumes s1 is sign-extended -- confirm
+        BGE(s1, xZR, 8); // presumably branches over the ORI when s1 >= 0 -- confirm offset semantics
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+
+    SRLI_D(s1, s1, c); // logical shift; relies on sign-extended input so the bits entering 15..0 are copies of the sign -- TODO confirm
+    BSTRPICK_D(s1, s1, 15, 0); // keep only bits 15..0 of the shifted value
+
+    IFX (X_PEND) {
+        ST_H(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX (X_ZF) {
+        BNEZ(s1, 8); // presumably skips the ORI when the result is non-zero -- confirm offset semantics
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX (X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
+
 // emit SAR32 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch
 void emit_sar32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4)
 {
diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h
index b4b33443..630f3873 100644
--- a/src/dynarec/la64/dynarec_la64_helper.h
+++ b/src/dynarec/la64/dynarec_la64_helper.h
@@ -862,13 +862,16 @@ void* la64_next(x64emu_t* emu, uintptr_t addr);
 #define emit_and32          STEPNAME(emit_and32)
 #define emit_and32c         STEPNAME(emit_and32c)
 #define emit_shl16          STEPNAME(emit_shl16)
+#define emit_shl16c          STEPNAME(emit_shl16c) // SHL16 with a constant shift count
 #define emit_shl32          STEPNAME(emit_shl32)
 #define emit_shl32c         STEPNAME(emit_shl32c)
 #define emit_shr8           STEPNAME(emit_shr8)
 #define emit_shr16          STEPNAME(emit_shr16)
+#define emit_shr16c          STEPNAME(emit_shr16c) // SHR16 with a constant shift count
 #define emit_shr32          STEPNAME(emit_shr32)
 #define emit_shr32c         STEPNAME(emit_shr32c)
 #define emit_sar16          STEPNAME(emit_sar16)
+#define emit_sar16c          STEPNAME(emit_sar16c) // SAR16 with a constant shift count
 #define emit_sar32c         STEPNAME(emit_sar32c)
 #define emit_shld32c        STEPNAME(emit_shld32c)
 #define emit_shrd32c        STEPNAME(emit_shrd32c)
@@ -967,13 +970,16 @@ void emit_and16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
 void emit_and32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
 void emit_and32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4);
 void emit_shl16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
+void emit_shl16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5); // like emit_shl16 but the count is the constant c instead of register s2
 void emit_shl32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5);
 void emit_shl32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4, int s5);
 void emit_shr8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
 void emit_shr16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
+void emit_shr16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5); // like emit_shr16 but the count is the constant c instead of register s2
 void emit_shr32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
 void emit_shr32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
 void emit_sar16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
+void emit_sar16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5); // like emit_sar16 but the count is the constant c instead of register s2
 void emit_sar32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
 void emit_shld32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4);
 void emit_shrd32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4);