about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/dynarec/la64/dynarec_la64_00.c | 30
-rw-r--r-- src/dynarec/la64/dynarec_la64_helper.c | 70
-rw-r--r-- src/dynarec/la64/dynarec_la64_helper.h | 47
-rw-r--r-- src/dynarec/la64/la64_emitter.h | 45
-rw-r--r-- src/dynarec/la64/la64_printer.c | 20
5 files changed, 86 insertions, 126 deletions
diff --git a/src/dynarec/la64/dynarec_la64_00.c b/src/dynarec/la64/dynarec_la64_00.c
index 26c300d4..87c6a989 100644
--- a/src/dynarec/la64/dynarec_la64_00.c
+++ b/src/dynarec/la64/dynarec_la64_00.c
@@ -94,9 +94,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             u8 = F8;
             ANDI(x1, xRAX, 0xff);
             emit_add8c(dyn, ninst, x1, u8, x3, x4, x5);
-            ADDI_W(x3, xZR, 0xf00);
-            AND(xRAX, xRAX, x3);
-            OR(xRAX, xRAX, x1);
+            BSTRINS_D(xRAX, x1, 7, 0);
             break;
         case 0x05:
             INST_NAME("ADD EAX, Id");
@@ -163,9 +161,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             u8 = F8;
             ANDI(x1, xRAX, 0xff);
             emit_sub8c(dyn, ninst, x1, u8, x2, x3, x4, x5);
-            ADDI_W(x3, xZR, 0xf00);
-            AND(xRAX, xRAX, x3);
-            OR(xRAX, xRAX, x1);
+            BSTRINS_D(xRAX, x1, 7, 0);
             break;
         case 0x2D:
             INST_NAME("SUB EAX, Id");
@@ -489,26 +485,8 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     eb1 = TO_LA64((nextop & 7) + (rex.b << 3));
                     eb2 = 0;
                 }
-                if (eb2) {
-                    // load a mask to x3 (ffffffffffff00ff)
-                    LU12I_W(x3, 0xffff0);
-                    ORI(x3, x3, 0xff);
-                    // apply mask
-                    AND(eb1, eb1, x3);
-                    if (u8) {
-                        if ((u8 << 8) < 2048) {
-                            ADDI_D(x4, xZR, u8 << 8);
-                        } else {
-                            ADDI_D(x4, xZR, u8);
-                            SLLI_D(x4, x4, 8);
-                        }
-                        OR(eb1, eb1, x4);
-                    }
-                } else {
-                    ADDI_W(x3, xZR, 0xf00); // mask ffffffffffffff00
-                    AND(eb1, eb1, x3);
-                    ORI(eb1, eb1, u8);
-                }
+                MOV32w(x3, u8);
+                BSTRINS_D(eb1, x3, eb2 * 8 + 7, eb2 * 8);
             } else { // mem <= u8
                 addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, &lock, 0, 1);
                 u8 = F8;
diff --git a/src/dynarec/la64/dynarec_la64_helper.c b/src/dynarec/la64/dynarec_la64_helper.c
index ea0f7d11..03934883 100644
--- a/src/dynarec/la64/dynarec_la64_helper.c
+++ b/src/dynarec/la64/dynarec_la64_helper.c
@@ -369,37 +369,26 @@ void jump_to_next(dynarec_la64_t* dyn, uintptr_t ip, int reg, int ninst, int is3
         if (reg != xRIP) {
             MV(xRIP, reg);
         }
+        NOTEST(x2);
         uintptr_t tbl = is32bits ? getJumpTable32() : getJumpTable64();
         MAYUSE(tbl);
         TABLE64(x3, tbl);
         if (!is32bits) {
-            SRLI_D(x2, xRIP, JMPTABL_START3);
-            ALSL_D(x3, x2, x3, 2);
-            LD_D(x3, x3, 0); // could be LR_D(x3, x3, 1, 1); for better safety
+            BSTRPICK_D(x2, xRIP, JMPTABL_START3 + JMPTABL_SHIFT3 - 1, JMPTABL_START3);
+            ALSL_D(x3, x2, x3, 3);
+            LD_D(x3, x3, 0);
         }
-        MOV64x(x4, JMPTABLE_MASK2 << 3); // x4 = mask
-        SRLI_D(x2, xRIP, JMPTABL_START2 - 3);
-        AND(x2, x2, x4);
-        ADD_D(x3, x3, x2);
-        LD_D(x3, x3, 0); // LR_D(x3, x3, 1, 1);
-        if (JMPTABLE_MASK2 != JMPTABLE_MASK1) {
-            MOV64x(x4, JMPTABLE_MASK1 << 3); // x4 = mask
-        }
-        SRLI_D(x2, xRIP, JMPTABL_START1 - 3);
-        AND(x2, x2, x4);
-        ADD_D(x3, x3, x2);
-        LD_D(x3, x3, 0); // LR_D(x3, x3, 1, 1);
-        if (JMPTABLE_MASK0 < 2048) {
-            ANDI(x2, xRIP, JMPTABLE_MASK0);
-        } else {
-            if (JMPTABLE_MASK1 != JMPTABLE_MASK0) {
-                MOV64x(x4, JMPTABLE_MASK0); // x4 = mask
-            }
-            AND(x2, xRIP, x4);
-        }
-        ALSL_D(x3, x2, x3, 2);
-        LD_D(x2, x3, 0); // LR_D(x2, x3, 1, 1);
+        BSTRPICK_D(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2);
+        ALSL_D(x3, x2, x3, 3);
+        LD_D(x3, x3, 0);
+        BSTRPICK_D(x2, xRIP, JMPTABL_START1 + JMPTABL_SHIFT1 - 1, JMPTABL_START1);
+        ALSL_D(x3, x2, x3, 3);
+        LD_D(x3, x3, 0);
+        BSTRPICK_D(x2, xRIP, JMPTABL_START0 + JMPTABL_SHIFT0 - 1, JMPTABL_START0);
+        ALSL_D(x3, x2, x3, 3);
+        LD_D(x2, x3, 0);
     } else {
+        NOTEST(x2);
         uintptr_t p = getJumpTableAddress64(ip);
         MAYUSE(p);
         TABLE64(x3, p);
@@ -439,33 +428,18 @@ void ret_to_epilog(dynarec_la64_t* dyn, int ninst, rex_t rex)
     uintptr_t tbl = rex.is32bits ? getJumpTable32() : getJumpTable64();
     MOV64x(x3, tbl);
     if (!rex.is32bits) {
-        SRLI_D(x2, xRIP, JMPTABL_START3);
-        SLLI_D(x2, x2, 3);
-        ADD_D(x3, x3, x2);
+        BSTRPICK_D(x2, xRIP, JMPTABL_START3 + JMPTABL_SHIFT3 - 1, JMPTABL_START3);
+        ALSL_D(x3, x2, x3, 3);
         LD_D(x3, x3, 0);
     }
-    MOV64x(x4, JMPTABLE_MASK2 << 3); // x4 = mask
-    SRLI_D(x2, xRIP, JMPTABL_START2 - 3);
-    AND(x2, x2, x4);
-    ADD_D(x3, x3, x2);
+    BSTRPICK_D(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2);
+    ALSL_D(x3, x2, x3, 3);
     LD_D(x3, x3, 0);
-    if (JMPTABLE_MASK2 != JMPTABLE_MASK1) {
-        MOV64x(x4, JMPTABLE_MASK1 << 3); // x4 = mask
-    }
-    SRLI_D(x2, xRIP, JMPTABL_START1 - 3);
-    AND(x2, x2, x4);
-    ADD_D(x3, x3, x2);
+    BSTRPICK_D(x2, xRIP, JMPTABL_START1 + JMPTABL_SHIFT1 - 1, JMPTABL_START1);
+    ALSL_D(x3, x2, x3, 3);
     LD_D(x3, x3, 0);
-    if (JMPTABLE_MASK0 < 2048) {
-        ANDI(x2, xRIP, JMPTABLE_MASK0);
-    } else {
-        if (JMPTABLE_MASK1 != JMPTABLE_MASK0) {
-            MOV64x(x4, JMPTABLE_MASK0); // x4 = mask
-        }
-        AND(x2, xRIP, x4);
-    }
-    SLLI_D(x2, x2, 3);
-    ADD_D(x3, x3, x2);
+    BSTRPICK_D(x2, xRIP, JMPTABL_START0 + JMPTABL_SHIFT0 - 1, JMPTABL_START0);
+    ALSL_D(x3, x2, x3, 3);
     LD_D(x2, x3, 0);
     BR(x2); // save LR
     CLEARIP();
diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h
index e3c0c874..ea2b155f 100644
--- a/src/dynarec/la64/dynarec_la64_helper.h
+++ b/src/dynarec/la64/dynarec_la64_helper.h
@@ -134,12 +134,7 @@
             wb2 = (wback >> 2) * 8;                                                             \
             wback = TO_LA64((wback & 3));                                                       \
         }                                                                                       \
-        if (wb2) {                                                                              \
-            MV(i, wback);                                                                       \
-            SRLI_D(i, i, wb2);                                                                  \
-            ANDI(i, i, 0xff);                                                                   \
-        } else                                                                                  \
-            ANDI(i, wback, 0xff);                                                               \
+        BSTRPICK_D(i, wback, wb2 + 7, wb2);                                                     \
         wb1 = 0;                                                                                \
         ed = i;                                                                                 \
     } else {                                                                                    \
@@ -161,42 +156,18 @@
         gb1 = TO_LA64((gd & 3));                              \
     }                                                         \
     gd = i;                                                   \
-    if (gb2) {                                                \
-        MV(gd, gb1);                                          \
-        SRLI_D(gd, gd, 8);                                    \
-        ANDI(gd, gd, 0xff);                                   \
-    } else                                                    \
-        ANDI(gd, gb1, 0xff);
+    BSTRPICK_D(gd, gb1, gb2 + 7, gb2);
 
 // Write gb (gd) back to original register / memory, using s1 as scratch
-#define GBBACK(s1)                        \
-    if (gb2) {                            \
-        MOV64x(s1, 0xffffffffffff00ffLL); \
-        AND(gb1, gb1, s1);                \
-        SLLI_D(s1, gd, 8);                \
-        OR(gb1, gb1, s1);                 \
-    } else {                              \
-        ADDI_W(s1, xZR, 0xf00);           \
-        AND(gb1, gb1, s1);                \
-        OR(gb1, gb1, gd);                 \
-    }
+#define GBBACK(s1) BSTRINS_D(gb1, gd, gb2 + 7, gb2); // inserts gd[7:0] into gb1[gb2+7:gb2]; s1 is not referenced by this expansion
 
 // Write eb (ed) back to original register / memory, using s1 as scratch
-#define EBBACK(s1, c)                     \
-    if (wb1) {                            \
-        SUB_D(ed, wback, fixedaddress);   \
-        SMWRITE();                        \
-    } else if (wb2) {                     \
-        MOV64x(s1, 0xffffffffffff00ffLL); \
-        AND(wback, wback, s1);            \
-        if (c) { ANDI(ed, ed, 0xff); }    \
-        SLLI_D(s1, ed, 8);                \
-        OR(wback, wback, s1);             \
-    } else {                              \
-        ADDI_W(s1, xZR, 0xf00);           \
-        AND(wback, wback, s1);            \
-        if (c) { ANDI(ed, ed, 0xff); }    \
-        OR(wback, wback, ed);             \
+#define EBBACK(s1, c) /* s1 and c are not referenced by this expansion */ \
+    if (wb1) { /* wb1 != 0: presumably the memory-operand path -- confirm against the getter macro */ \
+        SUB_D(ed, wback, fixedaddress); /* NOTE(review): arithmetic op right before SMWRITE(); a byte store of ed looks intended -- confirm */ \
+        SMWRITE();                          \
+    } else { /* insert ed[7:0] into wback[wb2+7:wb2], leaving the other bits of wback untouched */ \
+        BSTRINS_D(wback, ed, wb2 + 7, wb2); \
     }
 
 // CALL will use x6 for the call address. Return value can be put in ret (unless ret is -1)
diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h
index a53ccd47..c72178ce 100644
--- a/src/dynarec/la64/la64_emitter.h
+++ b/src/dynarec/la64/la64_emitter.h
@@ -113,9 +113,6 @@ f24-f31  fs0-fs7   Static registers                Callee
 #define SPLIT20(A) (((A) + 0x800) >> 12)
 #define SPLIT12(A) ((A) & 0xfff)
 
-// ZERO the upper part
-#define ZEROUP(r) AND(r, r, xMASK);
-
 // Standard formats
 #define type_4R(opc, ra, rk, rj, rd)     ((opc) << 20 | (ra) << 15 | (rk) << 10 | (rj) << 5 | (rd))
 #define type_3R(opc, rk, rj, rd)         ((opc) << 15 | (rk) << 10 | (rj) << 5 | (rd))
@@ -131,10 +128,10 @@ f24-f31  fs0-fs7   Static registers                Callee
 #define type_I26(opc, imm26)             ((opc) << 26 | ((imm26) & 0xFFFF) << 10 | ((imm26 >> 16) & 0x3FF))
 
 // Made-up formats not found in the spec.
-#define type_2RI3(opc, imm3, rj, rd)     ((opc) << 13 | ((imm3) & 0x7 ) << 10 | (rj) << 5 | (rd))
-#define type_2RI4(opc, imm4, rj, rd)     ((opc) << 14 | ((imm4) & 0xF ) << 10 | (rj) << 5 | (rd))
-#define type_2RI5(opc, imm5, rj, rd)     ((opc) << 15 | ((imm5) & 0x1F) << 10 | (rj) << 5 | (rd))
-#define type_2RI6(opc, imm6, rj, rd)     ((opc) << 16 | ((imm6) & 0x3F) << 10 | (rj) << 5 | (rd))
+#define type_2RI3(opc, imm3, rj, rd)     ((opc) << 13 | ((imm3)  & 0x7 )  << 10 | (rj) << 5 | (rd))
+#define type_2RI4(opc, imm4, rj, rd)     ((opc) << 14 | ((imm4)  & 0xF )  << 10 | (rj) << 5 | (rd))
+#define type_2RI5(opc, imm5, rj, rd)     ((opc) << 15 | ((imm5)  & 0x1F)  << 10 | (rj) << 5 | (rd))
+#define type_2RI6(opc, imm6, rj, rd)     ((opc) << 16 | ((imm6)  & 0x3F)  << 10 | (rj) << 5 | (rd))
 
 // tmp = GR[rj][31:0] + GR[rk][31:0]
 // Gr[rd] = SignExtend(tmp[31:0], GRLEN)
@@ -159,15 +156,15 @@ f24-f31  fs0-fs7   Static registers                Callee
 // GR[rd] = tmp[63:0]
 #define ADDU16I_D(rd, rj, imm16) EMIT(type_2RI16(0b000100, imm16, rj, rd))
 
-// tmp = (GR[rj][31:0] << (imm2 + 1)) + GR[rk][31:0]
+// tmp = (GR[rj][31:0] << imm) + GR[rk][31:0]   (imm is the real shift amount; the field stores imm - 1)
 // GR[rd] = SignExtend(tmp[31:0], GRLEN)
-#define ALSL_W(rd, rj, rk, imm2) EMIT(type_3RI2(0b000000000000010, imm2, rk, rj, rd))
-// tmp = (GR[rj][31:0] << (imm2 + 1)) + GR[rk][31:0]
+#define ALSL_W(rd, rj, rk, imm) EMIT(type_3RI2(0b000000000000010, ((imm) - 1), rk, rj, rd))
+// tmp = (GR[rj][31:0] << imm) + GR[rk][31:0]   (imm is the real shift amount; the field stores imm - 1)
 // GR[rd] = ZeroExtend(tmp[31:0], GRLEN)
-#define ALSL_WU(rd, rj, rk, imm2) EMIT(type_3RI2(0b000000000000011, imm2, rk, rj, rd))
-// tmp = (GR[rj][63:0] << (imm2 + 1)) + GR[rk][63:0]
+#define ALSL_WU(rd, rj, rk, imm) EMIT(type_3RI2(0b000000000000011, ((imm) - 1), rk, rj, rd))
+// tmp = (GR[rj][63:0] << imm) + GR[rk][63:0]   (imm is the real shift amount; the field stores imm - 1)
 // GR[rd] = tmp[63:0]
-#define ALSL_D(rd, rj, rk, imm2) EMIT(type_3RI2(0b000000000010110, imm2, rk, rj, rd))
+#define ALSL_D(rd, rj, rk, imm) EMIT(type_3RI2(0b000000000010110, ((imm) - 1), rk, rj, rd))
 
 // GR[rd] = SignExtend({imm20, 12'b0}, GRLEN)
 #define LU12I_W(rd, imm20) EMIT(type_1RI20(0b0001010, imm20, rd))
@@ -264,9 +261,29 @@ f24-f31  fs0-fs7   Static registers                Callee
         ADD_D(rd, rs1, scratch);           \
     }
 
-
 #define SEXT_W(rd, rs1) SLLI_W(rd, rs1, 0)
 
+// bstr32[31:msbw+1] = GR[rd][31:msbw+1]
+// bstr32[msbw:lsbw] = GR[rj][msbw-lsbw:0]
+// bstr32[lsbw-1:0] = GR[rd][lsbw-1:0]
+// GR[rd] = SignExtend(bstr32[31:0], GRLEN)
+#define BSTRINS_W(rd, rj, msbw5, lsbw5) EMIT(type_2RI12(0b0000000001, 0b100000000000 | ((msbw5) & 0x1F) << 6 | ((lsbw5) & 0x1F), rj, rd))
+
+// GR[rd][63:msbd+1] = GR[rd][63:msbd+1]
+// GR[rd][msbd:lsbd] = GR[rj][msbd-lsbd:0]
+// GR[rd][lsbd-1:0] = GR[rd][lsbd-1:0]
+#define BSTRINS_D(rd, rj, msbd6, lsbd6) EMIT(type_2RI12(0b0000000010, ((msbd6) & 0x3F) << 6 | ((lsbd6) & 0x3F), rj, rd))
+
+// bstr32[31:0] = ZeroExtend(GR[rj][msbw:lsbw], 32)
+// GR[rd] = SignExtend(bstr32[31:0], GRLEN)
+#define BSTRPICK_W(rd, rj, msbw5, lsbw5) EMIT(type_2RI12(0b0000000001, 0b100000100000 | ((msbw5) & 0x1F) << 6 | ((lsbw5) & 0x1F), rj, rd))
+
+// GR[rd] = ZeroExtend(GR[rj][msbd:lsbd], 64)
+#define BSTRPICK_D(rd, rj, msbd6, lsbd6) EMIT(type_2RI12(0b0000000011, ((msbd6) & 0x3F) << 6 | ((lsbd6) & 0x3F), rj, rd))
+
+// ZERO the upper part: clears GR[rd][63:32] by inserting bits from xZR
+#define ZEROUP(rd) BSTRINS_D(rd, xZR, 63, 32);
+
 // if GR[rj] == GR[rd]:
 //     PC = PC + SignExtend({imm16, 2'b0}, GRLEN)
 #define BEQ(rj, rd, imm18) EMIT(type_2RI16(0b010110, ((imm18)>>2), rj, rd))
diff --git a/src/dynarec/la64/la64_printer.c b/src/dynarec/la64/la64_printer.c
index cdb9a4c6..385be055 100644
--- a/src/dynarec/la64/la64_printer.c
+++ b/src/dynarec/la64/la64_printer.c
@@ -255,6 +255,26 @@ const char* la64_print(uint32_t opcode, uintptr_t addr)
         snprintf(buff, sizeof(buff), "%-15s %s, %s, %u", "ROTRI.W", Xt[Rd], Xt[Rj], imm);
         return buff;
     }
+    // BSTRINS.W
+    if(isMask(opcode, "00000000011uuuuu0iiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %u, %u", "BSTRINS.W", Xt[Rd], Xt[Rj], imm_up, imm);
+        return buff;
+    }
+    // BSTRINS.D
+    if(isMask(opcode, "0000000010uuuuuuiiiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %u, %u", "BSTRINS.D", Xt[Rd], Xt[Rj], imm_up, imm);
+        return buff;
+    }
+    // BSTRPICK.W
+    if(isMask(opcode, "00000000011uuuuu1iiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %u, %u", "BSTRPICK.W", Xt[Rd], Xt[Rj], imm_up, imm);
+        return buff;
+    }
+    // BSTRPICK.D
+    if(isMask(opcode, "0000000011uuuuuuiiiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %u, %u", "BSTRPICK.D", Xt[Rd], Xt[Rj], imm_up, imm);
+        return buff;
+    }
     // BEQ
     if(isMask(opcode, "010110iiiiiiiiiiiiiiiijjjjjddddd", &a)) {
         snprintf(buff, sizeof(buff), "%-15s %s, %s, %d", "BEQ", Xt[Rd], Xt[Rj], imm << 2);