diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2024-08-29 23:16:39 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-08-29 17:16:39 +0200 |
| commit | 9820bee5e7e696e82a27d48051d526f289cc0d45 (patch) | |
| tree | 02f68d0a35fe924e0c2df7b61f8bb75635238c06 /src | |
| parent | b02302a5cfb248eb333b01747516e76959e8c68d (diff) | |
| download | box64-9820bee5e7e696e82a27d48051d526f289cc0d45.tar.gz box64-9820bee5e7e696e82a27d48051d526f289cc0d45.zip | |
[BOX32][LA64_DYNAREC] Added preliminary box32 support (#1773)
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_64.c | 41 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_6664.c | 5 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_emit_math.c | 8 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_helper.c | 32 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_helper.h | 84 | ||||
| -rw-r--r-- | src/dynarec/la64/la64_emitter.h | 26 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.c | 6 |
7 files changed, 124 insertions(+), 78 deletions(-)
diff --git a/src/dynarec/la64/dynarec_la64_64.c b/src/dynarec/la64/dynarec_la64_64.c index ae23ab2e..677b1633 100644 --- a/src/dynarec/la64/dynarec_la64_64.c +++ b/src/dynarec/la64/dynarec_la64_64.c @@ -319,9 +319,14 @@ uintptr_t dynarec64_64(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni if (MODREG) { // reg <= reg MVxw(TO_LA64((nextop & 7) + (rex.b << 3)), gd); } else { // mem <= reg - addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); - ADD_D(x4, ed, x4); - SDxw(gd, x4, fixedaddress); + if (rex.is32bits) { + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); + ADDz(x4, ed, x4); + SDxw(gd, x4, fixedaddress); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 0, 0); + SDXxw(gd, ed, x4); + } SMWRITE2(); } break; @@ -334,9 +339,14 @@ uintptr_t dynarec64_64(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni MVxw(gd, TO_LA64((nextop & 7) + (rex.b << 3))); } else { // mem <= reg SMREAD(); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); - ADD_D(x4, ed, x4); - LDxw(gd, x4, fixedaddress); + if (rex.is32bits) { + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); + ADDz(x4, ed, x4); + LDxw(gd, x4, fixedaddress); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 0, 0); + LDXxw(gd, ed, x4); + } } break; case 0xC6: @@ -356,14 +366,19 @@ uintptr_t dynarec64_64(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni MOV32w(x3, u8); BSTRINS_D(eb1, x3, eb2 * 8 + 7, eb2 * 8); } else { // mem <= u8 - addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 1); + addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 0, 1); u8 = F8; if (u8) { ADDI_D(x3, xZR, u8); ed = x3; } else ed = xZR; - STX_B(x3, wback, x4); + if (rex.is32bits) { + ADDz(x4, wback, x4); + ST_B(ed, x4, 0); + } else { + 
STX_B(ed, wback, x4); + } SMWRITE2(); } break; @@ -376,15 +391,19 @@ uintptr_t dynarec64_64(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni ed = TO_LA64((nextop & 7) + (rex.b << 3)); MOV64xw(ed, i64); } else { // mem <= i32 - addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 4); + addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 0, 4); i64 = F32S; if (i64) { MOV64xw(x3, i64); ed = x3; } else ed = xZR; - ADD_D(x4, wback, x4); - SDxw(ed, x4, fixedaddress); + if (rex.is32bits) { + ADDz(x4, wback, x4); + SDxw(ed, x4, 0); + } else { + SDXxw(ed, wback, x4); + } SMWRITE2(); } break; diff --git a/src/dynarec/la64/dynarec_la64_6664.c b/src/dynarec/la64/dynarec_la64_6664.c index 01db8021..b6efa1ee 100644 --- a/src/dynarec/la64/dynarec_la64_6664.c +++ b/src/dynarec/la64/dynarec_la64_6664.c @@ -54,10 +54,11 @@ uintptr_t dynarec64_6664(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int grab_segdata(dyn, addr, ninst, x4, seg); SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); + ADDz(x4, ed, x4); if (rex.w) { - LDX_D(gd, ed, x4); + LD_D(gd, x4, fixedaddress); } else { - LDX_HU(x1, ed, x4); + LD_HU(x1, x4, fixedaddress); BSTRINS_D(gd, x1, 15, 0); } } diff --git a/src/dynarec/la64/dynarec_la64_emit_math.c b/src/dynarec/la64/dynarec_la64_emit_math.c index fcf3230f..1e7fc3f9 100644 --- a/src/dynarec/la64/dynarec_la64_emit_math.c +++ b/src/dynarec/la64/dynarec_la64_emit_math.c @@ -134,11 +134,12 @@ void emit_add32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i if (s1 == xRSP && (!dyn->insts || dyn->insts[ninst].x64.gen_flags == X_PEND)) { // special case when doing math on ESP and only PEND is needed: ignoring it! 
if (c >= -2048 && c < 2048) { - ADDIxw(s1, s1, c); + ADDI_D(s1, s1, c); } else { - MOV64xw(s2, c); - ADDxw(s1, s1, s2); + MOV64x(s2, c); + ADD_D(s1, s1, s2); } + if (!rex.w) { ZEROUP(s1); } return; } IFX(X_PEND | X_AF | X_CF | X_OF) @@ -642,6 +643,7 @@ void emit_sub32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i MOV64xw(s2, c); SUBxw(s1, s1, s2); } + if (!rex.w) { ZEROUP(s1); } return; } diff --git a/src/dynarec/la64/dynarec_la64_helper.c b/src/dynarec/la64/dynarec_la64_helper.c index 0458a93e..b9c7d4b1 100644 --- a/src/dynarec/la64/dynarec_la64_helper.c +++ b/src/dynarec/la64/dynarec_la64_helper.c @@ -201,13 +201,15 @@ static uintptr_t geted_32(dynarec_la64_t* dyn, uintptr_t addr, int ninst, uint8_ int64_t tmp = F32S; if (sib_reg != 4) { if (tmp && ((tmp < -2048) || (tmp > maxval) || !i12)) { - MOV32w(scratch, tmp); + // no need to zero up, as we did it below + la64_move32(dyn, ninst, scratch, tmp, 0); if ((sib >> 6)) { SLLI_D(ret, TO_LA64(sib_reg), sib >> 6); ADD_W(ret, ret, scratch); } else { ADD_W(ret, TO_LA64(sib_reg), scratch); } + ZEROUP(ret); } else { if (sib >> 6) { SLLI_D(ret, TO_LA64(sib_reg), (sib >> 6)); @@ -233,6 +235,7 @@ static uintptr_t geted_32(dynarec_la64_t* dyn, uintptr_t addr, int ninst, uint8_ } else { ADD_W(ret, TO_LA64(sib_reg2), TO_LA64(sib_reg)); } + ZEROUP(ret); } else { ret = TO_LA64(sib_reg2); } @@ -275,6 +278,7 @@ static uintptr_t geted_32(dynarec_la64_t* dyn, uintptr_t addr, int ninst, uint8_ } else { ADD_W(ret, TO_LA64(sib_reg2), TO_LA64(sib_reg)); } + ZEROUP(ret); } else { ret = TO_LA64(sib_reg2); } @@ -298,8 +302,10 @@ static uintptr_t geted_32(dynarec_la64_t* dyn, uintptr_t addr, int ninst, uint8_ scratch = TO_LA64((nextop & 0x07)); } ADDI_W(ret, scratch, i32); + ZEROUP(ret); } else { - MOV32w(scratch, i32); + // no need to zero up, as we did it below + la64_move32(dyn, ninst, scratch, i32, 0); if ((nextop & 7) == 4) { if (sib_reg != 4) { ADD_W(scratch, scratch, TO_LA64(sib_reg2)); @@ -317,6 +323,7 @@ static 
uintptr_t geted_32(dynarec_la64_t* dyn, uintptr_t addr, int ninst, uint8_ PASS3(int tmp = TO_LA64((nextop & 0x07))); ADD_W(ret, tmp, scratch); } + ZEROUP(ret); } } } @@ -354,8 +361,10 @@ uintptr_t geted32(dynarec_la64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop if ((sib >> 6)) { SLLI_D(ret, TO_LA64(sib_reg), sib >> 6); ADD_W(ret, ret, scratch); - } else + } else { ADD_W(ret, TO_LA64(sib_reg), scratch); + } + ZEROUP(ret); } else { if (sib >> 6) SLLI_D(ret, TO_LA64(sib_reg), (sib >> 6)); @@ -377,17 +386,21 @@ uintptr_t geted32(dynarec_la64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop if ((sib >> 6)) { SLLI_D(ret, TO_LA64(sib_reg), (sib >> 6)); ADD_W(ret, ret, TO_LA64(sib_reg2)); - } else + } else { ADD_W(ret, TO_LA64(sib_reg2), TO_LA64(sib_reg)); + } + ZEROUP(ret); } else { ret = TO_LA64(sib_reg2); } } } else if ((nextop & 7) == 5) { uint32_t tmp = F32; - MOV32w(ret, tmp); + // no need to zero up, as we did it below + la64_move32(dyn, ninst, ret, tmp, 0); GETIP(addr + delta); ADD_W(ret, ret, xRIP); + ZEROUP(ret); switch (lock) { case 1: addLockAddress(addr + delta + tmp); break; case 2: @@ -420,8 +433,10 @@ uintptr_t geted32(dynarec_la64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop if (sib >> 6) { SLLI_D(ret, TO_LA64(sib_reg), (sib >> 6)); ADD_W(ret, ret, TO_LA64(sib_reg2)); - } else + } else { ADD_W(ret, TO_LA64(sib_reg2), TO_LA64(sib_reg)); + } + ZEROUP(ret); } else { ret = TO_LA64(sib_reg2); } @@ -443,8 +458,10 @@ uintptr_t geted32(dynarec_la64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop } else scratch = TO_LA64((nextop & 0x07) + (rex.b << 3)); ADDI_W(ret, scratch, i64); + ZEROUP(ret); } else { - MOV32w(scratch, i64); + // no need to zero up, as we did it below + la64_move32(dyn, ninst, scratch, i64, 0); if ((nextop & 7) == 4) { if (sib_reg != 4) { ADD_W(scratch, scratch, TO_LA64(sib_reg2)); @@ -461,6 +478,7 @@ uintptr_t geted32(dynarec_la64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop PASS3(int tmp = TO_LA64((nextop & 0x07) + (rex.b << 
3))); ADD_W(ret, tmp, scratch); } + ZEROUP(ret); } } } diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h index 9c1f6aa5..ba43be46 100644 --- a/src/dynarec/la64/dynarec_la64_helper.h +++ b/src/dynarec/la64/dynarec_la64_helper.h @@ -151,15 +151,21 @@ #define GETEW(i, D) GETEWW(x3, i, D) // GETEDO can use r1 for ed, and r2 for wback. wback is 0 if ed is xEAX..xEDI -#define GETEDO(O, D) \ - if (MODREG) { \ - ed = TO_LA64((nextop & 7) + (rex.b << 3)); \ - wback = 0; \ - } else { \ - SMREAD(); \ - addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 0, D); \ - LDXxw(x1, wback, O); \ - ed = x1; \ +#define GETEDO(O, D) \ + if (MODREG) { \ + ed = TO_LA64((nextop & 7) + (rex.b << 3)); \ + wback = 0; \ + } else { \ + SMREAD(); \ + if (rex.is32bits) { \ + addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, D); \ + ADDz(O, wback, O); \ + LD_WU(x1, O, fixedaddress); \ + } else { \ + addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 0, D); \ + LDXxw(x1, wback, O); \ + } \ + ed = x1; \ } // GETSED can use r1 for ed, and r2 for wback. ed will be sign extended! @@ -203,10 +209,15 @@ SMWRITE(); \ } -#define WBACKO(O) \ - if (wback) { \ - SDXxw(ed, wback, O); \ - SMWRITE2(); \ +#define WBACKO(O) \ + if (wback) { \ + if (rex.is32bits) { \ + ADDz(O, wback, O); \ + ST_W(ed, O, 0); \ + } else { \ + SDXxw(ed, wback, O); \ + } \ + SMWRITE2(); \ } // GETSEW will use i for ed, and can use r3 for wback. This is the Signed version @@ -298,27 +309,32 @@ BSTRPICK_D(gd, gb1, gb2 + 7, gb2); // GETEBO will use i for ed, i is also Offset, and can use r3 for wback. 
-#define GETEBO(i, D) \ - if (MODREG) { \ - if (rex.rex) { \ - wback = TO_LA64((nextop & 7) + (rex.b << 3)); \ - wb2 = 0; \ - } else { \ - wback = (nextop & 7); \ - wb2 = (wback >> 2) * 8; \ - wback = TO_LA64(wback & 3); \ - } \ - BSTRPICK_D(i, wback, wb2 + 7, wb2); \ - wb1 = 0; \ - ed = i; \ - } else { \ - SMREAD(); \ - addr = geted(dyn, addr, ninst, nextop, &wback, x3, x2, &fixedaddress, rex, NULL, 1, D); \ - ADD_D(x3, wback, i); \ - if (wback != x3) wback = x3; \ - LD_B(i, wback, fixedaddress); \ - wb1 = 1; \ - ed = i; \ +#define GETEBO(i, D) \ + if (MODREG) { \ + if (rex.rex) { \ + wback = TO_LA64((nextop & 7) + (rex.b << 3)); \ + wb2 = 0; \ + } else { \ + wback = (nextop & 7); \ + wb2 = (wback >> 2) * 8; \ + wback = TO_LA64(wback & 3); \ + } \ + BSTRPICK_D(i, wback, wb2 + 7, wb2); \ + wb1 = 0; \ + ed = i; \ + } else { \ + SMREAD(); \ + if (rex.is32bits) { \ + addr = geted(dyn, addr, ninst, nextop, &wback, x3, x2, &fixedaddress, rex, NULL, 1, D); \ + ADDz(x3, wback, i); \ + if (wback != x3) wback = x3; \ + LD_BU(i, wback, fixedaddress); \ + } else { \ + addr = geted(dyn, addr, ninst, nextop, &wback, x3, x2, &fixedaddress, rex, NULL, 0, D); \ + LDX_BU(i, wback, i); \ + } \ + wb1 = 1; \ + ed = i; \ } // Get GX as a quad (might use x1) diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h index 5f34cd14..5577221d 100644 --- a/src/dynarec/la64/la64_emitter.h +++ b/src/dynarec/la64/la64_emitter.h @@ -2006,20 +2006,6 @@ LSX instruction starts with V, LASX instruction starts with XV. 
//////////////////////////////////////////////////////////////////////////////// - -// GR[rd] = imm32 -#define MOV32w_(rd, imm32, zeroup) \ - do { \ - if (((uint32_t)(imm32)) > 0xfffu) { \ - LU12I_W(rd, (imm32) >> 12); \ - ORI(rd, rd, imm32); \ - if (zeroup && (int32_t)imm32 < 0) \ - ZEROUP(rd); \ - } else { \ - ORI(rd, xZR, imm32); \ - } \ - } while (0) - // MOV64x/MOV32w is quite complex, so use a function for this #define MOV64x(A, B) la64_move64(dyn, ninst, A, B) #define MOV32w(A, B) la64_move32(dyn, ninst, A, B, 1) @@ -2084,10 +2070,12 @@ LSX instruction starts with V, LASX instruction starts with XV. #define ADDz(rd, rj, rk) \ do { \ - if (rex.is32bits) \ - ADD_W(rd, rj, rk); \ - else \ + if (!rex.is32bits) \ ADD_D(rd, rj, rk); \ + else { \ + ADD_W(rd, rj, rk); \ + ZEROUP(rd); \ + } \ } while (0) #define LDxw(rd, rj, imm12) \ @@ -2183,12 +2171,12 @@ LSX instruction starts with V, LASX instruction starts with XV. #define PUSH1_32(reg) \ do { \ ST_W(reg, xRSP, -4); \ - ADDI_W(xRSP, xRSP, -4); \ + ADDI_D(xRSP, xRSP, -4); \ } while (0); #define POP1_32(reg) \ do { \ LD_WU(reg, xRSP, 0); \ - if (reg != xRSP) ADDI_W(xRSP, xRSP, 4); \ + if (reg != xRSP) ADDI_D(xRSP, xRSP, 4); \ } while (0); // POP reg diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c index 5b9a04eb..0585b98e 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.c +++ b/src/dynarec/rv64/dynarec_rv64_helper.c @@ -373,7 +373,8 @@ uintptr_t geted32(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop } } else if((nextop&7)==5) { uint32_t tmp = F32; - MOV32w(ret, tmp); + // no need to zero up, as we did it below + rv64_move32(dyn, ninst, ret, tmp, 0); GETIP(addr+delta); ADDW(ret, ret, xRIP); ZEROUP(ret); @@ -434,7 +435,8 @@ uintptr_t geted32(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop ADDIW(ret, scratch, i64); ZEROUP(ret); } else { - MOV32w(scratch, i64); + // no need to zero up, as we did it below + rv64_move32(dyn, ninst, scratch, i64, 
0); if((nextop&7)==4) { if (sib_reg!=4) { ADDW(scratch, scratch, xRAX+sib_reg2); |