diff options
| author | Yang Liu <numbksco@gmail.com> | 2024-03-11 16:05:10 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-03-11 09:05:10 +0100 |
| commit | 90a6306bbe2c853221e18a9a4eb5f79e2dafc186 (patch) | |
| tree | e6d805b4ea5454305b69066b2e989798b0cac3b4 /src | |
| parent | a5a7d9382ec09c89779ae66f3af2ca2255ea3003 (diff) | |
| download | box64-90a6306bbe2c853221e18a9a4eb5f79e2dafc186.tar.gz box64-90a6306bbe2c853221e18a9a4eb5f79e2dafc186.zip | |
[LA64_DYNAREC] Added more opcodes (#1354)
* Added 38 CMP opcode
* Added 0F B6 MOVZX opcode
* Added 88 MOV opcode
* Fixed 0F B6 MOVZX opcode
* Added 81/83 LOCK ADD opcodes
* Added 98 CWDE opcode
* Rebase
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_00.c | 10 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_0f.c | 20 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_f0.c | 45 | ||||
| -rw-r--r-- | src/dynarec/la64/la64_emitter.h | 414 |
4 files changed, 313 insertions, 176 deletions
diff --git a/src/dynarec/la64/dynarec_la64_00.c b/src/dynarec/la64/dynarec_la64_00.c index 3402f014..0cb34c96 100644 --- a/src/dynarec/la64/dynarec_la64_00.c +++ b/src/dynarec/la64/dynarec_la64_00.c @@ -254,6 +254,14 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETED(0); emit_cmp32(dyn, ninst, rex, ed, gd, x3, x4, x5, x6); break; + case 0x3B: + INST_NAME("CMP Gd, Ed"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGD; + GETED(0); + emit_cmp32(dyn, ninst, rex, gd, ed, x3, x4, x5, x6); + break; case 0x3D: INST_NAME("CMP EAX, Id"); SETFLAGS(X_ALL, SF_SET_PENDING); @@ -515,7 +523,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni BSTRINS_D(eb1, gd, eb2 * 8 + 7, eb2 * 8); } else { addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, &lock, 1, 0); - ST_B(gb1, ed, fixedaddress); + ST_B(gd, ed, fixedaddress); SMWRITELOCK(lock); } break; diff --git a/src/dynarec/la64/dynarec_la64_0f.c b/src/dynarec/la64/dynarec_la64_0f.c index 6e90d230..ccd2c368 100644 --- a/src/dynarec/la64/dynarec_la64_0f.c +++ b/src/dynarec/la64/dynarec_la64_0f.c @@ -186,6 +186,26 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni LD_D(xRDX, xEmu, offsetof(x64emu_t, regs[_DX])); LD_D(xRBX, xEmu, offsetof(x64emu_t, regs[_BX])); break; + case 0xB6: + INST_NAME("MOVZX Gd, Eb"); + nextop = F8; + GETGD; + if (MODREG) { + if (rex.rex) { + eb1 = TO_LA64((nextop & 7) + (rex.b << 3)); + eb2 = 0; + } else { + ed = (nextop & 7); + eb1 = TO_LA64(ed & 3); // Ax, Cx, Dx or Bx + eb2 = (ed & 4) >> 2; // L or H + } + BSTRPICK_D(gd, eb1, eb2 * 8 + 7, eb2 * 8); + } else { + SMREAD(); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); + LD_BU(gd, ed, fixedaddress); + } + break; case 0xB7: INST_NAME("MOVZX Gd, Ew"); nextop = F8; diff --git a/src/dynarec/la64/dynarec_la64_f0.c b/src/dynarec/la64/dynarec_la64_f0.c index adf6ce8f..9d1c19db 100644 --- 
a/src/dynarec/la64/dynarec_la64_f0.c +++ b/src/dynarec/la64/dynarec_la64_f0.c @@ -92,6 +92,51 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni DEFAULT; } break; + case 0x81: + case 0x83: + nextop = F8; + SMDMB(); + switch ((nextop >> 3) & 7) { + case 0: // ADD + if (opcode == 0x81) { + INST_NAME("LOCK ADD Ed, Id"); + } else { + INST_NAME("LOCK ADD Ed, Ib"); + } + SETFLAGS(X_ALL, SF_SET_PENDING); + if (MODREG) { + if (opcode == 0x81) + i64 = F32S; + else + i64 = F8S; + ed = TO_LA64((nextop & 7) + (rex.b << 3)); + emit_add32c(dyn, ninst, rex, ed, i64, x3, x4, x5, x6); + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, (opcode == 0x81) ? 4 : 1); + if (opcode == 0x81) + i64 = F32S; + else + i64 = F8S; + if (i64 < -2048 || i64 >= 2048) { + MOV64xw(x3, i64); + } + MARKLOCK; + LLxw(x1, wback, 0); + if (i64 >= -2048 && i64 < 2048) { + ADDIxw(x4, x1, i64); + } else { + ADDxw(x4, x1, x3); + } + SCxw(x4, wback, 0); + BEQZ_MARKLOCK(x4); + IFX(X_ALL | X_PEND) { + emit_add32c(dyn, ninst, rex, x1, i64, x3, x4, x5, x6); + } + SMDMB(); + } + break; + } + break; default: DEFAULT; } diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h index 51fbbf68..1d1968f2 100644 --- a/src/dynarec/la64/la64_emitter.h +++ b/src/dynarec/la64/la64_emitter.h @@ -277,29 +277,35 @@ f24-f31 fs0-fs7 Static registers Callee SLLI_W(rd, rs1, imm); \ } // Shift Right Logical Immediate -#define SRLIxw(rd, rs1, imm) \ - if (rex.w) { \ - SRLI_D(rd, rs1, imm); \ - } else { \ - SRLI_W(rd, rs1, imm); \ - } +#define SRLIxw(rd, rs1, imm) \ + do { \ + if (rex.w) { \ + SRLI_D(rd, rs1, imm); \ + } else { \ + SRLI_W(rd, rs1, imm); \ + } \ + } while (0) // Shift Right Arithmetic Immediate -#define SRAIxw(rd, rs1, imm) \ - if (rex.w) { \ - SRAI_D(rd, rs1, imm); \ - } else { \ - SRAI_W(rd, rs1, imm); \ - } +#define SRAIxw(rd, rs1, imm) \ + do { \ + if (rex.w) { \ + SRAI_D(rd, rs1, imm); \ + } else { \ + SRAI_W(rd, 
rs1, imm); \ + } \ + } while (0) // rd = rj + (rk << imm6) #define ADDSL(rd, rs1, rs2, imm6, scratch) \ - if (!(imm6)) { \ - ADD_D(rd, rs1, rs2); \ - } else { \ - SLLI_D(scratch, rs2, imm6); \ - ADD_D(rd, rs1, scratch); \ - } + do { \ + if (!(imm6)) { \ + ADD_D(rd, rs1, rs2); \ + } else { \ + SLLI_D(scratch, rs2, imm6); \ + ADD_D(rd, rs1, scratch); \ + } \ + } while (0) #define SEXT_W(rd, rs1) SLLI_W(rd, rs1, 0) @@ -324,6 +330,12 @@ f24-f31 fs0-fs7 Static registers Callee // ZERO the upper part #define ZEROUP(rd) BSTRINS_D(rd, xZR, 63, 32); +// GR[rd] = SignExtend(GR[rj][7:0], GRLEN) +#define EXT_W_B(rd, rj) EMIT(type_2R(0b0000000000000000010111, rj, rd)) + +// GR[rd] = SignExtend(GR[rj][15:0], GRLEN) +#define EXT_W_H(rd, rj) EMIT(type_2R(0b0000000000000000010110, rj, rd)) + // if GR[rj] == GR[rd]: // PC = PC + SignExtend({imm16, 2'b0}, GRLEN) #define BEQ(rj, rd, imm18) EMIT(type_2RI16(0b010110, ((imm18)>>2), rj, rd)) @@ -360,59 +372,72 @@ f24-f31 fs0-fs7 Static registers Callee // PC = PC + SignExtend({imm26, 2'b0}, GRLEN) #define B(imm28) EMIT(type_I26(0b010100, ((imm28)>>2))) -#define BEQ_safe(rj, rd, imm) \ - if ((imm) > -0x20000 && (imm) < 0x20000) { \ - BEQ(rj, rd, imm); \ - NOP(); \ - } else { \ - BNE(rj, rd, 8); \ - B(imm - 4); \ - } +#define BEQ_safe(rj, rd, imm) \ + do { \ + if ((imm) > -0x20000 && (imm) < 0x20000) { \ + BEQ(rj, rd, imm); \ + NOP(); \ + } else { \ + BNE(rj, rd, 8); \ + B(imm - 4); \ + } \ + } while (0) -#define BNE_safe(rj, rd, imm) \ - if ((imm) > -0x20000 && (imm) < 0x20000) { \ - BNE(rj, rd, imm); \ - NOP(); \ - } else { \ - BEQ(rj, rd, 8); \ - B(imm - 4); \ - } +#define BNE_safe(rj, rd, imm) \ + do { \ + if ((imm) > -0x20000 && (imm) < 0x20000) { \ + BNE(rj, rd, imm); \ + NOP(); \ + } else { \ + BEQ(rj, rd, 8); \ + B(imm - 4); \ + } \ + } while (0) -#define BLT_safe(rj, rd, imm) \ - if ((imm) > -0x20000 && (imm) < 0x20000) { \ - BLT(rj, rd, imm); \ - NOP(); \ - } else { \ - BGE(rj, rd, 8); \ - B(imm - 4); \ - } +#define 
BLT_safe(rj, rd, imm) \ + do { \ + if ((imm) > -0x20000 && (imm) < 0x20000) { \ + BLT(rj, rd, imm); \ + NOP(); \ + } else { \ + BGE(rj, rd, 8); \ + B(imm - 4); \ + } \ + } while (0) -#define BGE_safe(rj, rd, imm) \ - if ((imm) > -0x20000 && (imm) < 0x20000) { \ - BGE(rj, rd, imm); \ - NOP(); \ - } else { \ - BLT(rj, rd, 8); \ - B(imm - 4); \ - } +#define BGE_safe(rj, rd, imm) \ + do { \ + if ((imm) > -0x20000 && (imm) < 0x20000) { \ + BGE(rj, rd, imm); \ + NOP(); \ + } else { \ + BLT(rj, rd, 8); \ + B(imm - 4); \ + } \ + } while (0) -#define BLTU_safe(rj, rd, imm) \ - if ((imm) > -0x20000 && (imm) < 0x20000) { \ - BLTU(rj, rd, imm); \ - NOP(); \ - } else { \ - BGEU(rj, rd, 8); \ - B(imm - 4); \ - } +#define BLTU_safe(rj, rd, imm) \ + do { \ + if ((imm) > -0x20000 && (imm) < 0x20000) { \ + BLTU(rj, rd, imm); \ + NOP(); \ + } else { \ + BGEU(rj, rd, 8); \ + B(imm - 4); \ + } \ + } while (0) -#define BGEU_safe(rj, rd, imm) \ - if ((imm) > -0x20000 && (imm) < 0x20000) { \ - BGEU(rj, rd, imm); \ - NOP(); \ - } else { \ - BLTU(rj, rd, 8); \ - B(imm - 4); \ - } +#define BGEU_safe(rj, rd, imm) \ + do { \ + if ((imm) > -0x20000 && (imm) < 0x20000) { \ + BGEU(rj, rd, imm); \ + NOP(); \ + } else { \ + BLTU(rj, rd, 8); \ + B(imm - 4); \ + } \ + } while (0) #define BEQZ_safe(rj, imm) \ do { \ @@ -499,11 +524,6 @@ f24-f31 fs0-fs7 Static registers Callee // MemoryStore(GR[rd][63:0], paddr, DOUBLEWORD) #define ST_D(rd, rj, imm12) EMIT(type_2RI12(0b0010100111, imm12, rj, rd)) -// GR[rd] = SignExtend(GR[rj][15:0], GRLEN) -#define EXT_W_H(rd, rj) EMIT(type_2R(0b10110, rj, rd)) -// GR[rd] = SignExtend(GR[rj][7:0], GRLEN) -#define EXT_W_B(rd, rj) EMIT(type_2R(0b10111, rj, rd)) - //////////////////////////////////////////////////////////////////////////////// // LBT extension instructions @@ -671,109 +691,148 @@ f24-f31 fs0-fs7 Static registers Callee // GR[rd] = imm32 -#define MOV32w_(rd, imm32, zeroup) \ - if (((uint32_t)(imm32)) > 0xfffu) { \ - LU12I_W(rd, (imm32) >> 12); \ - ORI(rd, 
rd, imm32); \ - if (zeroup && (int32_t)imm32 < 0) \ - ZEROUP(rd); \ - } else { \ - ORI(rd, xZR, imm32); \ - } +#define MOV32w_(rd, imm32, zeroup) \ + do { \ + if (((uint32_t)(imm32)) > 0xfffu) { \ + LU12I_W(rd, (imm32) >> 12); \ + ORI(rd, rd, imm32); \ + if (zeroup && (int32_t)imm32 < 0) \ + ZEROUP(rd); \ + } else { \ + ORI(rd, xZR, imm32); \ + } \ + } while (0) + #define MOV32w(rd, imm32) MOV32w_(rd, imm32, 1) // GR[rd] = imm64 -#define MOV64x(rd, imm64) \ - MOV32w_(rd, imm64, 0); \ - if (((uint64_t)(imm64)) > 0xffffffffu) { \ - LU32I_D(rd, ((uint64_t)(imm64)) >> 32); \ - LU52I_D(rd, rd, ((uint64_t)(imm64)) >> 52); \ - } +#define MOV64x(rd, imm64) \ + do { \ + MOV32w_(rd, imm64, 0); \ + if (((uint64_t)(imm64)) > 0xffffffffu) { \ + LU32I_D(rd, ((uint64_t)(imm64)) >> 32); \ + LU52I_D(rd, rd, ((uint64_t)(imm64)) >> 52); \ + } \ + } while (0) -#define MOV64xw(A, B) \ - if (rex.w) { \ - MOV64x(A, B); \ - } else { \ - MOV32w(A, B); \ - } -#define MOV64z(A, B) \ - if (rex.is32bits) { \ - MOV32w(A, B); \ - } else { \ - MOV64x(A, B); \ - } +#define MOV64xw(A, B) \ + do { \ + if (rex.w) { \ + MOV64x(A, B); \ + } else { \ + MOV32w(A, B); \ + } \ + } while (0) + +#define MOV64z(A, B) \ + do { \ + if (rex.is32bits) { \ + MOV32w(A, B); \ + } else { \ + MOV64x(A, B); \ + } \ + } while (0) // rd[63:0] = rj[63:0] (pseudo instruction) #define MV(rd, rj) ADDI_D(rd, rj, 0) // rd = rj (pseudo instruction) #define MVxw(rd, rj) \ - if (rex.w) { \ - MV(rd, rj); \ - } else { \ - AND(rd, rj, xMASK); \ - } + do { \ + if (rex.w) { \ + MV(rd, rj); \ + } else { \ + AND(rd, rj, xMASK); \ + } \ + } while (0) + // rd = rj (pseudo instruction) #define MVz(rd, rj) \ - if (rex.is32bits) { \ - AND(rd, rj, xMASK); \ - } else { \ - MV(rd, rj); \ - } + do { \ + if (rex.is32bits) { \ + AND(rd, rj, xMASK); \ + } else { \ + MV(rd, rj); \ + } \ + } while (0) -#define ADDIxw(rd, rj, imm12) \ - if (rex.w) \ - ADDI_D(rd, rj, imm12); \ - else \ - ADDI_W(rd, rj, imm12); -#define ADDIz(rd, rj, imm12) \ - if 
(rex.is32bits) \ - ADDI_W(rd, rj, imm12); \ - else \ - ADDI_D(rd, rj, imm12); - -#define ADDxw(rd, rj, rk) \ - if (rex.w) \ - ADD_D(rd, rj, rk); \ - else \ - ADD_W(rd, rj, rk); -#define ADDz(rd, rj, rk) \ - if (rex.is32bits) \ - ADD_W(rd, rj, rk); \ - else \ - ADD_D(rd, rj, rk); - -#define LDxw(rd, rj, imm12) \ - if (rex.w) \ - LD_D(rd, rj, imm12); \ - else \ - LD_WU(rd, rj, imm12); - -#define LDz(rd, rj, imm12) \ - if (rex.is32bits) \ - LD_WU(rd, rj, imm12); \ - else \ - LD_D(rd, rj, imm12); - -#define SDxw(rd, rj, imm12) \ - if (rex.w) \ - ST_D(rd, rj, imm12); \ - else \ - ST_W(rd, rj, imm12); -#define SDz(rd, rj, imm12) \ - if (rex.is32bits) \ - ST_W(rd, rj, imm12); \ - else \ - ST_D(rd, rj, imm12); - -#define SUBxw(rd, rj, rk) \ - if (rex.w) \ - SUB_D(rd, rj, rk); \ - else \ - SUB_W(rd, rj, rk); -#define SUBz(rd, rj, rk) \ - if (rex.is32bits) \ - SUB_W(rd, rj, rk); \ - else \ - SUB_D(rd, rj, rk); +#define ADDIxw(rd, rj, imm12) \ + do { \ + if (rex.w) \ + ADDI_D(rd, rj, imm12); \ + else \ + ADDI_W(rd, rj, imm12); \ + } while (0) + +#define ADDIz(rd, rj, imm12) \ + do { \ + if (rex.is32bits) \ + ADDI_W(rd, rj, imm12); \ + else \ + ADDI_D(rd, rj, imm12); \ + } while (0) + +#define ADDxw(rd, rj, rk) \ + do { \ + if (rex.w) \ + ADD_D(rd, rj, rk); \ + else \ + ADD_W(rd, rj, rk); \ + } while (0) + +#define ADDz(rd, rj, rk) \ + do { \ + if (rex.is32bits) \ + ADD_W(rd, rj, rk); \ + else \ + ADD_D(rd, rj, rk); \ + } while (0) + +#define LDxw(rd, rj, imm12) \ + do { \ + if (rex.w) \ + LD_D(rd, rj, imm12); \ + else \ + LD_WU(rd, rj, imm12); \ + } while (0) + +#define LDz(rd, rj, imm12) \ + do { \ + if (rex.is32bits) \ + LD_WU(rd, rj, imm12); \ + else \ + LD_D(rd, rj, imm12); \ + } while (0) + +#define SDxw(rd, rj, imm12) \ + do { \ + if (rex.w) \ + ST_D(rd, rj, imm12); \ + else \ + ST_W(rd, rj, imm12); \ + } while (0) + +#define SDz(rd, rj, imm12) \ + do { \ + if (rex.is32bits) \ + ST_W(rd, rj, imm12); \ + else \ + ST_D(rd, rj, imm12); \ + } while (0) + +#define SUBxw(rd, 
rj, rk) \ + do { \ + if (rex.w) \ + SUB_D(rd, rj, rk); \ + else \ + SUB_W(rd, rj, rk); \ + } while (0) + +#define SUBz(rd, rj, rk) \ + do { \ + if (rex.is32bits) \ + SUB_W(rd, rj, rk); \ + else \ + SUB_D(rd, rj, rk); \ + } while (0) // PUSH / POP reg[0:63] #define PUSH1(reg) \ @@ -800,19 +859,24 @@ f24-f31 fs0-fs7 Static registers Callee } while (0); // POP reg -#define POP1z(reg) \ - if (rex.is32bits) { \ - POP1_32(reg); \ - } else { \ - POP1(reg); \ - } +#define POP1z(reg) \ + do { \ + if (rex.is32bits) { \ + POP1_32(reg); \ + } else { \ + POP1(reg); \ + } \ + } while (0) + // PUSH reg -#define PUSH1z(reg) \ - if (rex.is32bits) { \ - PUSH1_32(reg); \ - } else { \ - PUSH1(reg); \ - } +#define PUSH1z(reg) \ + do { \ + if (rex.is32bits) { \ + PUSH1_32(reg); \ + } else { \ + PUSH1(reg); \ + } \ + } while (0) // DBAR hint #define DBAR(hint) EMIT(type_hint(0b00111000011100100, hint)) |