| author    | Yang Liu <numbksco@gmail.com> | 2024-04-27 01:13:06 +0800 |
| committer | GitHub <noreply@github.com>   | 2024-04-26 19:13:06 +0200 |
| commit    | c40096e128b9d03f56baae98f0ad6d3e85258c26 (patch) |
| tree      | 32ad3ba640d97d94cdf1cf519c7d23b303daace7 /src |
| parent    | 43d281740778f959f94165386545363591c2f8ea (diff) |
[LA64_DYNAREC] Added more opcodes (#1468)
* Added 0F BF MOVSX opcode
* Added F7 /5 IMUL opcode
* Added 86 XCHG opcode
* Added 66 0F PADDQ opcode
* Added 0F C6 SHUFPS opcode
* Added 66 0F 69 PUNPCKHWD opcode
* Added 66 0F DB PAND opcode
* Test
* Review
* Review
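For orientation, the 32-bit path of the new F7 /5 IMUL translation widens both operands to 64 bits, multiplies once, and splits the product into EAX (low half) and EDX (high half). The C sketch below only illustrates that arithmetic; the helper name and harness are hypothetical, not box64 code.

```c
#include <stdint.h>
#include <stdio.h>

/* Illustrative sketch of the widening multiply behind one-operand IMUL r/m32:
 * EDX:EAX = sign_extend64(EAX) * sign_extend64(src). Helper name and harness
 * are hypothetical, not box64 code. */
static void imul32_widen(uint32_t *eax, uint32_t *edx, uint32_t src)
{
    int64_t prod = (int64_t)(int32_t)*eax * (int64_t)(int32_t)src; // 64 <- 32x32
    *eax = (uint32_t)prod;          // low 32 bits, like AND(xRAX, xRDX, xMASK)
    *edx = (uint32_t)(prod >> 32);  // high 32 bits, like SRLI_D(xRDX, xRDX, 32)
}

int main(void)
{
    uint32_t eax = 0xFFFFFFFFu; /* -1 */
    uint32_t edx = 0;
    imul32_widen(&eax, &edx, 7);
    printf("EDX:EAX = %08x:%08x\n", edx, eax); /* -7 -> ffffffff:fffffff9 */
    return 0;
}
```

The rex.w path in the diff instead uses MULH_D/MUL_D to obtain the high and low 64 bits of the full 128-bit product.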
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_00.c   | 42 |
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_0f.c   | 25 |
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_660f.c | 22 |
| -rw-r--r-- | src/dynarec/la64/la64_emitter.h      | 79 |
| -rw-r--r-- | src/dynarec/la64/la64_printer.c      | 17 |
5 files changed, 174 insertions, 11 deletions
diff --git a/src/dynarec/la64/dynarec_la64_00.c b/src/dynarec/la64/dynarec_la64_00.c
index 706094db..3390c6ca 100644
--- a/src/dynarec/la64/dynarec_la64_00.c
+++ b/src/dynarec/la64/dynarec_la64_00.c
@@ -9,6 +9,7 @@
 #include "dynarec.h"
 #include "emu/x64emu_private.h"
 #include "emu/x64run_private.h"
+#include "la64_emitter.h"
 #include "x64run.h"
 #include "x64emu.h"
 #include "box64stack.h"
@@ -726,6 +727,25 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             GETED(0);
             emit_test32(dyn, ninst, rex, ed, gd, x3, x4, x5);
             break;
+        case 0x86:
+            INST_NAME("(LOCK)XCHG Eb, Gb");
+            nextop = F8;
+            if (MODREG) {
+                GETGB(x1);
+                GETEB(x2, 0);
+                BSTRINS_D(wback, gd, wb2 + 7, wb2);
+                BSTRINS_D(gb1, ed, gb2 + 7, gb2);
+            } else {
+                GETGB(x3);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, 0);
+                // AMSWAP_DB_B(x1, gd, ed);
+                SMDMB();
+                LD_BU(x1, ed, 0);
+                ST_B(gd, ed, 0);
+                SMDMB();
+                BSTRINS_D(gb1, x1, gb2 + 7, gb2);
+            }
+            break;
         case 0x87:
             INST_NAME("(LOCK) XCHG Ed, Gd");
             nextop = F8;
@@ -1675,6 +1695,28 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     UFLAG_RES(xRAX);
                     UFLAG_OP1(xRDX);
                     break;
+                case 5:
+                    INST_NAME("IMUL EAX, Ed");
+                    SETFLAGS(X_ALL, SF_PENDING);
+                    UFLAG_DF(x2, rex.w ? d_imul64 : d_imul32);
+                    GETSED(0);
+                    if (rex.w) {
+                        if (ed == xRDX)
+                            gd = x3;
+                        else
+                            gd = xRDX;
+                        MULH_D(gd, xRAX, ed);
+                        MUL_D(xRAX, xRAX, ed);
+                        if (gd != xRDX) { MV(xRDX, gd); }
+                    } else {
+                        ADDI_W(x3, xRAX, 0); // sign extend 32bits-> 64bits
+                        MUL_D(xRDX, x3, ed); // 64 <- 32x32
+                        AND(xRAX, xRDX, xMASK);
+                        SRLI_D(xRDX, xRDX, 32);
+                    }
+                    UFLAG_RES(xRAX);
+                    UFLAG_OP1(xRDX);
+                    break;
                 case 6:
                     INST_NAME("DIV Ed");
                     SETFLAGS(X_ALL, SF_SET);
diff --git a/src/dynarec/la64/dynarec_la64_0f.c b/src/dynarec/la64/dynarec_la64_0f.c
index d16c8854..a540f714 100644
--- a/src/dynarec/la64/dynarec_la64_0f.c
+++ b/src/dynarec/la64/dynarec_la64_0f.c
@@ -491,6 +491,31 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             }
             if (!rex.w) ZEROUP(gd);
             break;
+        case 0xBF:
+            INST_NAME("MOVSX Gd, Ew");
+            nextop = F8;
+            GETGD;
+            if (MODREG) {
+                ed = TO_LA64((nextop & 7) + (rex.b << 3));
+                EXT_W_H(gd, ed);
+            } else {
+                SMREAD();
+                addr = geted(dyn, addr, ninst, nextop, &ed, x3, x1, &fixedaddress, rex, NULL, 1, 0);
+                LD_H(gd, ed, fixedaddress);
+            }
+            if (!rex.w) ZEROUP(gd);
+            break;
+        case 0xC6:
+            INST_NAME("SHUFPS Gx, Ex, Ib");
+            nextop = F8;
+            GETGX(v0, 1);
+            GETEX(v1, 0, 1);
+            u8 = F8;
+            if (v0 != v1) {
+                VEXTRINS_D(v0, v1, 0x11); // v0[127:64] = v1[127:64]
+            }
+            VSHUF4I_W(v0, v0, u8);
+            break;
         case 0xC8:
         case 0xC9:
         case 0xCA:
diff --git a/src/dynarec/la64/dynarec_la64_660f.c b/src/dynarec/la64/dynarec_la64_660f.c
index 6190108f..474bbfc2 100644
--- a/src/dynarec/la64/dynarec_la64_660f.c
+++ b/src/dynarec/la64/dynarec_la64_660f.c
@@ -8,6 +8,7 @@
 #include "dynarec.h"
 #include "emu/x64emu_private.h"
 #include "emu/x64run_private.h"
+#include "la64_emitter.h"
 #include "x64run.h"
 #include "x64emu.h"
 #include "box64stack.h"
@@ -62,6 +63,13 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             GETEX(q0, 0, 0);
             VILVL_H(v0, q0, v0);
             break;
+        case 0x69:
+            INST_NAME("PUNPCKHWD Gx,Ex");
+            nextop = F8;
+            GETGX(q0, 1);
+            GETEX(q1, 0, 0);
+            VILVH_H(q0, q1, q0);
+            break;
         case 0x6C:
             INST_NAME("PUNPCKLQDQ Gx,Ex");
             nextop = F8;
@@ -140,6 +148,13 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             }
             BSTRINS_D(gd, x1, 15, 0);
             break;
+        case 0xD4:
+            INST_NAME("PADDQ Gx, Ex");
+            nextop = F8;
+            GETGX(v0, 1);
+            GETEX(q0, 0, 0);
+            VADD_D(v0, v0, q0);
+            break;
         case 0xD6:
             INST_NAME("MOVQ Ex, Gx");
             nextop = F8;
@@ -154,6 +169,13 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 SMWRITE2();
             }
             break;
+        case 0xDB:
+            INST_NAME("PAND Gx,Ex");
+            nextop = F8;
+            GETGX(v0, 1);
+            GETEX(q0, 0, 0);
+            VAND_V(v0, v0, q0);
+            break;
         case 0xEF:
             INST_NAME("PXOR Gx,Ex");
             nextop = F8;
diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h
index a921643c..7860734a 100644
--- a/src/dynarec/la64/la64_emitter.h
+++ b/src/dynarec/la64/la64_emitter.h
@@ -685,6 +685,60 @@ f24-f31 fs0-fs7 Static registers Callee
 #define LDX_HU(rd, rj, rk) EMIT(type_3R(0b00111000001001000, rk, rj, rd))
 #define LDX_WU(rd, rj, rk) EMIT(type_3R(0b00111000001010000, rk, rj, rd))
 
+// Beware the position of rj and rk are swapped in atomic instructions.
+#define AMCAS_B(rd, rk, rj) EMIT(type_3R(0b00111000010110000, rk, rj, rd))
+#define AMCAS_H(rd, rk, rj) EMIT(type_3R(0b00111000010110001, rk, rj, rd))
+#define AMCAS_W(rd, rk, rj) EMIT(type_3R(0b00111000010110010, rk, rj, rd))
+#define AMCAS_D(rd, rk, rj) EMIT(type_3R(0b00111000010110011, rk, rj, rd))
+#define AMCAS_DB_B(rd, rk, rj) EMIT(type_3R(0b00111000010110100, rk, rj, rd))
+#define AMCAS_DB_H(rd, rk, rj) EMIT(type_3R(0b00111000010110101, rk, rj, rd))
+#define AMCAS_DB_W(rd, rk, rj) EMIT(type_3R(0b00111000010110110, rk, rj, rd))
+#define AMCAS_DB_D(rd, rk, rj) EMIT(type_3R(0b00111000010110111, rk, rj, rd))
+#define AMSWAP_B(rd, rk, rj) EMIT(type_3R(0b00111000010111000, rk, rj, rd))
+#define AMSWAP_H(rd, rk, rj) EMIT(type_3R(0b00111000010111001, rk, rj, rd))
+#define AMADD_B(rd, rk, rj) EMIT(type_3R(0b00111000010111010, rk, rj, rd))
+#define AMADD_H(rd, rk, rj) EMIT(type_3R(0b00111000010111011, rk, rj, rd))
+#define AMSWAP_DB_B(rd, rk, rj) EMIT(type_3R(0b00111000010111100, rk, rj, rd))
+#define AMSWAP_DB_H(rd, rk, rj) EMIT(type_3R(0b00111000010111101, rk, rj, rd))
+#define AMADD_DB_B(rd, rk, rj) EMIT(type_3R(0b00111000010111110, rk, rj, rd))
+#define AMADD_DB_H(rd, rk, rj) EMIT(type_3R(0b00111000010111111, rk, rj, rd))
+#define AMSWAP_W(rd, rk, rj) EMIT(type_3R(0b00111000011000000, rk, rj, rd))
+#define AMSWAP_D(rd, rk, rj) EMIT(type_3R(0b00111000011000001, rk, rj, rd))
+#define AMADD_W(rd, rk, rj) EMIT(type_3R(0b00111000011000010, rk, rj, rd))
+#define AMADD_D(rd, rk, rj) EMIT(type_3R(0b00111000011000011, rk, rj, rd))
+#define AMAND_W(rd, rk, rj) EMIT(type_3R(0b00111000011000100, rk, rj, rd))
+#define AMAND_D(rd, rk, rj) EMIT(type_3R(0b00111000011000101, rk, rj, rd))
+#define AMOR_W(rd, rk, rj) EMIT(type_3R(0b00111000011000110, rk, rj, rd))
+#define AMOR_D(rd, rk, rj) EMIT(type_3R(0b00111000011000111, rk, rj, rd))
+#define AMXOR_W(rd, rk, rj) EMIT(type_3R(0b00111000011001000, rk, rj, rd))
+#define AMXOR_D(rd, rk, rj) EMIT(type_3R(0b00111000011001001, rk, rj, rd))
+#define AMMAX_W(rd, rk, rj) EMIT(type_3R(0b00111000011001010, rk, rj, rd))
+#define AMMAX_D(rd, rk, rj) EMIT(type_3R(0b00111000011001011, rk, rj, rd))
+#define AMMIN_W(rd, rk, rj) EMIT(type_3R(0b00111000011001100, rk, rj, rd))
+#define AMMIN_D(rd, rk, rj) EMIT(type_3R(0b00111000011001101, rk, rj, rd))
+#define AMMAX_WU(rd, rk, rj) EMIT(type_3R(0b00111000011001110, rk, rj, rd))
+#define AMMAX_DU(rd, rk, rj) EMIT(type_3R(0b00111000011001111, rk, rj, rd))
+#define AMMIN_WU(rd, rk, rj) EMIT(type_3R(0b00111000011010000, rk, rj, rd))
+#define AMMIN_DU(rd, rk, rj) EMIT(type_3R(0b00111000011010001, rk, rj, rd))
+#define AMSWAP_DB_W(rd, rk, rj) EMIT(type_3R(0b00111000011010010, rk, rj, rd))
+#define AMSWAP_DB_D(rd, rk, rj) EMIT(type_3R(0b00111000011010011, rk, rj, rd))
+#define AMADD_DB_W(rd, rk, rj) EMIT(type_3R(0b00111000011010100, rk, rj, rd))
+#define AMADD_DB_D(rd, rk, rj) EMIT(type_3R(0b00111000011010101, rk, rj, rd))
+#define AMAND_DB_W(rd, rk, rj) EMIT(type_3R(0b00111000011010110, rk, rj, rd))
+#define AMAND_DB_D(rd, rk, rj) EMIT(type_3R(0b00111000011010111, rk, rj, rd))
+#define AMOR_DB_W(rd, rk, rj) EMIT(type_3R(0b00111000011011000, rk, rj, rd))
+#define AMOR_DB_D(rd, rk, rj) EMIT(type_3R(0b00111000011011001, rk, rj, rd))
+#define AMXOR_DB_W(rd, rk, rj) EMIT(type_3R(0b00111000011011010, rk, rj, rd))
+#define AMXOR_DB_D(rd, rk, rj) EMIT(type_3R(0b00111000011011011, rk, rj, rd))
+#define AMMAX_DB_W(rd, rk, rj) EMIT(type_3R(0b00111000011011100, rk, rj, rd))
+#define AMMAX_DB_D(rd, rk, rj) EMIT(type_3R(0b00111000011011101, rk, rj, rd))
+#define AMMIN_DB_W(rd, rk, rj) EMIT(type_3R(0b00111000011011110, rk, rj, rd))
+#define AMMIN_DB_D(rd, rk, rj) EMIT(type_3R(0b00111000011011111, rk, rj, rd))
+#define AMMAX_DB_WU(rd, rk, rj) EMIT(type_3R(0b00111000011100000, rk, rj, rd))
+#define AMMAX_DB_DU(rd, rk, rj) EMIT(type_3R(0b00111000011100001, rk, rj, rd))
+#define AMMIN_DB_WU(rd, rk, rj) EMIT(type_3R(0b00111000011100010, rk, rj, rd))
+#define AMMIN_DB_DU(rd, rk, rj) EMIT(type_3R(0b00111000011100011, rk, rj, rd))
+
 #define FLD_D(fd, rj, imm12) EMIT(type_2RI12(0b0010101110, imm12, rj, fd))
 #define FLD_S(fd, rj, imm12) EMIT(type_2RI12(0b0010101100, imm12, rj, fd))
 #define FST_D(fd, rj, imm12) EMIT(type_2RI12(0b0010101111, imm12, rj, fd))
@@ -801,16 +855,16 @@ LSX instruction starts with V, LASX instruction starts with XV.
 */
 
-#define VADD_B(vd, vj, vk) EMIT(type_3R(0b01110000000010100, vj, vj, vd))
-#define VADD_H(vd, vj, vk) EMIT(type_3R(0b01110000000010101, vj, vj, vd))
-#define VADD_W(vd, vj, vk) EMIT(type_3R(0b01110000000010110, vj, vj, vd))
-#define VADD_D(vd, vj, vk) EMIT(type_3R(0b01110000000010111, vj, vj, vd))
-#define VADD_Q(vd, vj, vk) EMIT(type_3R(0b01110001001011010, vj, vj, vd))
-#define VSUB_B(vd, vj, vk) EMIT(type_3R(0b01110000000011000, vj, vj, vd))
-#define VSUB_H(vd, vj, vk) EMIT(type_3R(0b01110000000011001, vj, vj, vd))
-#define VSUB_W(vd, vj, vk) EMIT(type_3R(0b01110000000011010, vj, vj, vd))
-#define VSUB_D(vd, vj, vk) EMIT(type_3R(0b01110000000011011, vj, vj, vd))
-#define VSUB_Q(vd, vj, vk) EMIT(type_3R(0b01110001001011011, vj, vj, vd))
+#define VADD_B(vd, vj, vk) EMIT(type_3R(0b01110000000010100, vk, vj, vd))
+#define VADD_H(vd, vj, vk) EMIT(type_3R(0b01110000000010101, vk, vj, vd))
+#define VADD_W(vd, vj, vk) EMIT(type_3R(0b01110000000010110, vk, vj, vd))
+#define VADD_D(vd, vj, vk) EMIT(type_3R(0b01110000000010111, vk, vj, vd))
+#define VADD_Q(vd, vj, vk) EMIT(type_3R(0b01110001001011010, vk, vj, vd))
+#define VSUB_B(vd, vj, vk) EMIT(type_3R(0b01110000000011000, vk, vj, vd))
+#define VSUB_H(vd, vj, vk) EMIT(type_3R(0b01110000000011001, vk, vj, vd))
+#define VSUB_W(vd, vj, vk) EMIT(type_3R(0b01110000000011010, vk, vj, vd))
+#define VSUB_D(vd, vj, vk) EMIT(type_3R(0b01110000000011011, vk, vj, vd))
+#define VSUB_Q(vd, vj, vk) EMIT(type_3R(0b01110001001011011, vk, vj, vd))
 #define VSADD_B(vd, vj, vk) EMIT(type_3R(0b01110000010001100, vk, vj, vd))
 #define VSADD_H(vd, vj, vk) EMIT(type_3R(0b01110000010001101, vk, vj, vd))
 #define VSADD_W(vd, vj, vk) EMIT(type_3R(0b01110000010001110, vk, vj, vd))
@@ -1196,6 +1250,10 @@ LSX instruction starts with V, LASX instruction starts with XV.
 #define VSHUF_H(vd, vj, vk) EMIT(type_3R(0b01110001011110101, vk, vj, vd))
 #define VSHUF_W(vd, vj, vk) EMIT(type_3R(0b01110001011110110, vk, vj, vd))
 #define VSHUF_D(vd, vj, vk) EMIT(type_3R(0b01110001011110111, vk, vj, vd))
+#define VSHUF4I_B(vd, vj, imm8) EMIT(type_2RI8(0b01110011100100, imm8, vj, vd))
+#define VSHUF4I_H(vd, vj, imm8) EMIT(type_2RI8(0b01110011100101, imm8, vj, vd))
+#define VSHUF4I_W(vd, vj, imm8) EMIT(type_2RI8(0b01110011100110, imm8, vj, vd))
+#define VSHUF4I_D(vd, vj, imm8) EMIT(type_2RI8(0b01110011100111, imm8, vj, vd))
 #define VEXTRINS_D(vd, vj, imm8) EMIT(type_2RI8(0b01110011100000, imm8, vj, vd))
 #define VEXTRINS_W(vd, vj, imm8) EMIT(type_2RI8(0b01110011100001, imm8, vj, vd))
 #define VEXTRINS_H(vd, vj, imm8) EMIT(type_2RI8(0b01110011100010, imm8, vj, vd))
@@ -1203,7 +1261,6 @@ LSX instruction starts with V, LASX instruction starts with XV.
 #define VLD(vd, rj, imm12) EMIT(type_2RI12(0b0010110000, imm12, rj, vd))
 #define VST(vd, rj, imm12) EMIT(type_2RI12(0b0010110001, imm12, rj, vd))
 
-
 #define XVADD_B(vd, vj, vk) EMIT(type_3R(0b01110100000010100, vk, vj, vd))
 #define XVADD_H(vd, vj, vk) EMIT(type_3R(0b01110100000010101, vk, vj, vd))
 #define XVADD_W(vd, vj, vk) EMIT(type_3R(0b01110100000010110, vk, vj, vd))
diff --git a/src/dynarec/la64/la64_printer.c b/src/dynarec/la64/la64_printer.c
index 04dd5ccd..3d4cf991 100644
--- a/src/dynarec/la64/la64_printer.c
+++ b/src/dynarec/la64/la64_printer.c
@@ -2123,6 +2123,23 @@ const char* la64_print(uint32_t opcode, uintptr_t addr)
         snprintf(buff, sizeof(buff), "%-15s %s, %s, %s", "VSHUF.D", Vt[Rd], Vt[Rj], Vt[Rk]);
         return buff;
     }
+    if (isMask(opcode, "01110011100100iiiiiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, 0x%x", "VSHUF4I.B", Vt[Rd], Vt[Rj], imm);
+        return buff;
+    }
+    if (isMask(opcode, "01110011100101iiiiiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, 0x%x", "VSHUF4I.H", Vt[Rd], Vt[Rj], imm);
+
+        return buff;
+    }
+    if (isMask(opcode, "01110011100110iiiiiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, 0x%x", "VSHUF4I.W", Vt[Rd], Vt[Rj], imm);
+        return buff;
+    }
+    if (isMask(opcode, "01110011100111iiiiiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, 0x%x", "VSHUF4I.D", Vt[Rd], Vt[Rj], imm);
+        return buff;
+    }
     if (isMask(opcode, "01110011100000iiiiiiiijjjjjddddd", &a)) {
         snprintf(buff, sizeof(buff), "%-15s %s, %s, %d", "VEXTRINS.D", Vt[Rd], Vt[Rj], signExtend(imm, 8));
         return buff;
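For context on the 0F C6 handler above: x86 SHUFPS fills the two low result lanes from the destination register and the two high lanes from the source, each lane chosen by a 2-bit field of the immediate. The sketch below is a plain reference model of that selection, not the box64 translation; the function name and harness are illustrative only.

```c
#include <stdint.h>
#include <stdio.h>

/* Reference model of SHUFPS dst, src, imm8 over 4 x 32-bit lanes.
 * result[0..1] are picked from dst, result[2..3] from src.
 * Name and harness are illustrative, not box64 code. */
static void shufps_ref(uint32_t dst[4], const uint32_t src[4], uint8_t imm8)
{
    uint32_t r[4];
    r[0] = dst[(imm8 >> 0) & 3];
    r[1] = dst[(imm8 >> 2) & 3];
    r[2] = src[(imm8 >> 4) & 3];
    r[3] = src[(imm8 >> 6) & 3];
    for (int i = 0; i < 4; i++) dst[i] = r[i];
}

int main(void)
{
    uint32_t a[4] = { 0, 1, 2, 3 };
    uint32_t b[4] = { 10, 11, 12, 13 };
    shufps_ref(a, b, 0x1B); /* selectors 3,2,1,0 */
    printf("%u %u %u %u\n", a[0], a[1], a[2], a[3]); /* 3 2 11 10 */
    return 0;
}
```

On the LoongArch side, VSHUF4I.W applies one such 2-bit selector per result lane to a single source vector, which is why the handler first copies the upper 64 bits of Ex into Gx with VEXTRINS_D before applying VSHUF4I_W.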