From 3c31ecb115cdc127a59efee3a8b6f14ed4d8bc9a Mon Sep 17 00:00:00 2001
From: ptitSeb
Date: Sun, 21 Mar 2021 11:03:08 +0100
Subject: [DYNAREC] Added 66 0F 6E/7E opcodes (first SSE ones)

---
 src/dynarec/arm64_emitter.h      | 59 +++++++++++++++++++++++++++++---
 src/dynarec/arm64_printer.c      | 68 +++++++++++++++++++++++++++++++++++++++-
 src/dynarec/dynarec_arm64_660f.c | 58 ++++++++++++++++++++++++++++------
 3 files changed, 170 insertions(+), 15 deletions(-)

diff --git a/src/dynarec/arm64_emitter.h b/src/dynarec/arm64_emitter.h
index 08117512..9bf56a5b 100755
--- a/src/dynarec/arm64_emitter.h
+++ b/src/dynarec/arm64_emitter.h
@@ -517,13 +517,13 @@
 // VLDR
 #define VMEM_gen(size, opc, imm12, Rn, Rt) ((size)<<30 | 0b111<<27 | 1<<26 | 0b01<<24 | (opc)<<22 | (imm12)<<10 | (Rn)<<5 | (Rt))
 // imm15 must be 3-aligned
-#define VLDR64_U12(Dt, Rn, imm15) EMIT(VMEM_gen(0b11, 0b01, ((imm15)>>3)&0xfff, Rn, Dt))
+#define VLDR64_U12(Dt, Rn, imm15) EMIT(VMEM_gen(0b11, 0b01, ((uint32_t)((imm15)>>3))&0xfff, Rn, Dt))
 // imm16 must be 4-aligned
-#define VLDR128_U12(Qt, Rn, imm16) EMIT(VMEM_gen(0b11, 0b11, ((imm16)>>4)&0xfff, Rn, Qt))
-// imm15 must be 3-aligned
-#define VSTR64_U12(Dt, Rn, imm15) EMIT(VMEM_gen(0b11, 0b00, ((imm15)>>3)&0xfff, Rn, Dt))
+#define VLDR128_U12(Qt, Rn, imm16) EMIT(VMEM_gen(0b00, 0b11, ((uint32_t)((imm16)>>4))&0xfff, Rn, Qt))
+// (imm15) must be 3-aligned
+#define VSTR64_U12(Dt, Rn, imm15) EMIT(VMEM_gen(0b11, 0b00, ((uint32_t)((imm15)>>3))&0xfff, Rn, Dt))
 // imm16 must be 4-aligned
-#define VSTR128_U12(Qt, Rn, imm16) EMIT(VMEM_gen(0b11, 0b10, ((imm16)>>4)&0xfff, Rn, Qt))
+#define VSTR128_U12(Qt, Rn, imm16) EMIT(VMEM_gen(0b00, 0b10, ((uint32_t)((imm16)>>4))&0xfff, Rn, Qt))
 
 #define VMEMW_gen(size, opc, imm9, op2, Rn, Rt) ((size)<<30 | 0b111<<27 | 1<<26 | (opc)<<22 | (imm9)<<12 | (op2)<<10 | 0b01<<10 | (Rn)<<5 | (Rt))
 #define VLDR64_S9_postindex(Rt, Rn, imm9) EMIT(VMEMW_gen(0b11, 0b01, (imm9)&0x1ff, 0b01, Rn, Rt))
@@ -547,5 +547,54 @@
 #define VSTR128_REG(Qt, Rn, Rm) EMIT(VMEM_REG_gen(0b00, 0b10, Rm, 0b011, 0, Rn, Dt))
 #define VSTR128_REG_LSL4(Qt, Rn, Rm) EMIT(VMEM_REG_gen(0b00, 0b10, Rm, 0b011, 1, Rn, Dt))
 
+// LOGIC
+#define VLOGIC_gen(Q, opc2, Rm, Rn, Rd) ((Q)<<30 | 1<<29 | 0b01110<<24 | (opc2)<<22 | 1<<21 | (Rm)<<16 | 0b00011<<11 | 1<<10 | (Rn)<<5 | (Rd))
+#define VEORQ(Vd, Vn, Vm) EMIT(VLOGIC_gen(1, 0b00, Vm, Vn, Vd))
+#define VEOR(Vd, Vn, Vm) EMIT(VLOGIC_gen(0, 0b00, Vm, Vn, Vd))
+
+// FMOV
+#define FMOV_general(sf, type, mode, opcode, Rn, Rd) ((sf)<<31 | 0b11110<<24 | (type)<<22 | 1<<21 | (mode)<<19 | (opcode)<<16 | (Rn)<<5 | (Rd))
+// 32-bit to single-precision
+#define FMOVSw(Sd, Wn) EMIT(FMOV_general(0, 0b00, 0b00, 0b111, Wn, Sd))
+// Single-precision to 32-bit
+#define FMOVwS(Wd, Sn) EMIT(FMOV_general(0, 0b00, 0b00, 0b110, Sn, Wd))
+// 64-bit to double-precision
+#define FMOVDx(Dd, Xn) EMIT(FMOV_general(1, 0b01, 0b00, 0b111, Xn, Dd))
+// 64-bit to top half of 128-bit
+#define FMOVD1x(Vd, Xn) EMIT(FMOV_general(1, 0b10, 0b01, 0b111, Xn, Vd))
+// Double-precision to 64-bit
+#define FMOVxD(Xd, Dn) EMIT(FMOV_general(1, 0b01, 0b00, 0b110, Dn, Xd))
+// Top half of 128-bit to 64-bit
+#define FMOVxD1(Xd, Vn) EMIT(FMOV_general(1, 0b10, 0b01, 0b110, Vn, Xd))
+
+#define FMOV_register(type, Rn, Rd) (0b11110<<24 | (type)<<22 | 1<<21 | 0b10000<<10 | (Rn)<<5 | (Rd))
+#define FMOVS(Sd, Sn) EMIT(FMOV_register(0b00, Sn, Sd))
+#define FMOVD(Dd, Dn) EMIT(FMOV_register(0b01, Dn, Dd))
+
+// VMOV
+#define VMOV_element(imm5, imm4, Rn, Rd) (1<<30 | 1<<29 | 0b01110000<<21 | (imm5)<<16 | (imm4)<<11 | 1<<10 | (Rn)<<5 | (Rd))
+#define VMOVeB(Vd, i1, Vn, i2) EMIT(VMOV_element(((i1)<<1) | 1, i2, Vn, Vd))
+#define VMOVeH(Vd, i1, Vn, i2) EMIT(VMOV_element(((i1)<<2) | 2, (i2)<<1, Vn, Vd))
+#define VMOVeS(Vd, i1, Vn, i2) EMIT(VMOV_element(((i1)<<3) | 4, (i2)<<2, Vn, Vd))
+#define VMOVeD(Vd, i1, Vn, i2) EMIT(VMOV_element(((i1)<<4) | 8, (i2)<<3, Vn, Vd))
+
+#define VMOV_from(imm5, Rn, Rd) (1<<30 | 0<<29 | 0b01110000<<21 | (imm5)<<16 | 0b0011<<11 | 1<<10 | (Rn)<<5 | (Rd))
+#define VMOVQBfrom(Vd, index, Wn) EMIT(VMOV_from(((index)<<1) | 1, Wn, Vd))
+#define VMOVQHfrom(Vd, index, Wn) EMIT(VMOV_from(((index)<<2) | 2, Wn, Vd))
+#define VMOVQSfrom(Vd, index, Wn) EMIT(VMOV_from(((index)<<3) | 4, Wn, Vd))
+#define VMOVQDfrom(Vd, index, Xn) EMIT(VMOV_from(((index)<<4) | 8, Xn, Vd))
+
+#define UMOV_gen(Q, imm5, Rn, Rd) ((Q)<<30 | 0b01110000<<21 | (imm5)<<16 | 0b01<<13 | 1<<12 | 1<<11 | 1<<10 | (Rn)<<5 | (Rd))
+#define VMOVQDto(Xd, Vn, index) EMIT(UMOV_gen(1, ((index)<<4) | 8, Vn, Xd))
+#define VMOVBto(Wd, Vn, index) EMIT(UMOV_gen(0, ((index)<<1) | 1, Vn, Wd))
+#define VMOVHto(Wd, Vn, index) EMIT(UMOV_gen(0, ((index)<<2) | 2, Vn, Wd))
+#define VMOVSto(Wd, Vn, index) EMIT(UMOV_gen(0, ((index)<<3) | 4, Vn, Wd))
+
+// VORR
+#define ORR_vector(Q, Rm, Rn, Rd) ((Q)<<30 | 0b01110<<24 | 0b10<<22 | 1<<21 | (Rm)<<16 | 0b00011<<11 | 1<<10 | (Rn)<<5 | (Rd))
+#define VORRQ(Vd, Vn, Vm) EMIT(ORR_vector(1, Vm, Vn, Vd))
+#define VORR(Dd, Dn, Dm) EMIT(ORR_vector(0, Dm, Dn, Dd))
+#define VMOVQ(Vd, Vn) EMIT(ORR_vector(1, Vn, Vn, Vd))
+#define VMOV(Dd, Dn) EMIT(ORR_vector(0, Dn, Dn, Dd))
 
 #endif //__ARM64_EMITTER_H__
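The new emitter macros are plain field-packing for fixed-width A64 encodings, so they can be sanity-checked outside the emulator against encodings from the ARM Architecture Reference Manual. A minimal standalone check (a hypothetical test harness, not part of the patch; the macro body is copied from arm64_emitter.h above) for one known encoding, FMOV X0, D0 = 0x9E660000:

    #include <stdio.h>
    #include <stdint.h>

    // Copied from arm64_emitter.h above: packs the FMOV (general) fields.
    #define FMOV_general(sf, type, mode, opcode, Rn, Rd) ((sf)<<31 | 0b11110<<24 | (type)<<22 | 1<<21 | (mode)<<19 | (opcode)<<16 | (Rn)<<5 | (Rd))

    int main(void)
    {
        // FMOVxD(x0, d0), i.e. FMOV X0, D0, assembles to 0x9E660000.
        uint32_t op = FMOV_general(1u, 0b01, 0b00, 0b110, 0, 0);
        printf("FMOV X0, D0 -> %08X (%s)\n", (unsigned)op, op==0x9E660000u ? "ok" : "BAD");
        return 0;
    }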
diff --git a/src/dynarec/arm64_printer.c b/src/dynarec/arm64_printer.c
index 00fd4476..ed179df0 100755
--- a/src/dynarec/arm64_printer.c
+++ b/src/dynarec/arm64_printer.c
@@ -16,7 +16,7 @@ static const char* conds[] = {"cEQ", "cNE", "cCS", "cCC", "cMI", "cPL", "cVS", "
 #define abs(A) (((A)<0)?(-(A)):(A))
 
 typedef struct arm64_print_s {
-    int N, S, U, L;
+    int N, S, U, L, Q;
     int t, n, m, d, t2, a;
     int f, c, o, h, p;
     int i, r, s;
@@ -61,6 +61,7 @@ int isMask(uint32_t opcode, const char* mask, arm64_print_t *a)
             case 'S': a->S = (a->S<<1) | v; break;
             case 'U': a->U = (a->U<<1) | v; break;
             case 'L': a->L = (a->L<<1) | v; break;
+            case 'Q': a->Q = (a->Q<<1) | v; break;
             case 't': a->t = (a->t<<1) | v; break;
             case '2': a->t2 = (a->t2<<1) | v; break;
             case 'n': a->n = (a->n<<1) | v; break;
@@ -113,6 +114,7 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
     #define cond a.c
     #define immr a.r
     #define imms a.s
+    #define opc a.c
     if(isMask(opcode, "11010101000000110010000000011111", &a)) {
         snprintf(buff, sizeof(buff), "NOP");
         return buff;
@@ -704,6 +706,70 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
         return buff;
     }
 
+    // ----------- NEON / FPU
+
+    // VORR
+    if(isMask(opcode, "0Q001110101mmmmm000111nnnnnddddd", &a)) {
+        char q = a.Q?'Q':'D';
+        if(Rn==Rm)
+            snprintf(buff, sizeof(buff), "VMOV %c%d, %c%d", q, Rd, q, Rn);
+        else
+            snprintf(buff, sizeof(buff), "VORR %c%d, %c%d, %c%d", q, Rd, q, Rn, q, Rm);
+        return buff;
+    }
+
+    // VEOR
+    if(isMask(opcode, "0Q101110001mmmmm000111nnnnnddddd", &a)) {
+        char q = a.Q?'Q':'D';
+        snprintf(buff, sizeof(buff), "VEOR %c%d, %c%d, %c%d", q, Rd, q, Rn, q, Rm);
+        return buff;
+    }
+
+    // INS
+    if(isMask(opcode, "01101110000rrrrr0ssss1nnnnnddddd", &a)) {
+        char s = '?';
+        int idx1=0, idx2=0;
+        if(immr&1)            {s='B'; idx1=(immr)>>1; idx2=imms;}
+        else if((immr&3)==2)  {s='H'; idx1=(immr)>>2; idx2=(imms)>>1;}
+        else if((immr&7)==4)  {s='S'; idx1=(immr)>>3; idx2=(imms)>>2;}
+        else if((immr&15)==8) {s='D'; idx1=(immr)>>4; idx2=(imms)>>3;}
+        snprintf(buff, sizeof(buff), "INS V%d.%c[%d], V%d.%c[%d]", Rd, s, idx1, Rn, s, idx2);
+        return buff;
+    }
+    if(isMask(opcode, "01001110000rrrrr000111nnnnnddddd", &a)) {
+        char s = '?', R = 0;
+        int idx1=0;
+        if(immr&1)            {s='B'; idx1=(immr)>>1;}
+        else if((immr&3)==2)  {s='H'; idx1=(immr)>>2;}
+        else if((immr&7)==4)  {s='S'; idx1=(immr)>>3;}
+        else if((immr&15)==8) {s='D'; idx1=(immr)>>4; R=1;}
+        snprintf(buff, sizeof(buff), "INS V%d.%c[%d], %s", Rd, s, idx1, R?Xt[Rn]:Wt[Rn]);
+        return buff;
+    }
+
+    // LDR / STR
+    if(isMask(opcode, "ss111101cciiiiiiiiiiiinnnnnttttt", &a)) {
+        char s = '?';
+        int size=imms;
+        int op=0;
+        if(size==0 && opc==1)      {s='B';}
+        else if(size==1 && opc==1) {s='H';}
+        else if(size==2 && opc==1) {s='S';}
+        else if(size==3 && opc==1) {s='D';}
+        else if(size==0 && opc==3) {s='Q'; size = 4;}
+        else if(size==0 && opc==0) {s='B'; op=1;}
+        else if(size==1 && opc==0) {s='H'; op=1;}
+        else if(size==2 && opc==0) {s='S'; op=1;}
+        else if(size==3 && opc==0) {s='D'; op=1;}
+        else if(size==0 && opc==2) {s='Q'; op=1; size = 4;}
+
+        int offset = imm<<size;
+        snprintf(buff, sizeof(buff), "V%s %c%d, [%s, %d]", op?"STR":"LDR", s, Rt, XtSp[Rn], offset);
+        return buff;
+    }
+
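On the printer side, isMask() walks the 32-bit opcode MSB-first against a 32-character mask: '0'/'1' are fixed bits that must match, and each letter accumulates the bits of a named field. A stripped-down sketch of the same idea (hypothetical and simplified to a single pattern with only the Q/d/n/m fields), decoding EOR V0.16B, V0.16B, V0.16B (0x6E201C00) the way the new VEOR case above would:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        const char *mask = "0Q101110001mmmmm000111nnnnnddddd"; // the VEOR pattern above
        uint32_t opcode = 0x6E201C00;                          // EOR V0.16B, V0.16B, V0.16B
        int d=0, n=0, m=0, Q=0, ok=1;
        for (int i=0; i<32 && ok; ++i) {
            int v = (opcode >> (31-i)) & 1;   // walk bits MSB-first, like isMask()
            switch (mask[i]) {
                case '0': if (v)  ok=0; break; // fixed bit must be 0
                case '1': if (!v) ok=0; break; // fixed bit must be 1
                case 'Q': Q = (Q<<1)|v; break; // accumulate named fields bit by bit
                case 'd': d = (d<<1)|v; break;
                case 'n': n = (n<<1)|v; break;
                case 'm': m = (m<<1)|v; break;
            }
        }
        if (ok)
            printf("VEOR %c%d, %c%d, %c%d\n", Q?'Q':'D', d, Q?'Q':'D', n, Q?'Q':'D', m);
        return 0;
    }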
diff --git a/src/dynarec/dynarec_arm64_660f.c b/src/dynarec/dynarec_arm64_660f.c
--- a/src/dynarec/dynarec_arm64_660f.c
+++ b/src/dynarec/dynarec_arm64_660f.c
@@ ... @@
-#define GETGX(a) gd = ((nextop&0x38)>>3)+(rex.r<<3); \
+#define GETGX(a)   \
+    gd = ((nextop&0x38)>>3)+(rex.r<<3);  \
     a = sse_get_reg(dyn, ninst, x1, gd)
 
 uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog)
@@ -86,6 +86,46 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
         GOCOND(0x40, "CMOV", "Gw, Ew");
 #undef GO
 
+        case 0x6E:
+            INST_NAME("MOVD Gx, Ed");
+            nextop = F8;
+            gd = ((nextop&0x38)>>3)+(rex.r<<3);
+            GETED(0);
+            v0 = sse_get_reg_empty(dyn, ninst, x1, gd);
+            VEORQ(v0, v0, v0);      // RAZ vector, so the unused lanes read as zero
+            if(rex.w) {
+                VMOVQDfrom(v0, 0, ed);
+            } else {
+                VMOVQSfrom(v0, 0, ed);
+            }
+            break;
+
+        case 0x7E:
+            INST_NAME("MOVD Ed, Gx");
+            nextop = F8;
+            gd = ((nextop&0x38)>>3)+(rex.r<<3);
+            v0 = sse_get_reg(dyn, ninst, x1, gd);
+            if(rex.w) {
+                if((nextop&0xC0)==0xC0) {
+                    ed = xRAX + (nextop&7) + (rex.b<<3);
+                    VMOVQDto(ed, v0, 0);
+                } else {
+                    VMOVQDto(x2, v0, 0);    // use a regular store to avoid a Bus Error
+                    addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0);
+                    STRx_U12(x2, ed, fixedaddress);
+                }
+            } else {
+                if((nextop&0xC0)==0xC0) {
+                    ed = xRAX + (nextop&7) + (rex.b<<3);
+                    VMOVSto(ed, v0, 0);
+                } else {
+                    VMOVSto(x2, v0, 0);     // use a regular store to avoid a Bus Error
+                    addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, 0, 0);
+                    STRw_U12(x2, ed, fixedaddress);
+                }
+            }
+            break;
+
         case 0xA3:
             INST_NAME("BT Ew, Gw");
             SETFLAGS(X_CF, SF_SET);
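For reference, the x86 semantics the two new cases implement: 66 0F 6E zero-extends a 32-bit (or, with REX.W, 64-bit) GPR or memory operand into the low lane of an XMM register, and 66 0F 7E moves the low lane back out. A plain-C sketch of those semantics (hypothetical helpers, for illustration only, mirroring the VEORQ + VMOVQxfrom and VMOVQDto/VMOVSto paths above):

    #include <stdio.h>
    #include <stdint.h>

    typedef struct { uint64_t lo, hi; } xmm_t;

    // 66 0F 6E /r : MOVD/MOVQ xmm, r/m32|64 - zero-extend the source into xmm
    static xmm_t movd_load(uint64_t ed, int rexw)
    {
        xmm_t x = { rexw ? ed : (uint32_t)ed, 0 };  // upper lanes cleared (the VEORQ)
        return x;
    }

    // 66 0F 7E /r : MOVD/MOVQ r/m32|64, xmm - low lane of xmm to the destination
    static uint64_t movd_store(xmm_t x, int rexw)
    {
        return rexw ? x.lo : (uint32_t)x.lo;        // the VMOVQDto / VMOVSto paths
    }

    int main(void)
    {
        xmm_t x = movd_load(0xFFFFFFFF12345678ull, 0);
        printf("movd: lo=%016llx hi=%016llx\n", (unsigned long long)x.lo, (unsigned long long)x.hi);
        printf("back: %016llx\n", (unsigned long long)movd_store(x, 1));
        return 0;
    }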