diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2021-03-23 13:31:10 +0100 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2021-03-23 13:31:10 +0100 |
| commit | e1a2439fbfbfd1b9520767ff4036998dd5591a23 (patch) | |
| tree | 778255723281d2f3169573592c5def806fe41d22 /src | |
| parent | 9fb2cb86ed87804c41a68601d2aecf3409eb893e (diff) | |
| download | box64-e1a2439fbfbfd1b9520767ff4036998dd5591a23.tar.gz box64-e1a2439fbfbfd1b9520767ff4036998dd5591a23.zip | |
[DYNAREC] Added 66 0F 70 opcode (tricky one)
Diffstat (limited to 'src')
| -rwxr-xr-x | src/dynarec/arm64_emitter.h | 46 | ||||
| -rwxr-xr-x | src/dynarec/dynarec_arm64_660f.c | 56 |
2 files changed, 102 insertions, 0 deletions
diff --git a/src/dynarec/arm64_emitter.h b/src/dynarec/arm64_emitter.h
index 069d65d2..9b347af7 100755
--- a/src/dynarec/arm64_emitter.h
+++ b/src/dynarec/arm64_emitter.h
@@ -614,6 +614,21 @@
 #define VSTR128_REG(Qt, Rn, Rm) EMIT(VMEM_REG_gen(0b00, 0b10, Rm, 0b011, 0, Rn, Dt))
 #define VSTR128_REG_LSL4(Qt, Rn, Rm) EMIT(VMEM_REG_gen(0b00, 0b10, Rm, 0b011, 1, Rn, Dt))
 
+#define LD1R_gen(Q, size, Rn, Rt)   ((Q)<<30 | 0b0011010<<23 | 1<<22 | 0<<21 | 0b110<<13 | (size)<<10 | (Rn)<<5 | (Rt))
+#define VLDQ1R_8(Rt, Rn)            EMIT(LD1R_gen(1, 0b00, Rn, Rt))
+#define VLDQ1R_16(Rt, Rn)           EMIT(LD1R_gen(1, 0b01, Rn, Rt))
+#define VLDQ1R_32(Rt, Rn)           EMIT(LD1R_gen(1, 0b10, Rn, Rt))
+#define VLDQ1R_64(Rt, Rn)           EMIT(LD1R_gen(1, 0b11, Rn, Rt))
+#define VLD1R_8(Rt, Rn)             EMIT(LD1R_gen(0, 0b00, Rn, Rt))
+#define VLD1R_16(Rt, Rn)            EMIT(LD1R_gen(0, 0b01, Rn, Rt))
+#define VLD1R_32(Rt, Rn)            EMIT(LD1R_gen(0, 0b10, Rn, Rt))
+
+#define LD1_single(Q, opcode, S, size, Rn, Rt)  ((Q)<<30 | 0b0011010<<23 | 1<<22 | 0<<21 | (opcode)<<13 | (S)<<12 | (size)<<10 | (Rn)<<5 | (Rt))
+#define VLD1_8(Rt, index, Rn)       EMIT(LD1_single(((index)>>3)&1, 0b000, ((index)>>2)&1, (index)&3, Rn, Rt))
+#define VLD1_16(Rt, index, Rn)      EMIT(LD1_single(((index)>>2)&1, 0b010, ((index)>>1)&1, ((index)&1)<<1, Rn, Rt))
+#define VLD1_32(Rt, index, Rn)      EMIT(LD1_single(((index)>>1)&1, 0b100, ((index))&1, 0b00, Rn, Rt))
+#define VLD1_64(Rt, index, Rn)      EMIT(LD1_single(((index))&1, 0b100, 0, 0b01, Rn, Rt))
+
 // LOGIC
 #define VLOGIC_gen(Q, opc2, Rm, Rn, Rd) ((Q)<<30 | 1<<29 | 0b01110<<24 | (opc2)<<22 | 1<<21 | (Rm)<<16 | 0b00011<<11 | 1<<10 | (Rn)<<5 | (Rd))
 #define VEORQ(Vd, Vn, Vm) EMIT(VLOGIC_gen(1, 0b00, Vm, Vn, Vd))
@@ -941,4 +956,35 @@
 #define VZIP1Q_64(Rt, Rn, Rm) EMIT(ZIP_gen(1, 0b11, Rm, 0, Rn, Rt))
 #define VZIP2Q_64(Rt, Rn, Rm) EMIT(ZIP_gen(1, 0b11, Rm, 1, Rn, Rt))
 
+// TBL / TBX (op=0 encodes TBL, op=1 encodes TBX)
+#define TBL_gen(Q, Rm, len, op, Rn, Rd) ((Q)<<30 | 0b001110<<24 | (Rm)<<16 | (len)<<13 | (op)<<12 | (Rn)<<5 | (Rd))
+//Use Rm[] to pick from Rn element and store in Rd. Out-of-range element gets 0
+#define VTBLQ1_8(Rd, Rn, Rm)        EMIT(TBL_gen(1, Rm, 0b00, 0, Rn, Rd))
+//Use Rm[] to pick from Rn, Rn+1 element and store in Rd. Out-of-range element gets 0
+#define VTBLQ2_8(Rd, Rn, Rm)        EMIT(TBL_gen(1, Rm, 0b01, 0, Rn, Rd))
+//Use Rm[] to pick from Rn, Rn+1, Rn+2 element and store in Rd. Out-of-range element gets 0
+#define VTBLQ3_8(Rd, Rn, Rm)        EMIT(TBL_gen(1, Rm, 0b10, 0, Rn, Rd))
+//Use Rm[] to pick from Rn, Rn+1, Rn+2, Rn+3 element and store in Rd. Out-of-range element gets 0
+#define VTBLQ4_8(Rd, Rn, Rm)        EMIT(TBL_gen(1, Rm, 0b11, 0, Rn, Rd))
+//Use Rm[] to pick from Rn element and store in Rd. Out-of-range element stays untouched
+#define VTBXQ1_8(Rd, Rn, Rm)        EMIT(TBL_gen(1, Rm, 0b00, 1, Rn, Rd))
+//Use Rm[] to pick from Rn, Rn+1 element and store in Rd. Out-of-range element stays untouched
+#define VTBXQ2_8(Rd, Rn, Rm)        EMIT(TBL_gen(1, Rm, 0b01, 1, Rn, Rd))
+//Use Rm[] to pick from Rn, Rn+1, Rn+2 element and store in Rd. Out-of-range element stays untouched
+#define VTBXQ3_8(Rd, Rn, Rm)        EMIT(TBL_gen(1, Rm, 0b10, 1, Rn, Rd))
+//Use Rm[] to pick from Rn, Rn+1, Rn+2, Rn+3 element and store in Rd. Out-of-range element stays untouched
+#define VTBXQ4_8(Rd, Rn, Rm)        EMIT(TBL_gen(1, Rm, 0b11, 1, Rn, Rd))
+
+// TRN (op=0 encodes TRN1, op=1 encodes TRN2; the selector is bit 14)
+#define TRN_gen(Q, size, Rm, op, Rn, Rd)    ((Q)<<30 | 0b001110<<24 | (size)<<22 | (Rm)<<16 | (op)<<14 | 0b10<<12 | 0b10<<10 | (Rn)<<5 | (Rd))
+#define VTRNQ1_64(Rd, Rn, Rm)       EMIT(TRN_gen(1, 0b11, Rm, 0, Rn, Rd))
+#define VTRNQ1_32(Rd, Rn, Rm)       EMIT(TRN_gen(1, 0b10, Rm, 0, Rn, Rd))
+#define VTRNQ1_16(Rd, Rn, Rm)       EMIT(TRN_gen(1, 0b01, Rm, 0, Rn, Rd))
+#define VTRNQ1_8(Rd, Rn, Rm)        EMIT(TRN_gen(1, 0b00, Rm, 0, Rn, Rd))
+#define VSWP(Rd, Rn)                EMIT(1<<30 | 0b101110<<24 | (Rn)<<16 | 8<<11 | (Rn)<<5 | (Rd)) // EXT Rd.16B, Rn.16B, Rn.16B, #8: Rd = Rn with its two 64-bit halves exchanged (Rd may be Rn)
+#define VTRNQ2_64(Rd, Rn, Rm)       EMIT(TRN_gen(1, 0b11, Rm, 1, Rn, Rd))
+#define VTRNQ2_32(Rd, Rn, Rm)       EMIT(TRN_gen(1, 0b10, Rm, 1, Rn, Rd))
+#define VTRNQ2_16(Rd, Rn, Rm)       EMIT(TRN_gen(1, 0b01, Rm, 1, Rn, Rd))
+#define VTRNQ2_8(Rd, Rn, Rm)        EMIT(TRN_gen(1, 0b00, Rm, 1, Rn, Rd))
+
 #endif //__ARM64_EMITTER_H__
diff --git a/src/dynarec/dynarec_arm64_660f.c b/src/dynarec/dynarec_arm64_660f.c
index bdf29b01..4905ee36 100755
--- a/src/dynarec/dynarec_arm64_660f.c
+++ b/src/dynarec/dynarec_arm64_660f.c
@@ -223,6 +223,62 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 VLDR128_U12(v0, ed, fixedaddress);
             }
             break;
+        case 0x70:
+            INST_NAME("PSHUFD Gx,Ex,Ib");
+            // Gx.dword[i] = Ex.dword[(Ib>>(2*i))&3] for i in 0..3
+            nextop = F8;
+            GETG;
+            i32 = -1;   // source dword whose address is currently in x2 (memory form); -1 = none yet
+            v0 = sse_get_reg(dyn, ninst, x1, gd);
+            if(MODREG) {
+                u8 = F8;
+                v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3));
+                if(u8==0x4E) {
+                    // 0x4E only exchanges the two 64-bit halves: one EXT does it,
+                    // and it stays correct when v0==v1
+                    VSWP(v0, v1);
+                } else {
+                    // generic case: build a byte-index vector in d0 and permute with TBL.
+                    // swp[n] holds the 4 byte indices that select source dword n
+                    uint32_t swp[4] = {
+                        (0)|(1<<8)|(2<<16)|(3<<24),
+                        (4)|(5<<8)|(6<<16)|(7<<24),
+                        (8)|(9<<8)|(10<<16)|(11<<24),
+                        (12)|(13<<8)|(14<<16)|(15<<24)
+                    };
+                    d0 = fpu_get_scratch(dyn);
+                    MOV32w(x2, swp[(u8>>(0*2))&3]);
+                    MOV32w(x3, swp[(u8>>(1*2))&3]);
+                    VMOVQSfrom(d0, 0, x2);
+                    VMOVQSfrom(d0, 1, x3);
+                    MOV32w(x2, swp[(u8>>(2*2))&3]);
+                    MOV32w(x3, swp[(u8>>(3*2))&3]);
+                    VMOVQSfrom(d0, 2, x2);
+                    VMOVQSfrom(d0, 3, x3);
+                    // TBL reads its table and indices before writing Rd, so v0==v1 needs no copy
+                    VTBLQ1_8(v0, v1, d0);
+                }
+            } else {
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 1);
+                u8 = F8;
+                if (u8) {
+                    // load each destination lane straight from the selected source dword,
+                    // recomputing the address in x2 only when the selected dword changes
+                    for (int i=0; i<4; ++i) {
+                        int32_t idx = (u8>>(i*2))&3;
+                        if(idx!=i32) {
+                            ADDx_U12(x2, ed, idx*4);
+                            i32 = idx;
+                        }
+                        VLD1_32(v0, i, x2);
+                    }
+                } else {
+                    // every lane takes dword 0: broadcast it to the full 128-bit register
+                    // (VLD1R_32 is the 64-bit form and would only fill two lanes)
+                    VLDQ1R_32(v0, ed);
+                }
+            }
+            break;
 
         case 0x7E:
             INST_NAME("MOVD Ed,Gx");