| author    | ptitSeb <sebastien.chev@gmail.com>       | 2021-03-31 16:26:34 +0200 |
|-----------|------------------------------------------|---------------------------|
| committer | ptitSeb <sebastien.chev@gmail.com>       | 2021-03-31 16:26:34 +0200 |
| commit    | 139509024113e7303fc8c55854d1509eadf52da5 |                           |
| tree      | d33b7b0b2df19452d2def6d50728ec98e0683736 | /src                      |
| parent    | dc938e8a60c7a466bb8b25fc465e6dedaff4b183 |                           |
[DYNAREC] Added 66 0F 38 04 opcode
Diffstat (limited to 'src')
| -rwxr-xr-x | src/dynarec/arm64_emitter.h      | 52 |
| -rwxr-xr-x | src/dynarec/arm64_printer.c      | 56 |
| -rwxr-xr-x | src/dynarec/dynarec_arm64_660f.c | 19 |
3 files changed, 126 insertions, 1 deletion
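66 0F 38 04 is the SSSE3 PMADDUBSW instruction: it multiplies each unsigned byte of the destination by the corresponding signed byte of the source, adds adjacent 16-bit products pairwise, and writes each sum to a signed 16-bit lane with saturation. A minimal scalar sketch of those semantics for reference (the helper name is illustrative, not box64 code):

```c
#include <stdint.h>

// Scalar reference for PMADDUBSW (66 0F 38 04): 'dst' bytes are treated as
// unsigned, 'src' bytes as signed; adjacent products are summed and the
// result is saturated to the int16 range.
static void pmaddubsw_ref(const uint8_t dst[16], const int8_t src[16], int16_t out[8])
{
    for (int i = 0; i < 8; ++i) {
        int32_t sum = (int32_t)dst[2*i]   * src[2*i]
                    + (int32_t)dst[2*i+1] * src[2*i+1];
        if (sum >  32767) sum =  32767;   // saturate to signed 16-bit
        if (sum < -32768) sum = -32768;
        out[i] = (int16_t)sum;
    }
}
```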
```diff
diff --git a/src/dynarec/arm64_emitter.h b/src/dynarec/arm64_emitter.h
index 364dda4d..a3394276 100755
--- a/src/dynarec/arm64_emitter.h
+++ b/src/dynarec/arm64_emitter.h
@@ -629,6 +629,20 @@
 // imm16 must be 4-aligned
 #define VSTR128_U12(Qt, Rn, imm16)  EMIT(VMEM_gen(0b00, 0b10, ((uint32_t)((imm16)>>4))&0xfff, Rn, Qt))
 
+#define VMEMUR_vector(size, opc, imm9, Rn, Rt)  ((size)<<30 | 0b111<<27 | 1<<26 | (opc)<<22 | (imm9)<<12 | (Rn)<<5 | (Rt))
+// signed offset, no alignment!
+#define VLDR8_I9(Vt, Rn, imm9)      EMIT(VMEMUR_vector(0b00, 0b01, (imm9)&0b111111111, Rn, Vt))
+#define VLDR16_I9(Vt, Rn, imm9)     EMIT(VMEMUR_vector(0b01, 0b01, (imm9)&0b111111111, Rn, Vt))
+#define VLDR32_I9(Vt, Rn, imm9)     EMIT(VMEMUR_vector(0b10, 0b01, (imm9)&0b111111111, Rn, Vt))
+#define VLDR64_I9(Vt, Rn, imm9)     EMIT(VMEMUR_vector(0b11, 0b01, (imm9)&0b111111111, Rn, Vt))
+#define VLDR128_I9(Vt, Rn, imm9)    EMIT(VMEMUR_vector(0b00, 0b11, (imm9)&0b111111111, Rn, Vt))
+// signed offset, no alignment!
+#define VSTR8_I9(Vt, Rn, imm9)      EMIT(VMEMUR_vector(0b00, 0b00, (imm9)&0b111111111, Rn, Vt))
+#define VSTR16_I9(Vt, Rn, imm9)     EMIT(VMEMUR_vector(0b01, 0b00, (imm9)&0b111111111, Rn, Vt))
+#define VSTR32_I9(Vt, Rn, imm9)     EMIT(VMEMUR_vector(0b10, 0b00, (imm9)&0b111111111, Rn, Vt))
+#define VSTR64_I9(Vt, Rn, imm9)     EMIT(VMEMUR_vector(0b11, 0b00, (imm9)&0b111111111, Rn, Vt))
+#define VSTR128_I9(Vt, Rn, imm9)    EMIT(VMEMUR_vector(0b00, 0b10, (imm9)&0b111111111, Rn, Vt))
+
 #define VMEMW_gen(size, opc, imm9, op2, Rn, Rt) ((size)<<30 | 0b111<<27 | 1<<26 | (opc)<<22 | (imm9)<<12 | (op2)<<10 | 0b01<<10 | (Rn)<<5 | (Rt))
 #define VLDR64_S9_postindex(Rt, Rn, imm9)   EMIT(VMEMW_gen(0b11, 0b01, (imm9)&0x1ff, 0b01, Rn, Rt))
 #define VLDR64_S9_preindex(Rt, Rn, imm9)    EMIT(VMEMW_gen(0b11, 0b01, (imm9)&0x1ff, 0b11, Rn, Rt))
@@ -1243,6 +1257,15 @@
 #define VUMULL2_16(Rd, Rn, Rm)  EMIT(MULL_vector(1, 1, 0b01, Rm, Rn, Rd))
 #define VUMULL2_32(Rd, Rn, Rm)  EMIT(MULL_vector(1, 1, 0b10, Rm, Rn, Rd))
 
+// MUL
+#define MUL_vector(Q, size, Rm, Rn, Rd) ((Q)<<30 | 0b01110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 0b10011<<11 | 1<<10 | (Rn)<<5 | (Rd))
+#define VMUL_8(Vd, Vn, Vm)      EMIT(MUL_vector(0, 0b00, Vm, Vn, Vd))
+#define VMUL_16(Vd, Vn, Vm)     EMIT(MUL_vector(0, 0b01, Vm, Vn, Vd))
+#define VMUL_32(Vd, Vn, Vm)     EMIT(MUL_vector(0, 0b10, Vm, Vn, Vd))
+#define VMULQ_8(Vd, Vn, Vm)     EMIT(MUL_vector(1, 0b00, Vm, Vn, Vd))
+#define VMULQ_16(Vd, Vn, Vm)    EMIT(MUL_vector(1, 0b01, Vm, Vn, Vd))
+#define VMULQ_32(Vd, Vn, Vm)    EMIT(MUL_vector(1, 0b10, Vm, Vn, Vd))
+
 // Absolute Difference
 #define AD_vector(Q, U, size, Rm, ac, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 0b0111<<12 | (ac)<<11 | 1<<10 | (Rn)<<5 | (Rd))
 // Signed Absolute Difference and accumulate
@@ -1321,4 +1344,33 @@
 #define MOVIQ_8(Rd, imm8)   EMIT(MOVI_vector(1, 0, (((imm8)>>5)&0b111), 0b1110, ((imm8)&0b11111), Rd))
 #define MOVI_8(Rd, imm8)    EMIT(MOVI_vector(0, 0, (((imm8)>>5)&0b111), 0b1110, ((imm8)&0b11111), Rd))
 
+// SHLL and eXtend Long
+#define SHLL_vector(Q, U, immh, immb, Rn, Rd)   ((Q)<<30 | (U)<<29 | 0b011110<<23 | (immh)<<19 | (immb)<<16 | 0b10100<<11 | 1<<10 | (Rn)<<5 | (Rd))
+#define USHLL2_8(Vd, Vn, imm)   EMIT(SHLL_vector(1, 1, 0b0001, (imm)&0x7, Vn, Vd))
+#define USHLL_8(Vd, Vn, imm)    EMIT(SHLL_vector(0, 1, 0b0001, (imm)&0x7, Vn, Vd))
+#define SSHLL2_8(Vd, Vn, imm)   EMIT(SHLL_vector(1, 0, 0b0001, (imm)&0x7, Vn, Vd))
+#define SSHLL_8(Vd, Vn, imm)    EMIT(SHLL_vector(0, 0, 0b0001, (imm)&0x7, Vn, Vd))
+#define USHLL2_16(Vd, Vn, imm)  EMIT(SHLL_vector(1, 1, 0b0010|(((imm)>>3)&1), (imm)&0x7, Vn, Vd))
+#define USHLL_16(Vd, Vn, imm)   EMIT(SHLL_vector(0, 1, 0b0010|(((imm)>>3)&1), (imm)&0x7, Vn, Vd))
+#define SSHLL2_16(Vd, Vn, imm)  EMIT(SHLL_vector(1, 0, 0b0010|(((imm)>>3)&1), (imm)&0x7, Vn, Vd))
+#define SSHLL_16(Vd, Vn, imm)   EMIT(SHLL_vector(0, 0, 0b0010|(((imm)>>3)&1), (imm)&0x7, Vn, Vd))
+#define USHLL2_32(Vd, Vn, imm)  EMIT(SHLL_vector(1, 1, 0b0100|(((imm)>>3)&3), (imm)&0x7, Vn, Vd))
+#define USHLL_32(Vd, Vn, imm)   EMIT(SHLL_vector(0, 1, 0b0100|(((imm)>>3)&3), (imm)&0x7, Vn, Vd))
+#define SSHLL2_32(Vd, Vn, imm)  EMIT(SHLL_vector(1, 0, 0b0100|(((imm)>>3)&3), (imm)&0x7, Vn, Vd))
+#define SSHLL_32(Vd, Vn, imm)   EMIT(SHLL_vector(0, 0, 0b0100|(((imm)>>3)&3), (imm)&0x7, Vn, Vd))
+
+#define UXTL_8(Vd, Vn)      USHLL_8(Vd, Vn, 0)
+#define UXTL2_8(Vd, Vn)     USHLL2_8(Vd, Vn, 0)
+#define UXTL_16(Vd, Vn)     USHLL_16(Vd, Vn, 0)
+#define UXTL2_16(Vd, Vn)    USHLL2_16(Vd, Vn, 0)
+#define UXTL_32(Vd, Vn)     USHLL_32(Vd, Vn, 0)
+#define UXTL2_32(Vd, Vn)    USHLL2_32(Vd, Vn, 0)
+
+#define SXTL_8(Vd, Vn)      SSHLL_8(Vd, Vn, 0)
+#define SXTL2_8(Vd, Vn)     SSHLL2_8(Vd, Vn, 0)
+#define SXTL_16(Vd, Vn)     SSHLL_16(Vd, Vn, 0)
+#define SXTL2_16(Vd, Vn)    SSHLL2_16(Vd, Vn, 0)
+#define SXTL_32(Vd, Vn)     SSHLL_32(Vd, Vn, 0)
+#define SXTL2_32(Vd, Vn)    SSHLL2_32(Vd, Vn, 0)
+
 #endif //__ARM64_EMITTER_H__
```
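The emitter macros build each 32-bit AdvSIMD encoding by shifting the instruction fields into place. As a quick sanity check of the new MUL encoding, here is a standalone sketch (the macro is copied verbatim from the hunk above; binary literals are the GCC extension the emitter already relies on) that prints the word for VMULQ_16(0, 1, 2):

```c
#include <stdio.h>
#include <stdint.h>

// Copied from the diff: AdvSIMD MUL (vector), fields 0|Q|0|01110|size|1|Rm|10011|1|Rn|Rd.
#define MUL_vector(Q, size, Rm, Rn, Rd) ((Q)<<30 | 0b01110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 0b10011<<11 | 1<<10 | (Rn)<<5 | (Rd))

int main(void)
{
    // Q=1 (128-bit), size=0b01 (16-bit lanes): VMULQ_16(Vd=0, Vn=1, Vm=2)
    uint32_t w = (uint32_t)MUL_vector(1, 0b01, 2, 1, 0);
    printf("%08X\n", w);    // expected: 4E629C20, i.e. MUL V0.8H, V1.8H, V2.8H
    return 0;
}
```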
```diff
diff --git a/src/dynarec/arm64_printer.c b/src/dynarec/arm64_printer.c
index fd87692d..c47a0279 100755
--- a/src/dynarec/arm64_printer.c
+++ b/src/dynarec/arm64_printer.c
@@ -856,6 +856,13 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
         return buff;
     }
 
+    // VMUL
+    if(isMask(opcode, "0Q001110ff1mmmmm100111nnnnnddddd", &a)) {
+        const char* Y[] = {"8B", "16B", "4H", "8H", "2S", "4S", "??", "2D"};
+        const char* Vd = Y[((sf)<<1) | a.Q];
+        snprintf(buff, sizeof(buff), "VMUL V%d.%s, V%d.%s, V%d.%s", Rd, Vd, Rn, Vd, Rm, Vd);
+        return buff;
+    }
     // CMP
     if(isMask(opcode, "0Q101110ff1mmmmm100011nnnnnddddd", &a)) {
         const char* Y[] = {"8B", "16B", "4H", "8H", "2S", "4S", "??", "2D"};
@@ -1157,7 +1164,9 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
     if(isMask(opcode, "0QU01110ff1000000c1010nnnnnddddd", &a)) {
         const char* Y[] = {"8B", "16B", "4H", "8H", "2S", "4S", "??", "???"};
         const char* Vd = Y[(sf<<1) | a.Q];
-        snprintf(buff, sizeof(buff), "%cAD%cLP V%d.%s, V%d.%s", a.U?'U':'S', a.c?'A':'D', Rd, Vd, Rn, Vd);
+        const char* Z[] = {"4H", "8H", "2S", "4S", "1D", "2D", "??", "???"};
+        const char* Va = Z[(sf<<1) | a.Q];
+        snprintf(buff, sizeof(buff), "%cAD%cLP V%d.%s, V%d.%s", a.U?'U':'S', a.c?'A':'D', Rd, Va, Rn, Vd);
         return buff;
     }
     if(isMask(opcode, "0QU01110ff110000001110nnnnnddddd", &a)) {
@@ -1196,6 +1205,32 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
         snprintf(buff, sizeof(buff), "%s1 {V%d.%s}[%d], %s", a.L?"LD":"ST", Rd, Y[scale], idx, XtSp[Rt]);
         return buff;
     }
+    // LDUR/STUR
+    if(isMask(opcode, "ff111100cL0iiiiiiiii00nnnnnttttt", &a)) {
+        const char* Y[] = {"B", "H", "S", "D", "Q"};
+        int sz = sf;
+        if(sz==0 && a.c)
+            sz = 4;
+        int offset = signExtend(imm, 9);
+        if(!offset)
+            snprintf(buff, sizeof(buff), "%sUR %s%d, [%s]", a.L?"LD":"ST", Y[sz], Rd, XtSp[Rn]);
+        else
+            snprintf(buff, sizeof(buff), "%sUR %s%d, [%s, %+d]", a.L?"LD":"ST", Y[sz], Rd, XtSp[Rn], offset);
+        return buff;
+    }
+    // LDR/STR vector immediate
+    if(isMask(opcode, "ff111101cLiiiiiiiiiiiinnnnnttttt", &a)) {
+        const char* Y[] = {"B", "H", "S", "D", "Q"};
+        int sz = sf;
+        if(sz==0 && a.c)
+            sz = 4;
+        int offset = imm<<sz;
+        if(!offset)
+            snprintf(buff, sizeof(buff), "%sR %s%d, [%s]", a.L?"LD":"ST", Y[sz], Rd, XtSp[Rn]);
+        else
+            snprintf(buff, sizeof(buff), "%sR %s%d, [%s, %+d]", a.L?"LD":"ST", Y[sz], Rd, XtSp[Rn], offset);
+        return buff;
+    }
     // (S/U)QXT(U)N
     if(isMask(opcode, "0Q101110ff100001001010nnnnnddddd", &a)) {
@@ -1215,6 +1250,25 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
         return buff;
     }
 
+    // (U/S)SHLL(2) / (U/S)XTL(2)
+    if(isMask(opcode, "0QU011110hhhhiii101001nnnnnddddd", &a)) {
+        const char* Y[] = {"8B", "16B", "4H", "8H", "2S", "4S", "?", "??"};
+        const char* Z[] = {"8H", "4S", "2D", "?"};
+        int sz = 3;
+        if((a.h&0b1111)==0b0001) sz=0;
+        else if((a.h&0b1110)==0b0010) sz=1;
+        else if((a.h&0b1100)==0b0100) sz=2;
+        int sh = (((a.h)<<3)|(imm)) - (8<<sz);
+        const char* Vd = Y[(sz<<1)|a.Q];
+        const char* Va = Z[sz];
+        if(!sh)
+            snprintf(buff, sizeof(buff), "%cXTL%s V%d.%s, V%d.%s", a.U?'U':'S', a.Q?"2":"", Rd, Va, Rn, Vd);
+        else
+            snprintf(buff, sizeof(buff), "%cSHLL%s V%d.%s, V%d.%s, #%d", a.U?'U':'S', a.Q?"2":"", Rd, Va, Rn, Vd, sh);
+        return buff;
+    }
+
     snprintf(buff, sizeof(buff), "%08X ???", __builtin_bswap32(opcode));
     return buff;
```
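The LDUR/STUR printer converts the raw 9-bit immediate into a signed byte offset with signExtend(imm, 9); the helper name comes from the diff itself, and this standalone version only illustrates the operation:

```c
#include <stdint.h>

// Sign-extend the low 'bits' bits of 'v' to a full int32_t using the
// xor-and-subtract trick, e.g. signExtend(0x1FF, 9) == -1.
static int32_t signExtend(uint32_t v, int bits)
{
    uint32_t sign = 1u << (bits - 1);   // sign bit of the bitfield
    return (int32_t)((v ^ sign) - sign);
}
```

By contrast, the LDR/STR unsigned-offset form scales its 12-bit immediate by the access size, hence the `imm<<sz` in the second pattern.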
```diff
diff --git a/src/dynarec/dynarec_arm64_660f.c b/src/dynarec/dynarec_arm64_660f.c
index 98f44979..36071457 100755
--- a/src/dynarec/dynarec_arm64_660f.c
+++ b/src/dynarec/dynarec_arm64_660f.c
@@ -234,6 +234,25 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             VTBLQ1_8(q0, q0, d0);
             break;
 
+        case 0x04:
+            INST_NAME("PMADDUBSW Gx,Ex");
+            nextop = F8;
+            GETGX(q0);
+            GETEX(q1, 0);
+            v0 = fpu_get_scratch(dyn);
+            v1 = fpu_get_scratch(dyn);
+            UXTL_8(v0, q0);     // Gx bytes are unsigned, so zero-extended
+            SXTL_8(v1, q1);     // Ex bytes are signed
+            VMULQ_16(v0, v0, v1);
+            SADDLPQ_16(v1, v0);
+            UXTL2_8(v0, q0);    // unsigned high half
+            SQXTN_16(q0, v1);   // SQXTN resets the vector, so the high part must be grabbed first
+            SXTL2_8(v1, q1);    // signed high half
+            VMULQ_16(v0, v0, v1);
+            SADDLPQ_16(v0, v0);
+            SQXTN2_16(q0, v0);
+            break;
+
         default:
             DEFAULT;
     }
```
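Each u8*s8 product fits in int16 (at most 255*127 = 32385, at least 255*(-128) = -32640), so the 16-bit VMULQ is exact; only the sum of two adjacent products can overflow 16 bits, which is why the sequence widens with a signed pairwise add (SADDLP) before the saturating narrow (SQXTN). box64 emits these instructions directly through the macros above; written with standard NEON intrinsics, purely as an illustration of the same dataflow, it would look like this:

```c
#include <arm_neon.h>

// Illustrative NEON equivalent of the emitted PMADDUBSW sequence:
// 'a' holds the unsigned bytes (Gx), 'b' the signed bytes (Ex).
static int16x8_t pmaddubsw_neon(uint8x16_t a, int8x16_t b)
{
    // Low half: widen both operands to 16 bits, multiply exactly,
    // pairwise-add into 32 bits, then saturate back down to 16 bits.
    int16x8_t lo = vmulq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(a))),
                             vmovl_s8(vget_low_s8(b)));
    int16x4_t rlo = vqmovn_s32(vpaddlq_s16(lo));
    // High half: same steps, mirroring the UXTL2/SXTL2 ... SQXTN2 tail above.
    int16x8_t hi = vmulq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(a))),
                             vmovl_s8(vget_high_s8(b)));
    int16x4_t rhi = vqmovn_s32(vpaddlq_s16(hi));
    return vcombine_s16(rlo, rhi);
}
```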