diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2024-06-23 20:23:04 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2024-06-23 20:23:04 +0200 |
| commit | b0727d6f2b4f2119e835019be2bab577293f0e4f (patch) | |
| tree | 216009dc61e2fe03d79e4d38f30afb1c820f57b2 /src | |
| parent | c66be063aa30b8ec8e3366fa7767596ef2ecc8bb (diff) | |
| download | box64-b0727d6f2b4f2119e835019be2bab577293f0e4f.tar.gz box64-b0727d6f2b4f2119e835019be2bab577293f0e4f.zip | |
[ARM64_DYNAREC] Some small optimizations to a few AVX opcodes
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_0f.c | 17 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_66_0f.c | 10 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c | 23 |
3 files changed, 44 insertions, 6 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_0f.c index cfe57d6f..4a048e00 100644 --- a/src/dynarec/arm64/dynarec_arm64_avx_0f.c +++ b/src/dynarec/arm64/dynarec_arm64_avx_0f.c @@ -640,8 +640,16 @@ uintptr_t dynarec64_AVX_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int } else if(v2==v1 && (u8==0xe5)) { // easy special case VMOVQ(v0, v2); VMOVeS(v0, 0, v0, 1); + } else if(MODREG && u8==0x88) { + VUZP1Q_32(v0, v2, v1); + } else if(MODREG && u8==0xdd) { + VUZP2Q_32(v0, v2, v1); } else { - d0 = fpu_get_scratch(dyn, ninst); + if((v0==v1) || (v0==v2)) { + d0 = fpu_get_scratch(dyn, ninst); s0 = 0; + } else { + d0 = v0; s0 = 1; + } // first two elements from Vx for(int i=0; i<2; ++i) { VMOVeS(d0, i, v2, (u8>>(i*2))&3); @@ -658,7 +666,7 @@ uintptr_t dynarec64_AVX_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int VLD1_32(d0, i, x2); } } - VMOVQ(v0, d0); + if(v0!=d0) VMOVQ(v0, d0); } if(vex.l) { if(MODREG) @@ -672,7 +680,12 @@ uintptr_t dynarec64_AVX_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int } else if(v2==v1 && (u8==0xe5)) { VMOVQ(v0, v2); VMOVeS(v0, 0, v0, 1); + } else if(MODREG && u8==0x88) { + VUZP1Q_32(v0, v2, v1); + } else if(MODREG && u8==0xdd) { + VUZP2Q_32(v0, v2, v1); } else { + if(s0) d0 = v0; for(int i=0; i<2; ++i) { VMOVeS(d0, i, v2, (u8>>(i*2))&3); } diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c index 2e23b0e1..ca141f08 100644 --- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c +++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c @@ -770,7 +770,7 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, if(!vex.l) YMM0(gd); break; case 0x70: - INST_NAME("VPSHUFD Gx,Ex,Ib"); + INST_NAME("VPSHUFD Gx, Ex, Ib"); nextop = F8; if(MODREG) { u8 = F8; @@ -803,6 +803,14 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, VDUPQ_64(v0, v1, 1); } else if(u8==0xB1) { VREV64Q_32(v0, v1); + } else 
if(u8==0xFA) { + VZIP2Q_32(v0, v1, v1); + } else if(u8==0x50) { + VZIP1Q_32(v0, v1, v1); + } else if(u8==0xF5) { + VTRNQ2_32(v0, v1, v1); + } else if(u8==0xA0) { + VTRNQ1_32(v0, v1, v1); } else if(v0!=v1) { VMOVeS(v0, 0, v1, (u8>>(0*2))&3); VMOVeS(v0, 1, v1, (u8>>(1*2))&3); diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c index ae065c3f..a79c95a0 100644 --- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c +++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c @@ -115,6 +115,14 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip } if(u8==0x00 || u8==0x55 || u8==0xAA || u8==0xFF) VDUPQ_32(v0, (v0==v1)?q1:v1, u8&3); + else if(u8==0x50) + VZIP1Q_32(v0, v1, v1); + else if(u8==0xFA) + VZIP2Q_32(v0, v1, v1); + else if(u8==0xA0) + VTRNQ1_32(v0, v1, v1); + else if(u8==0xF5) + VTRNQ2_32(v0, v1, v1); else for(int i=0; i<4; ++i) VMOVeS(v0, i, (v0==v1)?q1:v1, (u8>>(i*2))&3); } @@ -254,7 +262,9 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip nextop = F8; GETGX_empty_VXEX(q0, q2, q1, 1); u8 = F8; - if(q0==q1) { + if((u8&0xf)==0xf) { + if(q0!=q1) VMOVQ(q0, q1); + } else if(q0==q1) { for(int i=0; i<4; ++i) if(u8&(1<<i)) { VMOVeS(q0, i, q1, i); @@ -274,7 +284,9 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip } if(vex.l) { GETGY_empty_VYEY(q0, q2, q1); - if(q0==q1) { + if((u8&0xf0)==0xf0) { + if(q0!=q1) VMOVQ(q0, q1); + } else if(q0==q1) { for(int i=0; i<4; ++i) if(u8&(1<<(i+4))) { VMOVeS(q0, i, q1, i); @@ -435,7 +447,12 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip u8 = F8; GETVX(v2, 0); GETGX_empty(v0); - GETGY_empty_VY(q0, q2, 0, (MODREG)?((nextop&7)+(rex.b<<3)):-1, -1); + if(v0==v2 && u8==1) { + GETGY_empty(q0, 0, (MODREG)?((nextop&7)+(rex.b<<3)):-1, -1); + q2 = q0; + } else { + GETGY_empty_VY(q0, q2, 0, (MODREG)?((nextop&7)+(rex.b<<3)):-1, -1); + } if(MODREG) VMOVQ((u8&1)?q0:v0, v1); 
else |