| author | ptitSeb <sebastien.chev@gmail.com> | 2025-04-24 19:04:42 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2025-04-24 19:04:42 +0200 |
| commit | 4585b74310598068c36f9b9b30a940d7e96a1e1d (patch) | |
| tree | c7feb164f55ff84a8429cd0e1a0b9207191d9b6d /src | |
| parent | 768dfd37bb83c9214d05f70e035b140f7269e460 (diff) | |
| download | box64-4585b74310598068c36f9b9b30a940d7e96a1e1d.tar.gz box64-4585b74310598068c36f9b9b30a940d7e96a1e1d.zip | |
[ARM64_DYNAREC] Some optimisation to some (V)(P)BLEND* opcodes
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_660f.c | 41 |
|---|---|---|
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c | 43 |
2 files changed, 21 insertions, 63 deletions
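
The common thread in both files is NEON's bit-select family. `VSSHRQ_8(v1, v0, 7)` arithmetic-shifts each byte right by 7, smearing the selector's sign bit across the lane to produce an all-ones or all-zeros mask; `BIT` (bitwise insert if true) then copies source bits into the destination wherever the mask is 1, and `BIF` (bitwise insert if false) copies them wherever it is 0. A single `VBITQ` thus replaces the old `VBICQ`/`VANDQ`/`VORRQ` triple, and the `VBIFQ`-then-`VBITQ` pair handles a destination that may alias either blend operand without the old `VMOVQ` copy. A minimal scalar sketch of the semantics, with helper names that are mine rather than box64 macros:

```c
#include <stdint.h>

// Model of NEON BIT: insert src bits into dst where mask bits are 1,
// keep dst bits where mask is 0 -- exactly what the removed
// VBICQ/VANDQ/VORRQ sequence computed in three instructions.
static uint64_t bit64(uint64_t dst, uint64_t src, uint64_t mask) {
    return (dst & ~mask) | (src & mask);
}

// Model of NEON BIF, the complement: insert src bits where mask is 0,
// keep dst bits where mask is 1.
static uint64_t bif64(uint64_t dst, uint64_t src, uint64_t mask) {
    return (dst & mask) | (src & ~mask);
}
```

Chaining `bif64(dst, v2, mask)` then `bit64(dst, v1, mask)` yields `(v2 & ~mask) | (v1 & mask)` regardless of what `dst` held beforehand, which is why the rewritten hunks below can simply skip whichever half of the pair the destination register already aliases.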
```diff
diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c
index 41ff0349..f6b5bbe4 100644
--- a/src/dynarec/arm64/dynarec_arm64_660f.c
+++ b/src/dynarec/arm64/dynarec_arm64_660f.c
@@ -466,9 +466,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             v1 = fpu_get_scratch(dyn, ninst);
             if(q0!=q1) {
                 VSSHRQ_8(v1, v0, 7); // bit[7]-> bit[7..0]
-                VBICQ(q0, q0, v1);
-                VANDQ(v1, q1, v1);
-                VORRQ(q0, q0, v1);
+                VBITQ(q0, q1, v1);
             }
             break;
 
@@ -1050,38 +1048,13 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
         case 0x0E:
             INST_NAME("PBLENDW Gx, Ex, Ib");
             nextop = F8;
-            GETGX(q0, 1);
-            GETEX(q1, 0, 1);
+            GETGX(v0, 1);
+            GETEX(v1, 0, 1);
             u8 = F8;
-            i32 = 0;
-            if(q0!=q1)
-                while(u8) {
-                    if(u8&1) {
-                        if(!(i32&1) && u8&2) {
-                            if(!(i32&3) && (u8&0xf)==0xf) {
-                                // whole 64bits
-                                VMOVeD(q0, i32>>2, q1, i32>>2);
-                                i32+=4;
-                                u8>>=4;
-                            } else {
-                                // 32bits
-                                VMOVeS(q0, i32>>1, q1, i32>>1);
-                                i32+=2;
-                                u8>>=2;
-                            }
-                        } else {
-                            // 16 bits
-                            VMOVeH(q0, i32, q1, i32);
-                            i32++;
-                            u8>>=1;
-                        }
-                    } else {
-                        // nope
-                        i32++;
-                        u8>>=1;
-                    }
-
-                }
+            q0 = fpu_get_scratch(dyn, ninst);
+            MOVI_64(q0, u8);
+            SXTL_8(q0, q0); // expand 8bits to 16bits...
+            VBITQ(v0, v1, q0);
             break;
         case 0x0F:
             INST_NAME("PALIGNR Gx, Ex, Ib");
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c
index 5830a2f2..549ec78d 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c
@@ -343,12 +343,8 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
                     MOVI_64(q0, u8);
                     SXTL_8(q0, q0); // expand 8bits to 16bits...
                 }
-                if(v0==v1) {
-                    VBIFQ(v0, v2, q0);
-                } else {
-                    if(v0!=v2) VMOVQ(v0, v2);
-                    VBITQ(v0, v1, q0);
-                }
+                if(v0!=v2) VBIFQ(v0, v2, q0);
+                if(v0!=v1) VBITQ(v0, v1, q0);
             }
             if(!vex.l) YMM0(gd);
             break;
@@ -747,12 +743,8 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
                     v0 = ymm_get_reg_empty(dyn, ninst, x1, gd, vex.v, u8, (MODREG)?((nextop&7)+(rex.b<<3)):-1);
                 }
                 VSSHRQ_32(q0, q1, 31); // create mask
-                if(v0==v1)
-                    VBIFQ(v0, v2, q0);
-                else {
-                    if(v0!=v2) VMOVQ(v0, v2);
-                    VBITQ(v0, v1, q0);
-                }
+                if(v0!=v2) VBIFQ(v0, v2, q0);
+                if(v0!=v1) VBITQ(v0, v1, q0);
             }
             if(!vex.l) YMM0(gd);
             break;
@@ -776,41 +768,34 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
                     v0 = ymm_get_reg_empty(dyn, ninst, x1, gd, vex.v, u8, (MODREG)?((nextop&7)+(rex.b<<3)):-1);
                 }
                 VSSHRQ_64(q0, q1, 63); // create mask
-                if(v0==v1)
-                    VBIFQ(v0, v2, q0);
-                else {
-                    if(v0!=v2) VMOVQ(v0, v2);
-                    VBITQ(v0, v1, q0);
-                }
+                if(v0!=v2) VBIFQ(v0, v2, q0);
+                if(v0!=v1) VBITQ(v0, v1, q0);
             }
             if(!vex.l) YMM0(gd);
             break;
         case 0x4C:
-            INST_NAME("VBLENDPVB Gx, Vx, Ex, XMMImm8");
+            INST_NAME("VPBLENDVB Gx, Vx, Ex, XMMImm8");
             nextop = F8;
             q0 = fpu_get_scratch(dyn, ninst);
             u8 = geted_ib(dyn, addr, ninst, nextop)>>4;
+            ed = (nextop&7)+(rex.b<<3);
             for(int l=0; l<1+vex.l; ++l) {
                 if(!l) {
                     q1 = sse_get_reg(dyn, ninst, x1, u8, 0);
                     GETGX_empty_VXEX(v0, v2, v1, 1);
                     F8;
                 } else {
-                    v2 = ymm_get_reg(dyn, ninst, x1, vex.v, 0, gd, u8, (MODREG)?((nextop&7)+(rex.b<<3)):-1);
+                    v2 = ymm_get_reg(dyn, ninst, x1, vex.v, 0, gd, u8, (MODREG)?ed:-1);
                     if(MODREG)
-                        v1 = ymm_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), 0, gd, vex.v, u8);
+                        v1 = ymm_get_reg(dyn, ninst, x1, ed, 0, gd, vex.v, u8);
                     else
                         VLDR128_U12(v1, ed, fixedaddress+16);
-                    q1 = ymm_get_reg(dyn, ninst, x1, u8, 0, vex.v, gd, (MODREG)?((nextop&7)+(rex.b<<3)):-1);
-                    v0 = ymm_get_reg_empty(dyn, ninst, x1, gd, vex.v, u8, (MODREG)?((nextop&7)+(rex.b<<3)):-1);
+                    q1 = ymm_get_reg(dyn, ninst, x1, u8, 0, vex.v, gd, (MODREG)?ed:-1);
+                    v0 = ymm_get_reg_empty(dyn, ninst, x1, gd, vex.v, u8, (MODREG)?ed:-1);
                 }
                 VSSHRQ_8(q0, q1, 7); // create mask
-                if(v0==v1)
-                    VBIFQ(v0, v2, q0);
-                else {
-                    if(v0!=v2) VMOVQ(v0, v2);
-                    VBITQ(v0, v1, q0);
-                }
+                if(v0!=v2) VBIFQ(v0, v2, q0);
+                if(v0!=v1) VBITQ(v0, v1, q0);
             }
             if(!vex.l) YMM0(gd);
             break;
```
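
For PBLENDW the immediate selects 16-bit lanes, and the removed while-loop walked it bit by bit, coalescing runs of selected words into 16-, 32- or 64-bit element moves (`VMOVeH`/`VMOVeS`/`VMOVeD`). The replacement builds the lane mask directly instead: the 64-bit MOVI encoding (wrapped here as `MOVI_64`) expands each bit of the 8-bit immediate into a 0x00 or 0xFF byte, `SXTL_8` sign-extends those bytes into 0x0000/0xFFFF halfwords, and a single `VBITQ` performs the whole blend. A plain-C sketch of that expansion, with illustrative helper names rather than box64 APIs:

```c
#include <stdint.h>

// Model of MOVI Vd.2D, #imm8 ("byte mask" encoding): bit i of the
// immediate becomes byte i of the result, 0x00 or 0xFF.
static uint64_t movi_byte_mask(uint8_t imm8) {
    uint64_t r = 0;
    for (int i = 0; i < 8; ++i)
        if (imm8 & (1u << i))
            r |= 0xFFull << (8 * i);
    return r;
}

// Model of SXTL widening the mask bytes to halfwords: a 0xFF byte
// sign-extends to 0xFFFF, giving one all-ones/all-zeros mask per lane.
static void pblendw_mask(uint8_t imm8, uint16_t mask[8]) {
    uint64_t bytes = movi_byte_mask(imm8);
    for (int i = 0; i < 8; ++i) {
        uint8_t b = (uint8_t)(bytes >> (8 * i));
        mask[i] = (uint16_t)(int16_t)(int8_t)b;
    }
}

// The blend itself is then a single BIT: dst lanes whose mask is set
// take src, the others keep their value.
static void pblendw(uint16_t dst[8], const uint16_t src[8], uint8_t imm8) {
    uint16_t mask[8];
    pblendw_mask(imm8, mask);
    for (int i = 0; i < 8; ++i)
        dst[i] = (uint16_t)((dst[i] & ~mask[i]) | (src[i] & mask[i]));
}
```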