| author | ptitSeb <sebastien.chev@gmail.com> | 2024-06-21 21:22:43 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2024-06-21 21:22:43 +0200 |
| commit | a3413ba96dc4ad4dc2c7c7780d399698285d5e23 (patch) | |
| tree | c0d10d4e1201799bfc053a3e31bbdcb32cbede98 /src | |
| parent | 2b625e4aa5d9d833f3aa68015e57601d310ea223 (diff) | |
| download | box64-a3413ba96dc4ad4dc2c7c7780d399698285d5e23.tar.gz box64-a3413ba96dc4ad4dc2c7c7780d399698285d5e23.zip | |
[ARM64_DYNAREC] Improved AVX.66.0F38 2E/2F to avoid segfault when mask is null
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c | 68 |
1 file changed, 59 insertions, 9 deletions
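For context: VMASKMOVPS and VMASKMOVPD load or store each packed element only when the sign bit of the corresponding mask element is set, and masked-off elements must not access memory at all, so a null (all-zero) mask must not fault even when Ex points at an unmapped page. A minimal sketch of the 128-bit load semantics (a hypothetical reference helper, not box64 code):

```c
#include <stdint.h>

// Reference sketch of the 128-bit VMASKMOVPS load (hypothetical helper, not box64 code):
// each 32-bit lane is loaded only when the sign bit of its mask lane is set, and
// masked-off lanes must not touch memory, so an all-zero mask must never fault,
// even when mem points at an unmapped page.
static void vmaskmovps_load128(uint32_t dst[4], const uint32_t mask[4], const uint32_t* mem)
{
    for (int i = 0; i < 4; i++)
        dst[i] = (mask[i] & 0x80000000u) ? mem[i] : 0;  // untouched lanes read as zero
}
```

The previous implementation (the GETGXVXEX macro in the diff below) always performed the full 128-bit access on Ex, which is exactly what segfaulted with a null mask; this commit guards the access on the mask instead.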
```diff
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
index 0bb1b37c..33fb9d48 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
@@ -548,36 +548,86 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
         case 0x2E:
             INST_NAME("VMASKMOVPS Ex, Gx, Vx");
             nextop = F8;
-            GETGXVXEX(v0, v2, v1, 0);
             q0 = fpu_get_scratch(dyn, ninst);
-            // create mask
+            GETVX(v2, 0);
+            GETGX(v0, 0);
             VSSHRQ_32(q0, v2, 31);
+            if(MODREG) {
+                v1 = sse_get_reg(dyn, ninst, x3, (nextop&7)+(rex.b<<3), 1);
+            } else {
+                WILLWRITE2();
+                addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, NULL, 0xffe<<4, 15, rex, NULL, 0, 0);
+                unscaled = 0;
+                v1 = fpu_get_scratch(dyn, ninst);
+                // check if the mask has anything set, else skip the whole read/write to avoid a SEGFAULT.
+                // TODO: let a segfault trigger and check if the mask is null instead, and ignore the segfault / actually trigger: needs to implement SSE reg tracking first!
+                SQXTN_32(v1, q0);
+                VMOVQDto(x4, v1, 0);
+                CBZx(x4, 4+3*4);
+                VLDR128_U12(v1, ed, fixedaddress);
+            }
+            // create mask
             VBITQ(v1, v0, q0);
             if(!MODREG) {
                 VSTR128_U12(v1, ed, fixedaddress);
             }
-            if(vex.l) {
-                GETGYVYEY(v0, v2, v1);
+            if(vex.l && !is_avx_zero(dyn, ninst, vex.v)) {
+                v2 = ymm_get_reg(dyn, ninst, x1, vex.v, 0, gd, (MODREG)?((nextop&7)+(rex.b<<3)):-1, -1);
+                v0 = ymm_get_reg(dyn, ninst, x1, gd, 0, vex.v, (MODREG)?((nextop&7)+(rex.b<<3)):-1, -1);
                 VSSHRQ_32(q0, v2, 31);
+                if(MODREG)
+                    v1 = ymm_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), 1, gd, vex.v, -1);
+                else {
+                    SQXTN_32(v1, q0);
+                    VMOVQDto(x4, v1, 0);
+                    CBZx(x4, 4+3*4);
+                    VLDR128_U12(v1, ed, fixedaddress+16);
+                }
                 VBITQ(v1, v0, q0);
-                if(!MODREG)
+                if(!MODREG) {
                     VSTR128_U12(v1, ed, fixedaddress+16);
+                }
             }
             break;
         case 0x2F:
             INST_NAME("VMASKMOVPD Ex, Gx, Vx");
             nextop = F8;
-            GETGXVXEX(v0, v2, v1, 0);
             q0 = fpu_get_scratch(dyn, ninst);
-            // create mask
+            q1 = fpu_get_scratch(dyn, ninst);
+            GETVX(v2, 0);
+            GETGX(v0, 0);
             VSSHRQ_64(q0, v2, 63);
+            if(MODREG) {
+                v1 = sse_get_reg(dyn, ninst, x3, (nextop&7)+(rex.b<<3), 1);
+            } else {
+                WILLWRITE2();
+                addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, NULL, 0xffe<<4, 15, rex, NULL, 0, 0);
+                unscaled = 0;
+                v1 = fpu_get_scratch(dyn, ninst);
+                // check if the mask has anything set, else skip the whole read/write to avoid a SEGFAULT.
+                // TODO: let a segfault trigger and check if the mask is null instead, and ignore the segfault / actually trigger: needs to implement SSE reg tracking first!
+                SQXTN_32(q1, q0);
+                VMOVQDto(x4, q1, 0);
+                CBZx(x4, 4+3*4);
+                VLDR128_U12(v1, ed, fixedaddress);
+            }
+            // create mask
             VBITQ(v1, v0, q0);
             if(!MODREG) {
                 VSTR128_U12(v1, ed, fixedaddress);
             }
-            if(vex.l) {
-                GETGYVYEY(v0, v2, v1);
+            if(vex.l && !is_avx_zero(dyn, ninst, vex.v)) {
+                v2 = ymm_get_reg(dyn, ninst, x1, vex.v, 0, gd, (MODREG)?((nextop&7)+(rex.b<<3)):-1, -1);
+                v0 = ymm_get_reg(dyn, ninst, x1, gd, 0, vex.v, (MODREG)?((nextop&7)+(rex.b<<3)):-1, -1);
                 VSSHRQ_64(q0, v2, 63);
+                if(MODREG)
+                    v1 = ymm_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), 1, gd, vex.v, -1);
+                else {
+                    SQXTN_32(q1, q0);
+                    VMOVQDto(x4, q1, 0);
+                    CBZx(x4, 4+3*4);
+                    VLDR128_U12(v1, ed, fixedaddress+16);
+                }
                 VBITQ(v1, v0, q0);
                 if(!MODREG) {
                     VSTR128_U12(v1, ed, fixedaddress+16);
```
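The guard added above collapses the per-lane mask into one scalar test: VSSHRQ (arithmetic shift right by 31 or 63) smears each lane's sign bit across the whole lane, SQXTN saturating-narrows the 128-bit mask into the low 64 bits of a vector register, VMOVQDto moves those 64 bits into x4, and CBZx(x4, 4+3*4) appears to branch over the following load, blend, and store (three 4-byte instructions) when x4 is zero. A rough C model of that test, modeling the narrow as a signed saturating 64-to-32-bit narrow of each half (illustrative only, not the emitter's exact lane width):

```c
#include <stdint.h>

// Rough model of the SQXTN + VMOVQDto + CBZx guard from the diff (illustrative only).
// After the VSSHRQ arithmetic shift, every mask lane is all-zeros or all-ones, so the
// 128-bit mask register is zero iff no lane is set. Narrowing each 64-bit half to
// 32 bits with signed saturation keeps any nonzero half nonzero, leaving a single
// 64-bit value that can be tested against zero, as CBZx does with x4.
static int64_t saturate_to_32(int64_t v)
{
    if (v > INT32_MAX) return INT32_MAX;
    if (v < INT32_MIN) return INT32_MIN;
    return v;
}

static int mask_any_lane_set(const int64_t mask_halves[2])
{
    uint64_t packed = ((uint64_t)(uint32_t)saturate_to_32(mask_halves[0]))
                    | ((uint64_t)(uint32_t)saturate_to_32(mask_halves[1]) << 32);
    return packed != 0;  // CBZx skips the load/blend/store when this is zero
}
```

Saturation is what makes the trick safe: a plain truncating narrow would keep only the low half of each 64-bit chunk and could report a mask like {0, -1, 0, 0} as empty, while a signed saturating narrow maps any nonzero input to a nonzero output here. The TODO in the diff notes the intended long-term approach: let the access fault and sort out the mask in the segfault handler, which first requires SSE register tracking.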