| field | value | date |
|---|---|---|
| author | ptitSeb <sebastien.chev@gmail.com> | 2025-05-01 10:45:00 +0200 |
| committer | ptitSeb <sebastien.chev@gmail.com> | 2025-05-01 10:45:00 +0200 |
| commit | 16b0922ec54fe8355b7288f262b5da011320523f (patch) | |
| tree | 811422732ba6e00a025863c45790753318ad1d99 /src | |
| parent | 9791016c74066e10f7a85773bf838340be75c427 (diff) | |
| download | box64-16b0922ec54fe8355b7288f262b5da011320523f.tar.gz box64-16b0922ec54fe8355b7288f262b5da011320523f.zip | |
[ARM_DYNAREC] Small improvements to VMASKMOVP[S/D] opcodes
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c | 163 |
1 file changed, 63 insertions, 100 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
index b35446ab..b404f879 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
@@ -691,20 +691,17 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
             q0 = fpu_get_scratch(dyn, ninst);
             VSSHRQ_32(q0, v2, 31);
             if (MODREG) {
-                v1 = sse_get_reg(dyn, ninst, x3, (nextop & 7) + (rex.b << 3), 0);
-                VANDQ(v0, v1, q0);
+                DEFAULT;
+                return addr;
             } else {
                 VEORQ(v0, v0, v0);
                 SMREAD();
                 addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
-                if(ed!=x3) {
-                    MOVz_REG(x3, ed);
-                    ed = x3;
-                }
                 VMOVSto(x4, q0, 0);
                 CBZw(x4, 4+1*4);
                 VLD1_32(v0, 0, ed);
-                ADDx_U12(ed, ed, 4);
+                ADDx_U12(x3, ed, 4);
+                ed = x3;
                 VMOVSto(x4, q0, 1);
                 CBZw(x4, 4+1*4);
                 VLD1_32(v0, 1, ed);
@@ -723,27 +720,22 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
                 v2 = ymm_get_reg(dyn, ninst, x1, vex.v, 0, gd, (MODREG)?((nextop&7)+(rex.b<<3)):-1, -1);
                 v0 = ymm_get_reg_empty(dyn, ninst, x1, gd, vex.v, (MODREG)?((nextop&7)+(rex.b<<3)):-1, -1);
                 VSSHRQ_32(q0, v2, 31);
-                if(MODREG) {
-                    v1 = ymm_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), 0, gd, vex.v, -1);
-                    VANDQ(v0, v1, q0);
-                } else {
-                    VEORQ(v0, v0, v0);
-                    VMOVSto(x4, q0, 0);
-                    CBZw(x4, 4+1*4);
-                    VLD1_32(v0, 0, ed);
-                    ADDx_U12(ed, ed, 4);
-                    VMOVSto(x4, q0, 1);
-                    CBZw(x4, 4+1*4);
-                    VLD1_32(v0, 1, ed);
-                    ADDx_U12(ed, ed, 4);
-                    VMOVSto(x4, q0, 2);
-                    CBZw(x4, 4+1*4);
-                    VLD1_32(v0, 2, ed);
-                    ADDx_U12(ed, ed, 4);
-                    VMOVSto(x4, q0, 3);
-                    CBZw(x4, 4+1*4);
-                    VLD1_32(v0, 3, ed);
-                }
+                VEORQ(v0, v0, v0);
+                VMOVSto(x4, q0, 0);
+                CBZw(x4, 4+1*4);
+                VLD1_32(v0, 0, ed);
+                ADDx_U12(ed, ed, 4);
+                VMOVSto(x4, q0, 1);
+                CBZw(x4, 4+1*4);
+                VLD1_32(v0, 1, ed);
+                ADDx_U12(ed, ed, 4);
+                VMOVSto(x4, q0, 2);
+                CBZw(x4, 4+1*4);
+                VLD1_32(v0, 2, ed);
+                ADDx_U12(ed, ed, 4);
+                VMOVSto(x4, q0, 3);
+                CBZw(x4, 4+1*4);
+                VLD1_32(v0, 3, ed);
             } else YMM0(gd);
             break;
         case 0x2D:
@@ -756,19 +748,16 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
             VSSHRQ_64(q0, v2, 63);
             VEORQ(v0, v0, v0);
             if (MODREG) {
-                v1 = sse_get_reg(dyn, ninst, x3, (nextop & 7) + (rex.b << 3), 0);
-                VANDQ(v0, v1, q0);
+                DEFAULT;
+                return addr;
             } else {
                 SMREAD();
                 addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
-                if(ed!=x3) {
-                    MOVz_REG(x3, ed);
-                    ed = x3;
-                }
                 VMOVQDto(x4, q0, 0);
                 CBZx(x4, 4+1*4);
                 VLD1_64(v0, 0, ed);
-                ADDx_U12(ed, ed, 8);
+                ADDx_U12(x3, ed, 8);
+                ed = x3;
                 VMOVQDto(x4, q0, 1);
                 CBZx(x4, 4+1*4);
                 VLD1_64(v0, 1, ed);
@@ -780,21 +769,13 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
                 v0 = ymm_get_reg_empty(dyn, ninst, x1, gd, vex.v, (MODREG)?((nextop&7)+(rex.b<<3)):-1, -1);
                 VSSHRQ_64(q0, v2, 63);
                 VEORQ(v0, v0, v0);
-                if(MODREG)
-                {
-                    v1 = ymm_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), 0, gd, vex.v, -1);
-                    VANDQ(v0, v1, q0);
-                }
-                else
-                {
-                    VMOVQDto(x4, q0, 0);
-                    CBZx(x4, 4+1*4);
-                    VLD1_64(v0, 0, ed);
-                    ADDx_U12(ed, ed, 8);
-                    VMOVQDto(x4, q0, 1);
-                    CBZx(x4, 4+1*4);
-                    VLD1_64(v0, 1, ed);
-                }
+                VMOVQDto(x4, q0, 0);
+                CBZx(x4, 4+1*4);
+                VLD1_64(v0, 0, ed);
+                ADDx_U12(ed, ed, 8);
+                VMOVQDto(x4, q0, 1);
+                CBZx(x4, 4+1*4);
+                VLD1_64(v0, 1, ed);
             } else YMM0(gd);
             break;
         case 0x2E:
@@ -805,20 +786,17 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
             GETGX(v0, 0);
             VSSHRQ_32(q0, v2, 31);
             if(MODREG) {
-                v1 = sse_get_reg(dyn, ninst, x3, (nextop&7)+(rex.b<<3), 1);
-                VBITQ(v1, v0, q0);
+                DEFAULT;
+                return addr;
             } else {
                 addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
-                if(ed!=x3) {
-                    MOVz_REG(x3, ed);
-                    ed = x3;
-                }
                 // check if mask as anything, else skip the whole read/write to avoid a SEGFAULT.
                 // TODO: let a segfault trigger and check if the mask is null instead and ignore the segfault / actually triger: needs to implement SSE reg tracking first!
                 VMOVSto(x4, q0, 0);
                 CBZw(x4, 4+1*4);
                 VST1_32(v0, 0, ed);
-                ADDx_U12(ed, ed, 4);
+                ADDx_U12(x3, ed, 4);
+                ed = x3;
                 VMOVSto(x4, q0, 1);
                 CBZw(x4, 4+1*4);
                 VST1_32(v0, 1, ed);
@@ -838,29 +816,23 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
                 v2 = ymm_get_reg(dyn, ninst, x1, vex.v, 0, gd, (MODREG)?((nextop&7)+(rex.b<<3)):-1, -1);
                 v0 = ymm_get_reg(dyn, ninst, x1, gd, 0, vex.v, (MODREG)?((nextop&7)+(rex.b<<3)):-1, -1);
                 VSSHRQ_32(q0, v2, 31);
-                if(MODREG) {
-                    v1 = ymm_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), 1, gd, vex.v, -1);
-                    VBITQ(v1, v0, q0);
-                }
-                else {
-                    VMOVSto(x4, q0, 0);
-                    CBZw(x4, 4+1*4);
-                    VST1_32(v0, 0, ed);
-                    ADDx_U12(ed, ed, 4);
-                    VMOVSto(x4, q0, 1);
-                    CBZw(x4, 4+1*4);
-                    VST1_32(v0, 1, ed);
-                    ADDx_U12(ed, ed, 4);
-                    VMOVSto(x4, q0, 2);
-                    CBZw(x4, 4+1*4);
-                    VST1_32(v0, 2, ed);
-                    ADDx_U12(ed, ed, 4);
-                    VMOVSto(x4, q0, 3);
-                    CBZw(x4, 4+1*4);
-                    VST1_32(v0, 3, ed);
-                }
+                VMOVSto(x4, q0, 0);
+                CBZw(x4, 4+1*4);
+                VST1_32(v0, 0, ed);
+                ADDx_U12(ed, ed, 4);
+                VMOVSto(x4, q0, 1);
+                CBZw(x4, 4+1*4);
+                VST1_32(v0, 1, ed);
+                ADDx_U12(ed, ed, 4);
+                VMOVSto(x4, q0, 2);
+                CBZw(x4, 4+1*4);
+                VST1_32(v0, 2, ed);
+                ADDx_U12(ed, ed, 4);
+                VMOVSto(x4, q0, 3);
+                CBZw(x4, 4+1*4);
+                VST1_32(v0, 3, ed);
             }
-            if(!MODREG) {SMWRITE2();}
+            SMWRITE2();
             break;
         case 0x2F:
             INST_NAME("VMASKMOVPD Ex, Gx, Vx");
@@ -870,14 +842,10 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
             GETGX(v0, 0);
             VSSHRQ_64(q0, v2, 63);
             if(MODREG) {
-                v1 = sse_get_reg(dyn, ninst, x3, (nextop&7)+(rex.b<<3), 1);
-                VBITQ(v1, v0, q0);
+                DEFAULT;
+                return addr;
             } else {
                 addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
-                if(ed!=x3) {
-                    MOVz_REG(x3, ed);
-                    ed = x3;
-                }
                 unscaled = 0;
                 v1 = fpu_get_scratch(dyn, ninst);
                 // check if mask as anything, else skip the whole read/write to avoid a SEGFAULT.
@@ -885,7 +853,8 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
                 VMOVQDto(x4, q0, 0);
                 CBZx(x4, 4+1*4);
                 VST1_64(v0, 0, ed);
-                ADDx_U12(ed, ed, 8);
+                ADDx_U12(x3, ed, 8);
+                ed = x3;
                 VMOVQDto(x4, q0, 1);
                 CBZx(x4, 4+1*4);
                 VST1_64(v0, 1, ed);
@@ -897,21 +866,15 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
                 v2 = ymm_get_reg(dyn, ninst, x1, vex.v, 0, gd, (MODREG)?((nextop&7)+(rex.b<<3)):-1, -1);
                 v0 = ymm_get_reg(dyn, ninst, x1, gd, 0, vex.v, (MODREG)?((nextop&7)+(rex.b<<3)):-1, -1);
                 VSSHRQ_64(q0, v2, 63);
-                if(MODREG) {
-                    v1 = ymm_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), 1, gd, vex.v, -1);
-                    VBITQ(v1, v0, q0);
-                }
-                else {
-                    VMOVQDto(x4, q0, 0);
-                    CBZx(x4, 4+1*4);
-                    VST1_64(v0, 0, ed);
-                    ADDx_U12(ed, ed, 8);
-                    VMOVQDto(x4, q0, 1);
-                    CBZx(x4, 4+1*4);
-                    VST1_64(v0, 1, ed);
-                }
+                VMOVQDto(x4, q0, 0);
+                CBZx(x4, 4+1*4);
+                VST1_64(v0, 0, ed);
+                ADDx_U12(ed, ed, 8);
+                VMOVQDto(x4, q0, 1);
+                CBZx(x4, 4+1*4);
+                VST1_64(v0, 1, ed);
             }
-            if(!MODREG) {SMWRITE2();}
+            SMWRITE2();
             break;
         case 0x30:
             INST_NAME("VPMOVZXBW Gx, Ex");
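For context, here is a minimal scalar sketch of the VMASKMOVPS semantics the dynarec reproduces lane by lane (VMASKMOVPD is the same with two 64-bit lanes); the helper names are invented for illustration and are not box64 code. It shows why each mask lane is tested with CBZw/CBZx before the matching VLD1/VST1: lanes whose mask sign bit is clear must be neither read nor written, so a partially unmapped buffer must not fault. It also suggests why the register (MODREG) form now falls back to DEFAULT: VMASKMOVPS/PD take a memory operand only, so the register encoding is left to the generic handler.

```c
#include <stdint.h>
#include <stdio.h>

/* Hypothetical scalar model of the 128-bit VMASKMOVPS load form
 * (xmm dst, xmm mask, m128 src): a 32-bit lane is read only when the
 * sign bit of the matching mask lane is set; masked-off lanes stay
 * zero and their memory is never accessed, so an unmapped tail cannot
 * fault. This mirrors the per-lane CBZw + VLD1_32 sequence emitted
 * by the dynarec above. */
static void vmaskmovps_load_128(uint32_t dst[4], const uint32_t mask[4],
                                const uint32_t *mem)
{
    for (int i = 0; i < 4; ++i)
        dst[i] = (mask[i] >> 31) ? mem[i] : 0; /* mem[i] touched only when selected */
}

/* Store form (m128 dst, xmm mask, xmm src): only selected lanes are
 * written, all other memory is left untouched (CBZw + VST1_32). */
static void vmaskmovps_store_128(uint32_t *mem, const uint32_t mask[4],
                                 const uint32_t src[4])
{
    for (int i = 0; i < 4; ++i)
        if (mask[i] >> 31)
            mem[i] = src[i];
}

int main(void)
{
    uint32_t mask[4] = { 0x80000000u, 0, 0x80000000u, 0 };
    uint32_t mem[4]  = { 1, 2, 3, 4 };
    uint32_t dst[4];

    vmaskmovps_load_128(dst, mask, mem);   /* dst = {1, 0, 3, 0} */
    vmaskmovps_store_128(mem, mask, dst);  /* lanes 1 and 3 of mem untouched */
    printf("%u %u %u %u\n", dst[0], dst[1], dst[2], dst[3]);
    return 0;
}
```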