Diffstat (limited to 'src')
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c | 68
1 file changed, 59 insertions(+), 9 deletions(-)
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
index 0bb1b37c..33fb9d48 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
@@ -548,36 +548,86 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
         case 0x2E:
             INST_NAME("VMASKMOVPS Ex, Gx, Vx");
             nextop = F8;
-            GETGXVXEX(v0, v2, v1, 0);
             q0 = fpu_get_scratch(dyn, ninst);
-            // create mask
+            GETVX(v2, 0);
+            GETGX(v0, 0);
             VSSHRQ_32(q0, v2, 31);
+            if(MODREG) {
+                v1 = sse_get_reg(dyn, ninst, x3, (nextop&7)+(rex.b<<3), 1);
+            } else {
+                WILLWRITE2();
+                addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, NULL, 0xffe<<4, 15, rex, NULL, 0, 0);
+                unscaled = 0;
+                v1 = fpu_get_scratch(dyn, ninst);
+                // check if the mask has anything set, else skip the whole read/write to avoid a SEGFAULT.
+                // TODO: let the segfault trigger, then check if the mask is null to either ignore the segfault or actually trigger it: needs to implement SSE reg tracking first!
+                SQXTN_32(v1, q0);
+                VMOVQDto(x4, v1, 0);
+                CBZx(x4, 4+3*4);
+                VLDR128_U12(v1, ed, fixedaddress);
+            }
+            // create mask
             VBITQ(v1, v0, q0);
             if(!MODREG) {
                 VSTR128_U12(v1, ed, fixedaddress);
             }
-            if(vex.l) {
-                GETGYVYEY(v0, v2, v1);
+            if(vex.l && !is_avx_zero(dyn, ninst, vex.v)) {
+                v2 = ymm_get_reg(dyn, ninst, x1, vex.v, 0, gd, (MODREG)?((nextop&7)+(rex.b<<3)):-1, -1);
+                v0 = ymm_get_reg(dyn, ninst, x1, gd, 0, vex.v, (MODREG)?((nextop&7)+(rex.b<<3)):-1, -1);
                 VSSHRQ_32(q0, v2, 31);
+                if(MODREG)
+                    v1 = ymm_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), 1, gd, vex.v, -1);
+                else {
+                    SQXTN_32(v1, q0);
+                    VMOVQDto(x4, v1, 0);
+                    CBZx(x4, 4+3*4);
+                    VLDR128_U12(v1, ed, fixedaddress+16);
+                }
                 VBITQ(v1, v0, q0);
-                if(!MODREG)
+                if(!MODREG) {
                     VSTR128_U12(v1, ed, fixedaddress+16);
+                }
             }
             break;
         case 0x2F:
             INST_NAME("VMASKMOVPD Ex, Gx, Vx");
             nextop = F8;
-            GETGXVXEX(v0, v2, v1, 0);
             q0 = fpu_get_scratch(dyn, ninst);
-            // create mask
+            q1 = fpu_get_scratch(dyn, ninst);
+            GETVX(v2, 0);
+            GETGX(v0, 0);
             VSSHRQ_64(q0, v2, 63);
+            if(MODREG) {
+                v1 = sse_get_reg(dyn, ninst, x3, (nextop&7)+(rex.b<<3), 1);
+            } else {
+                WILLWRITE2();
+                addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, NULL, 0xffe<<4, 15, rex, NULL, 0, 0);
+                unscaled = 0;
+                v1 = fpu_get_scratch(dyn, ninst);
+                // check if the mask has anything set, else skip the whole read/write to avoid a SEGFAULT.
+                // TODO: let the segfault trigger, then check if the mask is null to either ignore the segfault or actually trigger it: needs to implement SSE reg tracking first!
+                SQXTN_32(q1, q0);
+                VMOVQDto(x4, q1, 0);
+                CBZx(x4, 4+3*4);
+                VLDR128_U12(v1, ed, fixedaddress);
+            }
+            // create mask
             VBITQ(v1, v0, q0);
             if(!MODREG) {
                 VSTR128_U12(v1, ed, fixedaddress);
             }
-            if(vex.l) {
-                GETGYVYEY(v0, v2, v1);
+            if(vex.l && !is_avx_zero(dyn, ninst, vex.v)) {
+                v2 = ymm_get_reg(dyn, ninst, x1, vex.v, 0, gd, (MODREG)?((nextop&7)+(rex.b<<3)):-1, -1);
+                v0 = ymm_get_reg(dyn, ninst, x1, gd, 0, vex.v, (MODREG)?((nextop&7)+(rex.b<<3)):-1, -1);
                 VSSHRQ_64(q0, v2, 63);
+                if(MODREG)
+                    v1 = ymm_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), 1, gd, vex.v, -1);
+                else {
+                    SQXTN_32(q1, q0);
+                    VMOVQDto(x4, q1, 0);
+                    CBZx(x4, 4+3*4);
+                    VLDR128_U12(v1, ed, fixedaddress+16);
+                }
                 VBITQ(v1, v0, q0);
                 if(!MODREG) {
                     VSTR128_U12(v1, ed, fixedaddress+16);
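
For reference, a minimal scalar sketch of the store form of VMASKMOVPS that the added block above approximates (VMASKMOVPD is the same idea with two 64-bit lanes and a shift by 63). The function name and signature below are illustrative only, not box64 code:

#include <stdint.h>

/* Rough scalar model of the memory form of VMASKMOVPS Ex, Gx, Vx (store side). */
static void vmaskmovps_store_model(float* mem, const uint32_t mask[4], const float src[4])
{
    /* Fast path mirrored by SQXTN_32 / VMOVQDto / CBZx in the JITted code:
       if no lane has its sign bit set, memory is never touched. */
    uint32_t any = 0;
    for (int i = 0; i < 4; ++i)
        any |= mask[i] & 0x80000000u;
    if (!any)
        return;

    /* The JIT then does VLDR (full 16-byte load), VBITQ (insert the lanes of Gx
       selected by the sign-extended mask from VSSHRQ_32) and VSTR (full 16-byte
       store); per lane that amounts to: */
    for (int i = 0; i < 4; ++i)
        if (mask[i] & 0x80000000u)
            mem[i] = src[i];
}

Note that the JITted load/blend/store rewrites the unselected lanes with their freshly loaded values, so only an all-zero mask avoids touching memory entirely; that is what the CBZx fast path guards against, and what the TODO about letting the segfault trigger refers to.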