about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author	ptitSeb <sebastien.chev@gmail.com>	2024-09-06 18:01:40 +0200
committer	ptitSeb <sebastien.chev@gmail.com>	2024-09-06 18:01:40 +0200
commit	a450b9d12f99df13adcae4a570aeb780964d43b2 (patch)
tree	e77ed434b11de03e9a9769f7da1fa2de9f29b895 /src
parent	5ac3eeef1112a8b446aa8955780858977902c265 (diff)
download	box64-a450b9d12f99df13adcae4a570aeb780964d43b2.tar.gz
	box64-a450b9d12f99df13adcae4a570aeb780964d43b2.zip
[ARM64_DYNAREC] Improved AVX.66.0F38 8C opcode
Diffstat (limited to 'src')
-rw-r--r--src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c70
1 file changed, 58 insertions, 12 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
index b7fe8a29..15a938bd 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
@@ -562,7 +562,7 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
                 addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, NULL, 0xffe<<4, 15, rex, NULL, 0, 0);
                 unscaled = 0;
                 v1 = fpu_get_scratch(dyn, ninst);
-                // check if mask as anything, else scipt the whole read/write to avoid a SEGFAULT.
+                // check if mask as anything, else skip the whole read/write to avoid a SEGFAULT.
                 // TODO: let a segfault trigger and check if the mask is null instead and ignore the segfault / actually triger: needs to implement SSE reg tracking first!
                 SQXTN_32(v1, q0);
                 VMOVQDto(x4, v1, 0);
@@ -607,7 +607,7 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
                 addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, NULL, 0xffe<<4, 15, rex, NULL, 0, 0);
                 unscaled = 0;
                 v1 = fpu_get_scratch(dyn, ninst);
-                // check if mask as anything, else scipt the whole read/write to avoid a SEGFAULT.
+                // check if mask as anything, else skip the whole read/write to avoid a SEGFAULT.
                 // TODO: let a segfault trigger and check if the mask is null instead and ignore the segfault / actually triger: needs to implement SSE reg tracking first!
                 SQXTN_32(q1, q0);
                 VMOVQDto(x4, q1, 0);
@@ -997,26 +997,72 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
             break;
 
         case 0x8C:
-            INST_NAME("VMASKMOVD/Q Vx, Ex, Gx");
+            INST_NAME("VPMASKMOVD/Q Gx, Vx, Ex");
             nextop = F8;
             q0 = fpu_get_scratch(dyn, ninst);
             for(int l=0; l<1+vex.l; ++l) {
-                if(!l) {
-                    GETGX_empty_VXEX(v0, v2, v1, 0);
+                if(MODREG) {
+                    if(!l) {
+                        GETGX_empty_VXEX(v0, v2, v1, 0);
+                    } else {
+                        GETGY_empty_VYEY(v0, v2, v1);
+                    }
+                    if(rex.w)
+                        VSSHRQ_64(q0, v2, 63);
+                    else
+                        VSSHRQ_32(q0, v2, 31);
+                    VANDQ(v0, v1, q0);
                 } else {
-                    GETGY_empty_VYEY(v0, v2, v1);
+                    if(!l) {
+                        GETGX_empty_VX(v0, v2);
+                        addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
+                        v1 = fpu_get_scratch(dyn, ninst);
+                    } else {
+                        GETGY_empty_VY(v0, v2, 0, -1, -1);
+                    }
+                    unscaled = 0;
+                    // check if mask as anything, else skip the whole read/write to avoid a SEGFAULT.
+                    // TODO: let a segfault trigger and check if the mask is null instead and ignore the segfault / actually triger: needs to implement SSE reg tracking first!
+                    VEORQ(v1, v1, v1);
+                    if(rex.w) {
+                        VSSHRQ_64(q0, v2, 63);
+                        VMOVQDto(x4, q0, 0);
+                        CBZx(x4, 4+1*4);
+                        VLD1_64(v1, 0, ed);
+                        ADDx_U12(ed, ed, 4);
+                        VMOVQDto(x4, q0, 1);
+                        CBZx(x4, 4+1*4);
+                        VLD1_64(v1, 1, ed);
+                        if(!l && vex.l)
+                            ADDx_U12(ed, ed, 4);
+                    } else {
+                        VSSHRQ_32(q0, v2, 31);
+                        VMOVSto(x4, q0, 0);
+                        CBZx(x4, 4+1*4);
+                        VLD1_32(v1, 0, ed);
+                        ADDx_U12(ed, ed, 4);
+                        VMOVSto(x4, q0, 1);
+                        CBZx(x4, 4+1*4);
+                        VLD1_32(v1, 1, ed);
+                        ADDx_U12(ed, ed, 4);
+                        VMOVSto(x4, q0, 2);
+                        CBZx(x4, 4+1*4);
+                        VLD1_32(v1, 2, ed);
+                        ADDx_U12(ed, ed, 4);
+                        VMOVSto(x4, q0, 3);
+                        CBZx(x4, 4+1*4);
+                        VLD1_32(v1, 3, ed);
+                        if(!l && vex.l)
+                            ADDx_U12(ed, ed, 4);
+                    }
+                    VMOVQ(v0, v1);
                 }
-                if(rex.w)
-                    VSSHRQ_64(q0, v2, 63);
-                else
-                    VSSHRQ_32(q0, v2, 31);
-                VANDQ(v0, v1, q0);
             }
             if(!vex.l) YMM0(gd);
             break;
 
         case 0x8E:
-            INST_NAME("VMASKMOVD/Q Ex, Vx, Gx");
+            INST_NAME("VPMASKMOVD/Q Ex, Vx, Gx");
             nextop = F8;
             q0 = fpu_get_scratch(dyn, ninst);
             for(int l=0; l<1+vex.l; ++l) {