author     ptitSeb <sebastien.chev@gmail.com>  2024-06-11 13:33:24 +0200
committer  ptitSeb <sebastien.chev@gmail.com>  2024-06-11 13:33:24 +0200
commit     e64b4d5c049e62c14051310d0297acfbbcd7f458 (patch)
tree       9f3aa636e07eded850b4de5ea9a015bacd8d6f4c /src
parent     59caccfeae8fec11b2806c5d856611b9b98f285e (diff)
[ARM64_DYNAREC] Fixed AVX.66.0F E1-E3 opcodes and Added AVX.66.0F C6 and AVX.0F 17 opcodes
Diffstat (limited to 'src')
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_avx_0f.c     | 12
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_avx_66_0f.c  | 59
2 files changed, 60 insertions(+), 11 deletions(-)
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_0f.c
index 1e939754..fabdc8a8 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_0f.c
@@ -196,6 +196,18 @@ uintptr_t dynarec64_AVX_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int
             }
             YMM0(gd);
             break;
+        case 0x17:
+            INST_NAME("VMOVHPS Ex,Gx");
+            nextop = F8;
+            GETGX(v0, 0);
+            if(MODREG) {
+                v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), 1);
+                VMOVeD(v1, 0, v0, 1);
+            } else {
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
+                VST1_64(v0, 1, ed);
+            }
+            break;
 
         case 0x28:
             INST_NAME("VMOVAPS Gx, Ex");
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c
index 62dd2c0f..a7392b29 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c
@@ -1205,6 +1205,31 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip,
                 LDRH_U12(gd, wback, u8*2);
             }
             break;
+        case 0xC6:
+            INST_NAME("VSHUFPD Gx, Vx, Ex, Ib");
+            nextop = F8;
+            q0 = fpu_get_scratch(dyn, ninst);
+            for(int l=0; l<1+vex.l; ++l) {
+                if(!l) { GETGX_empty_VXEX(v0, v2, v1, 1); u8 = F8; } else { GETGY_empty_VYEY(v0, v2, v1); u8>>=2; }
+                if((u8&3)==0b01)
+                    VEXTQ_8(v0, v2, v1, 8);
+                else if(v0==v1 && v0==v2) {
+                    switch(u8&3) {
+                        case 0b00: VDUPQ_64(v0, v0, 0); break;
+                        case 0b01: VEXTQ_8(v0, v2, v1, 8); break;
+                        case 0b10: break;
+                        case 0b11: VDUPQ_64(v0, v0, 1); break;
+                    }
+                } else if(v0==v1) {
+                    VMOVeD(v0, 1, v1, (u8>>1)&1);
+                    VMOVeD(v0, 0, v2, u8&1);
+                } else {
+                    if(v0!=v2 || (u8&1)) VMOVeD(v0, 0, v2, u8&1);
+                    VMOVeD(v0, 1, v1, (u8>>1)&1);
+                }
+            }
+            if(!vex.l) YMM0(gd);
+            break;
 
         case 0xD0:
             INST_NAME("VADDSUBPD Gx, Vx, Ex");
@@ -1462,11 +1487,13 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip,
             q1 = fpu_get_scratch(dyn, ninst);
             MOVI_32(q1, 15);
             for(int l=0; l<1+vex.l; ++l) {
-                if(!l) { GETGX_empty_VXEX(v0, v2, v1, 0); } else { GETGY_empty_VYEY(v0, v2, v1); }
-                UQXTN_32(q0, v1);
-                UMIN_32(q0, q0, q1);    // limit to -15 .. +15 values
-                NEG_16(q0, q0);
-                VDUPQ_16(q0, q0, 0);    // only the low 8bits will be used anyway
+                if(!l) { GETGX_empty_VXEX(v0, v2, v1, 0); } else { GETGY_empty_VY(v0, v2, 0, -1, -1); }
+                if(!l) {
+                    UQXTN_32(q0, v1);
+                    UMIN_32(q0, q0, q1);    // limit to -15 .. +15 values
+                    NEG_16(q0, q0);
+                    VDUPQ_16(q0, q0, 0);    // only the low 8bits will be used anyway
+                }
                 SSHLQ_16(v0, v2, q0);
             }
             if(!vex.l) YMM0(gd);
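
This is the E1 (VPSRAW) fix: the shift count comes from the low 64 bits of Ex and is shared by both 128-bit lanes, so it is now computed once (clamped to 15 with UMIN_32, then negated so NEON's signed shift-left acts as a right shift) instead of being reloaded from EY for the upper lane. A scalar sketch of the intended semantics, as an illustrative helper rather than box64 code:

#include <stdint.h>

/* VPSRAW: arithmetic right shift of 16-bit elements by one shared count.
   Counts above 15 saturate, which equals shifting by 15.
   Assumes >> on a negative signed value is arithmetic (true on gcc/clang). */
static void vpsraw_model(int16_t *dst, const int16_t *src, uint64_t count, int n)
{
    unsigned c = count > 15 ? 15u : (unsigned)count;
    for (int i = 0; i < n; ++i)
        dst[i] = (int16_t)(src[i] >> c);
}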
@@ -1478,16 +1505,26 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip,
             q1 = fpu_get_scratch(dyn, ninst);
             MOVI_32(q1, 31);
             for(int l=0; l<1+vex.l; ++l) {
-                if(!l) { GETGX_empty_VXEX(v0, v2, v1, 0); } else { GETGY_empty_VYEY(v0, v2, v1); }
-                UQXTN_32(q0, v1);
-                UMIN_32(q0, q0, q1);        // limit to 0 .. +31 values
-                NEG_32(q0, q0);
-                VDUPQ_32(q0, q0, 0);    // only the low 8bits will be used anyway
+                if(!l) { GETGX_empty_VXEX(v0, v2, v1, 0); } else { GETGY_empty_VY(v0, v2, 0, -1, -1); }
+                if(!l) {
+                    UQXTN_32(q0, v1);
+                    UMIN_32(q0, q0, q1);        // limit to 0 .. +31 values
+                    NEG_32(q0, q0);
+                    VDUPQ_32(q0, q0, 0);    // only the low 8bits will be used anyway
+                }
                 SSHLQ_32(v0, v2, q0);
             }
             if(!vex.l) YMM0(gd);
             break;
-
+        case 0xE3:
+            INST_NAME("VPAVGW Gx, Vx, Ex");
+            nextop = F8;
+            for(int l=0; l<1+vex.l; ++l) {
+                if(!l) { GETGX_empty_VXEX(v0, v2, v1, 0); } else { GETGY_empty_VYEY(v0, v2, v1); }
+                URHADDQ_16(v0, v2, v1);
+            }
+            if(!vex.l) YMM0(gd);
+            break;
         case 0xE4:
             INST_NAME("VPMULHUW Gx, Vx, Ex");
             nextop = F8;
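
The E2 (VPSRAD) hunk applies the same shared-count fix at 32-bit width (clamped to 31), and the new E3 case maps VPAVGW directly onto URHADDQ_16, NEON's unsigned rounding halving add. A minimal model of that per-element rounding average, as an illustrative helper rather than box64 code:

#include <stdint.h>

/* VPAVGW per element: (a + b + 1) >> 1, unsigned, computed in a wider
   type so the intermediate sum cannot overflow. */
static uint16_t pavgw_model(uint16_t a, uint16_t b)
{
    return (uint16_t)(((uint32_t)a + (uint32_t)b + 1u) >> 1);
}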