about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author	ptitSeb <sebastien.chev@gmail.com>	2024-06-28 10:35:59 +0200
committer	ptitSeb <sebastien.chev@gmail.com>	2024-06-28 10:35:59 +0200
commit	32b2b388c8d3e1ccbc036820eafba77f5425a761 (patch)
tree	ec81778860f19525fd3820788af50d2080d5b6b1 /src
parent	013643782f14b8545d30157cb33b13d405d29f7c (diff)
download	box64-32b2b388c8d3e1ccbc036820eafba77f5425a761.tar.gz
	box64-32b2b388c8d3e1ccbc036820eafba77f5425a761.zip
[ARM64_DYNAREC] Added AVX.66.0F38 91/93 opcodes
Diffstat (limited to 'src')
-rw-r--r--	src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c	74
1 file changed, 74 insertions, 0 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
index 8b0bcbab..41295f6d 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
@@ -1105,6 +1105,80 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
             }
             if(!vex.l) {YMM0(gd); YMM0(vex.v);}
             break;
+        case 0x91:  // VPGATHERQD/VPGATHERQQ — AVX2 gather with qword VSIB indices
+        case 0x93:  // VGATHERQPS/VGATHERQPD — same addressing, FP-typed variant
+            if(opcode==0x91) {INST_NAME("VPGATHERQD Gx, VSIB, Vx");} else {INST_NAME("VGATHERQPD/VGATHERQPS Gx, VSIB, Vx");}
+            nextop = F8;    // ModRM byte
+            if(((nextop&7)!=4) || MODREG) {UDF(0);}    // VSIB requires a SIB byte and a memory operand, else #UD
+            GETG;   // gd = destination register (ModRM.reg)
+            u8 = F8; //SIB byte: scale | index | base
+            if((u8&0x7)==0x5 && !(nextop&0xC0)) {
+                // base==0b101 with mod==0: no base register, sign-extended disp32 only
+                MOV64x(x5, F32S64);
+                eb1 = x5;
+            } else
+                eb1 = xRAX + (u8&0x7)+(rex.b<<3); // base
+            eb2 = ((u8>>3)&7)+(rex.x<<3); // index (XMM/YMM register number holding qword indices)
+            if(nextop&0x40)
+                i32 = F8S;      // mod==1: disp8
+            else if(nextop&0x80)
+                i32 = F32S;     // mod==2: disp32
+            else
+                i32 = 0;        // mod==0: no displacement
+            if(!i32) ed = eb1;
+            else {
+                // fold the displacement into scratch x3 so ed = base + disp
+                ed = x3;
+                if(i32>0 && i32<4096) ADDx_U12(ed, eb1, i32);
+                else if(i32<0 && i32>-4096) SUBx_U12(ed, eb1, -i32);
+                else {MOV64x(ed, i32); ADDx_REG(ed, ed, eb1);}
+            }
+            // ed is base
+            wb1 = u8>>6;    // scale (left-shift amount 0..3)
+            if(!rex.w) {
+                // 32-bit elements (VPGATHERQD / VGATHERQPS): dest and mask stay xmm even when vex.l
+                v0 = sse_get_reg(dyn, ninst, x1, gd, 1);        // destination
+                v2 = sse_get_reg(dyn, ninst, x1, vex.v, 1);     // mask, written back cleared below
+                v1 = sse_get_reg(dyn, ninst, x1, eb2, 0);       // index lanes 0-1
+                if(vex.l) q1 = ymm_get_reg(dyn, ninst, x1, eb2, 0, gd, vex.v, -1);  // index lanes 2-3
+                // prepare mask: replicate each dword's sign bit so a lane is all-ones or all-zeros
+                VSSHRQ_32(v2, v2, 31);
+                // slow gather, not much choice here...
+                for(int i=0; i<2+vex.l*2; ++i) {
+                    VMOVSto(x4, v2, i);     // mask lane i
+                    CBZw(x4, 4+4*4);        // lane not selected: branch over the 4 instructions below
+                    VMOVQDto(x4, (i&2)?q1:v1, i&1);    // 64-bit index (upper two lanes live in the ymm half)
+                    ADDx_REG_LSL(x4, ed, x4, wb1);     // address = base + (index << scale)
+                    VLD1_32(v0, i, x4);                // load 32-bit element into dest lane i
+                    VMOVQSfrom(v2, i, xZR);            // clear consumed mask lane
+                }
+                if(!vex.l) { VMOVQDfrom(v0, 1, xZR); VMOVeD(v2, 1, v0, 1); }    // xmm form: zero upper halves of dest and mask
+                YMM0(gd); 
+                YMM0(vex.v);
+            } else {
+                // 64-bit elements (VPGATHERQQ / VGATHERQPD): one pass per 128-bit lane
+                for(int l=0; l<1+vex.l; ++l) {
+                    if(!l) {
+                        v0 = sse_get_reg(dyn, ninst, x1, gd, 1);        // destination, low lane
+                        v2 = sse_get_reg(dyn, ninst, x1, vex.v, 1);     // mask, low lane
+                        v1 = sse_get_reg(dyn, ninst, x1, eb2, 0);       // indices, low lane
+                    } else {
+                        v0 = ymm_get_reg(dyn, ninst, x1, gd, 1, vex.v, (!rex.w)?eb2:-1, -1);    // NOTE(review): (!rex.w) is always false in this branch
+                        v2 = ymm_get_reg(dyn, ninst, x1, vex.v, 1, gd, (!rex.w)?eb2:-1, -1);
+                        v1 = ymm_get_reg(dyn, ninst, x1, eb2, 0, gd, vex.v, -1);
+                    }
+                    // prepare mask: replicate each qword's sign bit
+                    VSSHRQ_64(v2, v2, 63);
+                    // slow gather, not much choice here...
+                    for(int i=0; i<2; ++i) {
+                        VMOVQDto(x4, v2, i);    // mask lane (all-ones or all-zeros, so a 32-bit test suffices)
+                        CBZw(x4, 4+4*4);        // lane not selected: branch over the 4 instructions below
+                        VMOVQDto(x4, v1, i);    // 64-bit index
+                        ADDx_REG_LSL(x4, ed, x4, wb1);     // address = base + (index << scale)
+                        VLD1_64(v0, i, x4);                // load 64-bit element into dest lane i
+                        VMOVQDfrom(v2, i, xZR);            // clear consumed mask lane
+                    }
+                }
+                if(!vex.l) { YMM0(gd); YMM0(vex.v); }   // xmm form: zero upper ymm halves
+            }
+            if(!vex.l) YMM0(vex.v);     // NOTE(review): redundant — both branches above already emit YMM0(vex.v)
+            break;
 
         case 0x98:
             INST_NAME("VFMADD132PS/D Gx, Vx, Ex");