| | | |
|---|---|---|
| author | ptitSeb <sebastien.chev@gmail.com> | 2024-06-28 10:35:59 +0200 |
| committer | ptitSeb <sebastien.chev@gmail.com> | 2024-06-28 10:35:59 +0200 |
| commit | 32b2b388c8d3e1ccbc036820eafba77f5425a761 (patch) | |
| tree | ec81778860f19525fd3820788af50d2080d5b6b1 /src | |
| parent | 013643782f14b8545d30157cb33b13d405d29f7c (diff) | |
| download | box64-32b2b388c8d3e1ccbc036820eafba77f5425a761.tar.gz box64-32b2b388c8d3e1ccbc036820eafba77f5425a761.zip | |
[ARM64_DYNAREC] Added AVX.66.0F38 91/93 opcodes
Diffstat (limited to 'src')
| | | |
|---|---|---|
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c | 74 |

1 file changed, 74 insertions, 0 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
index 8b0bcbab..41295f6d 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
@@ -1105,6 +1105,80 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
             }
             if(!vex.l) {YMM0(gd); YMM0(vex.v);}
             break;
+        case 0x91:
+        case 0x93:
+            if(opcode==0x91) {INST_NAME("VPGATHERQD Gx, VSIB, Vx");} else {INST_NAME("VGATHERQPD/VGATHERQPS Gx, VSIB, Vx");}
+            nextop = F8;
+            if(((nextop&7)!=4) || MODREG) {UDF(0);}
+            GETG;
+            u8 = F8;    //SIB
+            if((u8&0x7)==0x5 && !(nextop&0xC0)) {
+                MOV64x(x5, F32S64);
+                eb1 = x5;
+            } else
+                eb1 = xRAX + (u8&0x7)+(rex.b<<3);   // base
+            eb2 = ((u8>>3)&7)+(rex.x<<3);           // index
+            if(nextop&0x40)
+                i32 = F8S;
+            else if(nextop&0x80)
+                i32 = F32S;
+            else
+                i32 = 0;
+            if(!i32) ed = eb1;
+            else {
+                ed = x3;
+                if(i32>0 && i32<4096) ADDx_U12(ed, eb1, i32);
+                else if(i32<0 && i32>-4096) SUBx_U12(ed, eb1, -i32);
+                else {MOV64x(ed, i32); ADDx_REG(ed, ed, eb1);}
+            }
+            // ed is base
+            wb1 = u8>>6;    // scale
+            if(!rex.w) {
+                v0 = sse_get_reg(dyn, ninst, x1, gd, 1);
+                v2 = sse_get_reg(dyn, ninst, x1, vex.v, 1);
+                v1 = sse_get_reg(dyn, ninst, x1, eb2, 0);
+                if(vex.l) q1 = ymm_get_reg(dyn, ninst, x1, eb2, 0, gd, vex.v, -1);
+                // prepare mask
+                VSSHRQ_32(v2, v2, 31);
+                // slow gather, not much choice here...
+                for(int i=0; i<2+vex.l*2; ++i) {
+                    VMOVSto(x4, v2, i);
+                    CBZw(x4, 4+4*4);
+                    VMOVQDto(x4, (i&2)?q1:v1, i&1);
+                    ADDx_REG_LSL(x4, ed, x4, wb1);
+                    VLD1_32(v0, i, x4);
+                    VMOVQSfrom(v2, i, xZR);
+                }
+                if(!vex.l) { VMOVQDfrom(v0, 1, xZR); VMOVeD(v2, 1, v0, 1); }
+                YMM0(gd);
+                YMM0(vex.v);
+            } else {
+                for(int l=0; l<1+vex.l; ++l) {
+                    if(!l) {
+                        v0 = sse_get_reg(dyn, ninst, x1, gd, 1);
+                        v2 = sse_get_reg(dyn, ninst, x1, vex.v, 1);
+                        v1 = sse_get_reg(dyn, ninst, x1, eb2, 0);
+                    } else {
+                        v0 = ymm_get_reg(dyn, ninst, x1, gd, 1, vex.v, (!rex.w)?eb2:-1, -1);
+                        v2 = ymm_get_reg(dyn, ninst, x1, vex.v, 1, gd, (!rex.w)?eb2:-1, -1);
+                        v1 = ymm_get_reg(dyn, ninst, x1, eb2, 0, gd, vex.v, -1);
+                    }
+                    // prepare mask
+                    VSSHRQ_64(v2, v2, 63);
+                    // slow gather, not much choice here...
+                    for(int i=0; i<2; ++i) {
+                        VMOVQDto(x4, v2, i);
+                        CBZw(x4, 4+4*4);
+                        VMOVQDto(x4, v1, i);
+                        ADDx_REG_LSL(x4, ed, x4, wb1);
+                        VLD1_64(v0, i, x4);
+                        VMOVQDfrom(v2, i, xZR);
+                    }
+                }
+                if(!vex.l) { YMM0(gd); YMM0(vex.v); }
+            }
+            if(!vex.l) YMM0(vex.v);
+            break;
         case 0x98:
             INST_NAME("VFMADD132PS/D Gx, Vx, Ex");
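For readers unfamiliar with the VSIB gather forms: since NEON has no gather load, the added case expands the instruction into a per-lane loop. It first turns the mask register into all-ones/all-zeros lanes with an arithmetic shift, then, for each lane whose sign bit is set, computes base + (index << scale), loads a single element, and clears that mask lane. The C sketch below is a minimal model of that per-lane behaviour for the W0 form of opcode 0x91 (VPGATHERQD: 64-bit indices, 32-bit data); the function and parameter names are illustrative only, not box64 or architectural APIs, and the fault/restart subtleties of the real instruction are omitted.

```c
#include <stdint.h>
#include <string.h>

/* Hypothetical reference model (not box64 code) of the per-lane gather that the
 * emitted ARM64 loop performs for VPGATHERQD: 64-bit indices, 32-bit elements,
 * mask supplied in the VEX.vvvv register, scale encoded as a shift amount. */
static void gather_qd_ref(int32_t dst[4], int32_t mask[4],
                          const int64_t idx[4], const uint8_t *base,
                          unsigned scale_shift, int nelem /* 2 (xmm) or 4 (ymm) */)
{
    for (int i = 0; i < nelem; ++i) {
        if (mask[i] < 0) {                  /* sign bit set: lane is active */
            int32_t v;
            memcpy(&v, base + ((uint64_t)idx[i] << scale_shift), sizeof v);
            dst[i]  = v;                    /* mirrors VLD1_32 into lane i */
            mask[i] = 0;                    /* mirrors VMOVQSfrom(v2, i, xZR) */
        }                                   /* inactive lanes are left untouched */
    }
}
```

When vex.l is clear, only the low two lanes are gathered; zeroing the upper halves of the destination and mask (the VMOVQDfrom(..., xZR) and the trailing YMM0() calls in the diff) appears to cover the remaining lanes and the upper YMM parts.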