| author | ptitSeb <sebastien.chev@gmail.com> | 2024-05-31 22:33:12 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2024-05-31 22:33:12 +0200 |
| commit | bfebcba43198f81cfa014a4fd1884a46c878f659 | |
| tree | 07e399a3d7eaa544968fc9e8f30b69029d8e72d7 | /src/dynarec |
| parent | dde85b761b43c5fecb89177b09d6e596e6338a0c | |
[ARM64_DYNAREC] Added a bunch of AVX/BMI2/ADX opcodes
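The opcodes added here fall into three groups: ADX (ADCX/ADOX, carry chains isolated to a single flag), BMI2 (MULX, a flagless widening multiply), and a batch of AVX integer shuffles and shifts. As context for the ADX/BMI2 handlers in the diff below, here is a minimal C sketch of the x86 architectural semantics being mapped to ARM64. It is an editor-added illustration, not box64 code, and it assumes the GCC/Clang `__int128` extension:

```c
#include <stdint.h>

/* ADCX adds with carry-in and carry-out through CF only; ADOX is the same
   operation chained through OF instead. Neither touches any other flag,
   which is why the handlers below save and restore a single NZCV bit. */
static uint64_t adx_add64(uint64_t dst, uint64_t src, unsigned *carry)
{
    unsigned __int128 r = (unsigned __int128)dst + src + (*carry & 1);
    *carry = (unsigned)(r >> 64);   /* carry-out, 0 or 1 */
    return (uint64_t)r;
}

/* MULX multiplies the source by RDX unsigned, writes the low half to the
   vvvv destination (Vd) and the high half to ModRM.reg (Gd), no flags. */
static void mulx64(uint64_t rdx, uint64_t src, uint64_t *hi, uint64_t *lo)
{
    unsigned __int128 r = (unsigned __int128)rdx * src;
    *lo = (uint64_t)r;
    *hi = (uint64_t)(r >> 64);
}
```

Two independent carry chains (CF via ADCX, OF via ADOX) let bignum loops interleave additions without serializing on one flag, which is the point of the ADX extension.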
Diffstat (limited to 'src/dynarec')
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_660f.c | 20 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_67.c | 9 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx.c | 4 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_66_0f.c | 213 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_f2_0f38.c | 87 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_f3_0f.c | 93 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_f30f.c | 30 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_helper.h | 20 |
8 files changed, 456 insertions, 20 deletions
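The largest single addition below is VPSHUFD: common immediates are pattern-matched to cheap NEON operations (VEXT, VDUP, VREV64), and the general register-to-register case falls back to a TBL byte shuffle whose indices come from the `swp[]` table. For reference, a scalar restatement of what every one of those paths must compute; `pshufd_ref` is a hypothetical helper added for illustration, not part of the commit:

```c
#include <stdint.h>
#include <string.h>

/* PSHUFD/VPSHUFD on one 128-bit lane: each destination dword selects a
   source dword through a 2-bit field of the immediate. */
static void pshufd_ref(uint32_t dst[4], const uint32_t src[4], uint8_t imm)
{
    uint32_t tmp[4];                        /* tolerate dst == src aliasing */
    for (int i = 0; i < 4; ++i)
        tmp[i] = src[(imm >> (2 * i)) & 3];
    memcpy(dst, tmp, sizeof(tmp));
}
```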
```diff
diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c
index fa006bcd..903c427c 100644
--- a/src/dynarec/arm64/dynarec_arm64_660f.c
+++ b/src/dynarec/arm64/dynarec_arm64_660f.c
@@ -900,6 +900,26 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 STH(x1, ed, fixedaddress);
             }
             break;
+
+        case 0xF6:
+            INST_NAME("ADCX Gd, Ed");
+            nextop = F8;
+            READFLAGS(X_CF);
+            SETFLAGS(X_CF, SF_SUBSET);
+            GETED(0);
+            GETGD;
+            MRS_nzvc(x3);
+            BFIx(x3, xFlags, 29, 1);    // set C
+            MSR_nzvc(x3);               // load CC into ARM CF
+            IFX(X_CF) {
+                ADCSxw_REG(gd, gd, ed);
+                CSETw(x3, cCS);
+                BFIw(xFlags, x3, F_CF, 1);
+            } else {
+                ADCxw_REG(gd, gd, ed);
+            }
+            break;
+
         default:
             DEFAULT;
     }
diff --git a/src/dynarec/arm64/dynarec_arm64_67.c b/src/dynarec/arm64/dynarec_arm64_67.c
index 7a59de5d..6d8c336e 100644
--- a/src/dynarec/arm64/dynarec_arm64_67.c
+++ b/src/dynarec/arm64/dynarec_arm64_67.c
@@ -57,9 +57,14 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
 
     GETREX();
 
+    while(opcode==0x67) opcode = F8;
+
     rep = 0;
-    while((opcode==0xF2) || (opcode==0xF3)) {
-        rep = opcode-0xF1;
+    while((opcode==0xF2) || (opcode==0xF3) || (opcode>=0x40 && opcode<=0x4F)) {
+        if((opcode==0xF2) || (opcode==0xF3))
+            rep = opcode-0xF1;
+        if(opcode>=0x40 && opcode<=0x4F)
+            rex.rex = opcode;
         opcode = F8;
     }
diff --git a/src/dynarec/arm64/dynarec_arm64_avx.c b/src/dynarec/arm64/dynarec_arm64_avx.c
index 14d79ad4..ad17147d 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx.c
@@ -53,10 +53,14 @@ uintptr_t dynarec64_AVX(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ni
         addr = dynarec64_AVX_0F(dyn, addr, ip, ninst, vex, ok, need_epilog);
     else if( (vex.m==VEX_M_0F) && (vex.p==VEX_P_66))
         addr = dynarec64_AVX_66_0F(dyn, addr, ip, ninst, vex, ok, need_epilog);
+    else if( (vex.m==VEX_M_0F) && (vex.p==VEX_P_F3))
+        addr = dynarec64_AVX_F3_0F(dyn, addr, ip, ninst, vex, ok, need_epilog);
     else if( (vex.m==VEX_M_0F38) && (vex.p==VEX_P_66))
         addr = dynarec64_AVX_66_0F38(dyn, addr, ip, ninst, vex, ok, need_epilog);
     else if( (vex.m==VEX_M_0F3A) && (vex.p==VEX_P_66))
         addr = dynarec64_AVX_66_0F3A(dyn, addr, ip, ninst, vex, ok, need_epilog);
+    else if( (vex.m==VEX_M_0F38) && (vex.p==VEX_P_F2))
+        addr = dynarec64_AVX_F2_0F38(dyn, addr, ip, ninst, vex, ok, need_epilog);
     else {DEFAULT;}
 
     if((*ok==-1) && (box64_dynarec_log>=LOG_INFO || box64_dynarec_dump || box64_dynarec_missing)) {
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c
index 06044df1..649cf797 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c
@@ -39,7 +39,7 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip,
     int q0, q1, q2;
     int d0, d1, d2;
     int s0;
-    uint64_t tmp64u;
+    uint64_t tmp64u, tmp64u2;
     int64_t j64;
     int64_t fixedaddress;
     int unscaled;
@@ -57,6 +57,8 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip,
     MAYUSE(j64);
     MAYUSE(cacheupd);
 
+    /* Remember to not create a new fpu_scratch after some GY/VY/EY is created, because Y can be in the scratch area and might overlap (and scratch will win) */
+
     rex_t rex = vex.rex;
 
     switch(opcode) {
@@ -153,20 +155,14 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip,
         case 0x6B:
             INST_NAME("PACKSSDW Gx,Ex");
             nextop = F8;
-            GETGX_empty_VXEX(v0, v2, v1, 0);
-            if(v0==v1) {
-                q0 = fpu_get_scratch(dyn, ninst);
-                VMOVQ(q0, v0);
-            }
-            SQXTN_16(v0, v2);
-            if(v2==v1) {
-                VMOVeD(v0, 1, v0, 0);
-            } else {
-                SQXTN2_16(v0, (v0==v1)?q0:v1);
-            }
-            if(vex.l) {
-                GETGY_empty_VYEY(v0, v2, v1);
+            for(int l=0; l<1+vex.l; ++l) {
+                if(!l) {
+                    GETGX_empty_VXEX(v0, v2, v1, 0);
+                } else {
+                    GETGY_empty_VYEY(v0, v2, v1);
+                }
                 if(v0==v1) {
+                    q0 = fpu_get_scratch(dyn, ninst);
                     VMOVQ(q0, v0);
                 }
                 SQXTN_16(v0, v2);
@@ -175,7 +171,8 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip,
                 } else {
                     SQXTN2_16(v0, (v0==v1)?q0:v1);
                 }
-            } else YMM0(gd);
+            }
+            if(!vex.l) YMM0(gd);
             break;
 
         case 0x6F:
@@ -201,6 +198,192 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip,
             }
             if(!vex.l) YMM0(gd);
             break;
+        case 0x70:
+            INST_NAME("VPSHUFD Gx,Ex,Ib");
+            nextop = F8;
+            if(MODREG) {
+                u8 = F8;
+                d0 = fpu_get_scratch(dyn, ninst);
+                for(int l=0; l<1+vex.l; ++l) {
+                    if(!l) {
+                        v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), 0);
+                        GETGX_empty(v0);
+                    } else {
+                        GETGY_empty_EY(v0, v1);
+                    }
+                    if(u8==0x4E) {
+                        if(v0==v1) {
+                            VEXTQ_8(v0, v0, v0, 8);
+                        } else {
+                            VMOVeD(v0, 0, v1, 1);
+                            VMOVeD(v0, 1, v1, 0);
+                        }
+                    } else if(u8==0x00) {
+                        VDUPQ_32(v0, v1, 0);
+                    } else if(u8==0x55) {
+                        VDUPQ_32(v0, v1, 1);
+                    } else if(u8==0xAA) {
+                        VDUPQ_32(v0, v1, 2);
+                    } else if(u8==0xFF) {
+                        VDUPQ_32(v0, v1, 3);
+                    } else if(u8==0x44) {
+                        VDUPQ_64(v0, v1, 0);
+                    } else if(u8==0xEE) {
+                        VDUPQ_64(v0, v1, 1);
+                    } else if(u8==0xB1) {
+                        VREV64Q_32(v0, v1);
+                    } else if(v0!=v1) {
+                        VMOVeS(v0, 0, v1, (u8>>(0*2))&3);
+                        VMOVeS(v0, 1, v1, (u8>>(1*2))&3);
+                        VMOVeS(v0, 2, v1, (u8>>(2*2))&3);
+                        VMOVeS(v0, 3, v1, (u8>>(3*2))&3);
+                    } else {
+                        if(!l) {
+                            uint64_t swp[4] = {
+                                (0)|(1<<8)|(2<<16)|(3<<24),
+                                (4)|(5<<8)|(6<<16)|(7<<24),
+                                (8)|(9<<8)|(10<<16)|(11<<24),
+                                (12)|(13<<8)|(14<<16)|(15<<24)
+                            };
+                            tmp64u = swp[(u8>>(0*2))&3] | (swp[(u8>>(1*2))&3]<<32);
+                            MOV64x(x2, tmp64u);
+                            VMOVQDfrom(d0, 0, x2);
+                            tmp64u2 = swp[(u8>>(2*2))&3] | (swp[(u8>>(3*2))&3]<<32);
+                            if(tmp64u2==tmp64u) {
+                                VMOVQDfrom(d0, 1, x2);
+                            } else {
+                                MOV64x(x3, tmp64u2);
+                                VMOVQDfrom(d0, 1, x3);
+                            }
+                        }
+                        VTBLQ1_8(v0, v1, d0);
+                    }
+                }
+            } else {
+                SMREAD();
+                for(int l=0; l<1+vex.l; ++l) {
+                    i32 = -1;
+                    if(!l) {
+                        GETGX_empty(v0);
+                        addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 1);
+                        u8 = F8;
+                    } else {
+                        GETGY_empty(v0, -1, -1, -1);
+                        ADDx_U12(x3, ed, 16);
+                        ed = x3;
+                    }
+                    if (u8) {
+                        for (int i=0; i<4; ++i) {
+                            int32_t idx = (u8>>(i*2))&3;
+                            if(idx!=i32) {
+                                ADDx_U12(x2, ed, idx*4);
+                                i32 = idx;
+                            }
+                            VLD1_32(v0, i, x2);
+                        }
+                    } else {
+                        VLDQ1R_32(v0, ed);
+                    }
+                }
+            }
+            if(!vex.l) YMM0(gd);
+            break;
+
+        case 0x73:
+            nextop = F8;
+            switch((nextop>>3)&7) {
+                case 2:
+                    INST_NAME("VPSRLQ Vx, Ex, Ib");
+                    for(int l=0; l<1+vex.l; ++l) {
+                        if(!l) {
+                            GETEX_Y(v1, 0, 1);
+                            GETVX_empty(v0);
+                            u8 = F8;
+                        } else {
+                            GETVY_empty_EY(v0, v1);
+                        }
+                        if(u8) {
+                            if (u8>63) {
+                                VEORQ(v0, v0, v0);
+                            } else if(u8) {
+                                VSHRQ_64(v0, v1, u8);
+                            }
+                        } else if(v0!=v1)
+                            VMOVQ(v0, v1);
+                    }
+                    if(!vex.l) YMM0(vex.v);
+                    break;
+                case 3:
+                    INST_NAME("VPSRLDQ Vx, Ex, Ib");
+                    q1 = fpu_get_scratch(dyn, ninst);
+                    for(int l=0; l<1+vex.l; ++l) {
+                        if(!l) {
+                            GETEX_Y(v1, 0, 1);
+                            GETVX_empty(v0);
+                            u8 = F8;
+                        } else {
+                            GETVY_empty_EY(v0, v1);
+                        }
+                        if(u8) {
+                            if(u8>15) {
+                                VEORQ(v0, v0, v0);
+                            } else {
+                                if(!l) VEORQ(q1, q1, q1);
+                                VEXTQ_8(v0, v1, q1, u8);
+                            }
+                        } else if(v0!=v1)
+                            VMOVQ(v0, v1);
+                    }
+                    if(!vex.l) YMM0(vex.v);
+                    break;
+                case 6:
+                    INST_NAME("VPSLLQ Vx, Ex, Ib");
+                    for(int l=0; l<1+vex.l; ++l) {
+                        if(!l) {
+                            GETEX_Y(v1, 0, 1);
+                            GETVX_empty(v0);
+                            u8 = F8;
+                        } else {
+                            GETVY_empty_EY(v0, v1);
+                        }
+                        if(u8) {
+                            if (u8>63) {
+                                VEORQ(v0, v0, v0);
+                            } else {
+                                VSHLQ_64(v0, v1, u8);
+                            }
+                        } else if(v0!=v1)
+                            VMOVQ(v0, v1);
+                    }
+                    if(!vex.l) YMM0(vex.v);
+                    break;
+                case 7:
+                    INST_NAME("VPSLLDQ Vx, Ex, Ib");
+                    q1 = fpu_get_scratch(dyn, ninst);
+                    for(int l=0; l<1+vex.l; ++l) {
+                        if(!l) {
+                            GETEX_Y(v1, 0, 1);
+                            GETVX_empty(v0);
+                            u8 = F8;
+                        } else {
+                            GETVY_empty_EY(v0, v1);
+                        }
+                        if(u8) {
+                            if(u8>15) {
+                                VEORQ(v0, v0, v0);
+                            } else if(u8>0) {
+                                if(!l) VEORQ(q1, q1, q1);
+                                VEXTQ_8(v0, q1, v1, 16-u8);
+                            }
+                        } else if(v0!=v1)
+                            VMOVQ(v0, v1);
+                    }
+                    if(!vex.l) YMM0(vex.v);
+                    break;
+                default:
+                    DEFAULT;
+            }
+            break;
 
         default:
             DEFAULT;
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_f2_0f38.c b/src/dynarec/arm64/dynarec_arm64_avx_f2_0f38.c
new file mode 100644
index 00000000..294a7797
--- /dev/null
+++ b/src/dynarec/arm64/dynarec_arm64_avx_f2_0f38.c
@@ -0,0 +1,87 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <errno.h>
+
+#include "debug.h"
+#include "box64context.h"
+#include "dynarec.h"
+#include "emu/x64emu_private.h"
+#include "emu/x64run_private.h"
+#include "x64run.h"
+#include "x64emu.h"
+#include "box64stack.h"
+#include "callback.h"
+#include "emu/x64run_private.h"
+#include "x64trace.h"
+#include "dynarec_native.h"
+#include "my_cpuid.h"
+#include "emu/x87emu_private.h"
+#include "emu/x64shaext.h"
+
+#include "arm64_printer.h"
+#include "dynarec_arm64_private.h"
+#include "dynarec_arm64_functions.h"
+#include "dynarec_arm64_helper.h"
+
+uintptr_t dynarec64_AVX_F2_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog)
+{
+    (void)ip; (void)need_epilog;
+
+    uint8_t opcode = F8;
+    uint8_t nextop, u8;
+    uint8_t gd, ed, vd;
+    uint8_t wback, wb1, wb2;
+    uint8_t eb1, eb2, gb1, gb2;
+    int32_t i32, i32_;
+    int cacheupd = 0;
+    int v0, v1, v2;
+    int q0, q1, q2;
+    int d0, d1, d2;
+    int s0;
+    uint64_t tmp64u;
+    int64_t j64;
+    int64_t fixedaddress;
+    int unscaled;
+    MAYUSE(wb1);
+    MAYUSE(wb2);
+    MAYUSE(eb1);
+    MAYUSE(eb2);
+    MAYUSE(gb1);
+    MAYUSE(gb2);
+    MAYUSE(q0);
+    MAYUSE(q1);
+    MAYUSE(d0);
+    MAYUSE(d1);
+    MAYUSE(s0);
+    MAYUSE(j64);
+    MAYUSE(cacheupd);
+
+    rex_t rex = vex.rex;
+
+    switch(opcode) {
+
+        case 0xF6:
+            INST_NAME("MULX Gd, Vd, Ed (,RDX)");
+            nextop = F8;
+            GETGD;
+            GETED(0);
+            GETVD;
+            if(rex.w) {
+                // 64bits mul
+                UMULH(x3, xRDX, ed);
+                MULx(vd, xRDX, ed);
+                MOVx_REG(gd, x3);
+            } else {
+                // 32bits mul
+                UMULL(x3, xRDX, ed);
+                MOVw_REG(vd, x3);
+                LSRx(gd, x3, 32);
+            }
+            break;
+
+        default:
+            DEFAULT;
+    }
+    return addr;
+}
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_f3_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_f3_0f.c
new file mode 100644
index 00000000..7ff19269
--- /dev/null
+++ b/src/dynarec/arm64/dynarec_arm64_avx_f3_0f.c
@@ -0,0 +1,93 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <errno.h>
+
+#include "debug.h"
+#include "box64context.h"
+#include "dynarec.h"
+#include "emu/x64emu_private.h"
+#include "emu/x64run_private.h"
+#include "x64run.h"
+#include "x64emu.h"
+#include "box64stack.h"
+#include "callback.h"
+#include "emu/x64run_private.h"
+#include "x64trace.h"
+#include "dynarec_native.h"
+#include "my_cpuid.h"
+#include "emu/x87emu_private.h"
+#include "emu/x64shaext.h"
+
+#include "arm64_printer.h"
+#include "dynarec_arm64_private.h"
+#include "dynarec_arm64_functions.h"
+#include "dynarec_arm64_helper.h"
+
+uintptr_t dynarec64_AVX_F3_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog)
+{
+    (void)ip; (void)need_epilog;
+
+    uint8_t opcode = F8;
+    uint8_t nextop, u8;
+    uint8_t gd, ed, vd;
+    uint8_t wback, wb1, wb2;
+    uint8_t eb1, eb2, gb1, gb2;
+    int32_t i32, i32_;
+    int cacheupd = 0;
+    int v0, v1, v2;
+    int q0, q1, q2;
+    int d0, d1, d2;
+    int s0;
+    uint64_t tmp64u;
+    int64_t j64;
+    int64_t fixedaddress;
+    int unscaled;
+    MAYUSE(wb1);
+    MAYUSE(wb2);
+    MAYUSE(eb1);
+    MAYUSE(eb2);
+    MAYUSE(gb1);
+    MAYUSE(gb2);
+    MAYUSE(q0);
+    MAYUSE(q1);
+    MAYUSE(d0);
+    MAYUSE(d1);
+    MAYUSE(s0);
+    MAYUSE(j64);
+    MAYUSE(cacheupd);
+
+    rex_t rex = vex.rex;
+
+    switch(opcode) {
+
+        case 0x6F:
+            INST_NAME("VMOVDQU Gx,Ex");// no alignment constraint on NEON here, so same as MOVDQA
+            nextop = F8;
+            if(MODREG) {
+                v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), 0);
+                GETGX_empty(v0);
+                VMOVQ(v0, v1);
+                if(vex.l) {
+                    v1 = ymm_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), 0, gd, -12, -1);
+                    GETGY_empty(v0, (nextop&7)+(rex.b<<3), -1, -1);
+                    VMOVQ(v0, v1);
+                }
+            } else {
+                GETGX_empty(v0);
+                SMREAD();
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0xffe<<4, 15, rex, NULL, 0, 0);
+                VLDR128_U12(v0, ed, fixedaddress);
+                if(vex.l) {
+                    GETGY_empty(v0, -1, -1, -1);
+                    VLDR128_U12(v0, ed, fixedaddress+16);
+                }
+            }
+            if(!vex.l) YMM0(gd);
+            break;
+
+        default:
+            DEFAULT;
+    }
+    return addr;
+}
diff --git a/src/dynarec/arm64/dynarec_arm64_f30f.c b/src/dynarec/arm64/dynarec_arm64_f30f.c
index 079cd0bc..96fe7543 100644
--- a/src/dynarec/arm64/dynarec_arm64_f30f.c
+++ b/src/dynarec/arm64/dynarec_arm64_f30f.c
@@ -172,6 +172,36 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 }
             }
             break;
+
+        case 0x38:  /* MAP */
+            opcode = F8;
+            switch(opcode) {
+
+                case 0xF6:
+                    INST_NAME("ADOX Gd, Ed");
+                    nextop = F8;
+                    READFLAGS(X_OF);
+                    SETFLAGS(X_OF, SF_SUBSET);
+                    GETED(0);
+                    GETGD;
+                    MRS_nzvc(x3);
+                    LSRw(x4, xFlags, F_OF);
+                    BFIx(x3, x4, 29, 1);    // set C
+                    MSR_nzvc(x3);           // load CC into ARM CF
+                    IFX(X_OF) {
+                        ADCSxw_REG(gd, gd, ed);
+                        CSETw(x3, cCS);
+                        BFIw(xFlags, x3, F_OF, 1);
+                    } else {
+                        ADCxw_REG(gd, gd, ed);
+                    }
+                    break;
+
+                default:
+                    DEFAULT;
+            }
+            break;
+
         case 0x51:
             INST_NAME("SQRTSS Gx, Ex");
             nextop = F8;
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h
index b72b4e99..cc4ef30a 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.h
+++ b/src/dynarec/arm64/dynarec_arm64_helper.h
@@ -90,6 +90,8 @@
 
 // GETGD  get x64 register in gd
 #define GETGD   gd = xRAX+((nextop&0x38)>>3)+(rex.r<<3)
+// GETVD  get x64 register in vd
+#define GETVD   vd = xRAX+vex.v
 
 //GETED can use r1 for ed, and r2 for wback. wback is 0 if ed is xEAX..xEDI
 #define GETED(D)    if(MODREG) {                \
                         ed = xRAX+(nextop&7)+(rex.b<<3); \
@@ -510,13 +512,21 @@
         VLD128(ey, ed, fixedaddress+16);                    \
     gy = ymm_get_reg_empty(dyn, ninst, x1, gd, -1, (MODREG)?((nextop&7)+(rex.b<<3)):-1, -1)
 
-// Get EX as a quad, (x1 is used)
+// Get empty VY, and non-writen EY
+#define GETVY_empty_EY(vy, ey)                              \
+    if(MODREG)                                              \
+        ey = ymm_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), 0, vex.v, -1, -1); \
+    else                                                    \
+        VLD128(ey, ed, fixedaddress+16);                    \
+    vy = ymm_get_reg_empty(dyn, ninst, x1, vex.v, -1, (MODREG)?((nextop&7)+(rex.b<<3)):-1, -1)
+
+// Get EX as a quad, (x3 is used)
 #define GETEX_Y(a, w, D)    \
     if(MODREG) {    \
-        a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), w); \
+        a = sse_get_reg(dyn, ninst, x3, (nextop&7)+(rex.b<<3), w); \
     } else {    \
         if(w) {WILLWRITE2();} else {SMREAD();}  \
-        addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0xffe<<4, 15, rex, NULL, 0, D); \
+        addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, NULL, 0xffe<<4, 15, rex, NULL, 0, D); \
         unscaled = 0;   \
         a = fpu_get_scratch(dyn, ninst);    \
         VLD128(a, ed, fixedaddress);    \
@@ -1093,8 +1103,10 @@ void* arm64_next(x64emu_t* emu, uintptr_t addr);
 #define dynarec64_AVX       STEPNAME(dynarec64_AVX)
 #define dynarec64_AVX_0F    STEPNAME(dynarec64_AVX_0F)
 #define dynarec64_AVX_66_0F STEPNAME(dynarec64_AVX_66_0F)
+#define dynarec64_AVX_F3_0F STEPNAME(dynarec64_AVX_F2_0F38)
 #define dynarec64_AVX_66_0F38   STEPNAME(dynarec64_AVX_66_0F38)
 #define dynarec64_AVX_66_0F3A   STEPNAME(dynarec64_AVX_66_0F3A)
+#define dynarec64_AVX_F2_0F38   STEPNAME(dynarec64_AVX_F2_0F38)
 
 #define geted   STEPNAME(geted)
 #define geted32 STEPNAME(geted32)
@@ -1516,8 +1528,10 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
 uintptr_t dynarec64_AVX(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog);
 uintptr_t dynarec64_AVX_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog);
 uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog);
+uintptr_t dynarec64_AVX_F3_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog);
 uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog);
 uintptr_t dynarec64_AVX_66_0F3A(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog);
+uintptr_t dynarec64_AVX_F2_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog);
 
 #if STEP < 2
 #define PASS2(A)
```
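A note on the flag plumbing shared by the ADCX and ADOX handlers above: AArch64 keeps its flags in NZCV (N is bit 31, Z bit 30, C bit 29, V bit 28), so both handlers read NZCV with MRS, insert the x86 CF or OF into bit 29 with BFI, write it back with MSR, and let a single ADCS consume and regenerate the carry. A hypothetical C restatement of that staging step, added for illustration and not taken from the commit:

```c
#include <stdint.h>

#define NZCV_C_BIT 29   /* carry flag position in the AArch64 NZCV register */

/* Return the NZCV image with C replaced by an x86 flag bit (CF for ADCX,
   OF for ADOX), mirroring the MRS/BFI/MSR sequence in the diff above. */
static inline uint64_t nzcv_with_carry(uint64_t nzcv, unsigned flag)
{
    nzcv &= ~(1ull << NZCV_C_BIT);              /* clear the old carry */
    nzcv |= (uint64_t)(flag & 1) << NZCV_C_BIT; /* insert the x86 flag */
    return nzcv;
}
```

After the ADCS, CSET and BFI move the ARM carry back into the right bit of `xFlags`, so only the one x86 flag named by the instruction is modified.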