| | | |
|---|---|---|
| author | ptitSeb <sebastien.chev@gmail.com> | 2024-06-02 20:10:00 +0200 |
| committer | ptitSeb <sebastien.chev@gmail.com> | 2024-06-02 20:10:00 +0200 |
| commit | f1f690552fc5c2d1a1db12afa2bbfcfe4300e49d (patch) | |
| tree | c3021b593b0a74bbb29569de27859aa17deaba8f /src | |
| parent | 57dcf3401c9e73a982cc0c5abdfeb0cb8b00cdb9 (diff) | |
| download | box64-f1f690552fc5c2d1a1db12afa2bbfcfe4300e49d.tar.gz box64-f1f690552fc5c2d1a1db12afa2bbfcfe4300e49d.zip | |
[ARM64_DYNAREC] Added AVX.66.0F38 08-0A/1C-1E/30-35/58/59/90/92/A8/A9/B8/B9 opcodes
Diffstat (limited to 'src')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/dynarec/arm64/arm64_emitter.h | 26 |
| -rw-r--r-- | src/dynarec/arm64/arm64_printer.c | 30 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c | 300 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_helper.h | 15 |
4 files changed, 366 insertions, 5 deletions
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h
index 050eece5..4817e130 100644
--- a/src/dynarec/arm64/arm64_emitter.h
+++ b/src/dynarec/arm64/arm64_emitter.h
@@ -1080,6 +1080,12 @@ int convert_bitmask(uint64_t bitmask);
 #define VMOVHto(Wd, Vn, index) EMIT(UMOV_gen(0, ((index)<<2) | 2, Vn, Wd))
 #define VMOVSto(Wd, Vn, index) EMIT(UMOV_gen(0, ((index)<<3) | 4, Vn, Wd))
+#define SMOV_gen(Q, imm5, Rn, Rd) ((Q)<<30 | 0b01110000<<21 | (imm5)<<16 | 0b01<<13 | 0<<12 | 1<<11 | 1<<10 | (Rn)<<5 | (Rd))
+#define SMOVQDto(Xd, Vn, index) EMIT(SMOV_gen(1, ((index)<<4) | 8, Vn, Xd))
+#define SMOVQBto(Xd, Vn, index) EMIT(SMOV_gen(1, ((index)<<1) | 1, Vn, Xd))
+#define SMOVQHto(Xd, Vn, index) EMIT(SMOV_gen(1, ((index)<<2) | 2, Vn, Xd))
+#define SMOVQSto(Xd, Vn, index) EMIT(SMOV_gen(1, ((index)<<3) | 4, Vn, Xd))
+
 #define MVN_vector(Q, Rn, Rd) ((Q)<<30 | 1<<29 | 0b01110<<24 | 0b10000<<17 | 0b00101<<12 | 0b10<<10 | (Rn)<<5 | (Rd))
 #define VMVNQ(Rd, Rn) EMIT(MVN_vector(1, Rn, Rd))
@@ -1161,11 +1167,6 @@ int convert_bitmask(uint64_t bitmask);
 #define FMULS(Sd, Sn, Sm) EMIT(FMUL_scalar(0b00, Sm, Sn, Sd))
 #define FMULD(Dd, Dn, Dm) EMIT(FMUL_scalar(0b01, Dm, Dn, Dd))
-#define FMLA_vector(Q, op, sz, Rm, Rn, Rd) ((Q)<<30 | 0b01110<<24 | (op)<<23 | (sz)<<22 | 1<<21 | (Rm)<<16 | 0b11001<<11 | 1<<10 | (Rn)<<5 | (Rd))
-#define VFMLAS(Sd, Sn, Sm) EMIT(FMLA_vector(0, 0, 0, Sm, Sn, Sd))
-#define VFMLAQS(Sd, Sn, Sm) EMIT(FMLA_vector(1, 0, 0, Sm, Sn, Sd))
-#define VFMLAQD(Dd, Dn, Dm) EMIT(FMLA_vector(1, 0, 1, Dm, Dn, Dd))
-
 // DIV
 #define FDIV_vector(Q, sz, Rm, Rn, Rd) ((Q)<<30 | 1<<29 | 0b01110<<24 | (sz)<<22 | 1<<21 | (Rm)<<16 | 0b11111<<11 | 1<<10 | (Rn)<<5 | (Rd))
 #define VFDIVS(Sd, Sn, Sm) EMIT(FDIV_vector(0, 0, Sm, Sn, Sd))
@@ -1450,6 +1451,21 @@ int convert_bitmask(uint64_t bitmask);
 // FMAXNM NaN vs Number: number is picked
 #define FMAXNMD(Dd, Dn, Dm) EMIT(FMINMAX_scalar(0b01, Dm, 0b10, Dn, Dd))
+// Fused Add Multiply
+#define FMADD_gen(type, o1, Rm, o0, Ra, Rn, Rd) (0b11111<<24 | (type)<<22 | (o1)<<21 | (Rm)<<16 | (o0)<<0 | (Ra)<<10 | (Rn)<<5 | (Rd))
+// scalar Rd = Ra + Rn*Rm
+#define FMADD_32(Sd, Sa, Sn, Sm) EMIT(FMADD_gen(0b00, 0, Sm, 0, Sa, Sn, Sd))
+// scalar Rd = Ra + Rn*Rm
+#define FMADD_64(Dd, Da, Dn, Dm) EMIT(FMADD_gen(0b01, 0, Dm, 0, Da, Dn, Dd))
+
+#define FMLA_vector(Q, op, sz, Rm, Rn, Rd) ((Q)<<30 | 0b01110<<24 | (op)<<23 | (sz)<<22 | 1<<21 | (Rm)<<16 | 0b11001<<11 | 1<<10 | (Rn)<<5 | (Rd))
+// Vd += Vn*Vm
+#define VFMLAS(Vd, Vn, Vm) EMIT(FMLA_vector(0, 0, 0, Vm, Vn, Vd))
+// Vd += Vn*Vm
+#define VFMLAQS(Vd, Vn, Vm) EMIT(FMLA_vector(1, 0, 0, Vm, Vn, Vd))
+// Vd += Vn*Vm
+#define VFMLAQD(Vd, Vn, Vm) EMIT(FMLA_vector(1, 0, 1, Vm, Vn, Vd))
+
 // ZIP / UZP
 #define ZIP_gen(Q, size, Rm, op, Rn, Rd) ((Q)<<30 | 0b001110<<24 | (size)<<22 | (Rm)<<16 | (op)<<14 | 0b11<<12 | 0b10<<10 | (Rn)<<5 | (Rd))
 #define VZIP1Q_8(Rt, Rn, Rm) EMIT(ZIP_gen(1, 0b00, Rm, 0, Rn, Rt))
diff --git a/src/dynarec/arm64/arm64_printer.c b/src/dynarec/arm64/arm64_printer.c
index 98bbcc98..9e8d10ab 100644
--- a/src/dynarec/arm64/arm64_printer.c
+++ b/src/dynarec/arm64/arm64_printer.c
@@ -930,6 +930,22 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
         snprintf(buff, sizeof(buff), "UMOV %s, %c%d.%c[%d]", a.Q?Xt[Rd]:Wt[Rd], q, Rn, s, index);
         return buff;
     }
+    // SMOV
+    if(isMask(opcode, "0Q001110000rrrrr001011nnnnnddddd", &a)) {
+        char q = a.Q?'Q':'D';
+        char s = '?';
+        int sz=0;
+        if(a.Q==0 && immr&1) {s='B'; sz=0; }
+        else if(/*a.Q==0 &&*/ (immr&3)==2) {s='H'; sz=1; }
+        else if(/*a.Q==0 &&*/ (immr&7)==4) {s='S'; sz=2; }
+        else if(a.Q==1 && (immr&15)==8) {s='D'; sz=3; }
+        int index = (immr)>>(sz+1);
+        if(sz>2)
+            snprintf(buff, sizeof(buff), "MOV %s, %c%d.%c[%d]", a.Q?Xt[Rd]:Wt[Rd], q, Rn, s, index);
+        else
+            snprintf(buff, sizeof(buff), "SMOV %s, %c%d.%c[%d]", a.Q?Xt[Rd]:Wt[Rd], q, Rn, s, index);
+        return buff;
+    }
     // VEOR
     if(isMask(opcode, "0Q101110001mmmmm000111nnnnnddddd", &a)) {
         char q = a.Q?'Q':'D';
@@ -1303,6 +1319,20 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
         snprintf(buff, sizeof(buff), "F%s%s V%d.%d%c, V%d.%d%c, V%d.%d%c", option?"MIN":"MAX", a.Q?"Q":"", Rd, n, s, Rn, n, s, Rm, n, s);
         return buff;
     }
+    // FMADD
+    if(isMask(opcode, "00011111tt0mmmmmoaaaaannnnnddddd", &a)) {
+        char s = (a.t==0b00)?'S':((a.t==0b01)?'D':'?');
+        int n = (a.t==0)?1:2;
+        snprintf(buff, sizeof(buff), "FM%s V%d.%d%c, V%d.%d%c, V%d.%d%c, V%d.%d%c", option?"SUB":"ADD", Rd, n, s, Ra, n, s, Rn, n, s, Rm, n, s);
+    }
+    // FMLA
+    if(isMask(opcode, "0Q001110of1mmmmm110011nnnnnddddd", &a)) {
+        char s = (sf==0)?'S':((sf==1)?'D':'?');
+        int n = (sf==0)?2:1;
+        n *= a.Q?2:1;
+        snprintf(buff, sizeof(buff), "FML%s%s V%d.%d%c, V%d.%d%c, V%d.%d%c", option?"S":"A", a.Q?"Q":"", Rd, n, s, Rn, n, s, Rm, n, s);
+        return buff;
+    }
     // NEG
     if(isMask(opcode, "0Q101110ff100000101110nnnnnddddd", &a)) {
         const char* Y[] = {"8B", "16B", "4H", "8H", "2S", "4S", "??", "2D"};
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
index be983b14..12c6e3da 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
@@ -77,6 +77,52 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
             if(!vex.l) YMM0(gd);
             break;
+        case 0x08:
+            INST_NAME("VPSIGNB Gx, Vx, Ex");
+            nextop = F8;
+            v1 = fpu_get_scratch(dyn, ninst);
+            v0 = fpu_get_scratch(dyn, ninst);
+            for(int l=0; l<1+vex.l; ++l) {
+                if(!l) { GETGX_empty_VXEX(q0, q2, q1, 0); } else { GETGY_empty_VYEY(q0, q2, q1); }
+                NEGQ_8(v0, q2);     // get NEG
+                CMLTQ_0_8(v1, q1);  // calculate mask
+                VBIFQ(v0, q2, v1);  // put back positive values
+                CMEQQ_0_8(v1, q1);  // handle case where Ex is 0
+                VBICQ(q0, v0, v1);
+            }
+            if(!vex.l) YMM0(gd);
+            break;
+        case 0x09:
+            INST_NAME("VPSIGNW Gx, Vx, Ex");
+            nextop = F8;
+            v1 = fpu_get_scratch(dyn, ninst);
+            v0 = fpu_get_scratch(dyn, ninst);
+            for(int l=0; l<1+vex.l; ++l) {
+                if(!l) { GETGX_empty_VXEX(q0, q2, q1, 0); } else { GETGY_empty_VYEY(q0, q2, q1); }
+                NEGQ_16(v0, q2);    // get NEG
+                CMLTQ_0_16(v1, q1); // calculate mask
+                VBIFQ(v0, q2, v1);  // put back positive values
+                CMEQQ_0_16(v1, q1); // handle case where Ex is 0
+                VBICQ(q0, v0, v1);
+            }
+            if(!vex.l) YMM0(gd);
+            break;
+        case 0x0A:
+            INST_NAME("VPSIGND Gx, Vx, Ex");
+            nextop = F8;
+            v1 = fpu_get_scratch(dyn, ninst);
+            v0 = fpu_get_scratch(dyn, ninst);
+            for(int l=0; l<1+vex.l; ++l) {
+                if(!l) { GETGX_empty_VXEX(q0, q2, q1, 0); } else { GETGY_empty_VYEY(q0, q2, q1); }
+                NEGQ_32(v0, q2);    // get NEG
+                CMLTQ_0_32(v1, q1); // calculate mask
+                VBIFQ(v0, q2, v1);  // put back positive values
+                CMEQQ_0_32(v1, q1); // handle case where Ex is 0
+                VBICQ(q0, v0, v1);
+            }
+            if(!vex.l) YMM0(gd);
+            break;
+
         case 0x18:
             INST_NAME("VBROADCASTSS Gx, Ex");
             nextop = F8;
@@ -95,6 +141,34 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
             } else YMM0(gd);
             break;
+        case 0x1C:
+            INST_NAME("VPABSB Gx, Ex");
+            nextop = F8;
+            for(int l=0; l<1+vex.l; ++l) {
+                if(!l) { GETGX_empty_EX(v0, v1, 0); } else { GETGY_empty_EY(v0, v1); }
+                ABSQ_8(v0, v1);
+            }
+            if(!vex.l) YMM0(gd);
+            break;
+        case 0x1D:
+            INST_NAME("VPABSW Gx, Ex");
+            nextop = F8;
+            for(int l=0; l<1+vex.l; ++l) {
+                if(!l) { GETGX_empty_EX(v0, v1, 0); } else { GETGY_empty_EY(v0, v1); }
+                ABSQ_16(v0, v1);
+            }
+            if(!vex.l) YMM0(gd);
+            break;
+        case 0x1E:
+            INST_NAME("VPABSD Gx, Ex");
+            nextop = F8;
+            for(int l=0; l<1+vex.l; ++l) {
+                if(!l) { GETGX_empty_EX(v0, v1, 0); } else { GETGY_empty_EY(v0, v1); }
+                ABSQ_32(v0, v1);
+            }
+            if(!vex.l) YMM0(gd);
+            break;
+
         case 0x2C:
             INST_NAME("VMASKMOVPS Gx, Vx, Ex");
             nextop = F8;
@@ -162,6 +236,232 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
                 }
             }
             break;
+        case 0x30:
+            INST_NAME("VPMOVZXBW Gx, Ex");
+            nextop = F8;
+            GETG;
+            if(vex.l) { GETEX(q1, 0, 0); } else { GETEX64(q1, 0, 0); YMM0(gd); }
+            GETGX_empty(q0);
+            if(vex.l) {
+                GETGY_empty(v0, -1, -1, -1);
+                UXTL2_8(v0, q1);
+            }
+            UXTL_8(q0, q1);
+            break;
+        case 0x31:
+            INST_NAME("VPMOVZXBD Gx, Ex");
+            nextop = F8;
+            GETG;
+            if(vex.l) { GETEX64(q1, 0, 0); } else { GETEX32(q1, 0, 0); YMM0(gd); }
+            GETGX_empty(q0);
+            if(vex.l) {
+                GETGY_empty(v0, -1, -1, -1);
+                UXTL_8(v0, q1);
+                UXTL2_16(v0, v0);
+            }
+            UXTL_8(q0, q1);
+            UXTL_16(q0, q0);
+            break;
+        case 0x32:
+            INST_NAME("VPMOVZXBQ Gx, Ex");
+            nextop = F8;
+            GETG;
+            if(vex.l) { GETEX32(q1, 0, 0); } else { GETEX16(q1, 0, 0); YMM0(gd); }
+            GETGX_empty(q0);
+            if(vex.l) {
+                GETGY_empty(v0, -1, -1, -1);
+                UXTL_8(v0, q1);
+                UXTL_16(v0, v0);
+                UXTL2_32(v0, v0);
+            }
+            UXTL_8(q0, q1);
+            UXTL_16(q0, q0);
+            UXTL_32(q0, q0);
+            break;
+        case 0x33:
+            INST_NAME("VPMOVZXWD Gx, Ex");
+            nextop = F8;
+            GETG;
+            if(vex.l) { GETEX(q1, 0, 0); } else { GETEX64(q1, 0, 0); YMM0(gd); }
+            GETGX_empty(q0);
+            if(vex.l) {
+                GETGY_empty(v0, -1, -1, -1);
+                UXTL2_16(v0, q1);
+            }
+            UXTL_16(q0, q1);
+            break;
+        case 0x34:
+            INST_NAME("VPMOVZXWQ Gx, Ex");
+            nextop = F8;
+            GETG;
+            if(vex.l) { GETEX64(q1, 0, 0); } else { GETEX32(q1, 0, 0); YMM0(gd); }
+            GETGX_empty(q0);
+            if(vex.l) {
+                GETGY_empty(v0, -1, -1, -1);
+                UXTL_16(v0, q1);
+                UXTL2_32(v0, v0);
+            }
+            UXTL_16(q0, q1);
+            UXTL_32(q0, q0);
+            break;
+        case 0x35:
+            INST_NAME("VPMOVZXDQ Gx, Ex");
+            nextop = F8;
+            GETG;
+            if(vex.l) { GETEX(q1, 0, 0); } else { GETEX64(q1, 0, 0); YMM0(gd); }
+            GETGX_empty(q0);
+            if(vex.l) {
+                GETGY_empty(v0, -1, -1, -1);
+                UXTL2_32(v0, q1);
+            }
+            UXTL_32(q0, q1);
+            break;
+
+        case 0x58:
+            INST_NAME("VPBROADCASTD Gx, Ex");
+            nextop = F8;
+            if(MODREG) {
+                GETGX_empty_EX(v0, v1, 0);
+                VDUPQ_32(v0, v1, 0);
+            } else {
+                GETGX_empty(v0);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, 0, 0, 0, rex, NULL, 0, 0);
+                VLDQ1R_32(v0, ed);
+            }
+            if(vex.l) {
+                GETGY_empty(q0, -1, -1, -1);
+                VMOVQ(q0, v0);
+            } else YMM0(gd);
+            break;
+        case 0x59:
+            INST_NAME("VPBROADCASTQ Gx, Ex");
+            nextop = F8;
+            if(MODREG) {
+                GETGX_empty_EX(v0, v1, 0);
+                VDUPQ_64(v0, v1, 0);
+            } else {
+                GETGX_empty(v0);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, 0, 0, 0, rex, NULL, 0, 0);
+                VLDQ1R_64(v0, ed);
+            }
+            if(vex.l) {
+                GETGY_empty(q0, -1, -1, -1);
+                VMOVQ(q0, v0);
+            } else YMM0(gd);
+            break;
+
+        case 0x90:
+        case 0x92:
+            if(opcode==0x90) {INST_NAME("VPGATHERDD Gx, VSIB, Vx");} else {INST_NAME("VGATHERDPD/VGATHERDPS Gx, VSIB, Vx");}
+            nextop = F8;
+            if(((nextop&7)!=4) || MODREG) {UDF(0);}
+            GETG;
+            u8 = F8; //SIB
+            eb1 = xRAX + (u8&0x7)+(rex.b<<3);   // base
+            eb2 = ((u8>>3)&7)+(rex.x<<3);       // index
+            if(nextop&0x40)
+                i32 = F8S;
+            else if(nextop&0x80)
+                i32 = F32S;
+            else
+                i32 = 0;
+            if(!i32) ed = eb1;
+            else {
+                ed = x3;
+                if(i32>0 && i32<4096) ADDx_U12(ed, eb1, i32);
+                else if(i32<0 && i32>-4096) SUBx_U12(ed, eb1, -i32);
+                else {MOV64x(ed, i32); ADDx_REG(ed, ed, eb1);}
+            }
+            // ed is base
+            wb1 = u8>>6;    // scale
+            if(wb1) q1 = fpu_get_scratch(dyn, ninst);
+            for(int l=0; l<1+vex.l; ++l) {
+                if(!l) {
+                    v0 = sse_get_reg(dyn, ninst, x1, gd, 1);
+                    v2 = sse_get_reg(dyn, ninst, x1, vex.v, 1);
+                    v1 = sse_get_reg(dyn, ninst, x1, eb2, 0);
+                } else {
+                    v0 = ymm_get_reg(dyn, ninst, x1, gd, 1, vex.v, eb2, -1);
+                    v2 = ymm_get_reg(dyn, ninst, x1, vex.v, 1, gd, eb2, -1);
+                    v1 = ymm_get_reg(dyn, ninst, x1, eb2, 0, gd, vex.v, -1);
+                }
+                // prepare mask
+                if(rex.w) VSSHRQ_64(v2, v2, 63); else VSSHRQ_32(v2, v2, 31);    // prescale the values
+                if(wb1) VSHLQ_32(q1, v1, wb1); else q1 = v1;
+                // slow gather, not much choice here...
+                if(rex.w) for(int i=0; i<2; ++i) {
+                    VMOVQDto(x4, v2, i);
+                    TBZ(x4, 0, 4+4*4);
+                    SMOVQSto(x4, q1, i);
+                    ADDx_REG(x4, x4, ed);
+                    VLD1_64(v0, i, x4);
+                    VMOVQDfrom(v2, i, xZR);
+                } else for(int i=0; i<4; ++i) {
+                    VMOVSto(x4, v2, i);
+                    TBZ(x4, 0, 4+4*4);
+                    SMOVQSto(x4, q1, i);
+                    ADDx_REG(x4, x4, ed);
+                    VLD1_32(v0, i, x4);
+                    VMOVQSfrom(v2, i, xZR);
+                }
+            }
+            if(!vex.l) {YMM0(gd); YMM0(vex.v);}
+            break;
+
+        case 0xA8:
+            INST_NAME("VFMADD213PS/D Gx, Vx, Ex");
+            nextop = F8;
+            if(MODREG) q0 = fpu_get_scratch(dyn, ninst);
+            for(int l=0; l<1+vex.l; ++l) {
+                if(!l) { GETGX_VXEX(v0, v2, v1, 0); } else { GETGY_VYEY(v0, v2, v1); }
+                if(MODREG) VMOVQ(q0, v1); else q0 = v1;
+                if(rex.w) VFMLAQD(q0, v0, v2); else VFMLAQS(q0, v0, v2);
+                VMOVQ(v0, q0);
+            }
+            if(!vex.l) YMM0(gd);
+            break;
+        case 0xA9:
+            INST_NAME("VFMADD213SS/D Gx, Vx, Ex");
+            nextop = F8;
+            GETGX(v0, 1);
+            GETVX(v2, 0);
+            if(rex.w) {GETEXSD(v1, 0, 0);} else {GETEXSS(v1, 0, 0);}
+            q0 = fpu_get_scratch(dyn, ninst);
+            if(rex.w) {
+                FMADD_64(q0, v1, v0, v2);
+                VMOVeD(v0, 0, q0, 0);
+            } else {
+                FMADD_32(q0, v1, v0, v2);
+                VMOVeS(v0, 0, q0, 0);
+            }
+            YMM0(gd);
+            break;
+
+        case 0xB8:
+            INST_NAME("VFMADD231PS/D Gx, Vx, Ex");
+            nextop = F8;
+            for(int l=0; l<1+vex.l; ++l) {
+                if(!l) { GETGX_VXEX(v0, v2, v1, 0); } else { GETGY_VYEY(v0, v2, v1); }
+                if(rex.w) VFMLAQD(v0, v1, v2); else VFMLAQS(v0, v1, v2);
+            }
+            if(!vex.l) YMM0(gd);
+            break;
+        case 0xB9:
+            INST_NAME("VFMADD231SS/D Gx, Vx, Ex");
+            nextop = F8;
+            GETGX(v0, 1);
+            GETVX(v2, 0);
+            if(rex.w) {GETEXSD(v1, 0, 0);} else {GETEXSS(v1, 0, 0);}
+            q0 = fpu_get_scratch(dyn, ninst);
+            if(rex.w) {
+                FMADD_64(q0, v0, v1, v2);
+                VMOVeD(v0, 0, q0, 0);
+            } else {
+                FMADD_32(q0, v0, v1, v2);
+                VMOVeS(v0, 0, q0, 0);
+            }
+            YMM0(gd);
+            break;
         case 0xDC:
             INST_NAME("VAESENC Gx, Vx, Ex");    // AES-NI
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h
index 7471ba55..ed593ca7 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.h
+++ b/src/dynarec/arm64/dynarec_arm64_helper.h
@@ -506,6 +506,12 @@
     GETEX_Y(ex, 1, D); \
     GETGX(gx, 0)
+// Get GX and and non-writen VX and EX
+#define GETGX_VXEX(gx, vx, ex, D) \
+    GETVX(vx, 0); \
+    GETEX_Y(ex, 0, D); \
+    GETGX(gx, 1)
+
 #define GETGXVXEX_empty(gx, vx, ex, D) \
     GETVX(vx, 0); \
     GETGX(gx, 0); \
@@ -529,6 +535,15 @@
     VLDR128_U12(ey, ed, fixedaddress+16); \
     gy = ymm_get_reg(dyn, ninst, x1, gd, 0, vex.v, (MODREG)?((nextop&7)+(rex.b<<3)):-1, -1)
+// Get GY and non-writen VY and EY
+#define GETGY_VYEY(gy, vy, ey) \
+    vy = ymm_get_reg(dyn, ninst, x1, vex.v, 0, gd, (MODREG)?((nextop&7)+(rex.b<<3)):-1, -1); \
+    if(MODREG) \
+        ey = ymm_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), 0, gd, vex.v, -1); \
+    else \
+        VLDR128_U12(ey, ed, fixedaddress+16); \
+    gy = ymm_get_reg(dyn, ninst, x1, gd, 1, vex.v, (MODREG)?((nextop&7)+(rex.b<<3)):-1, -1)
+
 // Get empty EY and non-writen VY and GY
 #define GETGYVYEY_empty(gy, vy, ey) \
     vy = ymm_get_reg(dyn, ninst, x1, vex.v, 0, gd, (MODREG)?((nextop&7)+(rex.b<<3)):-1, -1); \
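A note on the VPSIGNB/W/D translation above: the NEG, CMLT #0, BIF, CMEQ #0, BIC sequence reproduces the per-element rule of the x86 PSIGN family. A minimal scalar sketch of that rule for one byte lane, with an illustrative helper name that is not part of box64:

```c
#include <stdint.h>

/* Scalar model of one PSIGNB lane, mirroring the NEON sequence in
 * dynarec_arm64_avx_66_0f38.c: NEG builds -vx, CMLT #0 + BIF pick
 * between vx and -vx from the sign of ex, and CMEQ #0 + BIC clear
 * the lane when ex is zero. Illustrative only. */
static int8_t psignb_lane(int8_t vx, int8_t ex)
{
    if (ex < 0)  return (int8_t)-vx;  /* negative selector: negate  */
    if (ex == 0) return 0;            /* zero selector: clear lane  */
    return vx;                        /* positive selector: keep vx */
}
```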
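Likewise, the new scalar FMADD_32/FMADD_64 macros follow the AArch64 FMADD operand order Rd = Ra + Rn*Rm, so the VFMADD213SS/D and VFMADD231SS/D cases above differ only in which x86 operand is passed as the addend. A hedged scalar sketch of the two orderings, with illustrative function names only:

```c
/* FMADD_64(q0, v1, v0, v2) in the 213 case: Gx = Ex + Gx*Vx. */
static double vfmadd213_scalar(double gx, double vx, double ex) { return ex + gx * vx; }

/* FMADD_64(q0, v0, v1, v2) in the 231 case: Gx = Gx + Ex*Vx. */
static double vfmadd231_scalar(double gx, double vx, double ex) { return gx + ex * vx; }
```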