diff options
| author | phorcys <phorcys@126.com> | 2025-07-29 23:29:59 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-07-29 17:29:59 +0200 |
| commit | 5fd843750fe1af2b8775f0e491cd07d1cf06fe77 (patch) | |
| tree | 2f6e4d62d3fd5d936de7f44e433bc842d98d1307 | |
| parent | 8180cb321818abcec2ffe41d908c3095380e79b1 (diff) | |
| download | box64-5fd843750fe1af2b8775f0e491cd07d1cf06fe77.tar.gz box64-5fd843750fe1af2b8775f0e491cd07d1cf06fe77.zip | |
[LA64_DYNAREC] Add la64 avx int cmp ops. (#2856)
VCMP{EQ,GT}{B,W,D,Q}
VCOMISS, VUCOMISS, VCOMISD, VUCOMISD

| -rw-r--r-- | src/dynarec/la64/dynarec_la64_avx_0f.c | 41 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_avx_66_0f.c | 76 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_avx_66_0f38.c | 24 | ||||
| -rw-r--r-- | src/dynarec/la64/la64_emitter.h | 12 |
4 files changed, 146 insertions(+), 7 deletions(-)
diff --git a/src/dynarec/la64/dynarec_la64_avx_0f.c b/src/dynarec/la64/dynarec_la64_avx_0f.c index cb1921a5..8520045d 100644 --- a/src/dynarec/la64/dynarec_la64_avx_0f.c +++ b/src/dynarec/la64/dynarec_la64_avx_0f.c @@ -229,6 +229,47 @@ uintptr_t dynarec64_AVX_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, in SMWRITE2(); } break; + case 0x2E: + // no special check... + case 0x2F: + if (opcode == 0x2F) { + INST_NAME("VCOMISS Gx, Ex"); + } else { + INST_NAME("VUCOMISS Gx, Ex"); + } + SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION); + SET_DFNONE(); + nextop = F8; + GETGYx(d0, 0); + GETEYSS(v0, 0, 0); + CLEAR_FLAGS(x2); + // if isnan(d0) || isnan(v0) + IFX (X_ZF | X_PF | X_CF) { + FCMP_S(fcc0, d0, v0, cUN); + BCEQZ_MARK(fcc0); + ORI(xFlags, xFlags, (1 << F_ZF) | (1 << F_PF) | (1 << F_CF)); + B_MARK3_nocond; + } + MARK; + // else if isless(d0, v0) + IFX (X_CF) { + FCMP_S(fcc1, d0, v0, cLT); + BCEQZ_MARK2(fcc1); + ORI(xFlags, xFlags, 1 << F_CF); + B_MARK3_nocond; + } + MARK2; + // else if d0 == v0 + IFX (X_ZF) { + FCMP_S(fcc2, d0, v0, cEQ); + BCEQZ_MARK3(fcc2); + ORI(xFlags, xFlags, 1 << F_ZF); + } + MARK3; + IFX (X_ALL) { + SPILL_EFLAGS(); + } + break; case 0x50: nextop = F8; INST_NAME("VMOVMSKPS Gd, Ex"); diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f.c b/src/dynarec/la64/dynarec_la64_avx_66_0f.c index 977419e1..63b2469e 100644 --- a/src/dynarec/la64/dynarec_la64_avx_66_0f.c +++ b/src/dynarec/la64/dynarec_la64_avx_66_0f.c @@ -223,6 +223,46 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, SMWRITE2(); } break; + case 0x2E: + // no special check... 
+ case 0x2F: + if (opcode == 0x2F) { + INST_NAME("VCOMISD Gx, Ex"); + } else { + INST_NAME("VUCOMISD Gx, Ex"); + } + SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION); + SET_DFNONE(); + nextop = F8; + GETGYx(d0, 0); + GETEYSD(v0, 0, 0); + + CLEAR_FLAGS(x3); + // if isnan(d0) || isnan(v0) + IFX (X_ZF | X_PF | X_CF) { + FCMP_D(fcc0, d0, v0, cUN); + BCEQZ_MARK(fcc0); + ORI(xFlags, xFlags, (1 << F_ZF) | (1 << F_PF) | (1 << F_CF)); + B_MARK3_nocond; + } + MARK; + // else if isless(d0, v0) + IFX (X_CF) { + FCMP_D(fcc1, d0, v0, cLT); + BCEQZ_MARK2(fcc1); + ORI(xFlags, xFlags, 1 << F_CF); + B_MARK3_nocond; + } + MARK2; + // else if d0 == v0 + IFX (X_ZF) { + FCMP_D(fcc2, d0, v0, cEQ); + BCEQZ_MARK3(fcc2); + ORI(xFlags, xFlags, 1 << F_ZF); + } + MARK3; + SPILL_EFLAGS(); + break; case 0x50: nextop = F8; INST_NAME("VMOVMSKPD Gd, Ex"); @@ -410,6 +450,24 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, VPICKEVxy(B, v0, q0, v0); } break; + case 0x64: + INST_NAME("VPCMPGTB Gx, Vx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + VSLTxy(B, v0, v2, v1); + break; + case 0x65: + INST_NAME("VPCMPGTW Gx, Vx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + VSLTxy(H, v0, v2, v1); + break; + case 0x66: + INST_NAME("VPCMPGTD Gx, Vx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + VSLTxy(W, v0, v2, v1); + break; case 0x67: INST_NAME("VPACKUSWB Gx, Vx, Ex"); nextop = F8; @@ -646,6 +704,24 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, DEFAULT; } break; + case 0x74: + INST_NAME("VPCMPEQB Gx, Vx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + VSEQxy(B, v0, v1, v2); + break; + case 0x75: + INST_NAME("VPCMPEQW Gx, Vx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + VSEQxy(H, v0, v1, v2); + break; + case 0x76: + INST_NAME("VPCMPEQD Gx, Vx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + VSEQxy(W, v0, v1, v2); + break; case 0x7C: INST_NAME("VHADDPD Gx, Vx, 
Ex"); nextop = F8; diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f38.c b/src/dynarec/la64/dynarec_la64_avx_66_0f38.c index 26808486..aa832385 100644 --- a/src/dynarec/la64/dynarec_la64_avx_66_0f38.c +++ b/src/dynarec/la64/dynarec_la64_avx_66_0f38.c @@ -339,6 +339,12 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i GETGY_empty_VYEY_xy(v0, v1, v2, 0); VMULWEVxy(D_W, v0, v1, v2); break; + case 0x29: + INST_NAME("VPCMPEQQ Gx, Vx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + VSEQxy(D, v0, v2, v1); + break; case 0x2B: INST_NAME("VPACKUSDW Gx, Vx, Ex"); nextop = F8; @@ -507,6 +513,12 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i GETGY_empty_VYEY_xy(v0, v1, v2, 0); XVPERM_W(v0, v2, v1); break; + case 0x37: + INST_NAME("VPCMPGTQ Gx, Vx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + VSLTxy(D, v0, v2, v1); + break; case 0x38: INST_NAME("VPMINSB Gx, Vx, Ex"); nextop = F8; @@ -872,7 +884,7 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i q0 = fpu_get_scratch(dyn); VFMADDxyxw(q0, v0, v2, v1); VFMSUBxyxw(v0, v0, v2, v1); - if(rex.w){ + if (rex.w) { VEXTRINSxy(D, v0, q0, VEXTRINS_IMM_4_0(1, 1)); } else { VEXTRINSxy(W, v0, q0, VEXTRINS_IMM_4_0(1, 1)); @@ -886,7 +898,7 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i q0 = fpu_get_scratch(dyn); VFMSUBxyxw(q0, v0, v2, v1); VFMADDxyxw(v0, v0, v2, v1); - if(rex.w){ + if (rex.w) { VEXTRINSxy(D, v0, q0, VEXTRINS_IMM_4_0(1, 1)); } else { VEXTRINSxy(W, v0, q0, VEXTRINS_IMM_4_0(1, 1)); @@ -956,7 +968,7 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i q0 = fpu_get_scratch(dyn); VFMADDxyxw(q0, v1, v0, v2); VFMSUBxyxw(v0, v1, v0, v2); - if(rex.w){ + if (rex.w) { VEXTRINSxy(D, v0, q0, VEXTRINS_IMM_4_0(1, 1)); } else { VEXTRINSxy(W, v0, q0, VEXTRINS_IMM_4_0(1, 1)); @@ -970,7 +982,7 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_la64_t* dyn, 
uintptr_t addr, uintptr_t i q0 = fpu_get_scratch(dyn); VFMSUBxyxw(q0, v1, v0, v2); VFMADDxyxw(v0, v1, v0, v2); - if(rex.w){ + if (rex.w) { VEXTRINSxy(D, v0, q0, VEXTRINS_IMM_4_0(1, 1)); } else { VEXTRINSxy(W, v0, q0, VEXTRINS_IMM_4_0(1, 1)); @@ -1040,7 +1052,7 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i q0 = fpu_get_scratch(dyn); VFMADDxyxw(q0, v1, v2, v0); VFMSUBxyxw(v0, v1, v2, v0); - if(rex.w){ + if (rex.w) { VEXTRINSxy(D, v0, q0, VEXTRINS_IMM_4_0(1, 1)); } else { VEXTRINSxy(W, v0, q0, VEXTRINS_IMM_4_0(1, 1)); @@ -1054,7 +1066,7 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i q0 = fpu_get_scratch(dyn); VFMSUBxyxw(q0, v1, v2, v0); VFMADDxyxw(v0, v1, v2, v0); - if(rex.w){ + if (rex.w) { VEXTRINSxy(D, v0, q0, VEXTRINS_IMM_4_0(1, 1)); } else { VEXTRINSxy(W, v0, q0, VEXTRINS_IMM_4_0(1, 1)); diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h index 0c49824b..aa5399b9 100644 --- a/src/dynarec/la64/la64_emitter.h +++ b/src/dynarec/la64/la64_emitter.h @@ -3295,4 +3295,14 @@ LSX instruction starts with V, LASX instruction starts with XV. VSEQ_##width(vd, vj, vk); \ } \ } while (0) -#endif //__ARM64_EMITTER_H__ + +#define VSLTxy(width, vd, vj, vk) \ + do { \ + if (vex.l) { \ + XVSLT_##width(vd, vj, vk); \ + } else { \ + VSLT_##width(vd, vj, vk); \ + } \ + } while (0) + +#endif //__LA64_EMITTER_H__ |