diff options
| author | phorcys <phorcys@126.com> | 2025-07-29 15:55:40 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-07-29 09:55:40 +0200 |
| commit | 0734efea394ad1086f33a5e01df6aab900f01b19 (patch) | |
| tree | 32a2b6aedc4ea1258a43f908bad5739634adce1f | |
| parent | f43a221ffca63e96e33301148df240a91df0d3c2 (diff) | |
| download | box64-0734efea394ad1086f33a5e01df6aab900f01b19.tar.gz box64-0734efea394ad1086f33a5e01df6aab900f01b19.zip | |
[LA64_DYNAREC] Add la64 avx float cmp ops. (#2854)
VCMPPD,VCMPPS,VCMPSD,VCMPSS
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_avx_0f.c | 24 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_avx_66_0f.c | 24 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_avx_f2_0f.c | 44 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_avx_f3_0f.c | 49 | ||||
| -rw-r--r-- | src/dynarec/la64/la64_emitter.h | 8 |
5 files changed, 132 insertions, 17 deletions
diff --git a/src/dynarec/la64/dynarec_la64_avx_0f.c b/src/dynarec/la64/dynarec_la64_avx_0f.c index f9211fbe..c0302d45 100644 --- a/src/dynarec/la64/dynarec_la64_avx_0f.c +++ b/src/dynarec/la64/dynarec_la64_avx_0f.c @@ -472,6 +472,30 @@ uintptr_t dynarec64_AVX_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, in DEFAULT; } break; + case 0xC2: + INST_NAME("VCMPPS Gx, Vx, Ex, Ib"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 1); + u8 = F8; + switch (u8 & 0xf) { + case 0x00: VFCMPxy(S, v0, v1, v2, cEQ); break; // Equal, not unordered + case 0x01: VFCMPxy(S, v0, v1, v2, cLT); break; // Less than + case 0x02: VFCMPxy(S, v0, v1, v2, cLE); break; // Less or equal + case 0x03: VFCMPxy(S, v0, v1, v2, cUN); break; // unordered + case 0x04: VFCMPxy(S, v0, v1, v2, cUNE); break; // Not Equal (or unordered on ARM, not on X86...) + case 0x05: VFCMPxy(S, v0, v2, v1, cULE); break; // Greater or equal or unordered + case 0x06: VFCMPxy(S, v0, v2, v1, cULT); break; // Greater or unordered + case 0x07: VFCMPxy(S, v0, v1, v2, cOR); break; // ordered + case 0x08: VFCMPxy(S, v0, v1, v2, cUEQ); break; // Equal, or unordered + case 0x09: VFCMPxy(S, v0, v1, v2, cULT); break; // Less than or unordered + case 0x0a: VFCMPxy(S, v0, v1, v2, cULE); break; // Less or equal or unordered + case 0x0b: XVXOR_V(v0, v0, v0); break; // false + case 0x0c: VFCMPxy(S, v0, v1, v2, cNE); break; // Not Equal, ordered + case 0x0d: VFCMPxy(S, v0, v2, v1, cLE); break; // Greater or Equal ordered + case 0x0e: VFCMPxy(S, v0, v2, v1, cLT); break; // Greater ordered + case 0x0f: VSEQxy(B, v0, v1, v1); break; // true + } + break; case 0xC6: INST_NAME("VSHUFPS Gx, Vx, Ex, Ib"); nextop = F8; diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f.c b/src/dynarec/la64/dynarec_la64_avx_66_0f.c index 6ae03e3c..977419e1 100644 --- a/src/dynarec/la64/dynarec_la64_avx_66_0f.c +++ b/src/dynarec/la64/dynarec_la64_avx_66_0f.c @@ -730,6 +730,30 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, 
uintptr_t ip, SMWRITE2(); } break; + case 0xC2: + INST_NAME("VCMPPD Gx, Vx, Ex, Ib"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 1); + u8 = F8; + switch (u8 & 0xf) { + case 0x00: VFCMPxy(D, v0, v1, v2, cEQ); break; // Equal, not unordered + case 0x01: VFCMPxy(D, v0, v1, v2, cLT); break; // Less than + case 0x02: VFCMPxy(D, v0, v1, v2, cLE); break; // Less or equal + case 0x03: VFCMPxy(D, v0, v1, v2, cUN); break; // unordered + case 0x04: VFCMPxy(D, v0, v1, v2, cUNE); break; // Not Equal (or unordered on ARM, not on X86...) + case 0x05: VFCMPxy(D, v0, v2, v1, cULE); break; // Greater or equal or unordered + case 0x06: VFCMPxy(D, v0, v2, v1, cULT); break; // Greater or unordered + case 0x07: VFCMPxy(D, v0, v1, v2, cOR); break; // ordered + case 0x08: VFCMPxy(D, v0, v1, v2, cUEQ); break; // Equal, or unordered + case 0x09: VFCMPxy(D, v0, v1, v2, cULT); break; // Less than or unordered + case 0x0a: VFCMPxy(D, v0, v1, v2, cULE); break; // Less or equal or unordered + case 0x0b: XVXOR_V(v0, v0, v0); break; // false + case 0x0c: VFCMPxy(D, v0, v1, v2, cNE); break; // Not Equal, ordered + case 0x0d: VFCMPxy(D, v0, v2, v1, cLE); break; // Greater or Equal ordered + case 0x0e: VFCMPxy(D, v0, v2, v1, cLT); break; // Greater ordered + case 0x0f: VSEQxy(B, v0, v1, v1); break; // true + } + break; case 0xC6: INST_NAME("VSHUFPD Gx, Vx, Ex, Ib"); nextop = F8; diff --git a/src/dynarec/la64/dynarec_la64_avx_f2_0f.c b/src/dynarec/la64/dynarec_la64_avx_f2_0f.c index 84cbe5e3..afe0b086 100644 --- a/src/dynarec/la64/dynarec_la64_avx_f2_0f.c +++ b/src/dynarec/la64/dynarec_la64_avx_f2_0f.c @@ -131,7 +131,7 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, BCEQZ(fcc0, 4 + 4); FNEG_D(d1, d1); } - if(v0 != v1) VOR_V(v0, v1, v1); + if (v0 != v1) VOR_V(v0, v1, v1); VEXTRINS_D(v0, d1, 0); break; case 0x58: @@ -150,7 +150,7 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, FNEG_D(d0, d0); } MARK; - if(v0 != v1) 
VOR_V(v0, v1, v1); + if (v0 != v1) VOR_V(v0, v1, v1); VEXTRINS_D(v0, d0, 0); break; case 0x59: @@ -169,7 +169,7 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, FNEG_D(d0, d0); } MARK; - if(v0 != v1) VOR_V(v0, v1, v1); + if (v0 != v1) VOR_V(v0, v1, v1); VEXTRINS_D(v0, d0, 0); break; case 0x5C: @@ -188,7 +188,7 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, FNEG_D(d0, d0); } MARK; - if(v0 != v1) VOR_V(v0, v1, v1); + if (v0 != v1) VOR_V(v0, v1, v1); VEXTRINS_D(v0, d0, 0); break; case 0x5D: @@ -204,7 +204,7 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, FCMP_D(fcc0, v2, v1, cULE); FSEL(q0, v1, v2, fcc0); } - if(v0 != v1) VOR_V(v0, v1, v1); + if (v0 != v1) VOR_V(v0, v1, v1); VEXTRINS_D(v0, q0, 0); break; case 0x5E: @@ -223,7 +223,7 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, FNEG_D(d0, d0); } MARK; - if(v0 != v1) VOR_V(v0, v1, v1); + if (v0 != v1) VOR_V(v0, v1, v1); VEXTRINS_D(v0, d0, 0); break; case 0x5F: @@ -239,7 +239,7 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, FCMP_D(fcc0, v2, v1, cLT); FSEL(q0, v2, v1, fcc0); } - if(v0 != v1) VOR_V(v0, v1, v1); + if (v0 != v1) VOR_V(v0, v1, v1); VEXTRINS_D(v0, q0, 0); break; case 0x70: @@ -298,6 +298,36 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, VBITSEL_Vxy(v0, v0, d1, d0); } break; + case 0xC2: + INST_NAME("VCMPSD Gx, Vx, Ex, Ib"); + nextop = F8; + GETVYx(v1, 0); + GETEYSD(v2, 0, 1); + GETGYx_empty(v0); + q0 = fpu_get_scratch(dyn); + u8 = F8; + switch (u8 & 0xf) { + case 0x00: VFCMP_D(q0, v1, v2, cEQ); break; // Equal, not unordered + case 0x01: VFCMP_D(q0, v1, v2, cLT); break; // Less than + case 0x02: VFCMP_D(q0, v1, v2, cLE); break; // Less or equal + case 0x03: VFCMP_D(q0, v1, v2, cUN); break; // unordered + case 0x04: VFCMP_D(q0, v1, v2, cUNE); break; // Not Equal (or unordered on ARM, not on 
X86...) + case 0x05: VFCMP_D(q0, v2, v1, cULE); break; // Greater or equal or unordered + case 0x06: VFCMP_D(q0, v2, v1, cULT); break; // Greater or unordered + case 0x07: VFCMP_D(q0, v1, v2, cOR); break; // ordered + case 0x08: VFCMP_D(q0, v1, v2, cUEQ); break; // Equal, or unordered + case 0x09: VFCMP_D(q0, v1, v2, cULT); break; // Less than or unordered + case 0x0a: VFCMP_D(q0, v1, v2, cULE); break; // Less or equal or unordered + case 0x0b: VXOR_V(q0, q0, q0); break; // false + case 0x0c: VFCMP_D(q0, v1, v2, cNE); break; // Not Equal, ordered + case 0x0d: VFCMP_D(q0, v2, v1, cLE); break; // Greater or Equal ordered + case 0x0e: VFCMP_D(q0, v2, v1, cLT); break; // Greater ordered + case 0x0f: VSEQ_B(q0, v1, v1); break; // true + } + XVXOR_V(v0, v0, v0); + XVINSVE0_D(v0, q0, 0); + YMM_UNMARK_UPPER_ZERO(v0); + break; case 0xD0: INST_NAME("VADDSUBPS Gx, Vx, Ex"); nextop = F8; diff --git a/src/dynarec/la64/dynarec_la64_avx_f3_0f.c b/src/dynarec/la64/dynarec_la64_avx_f3_0f.c index a61dcbb7..ebe56672 100644 --- a/src/dynarec/la64/dynarec_la64_avx_f3_0f.c +++ b/src/dynarec/la64/dynarec_la64_avx_f3_0f.c @@ -115,7 +115,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, BCEQZ(fcc0, 4 + 4); FNEG_S(d1, d1); } - if(v0 != v1) VOR_V(v0, v1, v1); + if (v0 != v1) VOR_V(v0, v1, v1); VEXTRINS_W(v0, d1, 0); break; case 0x52: @@ -130,7 +130,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, } else { FRSQRT_S(d0, v1); } - if(v0 != v1) VOR_V(v0, v1, v1); + if (v0 != v1) VOR_V(v0, v1, v1); VEXTRINS_W(v0, d0, 0); break; case 0x53: @@ -145,7 +145,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, } else { FRECIP_S(d0, v1); } - if(v0 != v1) VOR_V(v0, v1, v1); + if (v0 != v1) VOR_V(v0, v1, v1); VEXTRINS_W(v0, d0, 0); break; case 0x58: @@ -164,7 +164,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, FNEG_S(d0, d0); } MARK; - if(v0 != v1) 
VOR_V(v0, v1, v1); + if (v0 != v1) VOR_V(v0, v1, v1); VEXTRINS_W(v0, d0, 0); break; case 0x59: @@ -183,7 +183,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, FNEG_S(d0, d0); } MARK; - if(v0 != v1) VOR_V(v0, v1, v1); + if (v0 != v1) VOR_V(v0, v1, v1); VEXTRINS_W(v0, d0, 0); break; case 0x5C: @@ -202,7 +202,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, FNEG_S(d0, d0); } MARK; - if(v0 != v1) VOR_V(v0, v1, v1); + if (v0 != v1) VOR_V(v0, v1, v1); VEXTRINS_W(v0, d0, 0); break; case 0x5D: @@ -218,7 +218,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, FCMP_S(fcc0, v2, v1, cULE); FSEL(q0, v1, v2, fcc0); } - if(v0 != v1) VOR_V(v0, v1, v1); + if (v0 != v1) VOR_V(v0, v1, v1); VEXTRINS_W(v0, q0, 0); break; case 0x5E: @@ -237,7 +237,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, FNEG_S(d0, d0); } MARK; - if(v0 != v1) VOR_V(v0, v1, v1); + if (v0 != v1) VOR_V(v0, v1, v1); VEXTRINS_W(v0, d0, 0); break; case 0x5F: @@ -253,7 +253,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, FCMP_S(fcc0, v2, v1, cLT); FSEL(q0, v2, v1, fcc0); } - if(v0 != v1) VOR_V(v0, v1, v1); + if (v0 != v1) VOR_V(v0, v1, v1); VEXTRINS_W(v0, q0, 0); break; case 0x6F: @@ -320,7 +320,36 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, SMWRITE2(); } break; - + case 0xC2: + INST_NAME("VCMPSS Gx, Vx, Ex, Ib"); + nextop = F8; + GETVYx(v1, 0); + GETEYSS(v2, 0, 1); + GETGYx_empty(v0); + q0 = fpu_get_scratch(dyn); + u8 = F8; + switch (u8 & 0xf) { + case 0x00: VFCMP_S(q0, v1, v2, cEQ); break; // Equal, not unordered + case 0x01: VFCMP_S(q0, v1, v2, cLT); break; // Less than + case 0x02: VFCMP_S(q0, v1, v2, cLE); break; // Less or equal + case 0x03: VFCMP_S(q0, v1, v2, cUN); break; // unordered + case 0x04: VFCMP_S(q0, v1, v2, cUNE); break; // Not Equal (or unordered on ARM, not on X86...) 
+ case 0x05: VFCMP_S(q0, v2, v1, cULE); break; // Greater or equal or unordered + case 0x06: VFCMP_S(q0, v2, v1, cULT); break; // Greater or unordered + case 0x07: VFCMP_S(q0, v1, v2, cOR); break; // ordered + case 0x08: VFCMP_S(q0, v1, v2, cUEQ); break; // Equal, or unordered + case 0x09: VFCMP_S(q0, v1, v2, cULT); break; // Less than or unordered + case 0x0a: VFCMP_S(q0, v1, v2, cULE); break; // Less or equal or unordered + case 0x0b: VXOR_V(q0, q0, q0); break; // false + case 0x0c: VFCMP_S(q0, v1, v2, cNE); break; // Not Equal, ordered + case 0x0d: VFCMP_S(q0, v2, v1, cLE); break; // Greater or Equal ordered + case 0x0e: VFCMP_S(q0, v2, v1, cLT); break; // Greater ordered + case 0x0f: VSEQ_B(q0, v1, v1); break; // true + } + XVXOR_V(v0, v0, v0); + XVINSVE0_W(v0, q0, 0); + YMM_UNMARK_UPPER_ZERO(v0); + break; default: DEFAULT; } diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h index b6fe7b65..0c49824b 100644 --- a/src/dynarec/la64/la64_emitter.h +++ b/src/dynarec/la64/la64_emitter.h @@ -3287,4 +3287,12 @@ LSX instruction starts with V, LASX instruction starts with XV. } \ } while (0) +#define VSEQxy(width, vd, vj, vk) \ + do { \ + if (vex.l) { \ + XVSEQ_##width(vd, vj, vk); \ + } else { \ + VSEQ_##width(vd, vj, vk); \ + } \ + } while (0) #endif //__ARM64_EMITTER_H__ |