diff options
| author | phorcys <phorcys@126.com> | 2025-07-29 15:08:24 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-07-29 09:08:24 +0200 |
| commit | f43a221ffca63e96e33301148df240a91df0d3c2 (patch) | |
| tree | 90d7fd67d372a8056fd092585ec084bd9249d975 /src | |
| parent | 397399fd30cc8ebe2c45440a2afc8e7c6c0f80f4 (diff) | |
| download | box64-f43a221ffca63e96e33301148df240a91df0d3c2.tar.gz box64-f43a221ffca63e96e33301148df240a91df0d3c2.zip | |
[LA64_DYNAREC] Add la64 avx float ops part 3. (#2845)
* add cpuext.frecipe for LoongArch V1.1
* Fix VFRSQRTE in sse op RSQRTPS/RSQRTSS
* Fix VFRECIPE in sse op RCPPS/RCPSS
* V{MAX,MIN}{PD,PS,SD,SS}
* VRCPPS,VRCPSS
* VRSQRTPS,VRSQRTSS
* VSQRT{PD,PS,SD,SS}

Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_0f.c | 12 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_avx_0f.c | 63 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_avx_66_0f.c | 43 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_avx_f2_0f.c | 58 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_avx_f3_0f.c | 88 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_f30f.c | 18 | ||||
| -rw-r--r-- | src/dynarec/la64/la64_emitter.h | 92 | ||||
| -rw-r--r-- | src/dynarec/la64/la64_printer.c | 64 | ||||
| -rw-r--r-- | src/include/hostext.h | 1 | ||||
| -rw-r--r-- | src/os/hostext_common.c | 2 | ||||
| -rw-r--r-- | src/os/hostext_linux.c | 1 |
11 files changed, 412 insertions, 30 deletions
diff --git a/src/dynarec/la64/dynarec_la64_0f.c b/src/dynarec/la64/dynarec_la64_0f.c index bbe42e3c..81eac050 100644 --- a/src/dynarec/la64/dynarec_la64_0f.c +++ b/src/dynarec/la64/dynarec_la64_0f.c @@ -790,7 +790,11 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SKIPTEST(x1); GETEX(q0, 0, 0); GETGX_empty(q1); - VFRSQRT_S(q1, q0); + if(cpuext.frecipe){ + VFRSQRTE_S(q1, q0); + } else { + VFRSQRT_S(q1, q0); + } break; case 0x53: INST_NAME("RCPPS Gx, Ex"); @@ -798,7 +802,11 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SKIPTEST(x1); GETEX(q0, 0, 0); GETGX_empty(q1); - // TODO: use v1.1 vfrecipe when possible + if(cpuext.frecipe){ + VFRECIPE_S(q1, q0); + }else{ + VFRECIP_S(q1, q0); + } VFRECIP_S(q1, q0); break; case 0x54: diff --git a/src/dynarec/la64/dynarec_la64_avx_0f.c b/src/dynarec/la64/dynarec_la64_avx_0f.c index 01ae9f5b..f9211fbe 100644 --- a/src/dynarec/la64/dynarec_la64_avx_0f.c +++ b/src/dynarec/la64/dynarec_la64_avx_0f.c @@ -245,6 +245,43 @@ uintptr_t dynarec64_AVX_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, in VPICKVE2GR_DU(gd, d1, 0); } break; + case 0x51: + INST_NAME("VSQRTPS Gx, Ex"); + nextop = F8; + GETGY_empty_EY_xy(v0, v1, 0); + if (!BOX64ENV(dynarec_fastnan)) { + d0 = fpu_get_scratch(dyn); + d1 = fpu_get_scratch(dyn); + VFCMPxy(S, d0, v1, v1, cEQ); + VFSQRTxy(S, v0, v1); + VFCMPxy(S, d1, v0, v0, cEQ); + VANDN_Vxy(d1, d1, d0); + VSLLIxy(W, d1, d1, 31); + VOR_Vxy(v0, v0, d1); + } else { + VFSQRTxy(S, v0, v1); + } + break; + case 0x52: + INST_NAME("VRSQRTPS Gx, Ex"); + nextop = F8; + GETGY_empty_EY_xy(v0, v1, 0); + if (cpuext.frecipe) { + VFRSQRTExy(S, v0, v1); + } else { + VFRSQRTxy(S, v0, v1); + } + break; + case 0x53: + INST_NAME("VRCPPS Gx, Ex"); + nextop = F8; + GETGY_empty_EY_xy(v0, v1, 0); + if (cpuext.frecipe) { + VFRECIPExy(S, v0, v1); + } else { + VFRECIPxy(S, v0, v1); + } + break; case 0x54: INST_NAME("VANDPS Gx, Vx, Ex"); nextop = F8; @@ -323,6 +360,19 @@ 
uintptr_t dynarec64_AVX_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, in VBITSEL_Vxy(v0, v0, d1, d0); } break; + case 0x5D: + INST_NAME("VMINPS Gx, Vx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + if (BOX64ENV(dynarec_fastnan)) { + VFMINxy(S, v0, v2, v1); + } else { + q0 = fpu_get_scratch(dyn); + q1 = fpu_get_scratch(dyn); + VFCMPxy(S, q0, v2, v1, cULE); + VBITSEL_Vxy(v0, v1, v2, q0); + } + break; case 0x5E: INST_NAME("VDIVPS Gx, Vx, Ex"); nextop = F8; @@ -341,6 +391,19 @@ uintptr_t dynarec64_AVX_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, in VBITSEL_Vxy(v0, v0, d1, d0); } break; + case 0x5F: + INST_NAME("VMAXPS Gx, Vx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + if (BOX64ENV(dynarec_fastnan)) { + VFMAXxy(S, v0, v2, v1); + } else { + q0 = fpu_get_scratch(dyn); + q1 = fpu_get_scratch(dyn); + VFCMPxy(S, q0, v2, v1, cLT); + VBITSEL_Vxy(v0, v2, v1, q0); + } + break; case 0x77: if (!vex.l) { INST_NAME("VZEROUPPER"); diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f.c b/src/dynarec/la64/dynarec_la64_avx_66_0f.c index 4217a713..6ae03e3c 100644 --- a/src/dynarec/la64/dynarec_la64_avx_66_0f.c +++ b/src/dynarec/la64/dynarec_la64_avx_66_0f.c @@ -239,6 +239,23 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, VPICKVE2GR_DU(gd, d1, 0); } break; + case 0x51: + INST_NAME("VSQRTPD Gx, Ex"); + nextop = F8; + GETGY_empty_EY_xy(v0, v1, 0); + if (!BOX64ENV(dynarec_fastnan)) { + d0 = fpu_get_scratch(dyn); + d1 = fpu_get_scratch(dyn); + VFCMPxy(D, d0, v1, v1, cEQ); + VFSQRTxy(D, v0, v1); + VFCMPxy(D, d1, v0, v0, cEQ); + VANDN_Vxy(d1, d1, d0); + VSLLIxy(D, d1, d1, 63); + VOR_Vxy(v0, v0, d1); + } else { + VFSQRTxy(D, v0, v1); + } + break; case 0x54: INST_NAME("VANDPD Gx, Vx, Ex"); nextop = F8; @@ -317,6 +334,19 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, VBITSEL_Vxy(v0, v0, d1, d0); } break; + case 0x5D: + INST_NAME("VMINPD Gx, Vx, Ex"); + nextop = F8; + 
GETGY_empty_VYEY_xy(v0, v1, v2, 0); + if (BOX64ENV(dynarec_fastnan)) { + VFMINxy(D, v0, v2, v1); + } else { + q0 = fpu_get_scratch(dyn); + q1 = fpu_get_scratch(dyn); + VFCMPxy(D, q0, v2, v1, cULE); + VBITSEL_Vxy(v0, v1, v2, q0); + } + break; case 0x5E: INST_NAME("VDIVPD Gx, Vx, Ex"); nextop = F8; @@ -335,6 +365,19 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, VBITSEL_Vxy(v0, v0, d1, d0); } break; + case 0x5F: + INST_NAME("VMAXPD Gx, Vx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + if (BOX64ENV(dynarec_fastnan)) { + VFMAXxy(D, v0, v2, v1); + } else { + q0 = fpu_get_scratch(dyn); + q1 = fpu_get_scratch(dyn); + VFCMPxy(D, q0, v2, v1, cLT); + VBITSEL_Vxy(v0, v2, v1, q0); + } + break; case 0x60: INST_NAME("VPUNPCKLBW Gx, Vx, Ex"); nextop = F8; diff --git a/src/dynarec/la64/dynarec_la64_avx_f2_0f.c b/src/dynarec/la64/dynarec_la64_avx_f2_0f.c index 2f492c85..84cbe5e3 100644 --- a/src/dynarec/la64/dynarec_la64_avx_f2_0f.c +++ b/src/dynarec/la64/dynarec_la64_avx_f2_0f.c @@ -116,6 +116,24 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, VREPLVE_D(q0, q1, 0); } break; + case 0x51: + INST_NAME("VSQRTSD Gx, Vx, Ex"); + nextop = F8; + GETVYx(v1, 0); + GETEYSD(v2, 0, 0); + GETGYx_empty(v0); + d1 = fpu_get_scratch(dyn); + FSQRT_D(d1, v2); + if (!BOX64ENV(dynarec_fastnan)) { + d0 = fpu_get_scratch(dyn); + VXOR_V(d0, d0, d0); + FCMP_D(fcc0, v2, d0, cLT); + BCEQZ(fcc0, 4 + 4); + FNEG_D(d1, d1); + } + if(v0 != v1) VOR_V(v0, v1, v1); + VEXTRINS_D(v0, d1, 0); + break; case 0x58: INST_NAME("VADDSD Gx, Vx, Ex"); nextop = F8; @@ -132,7 +150,7 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, FNEG_D(d0, d0); } MARK; - VOR_V(v0, v1, v1); + if(v0 != v1) VOR_V(v0, v1, v1); VEXTRINS_D(v0, d0, 0); break; case 0x59: @@ -151,7 +169,7 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, FNEG_D(d0, d0); } MARK; - VOR_V(v0, v1, v1); + if(v0 != v1) 
VOR_V(v0, v1, v1); VEXTRINS_D(v0, d0, 0); break; case 0x5C: @@ -170,9 +188,25 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, FNEG_D(d0, d0); } MARK; - VOR_V(v0, v1, v1); + if(v0 != v1) VOR_V(v0, v1, v1); VEXTRINS_D(v0, d0, 0); break; + case 0x5D: + INST_NAME("VMINSD Gx, Vx, Ex"); + nextop = F8; + GETVYx(v1, 1); + GETEYSD(v2, 0, 0); + GETGYx_empty(v0); + q0 = fpu_get_scratch(dyn); + if (BOX64ENV(dynarec_fastnan)) { + FMIN_D(q0, v1, v2); + } else { + FCMP_D(fcc0, v2, v1, cULE); + FSEL(q0, v1, v2, fcc0); + } + if(v0 != v1) VOR_V(v0, v1, v1); + VEXTRINS_D(v0, q0, 0); + break; case 0x5E: INST_NAME("VDIVSD Gx, Vx, Ex"); nextop = F8; @@ -189,9 +223,25 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, FNEG_D(d0, d0); } MARK; - VOR_V(v0, v1, v1); + if(v0 != v1) VOR_V(v0, v1, v1); VEXTRINS_D(v0, d0, 0); break; + case 0x5F: + INST_NAME("VMAXSD Gx, Vx, Ex"); + nextop = F8; + GETVYx(v1, 1); + GETEYSD(v2, 0, 0); + GETGYx_empty(v0); + q0 = fpu_get_scratch(dyn); + if (BOX64ENV(dynarec_fastnan)) { + FMAX_D(q0, v1, v2); + } else { + FCMP_D(fcc0, v2, v1, cLT); + FSEL(q0, v2, v1, fcc0); + } + if(v0 != v1) VOR_V(v0, v1, v1); + VEXTRINS_D(v0, q0, 0); + break; case 0x70: INST_NAME("VPSHUFLW Gx, Ex, Ib"); nextop = F8; diff --git a/src/dynarec/la64/dynarec_la64_avx_f3_0f.c b/src/dynarec/la64/dynarec_la64_avx_f3_0f.c index f2ea3acd..a61dcbb7 100644 --- a/src/dynarec/la64/dynarec_la64_avx_f3_0f.c +++ b/src/dynarec/la64/dynarec_la64_avx_f3_0f.c @@ -100,6 +100,54 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, VPACKOD_W(q0, q1, q1); } break; + case 0x51: + INST_NAME("VSQRTSS Gx, Vx, Ex"); + nextop = F8; + GETVYx(v1, 0); + GETEYSS(v2, 0, 0); + GETGYx_empty(v0); + d1 = fpu_get_scratch(dyn); + FSQRT_S(d1, v2); + if (!BOX64ENV(dynarec_fastnan)) { + d0 = fpu_get_scratch(dyn); + VXOR_V(d0, d0, d0); + FCMP_S(fcc0, v2, d0, cLT); + BCEQZ(fcc0, 4 + 4); + FNEG_S(d1, d1); + } + if(v0 != v1) VOR_V(v0, 
v1, v1); + VEXTRINS_W(v0, d1, 0); + break; + case 0x52: + INST_NAME("VRSQRTSS Gx, Vx, Ex"); + nextop = F8; + GETVYx(v1, 0); + GETEYSS(v2, 0, 0); + GETGYx_empty(v0); + d0 = fpu_get_scratch(dyn); + if (cpuext.frecipe) { + FRSQRTE_S(d0, v1); + } else { + FRSQRT_S(d0, v1); + } + if(v0 != v1) VOR_V(v0, v1, v1); + VEXTRINS_W(v0, d0, 0); + break; + case 0x53: + INST_NAME("VRCPSS Gx, Vx, Ex"); + nextop = F8; + GETVYx(v1, 0); + GETEYSS(v2, 0, 0); + GETGYx_empty(v0); + d0 = fpu_get_scratch(dyn); + if (cpuext.frecipe) { + FRECIPE_S(d0, v1); + } else { + FRECIP_S(d0, v1); + } + if(v0 != v1) VOR_V(v0, v1, v1); + VEXTRINS_W(v0, d0, 0); + break; case 0x58: INST_NAME("VADDSS Gx, Vx, Ex"); nextop = F8; @@ -116,7 +164,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, FNEG_S(d0, d0); } MARK; - VOR_V(v0, v1, v1); + if(v0 != v1) VOR_V(v0, v1, v1); VEXTRINS_W(v0, d0, 0); break; case 0x59: @@ -135,7 +183,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, FNEG_S(d0, d0); } MARK; - VOR_V(v0, v1, v1); + if(v0 != v1) VOR_V(v0, v1, v1); VEXTRINS_W(v0, d0, 0); break; case 0x5C: @@ -154,9 +202,25 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, FNEG_S(d0, d0); } MARK; - VOR_V(v0, v1, v1); + if(v0 != v1) VOR_V(v0, v1, v1); VEXTRINS_W(v0, d0, 0); break; + case 0x5D: + INST_NAME("VMINSS Gx, Vx, Ex"); + nextop = F8; + GETVYx(v1, 1); + GETEYSS(v2, 0, 0); + GETGYx_empty(v0); + q0 = fpu_get_scratch(dyn); + if (BOX64ENV(dynarec_fastnan)) { + FMIN_S(q0, v1, v2); + } else { + FCMP_S(fcc0, v2, v1, cULE); + FSEL(q0, v1, v2, fcc0); + } + if(v0 != v1) VOR_V(v0, v1, v1); + VEXTRINS_W(v0, q0, 0); + break; case 0x5E: INST_NAME("VDIVSS Gx, Vx, Ex"); nextop = F8; @@ -173,9 +237,25 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, FNEG_S(d0, d0); } MARK; - VOR_V(v0, v1, v1); + if(v0 != v1) VOR_V(v0, v1, v1); VEXTRINS_W(v0, d0, 0); break; + case 0x5F: + INST_NAME("VMAXSS Gx, 
Vx, Ex"); + nextop = F8; + GETVYx(v1, 1); + GETEYSS(v2, 0, 0); + GETGYx_empty(v0); + q0 = fpu_get_scratch(dyn); + if (BOX64ENV(dynarec_fastnan)) { + FMAX_S(q0, v1, v2); + } else { + FCMP_S(fcc0, v2, v1, cLT); + FSEL(q0, v2, v1, fcc0); + } + if(v0 != v1) VOR_V(v0, v1, v1); + VEXTRINS_W(v0, q0, 0); + break; case 0x6F: INST_NAME("VMOVDQU Gx, Ex"); nextop = F8; diff --git a/src/dynarec/la64/dynarec_la64_f30f.c b/src/dynarec/la64/dynarec_la64_f30f.c index b9b028b7..a09f4b2d 100644 --- a/src/dynarec/la64/dynarec_la64_f30f.c +++ b/src/dynarec/la64/dynarec_la64_f30f.c @@ -188,11 +188,11 @@ uintptr_t dynarec64_F30F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int GETGX(v0, 1); GETEXSS(v1, 0, 0); q0 = fpu_get_scratch(dyn); - q1 = fpu_get_scratch(dyn); - LU12I_W(x3, 0x3f800); // 1.0f - MOVGR2FR_W(q0, x3); - FSQRT_S(q1, v1); - FDIV_S(q0, q0, q1); + if(cpuext.frecipe){ + FRSQRTE_S(q0, v1); + }else{ + FRSQRT_S(q0, v1); + } VEXTRINS_W(v0, q0, 0); break; case 0x53: @@ -201,9 +201,11 @@ uintptr_t dynarec64_F30F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int GETGX(v0, 1); GETEXSS(v1, 0, 0); d1 = fpu_get_scratch(dyn); - LU12I_W(x3, 0x3f800); // 1.0f - MOVGR2FR_W(d1, x3); - FDIV_S(d1, d1, v1); + if(cpuext.frecipe){ + FRECIPE_S(d1, v1); + }else{ + FRECIP_S(d1, v1); + } VEXTRINS_W(v0, d1, 0); break; case 0x58: diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h index 57feffcf..b6fe7b65 100644 --- a/src/dynarec/la64/la64_emitter.h +++ b/src/dynarec/la64/la64_emitter.h @@ -1266,8 +1266,12 @@ LSX instruction starts with V, LASX instruction starts with XV. 
#define VFSQRT_D(vd, vj) EMIT(type_2R(0b0111001010011100111010, vj, vd)) #define VFRECIP_S(vd, vj) EMIT(type_2R(0b0111001010011100111101, vj, vd)) #define VFRECIP_D(vd, vj) EMIT(type_2R(0b0111001010011100111110, vj, vd)) +#define VFRECIPE_S(vd, vj) EMIT(type_2R(0b0111001010011101000101, vj, vd)) +#define VFRECIPE_D(vd, vj) EMIT(type_2R(0b0111001010011101000110, vj, vd)) #define VFRSQRT_S(vd, vj) EMIT(type_2R(0b0111001010011101000001, vj, vd)) #define VFRSQRT_D(vd, vj) EMIT(type_2R(0b0111001010011101000010, vj, vd)) +#define VFRSQRTE_S(vd, vj) EMIT(type_2R(0b0111001010011101001001, vj, vd)) +#define VFRSQRTE_D(vd, vj) EMIT(type_2R(0b0111001010011101001010, vj, vd)) #define VFCVTL_S_H(vd, vj) EMIT(type_2R(0b0111001010011101111010, vj, vd)) #define VFCVTH_S_H(vd, vj) EMIT(type_2R(0b0111001010011101111011, vj, vd)) #define VFCVTL_D_S(vd, vj) EMIT(type_2R(0b0111001010011101111100, vj, vd)) @@ -3206,17 +3210,81 @@ LSX instruction starts with V, LASX instruction starts with XV. } \ } while (0) -#define VREPLVEIxy(width, vd, vj, imm) \ - do { \ - if (vex.l) { \ - if (imm > 0) { \ - ADDI_D(x5, xZR, imm); \ - XVREPLVE_##width(vd, vj, x5); \ - } else { \ - XVREPLVE0_##width(vd, vj); \ - } \ - } else { \ - VREPLVEI_##width(vd, vj, imm); \ - } \ +#define VFRECIPxy(width, vd, vj) \ + do { \ + if (vex.l) { \ + XVFRECIP_##width(vd, vj); \ + } else { \ + VFRECIP_##width(vd, vj); \ + } \ + } while (0) + +#define VFRECIPExy(width, vd, vj) \ + do { \ + if (vex.l) { \ + XVFRECIPE_##width(vd, vj); \ + } else { \ + VFRECIPE_##width(vd, vj); \ + } \ + } while (0) + +#define VFRSQRTxy(width, vd, vj) \ + do { \ + if (vex.l) { \ + XVFRSQRT_##width(vd, vj); \ + } else { \ + VFRSQRT_##width(vd, vj); \ + } \ + } while (0) + +#define VFRSQRTExy(width, vd, vj) \ + do { \ + if (vex.l) { \ + XVFRSQRTE_##width(vd, vj); \ + } else { \ + VFRSQRTE_##width(vd, vj); \ + } \ + } while (0) + +#define VFSQRTxy(width, vd, vj) \ + do { \ + if (vex.l) { \ + XVFSQRT_##width(vd, vj); \ + } else { \ + 
VFSQRT_##width(vd, vj); \ + } \ + } while (0) + +#define VFMAXxy(width, vd, vj, vk) \ + do { \ + if (vex.l) { \ + XVFMAX_##width(vd, vj, vk); \ + } else { \ + VFMAX_##width(vd, vj, vk); \ + } \ } while (0) + +#define VFMINxy(width, vd, vj, vk) \ + do { \ + if (vex.l) { \ + XVFMIN_##width(vd, vj, vk); \ + } else { \ + VFMIN_##width(vd, vj, vk); \ + } \ + } while (0) + +#define VREPLVEIxy(width, vd, vj, imm) \ + do { \ + if (vex.l) { \ + if (imm > 0) { \ + ADDI_D(x5, xZR, imm); \ + XVREPLVE_##width(vd, vj, x5); \ + } else { \ + XVREPLVE0_##width(vd, vj); \ + } \ + } else { \ + VREPLVEI_##width(vd, vj, imm); \ + } \ + } while (0) + #endif //__ARM64_EMITTER_H__ diff --git a/src/dynarec/la64/la64_printer.c b/src/dynarec/la64/la64_printer.c index 39d4c612..99c396cf 100644 --- a/src/dynarec/la64/la64_printer.c +++ b/src/dynarec/la64/la64_printer.c @@ -7556,6 +7556,70 @@ const char* la64_print(uint32_t opcode, uintptr_t addr) snprintf(buff, sizeof(buff), "%-15s %s, %s", "XVREPLGR2VR.D", XVt[Rd], Xt[Rj]); return buff; } + if (isMask(opcode, "0000000100010100010101jjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "%-15s %s, %s", "FRECIP.S", Ft[Rd], Ft[Rj]); + return buff; + } + if (isMask(opcode, "0000000100010100011101jjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "%-15s %s, %s", "FRECIPE.S", Ft[Rd], Ft[Rj]); + return buff; + } + if (isMask(opcode, "0000000100010100010110jjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "%-15s %s, %s", "FRECIP.D", Ft[Rd], Ft[Rj]); + return buff; + } + if (isMask(opcode, "0000000100010100011110jjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "%-15s %s, %s", "FRECIPE.D", Ft[Rd], Ft[Rj]); + return buff; + } + if (isMask(opcode, "0111001010011100111101jjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "%-15s %s, %s", "VFRECIP.S", Vt[Rd], Vt[Rj]); + return buff; + } + if (isMask(opcode, "0111001010011100111110jjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "%-15s %s, %s", "VFRECIP.D", Vt[Rd], Vt[Rj]); + return buff; + } + if 
(isMask(opcode, "0111001010011101000101jjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "%-15s %s, %s", "VFRECIPE.S", Vt[Rd], Vt[Rj]); + return buff; + } + if (isMask(opcode, "0111001010011101000110jjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "%-15s %s, %s", "VFRECIPE.D", Vt[Rd], Vt[Rj]); + return buff; + } + if (isMask(opcode, "0000000100010100011001jjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "%-15s %s, %s", "FRSQRT.S", Ft[Rd], Ft[Rj]); + return buff; + } + if (isMask(opcode, "0000000100010100100001jjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "%-15s %s, %s", "FRSQRTE.S", Ft[Rd], Ft[Rj]); + return buff; + } + if (isMask(opcode, "0000000100010100011010jjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "%-15s %s, %s", "FRSQRT.D", Ft[Rd], Ft[Rj]); + return buff; + } + if (isMask(opcode, "0000000100010100100010jjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "%-15s %s, %s", "FRSQRTE.D", Ft[Rd], Ft[Rj]); + return buff; + } + if (isMask(opcode, "0111001010011101000001jjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "%-15s %s, %s", "VFRSQRT.S", Vt[Rd], Vt[Rj]); + return buff; + } + if (isMask(opcode, "0111001010011101000010jjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "%-15s %s, %s", "VFRSQRT.D", Vt[Rd], Vt[Rj]); + return buff; + } + if (isMask(opcode, "0111001010011101001001jjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "%-15s %s, %s", "VFRSQRTE.S", Vt[Rd], Vt[Rj]); + return buff; + } + if (isMask(opcode, "0111001010011101001010jjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "%-15s %s, %s", "VFRSQRTE.D", Vt[Rd], Vt[Rj]); + return buff; + } snprintf(buff, sizeof(buff), "%08X ???", __builtin_bswap32(opcode)); return buff; } diff --git a/src/include/hostext.h b/src/include/hostext.h index 579e76ac..6c3b119e 100644 --- a/src/include/hostext.h +++ b/src/include/hostext.h @@ -41,6 +41,7 @@ typedef union cpu_ext_s { uint64_t lam_bh : 1; uint64_t lamcas : 1; uint64_t scq : 1; + uint64_t frecipe : 1; #endif }; uint64_t x; diff --git 
a/src/os/hostext_common.c b/src/os/hostext_common.c index 258e1af4..c167d2a7 100644 --- a/src/os/hostext_common.c +++ b/src/os/hostext_common.c @@ -39,6 +39,8 @@ void PrintHostCpuFeatures(void) printf_log_prefix(0, LOG_INFO, " LAMCAS"); if (cpuext.scq) printf_log_prefix(0, LOG_INFO, " SCQ"); + if (cpuext.frecipe) + printf_log_prefix(0, LOG_INFO, " FRECIP"); printf_log_prefix(0, LOG_INFO, "\n"); #elif defined(RV64) printf_log(LOG_INFO, "Dynarec for rv64g"); diff --git a/src/os/hostext_linux.c b/src/os/hostext_linux.c index a2a45db2..c202a33f 100644 --- a/src/os/hostext_linux.c +++ b/src/os/hostext_linux.c @@ -191,6 +191,7 @@ int DetectHostCpuFeatures(void) if (((cpucfg2 >> 6) & 0b11) != 3) return 0; // LSX/LASX must present cpuext.lbt = (cpucfg2 >> 18) & 0b1; + cpuext.frecipe = (cpucfg2 >> 25) & 0b1; cpuext.lam_bh = (cpucfg2 >> 27) & 0b1; cpuext.lamcas = (cpucfg2 >> 28) & 0b1; cpuext.scq = (cpucfg2 >> 30) & 0b1; |