about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author: phorcys <phorcys@126.com> 2025-07-29 15:08:24 +0800
committer: GitHub <noreply@github.com> 2025-07-29 09:08:24 +0200
commit: f43a221ffca63e96e33301148df240a91df0d3c2 (patch)
tree: 90d7fd67d372a8056fd092585ec084bd9249d975 /src
parent: 397399fd30cc8ebe2c45440a2afc8e7c6c0f80f4 (diff)
download: box64-f43a221ffca63e96e33301148df240a91df0d3c2.tar.gz
box64-f43a221ffca63e96e33301148df240a91df0d3c2.zip
[LA64_DYNAREC] Add la64 avx float ops part 3. (#2845)
*  add cpuext.frecipe for LoongArch V1.1
  *  Fix VFRSQRTE in sse op  RSQRTPS/RSQRTSS
  *  Fix VFRECIPE in sse op  RCPPS/RCPSS
  *  V{MAX,MIN}{PD,PS,SD,SS}
  *  VRCPPS,VRCPSS
  *  VRSQRTPS,VRSQRTSS
  *  VSQRT{PD,PS,SD,SS}
Diffstat (limited to 'src')
-rw-r--r--src/dynarec/la64/dynarec_la64_0f.c12
-rw-r--r--src/dynarec/la64/dynarec_la64_avx_0f.c63
-rw-r--r--src/dynarec/la64/dynarec_la64_avx_66_0f.c43
-rw-r--r--src/dynarec/la64/dynarec_la64_avx_f2_0f.c58
-rw-r--r--src/dynarec/la64/dynarec_la64_avx_f3_0f.c88
-rw-r--r--src/dynarec/la64/dynarec_la64_f30f.c18
-rw-r--r--src/dynarec/la64/la64_emitter.h92
-rw-r--r--src/dynarec/la64/la64_printer.c64
-rw-r--r--src/include/hostext.h1
-rw-r--r--src/os/hostext_common.c2
-rw-r--r--src/os/hostext_linux.c1
11 files changed, 412 insertions, 30 deletions
diff --git a/src/dynarec/la64/dynarec_la64_0f.c b/src/dynarec/la64/dynarec_la64_0f.c
index bbe42e3c..81eac050 100644
--- a/src/dynarec/la64/dynarec_la64_0f.c
+++ b/src/dynarec/la64/dynarec_la64_0f.c
@@ -790,7 +790,11 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             SKIPTEST(x1);
             GETEX(q0, 0, 0);
             GETGX_empty(q1);
-            VFRSQRT_S(q1, q0);
+            if(cpuext.frecipe){
+                VFRSQRTE_S(q1, q0);
+            } else {
+                VFRSQRT_S(q1, q0);
+            }
             break;
         case 0x53:
             INST_NAME("RCPPS Gx, Ex");
@@ -798,7 +802,12 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             SKIPTEST(x1);
             GETEX(q0, 0, 0);
             GETGX_empty(q1);
-            // TODO: use v1.1 vfrecipe when possible
-            VFRECIP_S(q1, q0);
+            // Emit exactly one reciprocal. A trailing unconditional VFRECIP_S would
+            // overwrite the estimate and, if q0 and q1 are the same register, invert it again.
+            if(cpuext.frecipe){
+                VFRECIPE_S(q1, q0);
+            }else{
+                VFRECIP_S(q1, q0);
+            }
             break;
         case 0x54:
diff --git a/src/dynarec/la64/dynarec_la64_avx_0f.c b/src/dynarec/la64/dynarec_la64_avx_0f.c
index 01ae9f5b..f9211fbe 100644
--- a/src/dynarec/la64/dynarec_la64_avx_0f.c
+++ b/src/dynarec/la64/dynarec_la64_avx_0f.c
@@ -245,6 +245,43 @@ uintptr_t dynarec64_AVX_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, in
                 VPICKVE2GR_DU(gd, d1, 0);
             }
             break;
+        case 0x51:
+            INST_NAME("VSQRTPS Gx, Ex");
+            nextop = F8;
+            GETGY_empty_EY_xy(v0, v1, 0);
+            if (!BOX64ENV(dynarec_fastnan)) {
+                d0 = fpu_get_scratch(dyn);
+                d1 = fpu_get_scratch(dyn);
+                VFCMPxy(S, d0, v1, v1, cEQ);
+                VFSQRTxy(S, v0, v1);
+                VFCMPxy(S, d1, v0, v0, cEQ);
+                VANDN_Vxy(d1, d1, d0);
+                VSLLIxy(W, d1, d1, 31);
+                VOR_Vxy(v0, v0, d1);
+            } else {
+                VFSQRTxy(S, v0, v1);
+            }
+            break;
+        case 0x52:
+            INST_NAME("VRSQRTPS Gx, Ex");
+            nextop = F8;
+            GETGY_empty_EY_xy(v0, v1, 0);
+            if (cpuext.frecipe) {
+                VFRSQRTExy(S, v0, v1);
+            } else {
+                VFRSQRTxy(S, v0, v1);
+            }
+            break;
+        case 0x53:
+            INST_NAME("VRCPPS Gx, Ex");
+            nextop = F8;
+            GETGY_empty_EY_xy(v0, v1, 0);
+            if (cpuext.frecipe) {
+                VFRECIPExy(S, v0, v1);
+            } else {
+                VFRECIPxy(S, v0, v1);
+            }
+            break;
         case 0x54:
             INST_NAME("VANDPS Gx, Vx, Ex");
             nextop = F8;
@@ -323,6 +360,19 @@ uintptr_t dynarec64_AVX_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, in
                 VBITSEL_Vxy(v0, v0, d1, d0);
             }
             break;
+        case 0x5D:
+            INST_NAME("VMINPS Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            if (BOX64ENV(dynarec_fastnan)) {
+                VFMINxy(S, v0, v2, v1);
+            } else {
+                q0 = fpu_get_scratch(dyn);
+                q1 = fpu_get_scratch(dyn);
+                VFCMPxy(S, q0, v2, v1, cULE);
+                VBITSEL_Vxy(v0, v1, v2, q0);
+            }
+            break;
         case 0x5E:
             INST_NAME("VDIVPS Gx, Vx, Ex");
             nextop = F8;
@@ -341,6 +391,19 @@ uintptr_t dynarec64_AVX_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, in
                 VBITSEL_Vxy(v0, v0, d1, d0);
             }
             break;
+        case 0x5F:
+            INST_NAME("VMAXPS Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            if (BOX64ENV(dynarec_fastnan)) {
+                VFMAXxy(S, v0, v2, v1);
+            } else {
+                q0 = fpu_get_scratch(dyn);
+                q1 = fpu_get_scratch(dyn);
+                VFCMPxy(S, q0, v2, v1, cLT);
+                VBITSEL_Vxy(v0, v2, v1, q0);
+            }
+            break;
         case 0x77:
             if (!vex.l) {
                 INST_NAME("VZEROUPPER");
diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f.c b/src/dynarec/la64/dynarec_la64_avx_66_0f.c
index 4217a713..6ae03e3c 100644
--- a/src/dynarec/la64/dynarec_la64_avx_66_0f.c
+++ b/src/dynarec/la64/dynarec_la64_avx_66_0f.c
@@ -239,6 +239,23 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 VPICKVE2GR_DU(gd, d1, 0);
             }
             break;
+        case 0x51:
+            INST_NAME("VSQRTPD Gx, Ex");
+            nextop = F8;
+            GETGY_empty_EY_xy(v0, v1, 0);
+            if (!BOX64ENV(dynarec_fastnan)) {
+                d0 = fpu_get_scratch(dyn);
+                d1 = fpu_get_scratch(dyn);
+                VFCMPxy(D, d0, v1, v1, cEQ);
+                VFSQRTxy(D, v0, v1);
+                VFCMPxy(D, d1, v0, v0, cEQ);
+                VANDN_Vxy(d1, d1, d0);
+                VSLLIxy(D, d1, d1, 63);
+                VOR_Vxy(v0, v0, d1);
+            } else {
+                VFSQRTxy(D, v0, v1);
+            }
+            break;
         case 0x54:
             INST_NAME("VANDPD Gx, Vx, Ex");
             nextop = F8;
@@ -317,6 +334,19 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 VBITSEL_Vxy(v0, v0, d1, d0);
             }
             break;
+        case 0x5D:
+            INST_NAME("VMINPD Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            if (BOX64ENV(dynarec_fastnan)) {
+                VFMINxy(D, v0, v2, v1);
+            } else {
+                q0 = fpu_get_scratch(dyn);
+                q1 = fpu_get_scratch(dyn);
+                VFCMPxy(D, q0, v2, v1, cULE);
+                VBITSEL_Vxy(v0, v1, v2, q0);
+            }
+            break;
         case 0x5E:
             INST_NAME("VDIVPD Gx, Vx, Ex");
             nextop = F8;
@@ -335,6 +365,19 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 VBITSEL_Vxy(v0, v0, d1, d0);
             }
             break;
+        case 0x5F:
+            INST_NAME("VMAXPD Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            if (BOX64ENV(dynarec_fastnan)) {
+                VFMAXxy(D, v0, v2, v1);
+            } else {
+                q0 = fpu_get_scratch(dyn);
+                q1 = fpu_get_scratch(dyn);
+                VFCMPxy(D, q0, v2, v1, cLT);
+                VBITSEL_Vxy(v0, v2, v1, q0);
+            }
+            break;
         case 0x60:
             INST_NAME("VPUNPCKLBW Gx, Vx, Ex");
             nextop = F8;
diff --git a/src/dynarec/la64/dynarec_la64_avx_f2_0f.c b/src/dynarec/la64/dynarec_la64_avx_f2_0f.c
index 2f492c85..84cbe5e3 100644
--- a/src/dynarec/la64/dynarec_la64_avx_f2_0f.c
+++ b/src/dynarec/la64/dynarec_la64_avx_f2_0f.c
@@ -116,6 +116,24 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 VREPLVE_D(q0, q1, 0);
             }
             break;
+        case 0x51:
+            INST_NAME("VSQRTSD Gx, Vx, Ex");
+            nextop = F8;
+            GETVYx(v1, 0);
+            GETEYSD(v2, 0, 0);
+            GETGYx_empty(v0);
+            d1 = fpu_get_scratch(dyn);
+            FSQRT_D(d1, v2);
+            if (!BOX64ENV(dynarec_fastnan)) {
+                d0 = fpu_get_scratch(dyn);
+                VXOR_V(d0, d0, d0);
+                FCMP_D(fcc0, v2, d0, cLT);
+                BCEQZ(fcc0, 4 + 4);
+                FNEG_D(d1, d1);
+            }
+            if(v0 != v1) VOR_V(v0, v1, v1);
+            VEXTRINS_D(v0, d1, 0);
+            break;
         case 0x58:
             INST_NAME("VADDSD Gx, Vx, Ex");
             nextop = F8;
@@ -132,7 +150,7 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 FNEG_D(d0, d0);
             }
             MARK;
-            VOR_V(v0, v1, v1);
+            if(v0 != v1) VOR_V(v0, v1, v1);
             VEXTRINS_D(v0, d0, 0);
             break;
         case 0x59:
@@ -151,7 +169,7 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 FNEG_D(d0, d0);
             }
             MARK;
-            VOR_V(v0, v1, v1);
+            if(v0 != v1) VOR_V(v0, v1, v1);
             VEXTRINS_D(v0, d0, 0);
             break;
         case 0x5C:
@@ -170,9 +188,25 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 FNEG_D(d0, d0);
             }
             MARK;
-            VOR_V(v0, v1, v1);
+            if(v0 != v1) VOR_V(v0, v1, v1);
             VEXTRINS_D(v0, d0, 0);
             break;
+        case 0x5D:
+            INST_NAME("VMINSD Gx, Vx, Ex");
+            nextop = F8;
+            GETVYx(v1, 1);
+            GETEYSD(v2, 0, 0);
+            GETGYx_empty(v0);
+            q0 = fpu_get_scratch(dyn);
+            if (BOX64ENV(dynarec_fastnan)) {
+                FMIN_D(q0, v1, v2);
+            } else {
+                FCMP_D(fcc0, v2, v1, cULE);
+                FSEL(q0, v1, v2, fcc0);
+            }
+            if(v0 != v1) VOR_V(v0, v1, v1);
+            VEXTRINS_D(v0, q0, 0);
+            break;
         case 0x5E:
             INST_NAME("VDIVSD Gx, Vx, Ex");
             nextop = F8;
@@ -189,9 +223,25 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 FNEG_D(d0, d0);
             }
             MARK;
-            VOR_V(v0, v1, v1);
+            if(v0 != v1) VOR_V(v0, v1, v1);
             VEXTRINS_D(v0, d0, 0);
             break;
+        case 0x5F:
+            INST_NAME("VMAXSD Gx, Vx, Ex");
+            nextop = F8;
+            GETVYx(v1, 1);
+            GETEYSD(v2, 0, 0);
+            GETGYx_empty(v0);
+            q0 = fpu_get_scratch(dyn);
+            if (BOX64ENV(dynarec_fastnan)) {
+                FMAX_D(q0, v1, v2);
+            } else {
+                FCMP_D(fcc0, v2, v1, cLT);
+                FSEL(q0, v2, v1, fcc0);
+            }
+            if(v0 != v1) VOR_V(v0, v1, v1);
+            VEXTRINS_D(v0, q0, 0);
+            break;
         case 0x70:
             INST_NAME("VPSHUFLW Gx, Ex, Ib");
             nextop = F8;
diff --git a/src/dynarec/la64/dynarec_la64_avx_f3_0f.c b/src/dynarec/la64/dynarec_la64_avx_f3_0f.c
index f2ea3acd..a61dcbb7 100644
--- a/src/dynarec/la64/dynarec_la64_avx_f3_0f.c
+++ b/src/dynarec/la64/dynarec_la64_avx_f3_0f.c
@@ -100,6 +100,54 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 VPACKOD_W(q0, q1, q1);
             }
             break;
+        case 0x51:
+            INST_NAME("VSQRTSS Gx, Vx, Ex");
+            nextop = F8;
+            GETVYx(v1, 0);
+            GETEYSS(v2, 0, 0);
+            GETGYx_empty(v0);
+            d1 = fpu_get_scratch(dyn);
+            FSQRT_S(d1, v2);
+            if (!BOX64ENV(dynarec_fastnan)) {
+                d0 = fpu_get_scratch(dyn);
+                VXOR_V(d0, d0, d0);
+                FCMP_S(fcc0, v2, d0, cLT);
+                BCEQZ(fcc0, 4 + 4);
+                FNEG_S(d1, d1);
+            }
+            if(v0 != v1) VOR_V(v0, v1, v1);
+            VEXTRINS_W(v0, d1, 0);
+            break;
+        case 0x52:
+            INST_NAME("VRSQRTSS Gx, Vx, Ex");
+            nextop = F8;
+            GETVYx(v1, 0);
+            GETEYSS(v2, 0, 0);
+            GETGYx_empty(v0);
+            d0 = fpu_get_scratch(dyn);
+            if (cpuext.frecipe) {
+                FRSQRTE_S(d0, v2); // operand is Ex (v2); Vx (v1) is only copied into v0 below
+            } else {
+                FRSQRT_S(d0, v2);
+            }
+            if(v0 != v1) VOR_V(v0, v1, v1); // v0 = Vx before the low lane is replaced
+            VEXTRINS_W(v0, d0, 0);          // insert the scalar result into lane 0
+            break;
+        case 0x53:
+            INST_NAME("VRCPSS Gx, Vx, Ex");
+            nextop = F8;
+            GETVYx(v1, 0);
+            GETEYSS(v2, 0, 0);
+            GETGYx_empty(v0);
+            d0 = fpu_get_scratch(dyn);
+            if (cpuext.frecipe) {
+                FRECIPE_S(d0, v2); // operand is Ex (v2); Vx (v1) is only copied into v0 below
+            } else {
+                FRECIP_S(d0, v2);
+            }
+            if(v0 != v1) VOR_V(v0, v1, v1); // v0 = Vx before the low lane is replaced
+            VEXTRINS_W(v0, d0, 0);          // insert the scalar result into lane 0
+            break;
         case 0x58:
             INST_NAME("VADDSS Gx, Vx, Ex");
             nextop = F8;
@@ -116,7 +164,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 FNEG_S(d0, d0);
             }
             MARK;
-            VOR_V(v0, v1, v1);
+            if(v0 != v1) VOR_V(v0, v1, v1);
             VEXTRINS_W(v0, d0, 0);
             break;
         case 0x59:
@@ -135,7 +183,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 FNEG_S(d0, d0);
             }
             MARK;
-            VOR_V(v0, v1, v1);
+            if(v0 != v1) VOR_V(v0, v1, v1);
             VEXTRINS_W(v0, d0, 0);
             break;
         case 0x5C:
@@ -154,9 +202,25 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 FNEG_S(d0, d0);
             }
             MARK;
-            VOR_V(v0, v1, v1);
+            if(v0 != v1) VOR_V(v0, v1, v1);
             VEXTRINS_W(v0, d0, 0);
             break;
+        case 0x5D:
+            INST_NAME("VMINSS Gx, Vx, Ex");
+            nextop = F8;
+            GETVYx(v1, 1);
+            GETEYSS(v2, 0, 0);
+            GETGYx_empty(v0);
+            q0 = fpu_get_scratch(dyn);
+            if (BOX64ENV(dynarec_fastnan)) {
+                FMIN_S(q0, v1, v2);
+            } else {
+                FCMP_S(fcc0, v2, v1, cULE);
+                FSEL(q0, v1, v2, fcc0);
+            }
+            if(v0 != v1) VOR_V(v0, v1, v1);
+            VEXTRINS_W(v0, q0, 0);
+            break;
         case 0x5E:
             INST_NAME("VDIVSS Gx, Vx, Ex");
             nextop = F8;
@@ -173,9 +237,25 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 FNEG_S(d0, d0);
             }
             MARK;
-            VOR_V(v0, v1, v1);
+            if(v0 != v1) VOR_V(v0, v1, v1);
             VEXTRINS_W(v0, d0, 0);
             break;
+        case 0x5F:
+            INST_NAME("VMAXSS Gx, Vx, Ex");
+            nextop = F8;
+            GETVYx(v1, 1);
+            GETEYSS(v2, 0, 0);
+            GETGYx_empty(v0);
+            q0 = fpu_get_scratch(dyn);
+            if (BOX64ENV(dynarec_fastnan)) {
+                FMAX_S(q0, v1, v2);
+            } else {
+                FCMP_S(fcc0, v2, v1, cLT);
+                FSEL(q0, v2, v1, fcc0);
+            }
+            if(v0 != v1) VOR_V(v0, v1, v1);
+            VEXTRINS_W(v0, q0, 0);
+            break;
         case 0x6F:
             INST_NAME("VMOVDQU Gx, Ex");
             nextop = F8;
diff --git a/src/dynarec/la64/dynarec_la64_f30f.c b/src/dynarec/la64/dynarec_la64_f30f.c
index b9b028b7..a09f4b2d 100644
--- a/src/dynarec/la64/dynarec_la64_f30f.c
+++ b/src/dynarec/la64/dynarec_la64_f30f.c
@@ -188,11 +188,11 @@ uintptr_t dynarec64_F30F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             GETGX(v0, 1);
             GETEXSS(v1, 0, 0);
             q0 = fpu_get_scratch(dyn);
-            q1 = fpu_get_scratch(dyn);
-            LU12I_W(x3, 0x3f800); // 1.0f
-            MOVGR2FR_W(q0, x3);
-            FSQRT_S(q1, v1);
-            FDIV_S(q0, q0, q1);
+            if(cpuext.frecipe){
+                FRSQRTE_S(q0, v1);
+            }else{
+                FRSQRT_S(q0, v1);
+            }
             VEXTRINS_W(v0, q0, 0);
             break;
         case 0x53:
@@ -201,9 +201,11 @@ uintptr_t dynarec64_F30F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             GETGX(v0, 1);
             GETEXSS(v1, 0, 0);
             d1 = fpu_get_scratch(dyn);
-            LU12I_W(x3, 0x3f800); // 1.0f
-            MOVGR2FR_W(d1, x3);
-            FDIV_S(d1, d1, v1);
+            if(cpuext.frecipe){
+                FRECIPE_S(d1, v1);
+            }else{
+                FRECIP_S(d1, v1);
+            }
             VEXTRINS_W(v0, d1, 0);
             break;
         case 0x58:
diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h
index 57feffcf..b6fe7b65 100644
--- a/src/dynarec/la64/la64_emitter.h
+++ b/src/dynarec/la64/la64_emitter.h
@@ -1266,8 +1266,12 @@ LSX instruction starts with V, LASX instruction starts with XV.
 #define VFSQRT_D(vd, vj)             EMIT(type_2R(0b0111001010011100111010, vj, vd))
 #define VFRECIP_S(vd, vj)            EMIT(type_2R(0b0111001010011100111101, vj, vd))
 #define VFRECIP_D(vd, vj)            EMIT(type_2R(0b0111001010011100111110, vj, vd))
+#define VFRECIPE_S(vd, vj)           EMIT(type_2R(0b0111001010011101000101, vj, vd))
+#define VFRECIPE_D(vd, vj)           EMIT(type_2R(0b0111001010011101000110, vj, vd))
 #define VFRSQRT_S(vd, vj)            EMIT(type_2R(0b0111001010011101000001, vj, vd))
 #define VFRSQRT_D(vd, vj)            EMIT(type_2R(0b0111001010011101000010, vj, vd))
+#define VFRSQRTE_S(vd, vj)           EMIT(type_2R(0b0111001010011101001001, vj, vd))
+#define VFRSQRTE_D(vd, vj)           EMIT(type_2R(0b0111001010011101001010, vj, vd))
 #define VFCVTL_S_H(vd, vj)           EMIT(type_2R(0b0111001010011101111010, vj, vd))
 #define VFCVTH_S_H(vd, vj)           EMIT(type_2R(0b0111001010011101111011, vj, vd))
 #define VFCVTL_D_S(vd, vj)           EMIT(type_2R(0b0111001010011101111100, vj, vd))
@@ -3206,17 +3210,81 @@ LSX instruction starts with V, LASX instruction starts with XV.
         }                               \
     } while (0)
 
-#define VREPLVEIxy(width, vd, vj, imm)         \
-    do {                                       \
-        if (vex.l) {                           \
-            if (imm > 0) {                     \
-                ADDI_D(x5, xZR, imm);          \
-                XVREPLVE_##width(vd, vj, x5);  \
-            } else {                           \
-                XVREPLVE0_##width(vd, vj); \
-            }                                  \
-        } else {                               \
-            VREPLVEI_##width(vd, vj, imm);     \
-        }                                      \
+#define VFRECIPxy(width, vd, vj)      \
+    do {                              \
+        if (vex.l) {                  \
+            XVFRECIP_##width(vd, vj); \
+        } else {                      \
+            VFRECIP_##width(vd, vj);  \
+        }                             \
+    } while (0)
+
+#define VFRECIPExy(width, vd, vj)      \
+    do {                               \
+        if (vex.l) {                   \
+            XVFRECIPE_##width(vd, vj); \
+        } else {                       \
+            VFRECIPE_##width(vd, vj);  \
+        }                              \
+    } while (0)
+
+#define VFRSQRTxy(width, vd, vj)      \
+    do {                              \
+        if (vex.l) {                  \
+            XVFRSQRT_##width(vd, vj); \
+        } else {                      \
+            VFRSQRT_##width(vd, vj);  \
+        }                             \
+    } while (0)
+
+#define VFRSQRTExy(width, vd, vj)      \
+    do {                               \
+        if (vex.l) {                   \
+            XVFRSQRTE_##width(vd, vj); \
+        } else {                       \
+            VFRSQRTE_##width(vd, vj);  \
+        }                              \
+    } while (0)
+
+#define VFSQRTxy(width, vd, vj)      \
+    do {                             \
+        if (vex.l) {                 \
+            XVFSQRT_##width(vd, vj); \
+        } else {                     \
+            VFSQRT_##width(vd, vj);  \
+        }                            \
+    } while (0)
+
+#define VFMAXxy(width, vd, vj, vk)      \
+    do {                                \
+        if (vex.l) {                    \
+            XVFMAX_##width(vd, vj, vk); \
+        } else {                        \
+            VFMAX_##width(vd, vj, vk);  \
+        }                               \
     } while (0)
+
+#define VFMINxy(width, vd, vj, vk)      \
+    do {                                \
+        if (vex.l) {                    \
+            XVFMIN_##width(vd, vj, vk); \
+        } else {                        \
+            VFMIN_##width(vd, vj, vk);  \
+        }                               \
+    } while (0)
+
+#define VREPLVEIxy(width, vd, vj, imm)        \
+    do {                                      \
+        if (vex.l) {                          \
+            if (imm > 0) {                    \
+                ADDI_D(x5, xZR, imm);         \
+                XVREPLVE_##width(vd, vj, x5); \
+            } else {                          \
+                XVREPLVE0_##width(vd, vj);    \
+            }                                 \
+        } else {                              \
+            VREPLVEI_##width(vd, vj, imm);    \
+        }                                     \
+    } while (0)
+
 #endif //__ARM64_EMITTER_H__
diff --git a/src/dynarec/la64/la64_printer.c b/src/dynarec/la64/la64_printer.c
index 39d4c612..99c396cf 100644
--- a/src/dynarec/la64/la64_printer.c
+++ b/src/dynarec/la64/la64_printer.c
@@ -7556,6 +7556,70 @@ const char* la64_print(uint32_t opcode, uintptr_t addr)
         snprintf(buff, sizeof(buff), "%-15s %s, %s", "XVREPLGR2VR.D", XVt[Rd], Xt[Rj]);
         return buff;
     }
+    if (isMask(opcode, "0000000100010100010101jjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "FRECIP.S", Ft[Rd], Ft[Rj]);
+        return buff;
+    }
+    if (isMask(opcode, "0000000100010100011101jjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "FRECIPE.S", Ft[Rd], Ft[Rj]);
+        return buff;
+    }
+    if (isMask(opcode, "0000000100010100010110jjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "FRECIP.D", Ft[Rd], Ft[Rj]);
+        return buff;
+    }
+    if (isMask(opcode, "0000000100010100011110jjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "FRECIPE.D", Ft[Rd], Ft[Rj]);
+        return buff;
+    }
+    if (isMask(opcode, "0111001010011100111101jjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "VFRECIP.S", Vt[Rd], Vt[Rj]);
+        return buff;
+    }
+    if (isMask(opcode, "0111001010011100111110jjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "VFRECIP.D", Vt[Rd], Vt[Rj]);
+        return buff;
+    }
+    if (isMask(opcode, "0111001010011101000101jjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "VFRECIPE.S", Vt[Rd], Vt[Rj]);
+        return buff;
+    }
+    if (isMask(opcode, "0111001010011101000110jjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "VFRECIPE.D", Vt[Rd], Vt[Rj]);
+        return buff;
+    }
+    if (isMask(opcode, "0000000100010100011001jjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "FRSQRT.S", Ft[Rd], Ft[Rj]);
+        return buff;
+    }
+    if (isMask(opcode, "0000000100010100100001jjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "FRSQRTE.S", Ft[Rd], Ft[Rj]);
+        return buff;
+    }
+    if (isMask(opcode, "0000000100010100011010jjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "FRSQRT.D", Ft[Rd], Ft[Rj]);
+        return buff;
+    }
+    if (isMask(opcode, "0000000100010100100010jjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "FRSQRTE.D", Ft[Rd], Ft[Rj]);
+        return buff;
+    }
+    if (isMask(opcode, "0111001010011101000001jjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "VFRSQRT.S", Vt[Rd], Vt[Rj]);
+        return buff;
+    }
+    if (isMask(opcode, "0111001010011101000010jjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "VFRSQRT.D", Vt[Rd], Vt[Rj]);
+        return buff;
+    }
+    if (isMask(opcode, "0111001010011101001001jjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "VFRSQRTE.S", Vt[Rd], Vt[Rj]);
+        return buff;
+    }
+    if (isMask(opcode, "0111001010011101001010jjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "VFRSQRTE.D", Vt[Rd], Vt[Rj]);
+        return buff;
+    }
     snprintf(buff, sizeof(buff), "%08X ???", __builtin_bswap32(opcode));
     return buff;
 }
diff --git a/src/include/hostext.h b/src/include/hostext.h
index 579e76ac..6c3b119e 100644
--- a/src/include/hostext.h
+++ b/src/include/hostext.h
@@ -41,6 +41,7 @@ typedef union cpu_ext_s {
         uint64_t lam_bh : 1;
         uint64_t lamcas : 1;
         uint64_t scq : 1;
+        uint64_t frecipe : 1;
 #endif
     };
     uint64_t x;
diff --git a/src/os/hostext_common.c b/src/os/hostext_common.c
index 258e1af4..c167d2a7 100644
--- a/src/os/hostext_common.c
+++ b/src/os/hostext_common.c
@@ -39,6 +39,8 @@ void PrintHostCpuFeatures(void)
         printf_log_prefix(0, LOG_INFO, " LAMCAS");
     if (cpuext.scq)
         printf_log_prefix(0, LOG_INFO, " SCQ");
+    if (cpuext.frecipe) // label mirrors the flag name, as the LAMCAS/SCQ entries above do
+        printf_log_prefix(0, LOG_INFO, " FRECIPE");
     printf_log_prefix(0, LOG_INFO, "\n");
 #elif defined(RV64)
     printf_log(LOG_INFO, "Dynarec for rv64g");
diff --git a/src/os/hostext_linux.c b/src/os/hostext_linux.c
index a2a45db2..c202a33f 100644
--- a/src/os/hostext_linux.c
+++ b/src/os/hostext_linux.c
@@ -191,6 +191,7 @@ int DetectHostCpuFeatures(void)
         if (((cpucfg2 >> 6) & 0b11) != 3) return 0; // LSX/LASX must present
 
         cpuext.lbt = (cpucfg2 >> 18) & 0b1;
+        cpuext.frecipe = (cpucfg2 >> 25) & 0b1;
         cpuext.lam_bh = (cpucfg2 >> 27) & 0b1;
         cpuext.lamcas = (cpucfg2 >> 28) & 0b1;
         cpuext.scq = (cpucfg2 >> 30) & 0b1;