about summary refs log tree commit diff stats
path: root/src/dynarec
diff options
context:
space:
mode:
author    ptitSeb <sebastien.chev@gmail.com>  2024-06-06 16:48:05 +0200
committer ptitSeb <sebastien.chev@gmail.com>  2024-06-06 16:48:05 +0200
commit    0dc5761ca6743d5a5f0f6d3086828f6d6ed5c720 (patch)
tree      2ab46d1455b5cd96049b2f74a1f058842d2115d6 /src/dynarec
parent    df47fb5be83f3226b11703f23722c7c0b6a0b271 (diff)
download  box64-0dc5761ca6743d5a5f0f6d3086828f6d6ed5c720.tar.gz
          box64-0dc5761ca6743d5a5f0f6d3086828f6d6ed5c720.zip
[INTERPRETER] Added support for F16C extension (linked to AVX flag) ([ARM64_DYNAREC] too)
Diffstat (limited to 'src/dynarec')
-rw-r--r--src/dynarec/arm64/arm64_emitter.h12
-rw-r--r--src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c17
-rw-r--r--src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c48
3 files changed, 77 insertions, 0 deletions
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h
index c7bb614d..844d29dd 100644
--- a/src/dynarec/arm64/arm64_emitter.h
+++ b/src/dynarec/arm64/arm64_emitter.h
@@ -1375,18 +1375,30 @@ int convert_bitmask(uint64_t bitmask);
 #define FCVTN(Vd, Vn)               EMIT(FCVTN_vector(0, 1, Vn, Vd))
 // Convert Vn from 2*Double to higher Vd as 2*float, use FPCR rounding
 #define FCVTN2(Vd, Vn)              EMIT(FCVTN_vector(1, 1, Vn, Vd))
+// Convert Vn from 4*Float to lower Vd as 4*float16 and clears the upper half, use FPCR rounding
+#define FCVTN16(Vd, Vn)             EMIT(FCVTN_vector(0, 0, Vn, Vd))
+// Convert Vn from 4*Float to higher Vd as 4*float16, use FPCR rounding
+#define FCVTN162(Vd, Vn)            EMIT(FCVTN_vector(1, 0, Vn, Vd))
 
 #define FCVTXN_vector(Q, sz, Rn, Rd)   ((Q)<<30 | 1<<29 | 0b01110<<24 | (sz)<<22 | 0b10000<<17 | 0b10110<<12 | 0b10<<10 | (Rn)<<5 | (Rd))
 // Convert Vn from 2*Double to lower Vd as 2*float and clears the upper half
 #define FCVTXN(Vd, Vn)              EMIT(FCVTXN_vector(0, 1, Vn, Vd))
 // Convert Vn from 2*Double to higher Vd as 2*float
 #define FCVTXN2(Vd, Vn)             EMIT(FCVTXN_vector(1, 1, Vn, Vd))
+// Convert Vn from 4*Float to lower Vd as 4*float16 and clears the upper half
+// NOTE(review): FCVTXN is only architecturally defined for sz=1 (double->single,
+// round-to-odd); sz=0 is a reserved encoding — confirm these variants are intended.
+#define FCVTXN16(Vd, Vn)            EMIT(FCVTXN_vector(0, 0, Vn, Vd))
+// Convert Vn from 4*Float to higher Vd as 4*float16
+#define FCVTXN162(Vd, Vn)           EMIT(FCVTXN_vector(1, 0, Vn, Vd))
 
 #define FCVTL_vector(Q, sz, Rn, Rd)     ((Q)<<30 | 0<<29 | 0b01110<<24 | (sz)<<22 | 0b10000<<17 | 0b10111<<12 | 0b10<<10 | (Rn)<<5 | (Rd))
 // Convert lower Vn from 2*float to Vd as 2*double
 #define FCVTL(Vd, Vn)               EMIT(FCVTL_vector(0, 1, Vn, Vd))
 // Convert higher Vn from 2*float to Vd as 2*double
 #define FCVTL2(Vd, Vn)              EMIT(FCVTL_vector(1, 1, Vn, Vd))
+// Convert lower Vn from 4*float16 to Vd as 4*float
+#define FCVTL16(Vd, Vn)             EMIT(FCVTL_vector(0, 0, Vn, Vd))
+// Convert higher Vn from 4*float16 to Vd as 4*float
+#define FCVTL162(Vd, Vn)            EMIT(FCVTL_vector(1, 0, Vn, Vd))
 
 #define SCVTF_scalar(sf, type, rmode, opcode, Rn, Rd)   ((sf)<<31 | 0b11110<<24 | (type)<<22 | 1<<21 | (rmode)<<19 | (opcode)<<16 | (Rn)<<5 | (Rd))
 #define SCVTFSw(Sd, Wn)             EMIT(SCVTF_scalar(0, 0b00, 0b00, 0b010, Wn, Sd))
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
index a2a45435..e4ebd5b2 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
@@ -158,6 +158,23 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
             if(!vex.l) YMM0(gd);
             break;
 
+        case 0x13:
+            INST_NAME("VCVTPH2PS Gx, Ex");
+            nextop = F8;
+            GETEX_Y(v1, 0, 0);
+            GETGX_empty(v0);
+            if(vex.l && v0==v1) {
+                q1 = fpu_get_scratch(dyn, ninst);
+                VMOVQ(q1, v1);
+                v1 = q1;
+            }
+            FCVTL16(v0, v1);
+            if(vex.l) {
+                GETGY_empty(v0, -1, -1, -1);
+                FCVTL162(v0, v1);
+            } else YMM0(gd);
+            break;
+
         case 0x17:
             INST_NAME("VPTEST GX, EX");
             SETFLAGS(X_ALL, SF_SET);
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c
index cdbe93f6..e667f562 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c
@@ -402,6 +402,54 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
             F8; // read u8, but it's been already handled
             break;
 
+        case 0x1D:
+            INST_NAME("VCVTPS2PH Ex, Gx");
+            nextop = F8;
+            GETGX(v0, 0);
+            if(MODREG) {
+                v1 = sse_get_reg_empty(dyn, ninst, x3, (nextop&7)+(rex.b<<3));
+            } else {
+                WILLWRITE2();
+                v1 = fpu_get_scratch(dyn, ninst);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, &unscaled, 0xfff<<(3+vex.l), vex.l?15:7, rex, NULL, 0, 1);
+            }
+            if(vex.l && v0==v1) {
+                q0 = fpu_get_scratch(dyn, ninst);
+                VMOVQ(q0, v0);
+                v0 = q0;
+            }
+            u8 = F8;
+            if(u8&4) {
+                s0 = sse_setround(dyn, ninst, x1, x2, x6);
+            } else {
+                u8&=3;
+                if(u8==1) u8=2;
+                else if(u8==2) u8=1;
+                MRS_fpcr(x1);               // get fpscr
+                MOV32w(x2, u8);
+                MOVx_REG(x6, x1);
+                BFIx(x1, x2, 22, 2);     // inject new round
+                MSR_fpcr(x1);               // put new fpscr
+                s0 = x6;
+            }
+            FCVTN16(v1, v0);
+            if(vex.l) {
+                GETGY(v0, 0, MODREG?((nextop&7)+(rex.b<<3)):-1, -1,-1);
+                FCVTN162(v1, v0);
+            }
+            x87_restoreround(dyn, ninst, s0);
+            if(MODREG) {
+                YMM0((nextop&7)+(rex.b<<3));
+            } else {
+                if(vex.l) {
+                    VST128(v1, ed, fixedaddress);
+                } else {
+                    VST64(v1, ed, fixedaddress);
+                }
+                SMWRITE2();
+            }
+            break;
+
         case 0x20:
             INST_NAME("VINSERTD Gx, Vx, Ex, Ib");
             nextop = F8;