about summary refs log tree commit diff stats
path: root/src/emu
diff options
context:
space:
mode:
authorptitSeb <sebastien.chev@gmail.com>2024-06-06 16:48:05 +0200
committerptitSeb <sebastien.chev@gmail.com>2024-06-06 16:48:05 +0200
commit0dc5761ca6743d5a5f0f6d3086828f6d6ed5c720 (patch)
tree2ab46d1455b5cd96049b2f74a1f058842d2115d6 /src/emu
parentdf47fb5be83f3226b11703f23722c7c0b6a0b271 (diff)
downloadbox64-0dc5761ca6743d5a5f0f6d3086828f6d6ed5c720.tar.gz
box64-0dc5761ca6743d5a5f0f6d3086828f6d6ed5c720.zip
[INTERPRETER] Added support for F16C extension (linked to AVX flag) ([ARM64_DYNAREC] too)
Diffstat (limited to 'src/emu')
-rw-r--r--src/emu/x64runavx660f38.c13
-rw-r--r--src/emu/x64runavx660f3a.c24
-rw-r--r--src/emu/x87emu_private.c99
-rw-r--r--src/emu/x87emu_private.h3
4 files changed, 138 insertions, 1 deletions
diff --git a/src/emu/x64runavx660f38.c b/src/emu/x64runavx660f38.c
index e1302e1e..8bd9c051 100644
--- a/src/emu/x64runavx660f38.c
+++ b/src/emu/x64runavx660f38.c
@@ -521,6 +521,19 @@ uintptr_t RunAVX_660F38(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
             CLEAR_FLAG(F_PF);
             break;
 
+        case 0x13:  /* VCVTPH2PS Gx, Ex */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETGY;
+            if(vex.l) {
+                for(int i=3; i>=0; --i)
+                    GY->ud[i] = cvtf16_32(EX->uw[4+i]);
+            } else GY->u128 = 0;
+            for(int i=3; i>=0; --i)
+                GX->ud[i] = cvtf16_32(EX->uw[i]);
+            break;
+
         case 0x16:  /* VPERMPS Gx, Vx, Ex */
             // same code as 0x36
             nextop = F8;
diff --git a/src/emu/x64runavx660f3a.c b/src/emu/x64runavx660f3a.c
index 2110dff3..a8f213d6 100644
--- a/src/emu/x64runavx660f3a.c
+++ b/src/emu/x64runavx660f3a.c
@@ -548,6 +548,30 @@ uintptr_t RunAVX_660F3A(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
             }
             break;
 
+        case 0x1D:  /* VCVTPS2PH Ex, Gx, u8 */
+            nextop = F8;
+            GETEX(1);
+            GETGX;
+            u8 = F8;
+            if(u8&4)
+                u8 = emu->mxcsr.f.MXCSR_RC;
+            else
+                u8 = u8&3;
+            for(int i=0; i<4; ++i)
+                EX->uw[i] = cvtf32_16(GX->ud[i], u8);
+            if(vex.l) {
+                GETGY;
+                for(int i=0; i<4; ++i)
+                    EX->uw[4+i] = cvtf32_16(GY->ud[i], u8);
+            }
+            if(MODREG) {
+                if(!vex.l) EX->q[1] = 0;
+                GETEY;
+                EY->u128 = 0;
+            }
+            break;
+
+
         case 0x20:      // VPINSRB GX, Vx, ED, u8
             nextop = F8;
             GETED(1);   // It's ED, and not EB
diff --git a/src/emu/x87emu_private.c b/src/emu/x87emu_private.c
index da480e6a..9164ea30 100644
--- a/src/emu/x87emu_private.c
+++ b/src/emu/x87emu_private.c
@@ -540,4 +540,101 @@ void fpu_xrstor(x64emu_t* emu, void* ed, int is32bits)
         for(int i=0; i<(is32bits?8:16); ++i)
             memset(&emu->ymm[i], 0, 16);
     }
-}
\ No newline at end of file
+}
+
+typedef union f16_s {          // a 16-bit half-precision value, viewed as raw bits or as separate fields
+    uint16_t u16;              // the whole 16-bit pattern
+    struct {                   // NOTE(review): bit-field allocation order is implementation-defined; this assumes fields are laid out from the least-significant bit -- confirm for every target compiler
+        uint16_t fraction:10;  // stored mantissa bits
+        uint16_t exponant:5;   // biased exponent (the converters in this file use a bias of 15); all-ones is treated as inf/NaN
+        uint16_t sign:1;       // sign bit, copied unchanged by the converters
+    };
+} f16_t;
+
+typedef union f32_s {          // a 32-bit single-precision value, viewed as raw bits or as separate fields
+    uint32_t u32;              // the whole 32-bit pattern
+    struct {                   // NOTE(review): bit-field allocation order is implementation-defined; this assumes fields are laid out from the least-significant bit -- confirm for every target compiler
+        uint32_t fraction:23;  // stored mantissa bits
+        uint32_t exponant:8;   // biased exponent (the converters in this file use a bias of 127); all-ones is treated as inf/NaN
+        uint32_t sign:1;       // sign bit, copied unchanged by the converters
+    };
+} f32_t;
+
+/*
+ * Convert a half-precision (binary16) bit pattern to the single-precision
+ * (binary32) bit pattern holding the same value.
+ *
+ * v       raw 16-bit pattern: 1 sign bit, 5 exponent bits (bias 15),
+ *         10 fraction bits.
+ * return  raw 32-bit pattern: 1 sign bit, 8 exponent bits (bias 127),
+ *         23 fraction bits.
+ *
+ * Every binary16 value is exactly representable in binary32, so no rounding
+ * mode is needed. Works on plain shifts and masks, so the result does not
+ * depend on how the compiler lays out bit-fields.
+ */
+uint32_t cvtf16_32(uint16_t v)
+{
+    const uint32_t sign  = (uint32_t)(v & 0x8000u) << 16;
+    const uint32_t exp16 = (v >> 10) & 0x1Fu;
+    uint32_t frac        = v & 0x3FFu;
+
+    if(exp16 == 0x1Fu) {
+        // infinity (fraction == 0) or NaN: keep the payload in the top
+        // fraction bits and force the quiet bit so a NaN stays a (quiet) NaN
+        if(frac)
+            frac |= 0x200u;
+        return sign | 0x7F800000u | (frac << 13);
+    }
+
+    if(exp16 == 0) {
+        if(!frac)
+            return sign;    // +0 / -0
+        // half-precision denormal: value = frac * 2^-24, which is a *normal*
+        // single-precision number. Shift the leading 1 up to the implicit
+        // position (bit 10), lowering the exponent once per shift.
+        uint32_t exp32 = 127 - 14;
+        while(!(frac & 0x400u)) {
+            frac <<= 1;
+            --exp32;
+        }
+        return sign | (exp32 << 23) | ((frac & 0x3FFu) << 13);
+    }
+
+    // normal number: re-bias the exponent (127 - 15 = 112), widen the fraction
+    return sign | ((exp16 + 112u) << 23) | (frac << 13);
+}
+/*
+ * Convert a single-precision (binary32) bit pattern to a half-precision
+ * (binary16) bit pattern.
+ *
+ * v         raw 32-bit pattern of the source value.
+ * rounding  only the low 2 bits are used:
+ *             0 = round to nearest, ties to even
+ *             1 = round down (toward -infinity)
+ *             2 = round up   (toward +infinity)
+ *             3 = truncate   (toward zero)
+ * return    raw 16-bit pattern of the converted value.
+ *
+ * Special cases:
+ *  - NaN stays NaN: the top 10 payload bits are kept and the quiet bit is set.
+ *  - Results too small for a normal binary16 become binary16 denormals,
+ *    rounded with the selected mode (no flush to zero).
+ *  - Results too large become infinity, or the largest finite value when
+ *    the rounding mode points toward zero for that sign.
+ *
+ * Works on plain shifts and masks, so the result does not depend on how the
+ * compiler lays out bit-fields.
+ */
+uint16_t cvtf32_16(uint32_t v, uint8_t rounding)
+{
+    const uint32_t sign   = (v >> 16) & 0x8000u;
+    const uint32_t exp32  = (v >> 23) & 0xFFu;
+    const uint32_t frac32 = v & 0x7FFFFFu;
+    rounding &= 3;
+    // true when a directed mode pushes an inexact result away from zero
+    const int away = (rounding == 1 && sign) || (rounding == 2 && !sign);
+
+    if(exp32 == 0xFFu) {
+        // infinity or NaN
+        if(!frac32)
+            return (uint16_t)(sign | 0x7C00u);
+        return (uint16_t)(sign | 0x7E00u | (frac32 >> 13));
+    }
+
+    if(exp32 == 0) {
+        // zero, or a binary32 denormal (< 2^-126): far below the smallest
+        // binary16 denormal (2^-24), so only a directed rounding away from
+        // zero can produce something other than zero
+        return (uint16_t)(sign | ((frac32 && away) ? 1u : 0u));
+    }
+
+    const int e = (int)exp32 - 127;
+    if(e > 15) {
+        // overflow: infinity when rounding to nearest or away from zero,
+        // else clamp to the largest finite binary16 (0x7BFF)
+        return (uint16_t)(sign | ((rounding == 0 || away) ? 0x7C00u : 0x7BFFu));
+    }
+
+    // 24-bit significand with the implicit leading 1 made explicit
+    const uint32_t mant = frac32 | 0x800000u;
+    int shift = 13;         // bits dropped for a normal result
+    uint32_t base = 0;      // exponent contribution, minus the implicit bit
+    if(e < -14) {
+        // denormal result: drop one more bit per missing exponent step.
+        // Cap the shift so it stays below the width of the type; at 25 the
+        // whole significand is already below the half-way point.
+        shift += -14 - e;
+        if(shift > 25)
+            shift = 25;
+    } else {
+        // q below still carries the implicit bit (0x400); adding it to
+        // (e+14)<<10 yields a biased exponent of e+15, and lets a rounding
+        // carry ripple into the exponent (up to infinity) by itself
+        base = (uint32_t)(e + 14) << 10;
+    }
+
+    uint32_t q = mant >> shift;
+    const uint32_t rem  = mant & ((1u << shift) - 1u);
+    const uint32_t half = 1u << (shift - 1);
+    if(rem) {
+        if(rounding == 0) {
+            if(rem > half || (rem == half && (q & 1u)))
+                ++q;
+        } else if(away) {
+            ++q;
+        }
+    }
+
+    return (uint16_t)(sign | (base + q));
+}
diff --git a/src/emu/x87emu_private.h b/src/emu/x87emu_private.h
index b389028a..b77e416b 100644
--- a/src/emu/x87emu_private.h
+++ b/src/emu/x87emu_private.h
@@ -220,4 +220,7 @@ void fpu_xsave(x64emu_t* emu, void* ed, int is32bits);
 void fpu_xsave_mask(x64emu_t* emu, void* ed, int is32bits, uint64_t mask);
 void fpu_xrstor(x64emu_t* emu, void* ed, int is32bits);
 
+uint32_t cvtf16_32(uint16_t v);  // half-precision (16-bit) float bit pattern -> single-precision (32-bit) float bit pattern
+uint16_t cvtf32_16(uint32_t v, uint8_t rounding);  // single-precision bit pattern -> half-precision bit pattern; low 2 bits of rounding: 0 nearest, 1 down, 2 up, 3 truncate
+
 #endif //__X87RUN_PRIVATE_H_