about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2024-05-28 14:55:17 +0200
committer ptitSeb <sebastien.chev@gmail.com> 2024-05-28 14:55:17 +0200
commit b79f86b8d1c864cc58d79730a628e72c56ea960d (patch)
tree cab9d2fb0095d8dfe529b7645fbb05dfde8649e0 /src
parent b9f5929439ab5e0ebf9d64b2dc2659a9a018f19d (diff)
download box64-b79f86b8d1c864cc58d79730a628e72c56ea960d.tar.gz
box64-b79f86b8d1c864cc58d79730a628e72c56ea960d.zip
[INTERPRETER] More avx/avx2 opcodes
Diffstat (limited to 'src')
-rw-r--r-- src/emu/x64runavx0f.c 33
-rw-r--r-- src/emu/x64runavx660f.c 64
-rw-r--r-- src/emu/x64runavx660f38.c 136
-rw-r--r-- src/emu/x64runavx660f3a.c 53
-rw-r--r-- src/emu/x64runavxf20f.c 115
-rw-r--r-- src/emu/x64runavxf30f.c 137
6 files changed, 532 insertions, 6 deletions
diff --git a/src/emu/x64runavx0f.c b/src/emu/x64runavx0f.c
index 34372ca6..8fec79f9 100644
--- a/src/emu/x64runavx0f.c
+++ b/src/emu/x64runavx0f.c
@@ -268,7 +268,20 @@ uintptr_t RunAVX_0F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
             } else
                 GY->u128 = 0;
             break;
-
+        case 0x5A:                      /* VCVTPS2PD Gx, Ex */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETGY;
+            if(vex.l) {
+                GETEY;
+                GY->d[1] = EX->f[3];
+                GY->d[0] = EX->f[2];
+            } else
+                GY->u128 = 0;
+            GX->d[1] = EX->f[1];
+            GX->d[0] = EX->f[0];
+            break;
         case 0x5B:                      /* VCVTDQ2PS Gx, Ex */
             nextop = F8;
             GETEX(0);
@@ -304,6 +317,24 @@ uintptr_t RunAVX_0F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
                 GY->u128 = 0;
             break;
 
+        case 0x5E:                      /* VDIVPS Gx, Vx, Ex */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GETGY;
+            for(int i=0; i<4; ++i)
+                GX->f[i] = VX->f[i] / EX->f[i];
+            if(vex.l) {
+                GETEY;
+                GETVY;
+                for(int i=0; i<4; ++i)
+                    GY->f[i] = VY->f[i] / EY->f[i];
+            } else
+                GY->u128 = 0;
+            break;
+
+
         case 0x77:
             if(!vex.l) {    // VZEROUPPER
                 if(vex.v!=0) {
diff --git a/src/emu/x64runavx660f.c b/src/emu/x64runavx660f.c
index 3136433f..c8f23596 100644
--- a/src/emu/x64runavx660f.c
+++ b/src/emu/x64runavx660f.c
@@ -47,6 +47,7 @@ uintptr_t RunAVX_660F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
     reg64_t *oped, *opgd;
     sse_regs_t *opex, *opgx, *opvx, eax1;
     sse_regs_t *opey, *opgy, *opvy, eay1;
+    int is_nan;
 
 
 #ifdef TEST_INTERPRETER
@@ -205,7 +206,40 @@ uintptr_t RunAVX_660F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
             } else
                 GY->u128 = 0;
             break;
-        
+
+        case 0x5E:  /* VDIVPD Gx, Vx, Ex */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GETGY;
+            for (int i=0; i<2; ++i) {
+                #ifndef NOALIGN
+                is_nan = isnan(VX->d[i]) || isnan(EX->d[i]);
+                #endif
+                GX->d[i] = VX->d[i] / EX->d[i];
+                #ifndef NOALIGN
+                if(!is_nan && isnan(GX->d[i]))
+                    GX->d[i] = -NAN;
+                #endif
+            }
+            if(vex.l) {
+                GETEY;
+                GETVY;
+                for (int i=0; i<2; ++i) {
+                    #ifndef NOALIGN
+                    is_nan = isnan(VY->d[i]) || isnan(EY->d[i]);
+                    #endif
+                    GY->d[i] = VY->d[i] / EY->d[i];
+                    #ifndef NOALIGN
+                    if(!is_nan && isnan(GY->d[i]))
+                        GY->d[i] = -NAN;
+                    #endif
+                }
+            } else
+                GY->u128 = 0;
+            break;
+
         case 0x64:  /* VPCMPGTB Gx,Vx, Ex */
             nextop = F8;
             GETEX(0);
@@ -615,6 +649,34 @@ uintptr_t RunAVX_660F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
             }
             break;
 
+        case 0xE6:  /* VCVTTPD2DQ Gx, Ex */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETGY;
+            if(isnan(EX->d[0]) || isinf(EX->d[0]) || EX->d[0]>0x7fffffff)
+                GX->sd[0] = 0x80000000;
+            else
+                GX->sd[0] = EX->d[0];
+            if(isnan(EX->d[1]) || isinf(EX->d[1]) || EX->d[1]>0x7fffffff)
+                GX->sd[1] = 0x80000000;
+            else
+                GX->sd[1] = EX->d[1];
+            if(vex.l) {
+                GETEY;
+                if(isnan(EY->d[0]) || isinf(EY->d[0]) || EY->d[0]>0x7fffffff)
+                    GX->sd[2] = 0x80000000;
+                else
+                    GX->sd[2] = EY->d[0];
+                if(isnan(EY->d[1]) || isinf(EY->d[1]) || EY->d[1]>0x7fffffff)
+                    GX->sd[3] = 0x80000000;
+                else
+                    GX->sd[3] = EY->d[1];
+            } else
+                GX->q[1] = 0;
+            GY->u128 = 0;
+            break;
+
         case 0xEB:  /* VPOR Gx, Vx, Ex */
             nextop = F8;
             GETEX(0);
diff --git a/src/emu/x64runavx660f38.c b/src/emu/x64runavx660f38.c
index 3ec1f0ff..ae6d1cbf 100644
--- a/src/emu/x64runavx660f38.c
+++ b/src/emu/x64runavx660f38.c
@@ -59,7 +59,7 @@ uintptr_t RunAVX_660F38(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
 {
     uint8_t opcode;
     uint8_t nextop;
-    uint8_t tmp8u;
+    uint8_t tmp8u, u8;
     int8_t tmp8s;
     int32_t tmp32s, tmp32s2;
     uint32_t tmp32u, tmp32u2;
@@ -233,6 +233,140 @@ uintptr_t RunAVX_660F38(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
             GY->u128 = EX->u128;
             break;
 
+        case 0x92:  /* VGATHERDPD/VGATHERDPS Gx, VSIB, Vx */
+            nextop = F8;
+            if(((nextop&7)!=4) || MODREG) {
+                emit_signal(emu, SIGILL, (void*)R_RIP, 0);
+            }
+            GETGX;
+            GETVX;
+            GETGY;
+            GETVY;
+            tmp8u = F8; //SIB
+            // compute base
+            tmp64u = emu->regs[(tmp8u&0x7)+(rex.b<<3)].q[0];
+            if(nextop&0x40)
+                tmp64u += F8S;
+            else if(nextop&0x80)
+                tmp64u += F32S;
+            // get vxmm
+            EX = &emu->xmm[((tmp8u>>3)&7)+(rex.x<<3)];
+            EY = &emu->ymm[((tmp8u>>3)&7)+(rex.x<<3)];
+            u8 = tmp8u>>6;
+            // prepare mask
+            if(!vex.l)
+                VY->u128 = 0;
+            if(rex.w)
+                for(int i=0; i<2; ++i)
+                    VX->sq[i]>>=63;
+            else
+                for(int i=0; i<4; ++i)
+                    VX->sd[i]>>=31;
+            // go gather
+            if(rex.w) {
+                for(int i=0; i<2; ++i)
+                    if(VX->q[i]) {
+                        GX->q[i] = *(uint64_t*)(tmp64u + (EX->sd[i]<<u8));
+                        VX->q[i] = 0;
+                    }
+            } else {
+                for(int i=0; i<4; ++i)
+                    if(VX->ud[i]) {
+                        GX->ud[i] = *(uint32_t*)(tmp64u + (EX->sd[i]<<u8));
+                        VX->ud[i] = 0;
+                    }
+            }
+            if(vex.l) {
+                if(rex.w)
+                    for(int i=0; i<2; ++i)
+                        VY->sq[i]>>=63;
+                else
+                    for(int i=0; i<4; ++i)
+                        VY->sd[i]>>=31;
+                if(rex.w) {
+                    for(int i=0; i<2; ++i)
+                        if(VY->q[i]) {
+                            GY->q[i] = *(uint64_t*)(tmp64u + (EX->sd[2+i]<<u8));
+                            VY->q[i] = 0;
+                        }
+                } else {
+                    for(int i=0; i<4; ++i)
+                        if(VY->ud[i]) {
+                            GY->ud[i] = *(uint32_t*)(tmp64u + (EY->sd[i]<<u8));
+                            VY->ud[i] = 0;
+                        }
+                }
+            } else
+                GY->u128 = 0;
+            break;
+        case 0x93:  /* VGATHERQPD/VGATHERQPS Gx, VSIB, Vx */
+            nextop = F8;
+            if(((nextop&7)!=4) || MODREG) {
+                emit_signal(emu, SIGILL, (void*)R_RIP, 0);
+            }
+            GETGX;
+            GETVX;
+            GETGY;
+            GETVY;
+            tmp8u = F8; //SIB
+            // compute base
+            tmp64u = emu->regs[(tmp8u&0x7)+(rex.b<<3)].q[0];
+            if(nextop&0x40)
+                tmp64u += F8S;
+            else if(nextop&0x80)
+                tmp64u += F32S;
+            // get vxmm
+            EX = &emu->xmm[((tmp8u>>3)&7)+(rex.x<<3)];
+            EY = &emu->ymm[((tmp8u>>3)&7)+(rex.x<<3)];
+            u8 = tmp8u>>6;
+            // prepare mask
+            if(!vex.l) {
+                VY->u128 = 0;
+            }
+            if(!vex.l || !rex.w)
+                GY->u128 = 0;
+            if(rex.w)
+                for(int i=0; i<2; ++i)
+                    VX->sq[i]>>=63;
+            else
+                for(int i=0; i<4; ++i)
+                    VX->sd[i]>>=31;
+            // go gather
+            if(rex.w) {
+                for(int i=0; i<2; ++i)
+                    if(VX->q[i]) {
+                        GX->q[i] = *(uint64_t*)(tmp64u + (EX->sq[i]<<u8));
+                        VX->q[i] = 0;
+                    }
+            } else {
+                for(int i=0; i<(vex.l?4:2); ++i)
+                    if(VX->ud[i]) {
+                        GX->ud[i] = *(uint32_t*)(tmp64u + (((i>1)?EY->sq[i-2]:EX->sq[i])<<u8));
+                        VX->ud[i] = 0;
+                    }
+            }
+            if(vex.l) {
+                if(rex.w)
+                    for(int i=0; i<2; ++i)
+                        VY->sq[i]>>=63;
+                else
+                    VY->u128=0;
+                if(rex.w) {
+                    for(int i=0; i<2; ++i)
+                        if(VY->q[i]) {
+                            GY->q[i] = *(uint64_t*)(tmp64u + (EY->sq[i]<<u8));
+                            VY->q[i] = 0;
+                        }
+                } else {
+                    VY->u128 = 0;
+                }
+            }
+            if(!rex.w && !vex.l) {
+                GX->q[1] = 0;
+                VX->q[1] = 0;
+            }
+            break;
+
         case 0xDB:  /* VAESIMC Gx, Ex */
             nextop = F8;
             GETEX(0);
diff --git a/src/emu/x64runavx660f3a.c b/src/emu/x64runavx660f3a.c
index cc5e784d..e09de6bf 100644
--- a/src/emu/x64runavx660f3a.c
+++ b/src/emu/x64runavx660f3a.c
@@ -57,6 +57,7 @@ uintptr_t RunAVX_660F3A(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
     int64_t tmp64s;
     reg64_t *oped, *opgd;
     float tmpf;
+    double tmpd;
     sse_regs_t *opex, *opgx, *opvx, eax1;
     sse_regs_t *opey, *opgy, *opvy, eay1;
     // AES opcodes constants
@@ -170,6 +171,14 @@ uintptr_t RunAVX_660F3A(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
             }
             break;
 
+        case 0x17:      // VEXTRACTPS ED, GX, u8
+            nextop = F8;
+            GETED(1);
+            GETGX;
+            tmp8u = F8;
+            ED->dword[0] = GX->ud[tmp8u&3];
+            if(MODREG) ED->dword[1] = 0;
+            break;
         case 0x18:  /* VINSERTF128 Gx, Ex, imm8 */
             nextop = F8;
             GETEX(1);
@@ -217,7 +226,20 @@ uintptr_t RunAVX_660F3A(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
             GY->u128 = 0;
             break;
 
-        case 0x40:  /* DPPS Gx, Ex, Ib */
+        case 0x39:  /* VEXTRACTI128 Ex, Gx, Ib */
+            nextop = F8;
+            GETGX;
+            GETEX(1);
+            GETGY;
+            tmp8u = F8;
+            EX->u128 = (tmp8u&1)?GY->u128:GX->u128;
+            if(MODREG) {
+                GETEY;
+                EY->u128 = 0;
+            }
+            break;
+
+        case 0x40:  /* VDPPS Gx, VX, Ex, Ib */
             nextop = F8;
             GETEX(1);
             GETGX;
@@ -231,6 +253,8 @@ uintptr_t RunAVX_660F3A(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
             for(int i=0; i<4; ++i)
                 GX->f[i] = (tmp8u&(1<<i))?tmpf:0.0f;
             if(vex.l) {
+                GETEY;
+                GETVY;
                 tmpf = 0.0f;
                 for(int i=0; i<4; ++i)
                     if(tmp8u&(1<<(i+4)))
@@ -240,6 +264,33 @@ uintptr_t RunAVX_660F3A(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
             } else
                 GY->u128 = 0;
             break;
+        case 0x41:  /* VDPPD Gx, Vx, Ex, Ib */
+            nextop = F8;
+            GETEX(1);
+            GETGX;
+            GETVX;
+            GETGY;
+            tmp8u = F8;
+            tmpd = 0.0;
+            if(tmp8u&(1<<(4+0)))
+                tmpd += VX->d[0]*EX->d[0];
+            if(tmp8u&(1<<(4+1)))
+                tmpd += VX->d[1]*EX->d[1];
+            GX->d[0] = (tmp8u&(1<<(0)))?tmpd:0.0;
+            GX->d[1] = (tmp8u&(1<<(1)))?tmpd:0.0;
+            if(vex.l) {
+                GETEY;
+                GETVY;
+                tmpd = 0.0;
+                if(tmp8u&(1<<(4+0)))
+                    tmpd += VY->d[0]*EY->d[0];
+                if(tmp8u&(1<<(4+1)))
+                    tmpd += VY->d[1]*EY->d[1];
+                GY->d[0] = (tmp8u&(1<<(0)))?tmpd:0.0;
+                GY->d[1] = (tmp8u&(1<<(1)))?tmpd:0.0;
+            } else
+                GY->u128 = 0;
+            break;
 
         case 0x44:    /* VPCLMULQDQ Gx, Vx, Ex, imm8 */
             nextop = F8;
diff --git a/src/emu/x64runavxf20f.c b/src/emu/x64runavxf20f.c
index 642946b5..fc51a3a4 100644
--- a/src/emu/x64runavxf20f.c
+++ b/src/emu/x64runavxf20f.c
@@ -48,6 +48,7 @@ uintptr_t RunAVX_F20F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
     reg64_t *oped, *opgd;
     sse_regs_t *opex, *opgx, *opvx, eax1;
     sse_regs_t *opey, *opgy, *opvy, eay1;
+    int is_nan;
 
 
 #ifdef TEST_INTERPRETER
@@ -86,6 +87,90 @@ uintptr_t RunAVX_F20F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
             }
             break;
 
+        case 0x2A:  /* VCVTSI2SD Gx, Vx, Ed */
+            nextop = F8;
+            GETED(0);
+            GETGX;
+            GETVX;
+            GETGY;
+            if(rex.w) {
+                GX->d[0] = ED->sq[0];
+            } else {
+                GX->d[0] = ED->sdword[0];
+            }
+            GX->q[1] = VX->q[1];
+            GY->u128 = 0;
+            break;
+
+        case 0x2C:  /* VCVTTSD2SI Gd, Ex */
+            nextop = F8;
+            GETEX(0);
+            GETGD;
+            if(rex.w)
+                if(isnan(EX->d[0]) || isinf(EX->d[0]) || EX->d[0]>0x7fffffffffffffffLL)
+                    GD->q[0] = 0x8000000000000000LL;
+                else
+                    GD->sq[0] = EX->d[0];
+            else {
+                if(isnan(EX->d[0]) || isinf(EX->d[0]) || EX->d[0]>0x7fffffff)
+                    GD->dword[0] = 0x80000000;
+                else
+                    GD->sdword[0] = EX->d[0];
+                GD->dword[1] = 0;
+            }
+            break;
+        case 0x2D:  /* VCVTSD2SI Gd, Ex */
+            nextop = F8;
+            GETEX(0);
+            GETGD;
+            if(rex.w) {
+                if(isnan(EX->d[0]) || isinf(EX->d[0]) || EX->d[0]>0x7fffffffffffffffLL)
+                    GD->q[0] = 0x8000000000000000LL;
+                else
+                    switch(emu->mxcsr.f.MXCSR_RC) {
+                        case ROUND_Nearest: {
+                            int round = fegetround();
+                            fesetround(FE_TONEAREST);
+                            GD->sq[0] = nearbyint(EX->d[0]);
+                            fesetround(round);
+                            break;
+                        }
+                        case ROUND_Down:
+                            GD->sq[0] = floor(EX->d[0]);
+                            break;
+                        case ROUND_Up:
+                            GD->sq[0] = ceil(EX->d[0]);
+                            break;
+                        case ROUND_Chop:
+                            GD->sq[0] = EX->d[0];
+                            break;
+                    }
+            } else {
+                if(isnan(EX->d[0]) || isinf(EX->d[0]) || EX->d[0]>0x7fffffff)
+                    GD->dword[0] = 0x80000000;
+                else
+                    switch(emu->mxcsr.f.MXCSR_RC) {
+                        case ROUND_Nearest: {
+                            int round = fegetround();
+                            fesetround(FE_TONEAREST);
+                            GD->sdword[0] = nearbyint(EX->d[0]);
+                            fesetround(round);
+                            break;
+                        }
+                        case ROUND_Down:
+                            GD->sdword[0] = floor(EX->d[0]);
+                            break;
+                        case ROUND_Up:
+                            GD->sdword[0] = ceil(EX->d[0]);
+                            break;
+                        case ROUND_Chop:
+                            GD->sdword[0] = EX->d[0];
+                            break;
+                    }
+                GD->dword[1] = 0;
+            }
+            break;
+
         case 0x58:  /* VADDSD Gx, Vx, Ex */
             nextop = F8;
             GETEX(0);
@@ -99,6 +184,36 @@ uintptr_t RunAVX_F20F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
             GY->u128 = 0;
             break;
 
+        case 0x5A:  /* VCVTSD2SS Gx, Vx, Ex */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GETGY;
+            GX->f[0] = EX->d[0];
+            GX->ud[1] = VX->ud[1];
+            GX->q[1] = VX->q[1];
+            GY->u128 = 0;
+            break;
+
+        case 0x5E:  /* VDIVSD Gx, Vx, Ex */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GETGY;
+            #ifndef NOALIGN
+            is_nan = isnan(VX->d[0]) || isnan(EX->d[0]);
+            #endif
+            GX->d[0] = VX->d[0] / EX->d[0];
+            #ifndef NOALIGN
+            if(!is_nan && isnan(GX->d[0]))
+                GX->d[0] = -NAN;
+            #endif
+            GX->q[1] = VX->q[1];
+            GY->u128 = 0;
+            break;
+
         case 0xC2:  /* VCMPSD Gx, Vx, Ex, Ib */
             nextop = F8;
             GETEX(1);
diff --git a/src/emu/x64runavxf30f.c b/src/emu/x64runavxf30f.c
index 98fb8b4d..9afb1c10 100644
--- a/src/emu/x64runavxf30f.c
+++ b/src/emu/x64runavxf30f.c
@@ -87,6 +87,94 @@ uintptr_t RunAVX_F30F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
             }
             break;
 
+        case 0x2A:  /* VCVTSI2SS Gx, Vx, Ed */
+            nextop = F8;
+            GETED(0);
+            GETGX;
+            GETVX;
+            GETGY;
+            if(rex.w)
+                GX->f[0] = ED->sq[0];
+            else
+                GX->f[0] = ED->sdword[0];
+            GX->ud[1] = VX->ud[1];
+            GX->q[1] = VX->q[1];
+            GY->u128 = 0;
+            break;
+
+        case 0x2C:  /* VCVTTSS2SI Gd, Ex */
+            nextop = F8;
+            GETEX(0);
+            GETGD;
+            if (rex.w) {
+                if(isnanf(EX->f[0]) || isinff(EX->f[0]) || EX->f[0]>(float)0x7fffffffffffffffLL)
+                    GD->q[0] = 0x8000000000000000LL;
+                else
+                    GD->sq[0] = EX->f[0];
+            } else {
+                if(isnanf(EX->f[0]) || isinff(EX->f[0]) || EX->f[0]>0x7fffffff)
+                    GD->dword[0] = 0x80000000;
+                else
+                    GD->sdword[0] = EX->f[0];
+                GD->dword[1] = 0;
+            }
+            break;
+        case 0x2D:  /* VCVTSS2SI Gd, Ex */
+            nextop = F8;
+            GETEX(0);
+            GETGD;
+            if(rex.w) {
+                if(isnanf(EX->f[0]) || isinff(EX->f[0]) || EX->f[0]>(float)0x7fffffffffffffffLL)
+                    GD->q[0] = 0x8000000000000000LL;
+                else
+                    switch(emu->mxcsr.f.MXCSR_RC) {
+                        case ROUND_Nearest: {
+                            int round = fegetround();
+                            fesetround(FE_TONEAREST);
+                            GD->sq[0] = nearbyintf(EX->f[0]);
+                            fesetround(round);
+                            break;
+                        }
+                        case ROUND_Down:
+                            GD->sq[0] = floorf(EX->f[0]);
+                            break;
+                        case ROUND_Up:
+                            GD->sq[0] = ceilf(EX->f[0]);
+                            break;
+                        case ROUND_Chop:
+                            GD->sq[0] = EX->f[0];
+                            break;
+                    }
+            } else {
+                if(isnanf(EX->f[0]))
+                    tmp64s = INT32_MIN;
+                else
+                    switch(emu->mxcsr.f.MXCSR_RC) {
+                        case ROUND_Nearest: {
+                            int round = fegetround();
+                            fesetround(FE_TONEAREST);
+                            tmp64s = nearbyintf(EX->f[0]);
+                            fesetround(round);
+                            break;
+                        }
+                        case ROUND_Down:
+                            tmp64s = floorf(EX->f[0]);
+                            break;
+                        case ROUND_Up:
+                            tmp64s = ceilf(EX->f[0]);
+                            break;
+                        case ROUND_Chop:
+                            tmp64s = EX->f[0];
+                            break;
+                    }
+                if (tmp64s==(int32_t)tmp64s)
+                    GD->sdword[0] = (int32_t)tmp64s;
+                else
+                    GD->sdword[0] = INT32_MIN;
+                GD->dword[1] = 0;
+            }
+            break;
+
         case 0x58:  /* VADDSS Gx, Vx, Ex */
             nextop = F8;
             GETEX(0);
@@ -111,7 +199,38 @@ uintptr_t RunAVX_F30F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
             GX->q[1] = VX->q[1];
             GY->q[0] = GY->q[1] = 0;
             break;
-
+        case 0x5B:  /* VCVTTPS2DQ Gx, Ex */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETGY;
+            for(int i=0; i<4; ++i) {
+                if(isnanf(EX->f[i]))
+                    tmp64s = INT32_MIN;
+                else
+                    tmp64s = EX->f[i];
+                if (tmp64s==(int32_t)tmp64s) {
+                    GX->sd[i] = (int32_t)tmp64s;
+                } else {
+                    GX->sd[i] = INT32_MIN;
+                }
+            }
+            if(vex.l) {
+                GETEY;
+                for(int i=0; i<4; ++i) {
+                    if(isnanf(EY->f[i]))
+                        tmp64s = INT32_MIN;
+                    else
+                        tmp64s = EY->f[i];
+                    if (tmp64s==(int32_t)tmp64s) {
+                        GY->sd[i] = (int32_t)tmp64s;
+                    } else {
+                        GY->sd[i] = INT32_MIN;
+                    }
+                }
+            } else
+                GY->u128 = 0;
+            break;
         case 0x5C:  /* VSUBSS Gx, Vx, Ex */
             nextop = F8;
             GETEX(0);
@@ -123,7 +242,21 @@ uintptr_t RunAVX_F30F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
                 GX->ud[1] = VX->ud[1];
                 GX->q[1] = VX->q[1];
             }
-            GY->q[0] = GY->q[1] = 0;
+            GY->u128 = 0;
+            break;
+
+        case 0x5E:  /* VDIVSS Gx, Vx, Ex */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GETGY;
+            GX->f[0] = VX->f[0] / EX->f[0];
+            if(GX!=VX) {
+                GX->ud[1] = VX->ud[1];
+                GX->q[1] = VX->q[1];
+            }
+            GY->u128 = 0;
             break;
 
         case 0x6F:  // VMOVDQU Gx, Ex