about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2024-05-28 10:42:41 +0200
committer ptitSeb <sebastien.chev@gmail.com> 2024-05-28 10:42:41 +0200
commit b9f5929439ab5e0ebf9d64b2dc2659a9a018f19d (patch)
tree abd27a75f28e1a50caf99716ca76b7f1184c125d /src
parent 3dc396a64775a0aa8aae55513eda0d326cb50080 (diff)
download box64-b9f5929439ab5e0ebf9d64b2dc2659a9a018f19d.tar.gz
box64-b9f5929439ab5e0ebf9d64b2dc2659a9a018f19d.zip
[INTERPRETER] More avx, avx2 and vaes opcodes
Diffstat (limited to 'src')
-rw-r--r-- src/emu/x64run0f.c 2
-rw-r--r-- src/emu/x64runavx.c 15
-rw-r--r-- src/emu/x64runavx0f.c 207
-rw-r--r-- src/emu/x64runavx660f.c 243
-rw-r--r-- src/emu/x64runavx660f38.c 289
-rw-r--r-- src/emu/x64runavx660f3a.c 173
-rw-r--r-- src/emu/x64runavxf20f.c 142
-rw-r--r-- src/emu/x64runavxf30f.c 41
-rw-r--r-- src/tools/my_cpuid.c 1
-rw-r--r-- src/wrapped/wrappedlibc.c 2
10 files changed, 1069 insertions, 46 deletions
diff --git a/src/emu/x64run0f.c b/src/emu/x64run0f.c
index 0dadfe2d..8fc5e645 100644
--- a/src/emu/x64run0f.c
+++ b/src/emu/x64run0f.c
@@ -702,7 +702,7 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step)
                 if(EX->f[i]==0)
                     GX->f[i] = 1.0f/EX->f[i];
                 else if (EX->f[i]<0)
-                    GX->f[i] = NAN;
+                    GX->f[i] = -NAN;
                 else if (isnan(EX->f[i]))
                     GX->f[i] = EX->f[i];
                 else if (isinf(EX->f[i]))
diff --git a/src/emu/x64runavx.c b/src/emu/x64runavx.c
index 56507b4d..9d15e803 100644
--- a/src/emu/x64runavx.c
+++ b/src/emu/x64runavx.c
@@ -57,21 +57,10 @@ uintptr_t TestAVX(x64test_t *test, vex_t vex, uintptr_t addr, int *step)
 uintptr_t RunAVX(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
 #endif
 {
-    uint8_t opcode;
-    uint8_t nextop;
-    uint8_t tmp8u;
-    int8_t tmp8s;
-    int32_t tmp32s, tmp32s2;
-    uint32_t tmp32u, tmp32u2;
-    uint64_t tmp64u, tmp64u2;
-    int64_t tmp64s;
-    reg64_t *oped, *opgd;
-    sse_regs_t *opex, *opgx, eax1;
-    mmx87_regs_t *opem, *opgm, eam1;
-
 #ifdef TEST_INTERPRETER
     x64emu_t *emu = test->emu;
 #endif
+    uint8_t opcode = PK(0);
     if( (vex.m==VEX_M_0F) && (vex.p==VEX_P_NONE))
         addr = RunAVX_0F(emu, vex, addr, step);
     else if( (vex.m==VEX_M_0F) && (vex.p==VEX_P_66))
@@ -87,7 +76,7 @@ uintptr_t RunAVX(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
     else addr = 0;
 
     if(!addr)
-        printf_log(LOG_NONE, "Unimplemented AVX opcode prefix %s map %s ", avx_prefix_string(vex.p), avx_map_string(vex.m));
+        printf_log(LOG_NONE, "Unimplemented AVX opcode prefix %s map %s opcode %X ", avx_prefix_string(vex.p), avx_map_string(vex.m), opcode);
 
     return addr;
 }
diff --git a/src/emu/x64runavx0f.c b/src/emu/x64runavx0f.c
index b5a49560..34372ca6 100644
--- a/src/emu/x64runavx0f.c
+++ b/src/emu/x64runavx0f.c
@@ -69,7 +69,7 @@ uintptr_t RunAVX_0F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
                 GY->q[0] = EY->q[0];
                 GY->q[1] = EY->q[1];
             } else {
-                GY->q[0] = GY->q[1] = 0;
+                GY->u128 = 0;
             }
             break;
         case 0x11:  /* VMOVUPS Ex, Gx */
@@ -86,6 +86,27 @@ uintptr_t RunAVX_0F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
             }
             break;
 
+        case 0x14:  /* VUNPCKLPS Gx, Vx, Ex */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GETGY;
+            GX->ud[3] = EX->ud[1];
+            GX->ud[2] = VX->ud[1];
+            GX->ud[1] = EX->ud[0];
+            GX->ud[0] = VX->ud[0];
+            if(vex.l) {
+                GETEY;
+                GETVY;
+                GY->ud[3] = EY->ud[1];
+                GY->ud[2] = VY->ud[1];
+                GY->ud[1] = EY->ud[0];
+                GY->ud[0] = VY->ud[0];
+            } else
+                GY->u128 = 0;
+            break;
+
         case 0x28:  /* VMOVAPS Gx, Ex */
             nextop = F8;
             GETEX(0);
@@ -98,7 +119,7 @@ uintptr_t RunAVX_0F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
                 GY->q[0] = EY->q[0];
                 GY->q[1] = EY->q[1];
             } else {
-                GY->q[0] = GY->q[1] = 0;
+                GY->u128 = 0;
             }
             break;
         case 0x29:  /* VMOVAPS Ex, Gx */
@@ -115,6 +136,23 @@ uintptr_t RunAVX_0F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
             }
             break;
             
+        case 0x2F:                      /* VCOMISS Gx, Ex */
+            RESET_FLAGS(emu);
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            if(isnan(GX->f[0]) || isnan(EX->f[0])) {
+                SET_FLAG(F_ZF); SET_FLAG(F_PF); SET_FLAG(F_CF);
+            } else if(isgreater(GX->f[0], EX->f[0])) {
+                CLEAR_FLAG(F_ZF); CLEAR_FLAG(F_PF); CLEAR_FLAG(F_CF);
+            } else if(isless(GX->f[0], EX->f[0])) {
+                CLEAR_FLAG(F_ZF); CLEAR_FLAG(F_PF); SET_FLAG(F_CF);
+            } else {
+                SET_FLAG(F_ZF); CLEAR_FLAG(F_PF); CLEAR_FLAG(F_CF);
+            }
+            CLEAR_FLAG(F_OF); CLEAR_FLAG(F_AF); CLEAR_FLAG(F_SF);
+            break;
+
         case 0x52:                      /* VRSQRTPS Gx, Ex */
             nextop = F8;
             GETEX(0);
@@ -124,7 +162,7 @@ uintptr_t RunAVX_0F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
                 if(EX->f[i]==0)
                     GX->f[i] = 1.0f/EX->f[i];
                 else if (EX->f[i]<0)
-                    GX->f[i] = NAN;
+                    GX->f[i] = -NAN;
                 else if (isnan(EX->f[i]))
                     GX->f[i] = EX->f[i];
                 else if (isinf(EX->f[i]))
@@ -138,7 +176,7 @@ uintptr_t RunAVX_0F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
                     if(EY->f[i]==0)
                         GY->f[i] = 1.0f/EY->f[i];
                     else if (EY->f[i]<0)
-                        GY->f[i] = NAN;
+                        GY->f[i] = -NAN;
                     else if (isnan(EY->f[i]))
                         GY->f[i] = EY->f[i];
                     else if (isinf(EY->f[i]))
@@ -147,30 +185,73 @@ uintptr_t RunAVX_0F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
                         GY->f[i] = 1.0f/sqrtf(EY->f[i]);
                 }
             } else
-                GY->q[0] = GY->q[1] = 0;
+                GY->u128 = 0;
             #ifdef TEST_INTERPRETER
             test->notest = 1;
             #endif
             break;
 
+        case 0x54:                      /* VANDPS Gx, Vx, Ex */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GX->u128 = VX->u128 & EX->u128;
+            GETGY;
+            if(vex.l) {
+                GETEY;
+                GETVY;
+                GY->u128 = VY->u128 & EY->u128;
+            } else {
+                GY->u128 = 0;
+            }
+            break;
+        case 0x55:                      /* VANDNPS Gx, Vx, Ex */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GX->u128 = (~VX->u128) & EX->u128;
+            GETGY;
+            if(vex.l) {
+                GETEY;
+                GETVY;
+                GY->u128 = (~VY->u128) & EY->u128;
+            } else {
+                GY->u128 = 0;
+            }
+            break;
+
         case 0x57:                      /* XORPS Gx, Vx, Ex */
             nextop = F8;
             GETEX(0);
             GETGX;
             GETVX;
             GETGY;
+            GX->u128 = VX->u128 ^ EX->u128;
+            if(vex.l) {
+                GETEY;
+                GETVY;
+                GY->u128 = VY->u128 ^ EY->u128;
+            } else
+                GY->u128 = 0;
+            break;
+        case 0x58:                      /* VADDPS Gx, Vx, Ex */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GETGY;
             for(int i=0; i<4; ++i)
-                GX->ud[i] = VX->ud[i] ^ EX->ud[i];
+                GX->f[i] = VX->f[i] + EX->f[i];
             if(vex.l) {
                 GETEY;
                 GETVY;
                 for(int i=0; i<4; ++i)
-                    GY->ud[i] = VY->ud[i] ^ EY->ud[i];
-
+                    GY->f[i] = VY->f[i] + EY->f[i];
             } else
-                GY->q[0] = GY->q[1] = 0;
+                GY->u128 = 0;
             break;
-
         case 0x59:                      /* VMULPS Gx, Vx, Ex */
             nextop = F8;
             GETEX(0);
@@ -185,9 +266,27 @@ uintptr_t RunAVX_0F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
                 for(int i=0; i<4; ++i)
                     GY->f[i] = VY->f[i] * EY->f[i];
             } else
-                GY->q[0] = GY->q[1] = 0;
+                GY->u128 = 0;
             break;
 
+        case 0x5B:                      /* VCVTDQ2PS Gx, Ex */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETGY;
+            GX->f[0] = EX->sd[0];
+            GX->f[1] = EX->sd[1];
+            GX->f[2] = EX->sd[2];
+            GX->f[3] = EX->sd[3];
+            if(vex.l) {
+                GETEY;
+                GY->f[0] = EY->sd[0];
+                GY->f[1] = EY->sd[1];
+                GY->f[2] = EY->sd[2];
+                GY->f[3] = EY->sd[3];
+            } else
+                GY->u128 = 0;
+            break;
         case 0x5C:                      /* VSUBPS Gx, Vx, Ex */
             nextop = F8;
             GETEX(0);
@@ -202,7 +301,7 @@ uintptr_t RunAVX_0F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
                 for(int i=0; i<4; ++i)
                     GY->f[i] = VY->f[i] - EY->f[i];
             } else
-                GY->q[0] = GY->q[1] = 0;
+                GY->u128 = 0;
             break;
 
         case 0x77:
@@ -216,6 +315,90 @@ uintptr_t RunAVX_0F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
                 return 0;
             break;
 
+        case 0xC2:                      /* VCMPPS Gx, Vx, Ex, Ib */
+            nextop = F8;
+            GETEX(1);
+            GETGX;
+            GETVX;
+            GETGY;
+            tmp8u = F8;
+            for(int i=0; i<4; ++i) {
+                tmp8s = 0;
+                switch(tmp8u&7) {
+                    case 0: tmp8s=(VX->f[i] == EX->f[i]); break;
+                    case 1: tmp8s=isless(VX->f[i], EX->f[i]); break;
+                    case 2: tmp8s=islessequal(VX->f[i], EX->f[i]); break;
+                    case 3: tmp8s=isnan(VX->f[i]) || isnan(EX->f[i]); break;
+                    case 4: tmp8s=(VX->f[i] != EX->f[i]); break;
+                    case 5: tmp8s=isnan(VX->f[i]) || isnan(EX->f[i]) || isgreaterequal(VX->f[i], EX->f[i]); break;
+                    case 6: tmp8s=isnan(VX->f[i]) || isnan(EX->f[i]) || isgreater(VX->f[i], EX->f[i]); break;
+                    case 7: tmp8s=!isnan(VX->f[i]) && !isnan(EX->f[i]); break;
+                }
+                GX->ud[i]=(tmp8s)?0xffffffff:0;
+            }
+            if(vex.l) {
+                GETEY;
+                GETVY;
+                for(int i=0; i<4; ++i) {
+                    tmp8s = 0;
+                    switch(tmp8u&7) {
+                        case 0: tmp8s=(VY->f[i] == EY->f[i]); break;
+                        case 1: tmp8s=isless(VY->f[i], EY->f[i]); break;
+                        case 2: tmp8s=islessequal(VY->f[i], EY->f[i]); break;
+                        case 3: tmp8s=isnan(VY->f[i]) || isnan(EY->f[i]); break;
+                        case 4: tmp8s=(VY->f[i] != EY->f[i]); break;
+                        case 5: tmp8s=isnan(VY->f[i]) || isnan(EY->f[i]) || isgreaterequal(VY->f[i], EY->f[i]); break;
+                        case 6: tmp8s=isnan(VY->f[i]) || isnan(EY->f[i]) || isgreater(VY->f[i], EY->f[i]); break;
+                        case 7: tmp8s=!isnan(VY->f[i]) && !isnan(EY->f[i]); break;
+                    }
+                    GY->ud[i]=(tmp8s)?0xffffffff:0;
+                }
+            } else
+                GY->u128 = 0;
+            break;
+
+        case 0xC6:                      /* VSHUFPS Gx, Vx, Ex, Ib */
+            nextop = F8;
+            GETEX(1);
+            GETGX;
+            GETVX;
+            GETGY;
+            GETVY;
+            GETEY;
+            tmp8u = F8;
+            if(GX==VX) {
+                eax1 = *VX;
+                VX = &eax1;
+            }
+            if(GX==EX) {
+                eay1 = *EX;
+                EX = &eay1;
+            }
+            for(int i=0; i<2; ++i) {
+                GX->ud[i] = VX->ud[(tmp8u>>(i*2))&3];
+            }
+            for(int i=2; i<4; ++i) {
+                GX->ud[i] = EX->ud[(tmp8u>>(i*2))&3];
+            }
+            if(vex.l) {
+                if(GY==VY) {
+                    eax1 = *VY;
+                    VY = &eax1;
+                }
+                if(GY==EY) {
+                    eay1 = *EY;
+                    EY = &eay1;
+                }
+                for(int i=0; i<2; ++i) {
+                    GY->ud[i] = VY->ud[(tmp8u>>(i*2))&3];
+                }
+                for(int i=2; i<4; ++i) {
+                    GY->ud[i] = EY->ud[(tmp8u>>(i*2))&3];
+                }
+            } else
+                GY->u128 = 0;
+            break;
+
         default:
             return 0;
     }
diff --git a/src/emu/x64runavx660f.c b/src/emu/x64runavx660f.c
index aeb976f1..3136433f 100644
--- a/src/emu/x64runavx660f.c
+++ b/src/emu/x64runavx660f.c
@@ -58,6 +58,154 @@ uintptr_t RunAVX_660F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
 
     switch(opcode) {
 
+        case 0x2F:                      /* VCOMISD Gx, Ex */
+            RESET_FLAGS(emu);
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            if(isnan(GX->d[0]) || isnan(EX->d[0])) {
+                SET_FLAG(F_ZF); SET_FLAG(F_PF); SET_FLAG(F_CF);
+            } else if(isgreater(GX->d[0], EX->d[0])) {
+                CLEAR_FLAG(F_ZF); CLEAR_FLAG(F_PF); CLEAR_FLAG(F_CF);
+            } else if(isless(GX->d[0], EX->d[0])) {
+                CLEAR_FLAG(F_ZF); CLEAR_FLAG(F_PF); SET_FLAG(F_CF);
+            } else {
+                SET_FLAG(F_ZF); CLEAR_FLAG(F_PF); CLEAR_FLAG(F_CF);
+            }
+            CLEAR_FLAG(F_OF); CLEAR_FLAG(F_AF); CLEAR_FLAG(F_SF);
+            break;
+
+        case 0x54:  /* VANDPD Gx, Vx, Ex */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GX->u128 = VX->u128 & EX->u128;
+            GETGY;
+            if(vex.l) {
+                GETEY;
+                GETVY;
+                GY->u128 = VY->u128 & EY->u128;
+            } else {
+                GY->u128 = 0;
+            }
+            break;
+        case 0x55:  /* VANDNPD Gx, Vx, Ex */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GX->u128 = (~VX->u128) & EX->u128;
+            GETGY;
+            if(vex.l) {
+                GETEY;
+                GETVY;
+                GY->u128 = (~VY->u128) & EY->u128;
+            } else {
+                GY->u128 = 0;
+            }
+            break;
+
+        case 0x58:  /* VADDPD Gx, Vx, Ex */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GX->d[0] = VX->d[0] + EX->d[0];
+            GX->d[1] = VX->d[1] + EX->d[1];
+            GETGY;
+            if(vex.l) {
+                GETEY;
+                GETVY;
+                GY->d[0] = VY->d[0] + EY->d[0];
+                GY->d[1] = VY->d[1] + EY->d[1];
+            } else {
+                GY->u128 = 0;
+            }
+            break;
+
+        case 0x5A:      /* VCVTPD2PS Gx, Ex */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETGY;
+            GX->f[0] = EX->d[0];
+            GX->f[1] = EX->d[1];
+            if(vex.l) {
+                GETEY;
+                GX->f[2] = EY->d[0];
+                GX->f[3] = EY->d[1];
+            } else
+                GX->q[1] = 0;
+            GY->u128 = 0;
+            break;
+        case 0x5B:      /* VCVTPS2DQ Gx, Ex */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETGY;
+            for(int i=0; i<4; ++i) {
+                if(isnanf(EX->f[i]))
+                    tmp64s = INT32_MIN;
+                else
+                    switch(emu->mxcsr.f.MXCSR_RC) {
+                        case ROUND_Nearest: {
+                            int round = fegetround();
+                            fesetround(FE_TONEAREST);
+                            tmp64s = nearbyintf(EX->f[i]);
+                            fesetround(round);
+                            break;
+                        }
+                        case ROUND_Down:
+                            tmp64s = floorf(EX->f[i]);
+                            break;
+                        case ROUND_Up:
+                            tmp64s = ceilf(EX->f[i]);
+                            break;
+                        case ROUND_Chop:
+                            tmp64s = EX->f[i];
+                            break;
+                    }
+                if (tmp64s==(int32_t)tmp64s) {
+                    GX->sd[i] = (int32_t)tmp64s;
+                } else {
+                    GX->sd[i] = INT32_MIN;
+                }
+            }
+            if(vex.l) {
+                GETEY;
+                for(int i=0; i<4; ++i) {
+                    if(isnanf(EY->f[i]))
+                        tmp64s = INT32_MIN;
+                    else
+                        switch(emu->mxcsr.f.MXCSR_RC) {
+                            case ROUND_Nearest: {
+                                int round = fegetround();
+                                fesetround(FE_TONEAREST);
+                                tmp64s = nearbyintf(EY->f[i]);
+                                fesetround(round);
+                                break;
+                            }
+                            case ROUND_Down:
+                                tmp64s = floorf(EY->f[i]);
+                                break;
+                            case ROUND_Up:
+                                tmp64s = ceilf(EY->f[i]);
+                                break;
+                            case ROUND_Chop:
+                                tmp64s = EY->f[i];
+                                break;
+                        }
+                    if (tmp64s==(int32_t)tmp64s) {
+                        GY->sd[i] = (int32_t)tmp64s;
+                    } else {
+                        GY->sd[i] = INT32_MIN;
+                    }
+                }
+            } else
+                GY->u128 = 0;
+            break;
+        
         case 0x64:  /* VPCMPGTB Gx,Vx, Ex */
             nextop = F8;
             GETEX(0);
@@ -107,21 +255,45 @@ uintptr_t RunAVX_660F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
                 GY->q[0] = GY->q[1] = 0;
             break;
 
-        case 0x6C:  /* VPUNPCKLQDQ Gx,E Vx, x */
+        case 0x6B:  /* VPACKSSDW Gx,Vx, Ex */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GETGY;
+            for(int i=0; i<4; ++i)
+                GX->sw[i] = (VX->sd[i]<-32768)?-32768:((VX->sd[i]>32767)?32767:VX->sd[i]);
+            if(GX==EX)
+                GX->q[1] = GX->q[0];
+            else
+                for(int i=0; i<4; ++i)
+                    GX->sw[4+i] = (EX->sd[i]<-32768)?-32768:((EX->sd[i]>32767)?32767:EX->sd[i]);
+            if(vex.l) {
+                GETEY;
+                GETVY;
+                for(int i=0; i<4; ++i)
+                    GY->sw[i] = (VY->sd[i]<-32768)?-32768:((VY->sd[i]>32767)?32767:VY->sd[i]);
+                if(GY==EY)
+                    GY->q[1] = GY->q[0];
+                else
+                    for(int i=0; i<4; ++i)
+                        GY->sw[4+i] = (EY->sd[i]<-32768)?-32768:((EY->sd[i]>32767)?32767:EY->sd[i]);
+            } else
+                GY->u128 = 0;
+            break;
+        case 0x6C:  /* VPUNPCKLQDQ Gx, Vx, Ex */
             nextop = F8;
             GETEX(0);
             GETGX;
             GETVX;
             GETGY;
             GX->q[1] = EX->q[0];
-            if(GX!=VX)
-                GX->q[0] = VX->q[0];
+            GX->q[0] = VX->q[0];
             if(vex.l) {
                 GETEY;
                 GETVY;
                 GY->q[1] = EY->q[0];
-                if(GY!=VY)
-                    GY->q[0] = VY->q[0];
+                GY->q[0] = VY->q[0];
             } else
                 GY->q[0] = GY->q[1] = 0;
             break;
@@ -178,7 +350,7 @@ uintptr_t RunAVX_660F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
                 GETEY;
                 if(EY==GY) {eay1 = *GY; EY = &eay1;}   // copy is needed
                 for (int i=0; i<4; ++i)
-                    GY->ud[4+i] = EY->ud[4+((tmp8u>>(i*2))&3)];
+                    GY->ud[i] = EY->ud[(tmp8u>>(i*2))&3];
             } else 
                 memset(GY, 0, 16);
             if(EX==GX) {eax1 = *GX; EX = &eax1;}   // copy is needed
@@ -366,6 +538,65 @@ uintptr_t RunAVX_660F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
             } // no upper raz?
             break;
 
+        case 0xC2:                      /* CMPPD Gx, Vx, Ex, Ib */
+            nextop = F8;
+            GETEX(1);
+            GETGX;
+            GETVX;
+            GETGY;
+            tmp8u = F8;
+            for(int i=0; i<2; ++i) {
+                tmp8s = 0;
+                switch(tmp8u&7) {
+                    case 0: tmp8s=(VX->d[i] == EX->d[i]); break;
+                    case 1: tmp8s=isless(VX->d[i], EX->d[i]); break;
+                    case 2: tmp8s=islessequal(VX->d[i], EX->d[i]); break;
+                    case 3: tmp8s=isnan(VX->d[i]) || isnan(EX->d[i]); break;
+                    case 4: tmp8s=isnan(VX->d[i]) || isnan(EX->d[i]) || (VX->d[i] != EX->d[i]); break;
+                    case 5: tmp8s=isnan(VX->d[i]) || isnan(EX->d[i]) || isgreaterequal(VX->d[i], EX->d[i]); break;
+                    case 6: tmp8s=isnan(VX->d[i]) || isnan(EX->d[i]) || isgreater(VX->d[i], EX->d[i]); break;
+                    case 7: tmp8s=!isnan(VX->d[i]) && !isnan(EX->d[i]); break;
+                }
+                GX->q[i]=(tmp8s)?0xffffffffffffffffLL:0LL;
+            }
+            if(vex.l) {
+                GETEY;
+                GETVY;
+                for(int i=0; i<2; ++i) {
+                    tmp8s = 0;
+                    switch(tmp8u&7) {
+                        case 0: tmp8s=(VY->d[i] == EY->d[i]); break;
+                        case 1: tmp8s=isless(VY->d[i], EY->d[i]); break;
+                        case 2: tmp8s=islessequal(VY->d[i], EY->d[i]); break;
+                        case 3: tmp8s=isnan(VY->d[i]) || isnan(EY->d[i]); break;
+                        case 4: tmp8s=isnan(VY->d[i]) || isnan(EY->d[i]) || (VY->d[i] != EY->d[i]); break;
+                        case 5: tmp8s=isnan(VY->d[i]) || isnan(EY->d[i]) || isgreaterequal(VY->d[i], EY->d[i]); break;
+                        case 6: tmp8s=isnan(VY->d[i]) || isnan(EY->d[i]) || isgreater(VY->d[i], EY->d[i]); break;
+                        case 7: tmp8s=!isnan(VY->d[i]) && !isnan(EY->d[i]); break;
+                    }
+                    GY->q[i]=(tmp8s)?0xffffffffffffffffLL:0LL;
+                }
+            } else
+                GY->u128 = 0;
+            break;
+
+        case 0xD0:  /* VADDSUBPD Gx, Vx, Ex */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GETGY;
+            GX->d[0] = VX->d[0] - EX->d[0];
+            GX->d[1] = VX->d[1] + EX->d[1];
+            if(vex.l) {
+                GETEY;
+                GETVY;
+                GY->d[0] = VY->d[0] - EY->d[0];
+                GY->d[1] = VY->d[1] + EY->d[1];
+            } else
+                GY->u128 = 0;
+            break;
+
         case 0xDB:  /* VPAND Gx, Vx, Ex */
             nextop = F8;
             GETEX(0);
diff --git a/src/emu/x64runavx660f38.c b/src/emu/x64runavx660f38.c
index ab01aba7..3ec1f0ff 100644
--- a/src/emu/x64runavx660f38.c
+++ b/src/emu/x64runavx660f38.c
@@ -30,6 +30,27 @@
 
 #include "modrm.h"
 
+static uint8_t ff_mult(uint8_t a, uint8_t b)	/* carry-less (XOR-based) product of a and b, kept to 8 bits by folding overflow back with 0x1b */
+{
+	int retval = 0;	/* XOR accumulator of the multiples of a selected by the bits of b */
+
+	for(int i = 0; i < 8; i++) {	/* one pass per bit of b, least significant bit first */
+		if((b & 1) == 1)
+			retval ^= a;	/* current bit of b is set: add (XOR) the current multiple of a */
+
+		if((a & 0x80)) {	/* top bit set: doubling a would not fit in 8 bits */
+			a <<= 1;
+			a  ^= 0x1b;	/* the dropped bit 8 is replaced by 0x1b, i.e. reduction by the polynomial 0x11b */
+		} else {
+			a <<= 1;
+		}
+
+		b >>= 1;	/* move on to the next bit of b */
+	}
+
+	return retval;	/* only ever XORed with 8-bit values of a, so it fits in uint8_t */
+}
+
 #ifdef TEST_INTERPRETER
 uintptr_t TestAVX_660F38(x64test_t *test, vex_t vex, uintptr_t addr, int *step)
 #else
@@ -47,6 +68,49 @@ uintptr_t RunAVX_660F38(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
     reg64_t *oped, *opgd;
     sse_regs_t *opex, *opgx, *opvx, eax1;
     sse_regs_t *opey, *opgy, *opvy, eay1;
+    // AES opcodes constants
+                            //   A0 B1 C2 D3 E4 F5 G6 H7 I8 J9 Ka Lb Mc Nd Oe Pf
+                            //   A  F  K  P  E  J  O  D  I  N  C  H  M  B  G  L
+    const uint8_t shiftrows[] = {0, 5,10,15, 4, 9,14, 3, 8,13, 2, 7,12, 1, 6,11};
+    const uint8_t subbytes[256] = {
+        0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
+        0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
+        0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
+        0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
+        0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
+        0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
+        0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
+        0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
+        0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
+        0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
+        0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
+        0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
+        0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
+        0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
+        0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
+        0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16,
+    };
+                            //   A0 B1 C2 D3 E4 F5 G6 H7 I8 J9 Ka Lb Mc Nd Oe Pf
+                            //   A  N  K  H  E  B  O  L  I  F  C  P  M  J  G  D
+    const uint8_t invshiftrows[] = {0,13,10, 7, 4, 1,14,11, 8, 5, 2,15,12, 9, 6, 3};
+    const uint8_t invsubbytes[256] = {
+        0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
+        0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
+        0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
+        0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
+        0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
+        0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
+        0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
+        0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
+        0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
+        0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
+        0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
+        0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
+        0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
+        0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
+        0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
+        0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d,
+    };
 
 
 #ifdef TEST_INTERPRETER
@@ -90,6 +154,231 @@ uintptr_t RunAVX_660F38(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
                 GY->q[0] = GY->q[1] = 0;
             break;
 
+        case 0x18:  /* VBROADCASTSS Gx, Ex: replicate the low dword of Ex */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETGY;
+            tmp32u = EX->ud[0];     // source is read once, before any destination write
+            GX->ud[0] = GX->ud[1] = GX->ud[2] = GX->ud[3] = tmp32u;
+            if(!vex.l) {
+                GY->u128 = 0;       // vex.l clear: GY is zeroed
+            } else {
+                for(int lane=0; lane<4; ++lane)
+                    GY->ud[lane] = tmp32u;
+            }
+            break;
+        case 0x19:  /* VBROADCASTSD Gx, Ex: replicate the low qword of Ex */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETGY;
+            tmp64u = EX->q[0];      // source is read once, before any destination write
+            GX->q[0] = GX->q[1] = tmp64u;
+            if(!vex.l) {
+                GY->u128 = 0;       // vex.l clear: GY is zeroed
+            } else {
+                GY->q[0] = tmp64u;
+                GY->q[1] = tmp64u;
+            }
+            break;
+        case 0x1A:  /* VBROADCASTF128 Gx, Ex */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETGY;
+            // the same 128 bits of Ex are stored to GX first, then to GY
+            GY->u128 = GX->u128 = EX->u128;
+            break;
+
+        case 0x2C:  /* VMASKMOVPS Gx, Vx, Ex: masked load, unselected dwords become 0 */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GETGY;
+            for(int lane=0; lane<4; ++lane)    // Ex is only dereferenced for lanes whose mask sign bit is set
+                GX->ud[lane] = (VX->ud[lane]&0x80000000u)?EX->ud[lane]:0;
+            if(vex.l) {
+                GETEY;
+                GETVY;
+                for(int lane=0; lane<4; ++lane)
+                    GY->ud[lane] = (VY->ud[lane]&0x80000000u)?EY->ud[lane]:0;
+            } else
+                GY->u128 = 0;
+            break;
+
+        case 0x2E:  /* VMASKMOVPS Ex, Vx, Gx: masked store, the sign bit of each Vx dword selects what is written */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            for(int i=0; i<4; ++i)
+                if(VX->ud[i]>>31) EX->ud[i] = GX->ud[i];    // unselected dwords of Ex are left untouched (previously overwritten with 0)
+            if(vex.l) {
+                GETGY;
+                GETEY;
+                GETVY;
+                for(int i=0; i<4; ++i)
+                    if(VY->ud[i]>>31) EY->ud[i] = GY->ud[i];    // same rule for the second group of four dwords
+            }
+            break;
+
+        case 0x5A:  /* VBROADCASTI128 Gx, Ex */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETGY;
+            // the same 128 bits of Ex are stored to GX first, then to GY
+            GY->u128 = GX->u128 = EX->u128;
+            break;
+
+        case 0xDB:  /* VAESIMC Gx, Ex: apply InvMixColumns to the 16 bytes of Ex */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETGY;
+            //STATE ← InvMixColumns( STATE );
+            if (EX == GX) {     // source and destination are the same pointer: compute from a snapshot held in eax1
+                for(int i=0; i<16; ++i)
+                    eax1.ub[i] = EX->ub[i];
+                for(int j=0; j<4; ++j) {    // j selects one group of 4 consecutive bytes (one column)
+                    GX->ub[0+j*4] = ff_mult(0x0E, eax1.ub[0+j*4]) ^ ff_mult(0x0B, eax1.ub[1+j*4]) ^ ff_mult(0x0D, eax1.ub[2+j*4]) ^ ff_mult(0x09, eax1.ub[3+j*4]);
+                    GX->ub[1+j*4] = ff_mult(0x09, eax1.ub[0+j*4]) ^ ff_mult(0x0E, eax1.ub[1+j*4]) ^ ff_mult(0x0B, eax1.ub[2+j*4]) ^ ff_mult(0x0D, eax1.ub[3+j*4]);
+                    GX->ub[2+j*4] = ff_mult(0x0D, eax1.ub[0+j*4]) ^ ff_mult(0x09, eax1.ub[1+j*4]) ^ ff_mult(0x0E, eax1.ub[2+j*4]) ^ ff_mult(0x0B, eax1.ub[3+j*4]);
+                    GX->ub[3+j*4] = ff_mult(0x0B, eax1.ub[0+j*4]) ^ ff_mult(0x0D, eax1.ub[1+j*4]) ^ ff_mult(0x09, eax1.ub[2+j*4]) ^ ff_mult(0x0E, eax1.ub[3+j*4]);
+                }
+            } else {    // different pointers: Ex is read directly while GX is written
+                for(int j=0; j<4; ++j) {
+                    GX->ub[0+j*4] = ff_mult(0x0E, EX->ub[0+j*4]) ^ ff_mult(0x0B, EX->ub[1+j*4]) ^ ff_mult(0x0D, EX->ub[2+j*4]) ^ ff_mult(0x09, EX->ub[3+j*4]);
+                    GX->ub[1+j*4] = ff_mult(0x09, EX->ub[0+j*4]) ^ ff_mult(0x0E, EX->ub[1+j*4]) ^ ff_mult(0x0B, EX->ub[2+j*4]) ^ ff_mult(0x0D, EX->ub[3+j*4]);
+                    GX->ub[2+j*4] = ff_mult(0x0D, EX->ub[0+j*4]) ^ ff_mult(0x09, EX->ub[1+j*4]) ^ ff_mult(0x0E, EX->ub[2+j*4]) ^ ff_mult(0x0B, EX->ub[3+j*4]);
+                    GX->ub[3+j*4] = ff_mult(0x0B, EX->ub[0+j*4]) ^ ff_mult(0x0D, EX->ub[1+j*4]) ^ ff_mult(0x09, EX->ub[2+j*4]) ^ ff_mult(0x0E, EX->ub[3+j*4]);
+                }
+            }
+            GY->u128 = 0;   // only GX receives a result here; GY is always cleared
+            break;
+        case 0xDC:  /* VAESENC Gx, Vx, Ex: one AES encryption round, state in Vx, round key in Ex */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GETGY;
+            //STATE ← SRC1;
+            //RoundKey ← SRC2;
+            //STATE ← ShiftRows( STATE );
+            //STATE ← SubBytes( STATE );
+            for(int i=0; i<16; ++i)     // both steps at once: permute through shiftrows[], then substitute through subbytes[]
+                eax1.ub[i] = subbytes[VX->ub[shiftrows[i]]];
+            //STATE ← MixColumns( STATE );
+            for(int j=0; j<4; ++j) {    // j selects one group of 4 consecutive bytes; result goes to eay1 so eax1 stays intact while read
+                eay1.ub[0+j*4] = ff_mult(0x02, eax1.ub[0+j*4]) ^ ff_mult(0x03, eax1.ub[1+j*4]) ^               eax1.ub[2+j*4]  ^               eax1.ub[3+j*4] ;
+                eay1.ub[1+j*4] =               eax1.ub[0+j*4]  ^ ff_mult(0x02, eax1.ub[1+j*4]) ^ ff_mult(0x03, eax1.ub[2+j*4]) ^               eax1.ub[3+j*4] ;
+                eay1.ub[2+j*4] =               eax1.ub[0+j*4]  ^               eax1.ub[1+j*4]  ^ ff_mult(0x02, eax1.ub[2+j*4]) ^ ff_mult(0x03, eax1.ub[3+j*4]);
+                eay1.ub[3+j*4] = ff_mult(0x03, eax1.ub[0+j*4]) ^               eax1.ub[1+j*4]  ^               eax1.ub[2+j*4]  ^ ff_mult(0x02, eax1.ub[3+j*4]);
+            }
+            //DEST[127:0] ← STATE XOR RoundKey;
+            GX->u128 = eay1.u128 ^ EX->u128;
+            if(vex.l) {     // vex.l set: repeat the identical round on VY/EY and store it in GY
+                GETEY;
+                GETVY;
+                for(int i=0; i<16; ++i)
+                    eax1.ub[i] = subbytes[VY->ub[shiftrows[i]]];
+                for(int j=0; j<4; ++j) {
+                    eay1.ub[0+j*4] = ff_mult(0x02, eax1.ub[0+j*4]) ^ ff_mult(0x03, eax1.ub[1+j*4]) ^               eax1.ub[2+j*4]  ^               eax1.ub[3+j*4] ;
+                    eay1.ub[1+j*4] =               eax1.ub[0+j*4]  ^ ff_mult(0x02, eax1.ub[1+j*4]) ^ ff_mult(0x03, eax1.ub[2+j*4]) ^               eax1.ub[3+j*4] ;
+                    eay1.ub[2+j*4] =               eax1.ub[0+j*4]  ^               eax1.ub[1+j*4]  ^ ff_mult(0x02, eax1.ub[2+j*4]) ^ ff_mult(0x03, eax1.ub[3+j*4]);
+                    eay1.ub[3+j*4] = ff_mult(0x03, eax1.ub[0+j*4]) ^               eax1.ub[1+j*4]  ^               eax1.ub[2+j*4]  ^ ff_mult(0x02, eax1.ub[3+j*4]);
+                }
+                GY->u128 = eay1.u128 ^ EY->u128;
+            } else
+                GY->u128 = 0; // vex.l clear: GY is zeroed
+            break;
+        case 0xDD:  /* VAESENCLAST Gx, Vx, Ex */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GETGY;
+            // Final encryption round: state = Vx, round key = Ex, no MixColumns.
+            // ShiftRows and SubBytes are done in one pass: bytes are gathered
+            // through shiftrows[] and substituted through subbytes[].
+            for(int b=0; b<16; ++b) {
+                eax1.ub[b] = subbytes[VX->ub[shiftrows[b]]];
+            }
+            GX->u128 = EX->u128 ^ eax1.u128;    // DEST[127:0] = STATE ^ RoundKey
+            if(!vex.l) {
+                GY->u128 = 0;
+            } else {
+                GETEY;
+                GETVY;
+                for(int b=0; b<16; ++b)
+                    eax1.ub[b] = subbytes[VY->ub[shiftrows[b]]];
+                GY->u128 = EY->u128 ^ eax1.u128;
+            }
+            break;
+        case 0xDE:  /* VAESDEC Gx, Vx, Ex: one AES decryption round, state in Vx, round key in Ex */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GETGY;
+            //STATE ← SRC1;
+            //RoundKey ← SRC2;
+            //STATE ← InvShiftRows( STATE );
+            for(int i=0; i<16; ++i)     // gather Vx bytes into eax1 in invshiftrows[] order
+                eax1.ub[i] = VX->ub[invshiftrows[i]];
+            //STATE ← InvSubBytes( STATE );
+            for(int i=0; i<16; ++i)     // per-byte table lookup, done in place on eax1
+                eax1.ub[i] = invsubbytes[eax1.ub[i]];
+            //STATE ← InvMixColumns( STATE );
+            for(int j=0; j<4; ++j) {    // j selects one group of 4 consecutive bytes; result goes to eay1 so eax1 stays intact while read
+                eay1.ub[0+j*4] = ff_mult(0x0E, eax1.ub[0+j*4]) ^ ff_mult(0x0B, eax1.ub[1+j*4]) ^ ff_mult(0x0D, eax1.ub[2+j*4]) ^ ff_mult(0x09, eax1.ub[3+j*4]);
+                eay1.ub[1+j*4] = ff_mult(0x09, eax1.ub[0+j*4]) ^ ff_mult(0x0E, eax1.ub[1+j*4]) ^ ff_mult(0x0B, eax1.ub[2+j*4]) ^ ff_mult(0x0D, eax1.ub[3+j*4]);
+                eay1.ub[2+j*4] = ff_mult(0x0D, eax1.ub[0+j*4]) ^ ff_mult(0x09, eax1.ub[1+j*4]) ^ ff_mult(0x0E, eax1.ub[2+j*4]) ^ ff_mult(0x0B, eax1.ub[3+j*4]);
+                eay1.ub[3+j*4] = ff_mult(0x0B, eax1.ub[0+j*4]) ^ ff_mult(0x0D, eax1.ub[1+j*4]) ^ ff_mult(0x09, eax1.ub[2+j*4]) ^ ff_mult(0x0E, eax1.ub[3+j*4]);
+            }
+            //DEST[127:0] ← STATE XOR RoundKey;
+            GX->u128 = eay1.u128 ^ EX->u128;
+            if(vex.l) {     // vex.l set: same round on VY/EY, stored in GY
+                GETEY;
+                GETVY;
+                for(int i=0; i<16; ++i)     // permutation and substitution fused into one lookup here
+                    eax1.ub[i] = invsubbytes[VY->ub[invshiftrows[i]]];
+                for(int j=0; j<4; ++j) {
+                    eay1.ub[0+j*4] = ff_mult(0x0E, eax1.ub[0+j*4]) ^ ff_mult(0x0B, eax1.ub[1+j*4]) ^ ff_mult(0x0D, eax1.ub[2+j*4]) ^ ff_mult(0x09, eax1.ub[3+j*4]);
+                    eay1.ub[1+j*4] = ff_mult(0x09, eax1.ub[0+j*4]) ^ ff_mult(0x0E, eax1.ub[1+j*4]) ^ ff_mult(0x0B, eax1.ub[2+j*4]) ^ ff_mult(0x0D, eax1.ub[3+j*4]);
+                    eay1.ub[2+j*4] = ff_mult(0x0D, eax1.ub[0+j*4]) ^ ff_mult(0x09, eax1.ub[1+j*4]) ^ ff_mult(0x0E, eax1.ub[2+j*4]) ^ ff_mult(0x0B, eax1.ub[3+j*4]);
+                    eay1.ub[3+j*4] = ff_mult(0x0B, eax1.ub[0+j*4]) ^ ff_mult(0x0D, eax1.ub[1+j*4]) ^ ff_mult(0x09, eax1.ub[2+j*4]) ^ ff_mult(0x0E, eax1.ub[3+j*4]);
+                }
+                GY->u128 = eay1.u128 ^ EY->u128;
+            } else
+                GY->u128 = 0;   // vex.l clear: GY is zeroed
+            break;
+        case 0xDF:  /* VAESDECLAST Gx, Vx, Ex */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GETGY;
+            // Final decryption round: state = Vx, round key = Ex, no InvMixColumns.
+            // InvShiftRows and InvSubBytes are done in one pass: bytes are gathered
+            // through invshiftrows[] and substituted through invsubbytes[].
+            for(int b=0; b<16; ++b) {
+                eax1.ub[b] = invsubbytes[VX->ub[invshiftrows[b]]];
+            }
+            GX->u128 = EX->u128 ^ eax1.u128;    // DEST[127:0] = STATE ^ RoundKey
+            if(!vex.l) {
+                GY->u128 = 0;
+            } else {
+                GETEY;
+                GETVY;
+                for(int b=0; b<16; ++b)
+                    eax1.ub[b] = invsubbytes[VY->ub[invshiftrows[b]]];
+                GY->u128 = EY->u128 ^ eax1.u128;
+            }
+            break;
+
         default:
             return 0;
     }
diff --git a/src/emu/x64runavx660f3a.c b/src/emu/x64runavx660f3a.c
index db82a823..cc5e784d 100644
--- a/src/emu/x64runavx660f3a.c
+++ b/src/emu/x64runavx660f3a.c
@@ -59,6 +59,25 @@ uintptr_t RunAVX_660F3A(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
     float tmpf;
     sse_regs_t *opex, *opgx, *opvx, eax1;
     sse_regs_t *opey, *opgy, *opvy, eay1;
+    // AES opcodes constants
+    static const uint8_t subbytes[256] = {   // AES forward S-box; static so the table is not rebuilt on every call
+        0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
+        0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
+        0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
+        0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
+        0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
+        0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
+        0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
+        0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
+        0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
+        0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
+        0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
+        0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
+        0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
+        0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
+        0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
+        0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16,
+    };
 
 
 #ifdef TEST_INTERPRETER
@@ -70,7 +89,41 @@ uintptr_t RunAVX_660F3A(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
 
     switch(opcode) {
 
-        case 0x0F:          // VPALIGNR GX, VX, EX, u8
+        case 0x0C:      /* VBLENDPS Gx, Vx, Ex, u8: per-dword select, imm8 bit set picks Ex, clear picks Vx */
+            nextop = F8;
+            GETEX(1);
+            GETGX;
+            GETVX;
+            GETGY;
+            tmp8u = F8;
+            for(int lane=0; lane<4; ++lane)         // imm8 bits 0..3 drive GX
+                GX->ud[lane] = ((tmp8u>>lane)&1)?EX->ud[lane]:VX->ud[lane];
+            if(vex.l) {
+                GETEY;
+                GETVY;
+                for(int lane=0; lane<4; ++lane)     // imm8 bits 4..7 drive GY
+                    GY->ud[lane] = ((tmp8u>>(lane+4))&1)?EY->ud[lane]:VY->ud[lane];
+            } else
+                GY->u128 = 0;
+            break;
+        case 0x0D:      /* VBLENDPD Gx, Vx, Ex, u8: per-qword select, imm8 bit set picks Ex, clear picks Vx */
+            nextop = F8;
+            GETEX(1);
+            GETGX;
+            GETVX;
+            GETGY;
+            tmp8u = F8;
+            for(int lane=0; lane<2; ++lane)         // imm8 bits 0..1 drive GX
+                GX->q[lane] = ((tmp8u>>lane)&1)?EX->q[lane]:VX->q[lane];
+            if(vex.l) {
+                GETEY;
+                GETVY;
+                for(int lane=0; lane<2; ++lane)     // imm8 bits 2..3 drive GY
+                    GY->q[lane] = ((tmp8u>>(lane+2))&1)?EY->q[lane]:VY->q[lane];
+            } else
+                GY->u128 = 0;
+            break;
+        case 0x0F:      /* VPALIGNR GX, VX, EX, u8 */
             nextop = F8;
             GETEX(1);
             GETGX;
@@ -90,7 +143,7 @@ uintptr_t RunAVX_660F3A(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
                 GETEY;
                 GETVY;
                 if(tmp8u>31)
-                    {GY->q[0] = GY->q[1] = 0;}
+                    {GY->u128 = 0;}
                 else
                 {
                     for (int i=0; i<16; ++i, ++tmp8u)
@@ -99,10 +152,56 @@ uintptr_t RunAVX_660F3A(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
                     GY->q[1] = eax1.q[1];
                 }
             } else
-                GY->q[0] = GY->q[1] = 0;
+                GY->u128 = 0;
             break;
 
-        case 0x21:  /* VINSRTPS Gx, Vx, Ex, imm8 */
+        case 0x16:      // VPEXTRD/Q ED, GX, u8: copy one element of GX, selected by the immediate, to ED
+            nextop = F8;
+            GETED(1);
+            GETGX;
+            tmp8u = F8;
+            if(rex.w) {     // rex.w set: qword element, index is imm8 bit 0
+                ED->q[0] = GX->q[tmp8u&1];
+            } else {        // rex.w clear: dword element, index is imm8 bits 1:0
+                if(MODREG)  // MODREG case writes the whole q[0], so the dword ends up zero-extended
+                    ED->q[0] = GX->ud[tmp8u&3];
+                else        // otherwise only dword[0] of the destination is written
+                    ED->dword[0] = GX->ud[tmp8u&3];
+            }
+            break;
+
+        case 0x18:  /* VINSERTF128 Gx, Vx, Ex, imm8: destination = Vx with one 128-bit half replaced by Ex */
+            nextop = F8;
+            GETEX(1);
+            GETGX;
+            GETVX;
+            GETGY;
+            GETVY;
+            tmp8u = F8;
+            if(tmp8u&1) {       // imm8 bit 0 set: Ex replaces GY, GX takes Vx
+                GY->u128 = EX->u128;
+                if(GX!=VX)      // stray ';' removed: the copy is skipped when GX and VX are the same pointer
+                    GX->u128 = VX->u128;
+            } else {            // imm8 bit 0 clear: Ex replaces GX, GY takes VY
+                GX->u128 = EX->u128;
+                if(GY!=VY)
+                    GY->u128 = VY->u128;
+            }
+            break;
+        case 0x19:  /* VEXTRACTF128 Ex, Gx, imm8: copy one 128-bit half of the G register to Ex */
+            nextop = F8;
+            GETEX(1);
+            GETGX;
+            GETGY;
+            tmp8u = F8;
+            EX->u128 = (tmp8u&1)?GY->u128:GX->u128;     // imm8 bit 0 set selects GY, clear selects GX
+            if(MODREG) {    // MODREG case only: EY is cleared as well
+                GETEY;
+                EY->u128 = 0;
+            }
+            break;
+
+        case 0x21:  /* VINSERTPS Gx, Vx, Ex, imm8 */
             nextop = F8;
             GETGX;
             GETEX(1);
@@ -114,8 +213,8 @@ uintptr_t RunAVX_660F3A(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
             } else
                 tmp32u = EX->ud[0];
             for(int i=0; i<4; ++i)
-                GX->ud[i] = (tmp8u&(1<<i))?((i==((tmp8u>>4)&3))?tmp32u:VX->ud[i]):0;
-            GY->q[0] = GY->q[1] = 0;
+                GX->ud[i] = (tmp8u&(1<<i))?0:((i==((tmp8u>>4)&3))?tmp32u:VX->ud[i]);
+            GY->u128 = 0;
             break;
 
         case 0x40:  /* DPPS Gx, Ex, Ib */
@@ -139,7 +238,7 @@ uintptr_t RunAVX_660F3A(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
                 for(int i=0; i<4; ++i)
                     GY->f[i] = (tmp8u&(1<<i))?tmpf:0.0f;
             } else
-                GY->q[0] = GY->q[1] = 0;
+                GY->u128 = 0;
             break;
 
         case 0x44:    /* VPCLMULQDQ Gx, Vx, Ex, imm8 */
@@ -155,7 +254,65 @@ uintptr_t RunAVX_660F3A(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
                 GETEY;
                 GY->u128 = pclmul_helper(VY->q[tmp8u&1], EY->q[(tmp8u>>4)&1]);
             } else
-                GY->q[0] = GY->q[1] = 0;
+                GY->u128 = 0;
+            break;
+
+        case 0x4A:      /* VBLENDVPS Gx, Vx, Ex, XMMImm8: per-dword select driven by the sign bits of a mask register */
+            nextop = F8;
+            GETEX(1);
+            GETGX;
+            GETVX;
+            GETGY;
+            tmp8u = (F8)>>4;    // upper nibble of the trailing byte is the index of the mask register
+            for(int i=0; i<4; ++i)      // mask sign bit set picks Ex, clear picks Vx
+                GX->ud[i] = (emu->xmm[tmp8u].ud[i]>>31)?EX->ud[i]:VX->ud[i];
+            if(vex.l) {
+                GETEY;
+                GETVY;
+                for(int i=0; i<4; ++i)  // same selection for GY, mask taken from emu->ymm[]
+                    GY->ud[i] = (emu->ymm[tmp8u].ud[i]>>31)?EY->ud[i]:VY->ud[i];
+            } else
+                GY->u128 = 0;   // vex.l clear: GY is zeroed
+            break;
+        case 0x4B:      /* VBLENDVPD Gx, Vx, Ex, XMMImm8: per-qword select driven by the sign bits of a mask register */
+            nextop = F8;
+            GETEX(1);           // was 0; 1 like VBLENDVPS (0x4A) and every other arm here that fetches a trailing byte. NOTE(review): presumably the count of bytes after the operand, used for RIP-relative Ex - confirm
+            GETGX;
+            GETVX;
+            GETGY;
+            tmp8u = (F8)>>4;    // upper nibble of the trailing byte is the index of the mask register
+            for(int i=0; i<2; ++i)      // mask sign bit set picks Ex, clear picks Vx
+                GX->q[i] = (emu->xmm[tmp8u].q[i]>>63)?EX->q[i]:VX->q[i];
+            if(vex.l) {
+                GETEY;
+                GETVY;
+                for(int i=0; i<2; ++i)  // same selection for GY, mask taken from emu->ymm[]
+                    GY->q[i] = (emu->ymm[tmp8u].q[i]>>63)?EY->q[i]:VY->q[i];
+            } else
+                GY->u128 = 0;   // vex.l clear: GY is zeroed
+            break;
+
+        case 0xDF:      // VAESKEYGENASSIST Gx, Ex, u8: key-expansion helper built from dwords 1 and 3 of Ex
+            nextop = F8;
+            GETEX(1);
+            GETGX;
+            tmp32u = F8;    // immediate byte, XORed into the two rotated dwords below
+            for (int i = 4; i < 8; ++i)     // substitute the bytes of dword 1
+                GX->ub[i] = subbytes[EX->ub[i]];
+            for (int i = 12; i < 16; ++i)   // substitute the bytes of dword 3
+                GX->ub[i] = subbytes[EX->ub[i]];
+            GX->ud[0] = GX->ud[1];          // dword 0 = substituted dword 1, unrotated
+            tmp8u = GX->ub[4];              // next three lines rotate dword 1 right by 8 bits
+            GX->ud[1] = GX->ud[1] >> 8;
+            GX->ub[7] = tmp8u;
+            GX->ud[1] ^= tmp32u;
+            GX->ud[2] = GX->ud[3];          // dword 2 = substituted dword 3, unrotated
+            tmp8u = GX->ub[12];             // same 8-bit right rotation for dword 3
+            GX->ud[3] = GX->ud[3] >> 8;
+            GX->ub[15] = tmp8u;
+            GX->ud[3] ^= tmp32u;
+            GETGY;
+            GY->u128 = 0;   // GY is always cleared; vex.l is not consulted here
+            break;
 
         default:
diff --git a/src/emu/x64runavxf20f.c b/src/emu/x64runavxf20f.c
index 1bb03d72..642946b5 100644
--- a/src/emu/x64runavxf20f.c
+++ b/src/emu/x64runavxf20f.c
@@ -44,6 +44,7 @@ uintptr_t RunAVX_F20F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
     uint32_t tmp32u, tmp32u2;
     uint64_t tmp64u, tmp64u2;
     int64_t tmp64s;
+    int64_t tmp64s0, tmp64s1;
     reg64_t *oped, *opgd;
     sse_regs_t *opex, *opgx, *opvx, eax1;
     sse_regs_t *opey, *opgy, *opvy, eay1;
@@ -70,9 +71,9 @@ uintptr_t RunAVX_F20F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
                 GX->q[1] = 0;
             }
             GETGY;
-            GY->q[0] = GY->q[1] = 0;
+            GY->u128 = 0;
             break;
-        case 0x11:  /* MOVSS Ex Gx */
+        case 0x11:  /* MOVSD Ex Gx */
             nextop = F8;
             GETEX(0);
             GETGX;
@@ -81,10 +82,143 @@ uintptr_t RunAVX_F20F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
                 GETVX;
                 EX->q[1] = VX->q[1];
                 GETEY;
-                EY->q[0] = EY->q[1] = 0;
+                EY->u128 = 0;
             }
             break;
-        
+
+        case 0x58:  /* VADDSD Gx, Vx, Ex: add the low doubles only */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GETGY;
+            GX->d[0] = VX->d[0] + EX->d[0];
+            // upper qword comes from Vx; nothing to copy when GX and VX are the same pointer
+            if(GX!=VX)
+                GX->q[1] = VX->q[1];
+            GY->u128 = 0;
+            break;
+
+        case 0xC2:  /* VCMPSD Gx, Vx, Ex, Ib: compare the low doubles, produce an all-ones or all-zeros qword */
+            nextop = F8;
+            GETEX(1);
+            GETGX;
+            GETVX;
+            GETGY;
+            tmp8u = F8;
+            tmp8s = 0;
+            switch(tmp8u&7) {   // only bits 2:0 of the predicate byte are decoded here
+                case 0: tmp8s=(VX->d[0] == EX->d[0]); break;    // equal
+                case 1: tmp8s=isless(VX->d[0], EX->d[0]) && !(isnan(VX->d[0]) || isnan(EX->d[0])); break;   // less than, false on NaN
+                case 2: tmp8s=islessequal(VX->d[0], EX->d[0]) && !(isnan(VX->d[0]) || isnan(EX->d[0])); break;  // less or equal, false on NaN
+                case 3: tmp8s=isnan(VX->d[0]) || isnan(EX->d[0]); break;    // unordered: at least one NaN
+                case 4: tmp8s=isnan(VX->d[0]) || isnan(EX->d[0]) || (VX->d[0] != EX->d[0]); break;  // not equal, true on NaN
+                case 5: tmp8s=isnan(VX->d[0]) || isnan(EX->d[0]) || isgreaterequal(VX->d[0], EX->d[0]); break;  // not less than, true on NaN
+                case 6: tmp8s=isnan(VX->d[0]) || isnan(EX->d[0]) || isgreater(VX->d[0], EX->d[0]); break;   // not less or equal, true on NaN
+                case 7: tmp8s=!isnan(VX->d[0]) && !isnan(EX->d[0]); break;  // ordered: no NaN
+            }
+            GX->q[0]=(tmp8s)?0xffffffffffffffffLL:0LL;
+            GX->q[1] = VX->q[1];    // upper qword is taken from Vx
+            GY->u128 = 0;
+            break;
+
+        case 0xD0:  /* VADDSUBPS Gx, Vx, Ex: subtract in even lanes, add in odd lanes */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GETGY;
+            for(int k=0; k<4; k+=2) {
+                GX->f[k]   = VX->f[k]   - EX->f[k];
+                GX->f[k+1] = VX->f[k+1] + EX->f[k+1];
+            }
+            if(vex.l) {
+                GETEY;
+                GETVY;
+                for(int k=0; k<4; k+=2) {
+                    GY->f[k]   = VY->f[k]   - EY->f[k];
+                    GY->f[k+1] = VY->f[k+1] + EY->f[k+1];
+                }
+            } else
+                GY->u128 = 0;
+            break;
+
+        case 0xE6:  /* VCVTPD2DQ Gx, Ex: doubles to int32, rounded according to emu->mxcsr.f.MXCSR_RC */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETGY;
+            switch(emu->mxcsr.f.MXCSR_RC) {     // round both doubles of EX into 64-bit temporaries first
+                case ROUND_Nearest: {
+                    int round = fegetround();   // host rounding mode is saved, forced to nearest, then restored
+                    fesetround(FE_TONEAREST);
+                    tmp64s0 = nearbyint(EX->d[0]);
+                    tmp64s1 = nearbyint(EX->d[1]);
+                    fesetround(round);
+                    break;
+                }
+                case ROUND_Down:
+                    tmp64s0 = floor(EX->d[0]);
+                    tmp64s1 = floor(EX->d[1]);
+                    break;
+                case ROUND_Up:
+                    tmp64s0 = ceil(EX->d[0]);
+                    tmp64s1 = ceil(EX->d[1]);
+                    break;
+                case ROUND_Chop:
+                    tmp64s0 = EX->d[0];
+                    tmp64s1 = EX->d[1];
+                    break;
+            }
+            if (tmp64s0==(int32_t)tmp64s0 && !isnan(EX->d[0])) {   // value fits in int32 and the source is not NaN
+                GX->sd[0] = (int32_t)tmp64s0;
+            } else {
+                GX->sd[0] = INT32_MIN;      // otherwise INT32_MIN is stored
+            }
+            if (tmp64s1==(int32_t)tmp64s1 && !isnan(EX->d[1])) {
+                GX->sd[1] = (int32_t)tmp64s1;
+            } else {
+                GX->sd[1] = INT32_MIN;
+            }
+            if(vex.l) {     // vex.l set: the two doubles of EY fill GX->sd[2] and GX->sd[3] the same way
+                GETEY;
+                switch(emu->mxcsr.f.MXCSR_RC) {
+                    case ROUND_Nearest: {
+                        int round = fegetround();
+                        fesetround(FE_TONEAREST);
+                        tmp64s0 = nearbyint(EY->d[0]);
+                        tmp64s1 = nearbyint(EY->d[1]);
+                        fesetround(round);
+                        break;
+                    }
+                    case ROUND_Down:
+                        tmp64s0 = floor(EY->d[0]);
+                        tmp64s1 = floor(EY->d[1]);
+                        break;
+                    case ROUND_Up:
+                        tmp64s0 = ceil(EY->d[0]);
+                        tmp64s1 = ceil(EY->d[1]);
+                        break;
+                    case ROUND_Chop:
+                        tmp64s0 = EY->d[0];
+                        tmp64s1 = EY->d[1];
+                        break;
+                }
+                if (tmp64s0==(int32_t)tmp64s0 && !isnan(EY->d[0])) {
+                    GX->sd[2] = (int32_t)tmp64s0;
+                } else {
+                    GX->sd[2] = INT32_MIN;
+                }
+                if (tmp64s1==(int32_t)tmp64s1 && !isnan(EY->d[1])) {
+                    GX->sd[3] = (int32_t)tmp64s1;
+                } else {
+                    GX->sd[3] = INT32_MIN;
+                }
+            } else
+                GX->q[1] = 0;   // vex.l clear: the upper qword of GX is zeroed
+            GY->u128 = 0;       // GY is cleared in both forms, after EY has been read
+            break;
+
         default:
             return 0;
     }
diff --git a/src/emu/x64runavxf30f.c b/src/emu/x64runavxf30f.c
index 73180d0a..98fb8b4d 100644
--- a/src/emu/x64runavxf30f.c
+++ b/src/emu/x64runavxf30f.c
@@ -68,7 +68,6 @@ uintptr_t RunAVX_F30F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
                 GX->ud[1] = VX->ud[1];
                 GX->q[1] = VX->q[1];
             } else {
-                // EX is not a register (reg to reg only move 31:0)
                 GX->ud[1] = GX->ud[2] = GX->ud[3] = 0;
             }
             GETGY;
@@ -152,6 +151,46 @@ uintptr_t RunAVX_F30F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
             } // no ymm raz here it seems
             break;
 
+        case 0xC2:  /* VCMPSS Gx, Vx, Ex, Ib: compare the low floats, produce an all-ones or all-zeros dword */
+            nextop = F8;
+            GETEX(1);
+            GETGX;
+            GETVX;
+            GETGY;
+            tmp8u = F8;
+            tmp8s = 0;
+            switch(tmp8u&7) {   // only bits 2:0 of the predicate byte are decoded here
+                case 0: tmp8s=(VX->f[0] == EX->f[0]); break;    // equal
+                case 1: tmp8s=isless(VX->f[0], EX->f[0]) && !(isnan(VX->f[0]) || isnan(EX->f[0])); break;   // less than, false on NaN
+                case 2: tmp8s=islessequal(VX->f[0], EX->f[0]) && !(isnan(VX->f[0]) || isnan(EX->f[0])); break;  // less or equal, false on NaN
+                case 3: tmp8s=isnan(VX->f[0]) || isnan(EX->f[0]); break;    // unordered: at least one NaN
+                case 4: tmp8s=isnan(VX->f[0]) || isnan(EX->f[0]) || (VX->f[0] != EX->f[0]); break;  // not equal, true on NaN
+                case 5: tmp8s=isnan(VX->f[0]) || isnan(EX->f[0]) || isgreaterequal(VX->f[0], EX->f[0]); break;  // not less than, true on NaN
+                case 6: tmp8s=isnan(VX->f[0]) || isnan(EX->f[0]) || isgreater(VX->f[0], EX->f[0]); break;   // not less or equal, true on NaN
+                case 7: tmp8s=!isnan(VX->f[0]) && !isnan(EX->f[0]); break;  // ordered: no NaN
+            }
+            GX->ud[0]=(tmp8s)?0xffffffff:0;
+            if(GX!=VX) {    // remaining 96 bits come from Vx; skipped when GX and VX are the same pointer
+                GX->ud[1] = VX->ud[1];
+                GX->q[1] = VX->q[1];
+            }
+            GY->u128 = 0;
+            break;
+
+        case 0xE6:  /* VCVTDQ2PD Gx, Ex: signed dwords of Ex widened to doubles */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETGY;
+            if(vex.l) {     // vex.l set: sd[3] and sd[2] of Ex become the two doubles of GY
+                GY->d[1] = EX->sd[3];
+                GY->d[0] = EX->sd[2];
+            } else
+                GY->u128 = 0;
+            GX->d[1] = EX->sd[1];   // NOTE(review): sd[3]/sd[2] are consumed above and sd[0] is converted last; the order looks chosen so the result stays correct if EX and GX are the same register - confirm before reordering
+            GX->d[0] = EX->sd[0];
+            break;
+
         default:
             return 0;
     }
diff --git a/src/tools/my_cpuid.c b/src/tools/my_cpuid.c
index 2ed0e3a6..7c54a379 100644
--- a/src/tools/my_cpuid.c
+++ b/src/tools/my_cpuid.c
@@ -325,6 +325,7 @@ void my_cpuid(x64emu_t* emu, uint32_t tmp32u)
                         //1<<3 |  // BMI1 
                         box64_avx2<<5 |  //AVX2
                         //1<<8 | //BMI2
+                        box64_avx2<<9 | //VAES - NOTE(review): architecturally VAES is CPUID.(EAX=7,ECX=0):ECX bit 9, while the neighbouring AVX2 (5) and SHA (29) bits are EBX bits, where bit 9 means ERMS; confirm which register this expression feeds
                         1<<29|  // SHA extension
                         0;
             } else {R_EAX = R_ECX = R_EBX = R_EDX = 0;}
diff --git a/src/wrapped/wrappedlibc.c b/src/wrapped/wrappedlibc.c
index 51d7557f..e547d523 100644
--- a/src/wrapped/wrappedlibc.c
+++ b/src/wrapped/wrappedlibc.c
@@ -1639,7 +1639,7 @@ void CreateCPUInfoFile(int fd)
         P;
         sprintf(buff, "bogomips\t: %g\n", getBogoMips());
         P;
-        sprintf(buff, "flags\t\t: fpu cx8 sep ht cmov clflush mmx sse sse2 syscall tsc lahf_lm ssse3 ht tm lm fxsr cpuid pclmulqdq cx16 aes movbe pni sse4_1%s%s lzcnt popcnt%s\n", box64_sse42?" sse4_2":"", box64_avx?" avx":"", box64_avx2?" avx2":"");
+        sprintf(buff, "flags\t\t: fpu cx8 sep ht cmov clflush mmx sse sse2 syscall tsc lahf_lm ssse3 ht tm lm fxsr cpuid pclmulqdq cx16 aes movbe pni sse4_1%s%s lzcnt popcnt%s%s\n", box64_sse42?" sse4_2":"", box64_avx?" avx":"", box64_avx2?" avx2":"", box64_avx2?" vaes":"");
         P;
         sprintf(buff, "address sizes\t: 48 bits physical, 48 bits virtual\n");
         P;