about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rwxr-xr-x src/emu/x64run.c 4
-rw-r--r-- src/emu/x64run0f.c 37
-rw-r--r-- src/emu/x64run64.c 68
-rw-r--r-- src/emu/x64run66.c 4
-rw-r--r-- src/emu/x64run660f.c 106
-rw-r--r-- src/emu/x64run6664.c 4
-rw-r--r-- src/emu/x64run66f0.c 17
-rw-r--r-- src/emu/x64run67.c 10
-rw-r--r-- src/emu/x64run670f.c 71
-rwxr-xr-x src/emu/x64run_private.h 2
-rw-r--r-- src/emu/x64rund8.c 56
-rw-r--r-- src/emu/x64rund9.c 21
-rw-r--r-- src/emu/x64rundb.c 12
-rw-r--r-- src/emu/x64rundd.c 25
-rw-r--r-- src/emu/x64rundf.c 6
-rw-r--r-- src/emu/x64runf0.c 21
-rw-r--r-- src/emu/x64runf20f.c 6
-rw-r--r-- src/emu/x64runf30f.c 6
18 files changed, 217 insertions, 259 deletions
diff --git a/src/emu/x64run.c b/src/emu/x64run.c
index 68985012..032614da 100755
--- a/src/emu/x64run.c
+++ b/src/emu/x64run.c
@@ -168,7 +168,7 @@ x64emurun:
         GO(0x30, xor)                   /* XOR 0x30 -> 0x35 */
         #undef GO
 
-	case 0x2E:	    /* segments are ignored */
+	    case 0x2E:	    /* segments are ignored */
         case 0x36:          /* SS: (ignored) */
             break;
 
@@ -1414,10 +1414,12 @@ x64emurun:
                     case 4:                 /* MUL EAX,Ed */
                         mul32_eax(emu, ED->dword[0]);
                         emu->regs[_AX].dword[1] = 0;
+                        emu->regs[_DX].dword[1] = 0;
                         break;
                     case 5:                 /* IMUL EAX,Ed */
                         imul32_eax(emu, ED->dword[0]);
                         emu->regs[_AX].dword[1] = 0;
+                        emu->regs[_DX].dword[1] = 0;
                         break;
                     case 6:                 /* DIV Ed */
                         div32(emu, ED->dword[0]);
diff --git a/src/emu/x64run0f.c b/src/emu/x64run0f.c
index a9adc844..05c9aab6 100644
--- a/src/emu/x64run0f.c
+++ b/src/emu/x64run0f.c
@@ -37,6 +37,7 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step)
     int32_t tmp32s, tmp32s2;

     uint32_t tmp32u, tmp32u2;

     uint64_t tmp64u, tmp64u2;

+    int64_t tmp64s;

     reg64_t *oped, *opgd;

     sse_regs_t *opex, *opgx, eax1;

     mmx87_regs_t *opem, *opgm, eam1;

@@ -837,12 +838,12 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step)
             nextop = F8;

             GETED(0);

             GETGD;

-            tmp32s = GD->sdword[0];

-            tmp8u=tmp32s&(rex.w?63:31);

-            tmp32s >>= (rex.w?6:5);

+            tmp64s = rex.w?GD->sq[0]:GD->sdword[0];

+            tmp8u=tmp64s&(rex.w?63:31);

+            tmp64s >>= (rex.w?6:5);

             if(!MODREG)

             {

-                ED=(reg64_t*)(((uintptr_t)(ED))+(tmp32s<<(rex.w?3:2)));

+                ED=(reg64_t*)(((uintptr_t)(ED))+(tmp64s<<(rex.w?3:2)));

             }

             if(rex.w) {

                 if(ED->q[0] & (1LL<<tmp8u))

@@ -953,12 +954,14 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step)
                 }

             } else {

                 cmp32(emu, R_EAX, ED->dword[0]);

-                R_RAX = ED->dword[0];   // to erase upper part of RAX

                 if(ACCESS_FLAG(F_ZF)) {

                     if(MODREG)

                         ED->q[0] = GD->dword[0];

                     else

                         ED->dword[0] = GD->dword[0];

+                    R_RAX = R_EAX;   // to erase upper part of RAX

+                } else {

+                    R_RAX = ED->dword[0];

                 }

             }

             break;

@@ -967,12 +970,12 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step)
             nextop = F8;

             GETED(0);

             GETGD;

-            tmp32s = GD->sdword[0];

-            tmp8u=tmp32s&(rex.w?63:31);

-            tmp32s >>= (rex.w?6:5);

+            tmp64s = rex.w?GD->sq[0]:GD->sdword[0];

+            tmp8u=tmp64s&(rex.w?63:31);

+            tmp64s >>= (rex.w?6:5);

             if(!MODREG)

             {

-                ED=(reg64_t*)(((uintptr_t)(ED))+(tmp32s<<(rex.w?3:2)));

+                ED=(reg64_t*)(((uintptr_t)(ED))+(tmp64s<<(rex.w?3:2)));

             }

             if(rex.w) {

                 if(ED->q[0] & (1LL<<tmp8u)) {

@@ -1097,12 +1100,12 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step)
             nextop = F8;

             GETED(0);

             GETGD;

-            tmp32s = GD->sdword[0];

-            tmp8u=tmp32s&(rex.w?63:31);

-            tmp32s >>= (rex.w?6:5);

+            tmp64s = rex.w?GD->sq[0]:GD->sdword[0];

+            tmp8u=tmp64s&(rex.w?63:31);

+            tmp64s >>= (rex.w?6:5);

             if(!MODREG)

             {

-                ED=(reg64_t*)(((uintptr_t)(ED))+(tmp32s<<(rex.w?3:2)));

+                ED=(reg64_t*)(((uintptr_t)(ED))+(tmp64s<<(rex.w?3:2)));

             }

             if(rex.w) {

                 if(ED->q[0] & (1LL<<tmp8u))

@@ -1256,14 +1259,14 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step)
             break;

         case 0xC4:                      /* PINSRW Gm,Ew,Ib */

             nextop = F8;

-            GETED(0);

+            GETED(1);

             GETGM;

             tmp8u = F8;

             GM->uw[tmp8u&3] = ED->word[0];   // only low 16bits

             break;

         case 0xC5:                       /* PEXTRW Gw,Em,Ib */

             nextop = F8;

-            GETEM(0);

+            GETEM(1);

             GETGD;

             tmp8u = F8;

             GD->q[0] = EM->uw[tmp8u&3];  // 16bits extract, 0 extended

@@ -1378,7 +1381,7 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step)
             nextop = F8;

             GETEM(0);

             GETGD;

-            GD->dword[0] = 0;

+            GD->q[0] = 0;

             for (int i=0; i<8; ++i)

                 if(EM->ub[i]&0x80)

                     GD->dword[0] |= (1<<i);

@@ -1457,7 +1460,7 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step)
             GETEM(0);

             GETGM;

             if(EM->q>15)

-                tmp8u = 16;

+                tmp8u = 15;

             else

                 tmp8u = EM->ub[0];

             for(int i=0; i<4; ++i)

diff --git a/src/emu/x64run64.c b/src/emu/x64run64.c
index f1d5d9e0..fa1ea281 100644
--- a/src/emu/x64run64.c
+++ b/src/emu/x64run64.c
@@ -119,7 +119,7 @@ uintptr_t Run64(x64emu_t *emu, rex_t rex, int seg, uintptr_t addr)
                             GETEX_OFFS(0, tlsdata);

                             GETGX;

                             GX->q[0] = EX->q[0];

-                            if((nextop&0xC0)!=0xC0) {

+                            if(!MODREG) {

                                 // EX is not a register

                                 GX->q[1] = 0;

                             }

@@ -129,7 +129,7 @@ uintptr_t Run64(x64emu_t *emu, rex_t rex, int seg, uintptr_t addr)
                             GETEX_OFFS(0, tlsdata);

                             GETGX;

                             GX->ud[0] = EX->ud[0];

-                            if((nextop&0xC0)!=0xC0) {

+                            if(!MODREG) {

                                 // EX is not a register (reg to reg only move 31:0)

                                 GX->ud[1] = GX->ud[2] = GX->ud[3] = 0;

                             }

@@ -163,7 +163,7 @@ uintptr_t Run64(x64emu_t *emu, rex_t rex, int seg, uintptr_t addr)
                     }

                     break;

 

-                case 0x29:                      /* MOVAPS Ex,Gx */

+                case 0x29:                      /* MOVAPS FS:Ex,Gx */

                     switch(rep) {

                         case 0:

                             nextop = F8;

@@ -179,7 +179,7 @@ uintptr_t Run64(x64emu_t *emu, rex_t rex, int seg, uintptr_t addr)
 

                 case 0x59:

                     switch(rep) {

-                        case 2: /* MULSS Gx, Ex */

+                        case 2: /* MULSS Gx, FS:Ex */

                             nextop = F8;

                             GETEX_OFFS(0, tlsdata);

                             GETGX;

@@ -192,7 +192,7 @@ uintptr_t Run64(x64emu_t *emu, rex_t rex, int seg, uintptr_t addr)
                     break;

                 case 0x5A:

                     switch(rep) {

-                        case 2:  /* CVTSS2SD Gx, Ex */

+                        case 2:  /* CVTSS2SD Gx, FS:Ex */

                             nextop = F8;

                             GETEX_OFFS(0, tlsdata);

                             GETGX;

@@ -206,7 +206,7 @@ uintptr_t Run64(x64emu_t *emu, rex_t rex, int seg, uintptr_t addr)
 

                 case 0x6F:

                     switch(rep) {

-                        case 2: /* MOVDQU Gx, Ex */

+                        case 2: /* MOVDQU Gx, FS:Ex */

                             nextop = F8;

                             GETEX_OFFS(0, tlsdata);

                             GETGX;

@@ -218,21 +218,33 @@ uintptr_t Run64(x64emu_t *emu, rex_t rex, int seg, uintptr_t addr)
                     }

                     break;

 

-                case 0xAF:                      /* IMUL Gd,Ed */

-                    nextop = F8;

-                    GETED_OFFS(0, tlsdata);

-                    GETGD;

-                    if(rex.w)

-                        GD->q[0] = imul64(emu, GD->q[0], ED->q[0]);

-                    else

-                        GD->q[0] = imul32(emu, GD->dword[0], ED->dword[0]);

+                case 0xAF:

+                    switch(rep) {

+                        case 0: /* IMUL Gd, FS:Ed */

+                            nextop = F8;

+                            GETED_OFFS(0, tlsdata);

+                            GETGD;

+                            if(rex.w)

+                                GD->q[0] = imul64(emu, GD->q[0], ED->q[0]);

+                            else

+                                GD->q[0] = imul32(emu, GD->dword[0], ED->dword[0]);

+                            break;

+                        default:

+                            return 0;

+                    }

                     break;

 

-                case 0xB6:                      /* MOVZX Gd,Eb */

-                    nextop = F8;

-                    GETEB_OFFS(0, tlsdata);

-                    GETGD;

-                    GD->q[0] = EB->byte[0];

+                case 0xB6:

+                    switch(rep) {

+                        case 0: /* MOVZX Gd, FS:Eb */

+                            nextop = F8;

+                            GETEB_OFFS(0, tlsdata);

+                            GETGD;

+                            GD->q[0] = EB->byte[0];

+                            break;

+                        default:

+                            return 0;

+                    }

                     break;

 

                 default:

@@ -267,7 +279,7 @@ uintptr_t Run64(x64emu_t *emu, rex_t rex, int seg, uintptr_t addr)
             break;

 

 

-        case 0x63:                      /* MOVSXD Gd,Ed */

+        case 0x63:                      /* MOVSXD Gd, FS:Ed */

             nextop = F8;

             GETED_OFFS(0, tlsdata);

             GETGD;

@@ -281,7 +293,7 @@ uintptr_t Run64(x64emu_t *emu, rex_t rex, int seg, uintptr_t addr)
             break;

 

         case 0x66:

-            return Run6664(emu, rex, addr);

+            return Run6664(emu, rex, seg, addr);

 

         case 0x80:                      /* GRP Eb,Ib */

             nextop = F8;

@@ -308,7 +320,7 @@ uintptr_t Run64(x64emu_t *emu, rex_t rex, int seg, uintptr_t addr)
                 tmp32s = F8S;

             }

             if(rex.w) {

-                tmp64u = (uint64_t)tmp32s;

+                tmp64u = (uint64_t)(int64_t)tmp32s;

                 switch((nextop>>3)&7) {

                     case 0: ED->q[0] = add64(emu, ED->q[0], tmp64u); break;

                     case 1: ED->q[0] =  or64(emu, ED->q[0], tmp64u); break;

@@ -346,13 +358,13 @@ uintptr_t Run64(x64emu_t *emu, rex_t rex, int seg, uintptr_t addr)
             }

             break;

 

-        case 0x88:                      /* MOV Eb,Gb */

+        case 0x88:                      /* MOV FS:Eb,Gb */

             nextop = F8;

             GETEB_OFFS(0, tlsdata);

             GETGB;

             EB->byte[0] = GB;

             break;

-        case 0x89:                    /* MOV Ed,Gd */

+        case 0x89:                    /* MOV FS:Ed,Gd */

             nextop = F8;

             GETED_OFFS(0, tlsdata);

             GETGD;

@@ -366,13 +378,13 @@ uintptr_t Run64(x64emu_t *emu, rex_t rex, int seg, uintptr_t addr)
                     ED->dword[0] = GD->dword[0];

             }

             break;

-        case 0x8A:                      /* MOV Gb,Eb */

+        case 0x8A:                      /* MOV Gb, FS:Eb */

             nextop = F8;

             GETEB_OFFS(0, tlsdata);

             GETGB;

             GB = EB->byte[0];

             break;

-        case 0x8B:                      /* MOV Gd,Ed */

+        case 0x8B:                      /* MOV Gd, FS:Ed */

             nextop = F8;

             GETED_OFFS(0, tlsdata);

             GETGD;

@@ -382,12 +394,12 @@ uintptr_t Run64(x64emu_t *emu, rex_t rex, int seg, uintptr_t addr)
                 GD->q[0] = ED->dword[0];

             break;

 

-        case 0xC6:                      /* MOV Eb,Ib */

+        case 0xC6:                      /* MOV FS:Eb, Ib */

             nextop = F8;

             GETEB_OFFS(1, tlsdata);

             EB->byte[0] = F8;

             break;

-        case 0xC7:                      /* MOV Ed,Id */

+        case 0xC7:                      /* MOV FS:Ed, Id */

             nextop = F8;

             GETED_OFFS(4, tlsdata);

             if(rex.w)

diff --git a/src/emu/x64run66.c b/src/emu/x64run66.c
index f7e0d93e..f7696ee1 100644
--- a/src/emu/x64run66.c
+++ b/src/emu/x64run66.c
@@ -133,7 +133,9 @@ uintptr_t Run66(x64emu_t *emu, rex_t rex, int rep, uintptr_t addr)
         break;

 

     case 0x64:                              /* FS: */

-        return Run6664(emu, rex, addr);

+        return Run6664(emu, rex, _FS, addr);

+    case 0x65:                              /* GS: */

+        return Run6664(emu, rex, _GS, addr);

 

     case 0x69:                      /* IMUL Gw,Ew,Iw */

         nextop = F8;

diff --git a/src/emu/x64run660f.c b/src/emu/x64run660f.c
index fbfca35d..57440997 100644
--- a/src/emu/x64run660f.c
+++ b/src/emu/x64run660f.c
@@ -54,6 +54,7 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
     int32_t tmp32s;

     uint32_t tmp32u;

     uint64_t tmp64u;

+    int64_t tmp64s;

     float tmpf;

     #ifndef NOALIGN

     int is_nan;

@@ -199,8 +200,8 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
         GETGM;

         switch(emu->mxcsr.f.MXCSR_RC) {

             case ROUND_Nearest:

-                GM->sd[0] = floor(EX->d[0]+0.5);

-                GM->sd[1] = floor(EX->d[1]+0.5);

+                GM->sd[0] = nearbyint(EX->d[0]);

+                GM->sd[1] = nearbyint(EX->d[1]);

                 break;

             case ROUND_Down:

                 GM->sd[0] = floor(EX->d[0]);

@@ -673,13 +674,13 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
                     tmp8u &= 3;

                 switch(tmp8u) {

                     case ROUND_Nearest:

-                        GX->f[0] = floor(EX->f[0]+0.5);

+                        GX->f[0] = nearbyintf(EX->f[0]);

                         break;

                     case ROUND_Down:

-                        GX->f[0] = floor(EX->f[0]);

+                        GX->f[0] = floorf(EX->f[0]);

                         break;

                     case ROUND_Up:

-                        GX->f[0] = ceil(EX->f[0]);

+                        GX->f[0] = ceilf(EX->f[0]);

                         break;

                     case ROUND_Chop:

                         GX->f[0] = EX->f[0];

@@ -697,7 +698,7 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
                     tmp8u &= 3;

                 switch(tmp8u) {

                     case ROUND_Nearest:

-                        GX->d[0] = floor(EX->d[0]+0.5);

+                        GX->d[0] = nearbyint(EX->d[0]);

                         break;

                     case ROUND_Down:

                         GX->d[0] = floor(EX->d[0]);

@@ -762,12 +763,12 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
                 GETGX;

                 tmp8u = F8;

                 ED->dword[0] = GX->ud[tmp8u&3];

-                if(MODREG && rex.w) ED->dword[1] = 0;

+                if(MODREG) ED->dword[1] = 0;

                 break;

 

             case 0x20:      // PINSRB GX, ED, u8

                 nextop = F8;

-                GETED(1);

+                GETED(1);   // It's ED, and not EB

                 GETGX;

                 tmp8u = F8;

                 GX->ub[tmp8u&0xf] = ED->byte[0];

@@ -865,7 +866,7 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
         nextop = F8;

         GETEX(0);

         GETGD;

-        GD->dword[0] = 0;

+        GD->q[0] = 0;

         for(int i=0; i<2; ++i)

             GD->dword[0] |= ((EX->q[i]>>63)&1)<<i;

         break;

@@ -952,23 +953,23 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
         GETEX(0);

         GETGX;

         for(int i=0; i<4; ++i)

-            if(isnanf(EX->f[i]) || isinff(EX->f[i]) || EX->f[i]>0x7fffffff)

+            if(isnanf(EX->f[i]) || isinff(EX->f[i]) || EX->f[i]>0x7fffffff || EX->f[i]<-0x80000000)

                 GX->sd[i] = 0x80000000;

-        else

-            switch(emu->mxcsr.f.MXCSR_RC) {

-                case ROUND_Nearest:

-                    GX->sd[i] = nearbyintf(EX->f[i]);

-                    break;

-                case ROUND_Down:

-                    GX->sd[i] = floorf(EX->f[i]);

-                    break;

-                case ROUND_Up:

-                    GX->sd[i] = ceilf(EX->f[i]);

-                    break;

-                case ROUND_Chop:

-                    GX->sd[i] = EX->f[i];

-                    break;

-            }

+            else

+                switch(emu->mxcsr.f.MXCSR_RC) {

+                    case ROUND_Nearest:

+                        GX->sd[i] = nearbyintf(EX->f[i]);

+                        break;

+                    case ROUND_Down:

+                        GX->sd[i] = floorf(EX->f[i]);

+                        break;

+                    case ROUND_Up:

+                        GX->sd[i] = ceilf(EX->f[i]);

+                        break;

+                    case ROUND_Chop:

+                        GX->sd[i] = EX->f[i];

+                        break;

+                }

         break;

     case 0x5C:                      /* SUBPD Gx, Ex */

         nextop = F8;

@@ -1206,6 +1207,7 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
                 break;

             case 4:                 /* PSRAW Ex, Ib */

                 tmp8u = F8;

+                if(tmp8u>15) tmp8u=15;

                 for (int i=0; i<8; ++i) EX->sw[i] >>= tmp8u;

                 break;

             case 6:                 /* PSLLW Ex, Ib */

@@ -1232,10 +1234,8 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
                 break;

             case 4:                 /* PSRAD Ex, Ib */

                 tmp8u = F8;

-                if(tmp8u>31) {

-                    for (int i=0; i<4; ++i) EX->sd[i] = (EX->sd[i]<0)?-1:0;

-                } else

-                    for (int i=0; i<4; ++i) EX->sd[i] >>= tmp8u;

+                if(tmp8u>31) tmp8u=31;

+                for (int i=0; i<4; ++i) EX->sd[i] >>= tmp8u;

                 break;

             case 6:                 /* PSLLD Ex, Ib */

                 tmp8u = F8;

@@ -1537,13 +1537,19 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
         nextop = F8;

         GETEB(0);

         GETGW;

-        GW->word[0] = EB->byte[0];

+        if(rex.w)

+            GW->q[0] = EB->byte[0];

+        else

+            GW->word[0] = EB->byte[0];

         break;

     case 0xB7:                      /* MOVZX Gw,Ew */

         nextop = F8;

         GETEW(0);

         GETGW;

-        GW->word[0] = EW->word[0];

+        if(rex.w)

+            GW->q[0] = EW->word[0];

+        else

+            GW->word[0] = EW->word[0];

         break;

 

     case 0xBA:                      

@@ -1551,7 +1557,7 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
         switch((nextop>>3)&7) {

             case 4:                 /* BT Ew,Ib */

                 CHECK_FLAGS(emu);

-                GETEW(0);

+                GETEW(1);

                 GETGW;

                 tmp8u = F8;

                 if(rex.w) {

@@ -1570,7 +1576,7 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
                 break;

             case 5:             /* BTS Ew, Ib */

                 CHECK_FLAGS(emu);

-                GETEW(0);

+                GETEW(1);

                 GETGW;

                 tmp8u = F8;

                 if(rex.w) {

@@ -1593,7 +1599,7 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
                 break;

             case 6:             /* BTR Ew, Ib */

                 CHECK_FLAGS(emu);

-                GETEW(0);

+                GETEW(1);

                 GETGW;

                 tmp8u = F8;

                 if(rex.w) {

@@ -1614,7 +1620,7 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
                 break;

             case 7:             /* BTC Ew, Ib */

                 CHECK_FLAGS(emu);

-                GETEW(0);

+                GETEW(1);

                 GETGW;

                 tmp8u = F8;

                 if(rex.w) {

@@ -1642,12 +1648,12 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
         nextop = F8;

         GETEW(0);

         GETGW;

-        tmp32s = rex.w?GW->sdword[0]:GW->sword[0];

-        tmp8u=tmp32s&(rex.w?63:15);

-        tmp32s >>= (rex.w?6:4);

+        tmp64s = rex.w?GW->sq[0]:GW->sword[0];

+        tmp8u=tmp64s&(rex.w?63:15);

+        tmp64s >>= (rex.w?6:4);

         if(!MODREG)

         {

-            EW=(reg64_t*)(((uintptr_t)(EW))+(tmp32s<<(rex.w?3:1)));

+            EW=(reg64_t*)(((uintptr_t)(EW))+(tmp64s<<(rex.w?3:1)));

         }

         if(rex.w) {

             if(EW->q[0] & (1LL<<tmp8u))

@@ -1774,7 +1780,7 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
         GETEX(1);

         GETGD;

         tmp8u = F8;

-        GD->dword[0] = EX->uw[tmp8u&7];  // 16bits extract, 0 extended

+        GD->q[0] = EX->uw[tmp8u&7];  // 16bits extract, 0 extended

         break;

     case 0xC6:  /* SHUFPD Gx, Ex, Ib */

         nextop = F8;

@@ -1947,7 +1953,7 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
         nextop = F8;

         GETEX(0);

         GETGX;

-        tmp8u=(EX->q[0]>15)?16:EX->ub[0];

+        tmp8u=(EX->q[0]>15)?15:EX->ub[0];

         for (int i=0; i<8; ++i) 

             GX->sw[i] >>= tmp8u;

         break;

@@ -1955,13 +1961,9 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
         nextop = F8;

         GETEX(0);

         GETGX;

-        tmp8u=(EX->q[0]>31)?32:EX->ub[0];

-        if(tmp8u>31) 

-            for (int i=0; i<4; ++i)

-                GX->sd[i] = (GX->sd[i]<0)?-1:0;

-        else

-            for (int i=0; i<4; ++i)

-                GX->sd[i] >>= tmp8u;

+        tmp8u=(EX->q[0]>31)?31:EX->ub[0];

+        for (int i=0; i<4; ++i)

+            GX->sd[i] >>= tmp8u;

         break;

     case 0xE3:  /* PAVGW Gx, Ex */

         nextop = F8;

@@ -2038,10 +2040,8 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
         nextop = F8;

         GETEX(0);

         GETGX;

-        GX->ud[0] |= EX->ud[0];

-        GX->ud[1] |= EX->ud[1];

-        GX->ud[2] |= EX->ud[2];

-        GX->ud[3] |= EX->ud[3];

+        GX->q[0] |= EX->q[0];

+        GX->q[1] |= EX->q[1];

         break;

     case 0xEC:  /* PADDSB Gx,Ex */

         nextop = F8;

@@ -2101,7 +2101,7 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
         if(EX->q[0]>63)

             {GX->q[0] = GX->q[1] = 0;}

         else 

-            {tmp8u=EX->q[0]; for (int i=0; i<2; ++i) GX->q[i] <<= tmp8u;}

+            {tmp8u=EX->ub[0]; for (int i=0; i<2; ++i) GX->q[i] <<= tmp8u;}

         break;

     case 0xF4:  /* PMULUDQ Gx,Ex */

         nextop = F8;

diff --git a/src/emu/x64run6664.c b/src/emu/x64run6664.c
index 4a4b8a37..9753dfce 100644
--- a/src/emu/x64run6664.c
+++ b/src/emu/x64run6664.c
@@ -22,13 +22,13 @@
 

 #include "modrm.h"

 

-uintptr_t Run6664(x64emu_t *emu, rex_t rex, uintptr_t addr)

+uintptr_t Run6664(x64emu_t *emu, rex_t rex, int seg, uintptr_t addr)

 {

     uint8_t opcode;

     uint8_t nextop;

     reg64_t *oped, *opgd;

     sse_regs_t *opex, *opgx;

-    uintptr_t tlsdata = GetFSBaseEmu(emu);

+    uintptr_t tlsdata = GetSegmentBaseEmu(emu, seg);

 

     opcode = F8;

     // REX prefix before the F0 are ignored

diff --git a/src/emu/x64run66f0.c b/src/emu/x64run66f0.c
index 623ad8d2..7bf48a47 100644
--- a/src/emu/x64run66f0.c
+++ b/src/emu/x64run66f0.c
@@ -133,16 +133,14 @@ uintptr_t Run66F0(x64emu_t *emu, rex_t rex, uintptr_t addr)
             GETGW;                                                  \
             if(rex.w) {                                             \
                 do {                                                \
-                    tmp64u = native_lock_read_dd(ED);                \
+                    tmp64u = native_lock_read_dd(ED);               \
                     tmp64u = OP##64(emu, tmp64u, GD->q[0]);         \
-                } while (native_lock_write_dd(ED, tmp64u));          \
+                } while (native_lock_write_dd(ED, tmp64u));         \
             } else {                                                \
                 do {                                                \
-                    tmp16u = native_lock_read_h(ED);                 \
+                    tmp16u = native_lock_read_h(ED);                \
                     tmp16u = OP##16(emu, tmp16u, GW->word[0]);      \
-                } while (native_lock_write_d(ED, tmp16u));           \
-                if(MODREG)                                          \
-                    EW->word[1] = 0;                                \
+                } while (native_lock_write_h(ED, tmp16u));          \
             }                                                       \
             break;                                                  \
         case B+3:                                                   \
@@ -170,10 +168,7 @@ uintptr_t Run66F0(x64emu_t *emu, rex_t rex, uintptr_t addr)
             if(rex.w)                                               \
                 ED->q[0] = OP##64(emu, ED->q[0], GD->q[0]);         \
             else                                                    \
-                if(MODREG)                                          \
-                    ED->q[0] = OP##32(emu, ED->dword[0], GD->dword[0]);     \
-                else                                                        \
-                    EW->word[0] = OP##16(emu, EW->word[0], GW->word[0]);    \
+                EW->word[0] = OP##16(emu, EW->word[0], GW->word[0]);\
             pthread_mutex_unlock(&emu->context->mutex_lock);        \
             break;                                                  \
         case B+3:                                                   \
@@ -278,7 +273,7 @@ uintptr_t Run66F0(x64emu_t *emu, rex_t rex, uintptr_t addr)
                                 tmp16u &=~0xff;
                                 tmp16u |= native_lock_read_b(ED);
                                 tmp16u = inc16(emu, tmp16u);
-                        } while(native_lock_write_b(ED, tmp16u&0xff));
+                            } while(native_lock_write_b(ED, tmp16u&0xff));
                             ED->word[0] = tmp16u;
                         } else {
                             do {
diff --git a/src/emu/x64run67.c b/src/emu/x64run67.c
index caa65c19..034fd640 100644
--- a/src/emu/x64run67.c
+++ b/src/emu/x64run67.c
@@ -394,20 +394,22 @@ uintptr_t Run67(x64emu_t *emu, rex_t rex, int rep, uintptr_t addr)
                 case 4:                 /* MUL EAX,Ed */

                     mul32_eax(emu, ED->dword[0]);

                     emu->regs[_AX].dword[1] = 0;

+                    emu->regs[_DX].dword[1] = 0;

                     break;

                 case 5:                 /* IMUL EAX,Ed */

                     imul32_eax(emu, ED->dword[0]);

                     emu->regs[_AX].dword[1] = 0;

+                    emu->regs[_DX].dword[1] = 0;

                     break;

                 case 6:                 /* DIV Ed */

                     div32(emu, ED->dword[0]);

-                    emu->regs[_AX].dword[1] = 0;

-                    emu->regs[_DX].dword[1] = 0;

+                    //emu->regs[_AX].dword[1] = 0;

+                    //emu->regs[_DX].dword[1] = 0;

                     break;

                 case 7:                 /* IDIV Ed */

                     idiv32(emu, ED->dword[0]);

-                    emu->regs[_AX].dword[1] = 0;

-                    emu->regs[_DX].dword[1] = 0;

+                    //emu->regs[_AX].dword[1] = 0;

+                    //emu->regs[_DX].dword[1] = 0;

                     break;

             }

         }

diff --git a/src/emu/x64run670f.c b/src/emu/x64run670f.c
index c576c501..4125a82d 100644
--- a/src/emu/x64run670f.c
+++ b/src/emu/x64run670f.c
@@ -43,40 +43,55 @@ uintptr_t Run670F(x64emu_t *emu, rex_t rex, int rep, uintptr_t addr)
 
     switch(opcode) {
 
-        case 0x2E:                      /* UCOMISS Gx, Ex */
+        case 0x2E:
             // same for now
-        case 0x2F:                      /* COMISS Gx, Ex */
-            if(rep) {
-                return 0;
+        case 0x2F:                      
+            switch(rep) {
+                case 0: /* (U)COMISS Gx, Ex */
+                    RESET_FLAGS(emu);
+                    nextop = F8;
+                    GETEX32(0);
+                    GETGX;
+                    if(isnan(GX->f[0]) || isnan(EX->f[0])) {
+                        SET_FLAG(F_ZF); SET_FLAG(F_PF); SET_FLAG(F_CF);
+                    } else if(isgreater(GX->f[0], EX->f[0])) {
+                        CLEAR_FLAG(F_ZF); CLEAR_FLAG(F_PF); CLEAR_FLAG(F_CF);
+                    } else if(isless(GX->f[0], EX->f[0])) {
+                        CLEAR_FLAG(F_ZF); CLEAR_FLAG(F_PF); SET_FLAG(F_CF);
+                    } else {
+                        SET_FLAG(F_ZF); CLEAR_FLAG(F_PF); CLEAR_FLAG(F_CF);
+                    }
+                    CLEAR_FLAG(F_OF); CLEAR_FLAG(F_AF); CLEAR_FLAG(F_SF);
+                    break;
+                default:
+                    return 0;
             }
-            RESET_FLAGS(emu);
-            nextop = F8;
-            GETEX32(0);
-            GETGX;
-            if(isnan(GX->f[0]) || isnan(EX->f[0])) {
-                SET_FLAG(F_ZF); SET_FLAG(F_PF); SET_FLAG(F_CF);
-            } else if(isgreater(GX->f[0], EX->f[0])) {
-                CLEAR_FLAG(F_ZF); CLEAR_FLAG(F_PF); CLEAR_FLAG(F_CF);
-            } else if(isless(GX->f[0], EX->f[0])) {
-                CLEAR_FLAG(F_ZF); CLEAR_FLAG(F_PF); SET_FLAG(F_CF);
-            } else {
-                SET_FLAG(F_ZF); CLEAR_FLAG(F_PF); CLEAR_FLAG(F_CF);
-            }
-            CLEAR_FLAG(F_OF); CLEAR_FLAG(F_AF); CLEAR_FLAG(F_SF);
             break;
 
-        case 0x6F:                      /* MOVQ Gm, Em */
-            nextop = F8;
-            GETEM32(0);
-            GETGM;
-            GM->q = EM->q;
+        case 0x6F:
+            switch(rep) {
+                case 0: /* MOVQ Gm, Em */
+                    nextop = F8;
+                    GETEM32(0);
+                    GETGM;
+                    GM->q = EM->q;
+                    break;
+                default:
+                    return 0;
+            }
             break;
 
-        case 0x7F:                      /* MOVQ Em, Gm */
-            nextop = F8;
-            GETEM32(0);
-            GETGM;
-            EM->q = GM->q;
+        case 0x7F:
+            switch(rep) {
+                case 0: /* MOVQ Em, Gm */ 
+                    nextop = F8;
+                    GETEM32(0);
+                    GETGM;
+                    EM->q = GM->q;
+                    break;
+                default:
+                    return 0;
+            }
             break;
 
     default:
diff --git a/src/emu/x64run_private.h b/src/emu/x64run_private.h
index 9288593b..c4704a77 100755
--- a/src/emu/x64run_private.h
+++ b/src/emu/x64run_private.h
@@ -68,7 +68,7 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step);
 uintptr_t Run64(x64emu_t *emu, rex_t rex, int seg, uintptr_t addr);
 uintptr_t Run66(x64emu_t *emu, rex_t rex, int rep, uintptr_t addr);
 uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr);
-uintptr_t Run6664(x64emu_t *emu, rex_t rex, uintptr_t addr);
+uintptr_t Run6664(x64emu_t *emu, rex_t rex, int seg, uintptr_t addr);
 uintptr_t Run66D9(x64emu_t *emu, rex_t rex, uintptr_t addr);
 uintptr_t Run66DD(x64emu_t *emu, rex_t rex, uintptr_t addr);
 uintptr_t Run66F0(x64emu_t *emu, rex_t rex, uintptr_t addr);
diff --git a/src/emu/x64rund8.c b/src/emu/x64rund8.c
index 54160094..1f851bb5 100644
--- a/src/emu/x64rund8.c
+++ b/src/emu/x64rund8.c
@@ -116,76 +116,36 @@ uintptr_t RunD8(x64emu_t *emu, rex_t rex, uintptr_t addr)
         switch((nextop>>3)&7) {

             case 0:         /* FADD ST0, float */

                 GETED(0);

-                if(!(((uintptr_t)ED)&3))

-                    ST0.d += *(float*)ED;

-                else {

-                    memcpy(&f, ED, sizeof(float));

-                    ST0.d += f;

-                }

+                ST0.d += *(float*)ED;

                 break;

             case 1:         /* FMUL ST0, float */

                 GETED(0);

-                if(!(((uintptr_t)ED)&3))

-                    ST0.d *= *(float*)ED;

-                else {

-                    memcpy(&f, ED, sizeof(float));

-                    ST0.d *= f;

-                }

+                ST0.d *= *(float*)ED;

                 break;

             case 2:      /* FCOM ST0, float */

                 GETED(0);

-                if(!(((uintptr_t)ED)&3))

-                    fpu_fcom(emu, *(float*)ED);

-                else {

-                    memcpy(&f, ED, sizeof(float));

-                    fpu_fcom(emu, f);

-                }

+                fpu_fcom(emu, *(float*)ED);

                 break;

             case 3:     /* FCOMP */

                 GETED(0);

-                if(!(((uintptr_t)ED)&3))

-                    fpu_fcom(emu, *(float*)ED);

-                else {

-                    memcpy(&f, ED, sizeof(float));

-                    fpu_fcom(emu, f);

-                }

+                fpu_fcom(emu, *(float*)ED);

                 fpu_do_pop(emu);

                 break;

             case 4:         /* FSUB ST0, float */

                 GETED(0);

-                if(!(((uintptr_t)ED)&3))

-                    ST0.d -= *(float*)ED;

-                else {

-                    memcpy(&f, ED, sizeof(float));

-                    ST0.d -= f;

-                }

+                ST0.d -= *(float*)ED;

                 break;

             case 5:         /* FSUBR ST0, float */

                 GETED(0);

-                if(!(((uintptr_t)ED)&3))

-                    ST0.d = *(float*)ED - ST0.d;

-                else {

-                    memcpy(&f, ED, sizeof(float));

-                    ST0.d = f - ST0.d;

-                }

+                ST0.d = *(float*)ED - ST0.d;

                 break;

             case 6:         /* FDIV ST0, float */

                 GETED(0);

-                if(!(((uintptr_t)ED)&3))

-                    ST0.d /= *(float*)ED;

-                else {

-                    memcpy(&f, ED, sizeof(float));

-                    ST0.d /= f;

-                }

+                ST0.d /= *(float*)ED;

                 break;

             case 7:         /* FDIVR ST0, float */

                 GETED(0);

-                if(!(((uintptr_t)ED)&3))

-                    ST0.d = *(float*)ED / ST0.d;

-                else {

-                    memcpy(&f, ED, sizeof(float));

-                    ST0.d = f / ST0.d;

-                }

+                ST0.d = *(float*)ED / ST0.d;

                 break;

             default:

                 return 0;

diff --git a/src/emu/x64rund9.c b/src/emu/x64rund9.c
index 301f2852..e50f5bf5 100644
--- a/src/emu/x64rund9.c
+++ b/src/emu/x64rund9.c
@@ -232,30 +232,15 @@ uintptr_t RunD9(x64emu_t *emu, rex_t rex, uintptr_t addr)
             case 0:     /* FLD ST0, Ed float */

                 GETED(0);

                 fpu_do_push(emu);

-                if(!(((uintptr_t)ED)&3))

-                    ST0.d = *(float*)ED;

-                else {

-                    memcpy(&f, ED, sizeof(float));

-                    ST0.d = f;

-                }

+                ST0.d = *(float*)ED;

                 break;

             case 2:     /* FST Ed, ST0 */

                 GETED(0);

-                if(!(((uintptr_t)ED)&3))

-                    *(float*)ED = ST0.d;

-                else {

-                    f = ST0.d;

-                    memcpy(ED, &f, sizeof(float));

-                }

+                *(float*)ED = ST0.d;

                 break;

             case 3:     /* FSTP Ed, ST0 */

                 GETED(0);

-                if(!(((uintptr_t)ED)&3))

-                    *(float*)ED = ST0.d;

-                else {

-                    f = ST0.d;

-                    memcpy(ED, &f, sizeof(float));

-                }

+                *(float*)ED = ST0.d;

                 fpu_do_pop(emu);

                 break;

             case 4:     /* FLDENV m */

diff --git a/src/emu/x64rundb.c b/src/emu/x64rundb.c
index 27d847dd..6581b727 100644
--- a/src/emu/x64rundb.c
+++ b/src/emu/x64rundb.c
@@ -133,15 +133,15 @@ uintptr_t RunDB(x64emu_t *emu, rex_t rex, uintptr_t addr)
                 break;

             case 1: /* FISTTP Ed, ST0 */

                 GETED(0);

-                tmp32s = ST0.d; // TODO: Handling of FPU Exception

-                if(tmp32s==0x7fffffff && isgreater(ST0.d, (double)(int32_t)0x7fffffff))

-                    tmp32s = 0x80000000;

+                if(isgreater(ST0.d, (double)(int32_t)0x7fffffff) || isless(ST0.d, -(double)0x80000000U) || !isfinite(ST0.d))   // truncation is valid down to -2^31; the constant stays unsigned so the negation yields -2147483648.0

+                    ED->sdword[0] = 0x80000000;

+                else

+                    ED->sdword[0] = ST0.d;

                 fpu_do_pop(emu);

-                ED->sdword[0] = tmp32s;

                 break;

             case 2: /* FIST Ed, ST0 */

                 GETED(0);

-                if(isgreater(ST0.d, (double)(int32_t)0x7fffffff) || isless(ST0.d, -(double)(int32_t)0x7fffffff) || !isfinite(ST0.d))

+                if(isgreater(ST0.d, (double)(int32_t)0x7fffffff) || isless(ST0.d, -(double)0x80000000U) || !isfinite(ST0.d))   // lower bound is -2^31; an (int32_t) cast before the negation would give +2^31 and flag every in-range value as overflow

                     ED->sdword[0] = 0x80000000;

                 else {

                     volatile int32_t tmp = fpu_round(emu, ST0.d);    // tmp to avoid BUS ERROR

@@ -150,7 +150,7 @@ uintptr_t RunDB(x64emu_t *emu, rex_t rex, uintptr_t addr)
                 break;

             case 3: /* FISTP Ed, ST0 */

                 GETED(0);

-                if(isgreater(ST0.d, (double)(int32_t)0x7fffffff) || isless(ST0.d, -(double)(int32_t)0x7fffffff) || !isfinite(ST0.d))

+                if(isgreater(ST0.d, (double)(int32_t)0x7fffffff) || isless(ST0.d, -(double)0x80000000U) || !isfinite(ST0.d))   // lower bound is -2^31; an (int32_t) cast before the negation would give +2^31 and flag every in-range value as overflow

                     ED->sdword[0] = 0x80000000;

                 else {

                     volatile int32_t tmp = fpu_round(emu, ST0.d);    // tmp to avoid BUS ERROR

diff --git a/src/emu/x64rundd.c b/src/emu/x64rundd.c
index 6b05adf9..abb18f77 100644
--- a/src/emu/x64rundd.c
+++ b/src/emu/x64rundd.c
@@ -114,37 +114,20 @@ uintptr_t RunDD(x64emu_t *emu, rex_t rex, uintptr_t addr)
             case 0: /* FLD double */

                 GETED(0);

                 fpu_do_push(emu);

-                if(!(((uintptr_t)ED)&7))

-                    ST0.d = *(double*)ED;

-                else {

-                    memcpy(&ST0.d, ED, sizeof(double));

-                }

+                ST0.d = *(double*)ED;

                 break;

             case 1: /* FISTTP ED qword */

                 GETED(0);

-                if(!(((uintptr_t)ED)&7))

-                    *(int64_t*)ED = ST0.d;

-                else {

-                    int64_t i64 = ST0.d;

-                    memcpy(ED, &i64, sizeof(int64_t));

-                }

+                *(int64_t*)ED = ST0.d;

                 fpu_do_pop(emu);

                 break;

             case 2: /* FST double */

                 GETED(0);

-                if(!(((uintptr_t)ED)&7))

-                    *(double*)ED = ST0.d;

-                else {

-                    memcpy(ED, &ST0.d, sizeof(double));

-                }

+                *(double*)ED = ST0.d;

                 break;

             case 3: /* FSTP double */

                 GETED(0);

-                if(!(((uintptr_t)ED)&7))

-                    *(double*)ED = ST0.d;

-                else {

-                    memcpy(ED, &ST0.d, sizeof(double));

-                }

+                *(double*)ED = ST0.d;

                 fpu_do_pop(emu);

                 break;

             case 4: /* FRSTOR m108byte */

diff --git a/src/emu/x64rundf.c b/src/emu/x64rundf.c
index dfe992c8..3b2b53e6 100644
--- a/src/emu/x64rundf.c
+++ b/src/emu/x64rundf.c
@@ -129,14 +129,14 @@ uintptr_t RunDF(x64emu_t *emu, rex_t rex, uintptr_t addr)
             break;

         case 2: /* FIST Ew, ST0 */

             GETEW(0);

-            if(isgreater(ST0.d, (double)(int32_t)0x7fff) || isless(ST0.d, -(double)(int32_t)0x7fff) || !isfinite(ST0.d))

+            if(isgreater(ST0.d, (double)(int32_t)0x7fff) || isless(ST0.d, -(double)(int32_t)0x8000) || !isfinite(ST0.d))

                 EW->sword[0] = 0x8000;

             else

                 EW->sword[0] = fpu_round(emu, ST0.d);

             break;

         case 3: /* FISTP Ew, ST0 */

             GETEW(0);

-            if(isgreater(ST0.d, (double)(int32_t)0x7fff) || isless(ST0.d, -(double)(int32_t)0x7fff) || !isfinite(ST0.d))

+            if(isgreater(ST0.d, (double)(int32_t)0x7fff) || isless(ST0.d, -(double)(int32_t)0x8000) || !isfinite(ST0.d))

                 EW->sword[0] = 0x8000;

             else

                 EW->sword[0] = fpu_round(emu, ST0.d);

@@ -165,7 +165,7 @@ uintptr_t RunDF(x64emu_t *emu, rex_t rex, uintptr_t addr)
             if(STll(0).sref==ST(0).sq)

                 ED->sq[0] = STll(0).sq;

             else {

-                if(isgreater(ST0.d, (double)(int64_t)0x7fffffffffffffffLL) || isless(ST0.d, -(double)(int64_t)0x7fffffffffffffffLL) || !isfinite(ST0.d))

+                if(isgreater(ST0.d, (double)(int64_t)0x7fffffffffffffffLL) || isless(ST0.d, -(double)0x8000000000000000ULL) || !isfinite(ST0.d))   // lower bound is -2^63; an (int64_t) cast before the negation would give +2^63 and flag every in-range value as overflow

                     ED->sq[0] = 0x8000000000000000LL;

                 else

                     ED->sq[0] = fpu_round(emu, ST0.d);

diff --git a/src/emu/x64runf0.c b/src/emu/x64runf0.c
index 00b524ac..13eed9a2 100644
--- a/src/emu/x64runf0.c
+++ b/src/emu/x64runf0.c
@@ -173,10 +173,12 @@ uintptr_t RunF0(x64emu_t *emu, rex_t rex, uintptr_t addr)
                 nextop = F8;

                 GETED(0);

                 GETGD;

-                tmp8u = GD->byte[0];

+                tmp64s = rex.w?GD->sq[0]:GD->sdword[0];

+                tmp8u=tmp64s&(rex.w?63:31);

+                tmp64s >>= (rex.w?6:5);

                 if(!MODREG)

                 {

-                    ED=(reg64_t*)(((uint32_t*)(ED))+(tmp8u>>5));

+                    ED=(reg64_t*)(((uintptr_t)(ED))+(tmp64s<<(rex.w?3:2)));

                 }

 #ifdef DYNAREC

                 if(rex.w) {

@@ -350,10 +352,12 @@ uintptr_t RunF0(x64emu_t *emu, rex_t rex, uintptr_t addr)
                     nextop = F8;

                     GETED(0);

                     GETGD;

-                    tmp8u = GD->byte[0];

+                    tmp64s = rex.w?GD->sq[0]:GD->sdword[0];

+                    tmp8u=tmp64s&(rex.w?63:31);

+                    tmp64s >>= (rex.w?6:5);

                     if(!MODREG)

                     {

-                        ED=(reg64_t*)(((uint32_t*)(ED))+(tmp8u>>5));

+                        ED=(reg64_t*)(((uintptr_t)(ED))+(tmp64s<<(rex.w?3:2)));

                     }

                     tmp8u&=rex.w?63:31;

 #ifdef DYNAREC

@@ -595,6 +599,7 @@ uintptr_t RunF0(x64emu_t *emu, rex_t rex, uintptr_t addr)
                         tmp8u = native_lock_read_b(EB);

                         tmp8u2 = add8(emu, tmp8u, GB);

                     } while(native_lock_write_b(EB, tmp8u2));

+                    GB = tmp8u;

 #else

                     pthread_mutex_lock(&emu->context->mutex_lock);

                     tmp8u = add8(emu, EB->byte[0], GB);

@@ -858,7 +863,6 @@ uintptr_t RunF0(x64emu_t *emu, rex_t rex, uintptr_t addr)
                 } while(native_lock_write_b(EB, GB));

                 GB = tmp8u;

             }

-            // dynarec use need it's own mecanism

 #else

             GETEB(0);

             GETGB;

@@ -890,10 +894,7 @@ uintptr_t RunF0(x64emu_t *emu, rex_t rex, uintptr_t addr)
                 if(rex.w) {

                     GD->q[0] = native_lock_xchg(ED, GD->q[0]);

                 } else {

-                    do {

-                        tmp32u = native_lock_read_d(ED);

-                    } while(native_lock_write_d(ED, GD->dword[0]));

-                    GD->q[0] = tmp32u;

+                    GD->dword[0] = native_lock_xchg_d(ED, GD->dword[0]);

                 }

             }

 #else

@@ -999,7 +1000,7 @@ uintptr_t RunF0(x64emu_t *emu, rex_t rex, uintptr_t addr)
                                 tmp32u &=~0xff;

                                 tmp32u |= native_lock_read_b(ED);

                                 tmp32u = inc32(emu, tmp32u);

-                        } while(native_lock_write_b(ED, tmp32u&0xff));

+                            } while(native_lock_write_b(ED, tmp32u&0xff));

                             ED->dword[0] = tmp32u;

                         } else {

                             do {

diff --git a/src/emu/x64runf20f.c b/src/emu/x64runf20f.c
index 580478ac..a2c0e8e0 100644
--- a/src/emu/x64runf20f.c
+++ b/src/emu/x64runf20f.c
@@ -45,7 +45,7 @@ uintptr_t RunF20F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step)
         GETEX(0);

         GETGX;

         GX->q[0] = EX->q[0];

-        if((nextop&0xC0)!=0xC0) {

+        if(!MODREG) {

             // EX is not a register

             GX->q[1] = 0;

         }

@@ -314,8 +314,8 @@ uintptr_t RunF20F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step)
         GETGX;

         switch(emu->mxcsr.f.MXCSR_RC) {

             case ROUND_Nearest:

-                GX->sd[0] = floor(EX->d[0]+0.5);

-                GX->sd[1] = floor(EX->d[1]+0.5);

+                GX->sd[0] = nearbyint(EX->d[0]);

+                GX->sd[1] = nearbyint(EX->d[1]);

                 break;

             case ROUND_Down:

                 GX->sd[0] = floor(EX->d[0]);

diff --git a/src/emu/x64runf30f.c b/src/emu/x64runf30f.c
index 669ccd16..acfd7895 100644
--- a/src/emu/x64runf30f.c
+++ b/src/emu/x64runf30f.c
@@ -281,7 +281,6 @@ uintptr_t RunF30F(x64emu_t *emu, rex_t rex, uintptr_t addr)
         if(rex.w) {

             tmp64u = ED->q[0];

             if(tmp64u) {

-                CLEAR_FLAG(F_ZF);

                 tmp8u = 0;

                 while(!(tmp64u&(1LL<<tmp8u))) ++tmp8u;

                 GD->q[0] = tmp8u;

@@ -295,16 +294,15 @@ uintptr_t RunF30F(x64emu_t *emu, rex_t rex, uintptr_t addr)
         } else {

             tmp32u = ED->dword[0];

             if(tmp32u) {

-                CLEAR_FLAG(F_ZF);

                 tmp8u = 0;

                 while(!(tmp32u&(1<<tmp8u))) ++tmp8u;

-                GD->dword[0] = tmp8u;

+                GD->q[0] = tmp8u;

                 CONDITIONAL_SET_FLAG(tmp8u==0, F_ZF);

                 CLEAR_FLAG(F_CF);

             } else {

                 CLEAR_FLAG(F_ZF);

                 SET_FLAG(F_CF);

-                GD->dword[0] = 32;

+                GD->q[0] = 32;

             }

         }

         break;