about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2023-06-18 19:01:22 +0200
committer ptitSeb <sebastien.chev@gmail.com> 2023-06-18 19:01:22 +0200
commit 9698844cc3894f29e101fe9282141d4a5e27267f (patch)
tree 048f1b94e5ec146ae5f651a2e83718fe1d9fe582 /src
parent 9ca5e13d7f03d76bf06ad1baafb99e12cf053694 (diff)
download box64-9698844cc3894f29e101fe9282141d4a5e27267f.tar.gz
box64-9698844cc3894f29e101fe9282141d4a5e27267f.zip
[32BITS] Various small fixes in 32bits interpreter
Diffstat (limited to 'src')
-rwxr-xr-x src/emu/x64run.c 41
-rw-r--r-- src/emu/x64run0f.c 12
-rw-r--r-- src/emu/x64run64.c 33
-rw-r--r-- src/emu/x64run66.c 8
-rw-r--r-- src/emu/x64run66f0.c 215
-rw-r--r-- src/emu/x64run67.c 45
-rwxr-xr-x src/emu/x64run_private.c 8
-rw-r--r-- src/emu/x64runf0.c 9
8 files changed, 166 insertions, 205 deletions
diff --git a/src/emu/x64run.c b/src/emu/x64run.c
index 09d28836..8e611920 100755
--- a/src/emu/x64run.c
+++ b/src/emu/x64run.c
@@ -304,13 +304,19 @@ x64emurun:
             nextop = F8;
             GETED(0);
             GETGD;
-            if(rex.w)
-                GD->sq[0] = ED->sdword[0];
-            else
-                if(MODREG)
-                    GD->q[0] = ED->dword[0];    // not really a sign extension
+            if(rex.is32bits) {
+                // ARPL here
+                // faking to always happy...
+                SET_FLAG(F_ZF);
+            } else {
+                if(rex.w)
+                    GD->sq[0] = ED->sdword[0];
                 else
-                    GD->sdword[0] = ED->sdword[0];  // meh?
+                    if(MODREG)
+                        GD->q[0] = ED->dword[0];    // not really a sign extension
+                    else
+                        GD->sdword[0] = ED->sdword[0];  // meh?
+            }
             break;
         case 0x64:                      /* FS: prefix */
             #ifdef TEST_INTERPRETER
@@ -413,7 +419,7 @@ x64emurun:
         case 0x6D:                      /* INSL DX */
         case 0x6E:                      /* OUTSB DX */
         case 0x6F:                      /* OUTSL DX */
-            // this is a privilege opcode...
+            // this is a privilege opcode in 64bits, but not in 32bits...
             #ifndef TEST_INTERPRETOR
             emit_signal(emu, SIGSEGV, (void*)R_RIP, 0);
             STEP;
@@ -633,7 +639,7 @@ x64emurun:
         case 0x8F:                      /* POP Ed */
             nextop = F8;
             if(MODREG) {
-                emu->regs[(nextop&7)+(rex.b<<3)].q[0] = Pop(emu);
+                emu->regs[(nextop&7)+(rex.b<<3)].q[0] = rex.is32bits?Pop32(emu):Pop(emu);
             } else {
                 if(rex.is32bits) {
                     tmp32u = Pop32(emu);  // this order allows handling POP [ESP] and variant
@@ -697,7 +703,7 @@ x64emurun:
                 Push(emu, emu->eflags.x64);
             break;
         case 0x9D:                      /* POPF */
-            emu->eflags.x64 = ((Pop(emu) & 0x3F7FD7)/* & (0xffff-40)*/ ) | 0x2; // mask off res2 and res3 and on res1
+            emu->eflags.x64 = (((rex.is32bits?Pop32(emu):Pop(emu)) & 0x3F7FD7)/* & (0xffff-40)*/ ) | 0x2; // mask off res2 and res3 and on res1
             RESET_FLAGS(emu);
             #ifndef TEST_INTERPRETER
             if(ACCESS_FLAG(F_TF)) {
@@ -828,7 +834,6 @@ x64emurun:
                     R_RCX = tmp64u;
                     break;
                 default:
-                    tmp8s = ACCESS_FLAG(F_DF)?-1:+1;
                     tmp8u  = *(uint8_t*)R_RDI;
                     tmp8u2 = *(uint8_t*)R_RSI;
                     R_RDI += tmp8s;
@@ -898,14 +903,12 @@ x64emurun:
                     break;
                 default:
                     if(rex.w) {
-                        tmp8s = ACCESS_FLAG(F_DF)?-8:+8;
                         tmp64u  = *(uint64_t*)R_RDI;
                         tmp64u2 = *(uint64_t*)R_RSI;
                         R_RDI += tmp8s;
                         R_RSI += tmp8s;
                         cmp64(emu, tmp64u2, tmp64u);
                     } else {
-                        tmp8s = ACCESS_FLAG(F_DF)?-4:+4;
                         tmp32u  = *(uint32_t*)R_RDI;
                         tmp32u2 = *(uint32_t*)R_RSI;
                         R_RDI += tmp8s;
@@ -1485,7 +1488,7 @@ x64emurun:
         case 0xE5:                      /* IN EAX, XX */
         case 0xE6:                      /* OUT XX, AL */
         case 0xE7:                      /* OUT XX, EAX */
-            // this is a privilege opcode...
+            // this is a privilege opcode on 64bits...
             #ifndef TEST_INTERPRETER
             emit_signal(emu, SIGSEGV, (void*)R_RIP, 0);
             STEP;
@@ -1515,7 +1518,7 @@ x64emurun:
         case 0xED:                      /* IN EAX, DX */
         case 0xEE:                      /* OUT DX, AL */
         case 0xEF:                      /* OUT DX, EAX */
-            // this is a privilege opcode...
+            // this is a privilege opcode on 64bits...
             #ifndef TEST_INTERPRETER
             emit_signal(emu, SIGSEGV, (void*)R_RIP, 0);
             STEP;
@@ -1659,12 +1662,12 @@ x64emurun:
             SET_FLAG(F_CF);
             break;
         case 0xFA:                      /* CLI */
-            // this is a privilege opcode...
+            // this is a privilege opcode on 64bits...
             emit_signal(emu, SIGSEGV, (void*)R_RIP, 0);
             STEP;
             break;
         case 0xFB:                      /* STI */
-            // this is a privilege opcode...
+            // this is a privilege opcode on 64bits...
             emit_signal(emu, SIGSEGV, (void*)R_RIP, 0);
             STEP;
             break;
@@ -1766,10 +1769,10 @@ x64emurun:
                         goto fini;
                     } else {
                         if(rex.is32bits || !rex.w) {
-                            addr = (uintptr_t)getAlternate((void*)(uintptr_t)ED->dword[0]);   //check CS?
+                            addr = (uintptr_t)getAlternate((void*)(uintptr_t)ED->dword[0]);
                             R_CS = ED->word[2];
                         } else {
-                            addr = (uintptr_t)getAlternate((void*)ED->q[0]);   //check CS?
+                            addr = (uintptr_t)getAlternate((void*)ED->q[0]);
                             R_CS = (ED+1)->word[0];
                         }
                         STEP2;
@@ -1779,7 +1782,7 @@ x64emurun:
                 case 6:                 /* Push Ed */
                     _GETED(0);
                     if(rex.is32bits) {
-                        tmp32u = ED->dword[0];  // rex.w ignored
+                        tmp32u = ED->dword[0];
                         Push32(emu, tmp32u);  // avoid potential issue with push [esp+...]
                     } else {
                         tmp64u = ED->q[0];  // rex.w ignored
diff --git a/src/emu/x64run0f.c b/src/emu/x64run0f.c
index f96dfe0d..b8e51267 100644
--- a/src/emu/x64run0f.c
+++ b/src/emu/x64run0f.c
@@ -108,7 +108,7 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step)
             nextop = F8;

             GETEX(0);

             GETGX;

-            if((nextop&0xC0)==0xC0)    /* MOVHLPS Gx,Ex */

+            if(MODREG)    /* MOVHLPS Gx,Ex */

                 GX->q[0] = EX->q[1];

             else

                 GX->q[0] = EX->q[0];    /* MOVLPS Gx,Ex */

@@ -350,8 +350,12 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step)
                     GETED(0);

                     if(rex.w)

                         ED->q[0] = __builtin_bswap64(GD->q[0]);

-                    else

-                        ED->q[0] = __builtin_bswap32(GD->dword[0]);

+                    else {

+                        if(MODREG)

+                            ED->q[0] = __builtin_bswap32(GD->dword[0]);

+                        else

+                            ED->dword[0] = __builtin_bswap32(GD->dword[0]);

+                    }

                     break;

 

                 default:

@@ -1574,7 +1578,7 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step)
             break;

         case 0xE7:                   /* MOVNTQ Em,Gm */

             nextop = F8;

-            if((nextop&0xC0)==0xC0)

+            if(MODREG)

                 return 0;

             GETEM(0);

             GETGM;

diff --git a/src/emu/x64run64.c b/src/emu/x64run64.c
index f3c9a20a..ef3491e2 100644
--- a/src/emu/x64run64.c
+++ b/src/emu/x64run64.c
@@ -292,13 +292,19 @@ uintptr_t Run64(x64emu_t *emu, rex_t rex, int seg, uintptr_t addr)
             nextop = F8;

             GETED_OFFS(0, tlsdata);

             GETGD;

-            if(rex.w)

-                GD->sq[0] = ED->sdword[0];

-            else

-                if(MODREG)

-                    GD->q[0] = ED->dword[0];    // not really a sign extension

+            if(rex.is32bits) {

+                // ARPL here

+                // faking to always happy...

+                SET_FLAG(F_ZF);

+            } else {

+                if(rex.w)

+                    GD->sq[0] = ED->sdword[0];

                 else

-                    GD->sdword[0] = ED->sdword[0];  // meh?

+                    if(MODREG)

+                        GD->q[0] = ED->dword[0];    // not really a sign extension

+                    else

+                        GD->sdword[0] = ED->sdword[0];  // meh?

+            }

             break;

 

         case 0x66:

@@ -380,7 +386,6 @@ uintptr_t Run64(x64emu_t *emu, rex_t rex, int seg, uintptr_t addr)
             if(rex.w) {

                 ED->q[0] = GD->q[0];

             } else {

-                //if ED is a reg, than the opcode works like movzx

                 if(MODREG)

                     ED->q[0] = GD->dword[0];

                 else

@@ -436,8 +441,8 @@ uintptr_t Run64(x64emu_t *emu, rex_t rex, int seg, uintptr_t addr)
 

         case 0xA1:                      /* MOV EAX,FS:Od */

             if(rex.is32bits) {

-                tmp64u = F32;

-                R_EAX = *(uint32_t*)(tlsdata+tmp64u);

+                tmp32s = F32S;

+                R_EAX = *(uint32_t*)(tlsdata+tmp32s);

             } else {

                 tmp64u = F64;

                 if(rex.w)

@@ -449,8 +454,8 @@ uintptr_t Run64(x64emu_t *emu, rex_t rex, int seg, uintptr_t addr)
 

         case 0xA3:                      /* MOV FS:Od,EAX */

             if(rex.is32bits) {

-                tmp64u = F32;

-                *(uint32_t*)(uintptr_t)(tlsdata+tmp64u) = R_EAX;

+                tmp32s = F32S;

+                *(uint32_t*)(uintptr_t)(tlsdata+tmp32s) = R_EAX;

             } else {

                 tmp64u = F64;

                 if(rex.w)

@@ -625,7 +630,7 @@ uintptr_t Run64(x64emu_t *emu, rex_t rex, int seg, uintptr_t addr)
                     addr = tmp64u;

                     break;

                 case 3:                 /* CALL FAR Ed */

-                    if(nextop>0xC0) {

+                    if(MODREG) {

                         printf_log(LOG_NONE, "Illegal Opcode %p: %02X %02X %02X %02X\n", (void*)R_RIP, opcode, nextop, PK(2), PK(3));

                         emu->quit=1;

                         emu->error |= ERR_ILLEGAL;

@@ -652,7 +657,7 @@ uintptr_t Run64(x64emu_t *emu, rex_t rex, int seg, uintptr_t addr)
                         addr = (uintptr_t)getAlternate((void*)ED->q[0]);

                     break;

                 case 5:                 /* JMP FAR Ed */

-                    if(nextop>0xc0) {

+                    if(MODREG) {

                         printf_log(LOG_NONE, "Illegal Opcode %p: 0x%02X 0x%02X %02X %02X\n", (void*)R_RIP, opcode, nextop, PK(2), PK(3));

                         emu->quit=1;

                         emu->error |= ERR_ILLEGAL;

@@ -669,7 +674,7 @@ uintptr_t Run64(x64emu_t *emu, rex_t rex, int seg, uintptr_t addr)
                     break;

                 case 6:                 /* Push Ed */

                     if(rex.is32bits) {

-                        tmp32u = ED->dword[0];  // rex.w ignored

+                        tmp32u = ED->dword[0];

                         Push32(emu, tmp32u);  // avoid potential issue with push [esp+...]

                     } else {

                         tmp64u = ED->q[0];  // rex.w ignored

diff --git a/src/emu/x64run66.c b/src/emu/x64run66.c
index 642cfd74..905e39d6 100644
--- a/src/emu/x64run66.c
+++ b/src/emu/x64run66.c
@@ -92,7 +92,7 @@ uintptr_t Run66(x64emu_t *emu, rex_t rex, int rep, uintptr_t addr)
         if(rex.w)                                                   \

             GW->q[0] = OP##64(emu, GW->q[0], EW->q[0]);             \

         else                                                        \

-        GW->word[0] = OP##16(emu, GW->word[0], EW->word[0]);        \

+            GW->word[0] = OP##16(emu, GW->word[0], EW->word[0]);    \

         break;                                                      \

     case B+4:                                                       \

         R_AL = OP##8(emu, R_AL, F8);                                \

@@ -317,7 +317,7 @@ uintptr_t Run66(x64emu_t *emu, rex_t rex, int rep, uintptr_t addr)
 

     case 0xA1:                      /* MOV EAX,Od */

         if(rex.is32bits) {

-            R_EAX = *(uint32_t*)(uintptr_t)F32;

+            R_AX = *(uint16_t*)(uintptr_t)F32;

         } else {

             if(rex.w)

                 R_RAX = *(uint64_t*)F64;

@@ -328,7 +328,7 @@ uintptr_t Run66(x64emu_t *emu, rex_t rex, int rep, uintptr_t addr)
 

     case 0xA3:                      /* MOV Od,EAX */

         if(rex.is32bits) {

-            *(uint32_t*)(uintptr_t)F32 = R_EAX;

+            *(uint16_t*)(uintptr_t)F32 = R_AX;

         } else {

             if(rex.w)

                 *(uint64_t*)F64 = R_RAX;

@@ -423,14 +423,12 @@ uintptr_t Run66(x64emu_t *emu, rex_t rex, int rep, uintptr_t addr)
                 break;

             default:

                 if(rex.w) {

-                    tmp8s = ACCESS_FLAG(F_DF)?-8:+8;

                     tmp64u  = *(uint64_t*)R_RDI;

                     tmp64u2 = *(uint64_t*)R_RSI;

                     R_RDI += tmp8s;

                     R_RSI += tmp8s;

                     cmp64(emu, tmp64u2, tmp64u);

                 } else {

-                    tmp8s = ACCESS_FLAG(F_DF)?-2:+2;

                     tmp16u  = *(uint16_t*)R_RDI;

                     tmp16u2 = *(uint16_t*)R_RSI;

                     R_RDI += tmp8s;

diff --git a/src/emu/x64run66f0.c b/src/emu/x64run66f0.c
index 86e6a8dd..df302af5 100644
--- a/src/emu/x64run66f0.c
+++ b/src/emu/x64run66f0.c
@@ -33,6 +33,7 @@ uintptr_t Run66F0(x64emu_t *emu, rex_t rex, uintptr_t addr)
 {
     uint8_t opcode;
     uint8_t nextop;
+    int16_t tmp16s;
     uint16_t tmp16u, tmp16u2;
     int32_t tmp32s;
     int64_t tmp64s;
@@ -47,13 +48,14 @@ uintptr_t Run66F0(x64emu_t *emu, rex_t rex, uintptr_t addr)
     opcode = F8;
     // REX prefix before the F0 are ignored
     rex.rex = 0;
+    uintptr_t addr_entry = addr;
     if(!rex.is32bits)
         while(opcode>=0x40 && opcode<=0x4f) {
             rex.rex = opcode;
             opcode = F8;
         }
 
-    if(rex.w) return RunF0(emu, rex, addr);
+    if(rex.w) return RunF0(emu, rex, addr_entry);
 
     switch(opcode) {
         
@@ -93,39 +95,25 @@ uintptr_t Run66F0(x64emu_t *emu, rex_t rex, uintptr_t addr)
                     GETEW(0);
                     GETGW;
 #if defined(DYNAREC) && !defined(TEST_INTERPRETER)
-                    if(rex.w) {
+                    if(((uintptr_t)ED)&1) {
                         do {
-                            tmp64u = native_lock_read_dd(ED);
-                            tmp64u2 = add64(emu, tmp64u, GD->q[0]);
-                        } while(native_lock_write_dd(ED, tmp64u2));
-                        GD->q[0] = tmp64u;
+                            tmp16u = ED->word[0] & ~0xff;
+                            tmp16u |= native_lock_read_h(ED);
+                            tmp16u2 = add16(emu, tmp16u, GD->word[0]);
+                        } while(native_lock_write_h(ED, tmp16u2&0xff));
+                        ED->word[0] = tmp16u2;
                     } else {
-                        if(((uintptr_t)ED)&1) {
-                            do {
-                                tmp16u = ED->word[0] & ~0xff;
-                                tmp16u |= native_lock_read_h(ED);
-                                tmp16u2 = add16(emu, tmp16u, GD->word[0]);
-                            } while(native_lock_write_h(ED, tmp16u2&0xff));
-                            ED->word[0] = tmp16u2;
-                        } else {
-                            do {
-                                tmp16u = native_lock_read_h(ED);
-                                tmp16u2 = add16(emu, tmp16u, GD->word[0]);
-                            } while(native_lock_write_h(ED, tmp16u2));
-                        }
-                        GD->word[0] = tmp16u;
+                        do {
+                            tmp16u = native_lock_read_h(ED);
+                            tmp16u2 = add16(emu, tmp16u, GD->word[0]);
+                        } while(native_lock_write_h(ED, tmp16u2));
                     }
+                    GD->word[0] = tmp16u;
 #else
                     pthread_mutex_lock(&emu->context->mutex_lock);
-                    if(rex.w) {
-                        tmp64u = add64(emu, ED->q[0], GD->q[0]);
-                        GD->q[0] = ED->q[0];
-                        ED->q[0] = tmp64u;
-                    } else {
-                        tmp16u = add16(emu, ED->word[0], GD->word[0]);
-                        GD->word[0] = ED->word[0];
-                        ED->word[0] = tmp16u;
-                    }
+                    tmp16u = add16(emu, ED->word[0], GD->word[0]);
+                    GD->word[0] = ED->word[0];
+                    ED->word[0] = tmp16u;
                     pthread_mutex_unlock(&emu->context->mutex_lock);
 #endif
                     break;
@@ -141,32 +129,18 @@ uintptr_t Run66F0(x64emu_t *emu, rex_t rex, uintptr_t addr)
             nextop = F8;                                            \
             GETEW(0);                                               \
             GETGW;                                                  \
-            if(rex.w) {                                             \
-                do {                                                \
-                    tmp64u = native_lock_read_dd(ED);               \
-                    tmp64u = OP##64(emu, tmp64u, GD->q[0]);         \
-                } while (native_lock_write_dd(ED, tmp64u));         \
-            } else {                                                \
-                do {                                                \
-                    tmp16u = native_lock_read_h(ED);                \
-                    tmp16u = OP##16(emu, tmp16u, GW->word[0]);      \
-                } while (native_lock_write_h(ED, tmp16u));          \
-            }                                                       \
+            do {                                                    \
+                tmp16u = native_lock_read_h(ED);                    \
+                tmp16u = OP##16(emu, tmp16u, GW->word[0]);          \
+            } while (native_lock_write_h(ED, tmp16u));              \
             break;                                                  \
         case B+3:                                                   \
             nextop = F8;                                            \
             GETEW(0);                                               \
             GETGW;                                                  \
-            if(rex.w)                                               \
-                GD->q[0] = OP##64(emu, GD->q[0], ED->q[0]);         \
-            else                                                    \
-                GW->word[0] = OP##16(emu, GW->word[0], EW->word[0]);\
-            break;                                                  \
+            GW->word[0] = OP##16(emu, GW->word[0], EW->word[0]);    \
         case B+5:                                                   \
-            if(rex.w)                                               \
-                R_RAX = OP##64(emu, R_RAX, F32S64);                 \
-            else                                                    \
-                R_AX = OP##16(emu, R_AX, F16);                      \
+            R_AX = OP##16(emu, R_AX, F16);                          \
             break;
 #else
         #define GO(B, OP)                                           \
@@ -175,10 +149,7 @@ uintptr_t Run66F0(x64emu_t *emu, rex_t rex, uintptr_t addr)
             GETEW(0);                                               \
             GETGW;                                                  \
             pthread_mutex_lock(&emu->context->mutex_lock);          \
-            if(rex.w)                                               \
-                ED->q[0] = OP##64(emu, ED->q[0], GD->q[0]);         \
-            else                                                    \
-                EW->word[0] = OP##16(emu, EW->word[0], GW->word[0]);\
+            EW->word[0] = OP##16(emu, EW->word[0], GW->word[0]);    \
             pthread_mutex_unlock(&emu->context->mutex_lock);        \
             break;                                                  \
         case B+3:                                                   \
@@ -186,18 +157,12 @@ uintptr_t Run66F0(x64emu_t *emu, rex_t rex, uintptr_t addr)
             GETEW(0);                                               \
             GETGW;                                                  \
             pthread_mutex_lock(&emu->context->mutex_lock);          \
-            if(rex.w)                                               \
-                GD->q[0] = OP##64(emu, GD->q[0], ED->q[0]);         \
-            else                                                    \
-                GW->word[0] = OP##16(emu, GW->word[0], EW->word[0]);\
+            GW->word[0] = OP##16(emu, GW->word[0], EW->word[0]);    \
             pthread_mutex_unlock(&emu->context->mutex_lock);        \
             break;                                                  \
         case B+5:                                                   \
             pthread_mutex_lock(&emu->context->mutex_lock);          \
-            if(rex.w)                                               \
-                R_RAX = OP##64(emu, R_RAX, F32S64);                 \
-            else                                                    \
-                R_AX = OP##16(emu, R_AX, F16);                      \
+            R_AX = OP##16(emu, R_AX, F16);                          \
             pthread_mutex_unlock(&emu->context->mutex_lock);        \
             break;
 #endif
@@ -214,42 +179,42 @@ uintptr_t Run66F0(x64emu_t *emu, rex_t rex, uintptr_t addr)
         case 0x83:              /* GRP Ew,Ib */
             nextop = F8;
             GETED((opcode==0x83)?1:2);
-            tmp64s = (opcode==0x83)?(F8S):(F16S);
-            tmp64u = (uint64_t)tmp64s;
+            tmp16s = (opcode==0x83)?(F8S):(F16S);
+            tmp16u = (uint16_t)tmp16s;
 #if defined(DYNAREC) && !defined(TEST_INTERPRETER)
             if(MODREG)
                 switch((nextop>>3)&7) {
-                    case 0: ED->word[0] = add16(emu, ED->word[0], tmp64u); break;
-                    case 1: ED->word[0] =  or16(emu, ED->word[0], tmp64u); break;
-                    case 2: ED->word[0] = adc16(emu, ED->word[0], tmp64u); break;
-                    case 3: ED->word[0] = sbb16(emu, ED->word[0], tmp64u); break;
-                    case 4: ED->word[0] = and16(emu, ED->word[0], tmp64u); break;
-                    case 5: ED->word[0] = sub16(emu, ED->word[0], tmp64u); break;
-                    case 6: ED->word[0] = xor16(emu, ED->word[0], tmp64u); break;
-                    case 7:            cmp16(emu, ED->word[0], tmp64u); break;
+                    case 0: ED->word[0] = add16(emu, ED->word[0], tmp16u); break;
+                    case 1: ED->word[0] =  or16(emu, ED->word[0], tmp16u); break;
+                    case 2: ED->word[0] = adc16(emu, ED->word[0], tmp16u); break;
+                    case 3: ED->word[0] = sbb16(emu, ED->word[0], tmp16u); break;
+                    case 4: ED->word[0] = and16(emu, ED->word[0], tmp16u); break;
+                    case 5: ED->word[0] = sub16(emu, ED->word[0], tmp16u); break;
+                    case 6: ED->word[0] = xor16(emu, ED->word[0], tmp16u); break;
+                    case 7:               cmp16(emu, ED->word[0], tmp16u); break;
                 }
             else
                 switch((nextop>>3)&7) {
-                    case 0: do { tmp16u2 = native_lock_read_h(ED); tmp16u2 = add16(emu, tmp16u2, tmp64u);} while(native_lock_write_h(ED, tmp16u2)); break;
-                    case 1: do { tmp16u2 = native_lock_read_h(ED); tmp16u2 =  or16(emu, tmp16u2, tmp64u);} while(native_lock_write_h(ED, tmp16u2)); break;
-                    case 2: do { tmp16u2 = native_lock_read_h(ED); tmp16u2 = adc16(emu, tmp16u2, tmp64u);} while(native_lock_write_h(ED, tmp16u2)); break;
-                    case 3: do { tmp16u2 = native_lock_read_h(ED); tmp16u2 = sbb16(emu, tmp16u2, tmp64u);} while(native_lock_write_h(ED, tmp16u2)); break;
-                    case 4: do { tmp16u2 = native_lock_read_h(ED); tmp16u2 = and16(emu, tmp16u2, tmp64u);} while(native_lock_write_h(ED, tmp16u2)); break;
-                    case 5: do { tmp16u2 = native_lock_read_h(ED); tmp16u2 = sub16(emu, tmp16u2, tmp64u);} while(native_lock_write_h(ED, tmp16u2)); break;
-                    case 6: do { tmp16u2 = native_lock_read_h(ED); tmp16u2 = xor16(emu, tmp16u2, tmp64u);} while(native_lock_write_h(ED, tmp16u2)); break;
-                    case 7:                                                 cmp16(emu, ED->word[0], tmp64u); break;
+                    case 0: do { tmp16u2 = native_lock_read_h(ED); tmp16u2 = add16(emu, tmp16u2, tmp16u);} while(native_lock_write_h(ED, tmp16u2)); break;
+                    case 1: do { tmp16u2 = native_lock_read_h(ED); tmp16u2 =  or16(emu, tmp16u2, tmp16u);} while(native_lock_write_h(ED, tmp16u2)); break;
+                    case 2: do { tmp16u2 = native_lock_read_h(ED); tmp16u2 = adc16(emu, tmp16u2, tmp16u);} while(native_lock_write_h(ED, tmp16u2)); break;
+                    case 3: do { tmp16u2 = native_lock_read_h(ED); tmp16u2 = sbb16(emu, tmp16u2, tmp16u);} while(native_lock_write_h(ED, tmp16u2)); break;
+                    case 4: do { tmp16u2 = native_lock_read_h(ED); tmp16u2 = and16(emu, tmp16u2, tmp16u);} while(native_lock_write_h(ED, tmp16u2)); break;
+                    case 5: do { tmp16u2 = native_lock_read_h(ED); tmp16u2 = sub16(emu, tmp16u2, tmp16u);} while(native_lock_write_h(ED, tmp16u2)); break;
+                    case 6: do { tmp16u2 = native_lock_read_h(ED); tmp16u2 = xor16(emu, tmp16u2, tmp16u);} while(native_lock_write_h(ED, tmp16u2)); break;
+                    case 7:                                                  cmp16(emu, ED->word[0], tmp16u); break;
                 }
 #else
             pthread_mutex_lock(&emu->context->mutex_lock);
             switch((nextop>>3)&7) {
-                case 0: ED->word[0] = add16(emu, ED->word[0], tmp64u); break;
-                case 1: ED->word[0] =  or16(emu, ED->word[0], tmp64u); break;
-                case 2: ED->word[0] = adc16(emu, ED->word[0], tmp64u); break;
-                case 3: ED->word[0] = sbb16(emu, ED->word[0], tmp64u); break;
-                case 4: ED->word[0] = and16(emu, ED->word[0], tmp64u); break;
-                case 5: ED->word[0] = sub16(emu, ED->word[0], tmp64u); break;
-                case 6: ED->word[0] = xor16(emu, ED->word[0], tmp64u); break;
-                case 7:               cmp16(emu, ED->word[0], tmp64u); break;
+                case 0: ED->word[0] = add16(emu, ED->word[0], tmp16u); break;
+                case 1: ED->word[0] =  or16(emu, ED->word[0], tmp16u); break;
+                case 2: ED->word[0] = adc16(emu, ED->word[0], tmp16u); break;
+                case 3: ED->word[0] = sbb16(emu, ED->word[0], tmp16u); break;
+                case 4: ED->word[0] = and16(emu, ED->word[0], tmp16u); break;
+                case 5: ED->word[0] = sub16(emu, ED->word[0], tmp16u); break;
+                case 6: ED->word[0] = xor16(emu, ED->word[0], tmp16u); break;
+                case 7:               cmp16(emu, ED->word[0], tmp16u); break;
             }
             pthread_mutex_unlock(&emu->context->mutex_lock);
 #endif
@@ -261,74 +226,34 @@ uintptr_t Run66F0(x64emu_t *emu, rex_t rex, uintptr_t addr)
             switch((nextop>>3)&7) {
                 case 0:                 /* INC Ed */
 #if defined(DYNAREC) && !defined(TEST_INTERPRETER)
-                    if(rex.w)
-                        if(((uintptr_t)ED)&7) {
-                            // unaligned
-                            do {
-                                tmp64u = ED->q[0] & 0xffffffffffffff00LL;
-                                tmp64u |= native_lock_read_b(ED);
-                                tmp64u = inc64(emu, tmp64u);
-                            } while(native_lock_write_b(ED, tmp64u&0xff));
-                            ED->q[0] = tmp64u;
-                        }
-                        else
-                            do {
-                                tmp64u = native_lock_read_dd(ED);
-                            } while(native_lock_write_dd(ED, inc64(emu, tmp64u)));
-                    else {
-                        if((uintptr_t)ED&1) { 
-                            //meh.
-                            do {
-                                tmp16u = ED->word[0];
-                                tmp16u &=~0xff;
-                                tmp16u |= native_lock_read_b(ED);
-                                tmp16u = inc16(emu, tmp16u);
-                            } while(native_lock_write_b(ED, tmp16u&0xff));
-                            ED->word[0] = tmp16u;
-                        } else {
-                            do {
-                                tmp16u = native_lock_read_h(ED);
-                            } while(native_lock_write_h(ED, inc16(emu, tmp16u)));
-                        }
+                    if((uintptr_t)ED&1) { 
+                        //meh.
+                        do {
+                            tmp16u = ED->word[0];
+                            tmp16u &=~0xff;
+                            tmp16u |= native_lock_read_b(ED);
+                            tmp16u = inc16(emu, tmp16u);
+                        } while(native_lock_write_b(ED, tmp16u&0xff));
+                        ED->word[0] = tmp16u;
+                    } else {
+                        do {
+                            tmp16u = native_lock_read_h(ED);
+                        } while(native_lock_write_h(ED, inc16(emu, tmp16u)));
                     }
 #else
                     pthread_mutex_lock(&emu->context->mutex_lock);
-                    if(rex.w) {
-                        ED->q[0] = inc64(emu, ED->q[0]);
-                    } else {
-                        ED->word[0] = inc16(emu, ED->word[0]);
-                    }
+                    ED->word[0] = inc16(emu, ED->word[0]);
                     pthread_mutex_unlock(&emu->context->mutex_lock);
 #endif
                     break;
                 case 1:                 /* DEC Ed */
 #if defined(DYNAREC) && !defined(TEST_INTERPRETER)
-                    if(rex.w)
-                        if(((uintptr_t)ED)&7) {
-                            // unaligned
-                            do {
-                                tmp64u = ED->q[0] & 0xffffffffffffff00LL;
-                                tmp64u |= native_lock_read_b(ED);
-                                tmp64u = dec64(emu, tmp64u);
-                            } while(native_lock_write_b(ED, tmp64u&0xff));
-                            ED->q[0] = tmp64u;
-                        }
-                        else
-                            do {
-                                tmp64u = native_lock_read_dd(ED);
-                            } while(native_lock_write_dd(ED, dec64(emu, tmp64u)));
-                    else {
-                        do {
-                            tmp16u = native_lock_read_h(ED);
-                        } while(native_lock_write_h(ED, dec16(emu, tmp16u)));
-                    }
+                    do {
+                        tmp16u = native_lock_read_h(ED);
+                    } while(native_lock_write_h(ED, dec16(emu, tmp16u)));
 #else
                     pthread_mutex_lock(&emu->context->mutex_lock);
-                    if(rex.w) {
-                        ED->q[0] = dec64(emu, ED->q[0]);
-                    } else {
-                        ED->word[0] = dec16(emu, ED->word[0]);
-                    }
+                    ED->word[0] = dec16(emu, ED->word[0]);
                     pthread_mutex_unlock(&emu->context->mutex_lock);
 #endif
                     break;
diff --git a/src/emu/x64run67.c b/src/emu/x64run67.c
index 2d9db866..c18f867f 100644
--- a/src/emu/x64run67.c
+++ b/src/emu/x64run67.c
@@ -337,27 +337,50 @@ uintptr_t Run67(x64emu_t *emu, rex_t rex, int rep, uintptr_t addr)
     case 0xE0:                      /* LOOPNZ */

         CHECK_FLAGS(emu);

         tmp8s = F8S;

-        --R_ECX; // don't update flags

-        if(R_ECX && !ACCESS_FLAG(F_ZF))

-            addr += tmp8s;

+        if(rex.is32bits) {

+            --R_CX; // don't update flags

+            if(R_CX && !ACCESS_FLAG(F_ZF))

+                addr += tmp8s;

+        } else {

+            --R_ECX; // don't update flags

+            if(R_ECX && !ACCESS_FLAG(F_ZF))

+                addr += tmp8s;

+        }

         break;

     case 0xE1:                      /* LOOPZ */

         CHECK_FLAGS(emu);

         tmp8s = F8S;

-        --R_ECX; // don't update flags

-        if(R_ECX && ACCESS_FLAG(F_ZF))

-            addr += tmp8s;

+        if(rex.is32bits) {

+            --R_CX; // don't update flags

+            if(R_CX && ACCESS_FLAG(F_ZF))

+                addr += tmp8s;

+        } else {

+            --R_ECX; // don't update flags

+            if(R_ECX && ACCESS_FLAG(F_ZF))

+                addr += tmp8s;

+        }

         break;

     case 0xE2:                      /* LOOP */

         tmp8s = F8S;

-        --R_ECX; // don't update flags

-        if(R_ECX)

-            addr += tmp8s;

+        if(rex.is32bits) {

+            --R_CX; // don't update flags

+            if(R_CX)

+                addr += tmp8s;

+        } else {

+            --R_ECX; // don't update flags

+            if(R_ECX)

+                addr += tmp8s;

+        }

         break;

     case 0xE3:              /* JECXZ Ib */

         tmp8s = F8S;

-        if(!R_ECX)

-            addr += tmp8s;

+        if(rex.is32bits) {

+            if(!R_CX)

+                addr += tmp8s;

+        } else {

+            if(!R_ECX)

+                addr += tmp8s;

+        }

         break;

 

     case 0xE8:                      /* CALL Id */

diff --git a/src/emu/x64run_private.c b/src/emu/x64run_private.c
index d191fd02..232fe163 100755
--- a/src/emu/x64run_private.c
+++ b/src/emu/x64run_private.c
@@ -1077,11 +1077,13 @@ void PrintTrace(x64emu_t* emu, uintptr_t ip, int dynarec)
                 peek = PK(0);
             }
             if(peek==0xC3 || peek==0xC2 || (peek==0xF3 && PK(1)==0xC3)) {
-                printf_log(LOG_NONE, " => %p", *(void**)(R_RSP));
-                if(is32bits)
+                if(is32bits) {
+                    printf_log(LOG_NONE, " => %p", (void*)(uintptr_t)*(uint32_t*)(R_RSP));
                     printFunctionAddr(*(uint32_t*)(R_RSP), "=> ");
-                else
+                } else {
+                    printf_log(LOG_NONE, " => %p", *(void**)(R_RSP));
                     printFunctionAddr(*(uintptr_t*)(R_RSP), "=> ");
+                }
             } else if(peek==0x57 && rex.b) {
                 printf_log(LOG_NONE, " => STACK_TOP: %p", *(void**)(R_RSP));
                 printFunctionAddr(ip, "here: ");
diff --git a/src/emu/x64runf0.c b/src/emu/x64runf0.c
index 6d58670a..5a75203e 100644
--- a/src/emu/x64runf0.c
+++ b/src/emu/x64runf0.c
@@ -50,10 +50,11 @@ uintptr_t RunF0(x64emu_t *emu, rex_t rex, uintptr_t addr)
     opcode = F8;

     // REX prefix before the F0 are ignored

     rex.rex = 0;

-    while(opcode>=0x40 && opcode<=0x4f) {

-        rex.rex = opcode;

-        opcode = F8;

-    }

+    if(!rex.is32bits)

+        while(opcode>=0x40 && opcode<=0x4f) {

+            rex.rex = opcode;

+            opcode = F8;

+        }

 

     switch(opcode) {

 #if defined(DYNAREC) && !defined(TEST_INTERPRETER)