about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author: ptitSeb <sebastien.chev@gmail.com> 2021-03-04 11:21:26 +0100
committer: ptitSeb <sebastien.chev@gmail.com> 2021-03-04 11:21:26 +0100
commit: 9613868aa403ff5b06ebfea925ee986f5198b354 (patch)
tree: abc5f65b2b3690e4e86a5e428e94ca7c5c3bf170
parent: 8cccb02e6b6c94725f0be61958031b04ddd06713 (diff)
download: box64-9613868aa403ff5b06ebfea925ee986f5198b354.tar.gz
box64-9613868aa403ff5b06ebfea925ee986f5198b354.zip
First x86_64 opcode executed :)
-rwxr-xr-x CMakeLists.txt 1
-rwxr-xr-x src/emu/x64primop.c 1464
-rwxr-xr-x src/emu/x64primop.h 576
-rwxr-xr-x src/emu/x64run.c 58
-rwxr-xr-x src/emu/x64run_private.h 25
5 files changed, 2113 insertions, 11 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 13788e07..af475804 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -107,6 +107,7 @@ set(ELFLOADER_SRC
     "${BOX64_ROOT}/src/emu/x64emu.c"
     "${BOX64_ROOT}/src/emu/x64int3.c"
     "${BOX64_ROOT}/src/emu/x87emu_private.c"
+    "${BOX64_ROOT}/src/emu/x64primop.c"
     "${BOX64_ROOT}/src/emu/x64run.c"
     "${BOX64_ROOT}/src/emu/x64run_private.c"
     "${BOX64_ROOT}/src/emu/x64syscall.c"
diff --git a/src/emu/x64primop.c b/src/emu/x64primop.c
new file mode 100755
index 00000000..0c040f1d
--- /dev/null
+++ b/src/emu/x64primop.c
@@ -0,0 +1,1464 @@
+// Based on Realmode X86 Emulator Library, and enhanced to handle 64bits
+/****************************************************************************
+*
+* Realmode X86 Emulator Library
+*
+* Copyright (c) 1996-1999 SciTech Software, Inc.
+* Copyright (c) David Mosberger-Tang
+* Copyright (c) 1999 Egbert Eich
+* Copyright (c) 2007-2017 SUSE LINUX GmbH; Author: Steffen Winterfeldt
+*
+*  ========================================================================
+*
+*  Permission to use, copy, modify, distribute, and sell this software and
+*  its documentation for any purpose is hereby granted without fee,
+*  provided that the above copyright notice appear in all copies and that
+*  both that copyright notice and this permission notice appear in
+*  supporting documentation, and that the name of the authors not be used
+*  in advertising or publicity pertaining to distribution of the software
+*  without specific, written prior permission.  The authors makes no
+*  representations about the suitability of this software for any purpose.
+*  It is provided "as is" without express or implied warranty.
+*
+*  THE AUTHORS DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+*  INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+*  EVENT SHALL THE AUTHORS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+*  CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
+*  USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+*  OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+*  PERFORMANCE OF THIS SOFTWARE.
+*
+*  ========================================================================
+*
+* Description:
+*   Implement the primitive machine operations used by the emulation code
+*   in ops.c
+*
+*   Carry Chain Calculation
+*
+*   This represents a somewhat expensive calculation which is
+*   apparently required to emulate the setting of the OF and AF flag.
+*   The latter is not so important, but the former is.  The overflow
+*   flag is the XOR of the top two bits of the carry chain for an
+*   addition (similar for subtraction).  Since we do not want to
+*   simulate the addition in a bitwise manner, we try to calculate the
+*   carry chain given the two operands and the result.
+*
+*   So, given the following table, which represents the addition of two
+*   bits, we can derive a formula for the carry chain.
+*
+*   a   b   cin   r     cout
+*   0   0   0     0     0
+*   0   0   1     1     0
+*   0   1   0     1     0
+*   0   1   1     0     1
+*   1   0   0     1     0
+*   1   0   1     0     1
+*   1   1   0     0     1
+*   1   1   1     1     1
+*
+*   Construction of table for cout:
+*
+*   ab
+*   r  \  00   01   11  10
+*   |------------------
+*   0  |   0    1    1   1
+*   1  |   0    0    1   0
+*
+*   By inspection, one gets:  cc = ab +  r'(a + b)
+*
+*   That represents a lot of operations, but NO CHOICE....
+*
+*   Borrow Chain Calculation.
+*
+*   The following table represents the subtraction of two bits, from
+*   which we can derive a formula for the borrow chain.
+*
+*   a   b   bin   r     bout
+*   0   0   0     0     0
+*   0   0   1     1     1
+*   0   1   0     1     1
+*   0   1   1     0     1
+*   1   0   0     1     0
+*   1   0   1     0     0
+*   1   1   0     0     0
+*   1   1   1     1     1
+*
+*   Construction of table for bout:
+*
+*   ab
+*   r  \  00   01   11  10
+*   |------------------
+*   0  |   0    1    0   0
+*   1  |   1    1    1   0
+*
+*   By inspection, one gets:  bc = a'b +  r(a' + b)
+*
+****************************************************************************/
+
+// This has been heavily modified to fit box86 purpose...
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "x64emu_private.h"
+#include "x64run_private.h"
+
+/*------------------------- Global Variables ------------------------------*/
+
+#define PARITY(x)   (((emu->x64emu_parity_tab[(x) / 32] >> ((x) % 32)) & 1) == 0)
+#define XOR2(x) 	(((x) ^ ((x)>>1)) & 0x1)
+
+/*----------------------------- Implementation ----------------------------*/
+
+/****************************************************************************
+REMARKS:
+Implements the AAA instruction and side effects.
+****************************************************************************/
+uint16_t aaa16(x64emu_t *emu, uint16_t d)
+{
+	/*
+	 * AAA: ASCII adjust AX (passed as d) after addition.
+	 * When the low nibble exceeds 9 or AF is set, 0x106 is added
+	 * (AL += 6, AH += 1) and AF/CF are set; otherwise both are cleared.
+	 * The high nibble of AL is always masked off in the returned value.
+	 */
+	uint16_t	adjusted;
+	CHECK_FLAGS(emu);
+	if ((d & 0xf) > 0x9 || ACCESS_FLAG(F_AF)) {
+		d += 0x106;
+		SET_FLAG(F_CF);
+		SET_FLAG(F_AF);
+	} else {
+		CLEAR_FLAG(F_AF);
+		CLEAR_FLAG(F_CF);
+	}
+	adjusted = (uint16_t)(d & 0xFF0F);
+	/* SF is forced to 0; ZF looks at the whole word, PF at the low byte */
+	CLEAR_FLAG(F_SF);
+	CONDITIONAL_SET_FLAG(adjusted == 0, F_ZF);
+	CONDITIONAL_SET_FLAG(PARITY(adjusted & 0xff), F_PF);
+	return adjusted;
+}
+
+/****************************************************************************
+REMARKS:
+Implements the AAS instruction and side effects.
+****************************************************************************/
+uint16_t aas16(x64emu_t *emu, uint16_t d)
+{
+	/*
+	 * AAS: ASCII adjust AX (passed as d) after subtraction.
+	 * When the low nibble exceeds 9 or AF is set, 0x106 is subtracted
+	 * (AL -= 6, AH -= 1) and AF/CF are set; otherwise both are cleared.
+	 * The high nibble of AL is always masked off in the returned value.
+	 */
+	uint16_t	adjusted;
+	CHECK_FLAGS(emu);
+	if ((d & 0xf) > 0x9 || ACCESS_FLAG(F_AF)) {
+		d -= 0x106;
+		SET_FLAG(F_CF);
+		SET_FLAG(F_AF);
+	} else {
+		CLEAR_FLAG(F_AF);
+		CLEAR_FLAG(F_CF);
+	}
+	adjusted = (uint16_t)(d & 0xFF0F);
+	/* SF is forced to 0; ZF looks at the whole word, PF at the low byte */
+	CLEAR_FLAG(F_SF);
+	CONDITIONAL_SET_FLAG(adjusted == 0, F_ZF);
+	CONDITIONAL_SET_FLAG(PARITY(adjusted & 0xff), F_PF);
+	return adjusted;
+}
+
+/****************************************************************************
+REMARKS:
+Implements the AAD instruction and side effects.
+****************************************************************************/
+uint16_t aad16(x64emu_t *emu, uint16_t d, uint8_t base)
+{
+	/*
+	 * AAD: fold the two unpacked digits of AX (d) into one byte,
+	 * AL = (AL + AH * base) & 0xFF, with AH becoming 0 in the result.
+	 * CF, AF and OF are cleared; SF, ZF and PF follow the new AL.
+	 */
+	const uint8_t high = (uint8_t)((d >> 8) & 0xff);
+	const uint8_t low = (uint8_t)((d & 0xff));
+	uint16_t folded;
+
+	RESET_FLAGS(emu);
+
+	folded = (uint16_t)((low + base * high) & 0xFF);
+
+	CLEAR_FLAG(F_OF);
+	CLEAR_FLAG(F_AF);
+	CLEAR_FLAG(F_CF);
+	CONDITIONAL_SET_FLAG(folded == 0, F_ZF);
+	CONDITIONAL_SET_FLAG(folded & 0x80, F_SF);
+	CONDITIONAL_SET_FLAG(PARITY(folded & 0xff), F_PF);
+	return folded;
+}
+
+/****************************************************************************
+REMARKS:
+Implements the AAM instruction and side effects.
+****************************************************************************/
+uint16_t aam16(x64emu_t *emu, uint8_t d, uint8_t base)
+{
+	/*
+	 * AAM: split AL (d) into two unpacked digits.
+	 * Returns AX with AH = d / base and AL = d % base.
+	 * CF, AF and OF are cleared; SF, ZF and PF are derived from the
+	 * resulting AL only (the low byte), as the instruction defines them.
+	 * NOTE(review): base == 0 is not guarded here; presumably the caller
+	 * raises the divide error before calling - confirm.
+	 */
+	uint16_t h, l;
+
+	RESET_FLAGS(emu);
+
+	h = (uint16_t)(d / base);
+	l = (uint16_t)(d % base);
+	l |= (uint16_t)(h << 8);
+
+	CLEAR_FLAG(F_CF);
+	CLEAR_FLAG(F_AF);
+	CLEAR_FLAG(F_OF);
+	CONDITIONAL_SET_FLAG(l & 0x80, F_SF);
+	/* ZF must ignore AH: e.g. 10 / 10 gives AX = 0x0100 but AL == 0 */
+	CONDITIONAL_SET_FLAG((l & 0xff) == 0, F_ZF);
+	CONDITIONAL_SET_FLAG(PARITY(l & 0xff), F_PF);
+	return l;
+}
+
+/****************************************************************************
+REMARKS:
+Implements the ADC instruction and side effects.
+****************************************************************************/
+uint8_t adc8(x64emu_t *emu, uint8_t d, uint8_t s)
+{
+	/* ADC (8-bit): returns d + s + CF and updates CF, ZF, SF, PF, OF, AF. */
+	uint32_t sum;
+	uint32_t carries;
+
+	CHECK_FLAGS(emu);
+
+	sum = d + s;
+	if (ACCESS_FLAG(F_CF))
+		sum += 1;
+
+	/* bit 8 of the widened sum is the carry out */
+	CONDITIONAL_SET_FLAG(sum & 0x100, F_CF);
+	CONDITIONAL_SET_FLAG(sum & 0x80, F_SF);
+	CONDITIONAL_SET_FLAG((sum & 0xff) == 0, F_ZF);
+	CONDITIONAL_SET_FLAG(PARITY(sum & 0xff), F_PF);
+
+	/* carry chain: cc = ab + r'(a + b); OF is the XOR of its top two bits */
+	carries = (s & d) | ((~sum) & (s | d));
+	CONDITIONAL_SET_FLAG(carries & 0x8, F_AF);
+	CONDITIONAL_SET_FLAG(XOR2(carries >> 6), F_OF);
+	return (uint8_t)sum;
+}
+
+uint16_t adc16(x64emu_t *emu, uint16_t d, uint16_t s)
+{
+	/* ADC (16-bit): returns d + s + CF and updates CF, ZF, SF, PF, OF, AF. */
+	uint32_t sum;
+	uint32_t carries;
+
+	CHECK_FLAGS(emu);
+
+	sum = d + s;
+	if (ACCESS_FLAG(F_CF))
+		sum += 1;
+
+	/* bit 16 of the widened sum is the carry out */
+	CONDITIONAL_SET_FLAG(sum & 0x10000, F_CF);
+	CONDITIONAL_SET_FLAG(sum & 0x8000, F_SF);
+	CONDITIONAL_SET_FLAG((sum & 0xffff) == 0, F_ZF);
+	CONDITIONAL_SET_FLAG(PARITY(sum & 0xff), F_PF);
+
+	/* carry chain: cc = ab + r'(a + b); OF is the XOR of its top two bits */
+	carries = (s & d) | ((~sum) & (s | d));
+	CONDITIONAL_SET_FLAG(carries & 0x8, F_AF);
+	CONDITIONAL_SET_FLAG(XOR2(carries >> 14), F_OF);
+	return (uint16_t)sum;
+}
+
+uint32_t adc32(x64emu_t *emu, uint32_t d, uint32_t s)
+{
+	/*
+	 * ADC (32-bit): returns d + s + CF and updates CF, ZF, SF, PF, OF, AF.
+	 * The carry out is recovered by adding the 16-bit halves separately,
+	 * since the 32-bit sum itself wraps.
+	 */
+	uint32_t carry_in;
+	uint32_t low_half;
+	uint32_t high_half;
+	uint32_t sum;
+	uint32_t carries;
+
+	CHECK_FLAGS(emu);
+
+	carry_in = ACCESS_FLAG(F_CF) ? 1 : 0;
+	low_half = carry_in + (d & 0xFFFF) + (s & 0xFFFF);
+	sum = carry_in + d + s;
+	high_half = (low_half >> 16) + (d >> 16) + (s >> 16);
+
+	CONDITIONAL_SET_FLAG(high_half & 0x10000, F_CF);
+	CONDITIONAL_SET_FLAG(sum & 0x80000000, F_SF);
+	CONDITIONAL_SET_FLAG(!sum, F_ZF);
+	CONDITIONAL_SET_FLAG(PARITY(sum & 0xff), F_PF);
+
+	/* carry chain: cc = ab + r'(a + b); OF is the XOR of its top two bits */
+	carries = (s & d) | ((~sum) & (s | d));
+	CONDITIONAL_SET_FLAG(carries & 0x8, F_AF);
+	CONDITIONAL_SET_FLAG(XOR2(carries >> 30), F_OF);
+	return sum;
+}
+
+uint64_t adc64(x64emu_t *emu, uint64_t d, uint64_t s)
+{
+	/*
+	 * ADC (64-bit): returns d + s + CF and updates CF, ZF, SF, PF, OF, AF.
+	 * The carry out is recovered by adding the 32-bit halves separately,
+	 * since the 64-bit sum itself wraps.
+	 */
+	uint64_t carry_in;
+	uint64_t low_half;
+	uint64_t high_half;
+	uint64_t sum;
+	uint64_t carries;
+
+	CHECK_FLAGS(emu);
+
+	carry_in = ACCESS_FLAG(F_CF) ? 1 : 0;
+	low_half = carry_in + (d & 0xFFFFFFFF) + (s & 0xFFFFFFFF);
+	sum = carry_in + d + s;
+	high_half = (low_half >> 32) + (d >> 32) + (s >> 32);
+
+	CONDITIONAL_SET_FLAG(high_half & 0x100000000L, F_CF);
+	CONDITIONAL_SET_FLAG(sum & 0x8000000000000000L, F_SF);
+	CONDITIONAL_SET_FLAG(!sum, F_ZF);
+	CONDITIONAL_SET_FLAG(PARITY(sum & 0xff), F_PF);
+
+	/* carry chain: cc = ab + r'(a + b); OF is the XOR of its top two bits */
+	carries = (s & d) | ((~sum) & (s | d));
+	CONDITIONAL_SET_FLAG(carries & 0x8, F_AF);
+	CONDITIONAL_SET_FLAG(XOR2(carries >> 62), F_OF);
+	return sum;
+}
+
+/****************************************************************************
+REMARKS:
+Implements the CMP instruction and side effects.
+****************************************************************************/
+uint8_t cmp8(x64emu_t *emu, uint8_t d, uint8_t s)
+{
+	/* CMP (8-bit): sets flags as for d - s and returns d unchanged. */
+	uint32_t diff;
+	uint32_t borrows;
+
+	RESET_FLAGS(emu);
+
+	diff = d - s;
+	CLEAR_FLAG(F_CF);
+	CONDITIONAL_SET_FLAG((diff & 0xff) == 0, F_ZF);
+	CONDITIONAL_SET_FLAG(diff & 0x80, F_SF);
+	CONDITIONAL_SET_FLAG(PARITY(diff & 0xff), F_PF);
+
+	/* borrow chain: bc = a'b + r(a' + b); its top bit is the borrow out */
+	borrows = (diff & (~d | s)) | (~d & s);
+	CONDITIONAL_SET_FLAG(borrows & 0x80, F_CF);
+	CONDITIONAL_SET_FLAG(borrows & 0x8, F_AF);
+	CONDITIONAL_SET_FLAG(XOR2(borrows >> 6), F_OF);
+	return d;
+}
+
+uint16_t cmp16(x64emu_t *emu, uint16_t d, uint16_t s)
+{
+	/* CMP (16-bit): sets flags as for d - s and returns d unchanged. */
+	uint32_t diff;
+	uint32_t borrows;
+
+	RESET_FLAGS(emu);
+
+	diff = d - s;
+	CONDITIONAL_SET_FLAG((diff & 0xffff) == 0, F_ZF);
+	CONDITIONAL_SET_FLAG(diff & 0x8000, F_SF);
+	CONDITIONAL_SET_FLAG(PARITY(diff & 0xff), F_PF);
+
+	/* borrow chain: bc = a'b + r(a' + b); its top bit is the borrow out */
+	borrows = (diff & (~d | s)) | (~d & s);
+	CONDITIONAL_SET_FLAG(borrows & 0x8000, F_CF);
+	CONDITIONAL_SET_FLAG(borrows & 0x8, F_AF);
+	CONDITIONAL_SET_FLAG(XOR2(borrows >> 14), F_OF);
+	return d;
+}
+
+uint32_t cmp32(x64emu_t *emu, uint32_t d, uint32_t s)
+{
+	/* CMP (32-bit): sets flags as for d - s and returns d unchanged. */
+	uint32_t diff;
+	uint32_t borrows;
+
+	RESET_FLAGS(emu);
+
+	diff = d - s;
+	CONDITIONAL_SET_FLAG(!diff, F_ZF);
+	CONDITIONAL_SET_FLAG(diff & 0x80000000, F_SF);
+	CONDITIONAL_SET_FLAG(PARITY(diff & 0xff), F_PF);
+
+	/* borrow chain: bc = a'b + r(a' + b); its top bit is the borrow out */
+	borrows = (diff & (~d | s)) | (~d & s);
+	CONDITIONAL_SET_FLAG(borrows & 0x80000000, F_CF);
+	CONDITIONAL_SET_FLAG(borrows & 0x8, F_AF);
+	CONDITIONAL_SET_FLAG(XOR2(borrows >> 30), F_OF);
+	return d;
+}
+
+uint64_t cmp64(x64emu_t *emu, uint64_t d, uint64_t s)
+{
+	/* CMP (64-bit): sets flags as for d - s and returns d unchanged. */
+	uint64_t diff;
+	uint64_t borrows;
+
+	RESET_FLAGS(emu);
+
+	diff = d - s;
+	CONDITIONAL_SET_FLAG(!diff, F_ZF);
+	CONDITIONAL_SET_FLAG(diff & 0x8000000000000000L, F_SF);
+	CONDITIONAL_SET_FLAG(PARITY(diff & 0xff), F_PF);
+
+	/* borrow chain: bc = a'b + r(a' + b); its top bit is the borrow out */
+	borrows = (diff & (~d | s)) | (~d & s);
+	CONDITIONAL_SET_FLAG(borrows & 0x8000000000000000L, F_CF);
+	CONDITIONAL_SET_FLAG(borrows & 0x8, F_AF);
+	CONDITIONAL_SET_FLAG(XOR2(borrows >> 62), F_OF);
+	return d;
+}
+
+/****************************************************************************
+REMARKS:
+Implements the DAA instruction and side effects.
+****************************************************************************/
+uint8_t daa8(x64emu_t *emu, uint8_t d)
+{
+	/*
+	 * DAA: decimal adjust AL (d) after addition.
+	 * Adds 6 when the low nibble is above 9 or AF is set, and 0x60 when
+	 * the original value is above 0x99 or CF is set. SF, ZF and PF follow
+	 * the adjusted byte, which is returned.
+	 */
+	uint32_t adjusted = d;
+	CHECK_FLAGS(emu);
+	if ((d & 0xf) > 9 || ACCESS_FLAG(F_AF)) {
+		adjusted += 6;
+		SET_FLAG(F_AF);
+	}
+	if (d > 0x99 || ACCESS_FLAG(F_CF)) {
+		adjusted += 0x60;
+		SET_FLAG(F_CF);
+	} else {
+		CLEAR_FLAG(F_CF);
+	}
+	CONDITIONAL_SET_FLAG((adjusted & 0xFF) == 0, F_ZF);
+	CONDITIONAL_SET_FLAG(adjusted & 0x80, F_SF);
+	CONDITIONAL_SET_FLAG(PARITY(adjusted & 0xff), F_PF);
+	return (uint8_t)adjusted;
+}
+
+/****************************************************************************
+REMARKS:
+Implements the DAS instruction and side effects.
+****************************************************************************/
+uint8_t das8(x64emu_t *emu, uint8_t d)
+{
+	/*
+	 * DAS: decimal adjust AL (d) after subtraction.
+	 * Subtracts 6 when the low nibble is above 9 or AF is set, and 0x60
+	 * when the original value is above 0x99 or CF is set. The incoming CF
+	 * is read before the new CF is written at the end.
+	 */
+	uint32_t adjusted = d;
+	CHECK_FLAGS(emu);
+	uint32_t carry_out = 0;
+	if ((d & 0xf) > 9 || ACCESS_FLAG(F_AF)) {
+		adjusted -= 6;
+		/* subtracting 6 borrows only when the original byte was below 6 */
+		carry_out = (d < 6);
+		SET_FLAG(F_AF);
+	} else {
+		CLEAR_FLAG(F_AF);
+	}
+	if (d > 0x99 || ACCESS_FLAG(F_CF)) {
+		adjusted -= 0x60;
+		carry_out = 1;
+	}
+	CONDITIONAL_SET_FLAG(carry_out, F_CF);
+	CONDITIONAL_SET_FLAG((adjusted & 0xFF) == 0, F_ZF);
+	CONDITIONAL_SET_FLAG(adjusted & 0x80, F_SF);
+	CONDITIONAL_SET_FLAG(PARITY(adjusted & 0xff), F_PF);
+	return (uint8_t)adjusted;
+}
+
+/****************************************************************************
+REMARKS:
+Implements the RCL instruction and side effects.
+****************************************************************************/
+uint8_t rcl8(x64emu_t *emu, uint8_t d, uint8_t s)
+{
+	/*
+	 * RCL (8-bit): rotate d left through the carry flag.
+	 *
+	 * CF and the byte form a 9-bit ring, so the effective count is
+	 * (s & 0x1f) % 9. For a non-zero count n:
+	 *   new CF            <- old bit (8 - n)
+	 *   result bits 7..n  <- old bits (7 - n)..0
+	 *   result bit  n-1   <- old CF
+	 *   result bits n-2..0 <- old bits 7..(9 - n)
+	 * OF is written only when n == 1 (new CF XOR new MSB).
+	 * A zero count leaves value and flags untouched.
+	 */
+	unsigned int rotated, n, carry_out;
+	CHECK_FLAGS(emu);
+
+	n = (s & 0x1f) % 9;
+	if (n == 0)
+		return d;
+
+	carry_out = (d >> (8 - n)) & 0x1;
+
+	/* low part moves up, high part wraps around below the carry slot */
+	rotated = (d << n) & 0xff;
+	rotated |= (d >> (9 - n)) & ((1 << (n - 1)) - 1);
+
+	/* the old carry lands in bit n-1; read it before CF is overwritten */
+	if (ACCESS_FLAG(F_CF)) {
+		rotated |= 1 << (n - 1);
+	}
+	CONDITIONAL_SET_FLAG(carry_out, F_CF);
+	CONDITIONAL_SET_FLAG(n == 1 && XOR2(carry_out + ((rotated >> 6) & 0x2)),
+						 F_OF);
+	return (uint8_t)rotated;
+}
+
+uint16_t rcl16(x64emu_t *emu, uint16_t d, uint8_t s)
+{
+	/*
+	 * RCL (16-bit): rotate d left through the carry flag.
+	 * CF and the word form a 17-bit ring; the count is (s & 0x1f) % 17.
+	 * OF is written only for a count of 1. A zero count changes nothing.
+	 */
+	unsigned int rotated, n, carry_out;
+	CHECK_FLAGS(emu);
+
+	n = (s & 0x1f) % 17;
+	if (n == 0)
+		return d;
+
+	carry_out = (d >> (16 - n)) & 0x1;
+	rotated = (d << n) & 0xffff;
+	rotated |= (d >> (17 - n)) & ((1 << (n - 1)) - 1);
+	/* the old carry lands in bit n-1; read it before CF is overwritten */
+	if (ACCESS_FLAG(F_CF)) {
+		rotated |= 1 << (n - 1);
+	}
+	CONDITIONAL_SET_FLAG(carry_out, F_CF);
+	CONDITIONAL_SET_FLAG(n == 1 && XOR2(carry_out + ((rotated >> 14) & 0x2)),
+						 F_OF);
+	return (uint16_t)rotated;
+}
+
+uint32_t rcl32(x64emu_t *emu, uint32_t d, uint8_t s)
+{
+	/*
+	 * RCL (32-bit): rotate d left through the carry flag.
+	 * The count is s & 0x1f (so 0..31). For a non-zero count n the new CF
+	 * is old bit (32 - n), the old CF lands in bit n-1 and the top n-1
+	 * bits of d wrap into bits n-2..0. OF is written only when n == 1.
+	 * A zero count leaves value and flags untouched.
+	 */
+	uint32_t res, cnt, cf;
+	CHECK_FLAGS(emu);
+	s = s&0x1f;
+
+	res = d;
+	if ((cnt = s % 33) != 0) {
+		cf = (d >> (32 - cnt)) & 0x1;
+		res = (d << cnt);
+		/* for cnt == 1 nothing wraps, and shifting a 32-bit value by 32
+		   would be undefined, so only do the wrap-around for cnt > 1 */
+		if (cnt > 1)
+			res |= d >> (33 - cnt);
+		if (ACCESS_FLAG(F_CF)) {     /* old carry goes to bit cnt-1 */
+			res |= (uint32_t)1 << (cnt - 1);
+		}
+		CONDITIONAL_SET_FLAG(cf, F_CF);
+		CONDITIONAL_SET_FLAG(cnt == 1 && XOR2(cf + ((res >> 30) & 0x2)),
+							 F_OF);
+	}
+	return res;
+}
+
+uint32_t rcl64(x64emu_t *emu, uint64_t d, uint8_t s)
+{
+	/*
+	 * RCL (64-bit): rotate d left through the carry flag.
+	 * The count is s & 0x3f (so 0..63). For a non-zero count n the new CF
+	 * is old bit (64 - n), the old CF lands in bit n-1 and the top n-1
+	 * bits of d wrap into bits n-2..0. OF is written only when n == 1.
+	 *
+	 * All shifts are done on 64-bit operands: shifting the int constant 1
+	 * by more than 30, or a 64-bit value by 64, is undefined in C.
+	 *
+	 * NOTE(review): the declared return type is uint32_t, so the upper
+	 * half of the rotated value is dropped on return. It looks like this
+	 * should be uint64_t; the signature is left as-is here because the
+	 * matching prototype is not visible - confirm and widen both together.
+	 */
+	uint64_t res, cnt, cf;
+	CHECK_FLAGS(emu);
+	s = s&0x3f;
+
+	res = d;
+	if ((cnt = s % 65) != 0) {
+		cf = (d >> (64 - cnt)) & 0x1;
+		res = (d << cnt);
+		/* for cnt == 1 nothing wraps around (and d >> 64 is undefined) */
+		if (cnt > 1)
+			res |= d >> (65 - cnt);
+		if (ACCESS_FLAG(F_CF)) {     /* old carry goes to bit cnt-1 */
+			res |= (uint64_t)1 << (cnt - 1);
+		}
+		CONDITIONAL_SET_FLAG(cf, F_CF);
+		CONDITIONAL_SET_FLAG(cnt == 1 && XOR2(cf + ((res >> 62) & 0x2)),
+							 F_OF);
+	}
+	return res;
+}
+/****************************************************************************
+REMARKS:
+Implements the RCR instruction and side effects.
+****************************************************************************/
+uint8_t rcr8(x64emu_t *emu, uint8_t d, uint8_t s)
+{
+	/*
+	 * RCR (8-bit): rotate d right through the carry flag.
+	 *
+	 * CF and the byte form a 9-bit ring, so the effective count is
+	 * (s & 0x1f) % 9. For a non-zero count n:
+	 *   new CF                 <- old bit (n - 1)
+	 *   result bits (7-n)..0   <- old bits 7..n
+	 *   result bit  (8-n)      <- old CF
+	 *   result bits 7..(9-n)   <- old bits (n-2)..0
+	 * OF is written only when n == 1 (old CF XOR old MSB).
+	 * A zero count leaves value and flags untouched.
+	 */
+	uint32_t	rotated, n;
+	uint32_t	carry_out, carry_in = 0;
+	CHECK_FLAGS(emu);
+
+	n = (s & 0x1f) % 9;
+	if (n == 0)
+		return d;
+
+	carry_out = (d >> (n - 1)) & 0x1;
+	if (n == 1) {
+		/* normalise the flag to 0/1; it is only needed for OF */
+		carry_in = ACCESS_FLAG(F_CF) != 0;
+	}
+
+	/* high part moves down, low part wraps to the top (excess bits
+	   above bit 7 are discarded by the final cast) */
+	rotated = (d >> n) & ((1 << (8 - n)) - 1);
+	rotated |= (d << (9 - n));
+
+	/* the old carry lands in bit 8-n; read it before CF is overwritten */
+	if (ACCESS_FLAG(F_CF)) {
+		rotated |= 1 << (8 - n);
+	}
+	CONDITIONAL_SET_FLAG(carry_out, F_CF);
+	if (n == 1) {
+		CONDITIONAL_SET_FLAG(XOR2(carry_in + ((d >> 6) & 0x2)),
+							 F_OF);
+	}
+	return (uint8_t)rotated;
+}
+
+uint16_t rcr16(x64emu_t *emu, uint16_t d, uint8_t s)
+{
+	/*
+	 * RCR (16-bit): rotate d right through the carry flag.
+	 * CF and the word form a 17-bit ring; the count is (s & 0x1f) % 17.
+	 * OF is written only for a count of 1. A zero count changes nothing.
+	 */
+	uint32_t	rotated, n;
+	uint32_t	carry_out, carry_in = 0;
+	CHECK_FLAGS(emu);
+
+	n = (s & 0x1f) % 17;
+	if (n == 0)
+		return d;
+
+	carry_out = (d >> (n - 1)) & 0x1;
+	if (n == 1) {
+		carry_in = ACCESS_FLAG(F_CF) != 0;
+	}
+	rotated = (d >> n) & ((1 << (16 - n)) - 1);
+	rotated |= (d << (17 - n));
+	/* the old carry lands in bit 16-n; read it before CF is overwritten */
+	if (ACCESS_FLAG(F_CF)) {
+		rotated |= 1 << (16 - n);
+	}
+	CONDITIONAL_SET_FLAG(carry_out, F_CF);
+	if (n == 1) {
+		CONDITIONAL_SET_FLAG(XOR2(carry_in + ((d >> 14) & 0x2)),
+							 F_OF);
+	}
+	return (uint16_t)rotated;
+}
+
+uint32_t rcr32(x64emu_t *emu, uint32_t d, uint8_t s)
+{
+	/*
+	 * RCR (32-bit): rotate d right through the carry flag.
+	 * The count is s & 0x1f (so 0..31). For a non-zero count n the new CF
+	 * is old bit (n - 1), the old CF lands in bit (32 - n) and the low
+	 * n-1 bits of d wrap to the top. OF is written only when n == 1
+	 * (old CF XOR old MSB). A zero count leaves value and flags untouched.
+	 *
+	 * Shifts use unsigned 32-bit operands: `1 << 31` on a plain int
+	 * overflows, which is undefined in C.
+	 */
+	uint32_t res, cnt;
+	uint32_t cf, ocf = 0;
+	CHECK_FLAGS(emu);
+	s = s&0x1f;
+
+	/* rotate right through carry */
+	res = d;
+	if ((cnt = s % 33) != 0) {
+		cf = (d >> (cnt - 1)) & 0x1;
+		if (cnt == 1) {
+			ocf = ACCESS_FLAG(F_CF) != 0;
+		}
+		/* an unsigned right shift already zero-fills the top cnt bits */
+		res = d >> cnt;
+		/* for cnt == 1 nothing wraps around (and d << 32 is undefined) */
+		if (cnt != 1)
+			res |= (d << (33 - cnt));
+		if (ACCESS_FLAG(F_CF)) {     /* old carry goes to bit 32-cnt */
+			res |= (uint32_t)1 << (32 - cnt);
+		}
+		CONDITIONAL_SET_FLAG(cf, F_CF);
+		if (cnt == 1) {
+			CONDITIONAL_SET_FLAG(XOR2(ocf + ((d >> 30) & 0x2)),
+								 F_OF);
+		}
+	}
+	return res;
+}
+
+uint64_t rcr64(x64emu_t *emu, uint64_t d, uint8_t s)
+{
+	/*
+	 * RCR (64-bit): rotate d right through the carry flag.
+	 * The count is s & 0x3f (so 0..63). For a non-zero count n the new CF
+	 * is old bit (n - 1), the old CF lands in bit (64 - n) and the low
+	 * n-1 bits of d wrap to the top. OF is written only when n == 1
+	 * (old CF XOR old MSB). A zero count leaves value and flags untouched.
+	 *
+	 * Every shift is done on a 64-bit operand. Building the mask or the
+	 * carry bit from the int constant 1 is undefined for shifts of 31 or
+	 * more and yields a wrong result.
+	 */
+	uint64_t res, cnt;
+	uint64_t cf, ocf = 0;
+	CHECK_FLAGS(emu);
+	s = s&0x3f;
+
+	/* rotate right through carry */
+	res = d;
+	if ((cnt = s % 65) != 0) {
+		cf = (d >> (cnt - 1)) & 0x1;
+		if (cnt == 1) {
+			ocf = ACCESS_FLAG(F_CF) != 0;
+		}
+		/* an unsigned right shift already zero-fills the top cnt bits */
+		res = d >> cnt;
+		/* for cnt == 1 nothing wraps around (and d << 64 is undefined) */
+		if (cnt != 1)
+			res |= (d << (65 - cnt));
+		if (ACCESS_FLAG(F_CF)) {     /* old carry goes to bit 64-cnt */
+			res |= (uint64_t)1 << (64 - cnt);
+		}
+		CONDITIONAL_SET_FLAG(cf, F_CF);
+		if (cnt == 1) {
+			CONDITIONAL_SET_FLAG(XOR2(ocf + ((d >> 62) & 0x2)),
+								 F_OF);
+		}
+	}
+	return res;
+}
+/****************************************************************************
+REMARKS:
+Implements the ROL instruction and side effects.
+****************************************************************************/
+uint8_t rol8(x64emu_t *emu, uint8_t d, uint8_t s)
+{
+	/*
+	 * ROL (8-bit): rotate d left by (s & 0x1f) % 8 bits.
+	 * A masked count of 0 returns d without touching any flag.
+	 * Otherwise CF becomes the LSB of the result, and for a masked count
+	 * of exactly 1, OF becomes LSB XOR MSB of the result.
+	 */
+	unsigned shift;
+
+	s = s&0x1f;
+	if(!s) return d;
+
+	shift = s % 8;
+	if(shift != 0) {
+		/* the two halves occupy disjoint bits, so OR equals the sum */
+		d = (d << shift) | ((d >> (8 - shift)) & ((1 << shift) - 1));
+	}
+	RESET_FLAGS(emu);
+
+	if(s == 1) {
+		CONDITIONAL_SET_FLAG((d + (d >> 7)) & 1, F_OF);
+	}
+
+	CONDITIONAL_SET_FLAG(d & 0x1, F_CF);
+
+	return d;
+}
+
+uint16_t rol16(x64emu_t *emu, uint16_t d, uint8_t s)
+{
+	/*
+	 * ROL (16-bit): rotate d left by (s & 0x1f) % 16 bits.
+	 * A masked count of 0 returns d without touching any flag.
+	 * Otherwise CF becomes the LSB of the result, and for a masked count
+	 * of exactly 1, OF becomes LSB XOR MSB of the result.
+	 */
+	unsigned shift;
+
+	s = s&0x1f;
+	if(!s) return d;
+
+	shift = s % 16;
+	if(shift != 0) {
+		/* the two halves occupy disjoint bits, so OR equals the sum */
+		d = (d << shift) | ((d >> (16 - shift)) & ((1 << shift) - 1));
+	}
+	RESET_FLAGS(emu);
+
+	if(s == 1) {
+		CONDITIONAL_SET_FLAG((d + (d >> 15)) & 1, F_OF);
+	}
+
+	CONDITIONAL_SET_FLAG(d & 0x1, F_CF);
+
+	return d;
+}
+
+uint32_t rol32(x64emu_t *emu, uint32_t d, uint8_t s)
+{
+	/*
+	 * ROL (32-bit): rotate d left by s & 0x1f bits.
+	 * A masked count of 0 returns d without touching any flag.
+	 * Otherwise CF becomes the LSB of the result, and for a count of
+	 * exactly 1, OF becomes LSB XOR MSB of the result.
+	 */
+	unsigned cnt;
+
+	s = s&0x1f;
+	if(!s) return d;
+
+	if((cnt = s % 32) != 0) {
+		/* d is unsigned, so the right shift leaves exactly the cnt
+		   wrapped bits; no mask is needed (the former (1 << cnt) - 1
+		   overflowed int for cnt == 31) */
+		d = (d << cnt) | (d >> (32 - cnt));
+	}
+	RESET_FLAGS(emu);
+
+	/* OF flag is set if s == 1; OF = CF _XOR_ MSB of result */
+	if(s == 1) {
+		CONDITIONAL_SET_FLAG((d + (d >> 31)) & 1, F_OF);
+	}
+
+	/* set new CF; note that it is the LSB of the result */
+	CONDITIONAL_SET_FLAG(d & 0x1, F_CF);
+
+	return d;
+}
+
+uint64_t rol64(x64emu_t *emu, uint64_t d, uint8_t s)
+{
+	/*
+	 * ROL (64-bit): rotate d left by s & 0x3f bits.
+	 * A masked count of 0 returns d without touching any flag.
+	 * Otherwise CF becomes the LSB of the result, and for a count of
+	 * exactly 1, OF becomes LSB XOR MSB of the result.
+	 */
+	unsigned cnt;
+
+	s = s&0x3f;
+	if(!s) return d;
+
+	if((cnt = s % 64) != 0) {
+		/* d is unsigned, so the right shift leaves exactly the cnt
+		   wrapped bits; no mask is needed (the former (1L << cnt) - 1
+		   overflowed a signed long for cnt == 63) */
+		d = (d << cnt) | (d >> (64 - cnt));
+	}
+	RESET_FLAGS(emu);
+
+	/* OF flag is set if s == 1; OF = CF _XOR_ MSB of result */
+	if(s == 1) {
+		CONDITIONAL_SET_FLAG((d + (d >> 63)) & 1, F_OF);
+	}
+
+	/* set new CF; note that it is the LSB of the result */
+	CONDITIONAL_SET_FLAG(d & 0x1, F_CF);
+
+	return d;
+}
+
+/****************************************************************************
+REMARKS:
+Implements the ROR instruction and side effects.
+****************************************************************************/
+uint8_t ror8(x64emu_t *emu, uint8_t d, uint8_t s)
+{
+	/*
+	 * ROR (8-bit): rotate d right by (s & 0x1f) % 8 bits.
+	 * A masked count of 0 returns d without touching any flag.
+	 * Otherwise CF becomes the MSB of the result, and for a masked count
+	 * of exactly 1, OF becomes the XOR of the top two result bits.
+	 */
+	unsigned shift;
+
+	s = s&0x1f;
+	if(!s) return d;
+
+	shift = s % 8;
+	if(shift != 0) {
+		/* the two halves occupy disjoint bits, so OR equals the sum */
+		d = (d << (8 - shift)) | ((d >> (shift)) & ((1 << (8 - shift)) - 1));
+	}
+	RESET_FLAGS(emu);
+
+	if(s == 1) {
+		CONDITIONAL_SET_FLAG(XOR2(d >> 6), F_OF);
+	}
+
+	CONDITIONAL_SET_FLAG(d & (1 << 7), F_CF);
+
+	return d;
+}
+
+uint16_t ror16(x64emu_t *emu, uint16_t d, uint8_t s)
+{
+	/*
+	 * ROR (16-bit): rotate d right by (s & 0x1f) % 16 bits.
+	 * A masked count of 0 returns d without touching any flag.
+	 * Otherwise CF becomes the MSB of the result, and for a masked count
+	 * of exactly 1, OF becomes the XOR of the top two result bits.
+	 */
+	unsigned shift;
+
+	s = s&0x1f;
+	if(!s) return d;
+
+	shift = s % 16;
+	if(shift != 0) {
+		/* the two halves occupy disjoint bits, so OR equals the sum */
+		d = (d << (16 - shift)) | ((d >> (shift)) & ((1 << (16 - shift)) - 1));
+	}
+	RESET_FLAGS(emu);
+
+	if(s == 1) {
+		CONDITIONAL_SET_FLAG(XOR2(d >> 14), F_OF);
+	}
+
+	CONDITIONAL_SET_FLAG(d & (1 << 15), F_CF);
+
+	return d;
+}
+
+uint32_t ror32(x64emu_t *emu, uint32_t d, uint8_t s)
+{
+	/*
+	 * ROR (32-bit): rotate d right by s & 0x1f bits.
+	 * A masked count of 0 returns d without touching any flag.
+	 * Otherwise CF becomes the MSB of the result, and for a count of
+	 * exactly 1, OF becomes the XOR of the top two result bits.
+	 */
+	unsigned cnt;
+
+	s = s&0x1f;
+	if(!s) return d;
+
+	if((cnt = s % 32) != 0) {
+		/* d is unsigned, so the right shift is already zero-filled; no
+		   mask is needed (the former 1 << (32 - cnt) overflowed int
+		   for cnt == 1) */
+		d = (d << (32 - cnt)) | (d >> cnt);
+	}
+	RESET_FLAGS(emu);
+
+	/* OF flag is set if s == 1; OF = MSB _XOR_ (M-1)SB of result */
+	if(s == 1) {
+		CONDITIONAL_SET_FLAG(XOR2(d >> 30), F_OF);
+	}
+
+	/* set new CF; it is the MSB of the result (1 << 31 on int overflows) */
+	CONDITIONAL_SET_FLAG((d >> 31) & 1, F_CF);
+
+	return d;
+}
+
+uint64_t ror64(x64emu_t *emu, uint64_t d, uint8_t s)
+{
+	/*
+	 * ROR (64-bit): rotate d right by s & 0x3f bits.
+	 * A masked count of 0 returns d without touching any flag.
+	 * Otherwise CF becomes the MSB of the result, and for a count of
+	 * exactly 1, OF becomes the XOR of the top two result bits.
+	 */
+	unsigned cnt;
+
+	s = s&0x3f;
+	if(!s) return d;
+
+	if((cnt = s % 64) != 0) {
+		/* d is unsigned, so the right shift is already zero-filled; no
+		   mask is needed (the former 1L << (64 - cnt) overflowed a
+		   signed long for cnt == 1) */
+		d = (d << (64 - cnt)) | (d >> cnt);
+	}
+	RESET_FLAGS(emu);
+
+	/* OF flag is set if s == 1; OF = MSB _XOR_ (M-1)SB of result */
+	if(s == 1) {
+		CONDITIONAL_SET_FLAG(XOR2(d >> 62), F_OF);
+	}
+
+	/* set new CF; it is the MSB of the result (1L << 63 overflows) */
+	CONDITIONAL_SET_FLAG((d >> 63) & 1, F_CF);
+
+	return d;
+}
+/****************************************************************************
+REMARKS:
+Implements the SHLD instruction and side effects.
+****************************************************************************/
+uint16_t shld16 (x64emu_t *emu, uint16_t d, uint16_t fill, uint8_t s)
+{
+	/*
+	 * SHLD (16-bit): shift d left by s & 0x1f, pulling bits in from the
+	 * top of fill.
+	 *  - count 0:      d is returned; only OF is cleared.
+	 *  - count 1..15:  CF is the last bit shifted out of d; ZF/SF/PF follow
+	 *                  the result; OF is MSB XOR CF for count 1, else 0.
+	 *  - count 16..31: the roles of d and fill are swapped using
+	 *                  count % 16; OF is cleared.
+	 */
+	unsigned int n, shifted, carry_out;
+	RESET_FLAGS(emu);
+
+	s = s&0x1f;
+	n = s % 16;
+
+	if (s >= 16) {
+		shifted = (fill << (n)) | (d >> (16 - n));
+		carry_out = (s == 16) ? (d & 1) : (fill & (1 << (16 - n)));
+		CONDITIONAL_SET_FLAG(carry_out, F_CF);
+		CONDITIONAL_SET_FLAG((shifted & 0xffff) == 0, F_ZF);
+		CONDITIONAL_SET_FLAG(shifted & 0x8000, F_SF);
+		CONDITIONAL_SET_FLAG(PARITY(shifted & 0xff), F_PF);
+		CLEAR_FLAG(F_OF);
+		return (uint16_t)shifted;
+	}
+
+	if (n == 0) {
+		CLEAR_FLAG(F_OF);
+		return d;
+	}
+
+	shifted = (d << n) | (fill >> (16-n));
+	carry_out = d & (1 << (16 - n));
+	CONDITIONAL_SET_FLAG(carry_out, F_CF);
+	CONDITIONAL_SET_FLAG((shifted & 0xffff) == 0, F_ZF);
+	CONDITIONAL_SET_FLAG(shifted & 0x8000, F_SF);
+	CONDITIONAL_SET_FLAG(PARITY(shifted & 0xff), F_PF);
+	if (n == 1) {
+		/* uses the CF that was just written above */
+		CONDITIONAL_SET_FLAG((((shifted & 0x8000) == 0x8000) ^
+							  (ACCESS_FLAG(F_CF) != 0)), F_OF);
+	} else {
+		CLEAR_FLAG(F_OF);
+	}
+	return (uint16_t)shifted;
+}
+
+uint32_t shld32 (x64emu_t *emu, uint32_t d, uint32_t fill, uint8_t s)
+{
+	/*
+	 * SHLD (32-bit): shift d left by s & 0x1f, pulling bits in from the
+	 * top of fill.
+	 * For a count of 0, d is returned and only OF is cleared. Otherwise CF
+	 * is the last bit shifted out of d, ZF/SF/PF follow the result, and OF
+	 * is MSB XOR CF for a count of 1 and cleared for larger counts.
+	 */
+	unsigned int cnt, res, cf;
+	RESET_FLAGS(emu);
+
+	s = s&0x1f;
+	cnt = s % 32;
+	if (cnt > 0) {
+		res = (d << cnt) | (fill >> (32-cnt));
+		/* extract the bit by shifting d down: 1 << 31 on int overflows */
+		cf = (d >> (32 - cnt)) & 1;
+		CONDITIONAL_SET_FLAG(cf, F_CF);
+		CONDITIONAL_SET_FLAG(!res, F_ZF);
+		CONDITIONAL_SET_FLAG(res & 0x80000000, F_SF);
+		CONDITIONAL_SET_FLAG(PARITY(res & 0xff), F_PF);
+	} else {
+		res = d;
+	}
+	if (cnt == 1) {
+		/* uses the CF that was just written above */
+		CONDITIONAL_SET_FLAG((((res & 0x80000000) == 0x80000000) ^
+								(ACCESS_FLAG(F_CF) != 0)), F_OF);
+	} else {
+		CLEAR_FLAG(F_OF);
+	}
+	return res;
+}
+
+uint64_t shld64 (x64emu_t *emu, uint64_t d, uint64_t fill, uint8_t s)
+{
+	/*
+	 * SHLD (64-bit): shift d left by s & 0x3f, pulling bits in from the
+	 * top of fill.
+	 * For a count of 0, d is returned and only OF is cleared. Otherwise CF
+	 * is the last bit shifted out of d, ZF/SF/PF follow the result, and OF
+	 * is MSB XOR CF for a count of 1 and cleared for larger counts.
+	 *
+	 * The result is kept in a 64-bit variable; a 32-bit temporary would
+	 * drop the upper half of the value and make SF/OF always read as 0.
+	 */
+	unsigned int cnt;
+	uint64_t res;
+	RESET_FLAGS(emu);
+
+	s = s&0x3f;
+	cnt = s % 64;
+	if (cnt > 0) {
+		res = (d << cnt) | (fill >> (64-cnt));
+		/* bit (64 - cnt) of d is the last one shifted out */
+		CONDITIONAL_SET_FLAG((d >> (64 - cnt)) & 1, F_CF);
+		CONDITIONAL_SET_FLAG(!res, F_ZF);
+		CONDITIONAL_SET_FLAG((res >> 63) & 1, F_SF);
+		CONDITIONAL_SET_FLAG(PARITY(res & 0xff), F_PF);
+	} else {
+		res = d;
+	}
+	if (cnt == 1) {
+		/* uses the CF that was just written above */
+		CONDITIONAL_SET_FLAG((((res >> 63) & 1) ^
+								(ACCESS_FLAG(F_CF) != 0)), F_OF);
+	} else {
+		CLEAR_FLAG(F_OF);
+	}
+	return res;
+}
+
+/****************************************************************************
+REMARKS:
+Implements the SHRD instruction and side effects.
+****************************************************************************/
+uint16_t shrd16 (x64emu_t *emu, uint16_t d, uint16_t fill, uint8_t s)
+{
+	/*
+	 * SHRD (16-bit): shift d right by s & 0x1f, pulling bits in from the
+	 * bottom of fill.
+	 *  - count 0:      d is returned; only OF is cleared.
+	 *  - count 1..15:  CF is the last bit shifted out of d; ZF/SF/PF follow
+	 *                  the result; OF is the XOR of the top two result bits
+	 *                  for count 1, else 0.
+	 *  - count 16..31: the roles of d and fill are swapped using
+	 *                  count % 16; OF is cleared.
+	 */
+	unsigned int cnt, res, cf;
+	RESET_FLAGS(emu);
+
+	s = s&0x1f;
+	cnt = s % 16;
+	if (s < 16) {
+		if (cnt > 0) {
+			cf = d & (1 << (cnt - 1));
+			res = (d >> cnt) | (fill << (16 - cnt));
+			CONDITIONAL_SET_FLAG(cf, F_CF);
+			CONDITIONAL_SET_FLAG((res & 0xffff) == 0, F_ZF);
+			CONDITIONAL_SET_FLAG(res & 0x8000, F_SF);
+			CONDITIONAL_SET_FLAG(PARITY(res & 0xff), F_PF);
+		} else {
+			res = d;
+		}
+
+		if (cnt == 1) {
+			CONDITIONAL_SET_FLAG(XOR2(res >> 14), F_OF);
+		} else {
+			CLEAR_FLAG(F_OF);
+		}
+	} else {
+		/* With s == 16, cnt is 0 and 1 << (cnt - 1) would shift by a
+		   huge unsigned amount (undefined). The last bit leaving d in a
+		   16-bit shift is its MSB, mirroring the s == 16 case of shld16. */
+		if (s == 16)
+			cf = d & 0x8000;
+		else
+			cf = fill & (1 << (cnt - 1));
+		res = (fill >> cnt) | (d << (16 - cnt));
+		CONDITIONAL_SET_FLAG(cf, F_CF);
+		CONDITIONAL_SET_FLAG((res & 0xffff) == 0, F_ZF);
+		CONDITIONAL_SET_FLAG(res & 0x8000, F_SF);
+		CONDITIONAL_SET_FLAG(PARITY(res & 0xff), F_PF);
+		CLEAR_FLAG(F_OF);
+	}
+	return (uint16_t)res;
+}
+
+/*
+ * 32-bit SHRD: shift d right by (s & 0x1f), filling the top from fill.
+ * CF receives the last bit shifted out; ZF/SF/PF reflect the result.
+ * OF is computed for a 1-bit shift and cleared for every other count.
+ */
+uint32_t shrd32 (x64emu_t *emu, uint32_t d, uint32_t fill, uint8_t s)
+{
+	unsigned int count, result, carry;
+	RESET_FLAGS(emu);
+
+	s = s&0x1f;
+	count = s % 32;
+	if (count == 0) {
+		/* nothing to shift: value passes through, only OF is cleared */
+		CLEAR_FLAG(F_OF);
+		return d;
+	}
+
+	carry = d & (1 << (count - 1));
+	result = (d >> count) | (fill << (32 - count));
+	CONDITIONAL_SET_FLAG(carry, F_CF);
+	CONDITIONAL_SET_FLAG(!result, F_ZF);
+	CONDITIONAL_SET_FLAG(result & 0x80000000, F_SF);
+	CONDITIONAL_SET_FLAG(PARITY(result & 0xff), F_PF);
+
+	if (count == 1) {
+		CONDITIONAL_SET_FLAG(XOR2(result >> 30), F_OF);
+	} else {
+		CLEAR_FLAG(F_OF);
+	}
+	return result;
+}
+
+/*
+ * 64-bit SHRD: shift d right by (s & 0x3f), filling the top from fill.
+ * CF receives the last bit shifted out; ZF/SF/PF reflect the result.
+ * OF is computed for a 1-bit shift and cleared for every other count.
+ */
+uint64_t shrd64 (x64emu_t *emu, uint64_t d, uint64_t fill, uint8_t s)
+{
+	unsigned int cnt;
+	uint64_t res, cf;
+	RESET_FLAGS(emu);
+
+	cnt = s & 0x3f;
+	if (cnt > 0) {
+		/* the mask must be built from a 64-bit 1: (cnt - 1) can reach 62 */
+		cf = d & ((uint64_t)1 << (cnt - 1));
+		res = (d >> cnt) | (fill << (64 - cnt));
+		CONDITIONAL_SET_FLAG(cf, F_CF);
+		CONDITIONAL_SET_FLAG(!res, F_ZF);
+		CONDITIONAL_SET_FLAG(res & 0x8000000000000000L, F_SF);
+		CONDITIONAL_SET_FLAG(PARITY(res & 0xff), F_PF);
+	} else {
+		res = d;
+	}
+	if (cnt == 1) {
+		CONDITIONAL_SET_FLAG(XOR2(res >> 62), F_OF);
+	} else {
+		CLEAR_FLAG(F_OF);
+	}
+	return res;
+}
+/****************************************************************************
+REMARKS:
+Implements the SBB instruction and side effects.
+****************************************************************************/
+/*
+ * 8-bit SBB: returns d - s - CF.
+ * SF/ZF/PF come from the result; CF/OF/AF come from the borrow chain.
+ */
+uint8_t sbb8(x64emu_t *emu, uint8_t d, uint8_t s)
+{
+	uint32_t diff;		/* widened difference */
+	uint32_t borrow;	/* per-bit borrow chain */
+	CHECK_FLAGS(emu);
+
+	diff = (ACCESS_FLAG(F_CF)) ? d - s - 1 : d - s;
+	CONDITIONAL_SET_FLAG(diff & 0x80, F_SF);
+	CONDITIONAL_SET_FLAG((diff & 0xff) == 0, F_ZF);
+	CONDITIONAL_SET_FLAG(PARITY(diff & 0xff), F_PF);
+
+	/* borrow out of each bit position */
+	borrow = (diff & (~d | s)) | (~d & s);
+	CONDITIONAL_SET_FLAG(borrow & 0x80, F_CF);
+	CONDITIONAL_SET_FLAG(XOR2(borrow >> 6), F_OF);
+	CONDITIONAL_SET_FLAG(borrow & 0x8, F_AF);
+	return (uint8_t)diff;
+}
+
+/*
+ * 16-bit SBB: returns d - s - CF.
+ * SF/ZF/PF come from the result; CF/OF/AF come from the borrow chain.
+ */
+uint16_t sbb16(x64emu_t *emu, uint16_t d, uint16_t s)
+{
+	uint32_t diff;		/* widened difference */
+	uint32_t borrow;	/* per-bit borrow chain */
+	CHECK_FLAGS(emu);
+
+	diff = (ACCESS_FLAG(F_CF)) ? d - s - 1 : d - s;
+	CONDITIONAL_SET_FLAG(diff & 0x8000, F_SF);
+	CONDITIONAL_SET_FLAG((diff & 0xffff) == 0, F_ZF);
+	CONDITIONAL_SET_FLAG(PARITY(diff & 0xff), F_PF);
+
+	/* borrow out of each bit position */
+	borrow = (diff & (~d | s)) | (~d & s);
+	CONDITIONAL_SET_FLAG(borrow & 0x8000, F_CF);
+	CONDITIONAL_SET_FLAG(XOR2(borrow >> 14), F_OF);
+	CONDITIONAL_SET_FLAG(borrow & 0x8, F_AF);
+	return (uint16_t)diff;
+}
+
+/*
+ * 32-bit SBB: returns d - s - CF.
+ * SF/ZF/PF come from the result; CF/OF/AF come from the borrow chain.
+ */
+uint32_t sbb32(x64emu_t *emu, uint32_t d, uint32_t s)
+{
+	uint32_t diff;		/* difference, wraps modulo 2^32 */
+	uint32_t borrow;	/* per-bit borrow chain */
+	CHECK_FLAGS(emu);
+
+	diff = (ACCESS_FLAG(F_CF)) ? d - s - 1 : d - s;
+	CONDITIONAL_SET_FLAG(diff & 0x80000000, F_SF);
+	CONDITIONAL_SET_FLAG(!diff, F_ZF);
+	CONDITIONAL_SET_FLAG(PARITY(diff & 0xff), F_PF);
+
+	/* borrow out of each bit position */
+	borrow = (diff & (~d | s)) | (~d & s);
+	CONDITIONAL_SET_FLAG(borrow & 0x80000000, F_CF);
+	CONDITIONAL_SET_FLAG(XOR2(borrow >> 30), F_OF);
+	CONDITIONAL_SET_FLAG(borrow & 0x8, F_AF);
+	return diff;
+}
+
+/*
+ * 64-bit SBB: returns d - s - CF.
+ * SF/ZF/PF come from the result; CF/OF/AF come from the borrow chain.
+ */
+uint64_t sbb64(x64emu_t *emu, uint64_t d, uint64_t s)
+{
+	uint64_t diff;		/* difference, wraps modulo 2^64 */
+	uint64_t borrow;	/* per-bit borrow chain */
+	CHECK_FLAGS(emu);
+
+	diff = (ACCESS_FLAG(F_CF)) ? d - s - 1 : d - s;
+	CONDITIONAL_SET_FLAG(diff & 0x8000000000000000L, F_SF);
+	CONDITIONAL_SET_FLAG(!diff, F_ZF);
+	CONDITIONAL_SET_FLAG(PARITY(diff & 0xff), F_PF);
+
+	/* borrow out of each bit position */
+	borrow = (diff & (~d | s)) | (~d & s);
+	CONDITIONAL_SET_FLAG(borrow & 0x8000000000000000L, F_CF);
+	CONDITIONAL_SET_FLAG(XOR2(borrow >> 62), F_OF);
+	CONDITIONAL_SET_FLAG(borrow & 0x8, F_AF);
+	return diff;
+}
+
+/****************************************************************************
+REMARKS:
+Implements the TEST instruction and side effects.
+****************************************************************************/
+/* 8-bit TEST: flags from d & s; the AND result itself is discarded. */
+void test8(x64emu_t *emu, uint8_t d, uint8_t s)
+{
+	const uint32_t masked = d & s;
+	RESET_FLAGS(emu);
+
+	/* CF and OF are always cleared; AF is left alone */
+	CLEAR_FLAG(F_CF);
+	CLEAR_FLAG(F_OF);
+	CONDITIONAL_SET_FLAG(masked == 0, F_ZF);
+	CONDITIONAL_SET_FLAG(masked & 0x80, F_SF);
+	CONDITIONAL_SET_FLAG(PARITY(masked & 0xff), F_PF);
+}
+
+/* 16-bit TEST: flags from d & s; the AND result itself is discarded. */
+void test16(x64emu_t *emu, uint16_t d, uint16_t s)
+{
+	const uint32_t masked = d & s;
+	RESET_FLAGS(emu);
+
+	/* CF and OF are always cleared; AF is left alone */
+	CLEAR_FLAG(F_CF);
+	CLEAR_FLAG(F_OF);
+	CONDITIONAL_SET_FLAG(masked == 0, F_ZF);
+	CONDITIONAL_SET_FLAG(masked & 0x8000, F_SF);
+	CONDITIONAL_SET_FLAG(PARITY(masked & 0xff), F_PF);
+}
+
+/* 32-bit TEST: flags from d & s; the AND result itself is discarded. */
+void test32(x64emu_t *emu, uint32_t d, uint32_t s)
+{
+	const uint32_t masked = d & s;
+	RESET_FLAGS(emu);
+
+	/* CF and OF are always cleared; AF is left alone */
+	CLEAR_FLAG(F_CF);
+	CLEAR_FLAG(F_OF);
+	CONDITIONAL_SET_FLAG(masked == 0, F_ZF);
+	CONDITIONAL_SET_FLAG(masked & 0x80000000, F_SF);
+	CONDITIONAL_SET_FLAG(PARITY(masked & 0xff), F_PF);
+}
+
+/* 64-bit TEST: flags from d & s; the AND result itself is discarded. */
+void test64(x64emu_t *emu, uint64_t d, uint64_t s)
+{
+	const uint64_t masked = d & s;
+	RESET_FLAGS(emu);
+
+	/* CF and OF are always cleared; AF is left alone */
+	CLEAR_FLAG(F_CF);
+	CLEAR_FLAG(F_OF);
+	CONDITIONAL_SET_FLAG(masked == 0, F_ZF);
+	CONDITIONAL_SET_FLAG(masked & 0x8000000000000000L, F_SF);
+	CONDITIONAL_SET_FLAG(PARITY(masked & 0xff), F_PF);
+}
+
+/****************************************************************************
+REMARKS:
+Implements the IDIV instruction and side effects.
+****************************************************************************/
+/*
+ * 8-bit IDIV: signed divide of AX by s; quotient goes to AL, remainder
+ * to AH. Raises the divide error for a zero divisor or when the quotient
+ * does not fit in a signed byte (valid range is -128..127).
+ */
+void idiv8(x64emu_t *emu, uint8_t s)
+{
+	int32_t dvd, quot, mod;
+	RESET_FLAGS(emu);
+
+	if (s == 0) {
+		INTR_RAISE_DIV0(emu);
+		return;
+	}
+	dvd = (int16_t)R_AX;
+	/* C division truncates toward zero, like IDIV; no overflow is possible here */
+	quot = dvd / (int8_t)s;
+	mod = dvd % (int8_t)s;
+	/* explicit bounds: abs() would wrongly reject the legal quotient -128 */
+	if (quot > 0x7f || quot < -0x80) {
+		INTR_RAISE_DIV0(emu);
+		return;
+	}
+	R_AL = (int8_t) quot;
+	R_AH = (int8_t) mod;
+}
+
+/*
+ * 16-bit IDIV: signed divide of DX:AX by s; quotient goes to AX,
+ * remainder to DX. Raises the divide error for a zero divisor or when
+ * the quotient does not fit in a signed word (valid range -32768..32767).
+ */
+void idiv16(x64emu_t *emu, uint16_t s)
+{
+	int32_t dvd, quot, mod;
+	int64_t wide_quot;
+	RESET_FLAGS(emu);	/* flags are written directly below, as in idiv8/idiv32 */
+
+	if (s == 0) {
+		INTR_RAISE_DIV0(emu);
+		return;
+	}
+	/* assemble as unsigned so a set top bit of DX is not shifted into the sign bit */
+	dvd = (int32_t)((((uint32_t)R_DX) << 16) | R_AX);
+	/* divide in 64 bits: 0x80000000 / -1 would overflow a 32-bit division */
+	wide_quot = (int64_t)dvd / (int16_t)s;
+	mod = (int32_t)((int64_t)dvd % (int16_t)s);
+	/* explicit bounds: abs() would wrongly reject the legal quotient -32768 */
+	if (wide_quot > 0x7fff || wide_quot < -0x8000) {
+		INTR_RAISE_DIV0(emu);
+		return;
+	}
+	quot = (int32_t)wide_quot;
+	CLEAR_FLAG(F_CF);
+	CLEAR_FLAG(F_SF);
+	CONDITIONAL_SET_FLAG(quot == 0, F_ZF);
+	CONDITIONAL_SET_FLAG(PARITY(mod & 0xff), F_PF);
+
+	R_AX = (uint16_t)quot;
+	R_DX = (uint16_t)mod;
+}
+
+/*
+ * 32-bit IDIV: signed divide of EDX:EAX by s; quotient goes to EAX,
+ * remainder to EDX. Raises the divide error for a zero divisor or when
+ * the quotient does not fit in a signed dword (valid range -2^31..2^31-1).
+ */
+void idiv32(x64emu_t *emu, uint32_t s)
+{
+	int64_t dvd, quot, mod;
+	const int64_t divisor = (int32_t)s;
+	RESET_FLAGS(emu);
+
+	if (s == 0) {
+		INTR_RAISE_DIV0(emu);
+		return;
+	}
+	/* assemble as unsigned so a set top bit of EDX is not shifted into the sign bit */
+	dvd = (int64_t)((((uint64_t)R_EDX) << 32) | R_EAX);
+	/* INT64_MIN / -1 overflows the host division; that quotient is out of range anyway */
+	if (divisor == -1 && dvd == INT64_MIN) {
+		INTR_RAISE_DIV0(emu);
+		return;
+	}
+	quot = dvd / divisor;
+	mod = dvd % divisor;
+	/* explicit bounds: llabs() would wrongly reject the legal quotient -2^31 */
+	if (quot > 0x7fffffffLL || quot < -0x80000000LL) {
+		INTR_RAISE_DIV0(emu);
+		return;
+	}
+	CLEAR_FLAG(F_CF);
+	CLEAR_FLAG(F_AF);
+	CLEAR_FLAG(F_SF);
+	/* NOTE(review): ZF is set unconditionally here, unlike idiv16 - confirm intent */
+	SET_FLAG(F_ZF);
+	CONDITIONAL_SET_FLAG(PARITY(mod & 0xff), F_PF);
+
+	R_EAX = (uint32_t)quot;
+	R_EDX = (uint32_t)mod;
+}
+
+/****************************************************************************
+REMARKS:
+Implements the DIV instruction and side effects.
+****************************************************************************/
+/*
+ * 8-bit DIV: unsigned divide of AX by s; quotient goes to AL, remainder
+ * to AH. Raises the divide error for a zero divisor or a quotient above 0xff.
+ */
+void div8(x64emu_t *emu, uint8_t s)
+{
+	uint32_t dividend, quot, rem;
+	RESET_FLAGS(emu);
+
+	if (s == 0) {
+		INTR_RAISE_DIV0(emu);
+		return;
+	}
+	dividend = R_AX;
+	quot = dividend / (uint8_t)s;
+	rem = dividend % (uint8_t)s;
+	if (quot > 0xff) {
+		INTR_RAISE_DIV0(emu);
+		return;
+	}
+	R_AL = (uint8_t)quot;
+	R_AH = (uint8_t)rem;
+}
+
+/*
+ * 16-bit DIV: unsigned divide of DX:AX by s; quotient goes to AX,
+ * remainder to DX. Raises the divide error for a zero divisor or a
+ * quotient above 0xffff.
+ */
+void div16(x64emu_t *emu, uint16_t s)
+{
+	uint32_t dividend, quot, rem;
+	RESET_FLAGS(emu);
+
+	if (s == 0) {
+		INTR_RAISE_DIV0(emu);
+		return;
+	}
+	dividend = (((uint32_t)R_DX) << 16) | R_AX;
+	quot = dividend / (uint16_t)s;
+	rem = dividend % (uint16_t)s;
+	if (quot > 0xffff) {
+		INTR_RAISE_DIV0(emu);
+		return;
+	}
+	CLEAR_FLAG(F_CF);
+	CLEAR_FLAG(F_SF);
+	CONDITIONAL_SET_FLAG(quot == 0, F_ZF);
+	CONDITIONAL_SET_FLAG(PARITY(rem & 0xff), F_PF);
+
+	R_AX = (uint16_t)quot;
+	R_DX = (uint16_t)rem;
+}
+
+/*
+ * 32-bit DIV: unsigned divide of EDX:EAX by s; quotient goes to EAX,
+ * remainder to EDX. Raises the divide error for a zero divisor or a
+ * quotient above 0xffffffff.
+ */
+void div32(x64emu_t *emu, uint32_t s)
+{
+	uint64_t dividend, quot, rem;
+	RESET_FLAGS(emu);
+
+	if (s == 0) {
+		INTR_RAISE_DIV0(emu);
+		return;
+	}
+	dividend = (((uint64_t)R_EDX) << 32) | R_EAX;
+	quot = dividend / (uint32_t)s;
+	rem = dividend % (uint32_t)s;
+	if (quot > 0xffffffff) {
+		INTR_RAISE_DIV0(emu);
+		return;
+	}
+	CLEAR_FLAG(F_CF);
+	CLEAR_FLAG(F_AF);
+	CLEAR_FLAG(F_SF);
+	SET_FLAG(F_ZF);		/* set unconditionally, independent of the quotient */
+	CONDITIONAL_SET_FLAG(PARITY(rem & 0xff), F_PF);
+
+	R_EAX = (uint32_t)quot;
+	R_EDX = (uint32_t)rem;
+}
diff --git a/src/emu/x64primop.h b/src/emu/x64primop.h
new file mode 100755
index 00000000..377ae9aa
--- /dev/null
+++ b/src/emu/x64primop.h
@@ -0,0 +1,576 @@
+#ifndef __X64PRIMOP_H_
+#define __X64PRIMOP_H_
+
+typedef struct x64emu_s x64emu_t;
+
+// Based on libx86emu
+
+// Out-of-line primitives: only the prototypes live in this header.
+// Each takes the emulator state plus its operand(s) and returns the result.
+uint16_t     aaa16 (x64emu_t *emu, uint16_t d);
+uint16_t     aas16 (x64emu_t *emu, uint16_t d);
+uint16_t     aad16 (x64emu_t *emu, uint16_t d, uint8_t base);
+uint16_t     aam16 (x64emu_t *emu, uint8_t d, uint8_t base);
+uint8_t      adc8  (x64emu_t *emu, uint8_t d, uint8_t s);
+uint16_t     adc16 (x64emu_t *emu, uint16_t d, uint16_t s);
+uint32_t     adc32 (x64emu_t *emu, uint32_t d, uint32_t s);
+uint64_t     adc64 (x64emu_t *emu, uint64_t d, uint64_t s);
+
+// 8-bit ADD: stores both operands and the widened sum in emu, tags the
+// operation as d_add8 in emu->df, and returns the sum truncated to 8 bits.
+static inline uint8_t add8(x64emu_t *emu, uint8_t d, uint8_t s)
+{
+	emu->df = d_add8;
+	emu->op1 = d;
+	emu->op2 = s;
+	emu->res = d + s;
+	return (uint8_t)emu->res;
+}
+
+// 16-bit ADD: stores both operands and the widened sum in emu, tags the
+// operation as d_add16 in emu->df, and returns the sum truncated to 16 bits.
+static inline uint16_t add16(x64emu_t *emu, uint16_t d, uint16_t s)
+{
+	emu->df = d_add16;
+	emu->op1 = d;
+	emu->op2 = s;
+	emu->res = d + s;
+	return (uint16_t)emu->res;
+}
+
+// 32-bit ADD: stores both operands and the sum in emu, tags the operation
+// as d_add32 in emu->df, and returns the stored sum.
+static inline uint32_t add32(x64emu_t *emu, uint32_t d, uint32_t s)
+{
+	emu->df = d_add32;
+	emu->op1 = d;
+	emu->op2 = s;
+	emu->res = d + s;
+	return emu->res;
+}
+
+// 64-bit ADD: stores both operands and the sum in emu, tags the operation
+// as d_add64 in emu->df, and returns the stored sum.
+static inline uint64_t add64(x64emu_t *emu, uint64_t d, uint64_t s)
+{
+	emu->df = d_add64;
+	emu->op1 = d;
+	emu->op2 = s;
+	emu->res = d + s;
+	return emu->res;
+}
+
+// 8-bit AND: stores d & s in emu->res, tags the operation as d_and8 and
+// returns the stored value. The operands themselves are not recorded.
+static inline uint8_t and8(x64emu_t *emu, uint8_t d, uint8_t s)
+{
+	emu->df = d_and8;
+	emu->res = d & s;
+	return emu->res;
+}
+
+// 16-bit AND: stores d & s in emu->res, tags the operation as d_and16 and
+// returns the stored value. The operands themselves are not recorded.
+static inline uint16_t and16(x64emu_t *emu, uint16_t d, uint16_t s)
+{
+	emu->df = d_and16;
+	emu->res = d & s;
+	return emu->res;
+}
+
+// 32-bit AND: stores d & s in emu->res, tags the operation as d_and32 and
+// returns the stored value. The operands themselves are not recorded.
+static inline uint32_t and32(x64emu_t *emu, uint32_t d, uint32_t s)
+{
+	emu->df = d_and32;
+	emu->res = d & s;
+	return emu->res;
+}
+
+// 64-bit AND: stores d & s in emu->res, tags the operation as d_and64 and
+// returns the stored value. The operands themselves are not recorded.
+static inline uint64_t and64(x64emu_t *emu, uint64_t d, uint64_t s)
+{
+	emu->df = d_and64;
+	emu->res = d & s;
+	return emu->res;
+}
+
+// Out-of-line primitives: only the prototypes live in this header.
+uint8_t      cmp8  (x64emu_t *emu, uint8_t d, uint8_t s);
+uint16_t     cmp16 (x64emu_t *emu, uint16_t d, uint16_t s);
+uint32_t     cmp32 (x64emu_t *emu, uint32_t d, uint32_t s);
+uint64_t     cmp64 (x64emu_t *emu, uint64_t d, uint64_t s);
+uint8_t      daa8  (x64emu_t *emu, uint8_t d);
+uint8_t      das8  (x64emu_t *emu, uint8_t d);
+
+// 8-bit DEC: stores the operand and d - 1 in emu, tags the operation as
+// d_dec8, and returns the result truncated to 8 bits.
+static inline uint8_t dec8(x64emu_t *emu, uint8_t d)
+{
+	emu->df = d_dec8;
+	emu->op1 = d;
+	emu->res = d - 1;
+	return (uint8_t)emu->res;
+}
+
+// 16-bit DEC: stores the operand and d - 1 in emu, tags the operation as
+// d_dec16, and returns the result truncated to 16 bits.
+static inline uint16_t dec16(x64emu_t *emu, uint16_t d)
+{
+	emu->df = d_dec16;
+	emu->op1 = d;
+	emu->res = d - 1;
+	return (uint16_t)emu->res;
+}
+
+// 32-bit DEC: stores the operand and d - 1 in emu, tags the operation as
+// d_dec32, and returns the stored result.
+static inline uint32_t dec32(x64emu_t *emu, uint32_t d)
+{
+	emu->df = d_dec32;
+	emu->op1 = d;
+	emu->res = d - 1;
+	return emu->res;
+}
+
+// 64-bit DEC: stores the operand and d - 1 in emu, tags the operation as
+// d_dec64, and returns the stored result.
+static inline uint64_t dec64(x64emu_t *emu, uint64_t d)
+{
+	emu->df = d_dec64;
+	emu->op1 = d;
+	emu->res = d - 1;
+	return emu->res;
+}
+
+// 8-bit INC: stores the operand and d + 1 in emu, tags the operation as
+// d_inc8, and returns the result truncated to 8 bits.
+static inline uint8_t inc8(x64emu_t *emu, uint8_t d)
+{
+	emu->df = d_inc8;
+	emu->op1 = d;
+	emu->res = d + 1;
+	return (uint8_t)emu->res;
+}
+
+// 16-bit INC: stores the operand and d + 1 in emu, tags the operation as
+// d_inc16, and returns the result truncated to 16 bits.
+static inline uint16_t inc16(x64emu_t *emu, uint16_t d)
+{
+	emu->df = d_inc16;
+	emu->op1 = d;
+	emu->res = d + 1;
+	return (uint16_t)emu->res;
+}
+
+// 32-bit INC: stores the operand and d + 1 in emu, tags the operation as
+// d_inc32, and returns the stored result.
+// If the pending operation is a shr32, CF is first set from the last bit
+// that shift moved out of its operand (workaround for some wine trickery).
+static inline uint32_t inc32(x64emu_t *emu, uint32_t d)
+{
+	if(emu->df == d_shr32) {
+		const uint32_t shift = emu->op2;
+		if (shift != 0) {
+			const uint32_t last_out = emu->op1 & (1 << (shift - 1));
+			CONDITIONAL_SET_FLAG(last_out, F_CF);
+		}
+	}
+	emu->df = d_inc32;
+	emu->op1 = d;
+	emu->res = d + 1;
+	return emu->res;
+}
+
+// 64-bit INC: stores the operand and d + 1 in emu, tags the operation as
+// d_inc64, and returns the stored result.
+// If the pending operation is a shr64, CF is first set from the last bit
+// that shift moved out of its operand (workaround for some wine trickery).
+static inline uint64_t inc64(x64emu_t *emu, uint64_t d)
+{
+	if(emu->df == d_shr64) {
+		uint64_t cnt = emu->op2;
+		if (cnt > 0) {
+			// explicit 64-bit 1: a long is only 32 bits wide on some ABIs
+			uint64_t cc = emu->op1 & ((uint64_t)1 << (cnt - 1));
+			CONDITIONAL_SET_FLAG(cc, F_CF);
+		}
+	}
+	emu->res = d + 1;
+	emu->op1 = d;
+	emu->df = d_inc64;
+	return emu->res;
+}
+
+// 8-bit OR: stores d | s in emu->res, tags the operation as d_or8 and
+// returns the stored value.
+static inline uint8_t or8(x64emu_t *emu, uint8_t d, uint8_t s)
+{
+	emu->df = d_or8;
+	emu->res = d | s;
+	return emu->res;
+}
+
+// 16-bit OR: stores d | s in emu->res, tags the operation as d_or16 and
+// returns the stored value. No flag is written here.
+static inline uint16_t or16(x64emu_t *emu, uint16_t d, uint16_t s)
+{
+	emu->df = d_or16;
+	emu->res = d | s;
+	return emu->res;
+}
+
+// 32-bit OR: stores d | s in emu->res, tags the operation as d_or32 and
+// returns the stored value.
+static inline uint32_t or32(x64emu_t *emu, uint32_t d, uint32_t s)
+{
+	emu->df = d_or32;
+	emu->res = d | s;
+	return emu->res;
+}
+
+// 64-bit OR: stores d | s in emu->res, tags the operation as d_or64 and
+// returns the stored value.
+static inline uint64_t or64(x64emu_t *emu, uint64_t d, uint64_t s)
+{
+	emu->df = d_or64;
+	emu->res = d | s;
+	return emu->res;
+}
+
+// 8-bit NEG: stores the operand and its two's-complement negation in emu,
+// tags the operation as d_neg8, and returns the negated value.
+static inline uint8_t neg8(x64emu_t *emu, uint8_t s)
+{
+	emu->df = d_neg8;
+	emu->op1 = s;
+	emu->res = (uint8_t)(-s);
+	return emu->res;
+}
+
+// 16-bit NEG: stores the operand and its two's-complement negation in emu,
+// tags the operation as d_neg16, and returns the negated value.
+static inline uint16_t neg16(x64emu_t *emu, uint16_t s)
+{
+	emu->df = d_neg16;
+	emu->op1 = s;
+	emu->res = (uint16_t)(-s);
+	return emu->res;
+}
+
+// 32-bit NEG: stores the operand and its two's-complement negation in emu,
+// tags the operation as d_neg32, and returns the negated value.
+static inline uint32_t neg32(x64emu_t *emu, uint32_t s)
+{
+	emu->df = d_neg32;
+	emu->op1 = s;
+	emu->res = (uint32_t)(-s);
+	return emu->res;
+}
+
+// 64-bit NEG: stores the operand and its two's-complement negation in emu,
+// tags the operation as d_neg64, and returns the negated value.
+static inline uint64_t neg64(x64emu_t *emu, uint64_t s)
+{
+	emu->df = d_neg64;
+	emu->op1 = s;
+	emu->res = (uint64_t)(-s);
+	return emu->res;
+}
+
+// 8-bit NOT: returns the bitwise complement of s. emu is not used and no
+// emulator state is written.
+static inline uint8_t not8(x64emu_t *emu, uint8_t s)
+{
+	return (uint8_t)~s;
+}
+
+// 16-bit NOT: returns the bitwise complement of s. emu is not used and no
+// emulator state is written.
+static inline uint16_t not16(x64emu_t *emu, uint16_t s)
+{
+	return (uint16_t)~s;
+}
+
+// 32-bit NOT: returns the bitwise complement of s. emu is not used and no
+// emulator state is written.
+static inline uint32_t not32(x64emu_t *emu, uint32_t s)
+{
+	return (uint32_t)~s;
+}
+
+// 64-bit NOT: returns the bitwise complement of s. emu is not used and no
+// emulator state is written.
+static inline uint64_t not64(x64emu_t *emu, uint64_t s)
+{
+	return (uint64_t)~s;
+}
+
+// Rotate (rcl/rcr/rol/ror), double-shift (shld/shrd) and sbb primitives.
+// Only the prototypes live in this header; the shld/shrd/sbb bodies are
+// defined in x64primop.c. The last argument of the rotate and double-shift
+// functions is the shift/rotate count.
+uint8_t      rcl8  (x64emu_t *emu, uint8_t d, uint8_t s);
+uint16_t     rcl16 (x64emu_t *emu, uint16_t d, uint8_t s);
+uint32_t     rcl32 (x64emu_t *emu, uint32_t d, uint8_t s);
+uint64_t     rcl64 (x64emu_t *emu, uint64_t d, uint8_t s);
+uint8_t      rcr8  (x64emu_t *emu, uint8_t d, uint8_t s);
+uint16_t     rcr16 (x64emu_t *emu, uint16_t d, uint8_t s);
+uint32_t     rcr32 (x64emu_t *emu, uint32_t d, uint8_t s);
+uint64_t     rcr64 (x64emu_t *emu, uint64_t d, uint8_t s);
+uint8_t      rol8  (x64emu_t *emu, uint8_t d, uint8_t s);
+uint16_t     rol16 (x64emu_t *emu, uint16_t d, uint8_t s);
+uint32_t     rol32 (x64emu_t *emu, uint32_t d, uint8_t s);
+uint64_t     rol64 (x64emu_t *emu, uint64_t d, uint8_t s);
+uint8_t      ror8  (x64emu_t *emu, uint8_t d, uint8_t s);
+uint16_t     ror16 (x64emu_t *emu, uint16_t d, uint8_t s);
+uint32_t     ror32 (x64emu_t *emu, uint32_t d, uint8_t s);
+uint64_t     ror64 (x64emu_t *emu, uint64_t d, uint8_t s);
+uint16_t     shld16 (x64emu_t *emu, uint16_t d, uint16_t fill, uint8_t s);
+uint32_t     shld32 (x64emu_t *emu, uint32_t d, uint32_t fill, uint8_t s);
+uint64_t     shld64 (x64emu_t *emu, uint64_t d, uint64_t fill, uint8_t s);
+uint16_t     shrd16 (x64emu_t *emu, uint16_t d, uint16_t fill, uint8_t s);
+uint32_t     shrd32 (x64emu_t *emu, uint32_t d, uint32_t fill, uint8_t s);
+uint64_t     shrd64 (x64emu_t *emu, uint64_t d, uint64_t fill, uint8_t s);
+uint8_t      sbb8  (x64emu_t *emu, uint8_t d, uint8_t s);
+uint16_t     sbb16 (x64emu_t *emu, uint16_t d, uint16_t s);
+uint32_t     sbb32 (x64emu_t *emu, uint32_t d, uint32_t s);
+uint64_t     sbb64 (x64emu_t *emu, uint64_t d, uint64_t s);
+
+// 8-bit SHL: masks the count to 5 bits, stores operand, masked count and
+// d << count in emu, tags the operation as d_shl8, and returns the low byte.
+// NOTE(review): d is promoted to int before the shift, so large counts can
+// overflow the signed shift - confirm whether that matters to UpdateFlags.
+static inline uint8_t shl8(x64emu_t *emu, uint8_t d, uint8_t s)
+{
+	const uint8_t count = s & 0x1f;
+
+	emu->df = d_shl8;
+	emu->op1 = d;
+	emu->op2 = count;
+	emu->res = d << count;
+
+	return (uint8_t)emu->res;
+}
+
+// 16-bit SHL: masks the count to 5 bits, stores operand, masked count and
+// d << count in emu, tags the operation as d_shl16, and returns the low word.
+// NOTE(review): d is promoted to int before the shift, so large counts can
+// overflow the signed shift - confirm whether that matters to UpdateFlags.
+static inline uint16_t shl16(x64emu_t *emu, uint16_t d, uint8_t s)
+{
+	const uint8_t count = s & 0x1f;
+
+	emu->df = d_shl16;
+	emu->op1 = d;
+	emu->op2 = count;
+	emu->res = d << count;
+	return (uint16_t)emu->res;
+}
+
+// 32-bit SHL: masks the count to 5 bits, stores operand, masked count and
+// d << count in emu, tags the operation as d_shl32, and returns the result.
+static inline uint32_t shl32(x64emu_t *emu, uint32_t d, uint8_t s)
+{
+	const uint8_t count = s & 0x1f;
+
+	emu->df = d_shl32;
+	emu->op1 = d;
+	emu->op2 = count;
+	emu->res = d << count;
+
+	return emu->res;
+}
+
+// 64-bit SHL: masks the count to 6 bits, stores operand, masked count and
+// d << count in emu, tags the operation as d_shl64, and returns the result.
+static inline uint64_t shl64(x64emu_t *emu, uint64_t d, uint8_t s)
+{
+	const uint8_t count = s & 0x3f;
+
+	emu->df = d_shl64;
+	emu->op1 = d;
+	emu->op2 = count;
+	emu->res = d << count;
+
+	return emu->res;
+}
+
+// 8-bit SHR: masks the count to 5 bits, stores operand, masked count and
+// d >> count in emu, tags the operation as d_shr8, and returns the low byte.
+static inline uint8_t shr8(x64emu_t *emu, uint8_t d, uint8_t s)
+{
+	const uint8_t count = s & 0x1f;
+
+	emu->df = d_shr8;
+	emu->op1 = d;
+	emu->op2 = count;
+	emu->res = d >> count;
+
+	return (uint8_t)emu->res;
+}
+
+// 16-bit SHR: masks the count to 5 bits, stores operand, masked count and
+// d >> count in emu, tags the operation as d_shr16, and returns the low word.
+static inline uint16_t shr16(x64emu_t *emu, uint16_t d, uint8_t s)
+{
+	const uint8_t count = s & 0x1f;
+
+	emu->df = d_shr16;
+	emu->op1 = d;
+	emu->op2 = count;
+	emu->res = d >> count;
+
+	return (uint16_t)emu->res;
+}
+
+// 32-bit SHR: masks the count to 5 bits, stores operand, masked count and
+// d >> count in emu, tags the operation as d_shr32, and returns the result.
+static inline uint32_t shr32(x64emu_t *emu, uint32_t d, uint8_t s)
+{
+	const uint8_t count = s & 0x1f;
+
+	RESET_FLAGS(emu);	// TODO: Deferred this one?
+
+	emu->df = d_shr32;
+	emu->op1 = d;
+	emu->op2 = count;
+	emu->res = d >> count;
+
+	return emu->res;
+}
+
+// 64-bit SHR: masks the count to 6 bits, stores operand, masked count and
+// d >> count in emu, tags the operation as d_shr64, and returns the result.
+static inline uint64_t shr64(x64emu_t *emu, uint64_t d, uint8_t s)
+{
+	const uint8_t count = s & 0x3f;
+
+	RESET_FLAGS(emu);	// TODO: Deferred this one?
+
+	emu->df = d_shr64;
+	emu->op1 = d;
+	emu->op2 = count;
+	emu->res = d >> count;
+
+	return emu->res;
+}
+
+// 8-bit SAR: masks the count to 5 bits and shifts d right as a signed byte.
+// Stores operand, masked count and result in emu, tags the operation as
+// d_sar8, and returns the result.
+static inline uint8_t sar8(x64emu_t *emu, uint8_t d, uint8_t s)
+{
+	const uint8_t count = s & 0x1f;
+
+	emu->df = d_sar8;
+	emu->op1 = d;
+	emu->op2 = count;
+	emu->res = (uint8_t)(((int8_t)d)>>count);
+
+	return (uint8_t)emu->res;
+}
+
+// 16-bit SAR: masks the count to 5 bits and shifts d right as a signed word.
+// Stores operand, masked count and result in emu, tags the operation as
+// d_sar16, and returns the result.
+static inline uint16_t sar16(x64emu_t *emu, uint16_t d, uint8_t s)
+{
+	const uint8_t count = s & 0x1f;
+
+	emu->df = d_sar16;
+	emu->op1 = d;
+	emu->op2 = count;
+	emu->res = (uint16_t)(((int16_t)d)>>count);
+
+	return (uint16_t)emu->res;
+}
+
+// 32-bit SAR: masks the count to 5 bits and shifts d right as a signed dword.
+// Stores operand, masked count and result in emu, tags the operation as
+// d_sar32, and returns the result.
+static inline uint32_t sar32(x64emu_t *emu, uint32_t d, uint8_t s)
+{
+	const uint8_t count = s & 0x1f;
+
+	emu->df = d_sar32;
+	emu->op1 = d;
+	emu->op2 = count;
+	emu->res = (uint32_t)(((int32_t)d)>>count);
+
+	return emu->res;
+}
+
+// 64-bit SAR: masks the count to 6 bits and shifts d right as a signed qword.
+// Stores operand, masked count and result in emu, tags the operation as
+// d_sar64, and returns the result.
+static inline uint64_t sar64(x64emu_t *emu, uint64_t d, uint8_t s)
+{
+	const uint8_t count = s & 0x3f;
+
+	emu->df = d_sar64;
+	emu->op1 = d;
+	emu->op2 = count;
+	emu->res = (uint64_t)(((int64_t)d)>>count);
+
+	return emu->res;
+}
+
+// 8-bit SUB: stores both operands and d - s in emu, tags the operation as
+// d_sub8, and returns the difference truncated to 8 bits.
+static inline uint8_t sub8(x64emu_t *emu, uint8_t d, uint8_t s)
+{
+	emu->df = d_sub8;
+	emu->op1 = d;
+	emu->op2 = s;
+	emu->res = d - s;
+	return (uint8_t)emu->res;
+}
+
+// 16-bit SUB: stores both operands and d - s in emu, tags the operation as
+// d_sub16, and returns the difference truncated to 16 bits.
+static inline uint16_t sub16(x64emu_t *emu, uint16_t d, uint16_t s)
+{
+	emu->df = d_sub16;
+	emu->op1 = d;
+	emu->op2 = s;
+	emu->res = d - s;
+	return (uint16_t)emu->res;
+}
+
+// 32-bit SUB: stores both operands and d - s in emu, tags the operation as
+// d_sub32, and returns the stored difference.
+static inline uint32_t sub32(x64emu_t *emu, uint32_t d, uint32_t s)
+{
+	emu->df = d_sub32;
+	emu->op1 = d;
+	emu->op2 = s;
+	emu->res = d - s;
+	return emu->res;
+}
+
+// 64-bit SUB: stores both operands and d - s in emu, tags the operation as
+// d_sub64, and returns the stored difference.
+static inline uint64_t sub64(x64emu_t *emu, uint64_t d, uint64_t s)
+{
+	emu->df = d_sub64;
+	emu->op1 = d;
+	emu->op2 = s;
+	emu->res = d - s;
+	return emu->res;
+}
+
+// TEST primitives, defined in x64primop.c: they update flags from d & s
+// and return nothing.
+void         test8  (x64emu_t *emu, uint8_t d, uint8_t s);
+void         test16 (x64emu_t *emu, uint16_t d, uint16_t s);
+void         test32 (x64emu_t *emu, uint32_t d, uint32_t s);
+void         test64 (x64emu_t *emu, uint64_t d, uint64_t s);
+
+// 8-bit XOR: stores d ^ s in emu->res, tags the operation as d_xor8 and
+// returns the stored value.
+static inline uint8_t xor8(x64emu_t *emu, uint8_t d, uint8_t s)
+{
+	emu->df = d_xor8;
+	emu->res = d ^ s;
+	return emu->res;
+}
+
+
+// 16-bit XOR: stores d ^ s in emu->res, tags the operation as d_xor16 and
+// returns the stored value.
+static inline uint16_t xor16(x64emu_t *emu, uint16_t d, uint16_t s)
+{
+	emu->df = d_xor16;
+	emu->res = d ^ s;
+	return emu->res;
+}
+
+// 32-bit XOR: stores d ^ s in emu->res, tags the operation as d_xor32 and
+// returns the stored value.
+static inline uint32_t xor32(x64emu_t *emu, uint32_t d, uint32_t s)
+{
+	emu->df = d_xor32;
+	emu->res = d ^ s;
+	return emu->res;
+}
+
+// 64-bit XOR: stores d ^ s in emu->res, tags the operation as d_xor64 and
+// returns the stored value.
+static inline uint64_t xor64(x64emu_t *emu, uint64_t d, uint64_t s)
+{
+	emu->df = d_xor64;
+	emu->res = d ^ s;
+	return emu->res;
+}
+
+// 8-bit IMUL: signed AL * s. The product is stored in emu->res and then
+// written to AX; the operation is tagged as d_imul8.
+static inline void imul8(x64emu_t *emu, uint8_t s)
+{
+	emu->df = d_imul8;
+	emu->res = (int16_t)(int8_t)R_AL * (int8_t)s;
+	R_AX = emu->res;
+}
+
+// 16-bit one-operand IMUL: signed AX * s. The product is stored in
+// emu->res, its low word goes to AX and the next word to DX; the operation
+// is tagged as d_imul16.
+static inline void imul16_eax(x64emu_t *emu, uint16_t s)
+{
+	emu->df = d_imul16;
+	// AX is read here, before it is overwritten below
+	emu->res = (int32_t)(int16_t)R_AX * (int16_t)s;
+	R_DX = (uint16_t)(emu->res >> 16);
+	R_AX = (uint16_t)emu->res;
+}
+
+// 16-bit two-operand IMUL: signed op1 * op2. The product is stored in
+// emu->res (tagged d_imul16) and its low word is returned.
+static inline uint16_t imul16(x64emu_t *emu, uint16_t op1, uint16_t op2)
+{
+	emu->res = (int32_t)(int16_t)op1 * (int16_t)op2;
+	emu->df = d_imul16;
+	return (uint16_t)emu->res;
+}
+
+// Signed 32x32 -> 64 multiply of d and s. The low dword of the product is
+// written to *res_lo first, then the high dword to *res_hi. No emulator
+// state is involved.
+static inline void imul32_direct(uint32_t *res_lo, uint32_t* res_hi,uint32_t d, uint32_t s)
+{
+	const int64_t product = (int64_t)(int32_t)d * (int32_t)s;
+
+	*res_lo = (uint32_t)product;
+	*res_hi = (uint32_t)(product >> 32);
+}
+
+// 32-bit two-operand IMUL: signed op1 * op2 via imul32_direct. The low
+// dword is stored in emu->res and the high dword in emu->op1; the operation
+// is tagged as d_imul32 and the low dword is returned.
+static inline uint32_t imul32(x64emu_t *emu, uint32_t op1, uint32_t op2)
+{
+	uint32_t lo, hi;
+
+	emu->df = d_imul32;
+	imul32_direct(&lo, &hi, op1, op2);
+	emu->res = lo;
+	emu->op1 = hi;
+	return emu->res;
+}
+
+// 32-bit one-operand IMUL: signed EAX * s via imul32_direct. The low dword
+// goes to EAX and the high dword to EDX; both are mirrored into emu->res
+// and emu->op1, and the operation is tagged as d_imul32.
+static inline void imul32_eax(x64emu_t *emu, uint32_t s)
+{
+	imul32_direct(&R_EAX,&R_EDX,R_EAX,s);
+	emu->df = d_imul32;
+	emu->op1 = R_EDX;
+	emu->res = R_EAX;
+}
+
+// 8-bit MUL: unsigned AL * s. The product is stored in emu->res and then
+// written to AX; the operation is tagged as d_mul8.
+static inline void mul8(x64emu_t *emu, uint8_t s)
+{
+	emu->df = d_mul8;
+	emu->res = (uint16_t)(R_AL) * s;
+	R_AX = emu->res;
+}
+
+// 16-bit MUL: unsigned AX * s. The product is stored in emu->res, its low
+// word goes to AX and the next word to DX; the operation is tagged as d_mul16.
+static inline void mul16(x64emu_t *emu, uint16_t s)
+{
+	emu->df = d_mul16;
+	// AX is read here, before it is overwritten below
+	emu->res = (uint32_t)R_AX * s;
+	R_DX = (uint16_t)(emu->res >> 16);
+	R_AX = (uint16_t)emu->res;
+}
+
+// 32-bit MUL: unsigned EAX * s. The low dword of the product goes to EAX
+// (mirrored in emu->res) and the high dword to EDX (mirrored in emu->op1);
+// the operation is tagged as d_mul32.
+static inline void mul32_eax(x64emu_t *emu, uint32_t s)
+{
+	const uint64_t product = (uint64_t)R_EAX * s;
+
+	emu->df = d_mul32;
+	R_EAX = (uint32_t)product;
+	emu->res = R_EAX;
+	R_EDX = (uint32_t)(product >> 32);
+	emu->op1 = R_EDX;
+}
+
+// Divide primitives, defined in x64primop.c. They take only the divisor;
+// the dividend is read from, and the results written to, the AX / DX:AX /
+// EDX:EAX registers of emu.
+void         idiv8  (x64emu_t *emu, uint8_t s);
+void         idiv16 (x64emu_t *emu, uint16_t s);
+void         idiv32 (x64emu_t *emu, uint32_t s);
+void         div8  (x64emu_t *emu, uint8_t s);
+void         div16 (x64emu_t *emu, uint16_t s);
+void         div32 (x64emu_t *emu, uint32_t s);
+
+#endif //__X64PRIMOP_H_
diff --git a/src/emu/x64run.c b/src/emu/x64run.c
index 96d81976..482c9518 100755
--- a/src/emu/x64run.c
+++ b/src/emu/x64run.c
@@ -14,7 +14,7 @@
 #include "x64run.h"
 #include "x64emu_private.h"
 #include "x64run_private.h"
-//#include "x64primop.h"
+#include "x64primop.h"
 #include "x64trace.h"
 #include "x87emu_private.h"
 #include "box64context.h"
@@ -68,9 +68,13 @@ int Run(x64emu_t *emu, int step)
 #endif
 
 #define GETED oped=GetEd(emu, rex, nextop)
-#define GETGD opgd=GeG(emu, rex, nextop)
+#define GETGD opgd=GetGd(emu, rex, nextop)
+#define GETEB oped=GetEb(emu, rex, nextop)
+#define GETGB opgd=GetGb(emu, rex, nextop)  /* own pointer: must not overwrite oped set by GETEB */
 #define ED  oped
 #define GD  opgd
+#define EB  oped
+#define GB  opgd->byte[0]
 
 x64emurun:
 
@@ -95,6 +99,56 @@ x64emurun:
 
         switch(opcode) {
 
+        /* ALU opcode group B+0..B+5: Eb,Gb / Ed,Gd / Gb,Eb / Gd,Ed / AL,Ib / eAX,Id.  */
+        /* A 32-bit result that targets a register is assigned through q[0] / R_RAX   */
+        /* so the upper 32 bits end up zero; a memory Ed only gets dword[0] written.  */
+        /* With REX.W the Id immediate is sign-extended to 64 bits.                   */
+        /* NOTE(review): assumes q[0] overlays dword[0] in reg64_t - confirm.         */
+        #define GO(B, OP)                                   \
+        case B+0:                                           \
+            nextop = F8;                                    \
+            GETEB;                                          \
+            GETGB;                                          \
+            EB->byte[0] = OP##8(emu, EB->byte[0], GB);      \
+            break;                                          \
+        case B+1:                                           \
+            nextop = F8;                                    \
+            GETED;                                          \
+            GETGD;                                          \
+            if(rex.w)                                       \
+                ED->q[0] = OP##64(emu, ED->q[0], GD->q[0]); \
+            else if((nextop&0xC0)==0xC0)                    \
+                ED->q[0] = OP##32(emu, ED->dword[0], GD->dword[0]); \
+            else                                            \
+                ED->dword[0] = OP##32(emu, ED->dword[0], GD->dword[0]); \
+            break;                                          \
+        case B+2:                                           \
+            nextop = F8;                                    \
+            GETEB;                                          \
+            GETGB;                                          \
+            GB = OP##8(emu, GB, EB->byte[0]);               \
+            break;                                          \
+        case B+3:                                           \
+            nextop = F8;                                    \
+            GETED;                                          \
+            GETGD;                                          \
+            if(rex.w)                                       \
+                GD->q[0] = OP##64(emu, GD->q[0], ED->q[0]); \
+            else                                            \
+                GD->q[0] = OP##32(emu, GD->dword[0], ED->dword[0]); \
+            break;                                          \
+        case B+4:                                           \
+            R_AL = OP##8(emu, R_AL, F8);                    \
+            break;                                          \
+        case B+5:                                           \
+            if(rex.w)                                       \
+                R_RAX = OP##64(emu, R_RAX, (uint64_t)(int64_t)(int32_t)F32); \
+            else                                            \
+                R_RAX = OP##32(emu, R_EAX, F32);            \
+            break;
+
+        GO(0x00, add)                   /* ADD 0x00 -> 0x05 */
+        GO(0x08, or)                    /*  OR 0x08 -> 0x0D */
+        GO(0x10, adc)                   /* ADC 0x10 -> 0x15 */
+        GO(0x18, sbb)                   /* SBB 0x18 -> 0x1D */
+        GO(0x20, and)                   /* AND 0x20 -> 0x25 */
+        GO(0x28, sub)                   /* SUB 0x28 -> 0x2D */
+        GO(0x30, xor)                   /* XOR 0x30 -> 0x35 */
+        #undef GO
+
         case 0x40:
         case 0x41:
         case 0x42:
diff --git a/src/emu/x64run_private.h b/src/emu/x64run_private.h
index 7fb668f7..90550395 100755
--- a/src/emu/x64run_private.h
+++ b/src/emu/x64run_private.h
@@ -105,8 +105,12 @@ static inline reg64_t* GetEb(x64emu_t *emu, rex_t rex, uint8_t v)
     // rex ignored here
     uint8_t m = v&0xC7;    // filter Eb
     if(m>=0xC0) {
-        int lowhigh = (m&4)>>2;
-         return (reg64_t *)(((char*)(&emu->regs[(m&0x03)]))+lowhigh);  //?
+        if(rex.rex) {
+            int lowhigh = (m&4)>>2;
+            return (reg64_t *)(((char*)(&emu->regs[(m&0x03)]))+lowhigh);  //?
+        } else {
+            return &emu->regs[(m&0x07)+(rex.x<<4)];
+        }
     } else return GetECommon(emu, rex, m);
 }
 
@@ -114,7 +118,7 @@ static inline reg64_t* GetEd(x64emu_t *emu, rex_t rex, uint8_t v)
 {
     uint8_t m = v&0xC7;    // filter Ed
     if(m>=0xC0) {
-         return &emu->regs[(m&0x07)+(rex.b<<4)];
+         return &emu->regs[(m&0x07)+(rex.x<<4)];
     } else return GetECommon(emu, rex, m);
 }
 
@@ -186,12 +190,12 @@ static inline sse_regs_t* GetEx(x64emu_t *emu, rex_t rex, uint8_t v)
 {
     uint8_t m = v&0xC7;    // filter Ed
     if(m>=0xC0) {
-         return &emu->xmm[(m&0x07)+(rex.b<<4)];
+         return &emu->xmm[(m&0x07)+(rex.x<<4)];
     } else return (sse_regs_t*)GetECommon(emu, rex, m);
 }
 
 
-static inline reg64_t* GetG(x64emu_t *emu, rex_t rex, uint8_t v)
+// Returns the general-purpose register selected by the reg field
+// (bits 3..5) of the ModRM byte v, offset by rex.r.
+// NOTE(review): rex.r<<4 adds 16 when rex.r is a single bit; the
+// architectural REX.R extension is bit 3 (+8) - confirm against the
+// size of emu->regs and the rex_t layout.
+static inline reg64_t* GetGd(x64emu_t *emu, rex_t rex, uint8_t v)
 {
     return &emu->regs[((v&0x38)>>3)+(rex.r<<4)];
 }
@@ -199,7 +203,10 @@ static inline reg64_t* GetG(x64emu_t *emu, rex_t rex, uint8_t v)
 static inline reg64_t* GetGb(x64emu_t *emu, rex_t rex, uint8_t v)
 {
     uint8_t m = (v&0x38)>>3;
-    return (reg64_t*)&emu->regs[m&3].byte[m>>2];
+    if(rex.rex) {
+        return &emu->regs[(m&7)+(rex.r<<3)];    // REX present: low byte of any register, REX.R supplies bit 3
+    } else
+        return (reg64_t*)&emu->regs[m&3].byte[m>>2];    // no REX: encodings 4..7 are the high bytes AH/CH/DH/BH
 }
 
 static inline mmx_regs_t* GetGm(x64emu_t *emu, rex_t rex, uint8_t v)
@@ -214,9 +221,9 @@ static inline sse_regs_t* GetGx(x64emu_t *emu, rex_t rex, uint8_t v)
     return &emu->xmm[(m&7)+(rex.r<<4)];
 }
 
-//void UpdateFlags(x64emu_t *emu);
+void UpdateFlags(x64emu_t *emu);
 
-//#define CHECK_FLAGS(emu) if(emu->df) UpdateFlags(emu)
+#define CHECK_FLAGS(emu) do { if((emu)->df) UpdateFlags(emu); } while(0)    /* single statement: safe inside if/else */
 #define RESET_FLAGS(emu) emu->df = d_none
 
 //void Run67(x64emu_t *emu);
@@ -240,7 +247,7 @@ uintptr_t GetSegmentBaseEmu(x64emu_t* emu, int seg);
 #define GetESBaseEmu(emu)    GetSegmentBaseEmu(emu, _ES)
 #define GetDSBaseEmu(emu)    GetSegmentBaseEmu(emu, _DS)
 
-//const char* GetNativeName(void* p);
+const char* GetNativeName(void* p);
 
 #ifdef HAVE_TRACE
 void PrintTrace(x64emu_t* emu, uintptr_t ip, int dynarec);