diff options
Diffstat (limited to 'miasm2/arch')
| -rw-r--r-- | miasm2/arch/__init__.py | 0 | ||||
| -rw-r--r-- | miasm2/arch/arm/__init__.py | 1 | ||||
| -rw-r--r-- | miasm2/arch/arm/arch.py | 2168 | ||||
| -rw-r--r-- | miasm2/arch/arm/disasm.py | 51 | ||||
| -rw-r--r-- | miasm2/arch/arm/ira.py | 112 | ||||
| -rw-r--r-- | miasm2/arch/arm/regs.py | 80 | ||||
| -rw-r--r-- | miasm2/arch/arm/sem.py | 1040 | ||||
| -rw-r--r-- | miasm2/arch/exe64.bin | bin | 0 -> 370176 bytes | |||
| -rw-r--r-- | miasm2/arch/msp430/__init__.py | 1 | ||||
| -rw-r--r-- | miasm2/arch/msp430/arch.py | 601 | ||||
| -rw-r--r-- | miasm2/arch/msp430/disasm.py | 8 | ||||
| -rw-r--r-- | miasm2/arch/msp430/ira.py | 75 | ||||
| -rw-r--r-- | miasm2/arch/msp430/regs.py | 105 | ||||
| -rw-r--r-- | miasm2/arch/msp430/sem.py | 440 | ||||
| -rw-r--r-- | miasm2/arch/sh4/__init__.py | 0 | ||||
| -rw-r--r-- | miasm2/arch/sh4/arch.py | 1404 | ||||
| -rw-r--r-- | miasm2/arch/sh4/regs.py | 80 | ||||
| -rw-r--r-- | miasm2/arch/x86/__init__.py | 1 | ||||
| -rw-r--r-- | miasm2/arch/x86/arch.py | 3839 | ||||
| -rw-r--r-- | miasm2/arch/x86/disasm.py | 51 | ||||
| -rw-r--r-- | miasm2/arch/x86/ira.py | 91 | ||||
| -rw-r--r-- | miasm2/arch/x86/regs.py | 405 | ||||
| -rw-r--r-- | miasm2/arch/x86/sem.py | 3029 |
23 files changed, 13582 insertions, 0 deletions
diff --git a/miasm2/arch/__init__.py b/miasm2/arch/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/miasm2/arch/__init__.py diff --git a/miasm2/arch/arm/__init__.py b/miasm2/arch/arm/__init__.py new file mode 100644 index 00000000..bbad893b --- /dev/null +++ b/miasm2/arch/arm/__init__.py @@ -0,0 +1 @@ +__all__ = ["arch", "disasm", "regs", "sem"] diff --git a/miasm2/arch/arm/arch.py b/miasm2/arch/arm/arch.py new file mode 100644 index 00000000..3c219710 --- /dev/null +++ b/miasm2/arch/arm/arch.py @@ -0,0 +1,2168 @@ +#!/usr/bin/env python +#-*- coding:utf-8 -*- + +import logging +from pyparsing import * +from miasm2.expression.expression import * +from miasm2.core.cpu import * +from collections import defaultdict +from miasm2.core.bin_stream import bin_stream +import regs as regs_module +from regs import * + +# A1 encoding + +log = logging.getLogger("armdis") +console_handler = logging.StreamHandler() +console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) +log.addHandler(console_handler) +log.setLevel(logging.DEBUG) + +# arm regs ############## +reg_dum = ExprId('DumReg') + +gen_reg('PC', globals()) + +# GP +regs_str = ['R%d' % r for r in xrange(0x10)] +regs_str[13] = 'SP' +regs_str[14] = 'LR' +regs_str[15] = 'PC' +regs_expr = [ExprId(x, 32) for x in regs_str] + +gpregs = reg_info(regs_str, regs_expr) + +gpregs_pc = reg_info(regs_str[-1:], regs_expr[-1:]) +gpregs_sp = reg_info(regs_str[13:14], regs_expr[13:14]) + +gpregs_nosppc = reg_info(regs_str[:13] + [str(reg_dum), regs_str[14]], + regs_expr[:13] + [reg_dum, regs_expr[14]]) + + +# psr +sr_flags = "cxsf" +cpsr_regs_str = [] +spsr_regs_str = [] +for i in xrange(0x10): + o = "" + for j in xrange(4): + if i & (1 << j): + o += sr_flags[j] + cpsr_regs_str.append("CPSR_%s" % o) + spsr_regs_str.append("SPSR_%s" % o) + +# psr_regs_str = ['CPSR', 'SPSR'] +# psr_regs_expr = [ExprId(x, 32) for x in psr_regs_str] + +# psr_regs = reg_info(psr_regs_str, psr_regs_expr) + 
# Program status registers: one 32-bit identifier per generated
# CPSR_*/SPSR_* name, kept in the same order as the name lists so that a
# list index can be used interchangeably on names and expressions.
cpsr_regs_expr = [ExprId(x, 32) for x in cpsr_regs_str]
spsr_regs_expr = [ExprId(x, 32) for x in spsr_regs_str]

cpsr_regs = reg_info(cpsr_regs_str, cpsr_regs_expr)
spsr_regs = reg_info(spsr_regs_str, spsr_regs_expr)

# CP: sixteen registers named c0..c15
cpregs_str = ['c%d' % r for r in xrange(16)]
cpregs_expr = [ExprId(x) for x in cpregs_str]
cp_regs = reg_info(cpregs_str, cpregs_expr)

# P: sixteen registers named p0..p15
pregs_str = ['p%d' % r for r in xrange(16)]
pregs_expr = [ExprId(x) for x in pregs_str]
p_regs = reg_info(pregs_str, pregs_expr)


# parser helpers ###########

def tok_reg_duo(s, l, t):
    """Parse action: expand a register pair into the whole register run.

    ``t[0]`` holds the two register expressions found on either side of
    the ``-`` separator.  Every entry of ``gpregs.expr`` from the index of
    the first one up to and including the index of the second one is
    returned, in table order.  If the second register comes before the
    first one in the table, the returned list is empty.
    """
    pair = t[0]
    start = gpregs.expr.index(pair[0])
    stop = gpregs.expr.index(pair[1])
    return [gpregs.expr[pos] for pos in xrange(start, stop + 1)]


# Punctuation tokens.  Parentheses and the circumflex are kept in the
# parse result; braces and the minus sign are suppressed.
LPARENTHESIS = Literal("(")
RPARENTHESIS = Literal(")")

LACC = Suppress(Literal("{"))
RACC = Suppress(Literal("}"))
MINUS = Suppress(Literal("-"))
CIRCUNFLEX = Literal("^")


def check_bounds(left_bound, right_bound, value):
    """Return ``ExprInt32(value)`` if it lies in the inclusive range.

    Raises ValueError when ``value`` is below ``left_bound`` or above
    ``right_bound``.
    """
    if not (left_bound <= value <= right_bound):
        raise ValueError('shift operator immediate value out of bound')
    return ExprInt32(value)


# Immediate shift amounts, validated while parsing.
int_1_31 = str_int.copy().setParseAction(lambda v: check_bounds(1, 31, v[0]))
int_1_32 = str_int.copy().setParseAction(lambda v: check_bounds(1, 32, v[0]))


def reglistparse(s, l, t):
    """Parse action: build the 'reglist' expression for a register list.

    A trailing ``^`` token is not part of the list itself; when present
    the 'reglist' operation is wrapped in an 'sbit' operation.
    """
    items = t[0]
    if items[-1] != "^":
        return ExprOp('reglist', *items)
    return ExprOp('sbit', ExprOp('reglist', *items[:-1]))


# Shift operators, indexed by the 2-bit shift type for the first four
# entries; 'rrx' is the extra one-operand form.
allshifts = ['<<', '>>', 'a>>', '>>>', 'rrx']
allshifts_armt = list(allshifts)

# Assembly mnemonic -> expression operator, and the reverse mapping used
# when printing.
shift2expr_dct = {
    'LSL': '<<',
    'LSR': '>>',
    'ASR': 'a>>',
    'ROR': ">>>",
    'RRX': "rrx",
}

expr2shift_dct = dict([x[::-1] for x in shift2expr_dct.items()])


def op_shift2expr(s, l, t):
    """Parse action: translate a shift mnemonic into its operator string."""
    mnemonic = t[0]
    return shift2expr_dct[mnemonic]


# "{R0, R2-R5, LR}^" style register lists: each element is either a single
# register or a "Rx-Ry" pair expanded by tok_reg_duo.
reg_duo = Group(gpregs.parser + MINUS + gpregs.parser)
reg_duo = reg_duo.setParseAction(tok_reg_duo)
reg_or_duo = reg_duo | gpregs.parser
gpreg_list = Group(
    LACC + delimitedList(reg_or_duo, delim=',') + RACC
    + Optional(CIRCUNFLEX))
+gpreg_list.setParseAction(reglistparse) + +LBRACK = Suppress("[") +RBRACK = Suppress("]") +COMMA = Suppress(",") +all_binaryop_1_31_shifts_t = literal_list( + ['LSL', 'ROR']).setParseAction(op_shift2expr) +all_binaryop_1_32_shifts_t = literal_list( + ['LSR', 'ASR']).setParseAction(op_shift2expr) +all_unaryop_shifts_t = literal_list(['RRX']).setParseAction(op_shift2expr) + +allshifts_t_armt = literal_list( + ['LSL', 'LSR', 'ASR', 'ROR', 'RRX']).setParseAction(op_shift2expr) + +gpreg_p = gpregs.parser + +psr_p = cpsr_regs.parser | spsr_regs.parser + + +def shift2expr(t): + if len(t) == 1: + return t[0] + elif len(t) == 2: + return ExprOp(t[1], t[0]) + elif len(t) == 3: + return ExprOp(t[1], t[0], t[2]) + +variable, operand, base_expr = gen_base_expr() + +int_or_expr = base_expr + + +def ast_id2expr(t): + if not t in mn_arm.regs.all_regs_ids_byname: + r = ExprId(t) + else: + r = mn_arm.regs.all_regs_ids_byname[t] + return r + + +def ast_int2expr(a): + return ExprInt32(a) + + +my_var_parser = parse_ast(ast_id2expr, ast_int2expr) +base_expr.setParseAction(my_var_parser) + + +shift_off = (gpregs.parser + Optional( + (all_unaryop_shifts_t) | + (all_binaryop_1_31_shifts_t + (gpregs.parser | int_1_31)) | + (all_binaryop_1_32_shifts_t + (gpregs.parser | int_1_32)) +)).setParseAction(shift2expr) +shift_off |= base_expr + + +def deref2expr_nooff(s, l, t): + t = t[0] + # XXX default + return ExprOp("preinc", t[0], ExprInt32(0)) + + +def deref2expr_pre(s, l, t): + t = t[0] + if len(t) == 1: + return ExprOp("preinc", t[0], ExprInt32(0)) + elif len(t) == 2: + return ExprOp("preinc", t[0], t[1]) + else: + raise NotImplementedError('len(t) > 2') + + +def deref2expr_post(s, l, t): + t = t[0] + return ExprOp("postinc", t[0], t[1]) + + +def deref_wb(s, l, t): + t = t[0] + if t[-1] == '!': + return ExprOp('wback', *t[:-1]) + return t[0] + +# shift_off.setParseAction(deref_off) +deref_nooff = Group( + LBRACK + gpregs.parser + RBRACK).setParseAction(deref2expr_nooff) +deref_pre = 
Group(LBRACK + gpregs.parser + Optional( + COMMA + shift_off) + RBRACK).setParseAction(deref2expr_pre) +deref_post = Group(LBRACK + gpregs.parser + RBRACK + + COMMA + shift_off).setParseAction(deref2expr_post) +deref = Group((deref_post | deref_pre | deref_nooff) + + Optional('!')).setParseAction(deref_wb) + + +def parsegpreg_wb(s, l, t): + t = t[0] + if t[-1] == '!': + return ExprOp('wback', *t[:-1]) + return t[0] + +gpregs_wb = Group(gpregs.parser + Optional('!')).setParseAction(parsegpreg_wb) + + +# + + +cond_list = ['EQ', 'NE', 'CS', 'CC', 'MI', 'PL', 'VS', 'VC', + 'HI', 'LS', 'GE', 'LT', 'GT', 'LE', ''] # , 'NV'] +cond_dct = dict([(x[1], x[0]) for x in enumerate(cond_list)]) +# default_prio = 0x1337 + +bm_cond = bs_mod_name(l=4, fname='cond', mn_mod=cond_list) # cond_dct) + + +def permut_args(order, args): + l = [] + for i, x in enumerate(order): + l.append((x.__class__, i)) + l = dict(l) + out = [None for x in xrange(len(args))] + for a in args: + out[l[a.__class__]] = a + return out + + +class additional_info: + + def __init__(self): + self.except_on_instr = False + self.lnk = None + self.cond = None + + +class instruction_arm(instruction): + delayslot = 0 + + def __init__(self, *args, **kargs): + super(instruction_arm, self).__init__(*args, **kargs) + + def dstflow(self): + if self.name.startswith('BIC'): + return False + return self.name.startswith('B') + + def dstflow2label(self, symbol_pool): + e = self.args[0] + if not isinstance(e, ExprInt): + return + if self.name == 'BLX': + ad = e.arg + 8 + self.offset + else: + ad = e.arg + 8 + self.offset + l = symbol_pool.getby_offset_create(ad) + s = ExprId(l, e.size) + self.args[0] = s + + def breakflow(self): + if self.name.startswith('B') and not self.name.startswith('BIC'): + return True + if self.name.startswith("LDM") and PC in self.args[1].args: + return True + + if self.args and PC in self.args[0].get_r(): + return True + return False + + def is_subcall(self): + if self.name == 'BLX': + return True + 
return self.additional_info.lnk + + def getdstflow(self, symbol_pool): + if self.name in ['CBZ', 'CBNZ']: + return [self.args[1]] + return [self.args[0]] + + def splitflow(self): + if self.additional_info.lnk: + return True + if self.name == 'BX': + return False + return self.breakflow() and self.additional_info.cond != 14 + + def get_symbol_size(self, symbol, symbol_pool): + return 32 + + def fixDstOffset(self): + e = self.args[0] + print 'FIX', e, self.offset, self.l + if self.offset is None: + raise ValueError('symbol not resolved %s' % l) + if not isinstance(e, ExprInt): + log.warning('zarb dst %r' % e) + return + off = e.arg - (self.offset + 4 + self.l) + print hex(off) + if int(off % 4): + raise ValueError('strange offset! %r' % off) + self.args[0] = ExprInt32(off) + print 'final', self.args[0] + + def get_args_expr(self): + args = [a for a in self.args] + return args + + +class instruction_armt(instruction_arm): + + def __init__(self, *args, **kargs): + super(instruction_armt, self).__init__(*args, **kargs) + + def dstflow(self): + if self.name.startswith('BIC'): + return False + if self.name in ["CBZ", "CBNZ"]: + return True + return self.name.startswith('B') + + def dstflow2label(self, symbol_pool): + if self.name in ["CBZ", "CBNZ"]: + e = self.args[1] + else: + e = self.args[0] + if not isinstance(e, ExprInt): + return + if self.name == 'BLX': + ad = e.arg + 4 + (self.offset & 0xfffffffc) + else: + ad = e.arg + 4 + self.offset + l = symbol_pool.getby_offset_create(ad) + s = ExprId(l, e.size) + if self.name in ["CBZ", "CBNZ"]: + self.args[1] = s + else: + self.args[0] = s + + def breakflow(self): + if self.name in ['B', 'BL', 'BLX', + 'BEQ', 'BNE', 'BCS', 'BCC', 'BMI', 'BPL', 'BVS', + 'BVC', 'BHI', 'BLS', 'BGE', 'BLT', 'BGT', 'BLE', + 'CBZ', 'CBNZ']: + return True + if self.name.startswith("LDM") and PC in self.args[1].args: + return True + if self.args and PC in self.args[0].get_r(): + return True + return False + + def splitflow(self): + if self.name in 
['BL', 'BLX', + 'BEQ', 'BNE', 'BCS', 'BCC', 'BMI', 'BPL', 'BVS', + 'BVC', 'BHI', 'BLS', 'BGE', 'BLT', 'BGT', 'BLE', + 'CBZ', 'CBNZ']: + return True + return False + + def is_subcall(self): + if self.name in ['BL', 'BLX']: + return True + return False + +mode_arm = 'arm' +mode_armthumb = 'armt' + + +class mn_arm(cls_mn): + delayslot = 0 + name = "arm" + regs = regs_module + bintree = {} + num = 0 + all_mn = [] + all_mn_mode = defaultdict(list) + all_mn_name = defaultdict(list) + all_mn_inst = defaultdict(list) + pc = {mode_arm: PC, mode_armthumb: PC} + sp = {mode_arm: SP, mode_armthumb: SP} + instruction = instruction_arm + max_instruction_len = 4 + + @classmethod + def fromstring(cls, s, mode='arm'): + return super(mn_arm, cls).fromstring(s, mode) + + @classmethod + def dis(cls, bs_o, mode_o='arm', offset=0): + return super(mn_arm, cls).dis(bs_o, mode_o, offset) + + @classmethod + def getpc(cls, attrib): + return PC + + @classmethod + def getsp(cls, attrib): + return SP + + def additional_info(self): + info = additional_info() + info.lnk = False + if hasattr(self, "lnk"): + info.lnk = self.lnk.value != 0 + info.cond = self.cond.value + return info + + @classmethod + def getbits(cls, bs, start, n): + if not n: + return 0 + o = 0 + if n > bs.getlen() * 8: + raise ValueError('not enought bits %r %r' % (n, len(bs.bin) * 8)) + while n: + i = start / 8 + c = cls.getbytes(bs, i) + if not c: + raise IOError + c = ord(c) + r = 8 - start % 8 + c &= (1 << r) - 1 + l = min(r, n) + c >>= (r - l) + o <<= l + o |= c + n -= l + start += l + return o + + @classmethod + def getbytes(cls, bs, offset, l=1): + out = "" + for _ in xrange(l): + n_offset = (offset & ~3) + 3 - offset % 4 + out += bs.getbytes(n_offset, 1) + offset += 1 + return out + + @classmethod + def check_mnemo(cls, fields): + l = sum([x.l for x in fields]) + assert l == 32, "len %r" % l + + @classmethod + def getmn(cls, name): + return name.upper() + + @classmethod + def mod_fields(cls, fields): + l = sum([x.l for x 
in fields]) + if l == 32: + return fields + return [bm_cond] + fields + + @classmethod + def gen_modes(cls, subcls, name, bases, dct, fields): + dct['mode'] = mode_arm + return [(subcls, name, bases, dct, fields)] + + def value(self, mode): + v = super(mn_arm, self).value(mode) + return [x[::-1] for x in v] + + def fixDstOffset(self): + e = self.args[0].expr + + if self.offset is None: + raise ValueError('symbol not resolved %s' % l) + if not isinstance(e, ExprInt): + # raise ValueError('dst must be int or label') + log.warning('zarb dst %r' % e) + return + # return ExprInt32(e.arg - (self.offset + self.l)) + off = e.arg - (self.offset + 4 + self.l) + if int(off % 4): + raise ValueError('strange offset! %r' % off) + self.args[0].expr = ExprInt32(off / 4) + + def get_symbol_size(self, symbol, symbol_pool, mode): + return 32 + + +class mn_armt(cls_mn): + name = "armt" + regs = regs_module + delayslot = 0 + bintree = {} + num = 0 + all_mn = [] + all_mn_mode = defaultdict(list) + all_mn_name = defaultdict(list) + all_mn_inst = defaultdict(list) + pc = PC + sp = SP + instruction = instruction_armt + max_instruction_len = 8 + + @classmethod + def fromstring(cls, s, mode='armt'): + return super(mn_armt, cls).fromstring(s, mode) + + @classmethod + def dis(cls, bs_o, mode_o='armt', offset=0): + return super(mn_armt, cls).dis(bs_o, mode_o, offset) + + @classmethod + def getpc(cls, attrib): + return PC + + @classmethod + def getsp(cls, attrib): + return SP + + def additional_info(self): + info = additional_info() + info.lnk = False + if hasattr(self, "lnk"): + info.lnk = self.lnk.value != 0 + info.cond = 14 # COND_ALWAYS + return info + + @classmethod + def getbits(cls, bs, start, n): + if not n: + return 0 + o = 0 + if n > bs.getlen() * 8: + raise ValueError('not enought bits %r %r' % (n, len(bs.bin) * 8)) + while n: + i = start / 8 + c = cls.getbytes(bs, i) + if not c: + raise IOError + c = ord(c) + r = 8 - start % 8 + c &= (1 << r) - 1 + l = min(r, n) + c >>= (r - l) + o 
<<= l + o |= c + n -= l + start += l + return o + + @classmethod + def getbytes(cls, bs, offset, l=1): + out = "" + for _ in xrange(l): + n_offset = (offset & ~1) + 1 - offset % 2 + out += bs.getbytes(n_offset, 1) + offset += 1 + return out + + @classmethod + def check_mnemo(cls, fields): + l = sum([x.l for x in fields]) + assert l in [16, 32], "len %r" % l + + @classmethod + def getmn(cls, name): + return name.upper() + + @classmethod + def mod_fields(cls, fields): + return list(fields) + + @classmethod + def gen_modes(cls, subcls, name, bases, dct, fields): + dct['mode'] = mode_armthumb + return [(subcls, name, bases, dct, fields)] + + def value(self, mode): + v = super(mn_armt, self).value(mode) + out = [] + for x in v: + if len(x) == 2: + out.append(x[::-1]) + elif len(x) == 4: + out.append(x[:2][::-1] + x[2:4][::-1]) + return out + + def get_args_expr(self): + args = [a.expr for a in self.args] + return args + + def get_symbol_size(self, symbol, symbol_pool, mode): + return 32 + + +class arm_reg(reg_noarg, m_arg): + pass + + +class arm_gpreg_noarg(reg_noarg): + reg_info = gpregs + parser = reg_info.parser + + +class arm_gpreg(arm_reg): + reg_info = gpregs + parser = reg_info.parser + + +class arm_reg_wb(arm_reg): + reg_info = gpregs + parser = gpregs_wb + + @staticmethod + def arg2str(e): + if isinstance(e, ExprId): + return '%s' % e + return "%s!" 
% e.args[0] + + def decode(self, v): + v = v & self.lmask + e = self.reg_info.expr[v] + if self.parent.wback.value: + e = ExprOp('wback', e) + self.expr = e + return True + + def encode(self): + e = self.expr + self.parent.wback.value = 0 + if isinstance(e, ExprOp) and e.op == 'wback': + self.parent.wback.value = 1 + e = e.args[0] + if isinstance(e, ExprId): + self.value = self.reg_info.expr.index(e) + else: + self.parent.wback.value = 1 + self.value = self.reg_info.expr.index(e.args[0]) + return True + + +class arm_psr(m_arg): + parser = psr_p + + def decode(self, v): + v = v & self.lmask + if self.parent.psr.value == 0: + e = cpsr_regs.expr[v] + else: + e = spsr_regs.expr[v] + self.expr = e + return True + + def encode(self): + e = self.expr + if e in spsr_regs.expr: + self.parent.psr.value = 1 + v = spsr_regs.expr.index(e) + elif e in cpsr_regs.expr: + self.parent.psr.value = 0 + v = cpsr_regs.expr.index(e) + else: + return False + self.value = v + return True + + +class arm_cpreg(arm_reg): + reg_info = cp_regs + parser = reg_info.parser + + +class arm_preg(arm_reg): + reg_info = p_regs + parser = reg_info.parser + + +class arm_imm(imm_noarg, m_arg): + parser = base_expr + + +class arm_offs(arm_imm): + parser = base_expr + + def int2expr(self, v): + if v & ~self.intmask != 0: + return None + return ExprInt_fromsize(self.intsize, v) + + def decodeval(self, v): + return v << 2 + + def encodeval(self, v): + return v >> 2 + + def decode(self, v): + v = v & self.lmask + if (1 << (self.l - 1)) & v: + v |= ~0 ^ self.lmask + v = self.decodeval(v) + self.expr = ExprInt32(v) + return True + + def encode(self): + if not isinstance(self.expr, ExprInt): + return False + v = int(self.expr.arg) + if (1 << (self.l - 1)) & v: + v = -((0xffffffff ^ v) + 1) + v = self.encodeval(v) + self.value = (v & 0xffffffff) & self.lmask + return True + + +class arm_imm8_12(m_arg): + parser = deref + + def decode(self, v): + v = v & self.lmask + if self.parent.updown.value: + e = ExprInt32(v 
<< 2) + else: + e = ExprInt32(-v << 2) + if self.parent.ppi.value: + e = ExprOp('preinc', self.parent.rn.expr, e) + else: + e = ExprOp('postinc', self.parent.rn.expr, e) + if self.parent.wback.value == 1: + e = ExprOp('wback', e) + self.expr = e + return True + + def encode(self): + self.parent.updown.value = 1 + e = self.expr + if isinstance(e, ExprOp) and e.op == 'wback': + self.parent.wback.value = 1 + e = e.args[0] + else: + self.parent.wback.value = 0 + if e.op == "postinc": + self.parent.ppi.value = 0 + elif e.op == "preinc": + self.parent.ppi.value = 1 + else: + # XXX default + self.parent.ppi.value = 1 + self.parent.rn.expr = e.args[0] + if len(e.args) == 1: + self.value = 0 + return True + e = e.args[1] + if not isinstance(e, ExprInt): + log.debug('should be int %r' % e) + return False + v = int(e.arg) + if v < 0 or v & (1 << 31): + self.parent.updown.value = 0 + v = -v & 0xFFFFFFFF + if v & 0x3: + log.debug('arg shoulb be 4 aligned') + return False + v >>= 2 + self.value = v + return True + + @staticmethod + def arg2str(e): + wb = False + if isinstance(e, ExprOp) and e.op == 'wback': + wb = True + e = e.args[0] + if isinstance(e, ExprId): + r = e + s = None + else: + if len(e.args) == 1 and isinstance(e.args[0], ExprId): + r, s = e.args[0], None + elif isinstance(e.args[0], ExprId): + r, s = e.args[0], e.args[1] + else: + r, s = e.args[0].args + if isinstance(s, ExprOp) and s.op in expr2shift_dct: + s = ' '.join([str(x) + for x in s.args[0], expr2shift_dct[s.op], s.args[1]]) + if isinstance(e, ExprOp) and e.op == 'preinc': + if s and not (isinstance(s, ExprInt) and s.arg == 0): + o = '[%s, %s]' % (r, s) + else: + o = '[%s]' % (r) + else: + o = '[%s]' % r + if s and not (isinstance(s, ExprInt) and s.arg == 0): + o += ', %s' % s + if wb: + o += "!" 
+ return o + + +class arm_imm_4_12(m_arg): + parser = base_expr + + def decode(self, v): + v = v & self.lmask + imm = (self.parent.imm4.value << 12) | v + self.expr = ExprInt32(imm) + return True + + def encode(self): + if not isinstance(self.expr, ExprInt): + return False + v = int(self.expr.arg) + if v > 0xffff: + return False + self.parent.imm4.value = v >> 12 + self.value = v & 0xfff + return True + + +class arm_op2(m_arg): + parser = shift_off + + def str_to_imm_rot_form(self, s, neg=False): + if neg: + s = -s & 0xffffffff + for i in xrange(0, 32, 2): + v = myrol32(s, i) + if 0 <= v < 0x100: + return ((i / 2) << 8) | v + return None + + def decode(self, v): + val = v & self.lmask + if self.parent.immop.value: + rot = val >> 8 + imm = val & 0xff + imm = myror32(imm, rot * 2) + self.expr = ExprInt32(imm) + return True + rm = val & 0xf + shift = val >> 4 + shift_kind = shift & 1 + shift_type = (shift >> 1) & 3 + shift >>= 3 + # print self.parent.immop.value, hex(shift), hex(shift_kind), + # hex(shift_type) + if shift_kind: + # shift kind is reg + if shift & 1: + # log.debug('error in shift1') + return False + rs = shift >> 1 + if rs == 0xf: + # log.debug('error in shift2') + return False + shift_op = regs_expr[rs] + else: + # shift kind is imm + amount = shift + shift_op = ExprInt32(amount) + a = regs_expr[rm] + if shift_op == ExprInt32(0): + if shift_type == 3: + self.expr = ExprOp(allshifts[4], a) + else: + self.expr = a + else: + self.expr = ExprOp(allshifts[shift_type], a, shift_op) + return True + + def encode(self): + e = self.expr + # pure imm + if isinstance(e, ExprInt): + val = self.str_to_imm_rot_form(int(e.arg)) + if val is None: + return False + self.parent.immop.value = 1 + self.value = val + return True + + self.parent.immop.value = 0 + # pure reg + if isinstance(e, ExprId): + rm = gpregs.expr.index(e) + shift_kind = 0 + shift_type = 0 + amount = 0 + self.value = ( + ((((amount << 2) | shift_type) << 1) | shift_kind) << 4) | rm + return True + # rot 
reg + if not isinstance(e, ExprOp): + log.debug('bad reg rot1 %r', e) + return False + rm = gpregs.expr.index(e.args[0]) + shift_type = allshifts.index(e.op) + if e.op == 'rrx': + shift_kind = 0 + amount = 0 + shift_type = 3 + elif isinstance(e.args[1], ExprInt): + shift_kind = 0 + amount = int(e.args[1].arg) + # LSR/ASR of 32 => 0 + if amount == 32 and e.op in ['>>', 'a>>']: + amount = 0 + else: + shift_kind = 1 + amount = gpregs.expr.index(e.args[1]) << 1 + self.value = ( + ((((amount << 2) | shift_type) << 1) | shift_kind) << 4) | rm + return True + + @staticmethod + def arg2str(e): + if isinstance(e, ExprInt) or isinstance(e, ExprId): + return str(e) + if isinstance(e, ExprOp) and e.op in expr2shift_dct: + if len(e.args) == 1: + return '%s %s' % (e.args[0], expr2shift_dct[e.op]) + elif len(e.args) == 2: + return '%s %s %s' % (e.args[0], expr2shift_dct[e.op], e.args[1]) + return str(e) + +# op2imm + rn + + +class arm_op2imm(arm_imm8_12): + parser = deref + + def str_to_imm_rot_form(self, s, neg=False): + if neg: + s = -s & 0xffffffff + if 0 <= s < (1 << 12): + return s + return None + + def decode(self, v): + val = v & self.lmask + if self.parent.immop.value == 0: + imm = val + if self.parent.updown.value == 0: + imm = -imm + if self.parent.ppi.value: + e = ExprOp('preinc', self.parent.rn.expr, ExprInt32(imm)) + else: + e = ExprOp('postinc', self.parent.rn.expr, ExprInt32(imm)) + if self.parent.wback.value == 1: + e = ExprOp('wback', e) + self.expr = e + return True + rm = val & 0xf + shift = val >> 4 + shift_kind = shift & 1 + shift_type = (shift >> 1) & 3 + shift >>= 3 + # print self.parent.immop.value, hex(shift), hex(shift_kind), + # hex(shift_type) + if shift_kind: + # log.debug('error in disasm xx') + return False + else: + # shift kind is imm + amount = shift + shift_op = ExprInt32(amount) + a = regs_expr[rm] + if shift_op == ExprInt32(0): + pass + else: + a = ExprOp(allshifts[shift_type], a, shift_op) + if self.parent.ppi.value: + e = ExprOp('preinc', 
self.parent.rn.expr, a) + else: + e = ExprOp('postinc', self.parent.rn.expr, a) + if self.parent.wback.value == 1: + e = ExprOp('wback', e) + self.expr = e + return True + + def encode(self): + self.parent.immop.value = 1 + self.parent.updown.value = 1 + + e = self.expr + if e.op == 'wback': + self.parent.wback.value = 1 + e = e.args[0] + else: + self.parent.wback.value = 0 + if e.op == "postinc": + self.parent.ppi.value = 0 + elif e.op == "preinc": + self.parent.ppi.value = 1 + else: + # XXX default + self.parent.ppi.value = 1 + + # if len(v) <1: + # raise ValueError('cannot parse', s) + self.parent.rn.fromstring(e.args[0]) + if len(e.args) == 1: + self.parent.immop.value = 0 + self.value = 0 + return True + # pure imm + if isinstance(e.args[1], ExprInt): + self.parent.immop.value = 0 + val = self.str_to_imm_rot_form(int(e.args[1].arg)) + if val is None: + val = self.str_to_imm_rot_form(int(e.args[1].arg), True) + if val is None: + log.debug('cannot encode inm') + return False + self.parent.updown.value = 0 + self.value = val + return True + # pure reg + if isinstance(e.args[1], ExprId): + rm = gpregs.expr.index(e.args[1]) + shift_kind = 0 + shift_type = 0 + amount = 0 + self.value = ( + ((((amount << 2) | shift_type) << 1) | shift_kind) << 4) | rm + return True + # rot reg + if not isinstance(e.args[1], ExprOp): + log.debug('bad reg rot2 %r' % e) + return False + e = e.args[1] + rm = gpregs.expr.index(e.args[0]) + shift_type = allshifts.index(e.op) + if isinstance(e.args[1], ExprInt): + shift_kind = 0 + amount = int(e.args[1].arg) + else: + shift_kind = 1 + amount = gpregs.expr.index(e.args[1]) << 1 + self.value = ( + ((((amount << 2) | shift_type) << 1) | shift_kind) << 4) | rm + return True + + +def reglist2str(rlist): + out = [] + i = 0 + while i < len(rlist): + j = i + 1 + while j < len(rlist) and rlist[j] < 13 and rlist[j] == rlist[j - 1] + 1: + j += 1 + j -= 1 + if j < i + 2: + out.append(regs_str[rlist[i]]) + i += 1 + else: + out.append(regs_str[rlist[i]] 
+ '-' + regs_str[rlist[j]]) + i = j + 1 + return "{" + ", ".join(out) + '}' + + +class arm_rlist(m_arg): + parser = gpreg_list + + def encode(self): + self.parent.sbit.value = 0 + e = self.expr + if isinstance(e, ExprOp) and e.op == "sbit": + e = e.args[0] + self.parent.sbit.value = 1 + rlist = [gpregs.expr.index(x) for x in e.args] + v = 0 + for r in rlist: + v |= 1 << r + self.value = v + return True + + def decode(self, v): + v = v & self.lmask + out = [] + for i in xrange(0x10): + if 1 << i & v: + out.append(gpregs.expr[i]) + e = ExprOp('reglist', *out) + if self.parent.sbit.value == 1: + e = ExprOp('sbit', e) + self.expr = e + return True + + @staticmethod + def arg2str(e): + o = [] + sb = False + if isinstance(e, ExprOp) and e.op == "sbit": + sb = True + e = e.args[0] + o = [gpregs.expr.index(x) for x in e.args] + out = reglist2str(o) + if sb: + out += "^" + return out + + +class updown_b_nosp_mn(bs_mod_name): + mn_mod = ['D', 'I'] + + def modname(self, name, f_i): + return name + self.args['mn_mod'][f_i] + + +class ppi_b_nosp_mn(bs_mod_name): + prio = 5 + mn_mod = ['A', 'B'] + + +class updown_b_sp_mn(bs_mod_name): + mn_mod = ['A', 'D'] + + def modname(self, name, f_i): + if name.startswith("STM"): + f_i = [1, 0][f_i] + return name + self.args['mn_mod'][f_i] + + +class ppi_b_sp_mn(bs_mod_name): + mn_mod = ['F', 'E'] + + def modname(self, name, f_i): + if name.startswith("STM"): + f_i = [1, 0][f_i] + return name + self.args['mn_mod'][f_i] + + +class arm_reg_wb_nosp(arm_reg_wb): + + def decode(self, v): + v = v & self.lmask + if v == 13: + return False + e = self.reg_info.expr[v] + if self.parent.wback.value: + e = ExprOp('wback', e) + self.expr = e + return True + + +class arm_offs_blx(arm_imm): + + def decode(self, v): + v = v & self.lmask + v = (v << 2) + (self.parent.lowb.value << 1) + v = sign_ext(v, 26, 32) + self.expr = ExprInt32(v) + return True + + def encode(self): + if not isinstance(self.expr, ExprInt): + return False + v = self.expr.arg.arg + if v 
& 0x80000000: + v &= (1 << 26) - 1 + self.parent.lowb.value = (v >> 1) & 1 + self.value = v >> 2 + return True + + +class bs_lnk(bs_mod_name): + + def modname(self, name, i): + return name[:1] + self.args['mn_mod'][i] + name[1:] + + +accum = bs(l=1) +scc = bs_mod_name(l=1, fname='scc', mn_mod=['', 'S']) +dumscc = bs("1") +rd = bs(l=4, cls=(arm_gpreg,)) +rdl = bs(l=4, cls=(arm_gpreg,)) + +rn = bs(l=4, cls=(arm_gpreg,), fname="rn") +rs = bs(l=4, cls=(arm_gpreg,)) +rm = bs(l=4, cls=(arm_gpreg,)) +op2 = bs(l=12, cls=(arm_op2,)) +lnk = bs_lnk(l=1, fname='lnk', mn_mod=['', 'L']) +offs = bs(l=24, cls=(arm_offs,), fname="offs") + +rn_noarg = bs(l=4, cls=(arm_gpreg_noarg,), fname="rn") + +immop = bs(l=1, fname='immop') +dumr = bs(l=4, default_val="0000", fname="dumr") +# psr = bs(l=1, cls=(arm_psr,), fname="psr") + +psr = bs(l=1, fname="psr") +psr_field = bs(l=4, cls=(arm_psr,)) + +ppi = bs(l=1, fname='ppi') +updown = bs(l=1, fname='updown') +trb = bs_mod_name(l=1, fname='trb', mn_mod=['', 'B']) +wback = bs_mod_name(l=1, fname="wback", mn_mod=['', 'T']) +wback_no_t = bs(l=1, fname="wback") + +op2imm = bs(l=12, cls=(arm_op2imm,)) + +updown_b_nosp = updown_b_nosp_mn(l=1, mn_mod=['D', 'I'], fname='updown') +ppi_b_nosp = ppi_b_nosp_mn(l=1, mn_mod=['A', 'B'], fname='ppi') +updown_b_sp = updown_b_sp_mn(l=1, mn_mod=['A', 'D'], fname='updown') +ppi_b_sp = ppi_b_sp_mn(l=1, mn_mod=['F', 'E'], fname='ppi') + +sbit = bs(l=1, fname="sbit") +rn_sp = bs("1101", cls=(arm_reg_wb,), fname='rnsp') +rn_wb = bs(l=4, cls=(arm_reg_wb_nosp,), fname='rn') +rlist = bs(l=16, cls=(arm_rlist,), fname='rlist') + +swi_i = bs(l=24, cls=(arm_imm,), fname="swi_i") + +opc = bs(l=4, cls=(arm_imm, m_arg), fname='opc') +crn = bs(l=4, cls=(arm_cpreg,), fname='crn') +crd = bs(l=4, cls=(arm_cpreg,), fname='crd') +crm = bs(l=4, cls=(arm_cpreg,), fname='crm') +cpnum = bs(l=4, cls=(arm_preg,), fname='cpnum') +cp = bs(l=3, cls=(arm_imm, m_arg), fname='cp') + +imm8_12 = bs(l=8, cls=(arm_imm8_12, m_arg), fname='imm') 
+tl = bs_mod_name(l=1, fname="tl", mn_mod=['', 'L']) + +cpopc = bs(l=3, cls=(arm_imm, m_arg), fname='cpopc') +imm20 = bs(l=20, cls=(arm_imm, m_arg)) +imm4 = bs(l=4, cls=(arm_imm, m_arg)) +imm12 = bs(l=12, cls=(arm_imm, m_arg)) +imm16 = bs(l=16, cls=(arm_imm, m_arg)) + +imm4_noarg = bs(l=4, fname="imm4") + +imm_4_12 = bs(l=12, cls=(arm_imm_4_12,)) + +lowb = bs(l=1, fname='lowb') +offs_blx = bs(l=24, cls=(arm_offs_blx,), fname="offs") + +fix_cond = bs("1111", fname="cond") + + +class arm_immed(m_arg): + parser = deref + + def decode(self, v): + if self.parent.immop.value == 1: + imm = ExprInt32((self.parent.immedH.value << 4) | v) + else: + imm = gpregs.expr[v] + if self.parent.updown.value == 0: + imm = -imm + if self.parent.ppi.value: + e = ExprOp('preinc', self.parent.rn.expr, imm) + else: + e = ExprOp('postinc', self.parent.rn.expr, imm) + if self.parent.wback.value == 1: + e = ExprOp('wback', e) + self.expr = e + + return True + + def encode(self): + self.parent.immop.value = 1 + self.parent.updown.value = 1 + e = self.expr + if isinstance(e, ExprOp) and e.op == 'wback': + self.parent.wback.value = 1 + e = e.args[0] + else: + self.parent.wback.value = 0 + if e.op == "postinc": + self.parent.ppi.value = 0 + elif e.op == "preinc": + self.parent.ppi.value = 1 + else: + # XXX default + self.parent.ppi.value = 1 + self.parent.rn.expr = e.args[0] + if len(e.args) == 1: + self.value = 0 + self.parent.immedH.value = 0 + return True + e = e.args[1] + if isinstance(e, ExprInt): + v = int(e.arg) + if v < 0 or v & (1 << 31): + self.parent.updown.value = 0 + v = (-v) & 0xFFFFFFFF + if v > 0xff: + log.debug('cannot encode imm XXX') + return False + self.value = v & 0xF + self.parent.immedH.value = v >> 4 + return True + + self.parent.immop.value = 0 + if isinstance(e, ExprOp) and len(e.args) == 1 and e.op == "-": + self.parent.updown.value = 0 + e = e.args[0] + if e in gpregs.expr: + self.value = gpregs.expr.index(e) + self.parent.immedH.value = 0x0 + return True + else: + 
raise ValueError('e should be int: %r' % e) + + @staticmethod + def arg2str(e): + return arm_imm8_12.arg2str(e) + +immedH = bs(l=4, fname='immedH') +immedL = bs(l=4, cls=(arm_immed, m_arg), fname='immedL') +hb = bs(l=1) + + +def armop(name, fields, args=None, alias=False): + dct = {"fields": fields} + dct["alias"] = alias + if args is not None: + dct['args'] = args + type(name, (mn_arm,), dct) + + +def armtop(name, fields, args=None, alias=False): + dct = {"fields": fields} + dct["alias"] = alias + if args is not None: + dct['args'] = args + type(name, (mn_armt,), dct) + + +op_list = ['AND', 'EOR', 'SUB', 'RSB', 'ADD', 'ADC', 'SBC', 'RSC', + 'TST', 'TEQ', 'CMP', 'CMN', 'ORR', 'MOV', 'BIC', 'MVN'] +data_mov_name = {'MOV': 13, 'MVN': 15} +data_test_name = {'TST': 8, 'TEQ': 9, 'CMP': 10, 'CMN': 11} + +data_name = {} +for i, n in enumerate(op_list): + if n in data_mov_name.keys() + data_test_name.keys(): + continue + data_name[n] = i +bs_data_name = bs_name(l=4, name=data_name) + +bs_data_mov_name = bs_name(l=4, name=data_mov_name) + +bs_data_test_name = bs_name(l=4, name=data_test_name) + + +transfer_name = {'STR': 0, 'LDR': 1} +bs_transfer_name = bs_name(l=1, name=transfer_name) + +transferh_name = {'STRH': 0, 'LDRH': 1} +bs_transferh_name = bs_name(l=1, name=transferh_name) + + +transfer_ldr_name = {'LDRD': 0, 'LDRSB': 1} +bs_transfer_ldr_name = bs_name(l=1, name=transfer_ldr_name) + +btransfer_name = {'STM': 0, 'LDM': 1} +bs_btransfer_name = bs_name(l=1, name=btransfer_name) + +ctransfer_name = {'STC': 0, 'LDC': 1} +bs_ctransfer_name = bs_name(l=1, name=ctransfer_name) + +mr_name = {'MCR': 0, 'MRC': 1} +bs_mr_name = bs_name(l=1, name=mr_name) + +armop("mul", [bs('000000'), bs('0'), scc, rd, + bs('0000'), rs, bs('1001'), rm], [rd, rm, rs]) +armop("umull", [bs('000010'), + bs('0'), scc, rd, rdl, rs, bs('1001'), rm], [rdl, rd, rm, rs]) +armop("smull", [bs('000011'), bs('0'), scc, rd, + rdl, rs, bs('1001'), rm], [rdl, rd, rm, rs]) +armop("smlal", [bs('000011'), 
bs('1'), scc, rd, + rdl, rs, bs('1001'), rm], [rdl, rd, rm, rs]) +armop("mla", [bs('000000'), bs('1'), scc, rd, + rn, rs, bs('1001'), rm], [rd, rm, rs, rn]) +armop("mrs", [bs('00010'), psr, bs('00'), + psr_field, rd, bs('000000000000')], [rd, psr]) +armop("msr", [bs('00010'), psr, bs('10'), psr_field, + bs('1111'), bs('0000'), bs('0000'), rm], [psr_field, rm]) +armop("data", [bs('00'), immop, bs_data_name, scc, rn, rd, op2], [rd, rn, op2]) +armop("data_mov", + [bs('00'), immop, bs_data_mov_name, scc, bs('0000'), rd, op2], [rd, op2]) +armop("data_test", [bs('00'), immop, bs_data_test_name, dumscc, rn, dumr, op2]) +armop("b", [bs('101'), lnk, offs]) + +# TODO TEST +armop("und", [bs('011'), imm20, bs('1'), imm4]) +armop("transfer", [bs('01'), immop, ppi, updown, trb, wback_no_t, + bs_transfer_name, rn_noarg, rd, op2imm], [rd, op2imm]) +armop("transferh", [bs('000'), ppi, updown, immop, wback_no_t, + bs_transferh_name, rn_noarg, rd, immedH, bs('1011'), immedL], [rd, immedL]) +armop("ldrd", [bs('000'), ppi, updown, immop, wback_no_t, bs_transfer_ldr_name, + rn_noarg, rd, immedH, bs('1101'), immedL], [rd, immedL]) +armop("ldrsh", [bs('000'), ppi, updown, immop, wback_no_t, bs('1'), rn_noarg, + rd, immedH, bs('1'), bs('1'), bs('1'), bs('1'), immedL], [rd, immedL]) +armop("strd", [bs('000'), ppi, updown, immop, wback_no_t, bs('0'), rn_noarg, + rd, immedH, bs('1'), bs('1'), bs('1'), bs('1'), immedL], [rd, immedL]) +armop("btransfersp", [bs('100'), ppi_b_sp, updown_b_sp, sbit, wback_no_t, + bs_btransfer_name, rn_sp, rlist]) +armop("btransfer", [bs('100'), ppi_b_nosp, updown_b_nosp, sbit, wback_no_t, + bs_btransfer_name, rn_wb, rlist]) +# TODO: TEST +armop("swp", [bs('00010'), trb, bs('00'), rn, rd, bs('0000'), bs('1001'), rm]) +armop("svc", [bs('1111'), swi_i]) +armop("cdp", [bs('1110'), opc, crn, crd, cpnum, cp, bs('0'), crm], + [cpnum, opc, crd, crn, crm, cp]) +armop("cdata", [bs('110'), ppi, updown, tl, wback_no_t, bs_ctransfer_name, + rn_noarg, crd, cpnum, imm8_12], 
[cpnum, crd, imm8_12]) +armop("mr", [bs('1110'), cpopc, bs_mr_name, crn, rd, cpnum, cp, bs('1'), crm], + [cpnum, cpopc, rd, crn, crm, cp]) +armop("bkpt", [bs('00010010'), imm12, bs('0111'), imm4]) +armop("bx", [bs('000100101111111111110001'), rn]) +armop("mov", [bs('00110000'), imm4_noarg, rd, imm_4_12], [rd, imm_4_12]) +armop("movt", [bs('00110100'), imm4_noarg, rd, imm_4_12], [rd, imm_4_12]) +armop("blx", [bs('00010010'), bs('1111'), + bs('1111'), bs('1111'), bs('0011'), rm], [rm]) +armop("blx", [fix_cond, bs('101'), lowb, offs_blx], [offs_blx]) +armop("clz", [bs('00010110'), bs('1111'), + rd, bs('1111'), bs('0001'), rm], [rd, rm]) +armop("qadd", + [bs('00010000'), rn, rd, bs('0000'), bs('0101'), rm], [rd, rm, rn]) + + +# +# thumnb ####################### +# +# ARM7-TDMI-manual-pt3 +gpregs_l = reg_info(regs_str[:8], regs_expr[:8]) +gpregs_h = reg_info(regs_str[8:], regs_expr[8:]) + +gpregs_sppc = reg_info(regs_str[-1:] + regs_str[13:14], + regs_expr[-1:] + regs_expr[13:14]) + +deref_low = Group(LBRACK + gpregs_l.parser + Optional( + COMMA + shift_off) + RBRACK).setParseAction(deref2expr_pre) +deref_pc = Group(LBRACK + gpregs_pc.parser + Optional( + COMMA + shift_off) + RBRACK).setParseAction(deref2expr_pre) +deref_sp = Group(LBRACK + gpregs_sp.parser + COMMA + + shift_off + RBRACK).setParseAction(deref2expr_pre) + +gpregs_l_wb = Group( + gpregs_l.parser + Optional('!')).setParseAction(parsegpreg_wb) + + +class arm_offreg(m_arg): + parser = deref_pc + + def decodeval(self, v): + return v + + def encodeval(self, v): + return v + + def decode(self, v): + v = v & self.lmask + v = self.decodeval(v) + if v: + self.expr = self.off_reg + ExprInt32(v) + else: + self.expr = self.off_reg + + e = self.expr + if isinstance(e, ExprOp) and e.op == 'wback': + self.parent.wback.value = 1 + e = e.args[0] + return True + + def encode(self): + e = self.expr + if not (isinstance(e, ExprOp) and e.op == "preinc"): + log.debug('cannot encode %r' % e) + return False + if e.args[0] != 
self.off_reg: + log.debug('cannot encode reg %r' % e.args[0]) + return False + v = int(e.args[1].arg) + v = self.encodeval(v) + self.value = v + return True + + @staticmethod + def arg2str(e): + if isinstance(e, ExprId): + o = str(e) + elif (len(e.args) == 2 and + isinstance(e.args[1], ExprInt) and e.args[1].arg == 0): + o = "%s" % e.args[0] + else: + o = '%s, %s' % (e.args[0], e.args[1]) + return '[%s]' % o + + +class arm_offpc(arm_offreg): + off_reg = regs_expr[15] + + +class arm_offsp(arm_offreg): + parser = deref_sp + off_reg = regs_expr[13] + + def decodeval(self, v): + return v << 2 + + def encodeval(self, v): + return v >> 2 + + +class arm_offspc(arm_offs): + + def decodeval(self, v): + return v << 1 + + def encodeval(self, v): + return v >> 1 + + +class arm_offspchl(arm_offs): + + def decodeval(self, v): + if self.parent.hl.value == 0: + return v << 12 + else: + return v << 1 + + def encodeval(self, v): + if v > (1 << 12): + self.parent.hl.value = 0 + v >>= 12 + else: + self.parent.hl.value = 1 + v >>= 1 + return v + + +class arm_off8sppc(arm_imm): + + def decodeval(self, v): + return v << 2 + + def encodeval(self, v): + return v >> 2 + + +class arm_off7(arm_imm): + + def decodeval(self, v): + return v << 2 + + def encodeval(self, v): + return v >> 2 + + +class arm_deref(m_arg): + parser = deref_low + + def decode(self, v): + v = v & self.lmask + rbase = regs_expr[v] + e = ExprOp('preinc', rbase, self.parent.off.expr) + self.expr = e + return True + + def encode(self): + e = self.expr + if not (isinstance(e, ExprOp) and e.op == 'preinc'): + log.debug('cannot encode %r' % e) + return False + off = e.args[1] + if isinstance(off, ExprId): + self.parent.off.expr = off + elif isinstance(off, ExprInt): + self.parent.off.expr = off + else: + log.debug('cannot encode off %r' % off) + return False + self.value = gpregs.expr.index(e.args[0]) + if self.value >= 1 << self.l: + log.debug('cannot encode reg %r' % off) + return False + return True + + @staticmethod + def 
arg2str(e): + if not (isinstance(e, ExprOp) and e.op == 'preinc'): + log.debug('cannot str %r' % e) + raise ValueError() + return '[%s, %s]' % (e.args[0], e.args[1]) + + +class arm_offbw(imm_noarg): + + def decode(self, v): + v = v & self.lmask + if self.parent.trb.value == 0: + v <<= 2 + self.expr = ExprInt32(v) + return True + + def encode(self): + if not isinstance(self.expr, ExprInt): + return False + v = int(self.expr.arg) + if self.parent.trb.value == 0: + if v & 3: + log.debug('off must be aligned %r' % v) + return False + v >>= 2 + self.value = v + return True + + +class arm_offh(imm_noarg): + + def decode(self, v): + v = v & self.lmask + v <<= 1 + self.expr = ExprInt32(v) + return True + + def encode(self): + if not isinstance(self.expr, ExprInt): + return False + v = int(self.expr.arg) + if v & 1: + log.debug('off must be aligned %r' % v) + return False + v >>= 1 + self.value = v + return True + + +class armt_rlist(m_arg): + parser = gpreg_list + + def encode(self): + e = self.expr + rlist = [gpregs_l.expr.index(x) for x in e.args] + v = 0 + for r in rlist: + v |= 1 << r + self.value = v + return True + + def decode(self, v): + v = v & self.lmask + out = [] + for i in xrange(0x10): + if 1 << i & v: + out.append(gpregs.expr[i]) + e = ExprOp('reglist', *out) + self.expr = e + return True + + @staticmethod + def arg2str(e): + o = [] + o = [gpregs.expr.index(x) for x in e.args] + out = reglist2str(o) + return out + + +class armt_rlist_pclr(armt_rlist): + + def encode(self): + e = self.expr + reg_l = list(e.args) + self.parent.pclr.value = 0 + if self.parent.pp.value == 0: + # print 'push' + if regs_expr[14] in reg_l: + reg_l.remove(regs_expr[14]) + self.parent.pclr.value = 1 + else: + # print 'pop', + if regs_expr[15] in reg_l: + reg_l.remove(regs_expr[15]) + self.parent.pclr.value = 1 + rlist = [gpregs.expr.index(x) for x in reg_l] + v = 0 + for r in rlist: + v |= 1 << r + self.value = v + return True + + def decode(self, v): + v = v & self.lmask + out = [] 
+ for i in xrange(0x10): + if 1 << i & v: + out.append(gpregs.expr[i]) + + if self.parent.pclr.value == 1: + if self.parent.pp.value == 0: + out += [regs_expr[14]] + else: + out += [regs_expr[15]] + e = ExprOp('reglist', *out) + self.expr = e + return True + + +class armt_reg_wb(arm_reg_wb): + reg_info = gpregs_l + parser = gpregs_l_wb + + def decode(self, v): + v = v & self.lmask + e = self.reg_info.expr[v] + e = ExprOp('wback', e) + self.expr = e + return True + + def encode(self): + e = self.expr + self.value = self.reg_info.expr.index(e.args[0]) + return True + + +class arm_gpreg_l(arm_reg): + reg_info = gpregs_l + parser = reg_info.parser + + +class arm_gpreg_h(arm_reg): + reg_info = gpregs_h + parser = reg_info.parser + + +class arm_gpreg_l_noarg(arm_gpreg_noarg): + reg_info = gpregs_l + parser = reg_info.parser + + +class arm_sppc(arm_reg): + reg_info = gpregs_sppc + parser = reg_info.parser + + +class arm_sp(arm_reg): + reg_info = gpregs_sp + parser = reg_info.parser + + +off5 = bs(l=5, cls=(arm_imm,), fname="off") +off3 = bs(l=3, cls=(arm_imm,), fname="off") +off8 = bs(l=8, cls=(arm_imm,), fname="off") +off7 = bs(l=7, cls=(arm_off7,), fname="off") + +rdl = bs(l=3, cls=(arm_gpreg_l,), fname="rd") +rnl = bs(l=3, cls=(arm_gpreg_l,), fname="rn") +rsl = bs(l=3, cls=(arm_gpreg_l,), fname="rs") +rml = bs(l=3, cls=(arm_gpreg_l,), fname="rm") +rol = bs(l=3, cls=(arm_gpreg_l,), fname="ro") +rbl = bs(l=3, cls=(arm_gpreg_l,), fname="rb") +rbl_deref = bs(l=3, cls=(arm_deref,), fname="rb") +dumrh = bs(l=3, default_val="000") + +rdh = bs(l=3, cls=(arm_gpreg_h,), fname="rd") +rsh = bs(l=3, cls=(arm_gpreg_h,), fname="rs") + +offpc8 = bs(l=8, cls=(arm_offpc,), fname="offs") +offsp8 = bs(l=8, cls=(arm_offsp,), fname="offs") +rol_noarg = bs(l=3, cls=(arm_gpreg_l_noarg,), fname="off") + +off5bw = bs(l=5, cls=(arm_offbw,), fname="off") +off5h = bs(l=5, cls=(arm_offh,), fname="off") +sppc = bs(l=1, cls=(arm_sppc,)) + + +pclr = bs(l=1, fname='pclr') + + +sp = bs(l=0, 
cls=(arm_sp,)) + +tswi_i = bs(l=8, cls=(arm_imm,), fname="swi_i") + + +off8s = bs(l=8, cls=(arm_offs,), fname="offs") +trlistpclr = bs(l=8, cls=(armt_rlist_pclr,)) +trlist = bs(l=8, cls=(armt_rlist,)) + +rbl_wb = bs(l=3, cls=(armt_reg_wb,), fname='rb') + +offs8 = bs(l=8, cls=(arm_offspc,), fname="offs") +offs11 = bs(l=11, cls=(arm_offspc,), fname="offs") + +offs11hl = bs(l=11, cls=(arm_offspchl,), fname="offs") +hl = bs(l=1, prio=default_prio + 1, fname='hl') +off8sppc = bs(l=8, cls=(arm_off8sppc,), fname="off") + +imm8_d1 = bs(l=8, default_val="00000001") +imm8 = bs(l=8, cls=(arm_imm,), default_val = "00000001") + + +mshift_name = {'LSLS': 0, 'LSRS': 1, 'ASRS': 2} +bs_mshift_name = bs_name(l=2, name=mshift_name) + + +addsub_name = {'ADDS': 0, 'SUBS': 1} +bs_addsub_name = bs_name(l=1, name=addsub_name) + +mov_cmp_add_sub_name = {'MOVS': 0, 'CMP': 1, 'ADDS': 2, 'SUBS': 3} +bs_mov_cmp_add_sub_name = bs_name(l=2, name=mov_cmp_add_sub_name) + +alu_name = {'ANDS': 0, 'EORS': 1, 'LSLS': 2, 'LSRS': 3, + 'ASRS': 4, 'ADCS': 5, 'SBCS': 6, 'RORS': 7, + 'TST': 8, 'NEGS': 9, 'CMP': 10, 'CMN': 11, + 'ORRS': 12, 'MULS': 13, 'BICS': 14, 'MVNS': 15} +bs_alu_name = bs_name(l=4, name=alu_name) + +hiregop_name = {'ADDS': 0, 'CMP': 1, 'MOV': 2} +bs_hiregop_name = bs_name(l=2, name=hiregop_name) + +ldr_str_name = {'STR': 0, 'LDR': 1} +bs_ldr_str_name = bs_name(l=1, name=ldr_str_name) + +ldrh_strh_name = {'STRH': 0, 'LDRH': 1} +bs_ldrh_strh_name = bs_name(l=1, name=ldrh_strh_name) + +ldstsp_name = {'STR': 0, 'LDR': 1} +bs_ldstsp_name = bs_name(l=1, name=ldstsp_name) + +addsubsp_name = {'ADD': 0, 'SUB': 1} +bs_addsubsp_name = bs_name(l=1, name=addsubsp_name) + +pushpop_name = {'PUSH': 0, 'POP': 1} +bs_pushpop_name = bs_name(l=1, name=pushpop_name, fname='pp') + +tbtransfer_name = {'STMIA': 0, 'LDMIA': 1} +bs_tbtransfer_name = bs_name(l=1, name=tbtransfer_name) + +br_name = {'BEQ': 0, 'BNE': 1, 'BCS': 2, 'BCC': 3, 'BMI': 4, + 'BPL': 5, 'BVS': 6, 'BVC': 7, 'BHI': 8, 'BLS': 9, + 'BGE': 10, 
'BLT': 11, 'BGT': 12, 'BLE': 13} +bs_br_name = bs_name(l=4, name=br_name) + + +armtop("mshift", [bs('000'), bs_mshift_name, off5, rsl, rdl], [rdl, rsl, off5]) +armtop("addsubr", + [bs('000110'), bs_addsub_name, rnl, rsl, rdl], [rdl, rsl, rnl]) +armtop("addsubi", + [bs('000111'), bs_addsub_name, off3, rsl, rdl], [rdl, rsl, off3]) +armtop("mcas", [bs('001'), bs_mov_cmp_add_sub_name, rnl, off8]) +armtop("alu", [bs('010000'), bs_alu_name, rsl, rdl], [rdl, rsl]) + # should not be used ?? +armtop("hiregop00", + [bs('010001'), bs_hiregop_name, bs('00'), rsl, rdl], [rdl, rsl]) +armtop("hiregop01", + [bs('010001'), bs_hiregop_name, bs('01'), rsh, rdl], [rdl, rsh]) +armtop("hiregop10", + [bs('010001'), bs_hiregop_name, bs('10'), rsl, rdh], [rdh, rsl]) +armtop("hiregop11", + [bs('010001'), bs_hiregop_name, bs('11'), rsh, rdh], [rdh, rsh]) +armtop("bx", [bs('010001'), bs('11'), bs('00'), rsl, dumrh]) +armtop("bx", [bs('010001'), bs('11'), bs('01'), rsh, dumrh]) +armtop("ldr", [bs('01001'), rdl, offpc8]) +armtop("ldrstr", [bs('0101'), bs_ldr_str_name, + trb, bs('0'), rol_noarg, rbl_deref, rdl], [rdl, rbl_deref]) +armtop("strh", [bs('0101'), bs('00'), bs('1'), + rol_noarg, rbl_deref, rdl], [rdl, rbl_deref]) +armtop("ldrh", [bs('0101'), bs('10'), bs('1'), + rol_noarg, rbl_deref, rdl], [rdl, rbl_deref]) +armtop("ldsb", [bs('0101'), bs('01'), bs('1'), + rol_noarg, rbl_deref, rdl], [rdl, rbl_deref]) +armtop("ldsh", [bs('0101'), bs('11'), bs('1'), + rol_noarg, rbl_deref, rdl], [rdl, rbl_deref]) +armtop("ldst", [bs('011'), trb, + bs_ldr_str_name, off5bw, rbl_deref, rdl], [rdl, rbl_deref]) +armtop("ldhsth", + [bs('1000'), bs_ldrh_strh_name, off5h, rbl_deref, rdl], [rdl, rbl_deref]) +armtop("ldstsp", [bs('1001'), bs_ldstsp_name, rdl, offsp8], [rdl, offsp8]) +armtop("add", [bs('1010'), sppc, rdl, off8sppc], [rdl, sppc, off8sppc]) +armtop("addsp", [bs('10110000'), bs_addsubsp_name, sp, off7], [sp, off7]) +armtop("pushpop", + [bs('1011'), bs_pushpop_name, bs('10'), pclr, trlistpclr], 
[trlistpclr]) +armtop("btransfersp", [bs('1100'), bs_tbtransfer_name, rbl_wb, trlist]) +armtop("br", [bs('1101'), bs_br_name, offs8]) +armtop("blx", [bs("01000111"), bs('10'), rnl, bs('000')]) +armtop("swi", [bs('11011111'), tswi_i]) +armtop("b", [bs('11100'), offs11]) +armtop("und", [bs('1101'), bs('1110'), imm8_d1]) + +# +# thumnb2 ###################### +# + +# ARM Architecture Reference Manual Thumb-2 Supplement + +armt_gpreg_shift_off = Group( + gpregs_nosppc.parser + allshifts_t_armt + base_expr +).setParseAction(shift2expr) +armt_gpreg_shift_off |= gpregs_nosppc.parser + + +class arm_gpreg_nosppc(arm_reg): + reg_info = gpregs_nosppc + + +class armt_gpreg_rm_shift_off(arm_reg): + parser = armt_gpreg_shift_off + + def decode(self, v): + v = v & self.lmask + r = gpregs_nosppc.expr[v] + + i = int(self.parent.imm5_3.value) << 2 + i |= int(self.parent.imm5_2.value) + + if self.parent.stype.value < 3 or i != 0: + shift = allshifts_armt[self.parent.stype.value] + else: + shift = allshifts_armt[4] + self.expr = ExprOp(shift, r, ExprInt32(i)) + return True + + def encode(self): + e = self.expr + if isinstance(e, ExprId): + self.value = gpregs_nosppc.index(e) + self.parent.stype.value = 0 + self.parent.imm5_3.value = 0 + self.parent.imm5_2.value = 0 + return True + shift = e.op + r = gpregs_nosppc.expr.index(e.args[0]) + self.value = r + i = int(e.args[1].arg) + if shift == 'rrx': + if i != 1: + log.debug('rrx shift must be 1') + return False + self.parent.imm5_3.value = 0 + self.parent.imm5_2.value = 0 + self.parent.stype.value = 3 + return True + self.parent.stype.value = allshifts_armt.index(shift) + self.parent.imm5_2.value = i & 3 + self.parent.imm5_3.value = i >> 2 + return True + + @staticmethod + def arg2str(e): + if isinstance(e, ExprId): + return str(e) + return str(e)[1:-1] + +rn_nosppc = bs(l=4, cls=(arm_gpreg_nosppc,), fname="rn") +rd_nosppc = bs(l=4, cls=(arm_gpreg_nosppc,), fname="rd") +rm_sh = bs(l=4, cls=(armt_gpreg_rm_shift_off,), fname="rm") + + 
class armt2_imm12(arm_imm):
    """Thumb-2 12-bit "modified immediate" operand.

    The 12-bit field is split over three bit-fields of the instruction:
    the low 8 bits are this field, bits 8..10 come from ``parent.imm12_3``
    and bit 11 from ``parent.imm12_1``.
    """

    def decode(self, v):
        """Rebuild the 32-bit constant from the three sub-fields.

        Always returns True and stores the result in ``self.expr``.
        """
        v = v & self.lmask
        v |= int(self.parent.imm12_3.value) << 8
        v |= int(self.parent.imm12_1.value) << 11

        # simple encoding
        if 0 <= v < 0x100:
            self.expr = ExprInt32(v)
            return True
        # 00XY00XY form
        if v >> 8 == 1:
            v &= 0xFF
            self.expr = ExprInt32((v << 16) | v)
            return True
        # XY00XY00 form
        if v >> 8 == 2:
            v &= 0xFF
            self.expr = ExprInt32((v << 24) | (v << 8))
            return True
        # XYXYXYXY
        if v >> 8 == 3:
            v &= 0xFF
            self.expr = ExprInt32((v << 24) | (v << 16) | (v << 8) | v)
            return True
        # Rotated form: the top 5 bits are the rotation and only the low
        # 7 bits are data; the rotated byte always has an implicit leading 1.
        # (Bit 7 of the field is the rotation's low bit, not a data bit.)
        # NOTE(review): myror32 is defined outside this block; presumably a
        # 32-bit rotate right -- confirm.
        r = v >> 7
        v = 0x80 | (v & 0x7F)
        self.expr = ExprInt32(myror32(v, r))
        return True

    def encode(self):
        """Find a 12-bit encoding for the constant held in ``self.expr``.

        Returns True and fills this field plus ``parent.imm12_3`` and
        ``parent.imm12_1`` on success; returns False when the constant has
        no encoding.
        """
        v = int(self.expr.arg)
        value = None
        # simple encoding
        if 0 <= v < 0x100:
            value = v
        elif v & 0xFF00FF00 == 0 and v & 0xFF == (v >> 16) & 0xff:
            # 00XY00XY form
            value = (1 << 8) | (v & 0xFF)
        elif v & 0x00FF00FF == 0 and (v >> 8) & 0xff == (v >> 24) & 0xff:
            # XY00XY00 form
            value = (2 << 8) | ((v >> 8) & 0xff)
        elif (v & 0xFF ==
              (v >> 8) & 0xFF ==
              (v >> 16) & 0xFF ==
              (v >> 24) & 0xFF):
            # XYXYXYXY form
            value = (3 << 8) | ((v >> 16) & 0xff)
        else:
            # rol encoding
            # Rotations below 8 would set the two top bits of the field to
            # 00, which decode() reads as one of the forms above, so only
            # rotations 8..31 are usable here.
            for i in xrange(8, 32):
                o = myrol32(v, i)
                if 0x80 <= o < 0x100:
                    # The leading 1 of the byte is implicit: keep only the
                    # 7 data bits so the rotation's low bit is not clobbered.
                    value = (i << 7) | (o & 0x7F)
                    break
        if value is None:
            log.debug('cannot encode imm12')
            return False
        self.value = value & self.lmask
        self.parent.imm12_3.value = (value >> 8) & self.parent.imm12_3.lmask
        self.parent.imm12_1.value = (value >> 11) & self.parent.imm12_1.lmask
        return True


class armt2_imm10l(arm_imm):
    """Low 10 bits of a 25-bit signed, 4-byte-granular branch offset.

    The remaining bits live in ``parent.sign``, ``parent.j1``,
    ``parent.j2`` and ``parent.imm10h``.
    """

    def decode(self, v):
        """Assemble the offset from the parent fields and sign-extend it."""
        v = v & self.lmask
        s = self.parent.sign.value
        j1 = self.parent.j1.value
        j2 = self.parent.j2.value
        imm10h = self.parent.imm10h.value
        imm10l = v

        i1, i2 = j1 ^ s ^ 1, j2 ^ s ^ 1

        v = (s << 24) | (i1 << 23) | (
            i2 << 22) | (imm10h << 12) | (imm10l << 2)
        v = sign_ext(v, 25, 32)
        self.expr = ExprInt32(v)
        return True

    def encode(self):
        """Split the offset in ``self.expr`` back into the parent fields.

        Returns False when the expression is not an integer or when the
        value does not fit the 25-bit signed range rebuilt by decode().
        """
        if not isinstance(self.expr, ExprInt):
            return False
        # presumably already a 32-bit unsigned value; masked defensively
        v = self.expr.arg.arg & 0xffffffff
        # decode() sign-extends from bit 24, so the fields must hold the
        # two's-complement bits of the offset (not its magnitude), and
        # bits 31..24 must all equal the sign bit.
        s = (v >> 24) & 1
        if (v >> 24) != (0xff if s else 0):
            return False
        i1, i2, imm10h, imm10l = (v >> 23) & 1, (
            v >> 22) & 1, (v >> 12) & 0x3ff, (v >> 2) & 0x3ff
        j1, j2 = i1 ^ s ^ 1, i2 ^ s ^ 1
        self.parent.sign.value = s
        self.parent.j1.value = j1
        self.parent.j2.value = j2
        self.parent.imm10h.value = imm10h
        self.value = imm10l
        return True


class armt2_imm11l(arm_imm):
    """Low 11 bits of a 25-bit signed, 2-byte-granular branch offset.

    The remaining bits live in ``parent.sign``, ``parent.j1``,
    ``parent.j2`` and ``parent.imm10h``.
    """

    def decode(self, v):
        """Assemble the offset from the parent fields and sign-extend it."""
        v = v & self.lmask
        s = self.parent.sign.value
        j1 = self.parent.j1.value
        j2 = self.parent.j2.value
        imm10h = self.parent.imm10h.value
        imm11l = v

        i1, i2 = j1 ^ s ^ 1, j2 ^ s ^ 1

        v = (s << 24) | (i1 << 23) | (
            i2 << 22) | (imm10h << 12) | (imm11l << 1)
        v = sign_ext(v, 25, 32)
        self.expr = ExprInt32(v)
        return True

    def encode(self):
        """Split the offset in ``self.expr`` back into the parent fields.

        Returns False when the expression is not an integer or when the
        value does not fit the 25-bit signed range rebuilt by decode().
        """
        if not isinstance(self.expr, ExprInt):
            return False
        # presumably already a 32-bit unsigned value; masked defensively
        v = self.expr.arg.arg & 0xffffffff
        # Same reasoning as armt2_imm10l.encode: store the two's-complement
        # bits and require bits 31..24 to be a pure sign extension.
        s = (v >> 24) & 1
        if (v >> 24) != (0xff if s else 0):
            return False
        i1, i2, imm10h, imm11l = (v >> 23) & 1, (
            v >> 22) & 1, (v >> 12) & 0x3ff, (v >> 1) & 0x7ff
        j1, j2 = i1 ^ s ^ 1, i2 ^ s ^ 1
        self.parent.sign.value = s
        self.parent.j1.value = j1
        self.parent.j2.value = j2
        self.parent.imm10h.value = imm10h
        self.value = imm11l
        return True


# The three pieces of the 12-bit modified immediate (see armt2_imm12).
imm12_1 = bs(l=1, fname="imm12_1", order=1)
imm12_3 = bs(l=3, fname="imm12_3", order=1)
imm12_8 = bs(l=8, cls=(armt2_imm12,), fname="imm", order=2)


# Shift amount (split in 3 + 2 bits) and shift type fields.
imm5_3 = bs(l=3, fname="imm5_3")
imm5_2 = bs(l=2, fname="imm5_2")
imm_stype = bs(l=2, fname="stype")

imm1 = bs(l=1, fname="imm1")


class armt_imm5_1(arm_imm):
    """5-bit offset field extended by ``parent.imm1`` and scaled by 2."""

    def decode(self, v):
        """Build ``(imm1:v) << 1`` and sign-extend it from 7 to 32 bits."""
        v = sign_ext(((self.parent.imm1.value << 5) | v) << 1, 7, 32)
        self.expr = ExprInt32(v)
        return True

    def encode(self):
        """Store bit 6 in ``parent.imm1`` and bits 5..1 in this field.

        Returns False when ``self.expr`` is not an integer.
        """
        if not isinstance(self.expr, ExprInt):
            return False
        v = self.expr.arg.arg
        if v & 0x80000000:
            # keep only the 7 low two's-complement bits of a negative value
            v &= (1 << 7) - 1
        self.parent.imm1.value = (v >> 6) & 1
        self.value = (v >> 1) & 0x1f
        return True
+ +imm5_off = bs(l=5, cls=(armt_imm5_1,), fname="imm5_off") + +tsign = bs(l=1, fname="sign") +tj1 = bs(l=1, fname="j1") +tj2 = bs(l=1, fname="j2") + +timm10H = bs(l=10, fname="imm10h") +timm10L = bs(l=10, cls=(armt2_imm10l,), fname="imm10l") +timm11L = bs(l=11, cls=(armt2_imm11l,), fname="imm11l") + + +armtop("adc", [bs('11110'), imm12_1, bs('0'), bs('1010'), scc, rn_nosppc, + bs('0'), imm12_3, rd_nosppc, imm12_8]) +armtop("adc", [bs('11101'), bs('01'), bs('1010'), scc, rn_nosppc, + bs('0'), imm5_3, rd_nosppc, imm5_2, imm_stype, rm_sh]) +armtop("bl", [bs('11110'), tsign, timm10H, + bs('11'), tj1, bs('1'), tj2, timm11L]) +armtop("blx", [bs('11110'), tsign, timm10H, + bs('11'), tj1, bs('0'), tj2, timm10L, bs('0')]) +armtop("cbz", [bs('101100'), imm1, bs('1'), imm5_off, rnl], [rnl, imm5_off]) +armtop("cbnz", [bs('101110'), imm1, bs('1'), imm5_off, rnl], [rnl, imm5_off]) diff --git a/miasm2/arch/arm/disasm.py b/miasm2/arch/arm/disasm.py new file mode 100644 index 00000000..64e10eec --- /dev/null +++ b/miasm2/arch/arm/disasm.py @@ -0,0 +1,51 @@ +from miasm2.core.asmbloc import asm_constraint, disasmEngine +from arch import mn_arm, mn_armt + + +def cb_arm_fix_call( + mn, attrib, pool_bin, cur_bloc, offsets_to_dis, symbol_pool): + """ + for arm: + MOV LR, PC + LDR PC, [R5, 0x14] + * is a subcall * + + """ + if len(cur_bloc.lines) < 2: + return + l1 = cur_bloc.lines[-1] + l2 = cur_bloc.lines[-2] + if l1.name != "LDR": + return + if l2.name != "MOV": + return + # print cur_bloc + # print l1 + if not l1.args[0] in mn.pc.values(): + return + if not l2.args[1] in mn.pc.values(): + return + cur_bloc.add_cst(l1.offset + 4, asm_constraint.c_next, symbol_pool) + offsets_to_dis.add(l1.offset + 4) + +cb_arm_funcs = [cb_arm_fix_call] + + +def cb_arm_disasm(mn, attrib, pool_bin, cur_bloc, offsets_to_dis, symbol_pool): + for func in cb_arm_funcs: + func(mn, attrib, pool_bin, cur_bloc, offsets_to_dis, symbol_pool) + + +class dis_arm(disasmEngine): + attrib = 'arm' + + def __init__(self, 
bs=None, **kwargs): + super(dis_arm, self).__init__(mn_arm, self.attrib, bs, **kwargs) + self.dis_bloc_callback = cb_arm_disasm + + +class dis_armt(disasmEngine): + attrib = 'armt' + + def __init__(self, bs=None, **kwargs): + super(dis_armt, self).__init__(mn_armt, self.attrib, bs, **kwargs) diff --git a/miasm2/arch/arm/ira.py b/miasm2/arch/arm/ira.py new file mode 100644 index 00000000..7ff2c142 --- /dev/null +++ b/miasm2/arch/arm/ira.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python +#-*- coding:utf-8 -*- + +from miasm2.expression.expression import * +from miasm2.ir.ir import ir, irbloc +from miasm2.ir.analysis import ira +from miasm2.arch.arm.sem import ir_arm, ir_armt +from miasm2.arch.arm.regs import * +# from miasm2.core.graph import DiGraph + + +class ir_a_arm_base(ir_arm, ira): + + def __init__(self, symbol_pool=None): + ir_arm.__init__(self, symbol_pool) + self.ret_reg = self.arch.regs.R0 + + +class ir_a_arm(ir_a_arm_base): + + def __init__(self, symbol_pool=None): + ir_a_arm_base.__init__(self, symbol_pool) + self.ret_reg = self.arch.regs.R0 + + # for test XXX TODO + def set_dead_regs(self, b): + b.rw[-1][1].add(self.arch.regs.zf) + b.rw[-1][1].add(self.arch.regs.nf) + b.rw[-1][1].add(self.arch.regs.of) + b.rw[-1][1].add(self.arch.regs.cf) + + def call_effects(self, ad): + irs = [[ExprAff(self.ret_reg, ExprOp('call_func_ret', ad, self.sp)), + ExprAff(self.sp, ExprOp('call_func_stack', ad, self.sp)), + ]] + return irs + + def post_add_bloc(self, bloc, ir_blocs): + ir.post_add_bloc(self, bloc, ir_blocs) + # flow_graph = DiGraph() + for irb in ir_blocs: + # print 'X'*40 + # print irb + pc_val = None + lr_val = None + for exprs in irb.irs: + for e in exprs: + if e.dst == PC: + pc_val = e.src + if e.dst == LR: + lr_val = e.src + if pc_val is None or lr_val is None: + continue + if not isinstance(lr_val, ExprInt): + continue + + l = bloc.lines[-1] + if lr_val.arg != l.offset + l.l: + continue + # print 'IS CALL!' 
+ lbl = bloc.get_next() + new_lbl = self.gen_label() + irs = self.call_effects(pc_val) + nbloc = irbloc(new_lbl, ExprId(lbl, size=self.pc.size), irs) + nbloc.lines = [l] + self.blocs[new_lbl] = nbloc + irb.dst = ExprId(new_lbl, size=self.pc.size) + + """ + if not bloc.lines: + return + l = bloc.lines[-1] + sub_call_dst = None + if not l.is_subcall(): + return + sub_call_dst = l.args[0] + if self.ExprIsLabel(sub_call_dst): + sub_call_dst = sub_call_dst.name + for b in ir_blocs: + l = b.lines[-1] + sub_call_dst_b = None + sub_call_dst_b = l.args[0] + #if self.ExprIsLabel(sub_call_dst_b): + # sub_call_dst_b = sub_call_dst.name + #if str(b.dst) == str(sub_call_dst_b): + # pass + if not l.is_subcall(): + continue + if b.dst != sub_call_dst_b: + continue + sub_call_dst_b = l.args[0] + if self.ExprIsLabel(sub_call_dst_b): + sub_call_dst_b = sub_call_dst.name + lbl = bloc.get_next() + new_lbl = self.gen_label() + irs = self.call_effects(l.args[0]) + nbloc = irbloc(new_lbl, ExprId(lbl, size=self.pc.size), irs) + nbloc.lines = [l] + self.blocs[new_lbl] = nbloc + b.dst = ExprId(new_lbl, size=self.pc.size) + return + """ + + def get_out_regs(self, b): + return set([self.ret_reg, self.sp]) + + +class ir_a_armt(ir_armt, ir_a_arm): + + def __init__(self, symbol_pool): + ir_armt.__init__(self, symbol_pool) + self.ret_reg = self.arch.regs.R0 diff --git a/miasm2/arch/arm/regs.py b/miasm2/arch/arm/regs.py new file mode 100644 index 00000000..6ddac2ef --- /dev/null +++ b/miasm2/arch/arm/regs.py @@ -0,0 +1,80 @@ +#!/usr/bin/env python +#-*- coding:utf-8 -*- + +from miasm2.expression.expression import * + + +# GP + +regs32_str = ["R%d" % i for i in xrange(13)] + ["SP", "LR", "PC"] +regs32_expr = [ExprId(x, 32) for x in regs32_str] + + +R0 = regs32_expr[0] +R1 = regs32_expr[1] +R2 = regs32_expr[2] +R3 = regs32_expr[3] +R4 = regs32_expr[4] +R5 = regs32_expr[5] +R6 = regs32_expr[6] +R7 = regs32_expr[7] +R8 = regs32_expr[8] +R9 = regs32_expr[9] +R10 = regs32_expr[10] +R11 = regs32_expr[11] 
+R12 = regs32_expr[12] +SP = regs32_expr[13] +LR = regs32_expr[14] +PC = regs32_expr[15] + +R0_init = ExprId("R0_init") +R1_init = ExprId("R1_init") +R2_init = ExprId("R2_init") +R3_init = ExprId("R3_init") +R4_init = ExprId("R4_init") +R5_init = ExprId("R5_init") +R6_init = ExprId("R6_init") +R7_init = ExprId("R7_init") +R8_init = ExprId("R8_init") +R9_init = ExprId("R9_init") +R10_init = ExprId("R10_init") +R11_init = ExprId("R11_init") +R12_init = ExprId("R12_init") +SP_init = ExprId("SP_init") +LR_init = ExprId("LR_init") +PC_init = ExprId("PC_init") + + +reg_zf = 'zf' +reg_nf = 'nf' +reg_of = 'of' +reg_cf = 'cf' + +zf = ExprId(reg_zf, size=1) +nf = ExprId(reg_nf, size=1) +of = ExprId(reg_of, size=1) +cf = ExprId(reg_cf, size=1) + +zf_init = ExprId("zf_init", size=1) +nf_init = ExprId("nf_init", size=1) +of_init = ExprId("of_init", size=1) +cf_init = ExprId("cf_init", size=1) + + +all_regs_ids = [ + R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, SP, LR, PC, + zf, nf, of, cf +] + +all_regs_ids_byname = dict([(x.name, x) for x in all_regs_ids]) + +all_regs_ids_init = [R0_init, R1_init, R2_init, R3_init, + R4_init, R5_init, R6_init, R7_init, + R8_init, R9_init, R10_init, R11_init, + R12_init, SP_init, LR_init, PC_init, + zf_init, nf_init, of_init, cf_init + ] + +regs_init = {} +for i, r in enumerate(all_regs_ids): + regs_init[r] = all_regs_ids_init[i] diff --git a/miasm2/arch/arm/sem.py b/miasm2/arch/arm/sem.py new file mode 100644 index 00000000..d40c86eb --- /dev/null +++ b/miasm2/arch/arm/sem.py @@ -0,0 +1,1040 @@ +from miasm2.expression.expression import * +from miasm2.ir.ir import ir, irbloc +from miasm2.arch.arm.arch import mn_arm, mn_armt + +# liris.cnrs.fr/~mmrissa/lib/exe/fetch.php?media=armv7-a-r-manual.pdf + +EXCEPT_PRIV_INSN = (1 << 17) + +# CPSR: N Z C V + +reg_r0 = 'R0' +reg_r1 = 'R1' +reg_r2 = 'R2' +reg_r3 = 'R3' +reg_r4 = 'R4' +reg_r5 = 'R5' +reg_r6 = 'R6' +reg_r7 = 'R7' +reg_r8 = 'R8' +reg_r9 = 'R9' +reg_r10 = 'R10' +reg_r11 = 'R11' 
+reg_r12 = 'R12' +reg_sp = 'SP' +reg_lr = 'LR' +reg_pc = 'PC' + +reg_zf = 'zf' +reg_nf = 'nf' +reg_of = 'of' +reg_cf = 'cf' + +zf = ExprId(reg_zf, size=1) +nf = ExprId(reg_nf, size=1) +of = ExprId(reg_of, size=1) +cf = ExprId(reg_cf, size=1) + +R0 = ExprId(reg_r0) +R1 = ExprId(reg_r1) +R2 = ExprId(reg_r2) +R3 = ExprId(reg_r3) +R4 = ExprId(reg_r4) +R5 = ExprId(reg_r5) +R6 = ExprId(reg_r6) +R7 = ExprId(reg_r7) +R8 = ExprId(reg_r8) +R9 = ExprId(reg_r9) +R10 = ExprId(reg_r10) +R11 = ExprId(reg_r11) +R12 = ExprId(reg_r12) +SP = ExprId(reg_sp) +LR = ExprId(reg_lr) +PC = ExprId(reg_pc) + + +all_registers = [ + R0, + R1, + R2, + R3, + R4, + R5, + R6, + R7, + R8, + R9, + R10, + R11, + R12, + SP, + LR, + PC, +] + + +def update_flag_zf(a): + return [ExprAff(zf, ExprCond(a, ExprInt_from(zf, 0), ExprInt_from(zf, 1)))] + + +def update_flag_nf(a): + return [ExprAff(nf, a.msb())] + + +def update_flag_pf(a): + return [ExprAff(pf, ExprOp('parity', a))] + + +def update_flag_af(a): + return [ExprAff(af, ExprCond(a & ExprInt_from(a, 0x10), + ExprInt_from(af, 1), ExprInt_from(af, 0)))] + + +def update_flag_zn(a): + e = [] + e += update_flag_zf(a) + e += update_flag_nf(a) + return e + + +def update_flag_logic(a): + e = [] + e += update_flag_zn(a) + e.append(ExprAff(cf, ExprInt1(0))) + return e + + +def update_flag_arith(a): + e = [] + e += update_flag_zn(a) + return e + + +def check_ops_msb(a, b, c): + if not a or not b or not c or a != b or a != c: + raise ValueError('bad ops size %s %s %s' % (a, b, c)) + + +def arith_flag(a, b, c): + a_s, b_s, c_s = a.size, b.size, c.size + check_ops_msb(a_s, b_s, c_s) + a_s, b_s, c_s = a.msb(), b.msb(), c.msb() + return a_s, b_s, c_s + +# checked: ok for adc add because b & c before +cf + + +def update_flag_add_cf(a, b, c): + return ExprAff(cf, + ((((a ^ b) ^ c) ^ ((a ^ c) & (~(a ^ b)))).msb()) ^ ExprInt1(1)) + + +def update_flag_add_of(a, b, c): + return ExprAff(of, (((a ^ c) & (~(a ^ b)))).msb()) + + +# checked: ok for sbb add because b & c before 
+cf +def update_flag_sub_cf(a, b, c): + return ExprAff(cf, + ((((a ^ b) ^ c) ^ ((a ^ c) & (a ^ b))).msb()) ^ ExprInt1(1)) + + +def update_flag_sub_of(a, b, c): + return ExprAff(of, (((a ^ c) & (a ^ b))).msb()) + +# z = x+y (+cf?) + + +def update_flag_add(x, y, z): + e = [] + e.append(update_flag_add_cf(x, y, z)) + e.append(update_flag_add_of(x, y, z)) + return e + +# z = x-y (+cf?) + + +def update_flag_sub(x, y, z): + e = [] + e.append(update_flag_sub_cf(x, y, z)) + e.append(update_flag_sub_of(x, y, z)) + return e + + +def get_dst(a): + if a == PC: + return PC + return None + +# instruction definition ############## + + +def adc(ir, instr, a, b, x=None): + e = [] + if x is None: + b, x = a, b + c = b + x + cf.zeroExtend(32) + if instr.name == 'ADCS' and a != PC: + e += update_flag_arith(c) + e += update_flag_add(b, x, c) + e.append(ExprAff(a, c)) + dst = get_dst(a) + return dst, e + + +def add(ir, instr, a, b, x=None): + e = [] + if x is None: + b, x = a, b + c = b + x + if instr.name == 'ADDS' and a != PC: + e += update_flag_arith(c) + e += update_flag_add(b, x, c) + e.append(ExprAff(a, c)) + dst = get_dst(a) + return dst, e + + +def l_and(ir, instr, a, b, x=None): + e = [] + if x is None: + b, x = a, b + c = b & x + if instr.name == 'ANDS' and a != PC: + e += update_flag_logic(c) + e.append(ExprAff(a, c)) + dst = get_dst(a) + return dst, e + + +def sub(ir, instr, a, b, x=None): + e = [] + if x is None: + b, x = a, b + c = b - x + e.append(ExprAff(a, c)) + dst = get_dst(a) + return dst, e + + +def subs(ir, instr, a, b, x=None): + e = [] + if x is None: + b, x = a, b + c = b - x + e += update_flag_arith(c) + e += update_flag_sub(b, x, c) + e.append(ExprAff(a, c)) + dst = get_dst(a) + return dst, e + + +def eor(ir, instr, a, b, x=None): + e = [] + if x is None: + b, x = a, b + c = b ^ x + e.append(ExprAff(a, c)) + dst = get_dst(a) + return dst, e + + +def eors(ir, instr, a, b, x=None): + e = [] + if x is None: + b, x = a, b + c = b ^ x + e += update_flag_logic(c) + 
e.append(ExprAff(a, c)) + dst = get_dst(a) + return dst, e + + +def rsb(ir, instr, a, b, x=None): + e = [] + if x is None: + b, x = a, b + c = x - b + e.append(ExprAff(a, c)) + dst = get_dst(a) + return dst, e + + +def rsbs(ir, instr, a, b, x=None): + e = [] + if x is None: + b, x = a, b + c = x - b + e += update_flag_arith(c) + e += update_flag_sub(b, x, c) + e.append(ExprAff(a, c)) + dst = get_dst(a) + return dst, e + + +def sbc(ir, instr, a, b, x=None): + e = [] + if x is None: + b, x = a, b + c = (b + cf.zeroExtend(32)) - (x + ExprInt32(1)) + e.append(ExprAff(a, c)) + dst = get_dst(a) + return dst, e + + +def sbcs(ir, instr, a, b, x=None): + e = [] + if x is None: + b, x = a, b + c = (b + cf.zeroExtend(32)) - (x + ExprInt32(1)) + e += update_flag_arith(c) + e += update_flag_sub(b, x, c) + e.append(ExprAff(a, c)) + dst = get_dst(a) + return dst, e + + +def rsc(ir, instr, x, a, b): + e = [] + if x is None: + b, x = a, b + c = (x + cf.zeroExtend(32)) - (b + ExprInt32(1)) + e.append(ExprAff(a, c)) + dst = get_dst(a) + return dst, e + + +def rscs(ir, instr, x, a, b): + e = [] + if x is None: + b, x = a, b + c = (x + cf.zeroExtend(32)) - (b + ExprInt32(1)) + e.append(ExprAff(a, c)) + e += update_flag_arith(c) + e += update_flag_sub(x, b, c) + e.append(ExprAff(a, c)) + dst = get_dst(a) + return dst, e + + +def tst(ir, instr, a, b, x=None): + e = [] + if x is None: + b, x = a, b + c = b & x + e += update_flag_logic(c) + return None, e + + +def teq(ir, instr, a, b, x=None): + e = [] + if x is None: + b, x = a, b + c = b ^ x + e += update_flag_logic(c) + return None, e + + +def l_cmp(ir, instr, a, b, x=None): + e = [] + if x is None: + b, x = a, b + c = b - x + e += update_flag_arith(c) + e += update_flag_sub(x, b, c) + return None, e + + +def cmn(ir, instr, a, b, x=None): + e = [] + if x is None: + b, x = a, b + c = b + x + e += update_flag_arith(c) + e += update_flag_add(b, x, c) + return None, e + + +def orr(ir, instr, a, b, x=None): + e = [] + if x is None: + b, x = 
a, b + c = b | x + e.append(ExprAff(a, c)) + dst = get_dst(a) + return dst, e + + +def orrs(ir, instr, a, b, x=None): + e = [] + if x is None: + b, x = a, b + c = b | x + e += update_flag_logic(c) + e.append(ExprAff(a, c)) + dst = get_dst(a) + return dst, e + + +def mov(ir, instr, a, b): + e = [ExprAff(a, b)] + dst = get_dst(a) + return dst, e + + +def movt(ir, instr, a, b): + e = [ExprAff(a, a | b << ExprInt32(16))] + dst = get_dst(a) + return dst, e + + +def movs(ir, instr, a, b): + e = [] + e.append(ExprAff(a, b)) + # XXX TODO check + e += update_flag_logic(b) + dst = get_dst(a) + return dst, e + + +def mvn(ir, instr, a, b): + e = [ExprAff(a, b ^ ExprInt32(-1))] + dst = get_dst(a) + return dst, e + + +def mvns(ir, instr, a, b): + e = [] + c = b ^ ExprInt32(-1) + e.append(ExprAff(a, c)) + # XXX TODO check + e += update_flag_logic(c) + dst = get_dst(a) + return dst, e + + +def bic(ir, instr, a, b, x=None): + e = [] + if x is None: + b, x = a, b + c = b & (x ^ ExprInt(uint32(-1))) + e.append(ExprAff(a, c)) + dst = get_dst(a) + return dst, e + + +def bics(ir, instr, a, b, x=None): + e = [] + if x is None: + b, x = a, b + c = b & (x ^ ExprInt(uint32(-1))) + e += update_flag_logic(c) + e.append(ExprAff(a, c)) + dst = get_dst(a) + return dst, e + + +def mla(ir, instr, x, a, b, c): + e = [] + d = (a * b) + c + e.append(ExprAff(x, d)) + dst = get_dst(x) + return dst, e + + +def mlas(ir, instr, x, a, b, c): + e = [] + d = (a * b) + c + e += update_flag_zn(d) + e.append(ExprAff(x, d)) + dst = get_dst(x) + return dst, e + + +def mul(ir, instr, x, a, b): + e = [] + c = a * b + e.append(ExprAff(x, c)) + dst = get_dst(x) + return dst, e + + +def muls(ir, instr, x, a, b): + e = [] + c = a * b + e += update_flag_zn(c) + e.append(ExprAff(x, c)) + dst = get_dst(x) + return dst, e + + +def b(ir, instr, a): + e = [] + e.append(ExprAff(PC, a)) + return a, e + + +def bl(ir, instr, a): + e = [] + l = ExprInt32(instr.offset + instr.l) + e.append(ExprAff(PC, a)) + e.append(ExprAff(LR, 
l)) + return a, e + + +def bx(ir, instr, a): + e = [] + e.append(ExprAff(PC, a)) + return a, e + + +def blx(ir, instr, a): + e = [] + l = ExprInt32(instr.offset + instr.l) + e.append(ExprAff(PC, a)) + e.append(ExprAff(LR, l)) + return a, e + + +def st_ld_r(ir, instr, a, b, store=False, size=32, s_ext=False, z_ext=False): + e = [] + wb = False + b = b.copy() + postinc = False + if isinstance(b, ExprOp): + if b.op == "wback": + wb = True + b = b.args[0] + if b.op == "postinc": + postinc = True + if isinstance(b, ExprOp) and b.op in ["postinc", 'preinc']: + # XXX TODO CHECK + base, off = b.args[0], b.args[1] # ExprInt32(size/8) + else: + base, off = b, ExprInt32(0) + # print a, wb, base, off, postinc + if postinc: + ad = base + else: + ad = base + off + + dmem = False + if size in [8, 16]: + if store: + a = a[:size] + m = ExprMem(ad, size=size) + elif s_ext: + m = ExprMem(ad, size=size).signExtend(a.size) + elif z_ext: + m = ExprMem(ad, size=size).zeroExtend(a.size) + else: + raise ValueError('unhandled case') + elif size == 32: + m = ExprMem(ad, size=size) + pass + elif size == 64: + m = ExprMem(ad, size=32) + dmem = True + a2 = ir.arch.regs.all_regs_ids[ir.arch.regs.all_regs_ids.index(a) + 1] + size = 32 + else: + raise ValueError('the size DOES matter') + dst = None + + if store: + e.append(ExprAff(m, a)) + if dmem: + e.append(ExprAff(ExprMem(ad + ExprInt32(4), size=size), a2)) + else: + if a == PC: + dst = PC + e.append(ExprAff(a, m)) + if dmem: + e.append(ExprAff(a2, ExprMem(ad + ExprInt32(4), size=size))) + + # XXX TODO check multiple write cause by wb + if wb or postinc: + e.append(ExprAff(base, base + off)) + return dst, e + + +def ldr(ir, instr, a, b): + return st_ld_r(ir, instr, a, b, store=False) + + +def ldrd(ir, instr, a, b): + dst, e = st_ld_r(ir, instr, a, b, store=False, size=64) + return dst, e + + +def l_str(ir, instr, a, b): + return st_ld_r(ir, instr, a, b, store=True) + + +def l_strd(ir, instr, a, b): + dst, e = st_ld_r(ir, instr, a, b, 
store=True, size=64) + return dst, e + + +def ldrb(ir, instr, a, b): + dst, e = st_ld_r(ir, instr, a, b, store=False, size=8, z_ext=True) + return dst, e + + +def strb(ir, instr, a, b): + dst, e = st_ld_r(ir, instr, a, b, store=True, size=8) + return dst, e + + +def ldrh(ir, instr, a, b): + dst, e = st_ld_r(ir, instr, a, b, store=False, size=16, z_ext=True) + return dst, e + + +def strh(ir, instr, a, b): + dst, e = st_ld_r(ir, instr, a, b, store=True, size=16, z_ext=True) + return dst, e + + +def ldrsh(ir, instr, a, b): + dst, e = st_ld_r( + ir, instr, a, b, store=False, size=16, s_ext=True, z_ext=False) + return dst, e + + +def st_ld_m(ir, instr, a, b, store=False, postinc=False, updown=False): + e = [] + wb = False + # sb = False + dst = None + if isinstance(a, ExprOp) and a.op == 'wback': + wb = True + a = a.args[0] + if isinstance(b, ExprOp) and b.op == 'sbit': + # sb = True + b = b.args[0] + regs = b.args + base = a + if updown: + step = 4 + else: + step = -4 + regs = regs[::-1] + if postinc: + pass + else: + base += ExprInt32(step) + for i, r in enumerate(regs): + ad = base + ExprInt32(i * step) + if store: + e.append(ExprAff(ExprMem(ad), r)) + else: + e.append(ExprAff(r, ExprMem(ad))) + # XXX TODO check multiple write cause by wb + if wb: + if postinc: + e.append(ExprAff(a, base + ExprInt32(len(regs) * step))) + else: + e.append(ExprAff(a, base + ExprInt32((len(regs) - 1) * step))) + if store: + pass + else: + assert(isinstance(b, ExprOp) and b.op == "reglist") + if PC in b.args: + dst = PC + + return dst, e + + +def ldmia(ir, instr, a, b): + return st_ld_m(ir, instr, a, b, store=False, postinc=True, updown=True) + + +def ldmib(ir, instr, a, b): + return st_ld_m(ir, instr, a, b, store=False, postinc=False, updown=True) + + +def ldmda(ir, instr, a, b): + return st_ld_m(ir, instr, a, b, store=False, postinc=True, updown=False) + + +def ldmdb(ir, instr, a, b): + return st_ld_m(ir, instr, a, b, store=False, postinc=False, updown=False) + + +def stmia(ir, instr, 
a, b): + return st_ld_m(ir, instr, a, b, store=True, postinc=True, updown=True) + + +def stmib(ir, instr, a, b): + return st_ld_m(ir, instr, a, b, store=True, postinc=False, updown=True) + + +def stmda(ir, instr, a, b): + return st_ld_m(ir, instr, a, b, store=True, postinc=True, updown=False) + + +def stmdb(ir, instr, a, b): + return st_ld_m(ir, instr, a, b, store=True, postinc=False, updown=False) + + +def svc(ir, instr, a): + # XXX TODO implement + e = [ + ExprAff(ExprId('vmmngr.exception_flags'), ExprInt32(EXCEPT_PRIV_INSN))] + return None, e + + +def und(ir, instr, a, b): + # XXX TODO implement + e = [] + return None, e + + +def lsr(ir, instr, a, b, x): + e = [] + c = b >> x + e.append(ExprAff(a, c)) + dst = get_dst(a) + return dst, e + + +def lsrs(ir, instr, a, b, x): + e = [] + c = b >> x + e.append(ExprAff(a, c)) + e += update_flag_logic(c) + dst = get_dst(a) + return dst, e + + +def lsl(ir, instr, a, b, x): + e = [] + c = b << x + e.append(ExprAff(a, c)) + dst = get_dst(a) + return dst, e + + +def lsls(ir, instr, a, b, x): + e = [] + c = b << x + e.append(ExprAff(a, c)) + e += update_flag_logic(c) + dst = get_dst(a) + return dst, e + + +def push(ir, instr, a): + e = [] + regs = list(a.args) + for i in xrange(len(regs)): + c = SP + ExprInt32(-4 * (i + 1)) + e.append(ExprAff(regs[i], ExprMem(c))) + c = SP + ExprInt32(-4 * len(regs)) + e.append(ExprAff(SP, c)) + return None, e + + +def pop(ir, instr, a): + e = [] + regs = list(a.args) + for i in xrange(len(regs)): + c = SP + ExprInt32(4 * i) + e.append(ExprAff(regs[i], ExprMem(c))) + c = SP + ExprInt32(4 * len(regs)) + e.append(ExprAff(SP, c)) + dst = None + if PC in a.get_r(): + dst = PC + return dst, e + + +def cbz(ir, instr, a, b): + e = [] + lbl_next = ExprId(ir.get_next_label(instr), 32) + dst = ExprCond(a, lbl_next, b) + return dst, e + + +def cbnz(ir, instr, a, b): + e = [] + lbl_next = ExprId(ir.get_next_label(instr), 32) + dst = ExprCond(a, b, lbl_next) + return dst, e + + +COND_EQ = 0 +COND_NE = 1 
+COND_CS = 2 +COND_CC = 3 +COND_MI = 4 +COND_PL = 5 +COND_VS = 6 +COND_VC = 7 +COND_HI = 8 +COND_LS = 9 +COND_GE = 10 +COND_LT = 11 +COND_GT = 12 +COND_LE = 13 +COND_AL = 14 +COND_NV = 15 + +cond_dct = { + COND_EQ: "EQ", + COND_NE: "NE", + COND_CS: "CS", + COND_CC: "CC", + COND_MI: "MI", + COND_PL: "PL", + COND_VS: "VS", + COND_VC: "VC", + COND_HI: "HI", + COND_LS: "LS", + COND_GE: "GE", + COND_LT: "LT", + COND_GT: "GT", + COND_LE: "LE", + COND_AL: "AL", + # COND_NV: "NV", +} + + +tab_cond = {COND_EQ: zf, + COND_NE: ExprOp('==', zf, ExprInt1(0)), + COND_CS: cf, + COND_CC: ExprOp('==', cf, ExprInt1(0)), + COND_MI: nf, + COND_PL: ExprOp('==', nf, ExprInt1(0)), + COND_VS: of, + COND_VC: ExprOp('==', of, ExprInt1(0)), + COND_HI: cf & ExprOp('==', zf, ExprInt1(0)), + # COND_HI: cf, + # COND_HI: ExprOp('==', + # ExprOp('|', cf, zf), + # ExprInt1(0)), + COND_LS: ExprOp('==', cf, ExprInt1(0)) | zf, + COND_GE: ExprOp('==', nf, of), + COND_LT: nf ^ of, + # COND_GT: ExprOp('|', + # ExprOp('==', zf, ExprInt1(0)) & (nf | of), + # ExprOp('==', nf, ExprInt1(0)) & ExprOp('==', of, ExprInt1(0))), + COND_GT: ExprOp('==', zf, ExprInt1(0)) & ExprOp('==', nf, of), + COND_LE: zf | (nf ^ of), + } + + +def is_pc_written(ir, instr_ir): + all_pc = ir.mn.pc.values() + for ir in instr_ir: + if ir.dst in all_pc: + return True, ir.dst + return False, None + + +def add_condition_expr(ir, instr, cond, instr_ir, dst): + # print "XXX", hex(instr.offset), instr + if cond == COND_AL: + return dst, instr_ir, [] + if not cond in tab_cond: + raise ValueError('unknown condition %r' % cond) + cond = tab_cond[cond] + + lbl_next = ExprId(ir.get_next_label(instr), 32) + lbl_do = ExprId(ir.gen_label(), 32) + + dst_cond = ExprCond(cond, lbl_do, lbl_next) + assert(isinstance(instr_ir, list)) + + if dst is None: + dst = lbl_next + e_do = irbloc(lbl_do.name, dst, [instr_ir]) + return dst_cond, [], [e_do] + +mnemo_func = {} +mnemo_func_cond = {} +mnemo_condm0 = {'add': add, + 'sub': sub, + 'eor': eor, + 'and': 
l_and, + 'rsb': rsb, + 'adc': adc, + 'sbc': sbc, + 'rsc': rsc, + + 'tst': tst, + 'teq': teq, + 'cmp': l_cmp, + 'cmn': cmn, + 'orr': orr, + 'mov': mov, + 'movt': movt, + 'bic': bic, + 'mvn': mvn, + + 'mul': mul, + 'mla': mla, + 'ldr': ldr, + 'ldrd': ldrd, + 'str': l_str, + 'strd': l_strd, + 'b': b, + 'bl': bl, + 'svc': svc, + 'und': und, + 'bx': bx, + 'ldrh': ldrh, + 'strh': strh, + 'ldrsh': ldrsh, + } + +mnemo_condm1 = {'adds': add, + 'subs': subs, + 'eors': eors, + 'ands': l_and, + 'rsbs': rsbs, + 'adcs': adc, + 'sbcs': sbcs, + 'rscs': rscs, + + 'orrs': orrs, + 'movs': movs, + 'bics': bics, + 'mvns': mvns, + + 'muls': muls, + 'mlas': mlas, + 'blx': blx, + + 'ldrb': ldrb, + 'strb': strb, + + } + +mnemo_condm2 = {'ldmia': ldmia, + 'ldmib': ldmib, + 'ldmda': ldmda, + 'ldmdb': ldmdb, + + 'ldmfa': ldmda, + 'ldmfd': ldmia, + 'ldmea': ldmdb, + 'ldmed': ldmib, # XXX + + + 'stmia': stmia, + 'stmib': stmib, + 'stmda': stmda, + 'stmdb': stmdb, + + 'stmfa': stmib, + 'stmed': stmda, + 'stmfd': stmdb, + 'stmea': stmia, + } + + +mnemo_nocond = {'lsr': lsr, + 'lsrs': lsrs, + 'lsl': lsl, + 'lsls': lsls, + 'push': push, + 'pop': pop, + 'cbz': cbz, + 'cbnz': cbnz, + } +mn_cond_x = [mnemo_condm0, + mnemo_condm1, + mnemo_condm2] + +for index, mn_base in enumerate(mn_cond_x): + for mn, mf in mn_base.items(): + for cond, cn in cond_dct.items(): + if cond == COND_AL: + cn = "" + cn = cn.lower() + if index == 0: + mn_mod = mn + cn + else: + mn_mod = mn[:-index] + cn + mn[-index:] + # print mn_mod + mnemo_func_cond[mn_mod] = cond, mf + +for name, mf in mnemo_nocond.items(): + mnemo_func_cond[name] = COND_AL, mf + + +def split_expr_dst(ir, instr_ir): + out = [] + dst = None + for i in instr_ir: + if i.dst == ir.pc: + out.append(i) + dst = ir.pc # i.src + else: + out.append(i) + return out, dst + + +def get_mnemo_expr(ir, instr, *args): + if not instr.name.lower() in mnemo_func_cond: + raise ValueError('unknown mnemo %s' % instr) + cond, mf = mnemo_func_cond[instr.name.lower()] + dst, 
instr_ir = mf(ir, instr, *args) + dst, instr, extra_ir = add_condition_expr(ir, instr, cond, instr_ir, dst) + return dst, instr, extra_ir + +get_arm_instr_expr = get_mnemo_expr + + +class arminfo: + mode = "arm" + # offset + + +class ir_arm(ir): + + def __init__(self, symbol_pool=None): + ir.__init__(self, mn_arm, "arm", symbol_pool) + self.pc = PC + self.sp = SP + + def get_ir(self, instr): + args = instr.args + # ir = get_mnemo_expr(self, self.name.lower(), *args) + if len(args) and isinstance(args[-1], ExprOp): + if args[-1].op == 'rrx': + args[-1] = ExprCompose( + [(args[-1].args[0][1:], 0, 31), (cf, 31, 32)]) + elif (args[-1].op in ['<<', '>>', '<<a', 'a>>', '<<<', '>>>'] and + isinstance(args[-1].args[-1], ExprId)): + args[-1].args = args[-1].args[:-1] + ( + args[-1].args[-1][:8].zeroExtend(32),) + dst, instr_ir, extra_ir = get_mnemo_expr(self, instr, *args) + # if self.name.startswith('B'): + # return instr_ir, extra_ir + for i, x in enumerate(instr_ir): + x = ExprAff(x.dst, x.src.replace_expr( + {self.pc: ExprInt32(instr.offset + 8)})) + instr_ir[i] = x + for b in extra_ir: + for irs in b.irs: + for i, x in enumerate(irs): + x = ExprAff(x.dst, x.src.replace_expr( + {self.pc: ExprInt32(instr.offset + 8)})) + irs[i] = x + # return out_ir, extra_ir + return dst, instr_ir, extra_ir + + +class ir_armt(ir): + + def __init__(self, symbol_pool=None): + ir.__init__(self, mn_armt, "armt", symbol_pool) + self.pc = PC + self.sp = SP + + def get_ir(self, instr): + return get_mnemo_expr(self, instr, *instr.args) + diff --git a/miasm2/arch/exe64.bin b/miasm2/arch/exe64.bin new file mode 100644 index 00000000..d7b5256b --- /dev/null +++ b/miasm2/arch/exe64.bin Binary files differdiff --git a/miasm2/arch/msp430/__init__.py b/miasm2/arch/msp430/__init__.py new file mode 100644 index 00000000..bbad893b --- /dev/null +++ b/miasm2/arch/msp430/__init__.py @@ -0,0 +1 @@ +__all__ = ["arch", "disasm", "regs", "sem"] diff --git a/miasm2/arch/msp430/arch.py 
b/miasm2/arch/msp430/arch.py new file mode 100644 index 00000000..74cce9ea --- /dev/null +++ b/miasm2/arch/msp430/arch.py @@ -0,0 +1,601 @@ +#!/usr/bin/env python +#-*- coding:utf-8 -*- + +import logging +from pyparsing import * +from miasm2.expression.expression import * +from miasm2.core.cpu import * +from collections import defaultdict +from miasm2.core.bin_stream import bin_stream +import regs as regs_module +from regs import * + +log = logging.getLogger("armdis") +console_handler = logging.StreamHandler() +console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) +log.addHandler(console_handler) +log.setLevel(logging.DEBUG) + + +def deref2expr_nooff(s, l, t): + t = t[0] + if len(t) == 1 and isinstance(t[0], ExprId): + return ExprMem(t[0], 16) + elif len(t) == 1 and isinstance(t[0], ExprInt): + return ExprMem(t[0], 16) + raise NotImplementedError('not fully functional') + + +def deref2expr_pinc(s, l, t): + t = t[0] + if len(t) == 1 and isinstance(t[0], ExprId): + return ExprOp('autoinc', t[0]) + raise NotImplementedError('not fully functional') + + +def deref2expr_off(s, l, t): + t = t[0] + if len(t) == 2 and isinstance(t[1], ExprId): + return ExprMem(t[1] + t[0], 16) + raise NotImplementedError('not fully functional') + + +def deref_expr(s, l, t): + t = t[0] + assert(len(t) == 1) + t = t[0] + if isinstance(t, ExprId): + return t + elif isinstance(t, ExprInt): + return t + elif isinstance(t, ExprMem): + return t + elif isinstance(t, ExprOp) and t.op == "autoinc": + return t + raise NotImplementedError('not fully functional') + if t[-1] == '!': + return ExprOp('wback', *t[:-1]) + return t[0] + + +def f_reg2expr(t): + t = t[0] + i = regs16_str.index(t) + r = regs16_expr[i] + return r + +# gpregs.parser.setParseAction(f_reg2expr) + +ARO = Suppress("@") +LPARENT = Suppress("(") +RPARENT = Suppress(")") + +PINC = Suppress("+") + + +def ast_id2expr(t): + if not t in mn_msp430.regs.all_regs_ids_byname: + r = ExprId(t, 16) + else: + r = 
mn_msp430.regs.all_regs_ids_byname[t] + return r + + +def ast_int2expr(a): + return ExprInt16(a) + + +variable, operand, base_expr = gen_base_expr() + +my_var_parser = parse_ast(ast_id2expr, ast_int2expr) +base_expr.setParseAction(my_var_parser) + + +deref_nooff = Group(ARO + base_expr).setParseAction(deref2expr_nooff) +deref_pinc = Group(ARO + base_expr + PINC).setParseAction(deref2expr_pinc) +deref_off = Group(base_expr + LPARENT + + gpregs.parser + RPARENT).setParseAction(deref2expr_off) + + +sreg_p = Group(deref_pinc | deref_nooff | + deref_off | base_expr).setParseAction(deref_expr) + + +class additional_info: + + def __init__(self): + self.except_on_instr = False + + +class instruction_msp430(instruction): + delayslot = 0 + + def dstflow(self): + if self.name.startswith('j'): + return True + return self.name in ['call'] + + def dstflow2label(self, symbol_pool): + e = self.args[0] + if not isinstance(e, ExprInt): + return + if self.name == "call": + ad = e.arg + else: + ad = e.arg + int(self.offset) + self.l + + l = symbol_pool.getby_offset_create(ad) + s = ExprId(l, e.size) + self.args[0] = s + + def breakflow(self): + if self.name.startswith('j'): + return True + if self.name.startswith('ret'): + return True + if self.name.startswith('int'): + return True + if self.name.startswith('mov') and self.args[1] == PC: + return True + return self.name in ['call'] + + def splitflow(self): + if self.name.startswith('jmp'): + return False + if self.name.startswith('j'): + return True + return self.name in ['call'] + + def setdstflow(self, a): + return + + def is_subcall(self): + return self.name in ['call'] + + def getdstflow(self, symbol_pool): + return [self.args[0]] + + def get_symbol_size(self, symbol, symbol_pool): + return self.mode + + def fixDstOffset(self): + e = self.args[0] + if self.offset is None: + raise ValueError('symbol not resolved %s' % l) + if not isinstance(e, ExprInt): + # raise ValueError('dst must be int or label') + log.warning('dynamic dst %r' 
% e) + return + # return ExprInt32(e.arg - (self.offset + self.l)) + self.args[0] = ExprInt_fromsize(self.mode, e.arg) + + def get_info(self, c): + pass + + def __str__(self): + o = super(instruction_msp430, self).__str__() + return o + + def get_args_expr(self): + args = [] + for a in self.args: + # a = a.replace_expr(replace_regs[self.mode]) + args.append(a) + return args + + +mode_msp430 = None + + +class mn_msp430(cls_mn): + name = "msp430" + regs = regs_module + all_mn = [] + bintree = {} + num = 0 + delayslot = 0 + pc = {None: PC} + sp = {None: SP} + all_mn_mode = defaultdict(list) + all_mn_name = defaultdict(list) + all_mn_inst = defaultdict(list) + instruction = instruction_msp430 + max_instruction_len = 8 + + @classmethod + def getpc(cls, attrib): + return PC + + @classmethod + def getsp(cls, attrib): + return SP + + @classmethod + def check_mnemo(cls, fields): + l = sum([x.l for x in fields]) + assert l % 16 == 00, "len %r" % l + + @classmethod + def getbits(cls, bs, start, n): + if not n: + return 0 + o = 0 + if n > bs.getlen() * 8: + raise ValueError('not enought bits %r %r' % (n, len(bs.bin) * 8)) + while n: + i = start / 8 + c = cls.getbytes(bs, i) + if not c: + raise IOError + c = ord(c) + r = 8 - start % 8 + c &= (1 << r) - 1 + l = min(r, n) + c >>= (r - l) + o <<= l + o |= c + n -= l + start += l + return o + + @classmethod + def getbytes(cls, bs, offset, l=1): + out = "" + for _ in xrange(l): + n_offset = (offset & ~1) + 1 - offset % 2 + out += bs.getbytes(n_offset, 1) + offset += 1 + return out + + def decoded2bytes(self, result): + tmp = super(mn_msp430, self).decoded2bytes(result) + out = [] + for x in tmp: + o = "" + while x: + o += x[:2][::-1] + x = x[2:] + out.append(o) + return out + + @classmethod + def gen_modes(cls, subcls, name, bases, dct, fields): + dct['mode'] = None + return [(subcls, name, bases, dct, fields)] + + def additional_info(self): + info = additional_info() + return info + + @classmethod + def getmn(cls, name): + return 
name.upper() + + def reset_class(self): + super(mn_msp430, self).reset_class() + + def getnextflow(self, symbol_pool): + raise NotImplementedError('not fully functional') + return self.offset + 4 + + +def addop(name, fields, args=None, alias=False): + dct = {"fields": fields} + dct["alias"] = alias + if args is not None: + dct['args'] = args + type(name, (mn_msp430,), dct) + + +class bw_mn(bs_mod_name): + prio = 5 + mn_mod = ['.w', '.b'] + + +class msp430_sreg_arg(reg_noarg, m_arg): + prio = default_prio + 1 + reg_info = gpregs + parser = sreg_p + + def decode(self, v): + size = 16 + if hasattr(self.parent, 'size'): + size = [16, 8][self.parent.size.value] + v = v & self.lmask + e = self.reg_info.expr[v] + if self.parent.a_s.value == 0b00: + if e == R3: + self.expr = ExprInt_fromsize(size, 0) + else: + self.expr = e + elif self.parent.a_s.value == 0b01: + if e == SR: + self.expr = ExprMem(ExprInt16(self.parent.off_s.value), size) + elif e == R3: + self.expr = ExprInt_fromsize(size, 1) + else: + self.expr = ExprMem( + e + ExprInt16(self.parent.off_s.value), size) + elif self.parent.a_s.value == 0b10: + if e == SR: + self.expr = ExprInt_fromsize(size, 4) + elif e == R3: + self.expr = ExprInt_fromsize(size, 2) + else: + self.expr = ExprMem(e, size) + elif self.parent.a_s.value == 0b11: + if e == SR: + self.expr = ExprInt_fromsize(size, 8) + elif e == R3: + if self.parent.size.value == 0: + self.expr = ExprInt_fromsize(size, 0xffff) + else: + self.expr = ExprInt_fromsize(size, 0xff) + elif e == PC: + self.expr = ExprInt_fromsize(size, self.parent.off_s.value) + else: + self.expr = ExprOp('autoinc', e) + else: + raise NotImplementedError( + "unknown value self.parent.a_s.value = " + + "%d" % self.parent.a_s.value) + return True + + def encode(self): + e = self.expr + if e in self.reg_info.expr: + self.parent.a_s.value = 0 + self.value = self.reg_info.expr.index(e) + elif isinstance(e, ExprInt): + v = int(e.arg) + if v == 0xffff and self.parent.size.value == 0: + 
self.parent.a_s.value = 0b11 + self.value = 3 + elif v == 0xff and self.parent.size.value == 1: + self.parent.a_s.value = 0b11 + self.value = 3 + elif v == 2: + self.parent.a_s.value = 0b10 + self.value = 3 + elif v == 1: + self.parent.a_s.value = 0b01 + self.value = 3 + elif v == 8: + self.parent.a_s.value = 0b11 + self.value = 2 + elif v == 4: + self.parent.a_s.value = 0b10 + self.value = 2 + elif v == 0: + self.parent.a_s.value = 0b00 + self.value = 3 + else: + self.parent.a_s.value = 0b11 + self.value = 0 + self.parent.off_s.value = v + elif isinstance(e, ExprMem): + if isinstance(e.arg, ExprId): + self.parent.a_s.value = 0b10 + self.value = self.reg_info.expr.index(e.arg) + elif isinstance(e.arg, ExprInt): + self.parent.a_s.value = 0b01 + self.value = self.reg_info.expr.index(SR) + self.parent.off_s.value = int(e.arg.arg) + elif isinstance(e.arg, ExprOp): + self.parent.a_s.value = 0b01 + self.value = self.reg_info.expr.index(e.arg.args[0]) + self.parent.off_s.value = int(e.arg.args[1].arg) + else: + raise NotImplementedError( + 'unknown instance e.arg = %s' % type(e.arg)) + elif isinstance(e, ExprOp) and e.op == "autoinc": + self.parent.a_s.value = 0b11 + self.value = self.reg_info.expr.index(e.args[0]) + else: + raise NotImplementedError('unknown instance e = %s' % type(e)) + return True + + @staticmethod + def arg2str(e): + if isinstance(e, ExprId): + o = str(e) + elif isinstance(e, ExprInt): + o = str(e) + elif isinstance(e, ExprOp) and e.op == "autoinc": + o = "@%s+" % str(e.args[0]) + elif isinstance(e, ExprMem): + if isinstance(e.arg, ExprId): + o = "@%s" % e.arg + elif isinstance(e.arg, ExprInt): + o = "@%s" % e.arg + elif isinstance(e.arg, ExprOp): + o = "%s(%s)" % (e.arg.args[1], e.arg.args[0]) + else: + raise NotImplementedError('unknown instance e = %s' % type(e)) + return o + + +class msp430_dreg_arg(msp430_sreg_arg): + prio = default_prio + 1 + reg_info = gpregs + parser = sreg_p + + def decode(self, v): + if hasattr(self.parent, 'size'): + size = 
[16, 8][self.parent.size.value] + else: + size = 16 + + v = v & self.lmask + e = self.reg_info.expr[v] + if self.parent.a_d.value == 0: + self.expr = e + elif self.parent.a_d.value == 1: + if e == SR: + x = ExprInt16(self.parent.off_d.value) + else: + x = e + ExprInt16(self.parent.off_d.value) + self.expr = ExprMem(x, size) + else: + raise NotImplementedError( + "unknown value self.parent.a_d.value = " + + "%d" % self.parent.a_d.value) + return True + + def encode(self): + e = self.expr + if e in self.reg_info.expr: + self.parent.a_d.value = 0 + self.value = self.reg_info.expr.index(e) + elif isinstance(e, ExprMem): + if isinstance(e.arg, ExprId): + r, i = e.arg, ExprInt16(0) + elif isinstance(e.arg, ExprOp): + r, i = e.arg.args[0], e.arg.args[1] + elif isinstance(e.arg, ExprInt): + r, i = SR, e.arg + else: + raise NotImplementedError( + 'unknown instance e.arg = %s' % type(e.arg)) + self.parent.a_d.value = 1 + self.value = self.reg_info.expr.index(r) + self.parent.off_d.value = int(i.arg) + else: + raise NotImplementedError('unknown instance e = %s' % type(e)) + return True + + @staticmethod + def arg2str(e): + if isinstance(e, ExprId): + o = str(e) + elif isinstance(e, ExprMem): + if isinstance(e.arg, ExprId): + o = "0x0(%s)" % e.arg + elif isinstance(e.arg, ExprInt): + o = "@%s" % e.arg + elif isinstance(e.arg, ExprOp): + o = "%s(%s)" % (e.arg.args[1], e.arg.args[0]) + else: + raise NotImplementedError( + 'unknown instance e.arg = %s' % type(e.arg)) + else: + raise NotImplementedError('unknown instance e = %s' % type(e)) + return o + + +class bs_cond_off_s(bs_cond): + + @classmethod + def flen(cls, mode, v): + if v['a_s'] == 0b00: + return None + elif v['a_s'] == 0b01: + if v['sreg'] in [3]: + return None + else: + return 16 + elif v['a_s'] == 0b10: + return None + elif v['a_s'] == 0b11: + """ + if v['sreg'] in [2, 3]: + return None + else: + return 16 + """ + if v['sreg'] in [0]: + return 16 + else: + return None + else: + raise NotImplementedError("unknown 
value v[a_s] = %d" % v['a_s']) + + def encode(self): + return super(bs_cond, self).encode() + + def decode(self, v): + if self.l == 0: + self.value = None + self.value = v + return True + + +class bs_cond_off_d(bs_cond_off_s): + + @classmethod + def flen(cls, mode, v): + if v['a_d'] == 0: + return None + elif v['a_d'] == 1: + return 16 + else: + raise NotImplementedError("unknown value v[a_d] = %d" % v['a_d']) + + +class msp430_offs(imm_noarg, m_arg): + parser = base_expr + + def int2expr(self, v): + if v & ~self.intmask != 0: + return None + return ExprInt_fromsize(16, v) + + def decodeval(self, v): + return v << 1 + + def encodeval(self, v): + return v >> 1 + + def decode(self, v): + v = v & self.lmask + if (1 << (self.l - 1)) & v: + v |= ~0 ^ self.lmask + v = self.decodeval(v) + self.expr = ExprInt16(v) + return True + + def encode(self): + if not isinstance(self.expr, ExprInt): + return False + v = int(self.expr.arg) + if (1 << (self.l - 1)) & v: + v = -((0xffff ^ v) + 1) + v = self.encodeval(v) + self.value = (v & 0xffff) & self.lmask + return True + + +off_s = bs(l=16, order=-10, cls=(bs_cond_off_s,), fname = "off_s") +off_d = bs(l=16, order=-10, cls=(bs_cond_off_d,), fname = "off_d") + +a_s = bs(l=2, order=-4, fname='a_s') +a_d = bs(l=1, order=-6, fname='a_d') + +a_d2 = bs(l=2, order=-2, fname='a_d') + +sreg = bs(l=4, order=-3, cls=(msp430_sreg_arg,), fname='sreg') +dreg = bs(l=4, order=-5, cls=(msp430_dreg_arg,), fname='dreg') + +bw = bw_mn(l=1, order=-10, mn_mod=['.w', '.b'], fname='size') + +bs_f1 = bs_name( + l=4, name={ + 'mov': 4, 'add': 5, 'addc': 6, 'subc': 7, 'sub': 8, 'cmp': 9, + 'dadd': 10, 'bit': 11, 'bic': 12, 'bis': 13, 'xor': 14, 'and': 15}) +addop("f1", [bs_f1, sreg, a_d, bw, a_s, dreg, off_s, off_d]) + +bs_f2 = bs_name(l=3, name={'rrc': 0, 'rra': 2, + 'push': 4}) +addop("f2_1", [bs('000100'), bs_f2, bw, a_s, sreg, off_s]) + + +bs_f2_nobw = bs_name(l=3, name={'swpb': 1, 'sxt': 3, + 'call': 5}) +addop("f2_2", [bs('000100'), bs_f2_nobw, 
bs('0'), a_s, sreg, off_s]) + + +offimm = bs(l=10, cls=(msp430_offs,), fname="offs") + +bs_f2_jcc = bs_name(l=3, name={'jnz': 0, 'jz': 1, 'jnc': 2, 'jc': 3, 'jn': 4, + 'jge': 5, 'jl': 6, 'jmp': 7}) +addop("f2_3", [bs('001'), bs_f2_jcc, offimm]) diff --git a/miasm2/arch/msp430/disasm.py b/miasm2/arch/msp430/disasm.py new file mode 100644 index 00000000..f0138bdf --- /dev/null +++ b/miasm2/arch/msp430/disasm.py @@ -0,0 +1,8 @@ +from miasm2.core.asmbloc import disasmEngine +from arch import mn_msp430 + + +class dis_msp430(disasmEngine): + + def __init__(self, bs=None, **kwargs): + super(dis_msp430, self).__init__(mn_msp430, None, bs, **kwargs) diff --git a/miasm2/arch/msp430/ira.py b/miasm2/arch/msp430/ira.py new file mode 100644 index 00000000..8e9a70dd --- /dev/null +++ b/miasm2/arch/msp430/ira.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python +#-*- coding:utf-8 -*- + +from miasm2.expression.expression import * +from miasm2.ir.ir import ir, irbloc +from miasm2.ir.analysis import ira +from miasm2.arch.msp430.sem import ir_msp430 +from miasm2.arch.msp430.regs import * +# from miasm2.core.graph import DiGraph + + +class ir_a_msp430_base(ir_msp430, ira): + + def __init__(self, symbol_pool=None): + ir_msp430.__init__(self, symbol_pool) + self.ret_reg = self.arch.regs.R15 + + +class ir_a_msp430(ir_a_msp430_base): + + def __init__(self, symbol_pool=None): + ir_a_msp430_base.__init__(self, symbol_pool) + + # for test XXX TODO + def set_dead_regs(self, b): + b.rw[-1][1].add(self.arch.regs.zf) + b.rw[-1][1].add(self.arch.regs.nf) + b.rw[-1][1].add(self.arch.regs.of) + b.rw[-1][1].add(self.arch.regs.cf) + + b.rw[-1][1].add(self.arch.regs.res) + b.rw[-1][1].add(self.arch.regs.scg1) + b.rw[-1][1].add(self.arch.regs.scg0) + b.rw[-1][1].add(self.arch.regs.osc) + b.rw[-1][1].add(self.arch.regs.cpuoff) + b.rw[-1][1].add(self.arch.regs.gie) + + def call_effects(self, ad): + irs = [[ExprAff(self.ret_reg, ExprOp('call_func_ret', ad, self.sp)), + ExprAff(self.sp, ExprOp('call_func_stack', ad, 
self.sp)), + ]] + return irs + + def post_add_bloc(self, bloc, ir_blocs): + ir.post_add_bloc(self, bloc, ir_blocs) + # flow_graph = DiGraph() + + l = bloc.lines[-1] + if not l.is_subcall(): + return + + for irb in ir_blocs: + # print 'X'*40 + # print irb + pc_val = None + for exprs in irb.irs: + for e in exprs: + if e.dst == PC: + pc_val = e.src + if pc_val is None: + continue + + l = bloc.lines[-1] + # print str(l), 'IS CALL!' + lbl = bloc.get_next() + new_lbl = self.gen_label() + irs = self.call_effects(pc_val) + nbloc = irbloc(new_lbl, ExprId(lbl, size=self.pc.size), irs) + nbloc.lines = [l] + self.blocs[new_lbl] = nbloc + irb.dst = ExprId(new_lbl, size=self.pc.size) + + def get_out_regs(self, b): + return set([self.ret_reg, self.sp]) + diff --git a/miasm2/arch/msp430/regs.py b/miasm2/arch/msp430/regs.py new file mode 100644 index 00000000..7a389ae1 --- /dev/null +++ b/miasm2/arch/msp430/regs.py @@ -0,0 +1,105 @@ +from miasm2.expression.expression import * +from miasm2.core.cpu import reg_info + + +# GP + +regs16_str = ["PC", "SP", "SR"] + ["R%d" % i for i in xrange(3, 16)] +regs16_expr = [ExprId(x, 16) for x in regs16_str] + + +gpregs = reg_info(regs16_str, regs16_expr) + +PC = regs16_expr[0] +SP = regs16_expr[1] +SR = regs16_expr[2] +R3 = regs16_expr[3] +R4 = regs16_expr[4] +R5 = regs16_expr[5] +R6 = regs16_expr[6] +R7 = regs16_expr[7] +R8 = regs16_expr[8] +R9 = regs16_expr[9] +R10 = regs16_expr[10] +R11 = regs16_expr[11] +R12 = regs16_expr[12] +R13 = regs16_expr[13] +R14 = regs16_expr[14] +R15 = regs16_expr[15] + +PC_init = ExprId("PC_init", 16) +SP_init = ExprId("SP_init", 16) +SR_init = ExprId("SR_init", 16) +R3_init = ExprId("R3_init", 16) +R4_init = ExprId("R4_init", 16) +R5_init = ExprId("R5_init", 16) +R6_init = ExprId("R6_init", 16) +R7_init = ExprId("R7_init", 16) +R8_init = ExprId("R8_init", 16) +R9_init = ExprId("R9_init", 16) +R10_init = ExprId("R10_init", 16) +R11_init = ExprId("R11_init", 16) +R12_init = ExprId("R12_init", 16) +R13_init = 
ExprId("R13_init", 16) +R14_init = ExprId("R14_init", 16) +R15_init = ExprId("R15_init", 16) + + +reg_zf = 'zf' +reg_nf = 'nf' +reg_of = 'of' +reg_cf = 'cf' +reg_cpuoff = 'cpuoff' +reg_gie = 'gie' +reg_osc = 'osc' +reg_scg0 = 'scg0' +reg_scg1 = 'scg1' +reg_res = 'res' + +zf = ExprId(reg_zf, size=1) +nf = ExprId(reg_nf, size=1) +of = ExprId(reg_of, size=1) +cf = ExprId(reg_cf, size=1) + +cpuoff = ExprId(reg_cpuoff, size=1) +gie = ExprId(reg_gie, size=1) +osc = ExprId(reg_osc, size=1) +scg0 = ExprId(reg_scg0, size=1) +scg1 = ExprId(reg_scg1, size=1) +res = ExprId(reg_res, size=7) + + +zf_init = ExprId("zf_init", size=1) +nf_init = ExprId("nf_init", size=1) +of_init = ExprId("of_init", size=1) +cf_init = ExprId("cf_init", size=1) + + +cpuoff_init = ExprId("cpuoff_init", size=1) +gie_init = ExprId("gie_init", size=1) +osc_init = ExprId("osc_init", size=1) +scg0_init = ExprId("scg0_init", size=1) +scg1_init = ExprId("scg1_init", size=1) +res_init = ExprId("res_init", size=7) + + +all_regs_ids = [ + PC, SP, SR, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, + zf, nf, of, cf, + cpuoff, gie, osc, scg0, scg1, res, +] + +all_regs_ids_byname = dict([(x.name, x) for x in all_regs_ids]) + +all_regs_ids_init = [PC_init, SP_init, SR_init, R3_init, + R4_init, R5_init, R6_init, R7_init, + R8_init, R9_init, R10_init, R11_init, + R12_init, R13_init, R14_init, R15_init, + zf_init, nf_init, of_init, cf_init, + cpuoff_init, gie_init, osc_init, + scg0_init, scg1_init, res_init, + ] + +regs_init = {} +for i, r in enumerate(all_regs_ids): + regs_init[r] = all_regs_ids_init[i] diff --git a/miasm2/arch/msp430/sem.py b/miasm2/arch/msp430/sem.py new file mode 100644 index 00000000..6fea2c21 --- /dev/null +++ b/miasm2/arch/msp430/sem.py @@ -0,0 +1,440 @@ +#!/usr/bin/env python +#-*- coding:utf-8 -*- + +from miasm2.expression.expression import * +from miasm2.arch.msp430.regs import * +from miasm2.arch.msp430.arch import mn_msp430 +from miasm2.ir.ir import ir +from regs import * + + +# 
Utils +def hex2bcd(val): + "Return val as BCD" + try: + return int("%x" % val, 10) + except ValueError: + raise NotImplementedError("Not defined behaviour") + + +def bcd2hex(val): + "Return the hex value of a BCD" + try: + return int("0x%d" % val, 16) + except ValueError: + raise NotImplementedError("Not defined behaviour") + + +def reset_sr_res(): + return [ExprAff(res, ExprInt_fromsize(7, 0))] + + +def update_flag_zf(a): + return [ExprAff(zf, ExprCond(a, ExprInt_from(zf, 0), ExprInt_from(zf, 1)))] + + +def update_flag_nf(a): + return [ExprAff(nf, a.msb())] + + +def update_flag_pf(a): + return [ExprAff(pf, ExprOp('parity', a & ExprInt_from(a, 0xFF)))] + + +def update_flag_cf_inv_zf(a): + return [ExprAff(cf, ExprCond(a, ExprInt_from(cf, 1), ExprInt_from(cf, 0)))] + + +def update_flag_zn_r(a): + e = [] + e += update_flag_zf(a) + e += update_flag_nf(a) + e += reset_sr_res() + return e + + +def update_flag_sub_cf(a, b, c): + return [ExprAff(cf, + ((((a ^ b) ^ c) ^ ((a ^ c) & (a ^ b))).msb()) ^ ExprInt1(1))] + + +def update_flag_add_cf(a, b, c): + return [ExprAff(cf, (((a ^ b) ^ c) ^ ((a ^ c) & (~(a ^ b)))).msb())] + + +def update_flag_add_of(a, b, c): + return [ExprAff(of, (((a ^ c) & (~(a ^ b)))).msb())] + + +def update_flag_sub_of(a, b, c): + return [ExprAff(of, (((a ^ c) & (a ^ b))).msb())] + + +def mng_autoinc(a, b, size): + e = [] + if not (isinstance(a, ExprOp) and a.op == "autoinc"): + return e, a, b + + a_r = a.args[0] + e.append(ExprAff(a_r, a_r + ExprInt_from(a_r, size / 8))) + a = ExprMem(a_r, size) + if isinstance(b, ExprMem) and a_r in b.arg: + b = ExprMem(b.arg + ExprInt16(size / 8), b.size) + return e, a, b + +# Mnemonics + + +def mov_b(ir, instr, a, b): + e, a, b = mng_autoinc(a, b, 8) + if isinstance(b, ExprMem): + b = ExprMem(b.arg, 8) + a = a[:8] + else: + a = a[:8].zeroExtend(16) + e.append(ExprAff(b, a)) + return None, e, [] + + +def mov_w(ir, instr, a, b): + e, a, b = mng_autoinc(a, b, 16) + e.append(ExprAff(b, a)) + if b == ir.pc: + dst = PC + 
else: + dst = None + return dst, e, [] + + +def and_b(ir, instr, a, b): + e, a, b = mng_autoinc(a, b, 8) + c = a[:8] & b[:8] + e.append(ExprAff(b, c.zeroExtend(16))) + e += update_flag_zn_r(c) + e += update_flag_cf_inv_zf(c) + e += [ExprAff(of, ExprInt1(0))] + return None, e, [] + + +def and_w(ir, instr, a, b): + e, a, b = mng_autoinc(a, b, 16) + c = a & b + e.append(ExprAff(b, c)) + e += update_flag_zn_r(c) + e += update_flag_cf_inv_zf(c) + e += [ExprAff(of, ExprInt1(0))] + return None, e, [] + + +def bic_b(ir, instr, a, b): + e, a, b = mng_autoinc(a, b, 8) + c = (a[:8] ^ ExprInt8(0xff)) & b[:8] + c = c.zeroExtend(b.size) + e.append(ExprAff(b, c)) + return None, e, [] + + +def bic_w(ir, instr, a, b): + e, a, b = mng_autoinc(a, b, 16) + c = (a ^ ExprInt16(0xffff)) & b + e.append(ExprAff(b, c)) + return None, e, [] + + +def bis_w(ir, instr, a, b): + e, a, b = mng_autoinc(a, b, 16) + c = a | b + e.append(ExprAff(b, c)) + return None, e, [] + + +def bit_w(ir, instr, a, b): + e, a, b = mng_autoinc(a, b, 16) + c = a & b + e += update_flag_zn_r(c) + e += update_flag_cf_inv_zf(c) + e.append(ExprAff(of, ExprInt1(0))) + return None, e, [] + +""" +def sub_b(ir, instr, a, b): + e, a, b = mng_autoinc(a, b, 8) + c = b - a + e.append(ExprAff(b, c)) + e += update_flag_zn_r(c) + e += update_flag_sub_cf(b, a, c) + return None, e, [] +""" + + +def sub_w(ir, instr, a, b): + e, a, b = mng_autoinc(a, b, 16) + c = b - a + e.append(ExprAff(b, c)) + e += update_flag_zn_r(c) + e += update_flag_sub_cf(b, a, c) + # micrcorruption + # e += update_flag_sub_of(a, b, c) + # e += update_flag_sub_of(b, a, c) + return None, e, [] + + +def add_w(ir, instr, a, b): + e, a, b = mng_autoinc(a, b, 16) + c = b + a + e.append(ExprAff(b, c)) + e += update_flag_zn_r(c) + e += update_flag_add_cf(a, b, c) + e += update_flag_add_of(a, b, c) + return None, e, [] + + +def dadd_w(ir, instr, a, b): + e, a, b = mng_autoinc(a, b, 16) + # TODO: microcorruption no carryflag + c = ExprOp("bcdadd", b, a) # 
+zeroExtend(cf, 16)) + + e.append(ExprAff(b, c)) + # e += update_flag_zn_r(c) + + # micrcorruption + e += update_flag_zf(a) + # e += update_flag_nf(a) + e += reset_sr_res() + + e.append(ExprAff(cf, ExprOp("bcdadd_cf", b, a))) # +zeroExtend(cf, 16)))) + + # of : undefined + return None, e, [] + + +def xor_w(ir, instr, a, b): + e, a, b = mng_autoinc(a, b, 16) + c = b ^ a + e.append(ExprAff(b, c)) + e += update_flag_zn_r(c) + e += update_flag_cf_inv_zf(c) + e.append(ExprAff(of, b.msb() & a.msb())) + return None, e, [] + + +def push_w(ir, instr, a): + e = [] + e.append(ExprAff(ExprMem(SP - ExprInt16(2), 16), a)) + e.append(ExprAff(SP, SP - ExprInt16(2))) + return None, e, [] + + +def call(ir, instr, a): + e, a, dummy = mng_autoinc(a, None, 16) + n = ExprId(ir.get_next_label(instr), 16) + e.append(ExprAff(ExprMem(SP - ExprInt16(2), 16), n)) + e.append(ExprAff(SP, SP - ExprInt16(2))) + e.append(ExprAff(PC, a)) + return PC, e, [] + + +def swpb(ir, instr, a): + e = [] + x, y = a[:8], a[8:16] + e.append(ExprAff(a, ExprCompose([(y, 0, 8), + (x, 8, 16)]))) + return None, e, [] + + +def cmp_w(ir, instr, a, b): + e, a, b = mng_autoinc(a, b, 16) + c = b - a + e += update_flag_zn_r(c) + e += update_flag_sub_cf(a, b, c) + e += update_flag_sub_of(a, b, c) + return None, e, [] + + +def cmp_b(ir, instr, a, b): + e, a, b = mng_autoinc(a, b, 8) + c = b[:8] - a[:8] + e += update_flag_zn_r(c) + e += update_flag_sub_cf(a[:8], b[:8], c) + e += update_flag_sub_of(a[:8], b[:8], c) + return None, e, [] + + +def jz(ir, instr, a): + n = ExprId(ir.get_next_label(instr), 16) + e = [] + e.append(ExprAff(PC, ExprCond(zf, a, n))) + return PC, e, [] + + +def jnz(ir, instr, a): + n = ExprId(ir.get_next_label(instr), 16) + e = [] + e.append(ExprAff(PC, ExprCond(zf, n, a))) + return PC, e, [] + + +def jl(ir, instr, a): + n = ExprId(ir.get_next_label(instr), 16) + e = [] + e.append(ExprAff(PC, ExprCond(nf ^ of, a, n))) + return PC, e, [] + + +def jc(ir, instr, a): + n = ExprId(ir.get_next_label(instr), 
16) + e = [] + e.append(ExprAff(PC, ExprCond(cf, a, n))) + return PC, e, [] + + +def jnc(ir, instr, a): + n = ExprId(ir.get_next_label(instr), 16) + e = [] + e.append(ExprAff(PC, ExprCond(cf, n, a))) + return PC, e, [] + + +def jge(ir, instr, a): + n = ExprId(ir.get_next_label(instr), 16) + e = [] + e.append(ExprAff(PC, ExprCond(nf ^ of, n, a))) + return PC, e, [] + + +def jmp(ir, instr, a): + e = [] + e.append(ExprAff(PC, a)) + return PC, e, [] + + +def rrc_w(ir, instr, a): + e = [] + c = ExprCompose([(a[1:16], 0, 15), + (cf, 15, 16)]) + e.append(ExprAff(a, c)) + e.append(ExprAff(cf, a[:1])) + # e += update_flag_zn_r(c) + + # micrcorruption + e += update_flag_zf(a) + # e += update_flag_nf(a) + e += reset_sr_res() + + e.append(ExprAff(of, ExprInt1(0))) + return None, e, [] + + +def rra_w(ir, instr, a): + e = [] + c = ExprCompose([(a[1:16], 0, 15), + (a[15:16], 15, 16)]) + e.append(ExprAff(a, c)) + # TODO: error in disasm microcorruption? + # e.append(ExprAff(cf, a[:1])) + # e += update_flag_zn_r(c) + + # micrcorruption + e += update_flag_zf(a) + # e += update_flag_nf(a) + e += reset_sr_res() + + e.append(ExprAff(of, ExprInt1(0))) + return None, e, [] + + +def sxt(ir, instr, a): + e = [] + c = a[:8].signExtend(16) + e.append(ExprAff(a, c)) + + e += update_flag_zn_r(c) + e += update_flag_cf_inv_zf(c) + e.append(ExprAff(of, ExprInt1(0))) + + return None, e, [] + +mnemo_func = { + "mov.b": mov_b, + "mov.w": mov_w, + "and.b": and_b, + "and.w": and_w, + "bic.b": bic_b, + "bic.w": bic_w, + "bis.w": bis_w, + "bit.w": bit_w, + "sub.w": sub_w, + "add.w": add_w, + "push.w": push_w, + "dadd.w": dadd_w, + "xor.w": xor_w, + "call": call, + "swpb": swpb, + "cmp.w": cmp_w, + "cmp.b": cmp_b, + "jz": jz, + "jnz": jnz, + "jl": jl, + "jc": jc, + "jnc": jnc, + "jmp": jmp, + "jge": jge, + "rrc.w": rrc_w, + "rra.w": rra_w, + "sxt": sxt, +} + + +composed_sr = ExprCompose([ + (cf, 0, 1), + (zf, 1, 2), + (nf, 2, 3), + (gie, 3, 4), + (cpuoff, 4, 5), + (osc, 5, 6), + (scg0, 6, 7), + (scg1, 7, 
8), + (of, 8, 9), + (res, 9, 16), +]) + + +def ComposeExprAff(dst, src): + e = [] + for x, start, stop in dst.args: + e.append(ExprAff(x, src[start:stop])) + return e + + +class ir_msp430(ir): + + def __init__(self, symbol_pool=None): + ir.__init__(self, mn_msp430, None, symbol_pool) + self.pc = PC + self.sp = SP + + def mod_pc(self, instr, instr_ir, extra_ir): + pass + + def get_ir(self, instr): + # print instr#, args + args = instr.args + dst, instr_ir, extra_ir = mnemo_func[instr.name](self, instr, *args) + self.mod_sr(instr, instr_ir, extra_ir) + + return dst, instr_ir, extra_ir + + def mod_sr(self, instr, instr_ir, extra_ir): + for i, x in enumerate(instr_ir): + x.src = x.src.replace_expr({SR: composed_sr}) + if x.dst != SR: + continue + xx = ComposeExprAff(composed_sr, x.src) + instr_ir[i:i + 1] = xx + for i, x in enumerate(instr_ir): + x = ExprAff(x.dst, x.src.replace_expr( + {self.pc: ExprInt16(instr.offset + instr.l)})) + instr_ir[i] = x + + if extra_ir: + raise NotImplementedError('not fully functional') diff --git a/miasm2/arch/sh4/__init__.py b/miasm2/arch/sh4/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/miasm2/arch/sh4/__init__.py diff --git a/miasm2/arch/sh4/arch.py b/miasm2/arch/sh4/arch.py new file mode 100644 index 00000000..c2029000 --- /dev/null +++ b/miasm2/arch/sh4/arch.py @@ -0,0 +1,1404 @@ +#!/usr/bin/env python +#-*- coding:utf-8 -*- + +import os +from pyparsing import * +from miasm2.core.cpu import * +from miasm2.expression.expression import * +from collections import defaultdict +from regs import * + + +jra = ExprId('jra') +jrb = ExprId('jrb') +jrc = ExprId('jrc') + + +# parser helper ########### +PLUS = Suppress("+") +MULT = Suppress("*") +MINUS = Suppress("-") +AND = Suppress("&") +LBRACK = Suppress("[") +RBRACK = Suppress("]") +DEREF = Suppress("@") +COMMA = Suppress(",") +LPARENT = Suppress("(") +RPARENT = Suppress(")") + + +def parse_deref_pcimm(t): + t = t[0] + return t[0] + t[1] + + +def 
parse_pcandimmimm(t): + t = t[0] + return (t[0] & t[1]) + t[2] + +def ast_id2expr(a): + return ExprId(a, 32) + +def ast_int2expr(a): + return ExprInt32(a) + + +my_var_parser = parse_ast(ast_id2expr, ast_int2expr) +base_expr.setParseAction(my_var_parser) + +int_or_expr = base_expr + +ref_pc = Group(LPARENT + regi_pc.parser + COMMA + + int_or_expr + RPARENT).setParseAction(parse_deref_pcimm) +ref_pcandimm = Group( + LPARENT + regi_pc.parser + AND + int_or_expr + + COMMA + int_or_expr + RPARENT).setParseAction(parse_pcandimmimm) + + +pcdisp = Group(regi_pc.parser + AND + int_or_expr + + PLUS + int_or_expr).setParseAction(parse_pcandimmimm) + +PTR = Suppress('PTR') + + +def parse_deref_mem(s, l, t): + t = t[0] + e = ExprMem(t[0], 32) + return e + + +def parse_predec(s, l, t): + t = t[0] + e = ExprOp('predec', t[0]) + return e + + +def parse_postinc(s, l, t): + t = t[0] + e = ExprOp('postinc', t[0]) + return e + + +def parse_regdisp(t): + t = t[0] + e = ExprMem(t[0] + t[1]) + return e + + +def parse_regreg(t): + t = t[0] + e = ExprMem(t[0] + t[1]) + return e + + +deref_pc = Group(DEREF + ref_pc).setParseAction(parse_deref_mem) +deref_pcimm = Group(DEREF + ref_pcandimm).setParseAction(parse_deref_mem) + +dgpregs_base = Group(DEREF + gpregs.parser).setParseAction(parse_deref_mem) +dgpregs_predec = Group( + DEREF + MINUS + gpregs.parser).setParseAction(parse_predec) +dgpregs_postinc = Group( + DEREF + gpregs.parser + PLUS).setParseAction(parse_postinc) + +dgpregs = dgpregs_base | dgpregs_predec | dgpregs_postinc + +d_gpreg_gpreg = Group(DEREF + + LPARENT + gpregs.parser + COMMA + gpregs.parser + RPARENT + ).setParseAction(parse_regdisp) +dgpregs_p = dgpregs_predec | dgpregs_postinc + + +dgpregs_ir = Group(DEREF + LPARENT + gpregs.parser + + COMMA + int_or_expr + RPARENT).setParseAction(parse_regdisp) +dgpregs_ir |= d_gpreg_gpreg + +dgbr_imm = Group(DEREF + LPARENT + regi_gbr.parser + + COMMA + int_or_expr + RPARENT).setParseAction(parse_regdisp) + +dgbr_reg = Group(DEREF + 
LPARENT + regi_gbr.parser + + COMMA + gpregs.parser + RPARENT).setParseAction(parse_regreg) + + +class sh4_reg(reg_noarg, m_arg): + pass + + +class sh4_gpreg(sh4_reg): + reg_info = gpregs + parser = reg_info.parser + + +class sh4_dr(sh4_reg): + reg_info = dregs + parser = reg_info.parser + + +class sh4_bgpreg(sh4_reg): + reg_info = bgpregs + parser = reg_info.parser + + +class sh4_gpreg_noarg(reg_noarg, ): + reg_info = gpregs + parser = reg_info.parser + + +class sh4_freg(sh4_reg): + reg_info = fregs + parser = reg_info.parser + + +class sh4_dgpreg(m_arg): + parser = dgpregs_base + + def fromstring(self, s, parser_result=None): + start, stop = super(sh4_dgpreg, self).fromstring(s, parser_result) + if start is None: + return start, stop + self.expr = ExprMem(self.expr.arg, self.sz) + return start, stop + + def decode(self, v): + r = gpregs.expr[v] + self.expr = ExprMem(r, self.sz) + return True + + def encode(self): + e = self.expr + if not isinstance(e, ExprMem): + return False + if not isinstance(e.arg, ExprId): + return False + v = gpregs.expr.index(e.arg) + self.value = v + return True + + @staticmethod + def arg2str(e): + ad = e.arg + if isinstance(ad, ExprOp): + s = ','.join([str(x).replace('(', '').replace(')', '') + for x in ad.args]) + s = "@(%s)" % s + else: + s = "@%s" % ad + return s + + +class sh4_dgpregpinc(m_arg): + parser = dgpregs_p + + def fromstring(self, s, parser_result=None): + start, stop = super(sh4_dgpregpinc, self).fromstring(s, parser_result) + if not isinstance(self.expr, ExprOp): + return None, None + if self.expr.op != self.op: + return None, None + return start, stop + + def decode(self, v): + r = gpregs.expr[v] + e = ExprOp(self.op, r, ExprInt32(self.sz)) + self.expr = e + return True + + def encode(self): + e = self.expr + res = MatchExpr(e, ExprOp(self.op, jra), [jra]) + if not res: + return False + r = res[jra] + if not r in gpregs.expr: + return False + v = gpregs.expr.index(r) + self.value = v + return True + + @staticmethod + 
def arg2str(e): + if e.op == "predec": + o = '-%s' % e.args[0] + elif e.op == "postinc": + o = '%s+' % e.args[0] + else: + raise ValueError('unknown e.op: %s' % e.op) + return "@%s" % o + + +class sh4_dgpregpdec(m_arg): + parser = dgpregs_postinc + op = "preinc" + + +class sh4_dgpreg_imm(sh4_dgpreg): + parser = dgpregs_ir + + def decode(self, v): + p = self.parent + r = gpregs.expr[v] + s = self.sz + d = ExprInt32(p.disp.value * s / 8) + e = ExprMem(r + d, s) + self.expr = e + return True + + def encode(self): + e = self.expr + p = self.parent + s = self.sz + if not isinstance(e, ExprMem): + return False + if isinstance(e.arg, ExprId): + v = gpregs.expr.index(e.arg) + p.disp.value = 0 + elif isinstance(e.arg, ExprOp): + res = MatchExpr(e, ExprMem(jra + jrb, self.sz), [jra, jrb]) + if not res: + return False + if not isinstance(res[jra], ExprId): + return False + if not isinstance(res[jrb], ExprInt): + return False + d = int(res[jrb].arg) + p.disp.value = d / (s / 8) + if not res[jra] in gpregs.expr: + return False + v = gpregs.expr.index(res[jra]) + else: + return False + self.value = v + return True + + +class sh4_imm(imm_noarg, m_arg): + parser = base_expr + pass + + +class sh4_simm(sh4_imm): + parser = base_expr + + def decode(self, v): + v = sign_ext(v, self.l, 32) + v = self.decodeval(v) + self.expr = ExprInt32(v) + return True + + def encode(self): + if not isinstance(self.expr, ExprInt): + return False + v = int(self.expr.arg) + if (1 << (self.l - 1)) & v: + v = -((0xffffffff ^ v) + 1) + v = self.encodeval(v) + self.value = (v & 0xffffffff) & self.lmask + return True + + +class sh4_dpc16imm(sh4_dgpreg): + parser = deref_pc + + def decode(self, v): + self.expr = ExprMem(PC + ExprInt32(v * 2 + 4), 16) + return True + + def calcdisp(self, v): + v = (int(v.arg) - 4) / 2 + if not 0 < v <= 0xff: + return None + return v + + def encode(self): + res = MatchExpr(self.expr, ExprMem(PC + jra, 16), [jra]) + if not res: + return False + if not isinstance(res[jra], 
ExprInt): + return False + v = self.calcdisp(res[jra]) + if v is None: + return False + self.value = v + return True + + +class sh4_dgbrimm8(sh4_dgpreg): + parser = dgbr_imm + + def decode(self, v): + s = self.sz + self.expr = ExprMem(GBR + ExprInt32(v * s / 8), s) + return True + + def encode(self): + e = self.expr + s = self.sz + if e == ExprMem(GBR): + self.value = 0 + return True + res = MatchExpr(self.expr, ExprMem(GBR + jra, s), [jra]) + if not res: + return False + if not isinstance(res[jra], ExprInt): + return False + self.value = int(res[jra].arg) / (s / 8) + return True + + +class sh4_dpc32imm(sh4_dpc16imm): + parser = deref_pcimm + + def decode(self, v): + self.expr = ExprMem( + (PC & ExprInt32(0xfffffffc)) + ExprInt32(v * 4 + 4)) + return True + + def calcdisp(self, v): + v = (int(v.arg) - 4) / 4 + if not 0 < v <= 0xff: + return None + return v + + def encode(self): + res = MatchExpr( + self.expr, ExprMem((PC & ExprInt32(0xFFFFFFFC)) + jra, 32), [jra]) + if not res: + return False + if not isinstance(res[jra], ExprInt): + return False + v = self.calcdisp(res[jra]) + if v is None: + return False + self.value = v + return True + + +class sh4_pc32imm(m_arg): + parser = pcdisp + + def decode(self, v): + self.expr = (PC & ExprInt32(0xfffffffc)) + ExprInt32(v * 4 + 4) + return True + + def encode(self): + res = MatchExpr(self.expr, (PC & ExprInt32(0xfffffffc)) + jra, [jra]) + if not res: + return False + if not isinstance(res[jra], ExprInt): + return False + v = (int(res[jra].arg) - 4) / 4 + if v is None: + return False + self.value = v + return True + + @staticmethod + def arg2str(e): + s = str(e).replace('(', '').replace(')', '') + return "%s" % s + + +class additional_info: + + def __init__(self): + self.except_on_instr = False + + +class instruction_sh4(instruction): + delayslot = 0 + + def __init__(self, *args, **kargs): + super(instruction_arm, self).__init__(*args, **kargs) + + def dstflow(self): + return self.name.startswith('J') + """ + def 
dstflow2label(self, symbol_pool): + e = self.args[0] + if not isinstance(e, ExprInt): + return + if self.name == 'BLX': + ad = e.arg+8+self.offset + else: + ad = e.arg+8+self.offset + l = symbol_pool.getby_offset_create(ad) + s = ExprId(l, e.size) + self.args[0] = s + """ + + def breakflow(self): + if self.name.startswith('J'): + return True + return False + + def is_subcall(self): + return self.name == 'JSR' + + def getdstflow(self, symbol_pool): + return [self.args[0]] + + def splitflow(self): + return self.name == 'JSR' + + def get_symbol_size(self, symbol, symbol_pool): + return 32 + + def fixDstOffset(self): + e = self.args[0] + print 'FIX', e, self.offset, self.l + if self.offset is None: + raise ValueError('symbol not resolved %s' % l) + if not isinstance(e, ExprInt): + log.warning('zarb dst %r' % e) + return + off = e.arg - (self.offset + 4 + self.l) + print hex(off) + if int(off % 4): + raise ValueError('strange offset! %r' % off) + self.args[0] = ExprInt32(off) + print 'final', self.args[0] + + def get_args_expr(self): + args = [a for a in self.args] + return args + + +class mn_sh4(cls_mn): + bintree = {} + num = 0 + all_mn = [] + all_mn_mode = defaultdict(list) + all_mn_name = defaultdict(list) + all_mn_inst = defaultdict(list) + pc = PC + # delayslot: + # http://resource.renesas.com/lib/eng/e_learnig/sh4/13/index.html + delayslot = 0 # unit is instruction instruction + + def additional_info(self): + info = additional_info() + return info + + @classmethod + def getbits(cls, bs, start, n): + if not n: + return 0 + o = 0 + if n > bs.getlen() * 8: + raise ValueError('not enought bits %r %r' % (n, len(bs.bin) * 8)) + while n: + i = start / 8 + c = cls.getbytes(bs, i) + if not c: + raise IOError + c = ord(c) + r = 8 - start % 8 + c &= (1 << r) - 1 + l = min(r, n) + c >>= (r - l) + o <<= l + o |= c + n -= l + start += l + return o + + @classmethod + def getbytes(cls, bs, offset, l=1): + out = "" + for _ in xrange(l): + n_offset = (offset & ~1) + 1 - offset % 2 
+ out += bs.getbytes(n_offset, 1) + offset += 1 + return out + + @classmethod + def check_mnemo(cls, fields): + l = sum([x.l for x in fields]) + assert l == 16, "len %r" % l + + @classmethod + def getmn(cls, name): + return name.upper().replace('_', '.') + + @classmethod + def gen_modes(cls, subcls, name, bases, dct, fields): + dct['mode'] = None + return [(subcls, name, bases, dct, fields)] + + def value(self, mode): + v = super(mn_sh4, self).value(mode) + return [x[::-1] for x in v] + + +class bs_dr0gbr(sh4_dgpreg): + parser = dgbr_reg + + def decode(self, v): + self.expr = ExprMem(GBR + R0, 8) + return True + + def encode(self): + return self.expr == ExprMem(GBR + R0, 8) + + +class bs_dr0gp(sh4_dgpreg): + parser = d_gpreg_gpreg + + def decode(self, v): + self.expr = ExprMem(gpregs.expr[v] + R0, self.sz) + return True + + def encode(self): + res = MatchExpr(self.expr, ExprMem(R0 + jra, self.sz), [jra]) + if not res: + return False + r = res[jra] + if not r in gpregs.expr: + return False + self.value = gpregs.expr.index(r) + return True + + +class bs_dgpreg(sh4_dgpreg): + parser = dgpregs_base + + +rn = bs(l=4, cls=(sh4_gpreg,), fname="rn") +rm = bs(l=4, cls=(sh4_gpreg,), fname="rm") + + +d08_rn = bs(l=4, cls=(sh4_dgpreg,), fname="rn", sz = 8) +d16_rn = bs(l=4, cls=(sh4_dgpreg,), fname="rn", sz = 16) +d32_rn = bs(l=4, cls=(sh4_dgpreg,), fname="rn", sz = 32) +d08_rm = bs(l=4, cls=(sh4_dgpreg,), fname="rm", sz = 8) +d16_rm = bs(l=4, cls=(sh4_dgpreg,), fname="rm", sz = 16) +d32_rm = bs(l=4, cls=(sh4_dgpreg,), fname="rm", sz = 32) + + +brm = bs(l=3, cls=(sh4_bgpreg,), fname="brm") +brn = bs(l=3, cls=(sh4_bgpreg,), fname="brn") + +d08rnimm = bs(l=4, fname="rn", cls=(sh4_dgpreg_imm,), sz = 8) +d16rnimm = bs(l=4, fname="rn", cls=(sh4_dgpreg_imm,), sz = 16) +d32rnimm = bs(l=4, fname="rn", cls=(sh4_dgpreg_imm,), sz = 32) + +d08rmimm = bs(l=4, fname="rm", cls=(sh4_dgpreg_imm,), sz = 8) +d16rmimm = bs(l=4, fname="rm", cls=(sh4_dgpreg_imm,), sz = 16) +d32rmimm = bs(l=4, 
fname="rm", cls=(sh4_dgpreg_imm,), sz = 32) + +btype = bs(l=4, fname="btype", order=-1) + +s08imm = bs(l=8, cls=(sh4_simm,), fname="imm") +s12imm = bs(l=12, cls=(sh4_simm,), fname="imm") +dpc16imm = bs(l=8, cls=(sh4_dpc16imm,), fname="pcimm", sz=16) +dpc32imm = bs(l=8, cls=(sh4_dpc32imm,), fname="pcimm", sz=32) +dimm4 = bs(l=4, fname='disp', order=-1) +d08gbrimm8 = bs(l=8, cls=(sh4_dgbrimm8,), fname='disp', sz=8) +d16gbrimm8 = bs(l=8, cls=(sh4_dgbrimm8,), fname='disp', sz=16) +d32gbrimm8 = bs(l=8, cls=(sh4_dgbrimm8,), fname='disp', sz=32) + +pc32imm = bs(l=8, cls=(sh4_pc32imm,), fname="pcimm") + +d08rnpinc = bs(l=4, cls=(sh4_dgpregpinc,), op='postinc', sz=8, fname="rn") +d08rmpinc = bs(l=4, cls=(sh4_dgpregpinc,), op='postinc', sz=8, fname="rm") + +d16rnpinc = bs(l=4, cls=(sh4_dgpregpinc,), op='postinc', sz=16, fname="rn") +d16rmpinc = bs(l=4, cls=(sh4_dgpregpinc,), op='postinc', sz=16, fname="rm") + +d32rnpinc = bs(l=4, cls=(sh4_dgpregpinc,), op='postinc', sz=32, fname="rn") +d32rmpinc = bs(l=4, cls=(sh4_dgpregpinc,), op='postinc', sz=32, fname="rm") + +d08rnpdec = bs(l=4, cls=(sh4_dgpregpinc,), op='predec', sz=8, fname="rn") +d08rmpdec = bs(l=4, cls=(sh4_dgpregpinc,), op='predec', sz=8, fname="rm") + +d16rnpdec = bs(l=4, cls=(sh4_dgpregpinc,), op='predec', sz=16, fname="rn") +d16rmpdec = bs(l=4, cls=(sh4_dgpregpinc,), op='predec', sz=16, fname="rm") + +d32rnpdec = bs(l=4, cls=(sh4_dgpregpinc,), op='predec', sz=32, fname="rn") +d32rmpdec = bs(l=4, cls=(sh4_dgpregpinc,), op='predec', sz=32, fname="rm") + + +u08imm = bs(l=8, cls=(sh4_imm,), fname="imm") +dr0gbr = bs(l=0, cls=(bs_dr0gbr,), sz=8) + +d08gpreg = bs(l=4, cls=(bs_dgpreg,), sz=8) +d32gpreg = bs(l=4, cls=(bs_dgpreg,), sz=32) + +frn = bs(l=4, cls=(sh4_freg,), fname="frn") +frm = bs(l=4, cls=(sh4_freg,), fname="frm") + +bd08r0gp = bs(l=4, cls=(bs_dr0gp,), sz=8) +bd16r0gp = bs(l=4, cls=(bs_dr0gp,), sz=16) +bd32r0gp = bs(l=4, cls=(bs_dr0gp,), sz=32) + +drn = bs(l=3, cls=(sh4_dr,), fname="drn") +drm = bs(l=3, 
cls=(sh4_dr,), fname="drm") + + +def addop(name, fields, args=None, alias=False): + dct = {"fields": fields} + dct["alias"] = alias + if args is not None: + dct['args'] = args + type(name, (mn_sh4,), dct) + +addop("mov", [bs('1110'), rn, s08imm], [s08imm, rn]) +addop("mov_w", [bs('1001'), rn, dpc16imm], [dpc16imm, rn]) +addop("mov_l", [bs('1101'), rn, dpc32imm], [dpc32imm, rn]) +addop("mov", [bs('0110', fname="opc"), rn, rm, bs('0011')], [rm, rn]) +addop("mov_b", [bs('0010', fname="opc"), d08_rn, rm, bs('0000')], [rm, d08_rn]) +addop("mov_w", [bs('0010', fname="opc"), d16_rn, rm, bs('0001')], [rm, d16_rn]) +addop("mov_l", [bs('0010', fname="opc"), d32_rn, rm, bs('0010')], [rm, d32_rn]) +addop("mov_b", [bs('0110', fname="opc"), rn, d08_rm, bs('0000')], [d08_rm, rn]) +addop("mov_w", [bs('0110', fname="opc"), rn, d16_rm, bs('0001')], [d16_rm, rn]) +addop("mov_l", [bs('0110', fname="opc"), rn, d32_rm, bs('0010')], [d32_rm, rn]) +addop("mov_b", + [bs('0010', fname="opc"), d08rnpdec, rm, bs('0100')], [rm, d08rnpdec]) +addop("mov_w", + [bs('0010', fname="opc"), d16rnpdec, rm, bs('0101')], [rm, d16rnpdec]) +addop("mov_l", + [bs('0010', fname="opc"), d32rnpdec, rm, bs('0110')], [rm, d32rnpdec]) +addop("mov_b", + [bs('0110', fname="opc"), rn, d08rmpinc, bs('0100')], [rm, d08rnpinc]) +addop("mov_w", + [bs('0110', fname="opc"), rn, d16rmpinc, bs('0101')], [d16rmpinc, rn]) +addop("mov_l", + [bs('0110', fname="opc"), rn, d32rmpinc, bs('0110')], [d32rmpinc, rn]) +addop("mov_b", [bs('10000000', fname='opc'), bsr0, d08rnimm, dimm4]) +addop("mov_w", [bs('10000001', fname='opc'), bsr0, d16rnimm, dimm4]) +addop("mov_l", [bs('0001', fname='opc'), d32rnimm, rm, dimm4], [rm, d32rnimm]) +addop("mov_b", [bs('10000100', fname='opc'), d08rmimm, dimm4, bsr0]) +addop("mov_w", [bs('10000101', fname='opc'), d16rmimm, dimm4, bsr0]) +addop("mov_l", [bs('0101', fname='opc'), rn, d32rmimm, dimm4], [d32rmimm, rn]) +addop("mov_b", + [bs('0000', fname='opc'), bd08r0gp, rm, bs('0100')], [rm, bd08r0gp]) 
+addop("mov_w", + [bs('0000', fname='opc'), bd16r0gp, rm, bs('0101')], [rm, bd16r0gp]) +addop("mov_l", + [bs('0000', fname='opc'), bd32r0gp, rm, bs('0110')], [rm, bd32r0gp]) +addop("mov_b", + [bs('0000', fname='opc'), rn, bd08r0gp, bs('1100')], [bd08r0gp, rn]) +addop("mov_w", + [bs('0000', fname='opc'), rn, bd16r0gp, bs('1101')], [bd16r0gp, rn]) +addop("mov_l", + [bs('0000', fname='opc'), rn, bd32r0gp, bs('1110')], [bd32r0gp, rn]) + +addop("mov_b", [bs('11000000'), bsr0, d08gbrimm8]) +addop("mov_w", [bs('11000001'), bsr0, d16gbrimm8]) +addop("mov_l", [bs('11000010'), bsr0, d32gbrimm8]) + +addop("mov_b", [bs('11000100'), d08gbrimm8, bsr0]) +addop("mov_w", [bs('11000101'), d16gbrimm8, bsr0]) +addop("mov_l", [bs('11000110'), d32gbrimm8, bsr0]) + +addop("mov", [bs('11000111'), pc32imm, bsr0]) + +addop("swapb", [bs('0110'), rn, rm, bs('1000')], [rm, rn]) +addop("swapw", [bs('0110'), rn, rm, bs('1001')], [rm, rn]) +addop("xtrct", [bs('0010'), rn, rm, bs('1101')], [rm, rn]) + + +addop("add", [bs('0011'), rn, rm, bs('1100')], [rm, rn]) +addop("add", [bs('0111'), rn, s08imm], [s08imm, rn]) +addop("addc", [bs('0011'), rn, rm, bs('1110')], [rm, rn]) +addop("addv", [bs('0011'), rn, rm, bs('1111')], [rm, rn]) + + +addop("cmpeq", [bs('10001000'), s08imm, bsr0]) + + +addop("cmpeq", [bs('0011'), rn, rm, bs('0000')], [rm, rn]) +addop("cmphs", [bs('0011'), rn, rm, bs('0010')], [rm, rn]) +addop("cmpge", [bs('0011'), rn, rm, bs('0011')], [rm, rn]) +addop("cmphi", [bs('0011'), rn, rm, bs('0110')], [rm, rn]) +addop("cmpgt", [bs('0011'), rn, rm, bs('0111')], [rm, rn]) + + +addop("cmppz", [bs('0100'), rn, bs('00010001')]) +addop("cmppl", [bs('0100'), rn, bs('00010101')]) +addop("cmpstr", [bs('0010'), rn, rm, bs('1100')], [rm, rn]) + + +addop("div1", [bs('0011'), rn, rm, bs('0100')], [rm, rn]) + +addop("div0s", [bs('0010'), rn, rm, bs('0111')], [rm, rn]) +addop("div0u", [bs('0000000000011001')]) + +addop("dmuls", [bs('0011'), rn, rm, bs('1101')], [rm, rn]) +addop("dmulu", [bs('0011'), rn, 
rm, bs('0101')], [rm, rn]) + +addop("dt", [bs('0100'), rn, bs('00010000')]) + + +addop("extsb", [bs('0110'), rn, rm, bs('1110')], [rm, rn]) +addop("extsw", [bs('0110'), rn, rm, bs('1111')], [rm, rn]) +addop("extub", [bs('0110'), rn, rm, bs('1100')], [rm, rn]) +addop("extuw", [bs('0110'), rn, rm, bs('1101')], [rm, rn]) + +addop("mac_l", [bs('0000', fname='opc'), d32rnpinc, + d32rmpinc, bs('1111')], [d32rmpinc, d32rnpinc]) +addop("mac_w", [bs('0100', fname='opc'), d16rnpinc, + d16rmpinc, bs('1111')], [d16rmpinc, d16rnpinc]) + +addop("mull", [bs('0000'), rn, rm, bs('0111')], [rm, rn]) +addop("mulsw", [bs('0010'), rn, rm, bs('1111')], [rm, rn]) +addop("muluw", [bs('0010'), rn, rm, bs('1110')], [rm, rn]) + +addop("neg", [bs('0110'), rn, rm, bs('1011')], [rm, rn]) +addop("negc", [bs('0110'), rn, rm, bs('1010')], [rm, rn]) + +addop("sub", [bs('0011'), rn, rm, bs('1000')], [rm, rn]) +addop("subc", [bs('0011'), rn, rm, bs('1010')], [rm, rn]) +addop("subv", [bs('0011'), rn, rm, bs('1011')], [rm, rn]) + +addop("and", [bs('0010'), rn, rm, bs('1001')], [rm, rn]) +addop("and", [bs('11001001'), u08imm, bsr0]) +addop("and_b", [bs('11001101'), u08imm, dr0gbr]) + +addop("not", [bs('0110'), rn, rm, bs('0111')], [rm, rn]) + +addop("or", [bs('0010'), rn, rm, bs('1011')], [rm, rn]) + +addop("or", [bs('11001011'), u08imm, bsr0]) +addop("or_b", [bs('11001111'), u08imm, dr0gbr]) + +addop("tas_b", [bs('0100'), d08gpreg, bs('00011011')]) +addop("tst", [bs('0010'), rn, rm, bs('1000')], [rm, rn]) +addop("tst", [bs('11001000'), u08imm, bsr0]) +addop("tst_b", [bs('11001100'), u08imm, dr0gbr]) + + +addop("xor", [bs('0010'), rn, rm, bs('1010')], [rm, rn]) +addop("xor", [bs('11001010'), u08imm, bsr0]) +addop("xor_b", [bs('11001110'), u08imm, dr0gbr]) + +addop("rotl", [bs('0100'), rn, bs('00000100')]) +addop("rotr", [bs('0100'), rn, bs('00000101')]) +addop("rotcl", [bs('0100'), rn, bs('00100100')]) +addop("rotcr", [bs('0100'), rn, bs('00100101')]) + +addop("shad", [bs('0100'), rn, rm, bs('1100')], 
[rm, rn]) +addop("shal", [bs('0100'), rn, bs('00100000')]) +addop("shar", [bs('0100'), rn, bs('00100001')]) +addop("shld", [bs('0100'), rn, rm, bs('1101')], [rm, rn]) + +addop("shll", [bs('0100'), rn, bs('00000000')]) +addop("shlr", [bs('0100'), rn, bs('00000001')]) +addop("shll2", [bs('0100'), rn, bs('00001000')]) +addop("shlr2", [bs('0100'), rn, bs('00001001')]) +addop("shll8", [bs('0100'), rn, bs('00011000')]) +addop("shlr8", [bs('0100'), rn, bs('00011001')]) +addop("shll16", [bs('0100'), rn, bs('00101000')]) +addop("shlr16", [bs('0100'), rn, bs('00101001')]) + + +addop("bf", [bs('10001011'), s08imm]) +""" + def splitflow(self): + return True + def breakflow(self): + return True + def dstflow(self): + return True + def dstflow2label(self, symbol_pool): + e = self.args[0].expr + ad = e.arg*2+4+self.offset + l = symbol_pool.getby_offset_create(ad) + s = ExprId(l, e.size) + self.args[0].expr = s +""" + +addop("bfs", [bs('10001111'), s08imm]) +""" + delayslot = 1 +""" +addop("bt", [bs('10001001'), s08imm]) + +addop("bts", [bs('10001101'), s08imm]) + +addop("bra", [bs('1010'), s12imm]) +""" + delayslot = 1 + def breakflow(self): + return True + def dstflow(self): + return True + def dstflow2label(self, symbol_pool): + e = self.args[0].expr + ad = e.arg*2+4+self.offset + l = symbol_pool.getby_offset_create(ad) + s = ExprId(l, e.size) + self.args[0].expr = s +""" + +addop("braf", [bs('0000'), rn, bs('00100011')]) +""" + delayslot = 1 + def breakflow(self): + return True + def dstflow(self): + return True +""" +addop("bsr", [bs('1011'), s12imm]) + +addop("bsrf", [bs('0000'), rn, bs('00000011')]) +""" + delayslot = 1 + def breakflow(self): + return True + def is_subcall(self): + return True + def splitflow(self): + return True +""" + +addop("jmp_l", [bs('0100'), d32gpreg, bs('00101011')]) +""" + delayslot = 1 + def breakflow(self): + return True +""" + +addop("jsr_l", [bs('0100'), d32gpreg, bs('00001011')]) +""" + delayslot = 1 + def breakflow(self): + return True + def 
is_subcall(self): + return True + def splitflow(self): + return True +""" + +addop("rts", [bs('0000000000001011')]) +""" + delayslot = 1 + def breakflow(self): + return True +""" +addop("clrmac", [bs('0000000000101000')]) +addop("clrs", [bs('0000000001001000')]) +addop("clrt", [bs('0000000000001000')]) + + +addop("ldc", [bs('0100'), rm, bssr, bs('00001110')]) +addop("ldc", [bs('0100'), rm, bsgbr, bs('00011110')]) +addop("ldc", [bs('0100'), rm, bsvbr, bs('00101110')]) +addop("ldc", [bs('0100'), rm, bsssr, bs('00111110')]) +addop("ldc", [bs('0100'), rm, bsspc, bs('01001110')]) +addop("ldc", [bs('0100'), rm, bsdbr, bs('11111010')]) +addop("ldc", [bs('0100'), rm, bs('1'), brn, bs('1110')], [rm, brn]) +addop("ldc_l", [bs('0100'), d32rmpinc, bssr, bs('00000111')]) +addop("ldc_l", [bs('0100'), d32rmpinc, bsgbr, bs('00010111')]) +addop("ldc_l", [bs('0100'), d32rmpinc, bsvbr, bs('00100111')]) +addop("ldc_l", [bs('0100'), d32rmpinc, bsssr, bs('00110111')]) +addop("ldc_l", [bs('0100'), d32rmpinc, bsspc, bs('01000111')]) +addop("ldc_l", [bs('0100'), d32rmpinc, bsdbr, bs('11110110')]) +addop("ldc_l", [bs('0100'), d32rmpinc, bs('1'), brn, bs('0111')]) +addop("lds", [bs('0100'), rm, bsmach, bs('00001010')]) +addop("lds", [bs('0100'), rm, bsmacl, bs('00011010')]) +addop("lds", [bs('0100'), rm, bspr, bs('00101010')]) +addop("lds_l", [bs('0100'), d32rmpinc, bsmach, bs('00000110')]) +addop("lds_l", [bs('0100'), d32rmpinc, bsmacl, bs('00010110')]) +addop("lds_l", [bs('0100'), d32rmpinc, bspr, bs('00100110')]) +addop("ldtlb", [bs('0000000000111000')]) + +addop("movca_l", [bs('0000'), bsr0, d32gpreg, bs('11000011')]) +addop("nop", [bs('0000000000001001')]) +addop("ocbi_l", [bs('0000'), d32gpreg, bs('10010011')]) +addop("ocbp_l", [bs('0000'), d32gpreg, bs('10100011')]) +addop("ocbwb_l", [bs('0000'), d32gpreg, bs('10110011')]) +addop("pref_l", [bs('0000'), d32gpreg, bs('10000011')]) + + +addop("rte", [bs('0000000000101011')]) +addop("sets", [bs('0000000001011000')]) +addop("sett", 
[bs('0000000000011000')]) +addop("sleep", [bs('0000000000011011')]) +addop("stc", [bs('0000'), bssr, rn, bs('00000010')]) +addop("stc", [bs('0000'), bsgbr, rn, bs('00010010')]) +addop("stc", [bs('0000'), bsvbr, rn, bs('00100010')]) +addop("stc", [bs('0000'), bsssr, rn, bs('00110010')]) +addop("stc", [bs('0000'), bsspc, rn, bs('01000010')]) +addop("stc", [bs('0000'), bssgr, rn, bs('00111010')]) +addop("stc", [bs('0000'), bsdbr, rn, bs('11111010')]) +addop("stc", [bs('0000'), rn, bs('1'), brm, bs('0010')], [brm, rn]) + +addop("stc_l", [bs('0100'), bssr, d32rmpdec, bs('00000011')]) +addop("stc_l", [bs('0100'), bsgbr, d32rmpdec, bs('00010011')]) +addop("stc_l", [bs('0100'), bsvbr, d32rmpdec, bs('00100011')]) +addop("stc_l", [bs('0100'), bsssr, d32rmpdec, bs('00110011')]) +addop("stc_l", [bs('0100'), bsspc, d32rmpdec, bs('01000011')]) +addop("stc_l", [bs('0100'), bssgr, d32rmpdec, bs('00110010')]) +addop("stc_l", [bs('0100'), bsdbr, d32rmpdec, bs('11110010')]) +addop("stc_l", + [bs('0100'), d32rnpdec, bs('1'), brm, bs('0011')], [brm, d32rnpdec]) + +# float +addop("sts", [bs('0000'), bsmach, rm, bs('00001010')]) +addop("sts", [bs('0000'), bsmacl, rm, bs('00011010')]) +addop("sts", [bs('0000'), bspr, rm, bs('00101010')]) +addop("sts_l", [bs('0100'), bsmach, d32rmpdec, bs('00000010')]) +addop("sts_l", [bs('0100'), bsmacl, d32rmpdec, bs('00010010')]) +addop("sts_l", + [bs('0100'), d32rnpdec, bspr, bs('00100010')], [bspr, d32rnpdec]) +addop("trapa", [bs('11000011'), u08imm]) + +addop("fldi0", [bs('1111'), frn, bs('10001101')]) +addop("fldi1", [bs('1111'), frn, bs('10011101')]) +addop("fmov", [bs('1111'), frn, frm, bs('1100')], [frm, frn]) +addop("fmov_s", [bs('1111'), frn, d32gpreg, bs('1000')], [d32gpreg, frn]) +addop("fmov_s", [bs('1111'), frn, bd32r0gp, bs('0110')], [bd32r0gp, frn]) +addop("fmov_s", [bs('1111'), frn, d32rmpinc, bs('1001')], [d32rmpinc, frn]) +addop("fmov_s", [bs('1111'), d32gpreg, frm, bs('1010')], [frm, d32gpreg]) +addop("fmov_s", [bs('1111'), d32rnpdec, 
frm, bs('1011')], [frm, d32rnpdec]) +addop("fmov_s", [bs('1111'), bd32r0gp, frm, bs('0111')], [frm, bd32r0gp]) + +addop("flds", [bs('1111'), frm, bsfpul, bs('00011101')]) +addop("fsts", [bs('1111'), bsfpul, frm, bs('00001101')]) +addop("fabs", [bs('1111'), frn, bs('01011101')]) +addop("fadd", [bs('1111'), frn, frm, bs('0000')], [frm, frn]) +addop("fcmpeq", [bs('1111'), frn, frm, bs('0100')], [frm, frn]) +addop("fcmpgt", [bs('1111'), frn, frm, bs('0101')], [frm, frn]) +addop("fdiv", [bs('1111'), frn, frm, bs('0011')], [frm, frn]) + +addop("float", [bs('1111'), bsfpul, frn, bs('00101101')]) +addop("fmac", [bs('1111'), bsfr0, frn, frm, bs('1110')], [bsfr0, frm, frn]) +addop("fmul", [bs('1111'), frn, frm, bs('0010')], [frm, frn]) +addop("fneg", [bs('1111'), frn, bs('01001101')]) +addop("fsqrt", [bs('1111'), frn, bs('01101101')]) +addop("fsub", [bs('1111'), frn, frm, bs('0001')], [frm, frn]) +addop("ftrc", [bs('1111'), frm, bsfpul, bs('00111101')]) + + +if __name__ == '__main__': + import os + import time + filename = os.environ.get('PYTHONSTARTUP') + if filename and os.path.isfile(filename): + execfile(filename) + + def h2i(s): + return s.replace(' ', '').decode('hex') + + reg_tests_sh4 = [ + # vxworks + ("c80022f2 MOV 0x10, R6", + "10e6"), + ("c8002250 MOV 0xFFFFFFFF, R0", + "ffe0"), + ("c800226a MOV.W @(PC,0xC0), R9", + "5e99"), + ("c8002006 MOV.L @(PC&0xFFFFFFFC,0x10), R15", + "03df"), + ("c800cfc4 MOV R4, R9", + "4369"), + ("C8005004 MOV.B R1, @R2", + "1022"), + ("C8002E04 MOV.W R0, @R8", + '0128'), + ("c800223e MOV.L R1, @R14", + "122E"), + + ("c8002002 MOV.L @R1, R0", + "1260"), + ("c8002E08 MOV.W @R8, R1", + "8161"), + ("c800357c MOV.B @R4, R1", + "4061"), + + ("c8002220 MOV.L R8, @-R15", + "862f"), + ("c8022a66 MOV.B R4, @-R0", + "4420"), + ("c8002310 MOV.L @R15+, R14", + "f66e"), + ("c80038a4 MOV.W @R8+, R5", + "8565"), + ("xxxxxxxx MOV.B R0, @(R8,0x2)", + "8280"), + ("xxxxxxxx MOV.W R0, @(R8,0x4)", + "8281"), + ("c8002274 MOV.L R0, @(R9,0x8)", + "0219"), + 
("xxxxxxxx MOV.B @(R8,0x8), R0", + "8884"), + ("xxxxxxxx MOV.W @(R8,0x10), R0", + "8885"), + ("c8002500 MOV.L @(R14,0x4), R5", + "e155"), + ("xxxxxxxx MOV.B R4, @(R0,R8)", + "4408"), + ("xxxxxxxx MOV.W R4, @(R0,R8)", + "4508"), + ("xxxxxxxx MOV.L R4, @(R0,R8)", + "4608"), + ("xxxxxxxx MOV.B @(R0,R4), R8", + "4c08"), + ("xxxxxxxx MOV.W @(R0,R4), R8", + "4d08"), + ("xxxxxxxx MOV.L @(R0,R4), R8", + "4e08"), + ("xxxxxxxx MOV.B R0, @(GBR,0x4)", + "04c0"), + ("xxxxxxxx MOV.W R0, @(GBR,0x8)", + "04c1"), + ("xxxxxxxx MOV.L R0, @(GBR,0x10)", + "04c2"), + ("xxxxxxxx MOV.B @(GBR,0x4), R0", + "04c4"), + ("xxxxxxxx MOV.W @(GBR,0x8), R0", + "04c5"), + ("xxxxxxxx MOV.L @(GBR,0x10), R0", + "04c6"), + #("xxxxxxxx MOV PC&0xFFFFFFFC+0x14, R0", + # "04c7"), + ("xxxxxxxx SWAPB R2, R1", + "2861"), + ("c803f492 SWAPW R4, R9", + "4969"), + ("xxxxxxxx XTRCT R4, R9", + "4d29"), + ("c8002270 ADD R12, R9", + "cc39"), + ("c8002238 ADD 0xFFFFFFFC, R15", + "FC7F"), + ("c80164cc ADDC R0, R1", + "0e31"), + ("xxxxxxxx ADDV R0, R1", + "0f31"), + ("c8002994 CMPEQ 0x20, R0", + "2088"), + ("c80029d2 CMPEQ R2, R1", + "2031"), + ("c8003964 CMPHS R5, R3", + "5233"), + ("c8002df2 CMPGE R0, R1", + "0331"), + ("c80029a4 CMPHI R1, R0", + "1630"), + ("c8002bfe CMPGT R10, R8", + "a738"), + ("c8002bf8 CMPPZ R0", + "1140"), + ("c8006294 CMPPL R2", + "1542"), + ("c8033800 CMPSTR R14, R4", + "ec24"), + ("xxxxxxxx DIV1 R14, R4", + "e434"), + ("c8d960de DIV0S R0, R3", + "0723"), + ("xxxxxxxx DIV0U ", + "1900"), + ("c800dcd8 DMULS R1, R0", + "1d30"), + ("c80164da DMULU R3, R8", + "3538"), + ("c80024e2 DT R10", + "104a"), + ("c800343a EXTSB R1, R1", + "1e61"), + ("c8002bf6 EXTSW R0, R0", + "0f60"), + ("c8002fba EXTUB R0, R0", + "0c60"), + ("c8002398 EXTUW R0, R0", + "0d60"), + ("xxxxxxxx MAC.L @R5+, @R4+", + "5f04"), + ("xxxxxxxx MAC.W @R5+, @R4+", + "5f44"), + ("c8005112 MULL R1, R3", + "1703"), + ("xxxxxxxx MULSW R1, R3", + "1F23"), + ("xxxxxxxx MULUW R1, R3", + "1e23"), + ("c8004856 NEG R1, R8", + "1b68"), + 
("c80054fc NEGC R9, R7", + "9a67"), + ("c8004b36 SUB R1, R5", + "1835"), + ("c800a536 SUBC R1, R0", + "1a30"), + ("xxxxxxxx SUBV R1, R0", + "1b30"), + ("c80023ca AND R0, R5", + "0925"), + ("c800257c AND 0x2, R0", + "02c9"), + ("xxxxxxxx AND.B 0x2, @(GBR,R0)", + "02cd"), + ("c80065fe NOT R5, R1", + "5761"), + ("c8002586 OR R10, R1", + "ab21"), + ("c80023aa OR 0x4, R0", + "04cb"), + ("xxxxxxxx OR.B 0x4, @(GBR,R0)", + "04cf"), + ("xxxxxxxx TAS.B @R8", + "1b48"), + ("c8002368 TST R10, R13", + "a82d"), + ("c8003430 TST 0x11, R0", + "11c8"), + ("xxxxxxxx TST.B 0x4, @(GBR,R0)", + "04cc"), + ("c8003978 XOR R1, R6", + "1a26"), + ("c8028270 XOR 0x1, R0", + "01ca"), + ("xxxxxxxx XOR.B 0x4, @(GBR,R0)", + "04cE"), + ("xxxxxxxx ROTL R9", + "0449"), + ("xxxxxxxx ROTR R9", + "0549"), + ("xxxxxxxx ROTCL R9", + "2449"), + ("xxxxxxxx ROTCR R9", + "2549"), + ("xxxxxxxx SHAL R11", + "204b"), + ("xxxxxxxx SHAR R11", + "214b"), + ("c800236c SHLD R6, R10", + "6d4a"), + ("xxxxxxxx SHLL R11", + "004b"), + ("xxxxxxxx SHLR R11", + "014b"), + ("xxxxxxxx SHLL2 R11", + "084b"), + ("xxxxxxxx SHLR2 R11", + "094b"), + ("xxxxxxxx SHLL8 R11", + "184b"), + ("xxxxxxxx SHLR8 R11", + "194b"), + ("xxxxxxxx SHLL16 R11", + "284b"), + ("xxxxxxxx SHLR16 R11", + "294b"), + ("c8002c00 BF 0xFFFFFFF4", + "f48b"), + ("c80023c2 BFS 0xFFFFFFD8", + "d88f"), + ("c8002266 BT 0x5B", + "5b89"), + ("c8002266 BTS 0x5C", + "5c8d"), + ("c8002326 BRA 0xFFFFFFF0", + "f0af"), + ("c8004b4a BRAF R1", + "2301"), + ("c8055da4 BSR 0xFFFFFE48", + "48be"), + ("xxxxxxxx BSRF R1", + "0301"), + ("c80027b4 JMP.L @R1", + "2b41"), + ("c800200c JSR.L @R0", + "0b40"), + ("c800231a RTS ", + "0b00"), + ("xxxxxxxx CLRMAC ", + "2800"), + ("xxxxxxxx CLRS ", + "4800"), + ("xxxxxxxx CLRT ", + "0800"), + ("c8002004 LDC R0, SR", + "0e40"), + ("c800200e LDC R1, GBR", + "1e41"), + ("c8064bd4 LDC R8, VBR", + "2e48"), + ("xxxxxxxx LDC R8, SSR", + "3e48"), + ("xxxxxxxx LDC R8, SPC", + "4e48"), + ("xxxxxxxx LDC R8, DBR", + "fa48"), + ("xxxxxxxx LDC R8, 
R0_BANK", + "8e48"), + ("xxxxxxxx LDC.L @R8+, SR", + "0748"), + ("xxxxxxxx LDC.L @R8+, GBR", + "1748"), + ("xxxxxxxx LDC.L @R8+, VBR", + "2748"), + ("xxxxxxxx LDC.L @R8+, SSR", + "3748"), + ("xxxxxxxx LDC.L @R8+, SPC", + "4748"), + ("xxxxxxxx LDC.L @R8+, DBR", + "f648"), + ("xxxxxxxx LDC.L @R8+, R2_BANK", + "a748"), + ("xxxxxxxx LDS R8, MACH", + "0a48"), + ("xxxxxxxx LDS R8, MACL", + "1a48"), + ("xxxxxxxx LDS R8, PR", + "2a48"), + ("xxxxxxxx LDS.L @R8+, MACH", + "0648"), + ("xxxxxxxx LDS.L @R8+, MACL", + "1648"), + ("xxxxxxxx LDTLB ", + "3800"), + ("xxxxxxxx MOVCA.L R0, @R8", + "c308"), + ("xxxxxxxx NOP ", + "0900"), + ("xxxxxxxx OCBI.L @R8", + "9308"), + ("xxxxxxxx OCBP.L @R8", + "a308"), + ("xxxxxxxx OCBWB.L @R8", + "b308"), + ("xxxxxxxx PREF.L @R8", + "8308"), + ("xxxxxxxx STS MACH, R8", + "0a08"), + ("xxxxxxxx STS MACL, R8", + "1a08"), + ("xxxxxxxx STS PR, R8", + "2a08"), + ("xxxxxxxx STS.L MACH, @-R8", + "0248"), + ("xxxxxxxx STS.L MACL, @-R8", + "1248"), + ("xxxxxxxx STS.L PR, @-R8", + "2248"), + + + + + + ("c8004b50 STC GBR, R0", + "1200"), + ("c8064516 STC VBR, R1", + "2201"), + ("c8004b54 STC SSR, R1", + "3201"), + ("c801ed6c STC SPC, R0", + "4200"), + ("xxxxxxxx STC SGR, R0", + "3a00"), + ("xxxxxxxx STC DBR, R0", + "fa00"), + ("c8004b56 STC R3_BANK, R1", + "B201"), + ("xxxxxxxx STC.L SR, @-R8", + "0348"), + ("xxxxxxxx STC.L GBR, @-R8", + "1348"), + ("xxxxxxxx STC.L VBR, @-R8", + "2348"), + ("xxxxxxxx STC.L SSR, @-R8", + "3348"), + ("xxxxxxxx STC.L SPC, @-R8", + "4348"), + ("xxxxxxxx STC.L DBR, @-R8", + "f248"), + ("xxxxxxxx STC.L R7_BANK, @-R8", + "f348"), + ("c803b130 TRAPA 0xE0", + "e0c3"), + + ("xxxxxxxx FLDI0 FR8", + "8df8"), + ("xxxxxxxx FLDI1 FR8", + "9df8"), + ("c8019ca8 FMOV FR15, FR5", + "fcf5"), + ("c800affe FMOV.S @R1, FR4", + "18f4"), + ("c80283f6 FMOV.S @(R0,R14), FR5", + "e6f5"), + ("c800aff8 FMOV.S @R1+, FR5", + "19f5"), + ("c80cb692 FMOV.S FR0, @R2", + "0af2"), + ("c80cb694 FMOV.S FR1, @-R2", + "1bf2"), + ("c80283aa FMOV.S FR1, @(R0,R14)", 
+ "17fe"), + ("c800ce16 FLDS FR13, FPUL", + "1dfd"), + ("c800ce08 FSTS FPUL, FR13", + "0dfd"), + ("xxxxxxxx FABS FR8", + "5df8"), + ("c800cf28 FADD FR2, FR6", + "20f6"), + ("c805dacc FCMPEQ FR2, FR6", + "24f6"), + ("c8028406 FCMPGT FR4, FR2", + "45f2"), + ("c8019ca4 FDIV FR2, FR12", + "23fc"), + ("c800ce5e FLOAT FPUL, FR2", + "2df2"), + ("xxxxxxxx FMAC FR0, FR1, FR2", + "1ef2"), + ("c800b006 FMUL FR2, FR4", + "22f4"), + ("c805e412 FNEG FR14", + "4dfe"), + ("xxxxxxxx FSQRT FR14", + "6dfe"), + ("c8030400 FSUB FR4, FR2", + "41f2"), + ("c80303ba FTRC FR2, FPUL", + "3df2"), + + ] + + for s, l in reg_tests_sh4: + print "-" * 80 + s = s[12:] + b = h2i((l)) + print b.encode('hex') + mn = mn_sh4.dis(b, None) + print [str(x) for x in mn.args] + print s + print mn + assert(str(mn) == s) + # print hex(b) + # print [str(x.get()) for x in mn.args] + l = mn_sh4.fromstring(s, None) + # print l + assert(str(l) == s) + a = mn_sh4.asm(l, None) + print [x for x in a] + print repr(b) + # print mn.args + assert(b in a) + + # speed test + o = "" + for s, l, in reg_tests_sh4: + s = s[12:] + b = h2i((l)) + o += b + + while len(o) < 1000: + o += o + bs = bin_stream_str(o) + off = 0 + instr_num = 0 + ts = time.time() + while off < bs.getlen(): + mn = mn_sh4.dis(bs, None, off) + # print instr_num, off, mn.l, str(mn) + instr_num += 1 + off += mn.l + print 'instr per sec:', instr_num / (time.time() - ts) + + import cProfile + cProfile.run(r'mn_sh4.dis("\x17\xfe", None)') diff --git a/miasm2/arch/sh4/regs.py b/miasm2/arch/sh4/regs.py new file mode 100644 index 00000000..bfc61b04 --- /dev/null +++ b/miasm2/arch/sh4/regs.py @@ -0,0 +1,80 @@ +from miasm2.expression.expression import * +from miasm2.core.cpu import reg_info, gen_reg + +# GP +gpregs_str = ['R%d' % r for r in xrange(0x10)] +gpregs_expr = [ExprId(x, 32) for x in gpregs_str] +gpregs = reg_info(gpregs_str, gpregs_expr) + +bgpregs_str = ['R%d_BANK' % r for r in xrange(0x8)] +bgpregs_expr = [ExprId(x, 32) for x in bgpregs_str] +bgpregs = 
reg_info(bgpregs_str, bgpregs_expr) + +fregs_str = ['FR%d' % r for r in xrange(0x10)] +fregs_expr = [ExprId(x, 32) for x in fregs_str] +fregs = reg_info(fregs_str, fregs_expr) + +dregs_str = ['DR%d' % r for r in xrange(0x8)] +dregs_expr = [ExprId(x, 32) for x in dregs_str] +dregs = reg_info(dregs_str, dregs_expr) + + +gen_reg('PC', globals()) +gen_reg('PR', globals()) +gen_reg('R0', globals()) +gen_reg('GBR', globals()) +gen_reg('SR', globals()) +gen_reg('VBR', globals()) +gen_reg('SSR', globals()) +gen_reg('SPC', globals()) +gen_reg('SGR', globals()) +gen_reg('DBR', globals()) +gen_reg('MACH', globals()) +gen_reg('MACL', globals()) +gen_reg('FPUL', globals()) +gen_reg('FR0', globals()) + +R0 = gpregs_expr[0] +R1 = gpregs_expr[1] +R2 = gpregs_expr[2] +R3 = gpregs_expr[3] +R4 = gpregs_expr[4] +R5 = gpregs_expr[5] +R6 = gpregs_expr[6] +R7 = gpregs_expr[7] +R8 = gpregs_expr[8] +R9 = gpregs_expr[9] +R10 = gpregs_expr[10] +R11 = gpregs_expr[11] +R12 = gpregs_expr[12] +R13 = gpregs_expr[13] +R14 = gpregs_expr[14] +R15 = gpregs_expr[15] + + +reg_zf = 'zf' +reg_nf = 'nf' +reg_of = 'of' +reg_cf = 'cf' + +zf = ExprId(reg_zf, size=1) +nf = ExprId(reg_nf, size=1) +of = ExprId(reg_of, size=1) +cf = ExprId(reg_cf, size=1) + + +all_regs_ids = [ + R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, + zf, nf, of, cf, + + PC, PR, R0, GBR, SR, VBR, SSR, SPC, + SGR, DBR, MACH, MACL, FPUL, FR0] + +all_regs_ids_byname = dict([(x.name, x) for x in all_regs_ids]) + +all_regs_ids_init = [ExprId("%s_init" % x.name, x.size) for x in all_regs_ids] + +regs_init = {} +for i, r in enumerate(all_regs_ids): + all_regs_ids_init[i].is_term = True + regs_init[r] = all_regs_ids_init[i] diff --git a/miasm2/arch/x86/__init__.py b/miasm2/arch/x86/__init__.py new file mode 100644 index 00000000..bbad893b --- /dev/null +++ b/miasm2/arch/x86/__init__.py @@ -0,0 +1 @@ +__all__ = ["arch", "disasm", "regs", "sem"] diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py new file mode 
100644 index 00000000..5ccc4a9c --- /dev/null +++ b/miasm2/arch/x86/arch.py @@ -0,0 +1,3839 @@ +#!/usr/bin/env python +#-*- coding:utf-8 -*- + +import re +from miasm2.expression.expression import * +from pyparsing import * +from miasm2.core.cpu import * +from collections import defaultdict +import regs as regs_module +from regs import * +from miasm2.ir.ir import * + +log = logging.getLogger("x86_arch") +console_handler = logging.StreamHandler() +console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) +log.addHandler(console_handler) +log.setLevel(logging.WARN) + + +f_isad = "AD" +f_s08 = "S08" +f_u08 = "U08" +f_s16 = "S16" +f_u16 = "U16" +f_s32 = "S32" +f_u32 = "U32" +f_s64 = "S64" +f_u64 = "U64" +f_imm = 'IMM' + +f_imm2size = {f_s08: 8, f_s16: 16, f_s32: 32, f_s64: 64, + f_u08: 8, f_u16: 16, f_u32: 32, f_u64: 64} + + +size2gpregs = {8: gpregs08, 16: gpregs16, + 32: gpregs32, 64: gpregs64} + + +replace_regs64 = { + AL: RAX[:8], CL: RCX[:8], DL: RDX[:8], BL: RBX[:8], + AH: RAX[8:16], CH: RCX[8:16], DH: RDX[8:16], BH: RBX[8:16], + SPL: RSP[0:8], BPL: RBP[0:8], SIL: RSI[0:8], DIL: RDI[0:8], + R8B: R8[0:8], R9B: R9[0:8], R10B: R10[0:8], R11B: R11[0:8], + R12B: R12[0:8], R13B: R13[0:8], R14B: R14[0:8], R15B: R15[0:8], + + AX: RAX[:16], CX: RCX[:16], DX: RDX[:16], BX: RBX[:16], + SP: RSP[:16], BP: RBP[:16], SI: RSI[:16], DI: RDI[:16], + R8W: R8[:16], R9W: R9[:16], R10W: R10[:16], R11W: R11[:16], + R12W: R12[:16], R13W: R13[:16], R14W: R14[:16], R15W: R15[:16], + + + EAX: RAX[:32], ECX: RCX[:32], EDX: RDX[:32], EBX: RBX[:32], + ESP: RSP[:32], EBP: RBP[:32], ESI: RSI[:32], EDI: RDI[:32], + R8D: R8[:32], R9D: R9[:32], R10D: R10[:32], R11D: R11[:32], + R12D: R12[:32], R13D: R13[:32], R14D: R14[:32], R15D: R15[:32], + + IP: RIP[:16], EIP: RIP[:32], + +} + +replace_regs32 = { + AL: EAX[:8], CL: ECX[:8], DL: EDX[:8], BL: EBX[:8], + AH: EAX[8:16], CH: ECX[8:16], DH: EDX[8:16], BH: EBX[8:16], + + AX: EAX[:16], CX: ECX[:16], DX: EDX[:16], BX: EBX[:16], + 
SP: ESP[:16], BP: EBP[:16], SI: ESI[:16], DI: EDI[:16], + + IP: EIP[:16] +} + +replace_regs16 = { + AL: AX[:8], CL: CX[:8], DL: DX[:8], BL: BX[:8], + AH: AX[8:16], CH: CX[8:16], DH: DX[8:16], BH: BX[8:16], + + AX: AX[:16], CX: CX[:16], DX: DX[:16], BX: BX[:16], + SP: SP[:16], BP: BP[:16], SI: SI[:16], DI: DI[:16], +} + +replace_regs = {16: replace_regs16, + 32: replace_regs32, + 64: replace_regs64} + + +# parser helper ########### +PLUS = Suppress("+") +MULT = Suppress("*") + +COLON = Suppress(":") + + +LBRACK = Suppress("[") +RBRACK = Suppress("]") + +dbreg = Group(gpregs16.parser | gpregs32.parser | gpregs64.parser) +gpreg = (gpregs08.parser | gpregs08_64.parser | gpregs16.parser | + gpregs32.parser | gpregs64.parser | gpregs_xmm.parser | + gpregs_mm.parser) + + +def reg2exprid(r): + if not r.name in all_regs_ids_byname: + raise ValueError('unknown reg') + return all_regs_ids_byname[r.name] + + +def parse_deref_reg(s, l, t): + t = t[0][0] + return t[0] + + +def parse_deref_int(s, l, t): + t = t[0] + return t[0] + + +def parse_deref_regint(s, l, t): + t = t[0] + r1 = reg2exprid(t[0][0]) + i1 = ExprInt_from(r1, t[1].arg) + return r1 + i1 + + +def parse_deref_regreg(s, l, t): + t = t[0] + return t[0][0] + t[1][0] + + +def parse_deref_regregint(s, l, t): + t = t[0] + r1 = reg2exprid(t[0][0]) + r2 = reg2exprid(t[1][0]) + i1 = ExprInt_from(r1, t[2].arg) + return r1 + r2 + i1 + + +def parse_deref_reg_intmreg(s, l, t): + t = t[0] + r1 = reg2exprid(t[0][0]) + r2 = reg2exprid(t[1][0]) + i1 = ExprInt_from(r1, t[2].arg) + return r1 + (r2 * i1) + + +def parse_deref_reg_intmreg_int(s, l, t): + t = t[0] + r1 = reg2exprid(t[0][0]) + r2 = reg2exprid(t[1][0]) + i1 = ExprInt_from(r1, t[2].arg) + i2 = ExprInt_from(r1, t[3].arg) + return r1 + (r2 * i1) + i2 + + +def parse_deref_intmreg(s, l, t): + t = t[0] + r1 = reg2exprid(t[0][0]) + i1 = ExprInt_from(r1, t[1].arg) + return r1 * i1 + + +def parse_deref_intmregint(s, l, t): + t = t[0] + r1 = reg2exprid(t[0][0]) + i1 = 
ExprInt_from(r1, t[1].arg) + i2 = ExprInt_from(r1, t[1].arg) + return (r1 * i1) + i2 + + +def getreg(s, l, t): + t = t[0] + return t[0] + + +def parse_deref_ptr(s, l, t): + t = t[0] + return ExprMem(ExprOp('segm', t[0], t[1])) + + +variable, operand, base_expr = gen_base_expr() + + +def ast_id2expr(t): + if not t in mn_x86.regs.all_regs_ids_byname: + r = ExprId(t) + else: + r = mn_x86.regs.all_regs_ids_byname[t] + return r + + +def ast_int2expr(a): + return ExprInt64(a) + + +my_var_parser = parse_ast(ast_id2expr, ast_int2expr) +base_expr.setParseAction(my_var_parser) + +int_or_expr = base_expr + +deref_mem_ad = Group(LBRACK + dbreg + RBRACK).setParseAction(parse_deref_reg) +deref_mem_ad |= Group( + LBRACK + int_or_expr + RBRACK).setParseAction(parse_deref_int) +deref_mem_ad |= Group( + LBRACK + dbreg + PLUS + + int_or_expr + RBRACK).setParseAction(parse_deref_regint) +deref_mem_ad |= Group( + LBRACK + dbreg + PLUS + + dbreg + RBRACK).setParseAction(parse_deref_regreg) +deref_mem_ad |= Group( + LBRACK + dbreg + PLUS + dbreg + PLUS + + int_or_expr + RBRACK).setParseAction(parse_deref_regregint) +deref_mem_ad |= Group( + LBRACK + dbreg + PLUS + dbreg + MULT + + int_or_expr + RBRACK).setParseAction(parse_deref_reg_intmreg) +deref_mem_ad |= Group( + LBRACK + dbreg + PLUS + dbreg + MULT + int_or_expr + + PLUS + int_or_expr + RBRACK).setParseAction(parse_deref_reg_intmreg_int) +deref_mem_ad |= Group( + LBRACK + dbreg + MULT + + int_or_expr + RBRACK).setParseAction(parse_deref_intmreg) +deref_mem_ad |= Group( + LBRACK + dbreg + MULT + int_or_expr + + PLUS + int_or_expr + RBRACK).setParseAction(parse_deref_intmregint) + + +deref_ptr = Group(int_or_expr + COLON + + int_or_expr).setParseAction(parse_deref_ptr) + + +PTR = Suppress('PTR') + + +BYTE = Literal('BYTE') +WORD = Literal('WORD') +DWORD = Literal('DWORD') +QWORD = Literal('QWORD') +TBYTE = Literal('TBYTE') + + +def parse_deref_mem(s, l, t): + sz = {'BYTE': 8, 'WORD': 16, 'DWORD': 32, 'QWORD': 64, 'TBYTE': 80} + t = 
t[0] + if len(t) == 2: + s, ptr = t + return ExprMem(ptr, sz[s[0]]) + elif len(t) == 3: + s, segm, ptr = t + return ExprMem(ExprOp('segm', segm[0], ptr), sz[s[0]]) + else: + raise ValueError('len(t) > 3') + +mem_size = Group(BYTE | DWORD | QWORD | WORD | TBYTE) +deref_mem = Group(mem_size + PTR + Optional(Group(int_or_expr + COLON)) + + deref_mem_ad).setParseAction(parse_deref_mem) + + +rmarg = Group(gpregs08.parser | + gpregs08_64.parser | + gpregs16.parser | + gpregs32.parser | + gpregs64.parser | + gpregs_mm.parser | + gpregs_xmm.parser + ).setParseAction(getreg) + +rmarg |= deref_mem + + +cl_or_imm = Group(r08_ecx.parser).setParseAction(getreg) +cl_or_imm |= int_or_expr + + +class r_al(reg_noarg, m_arg): + reg_info = r08_eax + parser = reg_info.parser + + +class r_ax(reg_noarg, m_arg): + reg_info = r16_eax + parser = reg_info.parser + + +class r_dx(reg_noarg, m_arg): + reg_info = r16_edx + parser = reg_info.parser + + +class r_eax(reg_noarg, m_arg): + reg_info = r32_eax + parser = reg_info.parser + + +class r_rax(reg_noarg, m_arg): + reg_info = r64_eax + parser = reg_info.parser + + +class r_cl(reg_noarg, m_arg): + reg_info = r08_ecx + parser = reg_info.parser + + +invmode = {16: 32, 32: 16} + + +def opmode_prefix(mode): + size, opmode, admode = mode + if size in [16, 32]: + if opmode: + return invmode[size] + else: + return size + elif size == 64: + if opmode: + return 16 + else: + return 32 + raise NotImplementedError('not fully functional') + + +def admode_prefix(mode): + size, opmode, admode = mode + if size in [16, 32]: + if admode: + return invmode[size] + else: + return size + elif size == 64: + return 64 + raise NotImplementedError('not fully functional') + + +def v_opmode_info(size, opmode, rex_w, stk): + if size in [16, 32]: + if opmode: + return invmode[size] + else: + return size + elif size == 64: + if rex_w == 1: + return 64 + elif stk: + if opmode == 1: + return 16 + else: + return 64 + elif opmode == 1: + return 16 + return 32 + + +def 
v_opmode(p): + stk = hasattr(p, 'stk') + return v_opmode_info(p.mode, p.opmode, p.rex_w.value, stk) + + +def v_admode_info(size, admode): + if size in [16, 32]: + if admode: + return invmode[size] + else: + return size + elif size == 64: + if admode == 1: + return 32 + return 64 + + +def v_admode(p): + return v_admode_info(p.mode, p.admode) + + +def offsize(p): + if p.opmode: + return 16 + else: + return p.mode + + +def get_prefix(s): + g = re.search('(\S+)(\s+)', s) + if not g: + return None, s + prefix, b = g.groups() + return prefix, s[len(prefix) + len(b):] + + +repeat_mn = ["INS", "OUTS", + "MOVSB", "MOVSW", "MOVSD", "MOVSQ", + "SCASB", "SCASW", "SCASD", "SCASQ", + "LODSB", "LODSW", "LODSD", "LODSQ", + "STOSB", "STOSW", "STOSD", "STOSQ", + "CMPSB", "CMPSW", "CMPSD", "CMPSQ", + ] + +segm2enc = {CS: 1, SS: 2, DS: 3, ES: 4, FS: 5, GS: 6} +enc2segm = dict([(x[1], x[0]) for x in segm2enc.items()]) + + +class group: + + def __init__(self): + self.value = None + + +class additional_info: + + def __init__(self): + self.except_on_instr = False + self.g1 = group() + self.g2 = group() + self.vopmode = None + self.stk = False + self.v_opmode = None + self.v_admode = None + self.prefixed = '' + + +class instruction_x86(instruction): + delayslot = 0 + + def __init__(self, *args, **kargs): + super(instruction_x86, self).__init__(*args, **kargs) + self.additional_info.stk = hasattr(self, 'stk') + + def v_opmode(self): + return self.additional_info.v_opmode + + def v_admode(self): + return self.additional_info.v_admode + + def dstflow(self): + if self.name.startswith('J'): + return True + if self.name.startswith('LOOP'): + return True + # repxx yyy generate split flow + # if self.g1.value & 6 and self.name in repeat_mn: + # return True + return self.name in ['CALL'] + + def dstflow2label(self, symbol_pool): + if self.additional_info.g1.value & 6 and self.name in repeat_mn: + return + e = self.args[0] + if isinstance(e, ExprId) and not e.name in all_regs_ids_byname: + l = 
symbol_pool.getby_name_create(e.name) + s = ExprId(l, e.size) + self.args[0] = s + elif isinstance(e, ExprInt): + ad = e.arg + int(self.offset) + self.l + l = symbol_pool.getby_offset_create(ad) + s = ExprId(l, e.size) + self.args[0] = s + else: + return + + def breakflow(self): + if self.name.startswith('J'): + return True + if self.name.startswith('LOOP'): + return True + if self.name.startswith('RET'): + return True + if self.name.startswith('INT'): + return True + if self.name.startswith('SYS'): + return True + # repxx yyy generate split flow + # if self.g1.value & 6 and self.name in repeat_mn: + # return True + return self.name in ['CALL', 'HLT', 'IRET', 'ICEBP'] + + def splitflow(self): + if self.name.startswith('JMP'): + return False + if self.name.startswith('J'): + return True + if self.name.startswith('LOOP'): + return True + if self.name.startswith('SYS'): + return True + # repxx yyy generate split flow + # if self.g1.value & 6 and self.name in repeat_mn: + # return True + return self.name in ['CALL'] + + def setdstflow(self, a): + return + + def is_subcall(self): + return self.name in ['CALL'] + + def getdstflow(self, symbol_pool): + if self.additional_info.g1.value & 6 and self.name in repeat_mn: + ad = int(self.offset) + l = symbol_pool.getby_offset_create(ad) + # XXX size ??? 
+ s = ExprId(l, self.v_opmode()) + return [s] + return [self.args[0]] + + def get_symbol_size(self, symbol, symbol_pool): + return self.mode + + def fixDstOffset(self): + e = self.args[0] + if self.offset is None: + raise ValueError('symbol not resolved %s' % l) + if not isinstance(e, ExprInt): + # raise ValueError('dst must be int or label') + log.warning('dynamic dst %r' % e) + return + # return ExprInt32(e.arg - (self.offset + self.l)) + self.args[0] = ExprInt_fromsize( + self.mode, e.arg - (self.offset + self.l)) + + def get_info(self, c): + self.additional_info.g1.value = c.g1.value + self.additional_info.g2.value = c.g2.value + self.additional_info.v_opmode = c.v_opmode() + self.additional_info.v_admode = c.v_admode() + self.additional_info.prefix = c.prefix + self.additional_info.prefixed = getattr(c, "prefixed", "") + + def __str__(self): + o = super(instruction_x86, self).__str__() + if self.additional_info.g1.value & 1: + o = "LOCK %s" % o + if self.additional_info.g1.value & 2: + if getattr(self.additional_info.prefixed, 'default', "") != "\xF2": + o = "REPNE %s" % o + if self.additional_info.g1.value & 4: + if getattr(self.additional_info.prefixed, 'default', "") != "\xF3": + o = "REPE %s" % o + return o + + def get_args_expr(self): + args = [] + for a in self.args: + a = a.replace_expr(replace_regs[self.mode]) + args.append(a) + return args + + +class mn_x86(cls_mn): + name = "x86" + prefix_op_size = False + prefix_ad_size = False + regs = regs_module + all_mn = [] + all_mn_mode = defaultdict(list) + all_mn_name = defaultdict(list) + all_mn_inst = defaultdict(list) + bintree = {} + num = 0 + delayslot = 0 + pc = {16: IP, 32: EIP, 64: RIP} + sp = {16: SP, 32: ESP, 64: RSP} + instruction = instruction_x86 + max_instruction_len = 15 + + @classmethod + def getpc(cls, attrib): + return cls.pc[attrib] + + @classmethod + def getsp(cls, attrib): + return cls.sp[attrib] + + def v_opmode(self): + if hasattr(self, 'stk'): + stk = 1 + else: + stk = 0 + return 
v_opmode_info(self.mode, self.opmode, self.rex_w.value, stk) + + def v_admode(self): + size, opmode, admode = self.mode, self.opmode, self.admode + if size in [16, 32]: + if admode: + return invmode[size] + else: + return size + elif size == 64: + if admode == 1: + return 32 + return 64 + + def additional_info(self): + info = additional_info() + info.g1.value = self.g1.value + info.g2.value = self.g2.value + info.v_opmode = self.v_opmode() + info.prefixed = "" + if hasattr(self, 'prefixed'): + info.prefixed = self.prefixed.default + return info + + @classmethod + def check_mnemo(cls, fields): + pass + + @classmethod + def getmn(cls, name): + return name.upper() + + @classmethod + def mod_fields(cls, fields): + prefix = [d_g1, d_g2, d_rex_p, d_rex_w, d_rex_r, d_rex_x, d_rex_b] + return prefix + fields + + @classmethod + def gen_modes(cls, subcls, name, bases, dct, fields): + dct['mode'] = None + return [(subcls, name, bases, dct, fields)] + + @classmethod + def fromstring(cls, s, mode): + pref = 0 + prefix, new_s = get_prefix(s) + if prefix == "LOCK": + pref |= 1 + s = new_s + elif prefix == "REPNE": + pref |= 2 + s = new_s + elif prefix == "REPE": + pref |= 4 + s = new_s + c = super(mn_x86, cls).fromstring(s, mode) + c.additional_info.g1.value = pref + return c + + @classmethod + def pre_dis(cls, v, mode, offset): + offset_o = offset + pre_dis_info = {'opmode': 0, + 'admode': 0, + 'g1': 0, + 'g2': 0, + 'rex_p': 0, + 'rex_w': 0, + 'rex_r': 0, + 'rex_x': 0, + 'rex_b': 0, + 'prefix': "", + 'prefixed': "", + } + while True: + c = v.getbytes(offset) + if c == '\x66': + # pre_dis_info.opmode = 1 + pre_dis_info['opmode'] = 1 + elif c == '\x67': + pre_dis_info['admode'] = 1 + elif c == '\xf0': + pre_dis_info['g1'] = 1 + elif c == '\xf2': + pre_dis_info['g1'] = 2 + elif c == '\xf3': + pre_dis_info['g1'] = 4 + + elif c == '\x2e': + pre_dis_info['g2'] = 1 + elif c == '\x36': + pre_dis_info['g2'] = 2 + elif c == '\x3e': + pre_dis_info['g2'] = 3 + elif c == '\x26': + 
pre_dis_info['g2'] = 4 + elif c == '\x64': + pre_dis_info['g2'] = 5 + elif c == '\x65': + pre_dis_info['g2'] = 6 + + elif mode == 64 and c in '@ABCDEFGHIJKLMNO': + x = ord(c) + pre_dis_info['rex_p'] = 1 + pre_dis_info['rex_w'] = (x >> 3) & 1 + pre_dis_info['rex_r'] = (x >> 2) & 1 + pre_dis_info['rex_x'] = (x >> 1) & 1 + pre_dis_info['rex_b'] = (x >> 0) & 1 + offset += 1 + break + else: + c = '' + break + pre_dis_info['prefix'] += c + offset += 1 + # pre_dis_info.b = v[:offset] + return pre_dis_info, v, mode, offset, offset - offset_o + + @classmethod + def get_cls_instance(cls, cc, mode, infos=None): + for opmode in [0, 1]: + for admode in [0, 1]: + # c = cls.all_mn_inst[cc][0] + c = cc() + c.init_class() + + c.reset_class() + c.add_pre_dis_info() + c.dup_info(infos) + + c.mode = mode + c.opmode = opmode + c.admode = admode + + if hasattr(c, "fopmode") and c.fopmode.mode == 64: + c.rex_w.value = 1 + yield c + + def post_dis(self): + if self.g2.value: + for a in self.args: + if not isinstance(a.expr, ExprMem): + continue + m = a.expr + a.expr = ExprMem( + ExprOp('segm', enc2segm[self.g2.value], m.arg), m.size) + if self.name == 'LEA': + if not isinstance(self.args[1].expr, ExprMem): + return None + return self + + def dup_info(self, infos): + if infos is not None: + self.g1.value = infos.g1.value + self.g2.value = infos.g2.value + + def reset_class(self): + super(mn_x86, self).reset_class() + # self.rex_w.value, self.rex_b.value, + # self.rex_x.value = None, None, None + # self.opmode.value, self.admode.value = None, None + if hasattr(self, "opmode"): + del(self.opmode) + if hasattr(self, "admode"): + del(self.admode) + # self.opmode = 0 + # self.admode = 0 + + def add_pre_dis_info(self, pre_dis_info=None): + # print 'add_pre_dis_info', pre_dis_info + + if pre_dis_info is None: + return True + if hasattr(self, "prefixed") and self.prefixed.default == "\x66": + pre_dis_info['opmode'] = 0 + # if self.opmode != 0: + # return False + + # if pre_dis_info['opmode'] != 
self.opmode: + # return False + # if pre_dis_info['admode'] != self.admode: + # return False + self.opmode = pre_dis_info['opmode'] + self.admode = pre_dis_info['admode'] + + if hasattr(self, 'no_xmm_pref') and\ + pre_dis_info['prefix'] and\ + pre_dis_info['prefix'][-1] in '\x66\xf2\xf3': + return False + if (hasattr(self, "prefixed") and + not pre_dis_info['prefix'].endswith(self.prefixed.default)): + return False + # print self.rex_w.value, pre_dis_info['rex_w'] + # print 'rex', self.rex_w.value, self.rex_b.value, self.rex_x.value + if (self.rex_w.value is not None and + self.rex_w.value != pre_dis_info['rex_w']): + return False + else: + self.rex_w.value = pre_dis_info['rex_w'] + self.rex_r.value = pre_dis_info['rex_r'] + self.rex_b.value = pre_dis_info['rex_b'] + self.rex_x.value = pre_dis_info['rex_x'] + self.rex_p.value = pre_dis_info['rex_p'] + self.g1.value = pre_dis_info['g1'] + self.g2.value = pre_dis_info['g2'] + self.prefix = pre_dis_info['prefix'] + # self.prefixed = pre_dis_info['prefixed'] + + """ + if hasattr(self, "p_"): + self.prefixed = self.p_.default + if self.p_.default == "\x66": + pre_dis_info['opmode'] = 0 + if self.opmode != 0: + return False + #self.pre_dis_info = pre_dis_info + """ + return True + + def post_asm(self, v): + return v + + def encodefields(self, decoded): + v = super(mn_x86, self).encodefields(decoded) + if hasattr(self, 'prefixed'): + v = self.prefixed.default + v + + rex = 0x40 + if self.g1.value is None: + self.g1.value = 0 + if self.g2.value is None: + self.g2.value = 0 + + if self.rex_w.value: + rex |= 0x8 + if self.rex_r.value: + rex |= 0x4 + if self.rex_x.value: + rex |= 0x2 + if self.rex_b.value: + rex |= 0x1 + if rex != 0x40 or self.rex_p.value == 1: + v = chr(rex) + v + if self.g1.value & 1: + v = "\xf0" + v + if self.g1.value & 2: + if hasattr(self, 'no_xmm_pref'): + return None + v = "\xf2" + v + if self.g1.value & 4: + if hasattr(self, 'no_xmm_pref'): + return None + v = "\xf3" + v + if self.g2.value: + v = {1: 
'\x2e', 2: '\x36', 3: '\x3e', 4: + '\x26', 5: '\x64', 6: '\x65'}[self.g2.value] + v + # mode prefix + if hasattr(self, "admode") and self.admode: + v = "\x67" + v + + if hasattr(self, "opmode") and self.opmode: + if hasattr(self, 'no_xmm_pref'): + return None + v = "\x66" + v + + return v + + def getnextflow(self, symbol_pool): + raise NotImplementedError('not fully functional') + return self.offset + 4 + + def ir_pre_instruction(self): + return [ExprAff(mRIP[self.mode], + ExprInt_from(mRIP[self.mode], self.offset + self.l))] + + @classmethod + def filter_asm_candidates(cls, instr, candidates): + + cand_same_mode = [] + cand_diff_mode = [] + out = [] + for c, v in candidates: + if (hasattr(c, 'no_xmm_pref') and + (c.g1.value & 2 or c.g1.value & 4 or c.opmode)): + continue + if hasattr(c, "fopmode") and v_opmode(c) != c.fopmode.mode: + # print 'DROP', c, v_opmode(c), c.fopmode.mode + continue + if hasattr(c, "fadmode") and v_admode(c) != c.fadmode.mode: + # print 'DROP', c, v_opmode(c), c.fopmode.mode + continue + # relative dstflow must not have opmode set + # (affect IP instead of EIP for instance) + if (instr.dstflow() and + instr.name not in ["JCXZ", "JECXZ", "JRCXZ"] and + len(instr.args) == 1 and + isinstance(instr.args[0], ExprInt) and c.opmode): + continue + + out.append((c, v)) + candidates = out + # return [x[1][0] for x in candidates] + for c, v in candidates: + if v_opmode(c) == instr.mode: + cand_same_mode += v + for c, v in candidates: + if v_opmode(c) != instr.mode: + cand_diff_mode += v + cand_same_mode.sort(key=lambda x: len(x)) + cand_diff_mode.sort(key=lambda x: len(x)) + return cand_same_mode + cand_diff_mode + + +class bs8(bs): + prio = default_prio + + def __init__(self, v, cls=None, fname=None, **kargs): + super(bs8, self).__init__(int2bin(v, 8), 8, + cls=cls, fname=fname, **kargs) + + +class bs_modname_size(bs_divert): + prio = 1 + + def divert(self, i, candidates): + out = [] + for candidate in candidates: + cls, name, bases, dct, fields = 
candidate + fopmode = opmode_prefix( + (dct['mode'], dct['opmode'], dct['admode'])) + mode = dct['mode'] + size, opmode, admode = dct['mode'], dct['opmode'], dct['admode'] + # no mode64 existance in name means no 64bit version of mnemo + if mode == 64: + if mode in self.args['name']: + nfields = fields[:] + f, i = getfieldindexby_name(nfields, 'rex_w') + # f = bs("1", l=0, fname = 'rex_w') + f = bs("1", l=0, cls=(bs_fbit,), fname="rex_w") + osize = v_opmode_info(size, opmode, 1, 0) + nfields[i] = f + nfields = nfields[:-1] + args = dict(self.args) + ndct = dict(dct) + if osize in self.args['name']: + ndct['name'] = self.args['name'][osize] + out.append((cls, ndct['name'], bases, ndct, nfields)) + + nfields = fields[:] + nfields = nfields[:-1] + f, i = getfieldindexby_name(nfields, 'rex_w') + # f = bs("0", l=0, fname = 'rex_w') + f = bs("0", l=0, cls=(bs_fbit,), fname="rex_w") + osize = v_opmode_info(size, opmode, 0, 0) + nfields[i] = f + args = dict(self.args) + ndct = dict(dct) + if osize in self.args['name']: + ndct['name'] = self.args['name'][osize] + out.append((cls, ndct['name'], bases, ndct, nfields)) + else: + l = opmode_prefix((dct['mode'], dct['opmode'], dct['admode'])) + osize = v_opmode_info(size, opmode, None, 0) + nfields = fields[:-1] + args = dict(self.args) + ndct = dict(dct) + if osize in self.args['name']: + ndct['name'] = self.args['name'][osize] + out.append((cls, ndct['name'], bases, ndct, nfields)) + return out + + +class bs_modname_jecx(bs_divert): + prio = 1 + + def divert(self, i, candidates): + out = [] + for candidate in candidates: + cls, name, bases, dct, fields = candidate + fopmode = opmode_prefix( + (dct['mode'], dct['opmode'], dct['admode'])) + mode = dct['mode'] + size, opmode, admode = dct['mode'], dct['opmode'], dct['admode'] + + nfields = fields[:] + nfields = nfields[:-1] + args = dict(self.args) + ndct = dict(dct) + if mode == 64: + if admode: + ndct['name'] = "JECXZ" + else: + ndct['name'] = "JRCXZ" + elif mode == 32: + if 
admode: + ndct['name'] = "JCXZ" + else: + ndct['name'] = "JECXZ" + elif mode == 16: + if admode: + ndct['name'] = "JECXZ" + else: + ndct['name'] = "JCXZ" + else: + raise ValueError('unhandled mode') + out.append((cls, ndct['name'], bases, ndct, nfields)) + return out + + +class bs_modname_mode(bs_divert): + prio = 1 + + def divert(self, i, candidates): + out = [] + for candidate in candidates: + cls, name, bases, dct, fields = candidate + fopmode = opmode_prefix( + (dct['mode'], dct['opmode'], dct['admode'])) + size, opmode, admode = dct['mode'], dct['opmode'], dct['admode'] + + mode = dct['mode'] + l = opmode_prefix((dct['mode'], dct['opmode'], dct['admode'])) + osize = v_opmode_info(size, opmode, None, 0) + nfields = fields[:-1] + args = dict(self.args) + ndct = dict(dct) + if mode == 64 or osize == 32: + ndct['name'] = self.args['name'][mode] + else: + ndct['name'] = self.args['name'][16] + out.append((cls, ndct['name'], bases, ndct, nfields)) + return out + + +class x86_imm(imm_noarg): + parser = base_expr + + def decodeval(self, v): + return swap_uint(self.l, v) + + def encodeval(self, v): + return swap_uint(self.l, v) + + +class x86_imm_fix(imm_noarg): + parser = base_expr + + def decodeval(self, v): + return self.ival + + def encodeval(self, v): + if v != self.ival: + return False + return self.ival + + +class x86_08(x86_imm): + intsize = 8 + intmask = (1 << intsize) - 1 + + +class x86_16(x86_imm): + intsize = 16 + intmask = (1 << intsize) - 1 + + +class x86_32(x86_imm): + intsize = 32 + intmask = (1 << intsize) - 1 + + +class x86_64(x86_imm): + intsize = 64 + intmask = (1 << intsize) - 1 + + +class x86_08_ne(x86_imm): + intsize = 8 + intmask = (1 << intsize) - 1 + + def encode(self): + return True + + def decode(self, v): + v = swap_uint(self.l, v) + p = self.parent + admode = p.v_admode() + e = sign_ext(v, self.intsize, admode) + e = ExprInt_fromsize(admode, e) + self.expr = e + return True + + +class x86_16_ne(x86_08_ne): + intsize = 16 + intmask = (1 << 
intsize) - 1 + + +class x86_32_ne(x86_08_ne): + intsize = 32 + intmask = (1 << intsize) - 1 + + +class x86_64_ne(x86_08_ne): + intsize = 64 + intmask = (1 << intsize) - 1 + + +class x86_s08to16(x86_imm): + in_size = 8 + out_size = 16 + + def myexpr(self, x): + return ExprInt16(x) + + def int2expr(self, v): + return self.myexpr(v) + + def expr2int(self, e): + if not isinstance(e, ExprInt): + return None + v = int(e.arg) + if v & ~((1 << self.l) - 1) != 0: + return None + return v + + def decode(self, v): + v = v & self.lmask + v = self.decodeval(v) + if self.parent.v_opmode() == 64: + self.expr = ExprInt64(sign_ext(v, self.in_size, 64)) + else: + if (1 << (self.l - 1)) & v: + v = sign_ext(v, self.l, self.out_size) + self.expr = self.myexpr(v) + return True + + def encode(self): + if not isinstance(self.expr, ExprInt): + return False + v = int(self.expr.arg) + opmode = self.parent.v_opmode() + + out_size = self.out_size + if opmode != self.out_size: + if opmode == 32 and self.out_size == 64: + out_size = opmode + if v == sign_ext( + int(v & ((1 << self.in_size) - 1)), self.in_size, out_size): + pass + else: + # print 'cannot encode1', hex(v), + # print hex(sign_ext(int(v&((1<<self.in_size)-1)), + # self.in_size, out_size)) + # test with rex_w + self.parent.rex_w.value = 1 + opmode = self.parent.v_opmode() + out_size = opmode + if (v != sign_ext( + int(v & ((1 << self.in_size) - 1)), + self.in_size, out_size)): + # print 'cannot encode2', hex(v), + # hex(sign_ext(int(v&((1<<self.in_size)-1)), + # self.in_size, out_size)) + return False + else: + pass + else: + pass + if v != sign_ext( + int(v & ((1 << self.in_size) - 1)), self.in_size, out_size): + # print 'cannot encode3', hex(v), + # hex(sign_ext(int(v&((1<<self.in_size)-1)), self.in_size, + # self.out_size)) + return False + v = self.encodeval(v) + self.value = (v & 0xffffffff) & self.lmask + return True + + def decodeval(self, v): + return swap_uint(self.l, v) + + def encodeval(self, v): + return swap_sint(self.l, 
v) + + +class x86_s08to32(x86_s08to16): + myexpr = lambda self, x: ExprInt32(x) + in_size = 8 + out_size = 32 + + def decode(self, v): + v = v & self.lmask + v = self.decodeval(v) + if self.parent.rex_w.value == 1: + v = ExprInt64(sign_ext(v, self.in_size, 64)) + else: + v = ExprInt32(sign_ext(v, self.in_size, 32)) + + self.expr = v + # print "INT1", self.parent.rex_w.value, self.expr, self.expr.size + return True + + +class x86_s08to64(x86_s08to16): + myexpr = lambda self, x: ExprInt64(x) + in_size = 8 + out_size = 64 + + def decode(self, v): + v = v & self.lmask + v = self.decodeval(v) + if self.parent.rex_w.value == 1: + v = ExprInt64(sign_ext(v, self.in_size, 64)) + else: + v = ExprInt32(sign_ext(v, self.in_size, 32)) + + self.expr = v + # print "INT1X", self.parent.prefix.rex_w, self.expr, self.expr.size + return True + + +class x86_s32to64(x86_s08to32): + myexpr = lambda self, x: ExprInt64(x) + in_size = 32 + out_size = 64 + + +class bs_eax(m_arg): + reg_info = r_eax_all + rindex = 0 + parser = reg_info.parser + + def decode(self, v): + p = self.parent + e = None + if hasattr(p, 'w8') and p.w8.value == 0: + e = regs08_expr[self.rindex] + else: + e = size2gpregs[p.v_opmode()].expr[self.rindex] + self.expr = e + return True + + def encode(self): + self.value = 0 + p = self.parent + e = self.expr + # print "EEEEE", e, p.w8.value + # print 'XXX', p.mode, p.opmode + osize = p.v_opmode() + if hasattr(p, 'w8'): + if p.w8.value is None: + # XXX TODO: priority in w8 erase? 
+ if e.size == 8: + p.w8.value = 0 + else: + p.w8.value = 1 + if hasattr(p, 'w8') and p.w8.value == 0: + return e == regs08_expr[self.rindex] + elif p.mode in [16, 32]: + return e == size2gpregs[osize].expr[self.rindex] + elif p.mode == 64: + if e == size2gpregs[64].expr[self.rindex]: + p.rex_w.value = 1 + return True + elif e == size2gpregs[osize].expr[self.rindex]: + return True + return False + + +class bs_seg(m_arg): + reg_info = r_eax_all + rindex = 0 + parser = reg_info.parser + + def decode(self, v): + self.expr = self.reg_info.expr[0] + return True + + def encode(self): + self.value = 0 + return self.expr == self.reg_info.expr[0] + + +class bs_edx(bs_eax): + reg_info = r_edx_all + rindex = 2 + parser = reg_info.parser + + +class bs_st(bs_eax): + reg_info = r_st_all + rindex = 0 + parser = reg_info.parser + + +class bs_cs(bs_seg): + reg_info = r_cs_all + rindex = 0 + parser = reg_info.parser + + +class bs_ds(bs_seg): + reg_info = r_ds_all + rindex = 0 + parser = reg_info.parser + + +class bs_es(bs_seg): + reg_info = r_es_all + rindex = 0 + parser = reg_info.parser + + +class bs_ss(bs_seg): + reg_info = r_ss_all + rindex = 0 + parser = reg_info.parser + + +class bs_fs(bs_seg): + reg_info = r_fs_all + rindex = 0 + parser = reg_info.parser + + +class bs_gs(bs_seg): + reg_info = r_gs_all + rindex = 0 + parser = reg_info.parser + + +class x86_reg_st(reg_noarg, m_arg): + reg_info = r_st_all + parser = reg_info.parser + + +class bs_sib_scale(bs_divert): + bsname = "sib_scale" + + def divert(self, i, candidates): + out = [] + done = False + for cls, name, bases, dct, fields in candidates: + if (not (admode_prefix( + (dct['mode'], dct['opmode'], dct['admode'])) != 16 and + 'rm' in dct and dct['rm'] == 0b100 and + 'mod' in dct and dct['mod'] != 0b11)): + ndct = dict(dct) + nfields = fields[:] + nfields[i] = None + ndct[self.args['fname']] = None + out.append((cls, ndct['name'], bases, ndct, nfields)) + continue + + nfields = fields[:] + args = dict(self.args) + ndct = 
dict(dct) + f = bs(**args) + nfields[i] = f + ndct[self.args['fname']] = None + out.append((cls, ndct['name'], bases, ndct, nfields)) + return out + + +class bs_sib_index(bs_sib_scale): + pass + + +class bs_sib_base(bs_sib_scale): + pass + + +class bs_disp(bs_divert): + + def divert(self, i, candidates): + out = [] + done = False + for cls, name, bases, dct, fields in candidates: + ndct = dict(dct) + nfields = fields[:] + if (admode_prefix( + (dct['mode'], dct['opmode'], dct['admode'])) == 16): + if 'mod' in dct and dct['mod'] == 0b00 and \ + 'rm' in dct and dct['rm'] == 0b110: + nfields[i] = bs( + l=16, cls=(x86_16_ne,), fname=self.args['fname']) + ndct[self.args['fname']] = True + out.append((cls, ndct['name'], bases, ndct, nfields)) + continue + elif 'mod' in dct and dct['mod'] == 0b01: + nfields[i] = bs( + l=8, cls=(x86_08_ne,), fname=self.args['fname']) + ndct[self.args['fname']] = True + out.append((cls, ndct['name'], bases, ndct, nfields)) + continue + elif 'mod' in dct and dct['mod'] == 0b10: + nfields[i] = bs( + l=16, cls=(x86_16_ne,), fname=self.args['fname']) + ndct[self.args['fname']] = True + out.append((cls, ndct['name'], bases, ndct, nfields)) + continue + else: + if 'mod' in dct and dct['mod'] == 0b00 and \ + 'rm' in dct and dct['rm'] == 0b101: + nfields[i] = bs( + l=32, cls=(x86_32_ne,), fname=self.args['fname']) + ndct[self.args['fname']] = True + out.append((cls, ndct['name'], bases, ndct, nfields)) + continue + elif 'mod' in dct and dct['mod'] == 0b01: + nfields[i] = bs( + l=8, cls=(x86_08_ne,), fname=self.args['fname']) + ndct[self.args['fname']] = True + out.append((cls, ndct['name'], bases, ndct, nfields)) + continue + elif 'mod' in dct and dct['mod'] == 0b10: + nfields[i] = bs( + l=32, cls=(x86_32_ne,), fname=self.args['fname']) + ndct[self.args['fname']] = True + out.append((cls, ndct['name'], bases, ndct, nfields)) + continue + + nfields[i] = None + ndct[self.args['fname']] = None + out.append((cls, ndct['name'], bases, ndct, nfields)) + 
return out + + +def getmodrm(c): + return (c >> 6) & 3, (c >> 3) & 7, c & 7 + + +def setmodrm(mod, re, rm): + return ((mod & 3) << 6) | ((re & 7) << 3) | (rm & 7) + + +def sib(c): + return modrm(c) + +db_afs_64 = [] +sib_64_s08_ebp = [] + + +def gen_modrm_form(): + global db_afs_64, sib_64_s08_ebp + ebp = 5 + + sib_s08_ebp = [{f_isad: True} for i in range(0x100)] + sib_u32_ebp = [{f_isad: True} for i in range(0x100)] + sib_u32 = [{f_isad: True} for i in range(0x100)] + + sib_u64 = [] + for rex_x in xrange(2): + o = [] + for rex_b in xrange(2): + x = [{f_isad: True} for i in range(0x100)] + o.append(x) + sib_u64.append(o) + + sib_u64_ebp = [] + for rex_x in xrange(2): + o = [] + for rex_b in xrange(2): + x = [{f_isad: True} for i in range(0x100)] + o.append(x) + sib_u64_ebp.append(o) + + sib_64_s08_ebp = [] + for rex_x in xrange(2): + o = [] + for rex_b in xrange(2): + x = [{f_isad: True} for i in range(0x100)] + o.append(x) + sib_64_s08_ebp.append(o) + + for sib_rez in [sib_s08_ebp, + sib_u32_ebp, + sib_u32, + sib_64_s08_ebp, + sib_u64_ebp, + sib_u64, + ]: + for index in range(0x100): + ss, i, b = getmodrm(index) + + if b == 0b101: + if sib_rez == sib_s08_ebp: + sib_rez[index][f_imm] = f_s08 + sib_rez[index][ebp] = 1 + elif sib_rez == sib_u32_ebp: + sib_rez[index][f_imm] = f_u32 + sib_rez[index][ebp] = 1 + elif sib_rez == sib_u32: + sib_rez[index][f_imm] = f_u32 + elif sib_rez == sib_u64_ebp: + for rex_b in xrange(2): + for rex_x in xrange(2): + sib_rez[rex_x][rex_b][index][f_imm] = f_u32 + sib_rez[rex_x][rex_b][index][ebp + 8 * rex_b] = 1 + elif sib_rez == sib_u64: + for rex_b in xrange(2): + for rex_x in xrange(2): + sib_rez[rex_x][rex_b][index][f_imm] = f_u32 + elif sib_rez == sib_64_s08_ebp: + for rex_b in xrange(2): + for rex_x in xrange(2): + sib_rez[rex_x][rex_b][index][f_imm] = f_s08 + sib_rez[rex_x][rex_b][index][ebp + 8 * rex_b] = 1 + + else: + if sib_rez == sib_s08_ebp: + sib_rez[index][b] = 1 + sib_rez[index][f_imm] = f_s08 + elif sib_rez == 
sib_u32_ebp: + sib_rez[index][b] = 1 + sib_rez[index][f_imm] = f_u32 + elif sib_rez == sib_u32: + sib_rez[index][b] = 1 + elif sib_rez == sib_u64_ebp: + for rex_b in xrange(2): + for rex_x in xrange(2): + sib_rez[rex_x][rex_b][index][b + 8 * rex_b] = 1 + sib_rez[rex_x][rex_b][index][f_imm] = f_u32 + elif sib_rez == sib_u64: + for rex_b in xrange(2): + for rex_x in xrange(2): + sib_rez[rex_x][rex_b][index][b + 8 * rex_b] = 1 + elif sib_rez == sib_64_s08_ebp: + for rex_b in xrange(2): + for rex_x in xrange(2): + sib_rez[rex_x][rex_b][index][f_imm] = f_s08 + sib_rez[rex_x][rex_b][index][b + 8 * rex_b] = 1 + + if i == 0b100 and sib_rez in [sib_s08_ebp, sib_u32_ebp, sib_u32]: + continue + + if sib_rez in [sib_s08_ebp, sib_u32_ebp, sib_u32]: + tmp = i + if not tmp in sib_rez[index]: + sib_rez[index][tmp] = 0 # 1 << ss + sib_rez[index][tmp] += 1 << ss + else: + for rex_b in xrange(2): + for rex_x in xrange(2): + tmp = i + 8 * rex_x + if i == 0b100 and rex_x == 0: + continue + if not tmp in sib_rez[rex_x][rex_b][index]: + sib_rez[rex_x][rex_b][index][tmp] = 0 # 1 << ss + sib_rez[rex_x][rex_b][index][tmp] += 1 << ss + + # 32bit + db_afs_32 = [None for i in range(0x100)] + for i in range(0x100): + index = i + mod, re, rm = getmodrm(i) + + if mod == 0b00: + if rm == 0b100: + db_afs_32[index] = sib_u32 + elif rm == 0b101: + db_afs_32[index] = {f_isad: True, f_imm: f_u32} + else: + db_afs_32[index] = {f_isad: True, rm: 1} + elif mod == 0b01: + if rm == 0b100: + db_afs_32[index] = sib_s08_ebp + continue + tmp = {f_isad: True, rm: 1, f_imm: f_s08} + db_afs_32[index] = tmp + + elif mod == 0b10: + if rm == 0b100: + db_afs_32[index] = sib_u32_ebp + else: + db_afs_32[index] = {f_isad: True, rm: 1, f_imm: f_u32} + elif mod == 0b11: + db_afs_32[index] = {f_isad: False, rm: 1} + + # 64bit + db_afs_64 = [None for i in range(0x400)] + for i in range(0x400): + index = i + rex_x = (index >> 9) & 1 + rex_b = (index >> 8) & 1 + mod, re, rm = getmodrm(i & 0xff) + + if mod == 0b00: + if rm == 
0b100: + db_afs_64[i] = sib_u64[rex_x][rex_b] + elif rm == 0b101: + db_afs_64[i] = {f_isad: True, f_imm: f_u32, 16: 1} + else: + db_afs_64[i] = {f_isad: True, rm + 8 * rex_b: 1} + elif mod == 0b01: + if rm == 0b100: + db_afs_64[i] = sib_64_s08_ebp[rex_x][rex_b] + continue + tmp = {f_isad: True, rm + 8 * rex_b: 1, f_imm: f_s08} + db_afs_64[i] = tmp + + elif mod == 0b10: + if rm == 0b100: + db_afs_64[i] = sib_u64_ebp[rex_x][rex_b] + else: + db_afs_64[i] = {f_isad: True, rm + 8 * rex_b: 1, f_imm: f_u32} + elif mod == 0b11: + db_afs_64[i] = {f_isad: False, rm + 8 * rex_b: 1} + + # 16bit + db_afs_16 = [None for i in range(0x100)] + _si = 6 + _di = 7 + _bx = 3 + _bp = 5 + for i in range(0x100): + index = i + mod, re, rm = getmodrm(i) + + if mod == 0b00: + if rm == 0b100: + db_afs_16[index] = {f_isad: True, _si: 1} + elif rm == 0b101: + db_afs_16[index] = {f_isad: True, _di: 1} + elif rm == 0b110: + db_afs_16[index] = { + f_isad: True, f_imm: f_u16} # {f_isad:True,_bp:1} + elif rm == 0b111: + db_afs_16[index] = {f_isad: True, _bx: 1} + else: + db_afs_16[index] = {f_isad: True, + [_si, _di][rm % 2]: 1, + [_bx, _bp][(rm >> 1) % 2]: 1} + elif mod in [0b01, 0b10]: + if mod == 0b01: + my_imm = f_s08 + else: + my_imm = f_u16 + + if rm == 0b100: + db_afs_16[index] = {f_isad: True, _si: 1, f_imm: my_imm} + elif rm == 0b101: + db_afs_16[index] = {f_isad: True, _di: 1, f_imm: my_imm} + elif rm == 0b110: + db_afs_16[index] = {f_isad: True, _bp: 1, f_imm: my_imm} + elif rm == 0b111: + db_afs_16[index] = {f_isad: True, _bx: 1, f_imm: my_imm} + else: + db_afs_16[index] = {f_isad: True, + [_si, _di][rm % 2]: 1, + [_bx, _bp][(rm >> 1) % 2]: 1, + f_imm: my_imm} + + elif mod == 0b11: + db_afs_16[index] = {f_isad: False, rm: 1} + + byte2modrm = {} + byte2modrm[16] = db_afs_16 + byte2modrm[32] = db_afs_32 + byte2modrm[64] = db_afs_64 + + modrm2byte = {16: defaultdict(list), + 32: defaultdict(list), + 64: defaultdict(list), + } + for size, db_afs in byte2modrm.items(): + for i, modrm in 
enumerate(db_afs): + if not isinstance(modrm, list): + modrm = modrm.items() + modrm.sort() + modrm = tuple(modrm) + modrm2byte[size][modrm].append(i) + continue + for j, modrm_f in enumerate(modrm): + modrm_f = modrm_f.items() + modrm_f.sort() + modrm_f = tuple(modrm_f) + modrm2byte[size][modrm_f].append((i, j)) + + return byte2modrm, modrm2byte + +byte2modrm, modrm2byte = gen_modrm_form() + + +# ret is modr; ret is displacement +def exprfindmod(e, o=None): + if o is None: + o = {} + if isinstance(e, ExprInt): + return e + if isinstance(e, ExprId): + i = size2gpregs[e.size].expr.index(e) + o[i] = 1 + return None + elif isinstance(e, ExprOp): + out = None + if e.op == '+': + for a in e.args: + r = exprfindmod(a, o) + if out and r1: + raise ValueError('multiple displacement!') + out = r + return out + elif e.op == "*": + mul = int(e.args[1].arg) + a = e.args[0] + i = size2gpregs[a.size].expr.index(a) + o[i] = mul + else: + raise ValueError('bad op') + return None + + +def expr2modrm(e, p, w8, sx=0, xmm=0, mm=0): + o = defaultdict(lambda x: 0) + if e.size == 64 and not e in gpregs_mm.expr: + if hasattr(p, 'sd'): + p.sd.value = 1 + # print 'set64pref', str(e) + elif hasattr(p, 'wd'): + pass + elif hasattr(p, 'stk'): + pass + else: + p.rex_w.value = 1 + opmode = p.v_opmode() + if sx == 1: + opmode = 16 + if sx == 2: + opmode = 32 + if e.size == 8 and w8 != 0: + return None, None, False + + if w8 == 0 and e.size != 8: + return None, None, False + + if not isinstance(e, ExprMem): + o[f_isad] = False + if xmm: + if e in gpregs_xmm.expr: + i = gpregs_xmm.expr.index(e) + o[i] = 1 + return [o], None, True + else: + return None, None, False + if mm: + if e in gpregs_mm.expr: + i = gpregs_mm.expr.index(e) + o[i] = 1 + return [o], None, True + else: + return None, None, False + if w8 == 0: + # if (p.v_opmode() == 64 or p.rex_p.value == 1) and e in + # gpregs08_64.expr: + if p.mode == 64 and e in gpregs08_64.expr: + r = gpregs08_64 + p.rex_p.value = 1 + else: + p.rex_p.value = 0 
+ p.rex_x.value = 0 + r = size2gpregs[8] + if not e in r.expr: + return None, None, False + i = r.expr.index(e) + o[i] = 1 + return [o], None, True + # print "ttt", opmode, e.size + if opmode != e.size: + # print "FFFF" + return None, None, False + if not e in size2gpregs[opmode].expr: + return None, None, False + i = size2gpregs[opmode].expr.index(e) + # print 'aaa', p.mode, i + if i > 7: + if p.mode == 64: + # p.rex_b.value = 1 + # i -=7 + # print "SET REXB" + pass + else: + return None, None, False + o[i] = 1 + return [o], None, True + if e.is_op_segm() and isinstance(e.arg.args[0], ExprInt): + return None, None, False + + if e.is_op_segm(): + segm = e.arg.args[0] + ptr = e.arg.args[1] + else: + segm = None + ptr = e.arg + + o[f_isad] = True + ad_size = ptr.size + admode = p.v_admode() + if ad_size != admode: + return None, None, False + """ + if e.size == 64: + if hasattr(p, 'sd'): + p.sd.value = 1 + else: + p.rex_w.value = 1 + """ + + if w8 == 1 and e.size != opmode: # p.v_opmode(): + if not (hasattr(p, 'sd') or hasattr(p, 'wd')): + return None, None, False + # print 'tttt' + + if hasattr(p, 'wd'): + s = e.size + if s == 16: + p.wd.value = 1 + elif s == 32: + pass + else: + return None, None, False + + if p.mode == 64 and ptr.size == 32: + if p.admode != 1: + return None, None, False + + o = {f_isad: True} + disp = exprfindmod(ptr, o) + out = [] + if disp is None: + # add 0 disp + disp = ExprInt32(0) + if disp is not None: + for s, x in [(f_s08, ExprInt8), (f_s16, ExprInt16), (f_s32, ExprInt32), + (f_u08, ExprInt8), (f_u16, ExprInt16), (f_u32, ExprInt32)]: + # print "1", disp + v = x(int(disp.arg)) + # print "2", v, hex(sign_ext(int(v.arg), v.size, disp.size)) + if int(disp.arg) != sign_ext(int(v.arg), v.size, disp.size): + # print 'nok' + continue + # print 'ok', s, v + x1 = dict(o) + x1[f_imm] = (s, v) + out.append(x1) + else: + out = [o] + return out, segm, True + + +def modrm2expr(m, p, w8, sx=0, xmm=0, mm=0): + o = [] + if not m[f_isad]: + k = [x[0] for x 
in m.items() if x[1] == 1] + if len(k) != 1: + raise ValueError('strange reg encoding %r' % m) + k = k[0] + if w8 == 0: + opmode = 8 + elif sx == 1: + opmode = 16 + elif sx == 2: + opmode = 32 + else: + opmode = p.v_opmode() + """ + if k > 7: + # XXX HACK TODO + e = size2gpregs[64].expr[k] + else: + e = size2gpregs[opmode].expr[k] + """ + # print 'yyy', opmode, k + if xmm: + e = gpregs_xmm.expr[k] + elif mm: + e = gpregs_mm.expr[k] + elif opmode == 8 and (p.v_opmode() == 64 or p.rex_p.value == 1): + e = gpregs08_64.expr[k] + else: + e = size2gpregs[opmode].expr[k] + return e + # print "enc", m, p.v_admode(), p.prefix.opmode, p.prefix.admode + admode = p.v_admode() + opmode = p.v_opmode() + for k, v in m.items(): + if type(k) in [int, long]: + e = size2gpregs[admode].expr[k] + if v != 1: + e = ExprInt_fromsize(admode, v) * e + o.append(e) + # print [str(x) for x in o] + if f_imm in m: + if p.disp.value is None: + return None + o.append(ExprInt_fromsize(admode, p.disp.expr.arg)) + e = ExprOp('+', *o) + if w8 == 0: + opmode = 8 + elif sx == 1: + opmode = 16 + elif sx == 2: + opmode = 32 + e = ExprMem(e, size=opmode) + # print "mem size", opmode, e + return e + + +class x86_rm_arg(m_arg): + parser = rmarg + + def fromstring(self, s, parser_result=None): + start, stop = super(x86_rm_arg, self).fromstring(s, parser_result) + e = self.expr + p = self.parent + if start is None: + return None, None + s = e.size + return start, stop + + @staticmethod + def arg2str(e): + if isinstance(e, ExprId): + o = str(e) + elif isinstance(e, ExprMem): + sz = {8: 'BYTE', 16: 'WORD', 32: 'DWORD', + 64: 'QWORD', 80: 'TBYTE'}[e.size] + segm = "" + if e.is_op_segm(): + segm = "%s:" % e.arg.args[0] + e = e.arg.args[1] + else: + e = e.arg + if isinstance(e, ExprOp): + # s = str(e.arg)[1:-1] + s = str(e).replace('(', '').replace(')', '') + else: + s = str(e) + o = sz + ' PTR %s[%s]' % (segm, s) + else: + raise ValueError('check this %r' % e) + return "%s" % o + + def get_modrm(self): + p = 
self.parent + admode = p.v_admode() + + if not admode in [16, 32, 64]: + raise ValueError('strange admode %r', admode) + v = setmodrm(p.mod.value, 0, p.rm.value) + v |= p.rex_b.value << 8 + v |= p.rex_x.value << 9 + if p.mode == 64: + # XXXx to check + admode = 64 + + xx = byte2modrm[admode][v] + if isinstance(xx, list): + if not p.sib_scale: + return False + v = setmodrm(p.sib_scale.value, + p.sib_index.value, + p.sib_base.value) + # print 'SIB', hex(v) + # v |= p.rex_b.value << 8 + # v |= p.rex_x.value << 9 + # if v >= 0x100: + # pass + xx = xx[v] + return xx + + def decode(self, v): + p = self.parent + xx = self.get_modrm() + mm = hasattr(self.parent, "mm") + xmm = hasattr(self.parent, "xmm") + e = modrm2expr(xx, p, 1, xmm=xmm, mm=mm) + if e is None: + return False + self.expr = e + return True + + def gen_cand(self, v_cand, admode): + # print "GEN CAND" + if not admode in modrm2byte: + # XXX TODO: 64bit + raise StopIteration + if not v_cand: + raise StopIteration + + p = self.parent + o_rex_x = p.rex_x.value + o_rex_b = p.rex_b.value + # add candidate without 0 imm + new_v_cand = [] + moddd = False + for v in v_cand: + new_v_cand.append(v) + # print 'CANDI', v, admode + if f_imm in v and int(v[f_imm][1].arg) == 0: + v = dict(v) + del(v[f_imm]) + new_v_cand.append(v) + moddd = True + + v_cand = new_v_cand + + out_c = [] + for v in v_cand: + disp = None + # patch value in modrm + if f_imm in v: + size, disp = v[f_imm] + disp = int(disp.arg) + # disp = swap_uint(f_imm2size[size], int(disp)) + + v[f_imm] = size + vo = v + # print 'vv', v, disp + v = v.items() + v.sort() + v = tuple(v) + # print "II", e, admode + # print 'III', v + # if (8, 1) in v: + # pass + if not v in modrm2byte[admode]: + # print 'cannot find' + continue + # print "FOUND1", v + xx = modrm2byte[admode][v] + # if opmode == 64 and admode == 64: + # pdb.set_trace() + + # print "FOUND2", xx + # default case + for x in xx: + if type(x) == tuple: + modrm, sib = x + else: + modrm = x + sib = None + # 
print 'mod sib', hex(modrm), sib + # print p.sib_scale + # print p.sib_base + # print p.sib_index + + # 16 bit cannot have sib + if (not sib is None) and admode == 16: + continue + # if ((p.sib_scale and sib is None) or + # (p.sib_scale is None and sib)): + # log.debug('dif sib %r %r'%(p.sib_scale, sib)) + # continue + # print hex(modrm), sib + # p.mod.value, dum, p.rm.value = getmodrm(modrm) + rex = modrm >> 8 # 0# XXX HACK REM temporary REX modrm>>8 + if rex and admode != 64: + continue + # print 'prefix', hex(rex) + # p.rex_x.value = o_rex_x + # p.rex_b.value = o_rex_b + + p.rex_x.value = (rex >> 1) & 1 + p.rex_b.value = rex & 1 + + if o_rex_x is not None and p.rex_x.value != o_rex_x: + continue + if o_rex_b is not None and p.rex_b.value != o_rex_b: + continue + + mod, re, rm = getmodrm(modrm) + # check re on parent + if re != p.reg.value: + continue + # p.mod.value.append(mod) + # p.rm.value.append(rm) + + if sib: + # print 'REX', p.rex_x.value, p.rex_b.value + # print hex(modrm), hex(sib) + # if (modrm & 0xFF == 4 and sib & 0xFF == 0x5 + # and p.rex_b.value ==1 and p.rex_x.value == 0): + # pass + s_scale, s_index, s_base = getmodrm(sib) + # p.sib_scale.value, p.sib_index.value, + # p.sib_base.value = getmodrm(sib) + # p.sib_scale.decode(mod) + # p.sib_index.decode(re) + # p.sib_base.decode(rm) + # p.sib_scale.value.append(mod) + # p.sib_index.value.append(re) + # p.sib_base.value.append(rm) + else: + # p.sib_scale.value.append(None) + # p.sib_index.value.append(None) + # p.sib_base.value.append(None) + s_scale, s_index, s_base = None, None, None + + # print 'IIII', repr(p.disp), f_imm in v + # if p.disp and not f_imm in vo: + # continue + # if not p.disp and f_imm in vo: + # continue + # if p.disp: + # if p.disp.l != f_imm2size[vo[f_imm]]: + # continue + # print "DISP", repr(p.disp), p.disp.l + # p.disp.value = int(disp.arg) + # print 'append' + # print mod, rm, s_scale, s_index, s_base, disp + # print p.mod, p.rm + # out_c.append((mod, rm, s_scale, s_index, 
s_base, disp)) + p.mod.value = mod + p.rm.value = rm + p.sib_scale.value = s_scale + p.sib_index.value = s_index + p.sib_base.value = s_base + p.disp.value = disp + if disp is not None: + p.disp.l = f_imm2size[vo[f_imm]] + + yield True + + raise StopIteration + + def encode(self): + e = self.expr + # print "eee", e + if isinstance(e, ExprInt): + raise StopIteration + p = self.parent + admode = p.v_admode() + mode = e.size + mm = hasattr(self.parent, 'mm') + xmm = hasattr(self.parent, 'xmm') + v_cand, segm, ok = expr2modrm(e, p, 1, xmm=xmm, mm=mm) + if segm: + p.g2.value = segm2enc[segm] + # print "REZ1", v_cand, ok + for x in self.gen_cand(v_cand, admode): + yield x + + +class x86_rm_w8(x86_rm_arg): + + def decode(self, v): + p = self.parent + xx = self.get_modrm() + e = modrm2expr(xx, p, p.w8.value) + self.expr = e + return e is not None + + def encode(self): + e = self.expr + if isinstance(e, ExprInt): + raise StopIteration + p = self.parent + if p.w8.value is None: + if e.size == 8: + p.w8.value = 0 + else: + p.w8.value = 1 + + # print 'TTTTT', e + v_cand, segm, ok = expr2modrm(e, p, p.w8.value) + if segm: + p.g2.value = segm2enc[segm] + # print "REZ2", v_cand, ok + for x in self.gen_cand(v_cand, p.v_admode()): + # print 'REZ', p.rex_x.value + yield x + + +class x86_rm_sx(x86_rm_arg): + + def decode(self, v): + p = self.parent + xx = self.get_modrm() + e = modrm2expr(xx, p, p.w8.value, 1) + self.expr = e + return e is not None + + def encode(self): + e = self.expr + if isinstance(e, ExprInt): + raise StopIteration + p = self.parent + if p.w8.value is None: + if e.size == 8: + p.w8.value = 0 + else: + p.w8.value = 1 + v_cand, segm, ok = expr2modrm(e, p, p.w8.value, 1) + if segm: + p.g2.value = segm2enc[segm] + for x in self.gen_cand(v_cand, p.v_admode()): + yield x + + +class x86_rm_sxd(x86_rm_arg): + + def decode(self, v): + p = self.parent + xx = self.get_modrm() + e = modrm2expr(xx, p, 1, 2) + self.expr = e + return e is not None + + def encode(self): + e = 
self.expr + if isinstance(e, ExprInt): + raise StopIteration + p = self.parent + v_cand, segm, ok = expr2modrm(e, p, 1, 2) + if segm: + p.g2.value = segm2enc[segm] + for x in self.gen_cand(v_cand, p.v_admode()): + yield x + + +class x86_rm_sd(x86_rm_arg): + + def decode(self, v): + p = self.parent + xx = self.get_modrm() + e = modrm2expr(xx, p, 1) + if not isinstance(e, ExprMem): + return False + if p.sd.value == 0: + e = ExprMem(e.arg, 32) + else: + e = ExprMem(e.arg, 64) + self.expr = e + return e is not None + + def encode(self): + e = self.expr + if isinstance(e, ExprInt): + raise StopIteration + p = self.parent + if not e.size in [32, 64]: + raise StopIteration + p.sd.value = 0 + v_cand, segm, ok = expr2modrm(e, p, 1) + for x in self.gen_cand(v_cand, p.v_admode()): + yield x + + +class x86_rm_wd(x86_rm_arg): + + def decode(self, v): + p = self.parent + xx = self.get_modrm() + e = modrm2expr(xx, p, 1) + if not isinstance(e, ExprMem): + return False + if p.wd.value == 0: + e = ExprMem(e.arg, 32) + else: + e = ExprMem(e.arg, 16) + self.expr = e + return e is not None + + def encode(self): + e = self.expr + if isinstance(e, ExprInt): + raise StopIteration + p = self.parent + p.wd.value = 0 + v_cand, segm, ok = expr2modrm(e, p, 1) + for x in self.gen_cand(v_cand, p.v_admode()): + yield x + + +class x86_rm_m80(x86_rm_arg): + msize = 80 + + def decode(self, v): + p = self.parent + xx = self.get_modrm() + # print "aaa", xx + e = modrm2expr(xx, p, 1) + if not isinstance(e, ExprMem): + return False + e = ExprMem(e.arg, self.msize) + self.expr = e + return e is not None + + def encode(self): + e = self.expr + if isinstance(e, ExprInt): + raise StopIteration + if not isinstance(e, ExprMem) or e.size != self.msize: + raise StopIteration + p = self.parent + mode = p.mode + if mode == 64: + mode = 32 + e = ExprMem(e.arg, mode) + v_cand, segm, ok = expr2modrm(e, p, 1) + for x in self.gen_cand(v_cand, p.v_admode()): + yield x + + +class x86_rm_m08(x86_rm_arg): + msize = 8 + + 
def decode(self, v): + p = self.parent + xx = self.get_modrm() + e = modrm2expr(xx, p, 0) + self.expr = e + return e is not None + + def encode(self): + e = self.expr + if e.size != 8: + raise StopIteration + """ + if not isinstance(e, ExprMem) or e.size != self.msize: + raise StopIteration + """ + p = self.parent + mode = p.mode + # if mode == 64: + # mode = 32 + # e = ExprMem(e.arg, mode) + v_cand, segm, ok = expr2modrm(e, p, 0) + for x in self.gen_cand(v_cand, p.v_admode()): + yield x + + +class x86_rm_m16(x86_rm_m80): + msize = 16 + + +class x86_rm_m64(x86_rm_m80): + msize = 64 + + +class x86_rm_reg_noarg(object): + prio = default_prio + 1 + + parser = gpreg + + def fromstring(self, s, parser_result=None): + # print 'parsing reg', s, opmode + if not hasattr(self.parent, 'sx') and hasattr(self.parent, "w8"): + self.parent.w8.value = 1 + if parser_result: + e, start, stop = parser_result[self.parser] + # print 'reg result', e, start, stop + if e is None: + return None, None + self.expr = e + if self.expr.size == 8: + if hasattr(self.parent, 'sx') or not hasattr(self.parent, 'w8'): + return None, None + self.parent.w8.value = 0 + return start, stop + try: + v, start, stop = self.parser.scanString(s).next() + except StopIteration: + return None, None + self.expr = v[0] + if self.expr.size == 0: + if hasattr(self.parent, 'sx') or not hasattr(self.parent, 'w8'): + return None, None + self.parent.w8.value = 0 + + # print 'parsed', s, self.expr + return start, stop + + def getrexsize(self): + return self.parent.rex_r.value + + def setrexsize(self, v): + self.parent.rex_r.value = v + + def decode(self, v): + v = v & self.lmask + p = self.parent + opmode = p.v_opmode() + # if hasattr(p, 'sx'): + # opmode = 16 + if not hasattr(p, 'sx') and (hasattr(p, 'w8') and p.w8.value == 0): + opmode = 8 + r = size2gpregs[opmode] + if p.mode == 64 and self.getrexsize(): + v |= 0x8 + # print "XXX", p.v_opmode(), p.rex_p.value + if p.v_opmode() == 64 or p.rex_p.value == 1: + if not 
hasattr(p, 'sx') and (hasattr(p, 'w8') and p.w8.value == 0): + # if (hasattr(p, 'w8') and p.w8.value == 0): + r = gpregs08_64 + """ + if v < 8: + self.expr = r.expr[v] + else: + self.expr = size2gpregs[64].expr[v] + """ + if hasattr(p, "xmm") or hasattr(p, "xmmreg"): + e = gpregs_xmm.expr[v] + elif hasattr(p, "mm") or hasattr(p, "mmreg"): + e = gpregs_mm.expr[v] + else: + e = r.expr[v] + self.expr = e + return True + + def encode(self): + if not isinstance(self.expr, ExprId): + return False + if self.expr in gpregs64.expr and not hasattr(self.parent, 'stk'): + self.parent.rex_w.value = 1 + # print self.parent.opmode + # fd + opmode = self.parent.v_opmode() + # if hasattr(self.parent, 'sx'): + # opmode = 16 + # print 'reg encode', self.expr, opmode + if not hasattr(self.parent, 'sx') and hasattr(self.parent, 'w8'): + self.parent.w8.value = 1 + if self.expr.size == 8: + if hasattr(self.parent, 'sx') or not hasattr(self.parent, 'w8'): + return False + self.parent.w8.value = 0 + opmode = 8 + r = size2gpregs[opmode] + # print "YYY", opmode, self.expr + if ((hasattr(self.parent, 'xmm') or hasattr(self.parent, 'xmmreg')) + and self.expr in gpregs_xmm.expr): + i = gpregs_xmm.expr.index(self.expr) + elif ((hasattr(self.parent, 'mm') or hasattr(self.parent, 'mmreg')) + and self.expr in gpregs_mm.expr): + i = gpregs_mm.expr.index(self.expr) + elif self.expr in r.expr: + i = r.expr.index(self.expr) + elif (opmode == 8 and self.parent.mode == 64 and + self.expr in gpregs08_64.expr): + i = gpregs08_64.expr.index(self.expr) + self.parent.rex_p.value = 1 + else: + log.debug("cannot encode reg %r" % self.expr) + return False + # print "zzz", opmode, self.expr, i, self.parent.mode + if self.parent.v_opmode() == 64: + if i > 7: + self.setrexsize(1) + i -= 8 + elif self.parent.mode == 64 and i > 7: + i -= 8 + # print 'rrr', self.getrexsize() + # self.parent.rex_b.value = 1 + self.setrexsize(1) + if hasattr(self.parent, 'xmm') or hasattr(self.parent, 'mm'): + if i > 7: + i -= 8 + 
self.value = i + if self.value > self.lmask: + log.debug("cannot encode field value %x %x" % + (self.value, self.lmask)) + return False + # print 'RR ok' + return True + + +class x86_rm_reg(x86_rm_reg_noarg, m_arg): + pass + + +class x86_reg(x86_rm_reg): + + def getrexsize(self): + return self.parent.rex_b.value + + def setrexsize(self, v): + self.parent.rex_b.value = v + + +class x86_reg_noarg(x86_rm_reg_noarg): + + def getrexsize(self): + return self.parent.rex_b.value + + def setrexsize(self, v): + self.parent.rex_b.value = v + + +class x86_rm_segm(reg_noarg, m_arg): + prio = default_prio + 1 + reg_info = segmreg + parser = reg_info.parser + + +class x86_rm_cr(reg_noarg, m_arg): + prio = default_prio + 1 + reg_info = crregs + parser = reg_info.parser + + +class x86_rm_dr(reg_noarg, m_arg): + prio = default_prio + 1 + reg_info = drregs + parser = reg_info.parser + + +class x86_rm_flt(reg_noarg, m_arg): + prio = default_prio + 1 + reg_info = fltregs + parser = reg_info.parser + + +class bs_fbit(bsi): + + def decode(self, v): + # value already decoded in pre_dis_info + # print "jj", self.value + return True + + +class bs_cl1(bsi, m_arg): + parser = cl_or_imm + + def decode(self, v): + if v == 1: + self.expr = regs08_expr[1] + else: + self.expr = ExprInt8(1) + return True + + def encode(self): + if self.expr == regs08_expr[1]: + self.value = 1 + elif isinstance(self.expr, ExprInt) and int(self.expr.arg) == 1: + self.value = 0 + else: + return False + return True + + +def sib_cond(cls, mode, v): + if admode_prefix((mode, v["opmode"], v["admode"])) == 16: + return None + if v['mod'] == 0b11: + return None + elif v['rm'] == 0b100: + return cls.ll + else: + return None + return v['rm'] == 0b100 + + +class bs_cond_scale(bs_cond): + # cond must return field len + ll = 2 + + @classmethod + def flen(cls, mode, v): + return sib_cond(cls, mode, v) + + def encode(self): + if self.value is None: + self.value = 0 + self.l = 0 + return True + return super(bs_cond, self).encode() 
+ + def decode(self, v): + self.value = v + return True + + +class bs_cond_index(bs_cond_scale): + ll = 3 + + @classmethod + def flen(cls, mode, v): + return sib_cond(cls, mode, v) + + +class bs_cond_disp(bs_cond): + # cond must return field len + + @classmethod + def flen(cls, mode, v): + # print 'disp cond', mode, + # print v, v_admode_info(mode, v['opmode'], v['admode']) + # if v_admode_info(mode, v['opmode'], v['admode']) ==16: + if admode_prefix((mode, v['opmode'], v['admode'])) == 16: + if v['mod'] == 0b00: + if v['rm'] == 0b110: + return 16 + else: + return None + elif v['mod'] == 0b01: + return 8 + elif v['mod'] == 0b10: + return 16 + return None + # 32, 64 + if 'sib_base' in v and v['sib_base'] == 0b101: + if v['mod'] == 0b00: + return 32 + elif v['mod'] == 0b01: + return 8 + elif v['mod'] == 0b10: + return 32 + else: + return None + + if v['mod'] == 0b00: + if v['rm'] == 0b101: + return 32 + else: + return None + elif v['mod'] == 0b01: + return 8 + elif v['mod'] == 0b10: + return 32 + else: + return None + + def encode(self): + if self.value is None: + self.value = 0 + self.l = 0 + return True + self.value = swap_uint(self.l, self.value) + return True + + def decode(self, v): + admode = self.parent.v_admode() + v = swap_uint(self.l, v) + self.value = v + v = sign_ext(v, self.l, admode) + v = ExprInt_fromsize(admode, v) + self.expr = v + return True + + +class bs_cond_imm(bs_cond_scale, m_arg): + parser = int_or_expr + max_size = 32 + + def fromstring(self, s, parser_result=None): + if parser_result: + e, start, stop = parser_result[self.parser] + else: + try: + e, start, stop = self.parser.scanString(s).next() + except StopIteration: + e = None + self.expr = e + + if len(self.parent.args) > 1: + l = self.parent.args[0].expr.size + else: + l = self.parent.v_opmode() + # l = min(l, self.max_size) + # l = offsize(self.parent) + if isinstance(self.expr, ExprInt): + v = int(self.expr.arg) + mask = ((1 << l) - 1) + v = v & mask + e = ExprInt_fromsize(l, v) + 
self.expr = e + + if self.expr is None: + log.debug('cannot fromstring int %r' % s) + return None, None + return start, stop + + @classmethod + def flen(cls, mode, v): + if 'w8' not in v or v['w8'] == 1: + if 'se' in v and v['se'] == 1: + return 8 + else: + # osize = v_opmode_info(mode, v['opmode'], v['admode']) + # osize = opmode_prefix((mode, v['opmode'], v['admode'])) + osize = v_opmode_info(mode, v['opmode'], v['rex_w'], 0) + osize = min(osize, cls.max_size) + return osize + return 8 + + def getmaxlen(self): + return 32 + + def encode(self): + if not isinstance(self.expr, ExprInt): + raise StopIteration + arg0_expr = self.parent.args[0].expr + self.parent.rex_w.value = 0 + # special case for push + if len(self.parent.args) == 1: + v = int(self.expr.arg) + l = self.parent.v_opmode() + l = min(l, self.max_size) + + self.l = l + mask = ((1 << self.l) - 1) + # print 'ext', self.l, l, hex(v), hex(sign_ext(v & ((1<<self.l)-1), + # self.l, l)) + if v != sign_ext(v & mask, self.l, l): + raise StopIteration + self.value = swap_uint(self.l, v & ((1 << self.l) - 1)) + # print hex(self.value) + yield True + raise StopIteration + + # assume 2 args; use first arg to guess op size + if arg0_expr.size == 64: + self.parent.rex_w.value = 1 + + l = self.parent.v_opmode() # self.parent.args[0].expr.size + # print 'imm enc', l, self.parent.rex_w.value + v = int(self.expr.arg) + # print "imms size", l, hex(v), arg0_expr.size + if arg0_expr.size == 8: + if not hasattr(self.parent, 'w8'): + raise StopIteration + self.parent.w8.value = 0 + l = 8 + if hasattr(self.parent, 'se'): + self.parent.se.value = 0 + elif hasattr(self.parent, 'se'): + if hasattr(self.parent, 'w8'): + self.parent.w8.value = 1 + # print 'test', 8, hex(v), + # print hex(sign_ext(v & 0xFF, 8, arg0_expr.size)) + # try to generate signed extended version + if v == sign_ext(v & 0xFF, 8, arg0_expr.size): + # print 'setse' + self.parent.se.value = 1 + self.l = 8 + self.value = v & 0xFF + yield True + self.parent.se.value 
= 0 + else: + if hasattr(self.parent, 'w8'): + self.parent.w8.value = 1 + if l == 64: + self.l = self.getmaxlen() + else: + self.l = l + # l = min(l, self.max_size) + + mask = ((1 << self.l) - 1) + # print 'ext', self.l, l, hex(v), hex(sign_ext(v & ((1<<self.l)-1), + # self.l, l)) + if v != sign_ext(v & mask, self.l, l): + raise StopIteration + self.value = swap_uint(self.l, v & ((1 << self.l) - 1)) + # print hex(self.value) + yield True + + def decode(self, v): + opmode = self.parent.v_opmode() + v = swap_uint(self.l, v) + self.value = v + l_out = opmode + if hasattr(self.parent, 'w8') and self.parent.w8.value == 0: + l_out = 8 + v = sign_ext(v, self.l, l_out) + v = ExprInt_fromsize(l_out, v) + self.expr = v + # print self.expr, repr(self.expr) + return True + + +class bs_cond_imm64(bs_cond_imm): + max_size = 64 + + def getmaxlen(self): + return 64 + + @classmethod + def flen(cls, mode, v): + if 'w8' not in v or v['w8'] == 1: + if 'se' in v and v['se'] == 1: + return 8 + else: + osize = v_opmode_info(mode, v['opmode'], v['rex_w'], 0) + return osize + else: + return 8 + + +class bs_rel_off(bs_cond_imm): # m_arg): + parser = int_or_expr + + def fromstring(self, s, parser_result=None): + if parser_result: + e, start, stop = parser_result[self.parser] + else: + try: + e, start, stop = self.parser.scanString(s).next() + except StopIteration: + e = None + self.expr = e + l = self.parent.mode + if isinstance(self.expr, ExprInt): + v = int(self.expr.arg) + mask = ((1 << l) - 1) + v = v & mask + e = ExprInt_fromsize(l, v) + self.expr = e + return start, stop + + @classmethod + def flen(cls, mode, v): + osize = v_opmode_info(mode, v['opmode'], v['rex_w'], 0) + if osize == 16: + return 16 + else: + return 32 + + def encode(self): + if not isinstance(self.expr, ExprInt): + raise StopIteration + arg0_expr = self.parent.args[0].expr + if self.l != 0: + l = self.l + else: + l = self.parent.v_opmode() # self.parent.args[0].expr.size + self.l = l + # if l == 16: + # self.l = 16 + 
# else: + # self.l = 32 + l = offsize(self.parent) + + # l = self.parent.v_opmode()#self.parent.args[0].expr.size + # print 'imm enc', l, self.parent.rex_w.value + v = int(self.expr.arg) + mask = ((1 << self.l) - 1) + # print 'ext', self.l, l, hex(v), hex(sign_ext(v & ((1<<self.l)-1), + # self.l, l)) + if self.l > l: + raise StopIteration + if v != sign_ext(v & mask, self.l, l): + raise StopIteration + self.value = swap_uint(self.l, v & ((1 << self.l) - 1)) + # print hex(self.value) + yield True + + def decode(self, v): + v = swap_uint(self.l, v) + size = offsize(self.parent) + v = sign_ext(v, self.l, size) + v = ExprInt_fromsize(size, v) + self.expr = v + # print self.expr, repr(self.expr) + return True + + +class bs_rel_off08(bs_rel_off): + + @classmethod + def flen(cls, mode, v): + return 8 + + +class bs_moff(bsi): + + @classmethod + def flen(cls, mode, v): + osize = v_opmode_info(mode, v['opmode'], v['rex_w'], 0) + if osize == 16: + return 16 + else: + return 32 + + def encode(self): + if not hasattr(self.parent, "mseg"): + raise StopIteration + m = self.parent.mseg.expr + if (not (isinstance(m, ExprMem) and m.is_op_segm() and + isinstance(m.arg.args[0], ExprInt))): + raise StopIteration + l = self.parent.v_opmode() # self.parent.args[0].expr.size + if l == 16: + self.l = 16 + else: + self.l = 32 + # print 'imm enc', l, self.parent.rex_w.value + v = int(m.arg.args[1].arg) + mask = ((1 << self.l) - 1) + # print 'ext', self.l, l, hex(v), hex(sign_ext(v & ((1<<self.l)-1), + # self.l, l)) + if v != sign_ext(v & mask, self.l, l): + raise StopIteration + self.value = swap_uint(self.l, v & ((1 << self.l) - 1)) + # print hex(self.value) + yield True + + def decode(self, v): + opmode = self.parent.v_opmode() + if opmode == 64: + return False + v = swap_uint(self.l, v) + self.value = v + v = sign_ext(v, self.l, opmode) + v = ExprInt_fromsize(opmode, v) + self.expr = v + # print self.expr, repr(self.expr) + return True + + +class bs_movoff(m_arg): + parser = deref_mem + + 
def fromstring(self, s, parser_result=None): + if parser_result: + e, start, stop = parser_result[self.parser] + if e is None: + return None, None + # print 'fromstring', hex(e), self.int2expr + if not isinstance(e, ExprMem): + return None, None + self.expr = e + if self.expr is None: + return None, None + return start, stop + try: + v, start, stop = self.parser.scanString(s).next() + except StopIteration: + return None, None + if not isinstance(e, ExprMem): + return None, None + e = v[0] + if e is None: + log.debug('cannot fromstring int %r' % s) + return None, None + self.expr = e + return start, stop + + @classmethod + def flen(cls, mode, v): + if mode == 64: + if v['admode']: + return 32 + else: + return 64 + asize = v_admode_info(mode, v['admode']) + return asize + + def encode(self): + e = self.expr + p = self.parent + if not isinstance(e, ExprMem) or not isinstance(e.arg, ExprInt): + raise StopIteration + self.l = p.v_admode() + # print 'imm enc', l, self.parent.rex_w.value + v = int(e.arg.arg) + mask = ((1 << self.l) - 1) + if v != mask & v: + raise StopIteration + self.value = swap_uint(self.l, v & ((1 << self.l) - 1)) + yield True + + def decode(self, v): + if self.parent.mode == 64: + if self.parent.admode == 1: + l = 32 + else: + l = 64 + else: + l = self.parent.v_admode() + v = swap_uint(self.l, v) + self.value = v + v = sign_ext(v, self.l, l) + v = ExprInt_fromsize(l, v) + size = self.parent.v_opmode() + if self.parent.w8.value == 0: + size = 8 + self.expr = ExprMem(v, size) + # print self.expr, repr(self.expr) + return True + + @staticmethod + def arg2str(e): + sz = {8: 'BYTE', 16: 'WORD', 32: 'DWORD', 64: 'QWORD', 80: 'TBYTE'} + o = sz[e.size] + ' PTR [%s]' % e.arg + return "%s" % o + + +class bs_msegoff(m_arg): + parser = deref_ptr + + def fromstring(self, s, parser_result=None): + if parser_result: + e, start, stop = parser_result[self.parser] + if e is None: + return None, None + self.expr = e + if self.expr is None: + return None, None + return 
start, stop + try: + v, start, stop = self.parser.scanString(s).next() + except StopIteration: + return None, None + e = v[0] + if e is None: + log.debug('cannot fromstring int %r' % s) + return None, None + self.expr = e + return start, stop + + def encode(self): + if not (isinstance(self.expr, ExprMem) and self.expr.is_op_segm()): + raise StopIteration + if not isinstance(self.expr.arg.args[0], ExprInt): + raise StopIteration + if not isinstance(self.expr.arg.args[1], ExprInt): + raise StopIteration + l = self.parent.v_opmode() # self.parent.args[0].expr.size + # print 'imm enc', l, self.parent.rex_w.value + v = int(self.expr.arg.args[0].arg) + mask = ((1 << self.l) - 1) + # print 'ext', self.l, l, hex(v), hex(sign_ext(v & ((1<<self.l)-1), + # self.l, l)) + if v != sign_ext(v & mask, self.l, l): + raise StopIteration + self.value = swap_uint(self.l, v & ((1 << self.l) - 1)) + yield True + + def decode(self, v): + opmode = self.parent.v_opmode() + v = swap_uint(self.l, v) + self.value = v + v = sign_ext(v, self.l, opmode) + v = ExprInt_fromsize(opmode, v) + e = ExprMem(ExprOp('segm', v, self.parent.off.expr)) + self.expr = e + # print self.expr, repr(self.expr) + return True + + @staticmethod + def arg2str(e): + return "%s:%s" % (e.arg.args[0], e.arg.args[1]) + + +d_rex_p = bs(l=0, cls=(bs_fbit,), fname="rex_p") +d_rex_w = bs(l=0, cls=(bs_fbit,), fname="rex_w") +d_rex_r = bs(l=0, cls=(bs_fbit,), fname="rex_r") +d_rex_x = bs(l=0, cls=(bs_fbit,), fname="rex_x") +d_rex_b = bs(l=0, cls=(bs_fbit,), fname="rex_b") + +d_g1 = bs(l=0, cls=(bs_fbit,), fname="g1") +d_g2 = bs(l=0, cls=(bs_fbit,), fname="g2") + + +d_cl1 = bs(l=1, cls=(bs_cl1,), fname="cl1") + + +w8 = bs(l=1, fname="w8") +se = bs(l=1, fname="se") + +sx = bs(l=0, fname="sx") +sxd = bs(l=0, fname="sx") + + +xmm = bs(l=0, fname="xmm") +mm = bs(l=0, fname="mm") +xmmreg = bs(l=0, fname="xmmreg") +mmreg = bs(l=0, fname="mmreg") + +pref_f2 = bs(l=0, fname="prefixed", default="\xf2") +pref_f3 = bs(l=0, 
fname="prefixed", default="\xf3") +pref_66 = bs(l=0, fname="prefixed", default="\x66") +no_xmm_pref = bs(l=0, fname="no_xmm_pref") + +sib_scale = bs(l=2, cls=(bs_cond_scale,), fname = "sib_scale") +sib_index = bs(l=3, cls=(bs_cond_index,), fname = "sib_index") +sib_base = bs(l=3, cls=(bs_cond_index,), fname = "sib_base") + +disp = bs(l=0, cls=(bs_cond_disp,), fname = "disp") + + +u08 = bs(l=8, cls=(x86_08, m_arg)) +u07 = bs(l=7, cls=(x86_08, m_arg)) +u16 = bs(l=16, cls=(x86_16, m_arg)) +u32 = bs(l=32, cls=(x86_32, m_arg)) +s3264 = bs(l=32, cls=(x86_s32to64, m_arg)) + +u08_3 = bs(l=0, cls=(x86_imm_fix, m_arg), ival = 3) + +d0 = bs("000", fname='reg') +d1 = bs("001", fname='reg') +d2 = bs("010", fname='reg') +d3 = bs("011", fname='reg') +d4 = bs("100", fname='reg') +d5 = bs("101", fname='reg') +d6 = bs("110", fname='reg') +d7 = bs("111", fname='reg') + +sd = bs(l=1, fname="sd") +wd = bs(l=1, fname="wd") + +stk = bs(l=0, fname="stk") + + +class field_size: + prio = default_prio + + def __init__(self, d=None): + if d is None: + d = {} + self.d = d + + def get(self, opm, adm=None): + return self.d[opm] + +d_imm64 = bs(l=0, fname="imm64") + +# d_eax = bs_eax(l=0) +d_eax = bs(l=0, cls=(bs_eax, ), fname='eax') +d_edx = bs(l=0, cls=(bs_edx, ), fname='edx') +d_st = bs(l=0, cls=(x86_reg_st, ), fname='st') +# d_imm = bs(l=0, cls=(bs_cond_imm,), fname="imm") +d_imm = bs(l=0, cls=(bs_cond_imm,), fname="imm") +d_imm64 = bs(l=0, cls=(bs_cond_imm64,), fname="imm") +d_ax = bs(l=0, cls=(r_ax, ), fname='ax') +d_dx = bs(l=0, cls=(r_dx, ), fname='dx') +d_cl = bs(l=0, cls=(r_cl, ), fname='cl') + +d_cs = bs(l=0, cls=(bs_cs, ), fname='cs') +d_ds = bs(l=0, cls=(bs_ds, ), fname='ds') +d_es = bs(l=0, cls=(bs_es, ), fname='es') +d_ss = bs(l=0, cls=(bs_ss, ), fname='ss') +d_fs = bs(l=0, cls=(bs_fs, ), fname='fs') +d_gs = bs(l=0, cls=(bs_gs, ), fname='gs') + +rel_off = bs(l=0, cls=(bs_rel_off,), fname="off") +rel_off08 = bs(l=8, cls=(bs_rel_off08,), fname="off") +moff = bs(l=0, cls=(bs_moff,), 
fname="off") +msegoff = bs(l=16, cls=(bs_msegoff,), fname="mseg") +movoff = bs(l=0, cls=(bs_movoff,), fname="off") +mod = bs(l=2, fname="mod") + +rmreg = bs(l=3, cls=(x86_rm_reg, ), order =1, fname = "reg") +reg = bs(l=3, cls=(x86_reg, ), order =1, fname = "reg") +regnoarg = bs(l=3, default_val="000", order=1, fname="reg") +segm = bs(l=3, cls=(x86_rm_segm, ), order =1, fname = "reg") +crreg = bs(l=3, cls=(x86_rm_cr, ), order =1, fname = "reg") +drreg = bs(l=3, cls=(x86_rm_dr, ), order =1, fname = "reg") + +fltreg = bs(l=3, cls=(x86_rm_flt, ), order =1, fname = "reg") + +rm = bs(l=3, fname="rm") + +rm_arg = bs(l=0, cls=(x86_rm_arg,), fname='rmarg') +rm_arg_w8 = bs(l=0, cls=(x86_rm_w8,), fname='rmarg') +rm_arg_sx = bs(l=0, cls=(x86_rm_sx,), fname='rmarg') +rm_arg_sxd = bs(l=0, cls=(x86_rm_sxd,), fname='rmarg') +rm_arg_sd = bs(l=0, cls=(x86_rm_sd,), fname='rmarg') +rm_arg_wd = bs(l=0, cls=(x86_rm_wd,), fname='rmarg') +rm_arg_m80 = bs(l=0, cls=(x86_rm_m80,), fname='rmarg') +rm_arg_m64 = bs(l=0, cls=(x86_rm_m64,), fname='rmarg') +rm_arg_m08 = bs(l=0, cls=(x86_rm_m08,), fname='rmarg') +rm_arg_m16 = bs(l=0, cls=(x86_rm_m16,), fname='rmarg') + +swapargs = bs_swapargs(l=1, fname="swap", mn_mod=range(1 << 1)) + + +cond_list = ["O", "NO", "B", "AE", + "Z", "NZ", "BE", "A", + "S", "NS", "PE", "NP", + #"L", "NL", "NG", "G"] + "L", "GE", "LE", "G"] +cond = bs_mod_name(l=4, fname='cond', mn_mod=cond_list) + + +def rmmod(r, rm_arg_x=rm_arg): + return [mod, r, rm, sib_scale, sib_index, sib_base, disp, rm_arg_x] + +# +# mode | reg | rm # +# + +# +# scale | index | base # +# + +# +# Prefix | REX prefix | Opcode | mod/rm | sib | displacement | immediate # +# + + +def addop(name, fields, args=None, alias=False): + dct = {"fields": fields} + dct["alias"] = alias + if args is not None: + dct['args'] = args + type(name, (mn_x86,), dct) +""" +class ia32_aaa(mn_x86): + fields = [bs8(0x37)] +""" +addop("aaa", [bs8(0x37)]) +addop("aas", [bs8(0x3F)]) +addop("aad", [bs8(0xd5), u08]) 
addop("aam", [bs8(0xd4), u08])

# Each arithmetic mnemonic below is declared in three forms:
# accumulator/immediate, modrm/immediate (reg field fixed to dN) and
# modrm/register (operand order chosen by swapargs).
addop("adc", [bs("0001010"), w8, d_eax, d_imm])
addop("adc", [bs("100000"), se, w8] + rmmod(d2, rm_arg_w8) + [d_imm])
addop("adc", [bs("000100"), swapargs, w8] +
      rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg])

addop("add", [bs("0000010"), w8, d_eax, d_imm])
addop("add", [bs("100000"), se, w8] + rmmod(d0, rm_arg_w8) + [d_imm])
addop("add", [bs("000000"), swapargs, w8] +
      rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg])

addop("and", [bs("0010010"), w8, d_eax, d_imm])
addop("and", [bs("100000"), se, w8] + rmmod(d4, rm_arg_w8) + [d_imm])
addop("and", [bs("001000"), swapargs, w8] +
      rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg])

addop("bsf", [bs8(0x0f), bs8(0xbc)] + rmmod(rmreg))
# same field list as rmmod(rmreg), spelled out
addop("bsr", [bs8(0x0f), bs8(0xbd), mod,
      rmreg, rm, sib_scale, sib_index, sib_base, disp, rm_arg])

addop("bswap", [bs8(0x0f), bs('11001'), reg])

addop("bt", [bs8(0x0f), bs8(0xa3)] + rmmod(rmreg), [rm_arg, rmreg])
addop("bt", [bs8(0x0f), bs8(0xba)] + rmmod(d4) + [u08])
addop("btc", [bs8(0x0f), bs8(0xbb)] + rmmod(rmreg), [rm_arg, rmreg])
addop("btc", [bs8(0x0f), bs8(0xba)] + rmmod(d7) + [u08])


addop("btr", [bs8(0x0f), bs8(0xb3)] + rmmod(rmreg), [rm_arg, rmreg])
addop("btr", [bs8(0x0f), bs8(0xba)] + rmmod(d6) + [u08])
addop("bts", [bs8(0x0f), bs8(0xab)] + rmmod(rmreg), [rm_arg, rmreg])
addop("bts", [bs8(0x0f), bs8(0xba)] + rmmod(d5) + [u08])

addop("call", [bs8(0xe8), rel_off])
addop("call", [bs8(0xff), stk] + rmmod(d2))
addop("call", [bs8(0x9a), moff, msegoff])


class bs_op_mode(bsi):
    # Field that decodes successfully only when the parent's operand
    # size equals this field's ``mode`` attribute.

    def decode(self, v):
        opmode = self.parent.v_opmode()
        # print "MODE", opmode, self.mode
        return opmode == self.mode


class bs_ad_mode(bsi):
    # Same as bs_op_mode, but compares the parent's address size.

    def decode(self, v):
        admode = self.parent.v_admode()
        # print "MODE", opmode, self.mode
        return admode == self.mode


class bs_op_mode_no64(bsi):
    # Operand-size selector that also refuses to encode or decode when
    # the parent is in 64 bit mode.

    def encode(self):
        if self.parent.mode == 64:
            return False
        return super(bs_op_mode_no64, self).encode()

    def decode(self, v):
        if self.parent.mode == 64:
            return False
        opmode = self.parent.v_opmode()
        # print "MODE", opmode, self.mode
        return opmode == self.mode


# Zero-length selector fields: several mnemonics share one opcode byte and
# are told apart by the operand size (fopmode) or address size (fadmode).
bs_opmode16 = bs(l=0, cls=(bs_op_mode,), mode = 16, fname="fopmode")
bs_opmode32 = bs(l=0, cls=(bs_op_mode,), mode = 32, fname="fopmode")
bs_opmode64 = bs(l=0, cls=(bs_op_mode,), mode = 64, fname="fopmode")


bs_admode16 = bs(l=0, cls=(bs_ad_mode,), mode = 16, fname="fadmode")
bs_admode32 = bs(l=0, cls=(bs_ad_mode,), mode = 32, fname="fadmode")
bs_admode64 = bs(l=0, cls=(bs_ad_mode,), mode = 64, fname="fadmode")

bs_opmode16_no64 = bs(l=0, cls=(bs_op_mode_no64,), mode = 16, fname="fopmode")
bs_opmode32_no64 = bs(l=0, cls=(bs_op_mode_no64,), mode = 32, fname="fopmode")

# class ia32_call(mn_x86):
#    fields = [bs8(0xff)] + rmmod(d3)
# conv_name = {16:'CBW', 32:'CWDE', 64:'CDQE'}
# bs_conv_name = bs_modname_size(l=0, name=conv_name)
addop("cbw", [bs8(0x98), bs_opmode16])
addop("cwde", [bs8(0x98), bs_opmode32])
addop("cdqe", [bs8(0x98), bs_opmode64])

addop("clc", [bs8(0xf8)])
addop("cld", [bs8(0xfc)])
addop("cli", [bs8(0xfa)])
addop("clts", [bs8(0x0f), bs8(0x06)])
addop("cmc", [bs8(0xf5)])

# the 4 bit ``cond`` field takes its suffix from cond_list
addop("cmov", [bs8(0x0f), bs('0100'), cond] + rmmod(rmreg))

addop("cmp", [bs("0011110"), w8, d_eax, d_imm])
addop("cmp", [bs("100000"), se, w8] + rmmod(d7, rm_arg_w8) + [d_imm])
addop("cmp", [bs("001110"), swapargs, w8] +
      rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg])


addop("cmpsb", [bs8(0xa6)])
# cmps_name = {16:'CMPSW', 32:'CMPSD', 64:'CMPSQ'}
# bs_cmps_name = bs_modname_size(l=0, name=cmps_name)
# addop("cmps", [bs8(0xa7), bs_cmps_name])
addop("cmpsw", [bs8(0xa7), bs_opmode16])
addop("cmpsd", [bs8(0xa7), bs_opmode32])
addop("cmpsq", [bs8(0xa7), bs_opmode64])

addop("cmpxchg", [bs8(0x0f), bs('1011000'), w8]
      + rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg])
# XXX TODO CMPXCHG8/16
addop("cpuid", [bs8(0x0f), bs8(0xa2)])

# convbis_name = {16:'CWD', 32:'CDQ', 64:'CQO'}
# bs_convbis_name = bs_modname_size(l=0, name=convbis_name)
# addop("convbis", [bs8(0x99), bs_convbis_name])
addop("cwd", [bs8(0x99), bs_opmode16])
addop("cdq", [bs8(0x99), bs_opmode32])
addop("cqo", [bs8(0x99), bs_opmode64])


addop("daa", [bs8(0x27)])
addop("das", [bs8(0x2f)])
addop("dec", [bs('1111111'), w8] + rmmod(d1, rm_arg_w8))
addop("dec", [bs('01001'), reg])
addop("div", [bs('1111011'), w8] + rmmod(d6, rm_arg_w8))
addop("enter", [bs8(0xc8), u16, u08])

# float #####
addop("fwait", [bs8(0x9b)])

addop("f2xm1", [bs8(0xd9), bs8(0xf0)])
addop("fabs", [bs8(0xd9), bs8(0xe1)])

# In the memory forms below the one bit sd / wd field picks the operand
# width (see x86_rm_sd / x86_rm_wd).
addop("fadd", [bs("11011"), sd, bs("00")] + rmmod(d0, rm_arg_sd))
addop("fadd", [bs("11011"), swapargs, bs("00"),
      bs("11000"), d_st, fltreg], [d_st, fltreg])
addop("faddp", [bs8(0xde), bs("11000"), fltreg, d_st])
addop("fiadd", [bs("11011"), wd, bs("10")] + rmmod(d0, rm_arg_wd))

addop("fbld", [bs8(0xdf)] + rmmod(d4, rm_arg_m80))
# NOTE(review): the Intel manual names the DF /6 encoding FBSTP -- confirm
# whether the "fbldp" spelling is relied upon elsewhere.
addop("fbldp", [bs8(0xdf)] + rmmod(d6, rm_arg_m80))
addop("fchs", [bs8(0xd9), bs8(0xe0)])
# addop("fclex", [bs8(0x9b), bs8(0xdb), bs8(0xe2)])
addop("fnclex", [bs8(0xdb), bs8(0xe2)])

addop("fcmovb", [bs8(0xda), bs("11000"), d_st, fltreg])
addop("fcmove", [bs8(0xda), bs("11001"), d_st, fltreg])
addop("fcmovbe", [bs8(0xda), bs("11010"), d_st, fltreg])
addop("fcmovu", [bs8(0xda), bs("11011"), d_st, fltreg])
addop("fcmovnb", [bs8(0xdb), bs("11000"), d_st, fltreg])
addop("fcmovne", [bs8(0xdb), bs("11001"), d_st, fltreg])
addop("fcmovnbe", [bs8(0xdb), bs("11010"), d_st, fltreg])
addop("fcmovnu", [bs8(0xdb), bs("11011"), d_st, fltreg])

addop("fcom", [bs("11011"), sd, bs("00")] + rmmod(d2, rm_arg_sd))
addop("fcom", [bs("11011"), swapargs, bs("00"),
      bs("11010"), d_st, fltreg], [d_st, fltreg])
addop("fcomp", [bs("11011"), sd, bs("00")] + rmmod(d3, rm_arg_sd))
addop("fcomp",
      [bs("11011"), swapargs, bs("00"), bs("11011"),
      d_st, fltreg], [d_st, fltreg])
addop("fcompp", [bs8(0xde), bs8(0xd9)])

addop("fcomi", [bs8(0xdb), bs("11110"), d_st, fltreg])
addop("fcomip", [bs8(0xdf), bs("11110"), d_st, fltreg])
addop("fucomi", [bs8(0xdb), bs("11101"), d_st, fltreg])
addop("fucomip", [bs8(0xdf), bs("11101"), d_st, fltreg])

addop("fcos", [bs8(0xd9), bs8(0xff)])
addop("fdecstp", [bs8(0xd9), bs8(0xf6)])


addop("fdiv", [bs("11011"), sd, bs("00")] + rmmod(d6, rm_arg_sd))
addop("fdiv", [bs8(0xd8), bs("11110"), d_st, fltreg])
addop("fdiv", [bs8(0xdc), bs("11111"), fltreg, d_st])
addop("fdivp", [bs8(0xde), bs("11111"), fltreg, d_st])
addop("fidiv", [bs("11011"), wd, bs("10")] + rmmod(d6, rm_arg_wd))

addop("fdivr", [bs("11011"), sd, bs("00")] + rmmod(d7, rm_arg_sd))
addop("fdivr", [bs8(0xd8), bs("11111"), d_st, fltreg])
addop("fdivr", [bs8(0xdc), bs("11110"), fltreg, d_st])
addop("fdivrp", [bs8(0xde), bs("11110"), fltreg, d_st])
addop("fidivr", [bs("11011"), wd, bs("10")] + rmmod(d7, rm_arg_wd))

addop("ffree", [bs8(0xdd), bs("11000"), fltreg])
addop("ficom", [bs("11011"), wd, bs("10")] + rmmod(d2, rm_arg_wd))
addop("ficomp", [bs("11011"), wd, bs("10")] + rmmod(d3, rm_arg_wd))
addop("fild", [bs("11011"), wd, bs("11")] + rmmod(d0, rm_arg_wd))
addop("fild", [bs8(0xdf)] + rmmod(d5, rm_arg_m64))

addop("fincstp", [bs8(0xd9), bs8(0xf7)])

# addop("finit", [bs8(0x9b), bs8(0xdb), bs8(0xe3)])
addop("fninit", [bs8(0xdb), bs8(0xe3)])

addop("fist", [bs("11011"), wd, bs("11")] + rmmod(d2, rm_arg_wd))
addop("fistp", [bs("11011"), wd, bs("11")] + rmmod(d3, rm_arg_wd))
addop("fistp", [bs8(0xdf)] + rmmod(d7, rm_arg_m64))

addop("fisttp", [bs("11011"), wd, bs("11")] + rmmod(d1, rm_arg_wd))
addop("fisttp", [bs8(0xdd)] + rmmod(d1, rm_arg_m64))

addop("fld", [bs("11011"), sd, bs("01")] + rmmod(d0, rm_arg_sd))
addop("fld", [bs8(0xdb)] + rmmod(d5, rm_arg_m80))
addop("fld", [bs8(0xd9), bs("11000"), fltreg])

addop("fld1", [bs8(0xd9), bs8(0xe8)])
addop("fldl2t", [bs8(0xd9), bs8(0xe9)])
addop("fldl2e", [bs8(0xd9), bs8(0xea)])
addop("fldpi", [bs8(0xd9), bs8(0xeb)])
addop("fldlg2", [bs8(0xd9), bs8(0xec)])
addop("fldln2", [bs8(0xd9), bs8(0xed)])
addop("fldz", [bs8(0xd9), bs8(0xee)])

addop("fldcw", [bs8(0xd9)] + rmmod(d5, rm_arg_m16))
addop("fldenv", [bs8(0xd9)] + rmmod(d4, rm_arg_m80))  # XXX TODO: m14?

addop("fmul", [bs("11011"), sd, bs("00")] + rmmod(d1, rm_arg_sd))
addop("fmul", [bs("11011"), swapargs, bs("00"),
      bs("11001"), d_st, fltreg], [d_st, fltreg])
addop("fmulp", [bs8(0xde), bs("11001"), fltreg, d_st])
addop("fimul", [bs("11011"), wd, bs("10")] + rmmod(d1, rm_arg_wd))

addop("fnop", [bs8(0xd9), bs8(0xd0)])
addop("fpatan", [bs8(0xd9), bs8(0xf3)])
addop("fprem", [bs8(0xd9), bs8(0xf8)])
addop("fprem1", [bs8(0xd9), bs8(0xf5)])
addop("fptan", [bs8(0xd9), bs8(0xf2)])
addop("frndint", [bs8(0xd9), bs8(0xfc)])
addop("frstor", [bs8(0xdd)] + rmmod(d4, rm_arg_m80))  # XXX TODO: m94 ?
# addop("fsave", [bs8(0x9b), bs8(0xdd)] + rmmod(d6, rm_arg_m80)) # XXX
# TODO: m94 ?
addop("fnsave", [bs8(0xdd)] + rmmod(d6, rm_arg_m80))  # XXX TODO: m94 ?

addop("fscale", [bs8(0xd9), bs8(0xfd)])
addop("fsin", [bs8(0xd9), bs8(0xfe)])
addop("fsincos", [bs8(0xd9), bs8(0xfb)])
addop("fsqrt", [bs8(0xd9), bs8(0xfa)])

addop("fst", [bs("11011"), sd, bs("01")] + rmmod(d2, rm_arg_sd))
addop("fst", [bs8(0xdd), bs("11010"), fltreg])
addop("fstp", [bs("11011"), sd, bs("01")] + rmmod(d3, rm_arg_sd))
addop("fstp", [bs8(0xdb)] + rmmod(d7, rm_arg_m80))
addop("fstp", [bs8(0xdd), bs("11011"), fltreg])

# addop("fstcw", [bs8(0x9b), bs8(0xd9)] + rmmod(d7, rm_arg_m16))
addop("fnstcw", [bs8(0xd9)] + rmmod(d7, rm_arg_m16))
# addop("fstenv", [bs8(0x9b), bs8(0xd9)] + rmmod(d6, rm_arg_m80)) # XXX
# TODO: m14?
addop("fnstenv", [bs8(0xd9)] + rmmod(d6, rm_arg_m80))  # XXX TODO: m14?
# The 0x9b-prefixed (waiting) forms are left commented out; only the
# non-waiting ``fn*`` encodings are declared.
# addop("fstsw", [bs8(0x9b), bs8(0xdd)] + rmmod(d7, rm_arg_m16))
addop("fnstsw", [bs8(0xdd)] + rmmod(d7, rm_arg_m16))
# addop("fstsw", [bs8(0x9b), bs8(0xdf), bs8(0xe0), d_ax])
addop("fnstsw", [bs8(0xdf), bs8(0xe0), d_ax])

addop("fsub", [bs("11011"), sd, bs("00")] + rmmod(d4, rm_arg_sd))
addop("fsub", [bs8(0xd8), bs("11100"), d_st, fltreg])
addop("fsub", [bs8(0xdc), bs("11101"), fltreg, d_st])
addop("fsubp", [bs8(0xde), bs("11101"), fltreg, d_st])
addop("fisub", [bs("11011"), wd, bs("10")] + rmmod(d4, rm_arg_wd))

addop("fsubr", [bs("11011"), sd, bs("00")] + rmmod(d5, rm_arg_sd))
addop("fsubr", [bs8(0xd8), bs("11101"), d_st, fltreg])
addop("fsubr", [bs8(0xdc), bs("11100"), fltreg, d_st])
addop("fsubrp", [bs8(0xde), bs("11100"), fltreg, d_st])
addop("fisubr", [bs("11011"), wd, bs("10")] + rmmod(d5, rm_arg_wd))
addop("ftst", [bs8(0xd9), bs8(0xe4)])


addop("fucom", [bs8(0xdd), bs("11100"), fltreg])
addop("fucomp", [bs8(0xdd), bs("11101"), fltreg])
addop("fucompp", [bs8(0xda), bs8(0xe9)])

addop("fxam", [bs8(0xd9), bs8(0xe5)])
addop("fxch", [bs8(0xd9), bs("11001"), fltreg])
addop("fxrstor", [bs8(0x0f), bs8(0xae)]
      + rmmod(d1, rm_arg_m80))  # XXX TODO m512
addop("fxsave", [bs8(0x0f), bs8(0xae)]
      + rmmod(d0, rm_arg_m80))  # XXX TODO m512

addop("fxtract", [bs8(0xd9), bs8(0xf4)])
addop("fyl2x", [bs8(0xd9), bs8(0xf1)])
addop("fyl2xp1", [bs8(0xd9), bs8(0xf9)])

addop("hlt", [bs8(0xf4)])
addop("icebp", [bs8(0xf1)])

addop("idiv", [bs('1111011'), w8] + rmmod(d7, rm_arg_w8))

# imul: one-operand, register/modrm and modrm/immediate forms
addop("imul", [bs('1111011'), w8] + rmmod(d5, rm_arg_w8))
addop("imul", [bs8(0x0f), bs8(0xaf)] + rmmod(rmreg))

addop("imul", [bs("011010"), se, bs('1')] + rmmod(rmreg) + [d_imm])

addop("in", [bs("1110010"), w8, d_eax, u08])
addop("in", [bs("1110110"), w8, d_eax, d_edx])

addop("inc", [bs('1111111'), w8] + rmmod(d0, rm_arg_w8))
addop("inc", [bs('01000'), reg])

addop("insb", [bs8(0x6c)])
# ins_name = {16:'INSW', 32:'INSD', 64:'INSD'}
# bs_ins_name = bs_modname_size(l=0, name=ins_name)
# addop("ins", [bs8(0x6d), bs_ins_name])
addop("insw", [bs8(0x6d), bs_opmode16])
addop("insd", [bs8(0x6d), bs_opmode32])
# "insd" is declared for both 32 and 64 bit operand sizes
addop("insd", [bs8(0x6d), bs_opmode64])

addop("int", [bs8(0xcc), u08_3])
addop("int", [bs8(0xcd), u08])
addop("into", [bs8(0xce)])
addop("invd", [bs8(0x0f), bs8(0x08)])
addop("invlpg", [bs8(0x0f), bs8(0x01)] + rmmod(d7))

# iret_name = {16:'IRET', 32:'IRETD', 64:'IRETQ'}
# bs_iret_name = bs_modname_size(l=0, name=iret_name)
# addop("iret", [bs8(0xcf), stk, bs_iret_name])
addop("iret", [bs8(0xcf), stk, bs_opmode16])
addop("iretd", [bs8(0xcf), stk, bs_opmode32])
addop("iretq", [bs8(0xcf), stk, bs_opmode64])

# "j" is completed by the ``cond`` field suffix
addop("j", [bs('0111'), cond, rel_off08])
# bs_jecxz_name = bs_modname_jecx(l=0)
# addop("jecxz", [bs8(0xe3), rel_off08, bs_jecxz_name])

# 0xe3 is split by address size, not operand size
addop("jcxz", [bs8(0xe3), rel_off08, bs_admode16])
addop("jecxz", [bs8(0xe3), rel_off08, bs_admode32])
addop("jrcxz", [bs8(0xe3), rel_off08, bs_admode64])

addop("j", [bs8(0x0f), bs('1000'), cond, rel_off])
addop("jmp", [bs8(0xeb), rel_off08])
addop("jmp", [bs8(0xe9), rel_off])
# TODO XXX replace stk force64?
addop("jmp", [bs8(0xff), stk] + rmmod(d4))
addop("jmp", [bs8(0xea), moff, msegoff])

addop("jmpf", [bs8(0xff), stk] + rmmod(d5))

addop("lahf", [bs8(0x9f)])
addop("lar", [bs8(0x0f), bs8(0x02)] + rmmod(rmreg))

# XXX TODO LDS LES ...
+addop("lea", [bs8(0x8d)] + rmmod(rmreg)) +addop("leave", [bs8(0xc9)]) + +addop("lodsb", [bs8(0xac)]) +# lods_name = {16:'LODSW', 32:'LODSD', 64:'LODSQ'} +# bs_lods_name = bs_modname_size(l=0, name=lods_name) +# addop("lods", [bs8(0xad), bs_lods_name]) +addop("lodsw", [bs8(0xad), bs_opmode16]) +addop("lodsd", [bs8(0xad), bs_opmode32]) +addop("lodsq", [bs8(0xad), bs_opmode64]) + +addop("loop", [bs8(0xe2), rel_off08]) +addop("loope", [bs8(0xe1), rel_off08]) +addop("loopne", [bs8(0xe0), rel_off08]) +addop("lsl", [bs8(0x0f), bs8(0x03)] + rmmod(rmreg)) +addop("monitor", [bs8(0x0f), bs8(0x01), bs8(0xc8)]) + +addop("mov", [bs("100010"), swapargs, w8] + + rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg]) +addop("mov", [bs("100011"), swapargs, bs('0')] + rmmod(segm), [rm_arg, segm]) +addop("mov", [bs("101000"), swapargs, w8, d_eax, movoff], [d_eax, movoff]) +addop("mov", [bs("1011"), w8, reg, d_imm64]) +addop("mov", [bs("1100011"), w8] + rmmod(d0, rm_arg_w8) + [d_imm]) +addop("mov", [bs8(0x0f), bs("001000"), swapargs, bs('0')] + + rmmod(crreg), [rm_arg, crreg]) +addop("mov", [bs8(0x0f), bs("001000"), swapargs, bs('1')] + + rmmod(drreg), [rm_arg, drreg]) +addop("movsb", [bs8(0xa4)]) +# movs_name = {16:'MOVSW', 32:'MOVSD', 64:'MOVSQ'} +# bs_movs_name = bs_modname_size(l=0, name=movs_name) +# addop("movs", [bs8(0xa5), bs_movs_name]) +addop("movsw", [bs8(0xa5), bs_opmode16]) +addop("movsd", [bs8(0xa5), bs_opmode32]) +addop("movsq", [bs8(0xa5), bs_opmode64]) + +addop("movsx", [bs8(0x0f), bs("1011111"), w8, sx] + rmmod(rmreg, rm_arg_sx)) +# addop("movsxd", [bs8(0x63), sxd] + rmmod(rmreg, rm_arg_sxd)) +type("movsxd", (mn_x86,), { + "fields": [bs8(0x63), sxd] + rmmod(rmreg, rm_arg_sxd), + "modes": [64], 'alias': False}) + +addop("movups", + [bs8(0x0f), bs8(0x10), xmm, no_xmm_pref] + rmmod(rmreg, rm_arg)) +addop("movsd", [bs8(0x0f), bs("0001000"), swapargs, xmm, pref_f2] + + rmmod(rmreg, rm_arg), [xmm, rm_arg]) +addop("movss", [bs8(0x0f), bs8(0x10), xmm, pref_f3] + rmmod(rmreg, rm_arg)) 
+addop("movupd", [bs8(0x0f), bs8(0x10), xmm, pref_66] + rmmod(rmreg, rm_arg)) + + +addop("addss", [bs8(0x0f), bs8(0x58), xmm, pref_f3] + rmmod(rmreg, rm_arg)) +addop("addsd", [bs8(0x0f), bs8(0x58), xmm, pref_f2] + rmmod(rmreg, rm_arg)) + +addop("subss", [bs8(0x0f), bs8(0x5c), xmm, pref_f3] + rmmod(rmreg, rm_arg)) +addop("subsd", [bs8(0x0f), bs8(0x5c), xmm, pref_f2] + rmmod(rmreg, rm_arg)) + +addop("mulss", [bs8(0x0f), bs8(0x59), xmm, pref_f3] + rmmod(rmreg, rm_arg)) +addop("mulsd", [bs8(0x0f), bs8(0x59), xmm, pref_f2] + rmmod(rmreg, rm_arg)) + +addop("divss", [bs8(0x0f), bs8(0x5e), xmm, pref_f3] + rmmod(rmreg, rm_arg)) +addop("divsd", [bs8(0x0f), bs8(0x5e), xmm, pref_f2] + rmmod(rmreg, rm_arg)) + + +addop("pminsw", [bs8(0x0f), bs8(0xea), mm, no_xmm_pref] + rmmod(rmreg, rm_arg)) +addop("pminsw", [bs8(0x0f), bs8(0xea), xmm, pref_66] + rmmod(rmreg, rm_arg)) + + +addop("pxor", [bs8(0x0f), bs8(0xef), xmm] + rmmod(rmreg, rm_arg)) + +addop("ucomiss", + [bs8(0x0f), bs8(0x2e), xmm, no_xmm_pref] + rmmod(rmreg, rm_arg)) +addop("ucomisd", [bs8(0x0f), bs8(0x2e), xmm, pref_66] + rmmod(rmreg, rm_arg)) + +addop("andps", [bs8(0x0f), bs8(0x54), xmm, no_xmm_pref] + rmmod(rmreg, rm_arg)) +addop("andpd", [bs8(0x0f), bs8(0x54), xmm, pref_66] + rmmod(rmreg, rm_arg)) + + +addop("maxsd", [bs8(0x0f), bs8(0x5f), xmm, pref_f2] + rmmod(rmreg, rm_arg)) + +addop("cvtsi2sd", + [bs8(0x0f), bs8(0x2a), xmmreg, pref_f2] + rmmod(rmreg, rm_arg)) +addop("cvtsi2ss", + [bs8(0x0f), bs8(0x2a), xmmreg, pref_f3] + rmmod(rmreg, rm_arg)) + + +addop("cvttsd2ss", + [bs8(0x0f), bs8(0x2c), xmmreg, pref_f2] + rmmod(rmreg, rm_arg)) +addop("cvttss2si", + [bs8(0x0f), bs8(0x2c), xmmreg, pref_f3] + rmmod(rmreg, rm_arg)) + + +# type("movupd", (mn_x86,), {"fields":[bs8(0x0f), bs8(0x10), xmm, pref_f2] +# + rmmod(rmreg, rm_arg_sxd), 'prefixed':'\xf2'}) + +addop("movzx", [bs8(0x0f), bs("1011011"), w8, sx] + rmmod(rmreg, rm_arg_sx)) +addop("mul", [bs('1111011'), w8] + rmmod(d4, rm_arg_w8)) + +addop("neg", [bs('1111011'), w8] + 
rmmod(d3, rm_arg_w8)) +addop("nop", [bs8(0x0f), bs8(0x1f)] + rmmod(d0, rm_arg)) # XXX TODO m512 +addop("not", [bs('1111011'), w8] + rmmod(d2, rm_arg_w8)) +addop("or", [bs("0000110"), w8, d_eax, d_imm]) +addop("or", [bs("100000"), se, w8] + rmmod(d1, rm_arg_w8) + [d_imm]) +addop("or", [bs("000010"), swapargs, w8] + + rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg]) +addop("out", [bs("1110011"), w8, u08, d_eax]) +addop("out", [bs("1110111"), w8, d_edx, d_eax]) + +addop("outsb", [bs8(0x6e)]) +# outs_name = {16:'OUTSW', 32:'OUTSD', 64:'OUTSD'} +# bs_outs_name = bs_modname_size(l=0, name=outs_name) +# addop("outs", [bs8(0x6f), bs_outs_name]) +addop("outsw", [bs8(0x6f), bs_opmode16]) +addop("outsd", [bs8(0x6f), bs_opmode32]) +addop("outsd", [bs8(0x6f), bs_opmode64]) + + +# addop("pause", [bs8(0xf3), bs8(0x90)]) + +addop("pop", [bs8(0x8f), stk] + rmmod(d0)) +addop("pop", [bs("01011"), stk, reg]) +addop("pop", [bs8(0x1f), d_ds]) +addop("pop", [bs8(0x07), d_es]) +addop("pop", [bs8(0x17), d_ss]) +addop("pop", [bs8(0x0f), bs8(0xa1), d_fs]) +addop("pop", [bs8(0x0f), bs8(0xa9), d_gs]) + +# popa_name = {16:'POPA', 32:'POPAD'} +# bs_popa_name = bs_modname_size(l=0, name=popa_name) +# addop("popa", [bs8(0x61), bs_popa_name]) +addop("popa", [bs8(0x61), bs_opmode16]) +addop("popad", [bs8(0x61), bs_opmode32]) + +# popf_name = {16:'POPF', 32:'POPFD', 64:'POPFQ'} +# bs_popf_name = bs_modname_size(l=0, name=popf_name) +# addop("popf", [bs8(0x9d), bs_popf_name]) +addop("popf", [bs8(0x9d), bs_opmode16]) +addop("popfd", [bs8(0x9d), bs_opmode32]) +addop("popfq", [bs8(0x9d), bs_opmode64]) + +addop("prefetch0", [bs8(0x0f), bs8(0x18)] + rmmod(d1, rm_arg_m08)) +addop("prefetch1", [bs8(0x0f), bs8(0x18)] + rmmod(d2, rm_arg_m08)) +addop("prefetch2", [bs8(0x0f), bs8(0x18)] + rmmod(d3, rm_arg_m08)) +addop("prefetchnta", [bs8(0x0f), bs8(0x18)] + rmmod(d0, rm_arg_m08)) + +addop("push", [bs8(0xff), stk] + rmmod(d6)) +addop("push", [bs("01010"), stk, reg]) +addop("push", [bs8(0x6a), rel_off08, stk]) 
+addop("push", [bs8(0x68), d_imm, stk]) +addop("push", [bs8(0x0e), d_cs]) +addop("push", [bs8(0x16), d_ss]) +addop("push", [bs8(0x1e), d_ds]) +addop("push", [bs8(0x06), d_es]) +addop("push", [bs8(0x0f), bs8(0xa0), d_fs]) +addop("push", [bs8(0x0f), bs8(0xa8), d_gs]) + +# pusha_name = {16:'PUSHA', 32:'PUSHAD'} +# bs_pusha_name = bs_modname_size(l=0, name=pusha_name) +# addop("pusha", [bs8(0x60), bs_pusha_name]) +addop("pusha", [bs8(0x60), bs_opmode16_no64]) +addop("pushad", [bs8(0x60), bs_opmode32_no64]) + + +# pushf_name = {16:'PUSHF', 32:'PUSHFD', 64:'PUSHFQ'} +# bs_pushf_name = bs_modname_size(l=0, name=pushf_name) +# addop("pushf", [bs8(0x9c), bs_pushf_name]) +addop("pushf", [bs8(0x9c), bs_opmode16]) +addop("pushfd", [bs8(0x9c), bs_opmode32]) +addop("pushfq", [bs8(0x9c), bs_opmode64]) + +addop("rcl", [bs('110100'), d_cl1, w8] + + rmmod(d2, rm_arg_w8), [rm_arg_w8, d_cl1]) +addop("rcl", [bs('1100000'), w8] + rmmod(d2, rm_arg_w8) + [u08]) +addop("rcr", [bs('110100'), d_cl1, w8] + + rmmod(d3, rm_arg_w8), [rm_arg_w8, d_cl1]) +addop("rcr", [bs('1100000'), w8] + rmmod(d3, rm_arg_w8) + [u08]) +addop("rol", [bs('110100'), d_cl1, w8] + + rmmod(d0, rm_arg_w8), [rm_arg_w8, d_cl1]) +addop("rol", [bs('1100000'), w8] + rmmod(d0, rm_arg_w8) + [u08]) +addop("ror", [bs('110100'), d_cl1, w8] + + rmmod(d1, rm_arg_w8), [rm_arg_w8, d_cl1]) +addop("ror", [bs('1100000'), w8] + rmmod(d1, rm_arg_w8) + [u08]) + +addop("rdmsr", [bs8(0x0f), bs8(0x32)]) +addop("rdpmc", [bs8(0x0f), bs8(0x33)]) +addop("rdtsc", [bs8(0x0f), bs8(0x31)]) +addop("ret", [bs8(0xc3), stk]) +addop("ret", [bs8(0xc2), stk, u16]) +addop("retf", [bs8(0xcb), stk]) +addop("retf", [bs8(0xca), stk, u16]) + +addop("rsm", [bs8(0x0f), bs8(0xaa)]) +addop("sahf", [bs8(0x9e)]) + +# XXX tipo in doc: /4 instead of /6 +addop("sal", [bs('110100'), d_cl1, w8] + + rmmod(d6, rm_arg_w8), [rm_arg_w8, d_cl1]) +addop("sal", [bs('1100000'), w8] + rmmod(d6, rm_arg_w8) + [u08]) +addop("sar", [bs('110100'), d_cl1, w8] + + rmmod(d7, rm_arg_w8), 
[rm_arg_w8, d_cl1]) +addop("sar", [bs('1100000'), w8] + rmmod(d7, rm_arg_w8) + [u08]) + +addop("scasb", [bs8(0xae)]) +# scas_name = {16:'SCASW', 32:'SCASD', 64:'SCASQ'} +# bs_scas_name = bs_modname_size(l=0, name=scas_name) +# addop("scas", [bs8(0xaf), bs_scas_name]) +addop("scasw", [bs8(0xaf), bs_opmode16]) +addop("scasd", [bs8(0xaf), bs_opmode32]) +addop("scasq", [bs8(0xaf), bs_opmode64]) + +addop("shl", [bs('110100'), d_cl1, w8] + + rmmod(d4, rm_arg_w8), [rm_arg_w8, d_cl1]) +addop("shl", [bs('1100000'), w8] + rmmod(d4, rm_arg_w8) + [u08]) +addop("shr", [bs('110100'), d_cl1, w8] + + rmmod(d5, rm_arg_w8), [rm_arg_w8, d_cl1]) +addop("shr", [bs('1100000'), w8] + rmmod(d5, rm_arg_w8) + [u08]) + +addop("sbb", [bs("0001110"), w8, d_eax, d_imm]) +addop("sbb", [bs("100000"), se, w8] + rmmod(d3, rm_arg_w8) + [d_imm]) +addop("sbb", [bs("000110"), swapargs, w8] + + rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg]) + +addop("set", [bs8(0x0f), bs('1001'), cond] + rmmod(regnoarg, rm_arg_m08)) +addop("sgdt", [bs8(0x0f), bs8(0x01)] + rmmod(d0)) +addop("shld", [bs8(0x0f), bs8(0xa4)] + + rmmod(rmreg) + [u08], [rm_arg, rmreg, u08]) +addop("shld", [bs8(0x0f), bs8(0xa5)] + + rmmod(rmreg) + [d_cl], [rm_arg, rmreg, d_cl]) +addop("shrd", [bs8(0x0f), bs8(0xac)] + + rmmod(rmreg) + [u08], [rm_arg, rmreg, u08]) +addop("shrd", [bs8(0x0f), bs8(0xad)] + + rmmod(rmreg) + [d_cl], [rm_arg, rmreg, d_cl]) +addop("sidt", [bs8(0x0f), bs8(0x01)] + rmmod(d1)) +addop("sldt", [bs8(0x0f), bs8(0x00)] + rmmod(d0)) +addop("smsw", [bs8(0x0f), bs8(0x01)] + rmmod(d4)) +addop("stc", [bs8(0xf9)]) +addop("std", [bs8(0xfd)]) +addop("sti", [bs8(0xfb)]) +addop("stosb", [bs8(0xaa)]) +# stos_name = {16:'STOSW', 32:'STOSD', 64:'STOSQ'} +# bs_stos_name = bs_modname_size(l=0, name=stos_name) +# addop("stos", [bs8(0xab), bs_stos_name]) +addop("stosw", [bs8(0xab), bs_opmode16]) +addop("stosd", [bs8(0xab), bs_opmode32]) +addop("stosq", [bs8(0xab), bs_opmode64]) + +addop("str", [bs8(0x0f), bs8(0x00)] + rmmod(d1)) + +addop("sub", 
[bs("0010110"), w8, d_eax, d_imm]) +addop("sub", [bs("100000"), se, w8] + rmmod(d5, rm_arg_w8) + [d_imm]) +addop("sub", [bs("001010"), swapargs, w8] + + rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg]) + +addop("syscall", [bs8(0x0f), bs8(0x05)]) +addop("sysenter", [bs8(0x0f), bs8(0x34)]) +addop("sysexit", [bs8(0x0f), bs8(0x35)]) +addop("sysret", [bs8(0x0f), bs8(0x07)]) +addop("test", [bs("1010100"), w8, d_eax, d_imm]) +addop("test", [bs("1111011"), w8] + rmmod(d0, rm_arg_w8) + [d_imm]) +addop("test", [bs("1000010"), w8] + + rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg]) +addop("ud2", [bs8(0x0f), bs8(0x0b)]) +addop("verr", [bs8(0x0f), bs8(0x00)] + rmmod(d4)) +addop("verw", [bs8(0x0f), bs8(0x00)] + rmmod(d5)) +addop("wbind", [bs8(0x0f), bs8(0x09)]) +addop("wrmsr", [bs8(0x0f), bs8(0x30)]) +addop("xadd", [bs8(0x0f), bs("1100000"), w8] + + rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg]) + +addop("nop", [bs8(0x90)], alias=True) + +addop("xchg", [bs('10010'), d_eax, reg]) +addop("xchg", [bs('1000011'), w8] + + rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg]) +addop("xlat", [bs8(0xd7)]) + + +addop("xor", [bs("0011010"), w8, d_eax, d_imm]) +addop("xor", [bs("100000"), se, w8] + rmmod(d6, rm_arg_w8) + [d_imm]) +addop("xor", [bs("001100"), swapargs, w8] + + rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg]) + + +# xorps_name = {16:'XORPD', 32:'XORPS', 64:'XORPS'} +# bs_xorps_name = bs_modname_size(l=0, name=xorps_name) +# addop("xorps", [bs8(0x0f), bs8(0x57), xmm] + rmmod(rmreg) + [ +# bs_xorps_name] ) +addop("xorpd", [bs8(0x0f), bs8(0x57), xmm] + rmmod(rmreg) + [bs_opmode16]) +addop("xorps", [bs8(0x0f), bs8(0x57), xmm] + rmmod(rmreg) + [bs_opmode32]) +addop("xorps", [bs8(0x0f), bs8(0x57), xmm] + rmmod(rmreg) + [bs_opmode64]) + +# movaps_name = {16:'MOVAPD', 32:'MOVAPS', 64:'MOVAPS'} +# bs_movaps_name = bs_modname_size(l=0, name=movaps_name) +# addop("movaps", [bs8(0x0f), bs("0010100"), swapargs, xmm] + rmmod(rmreg, +# rm_arg) + [ bs_movaps_name], [rmreg, rm_arg]) +addop("movapd", [bs8(0x0f), 
bs("0010100"), swapargs, xmm] + + rmmod(rmreg, rm_arg) + [bs_opmode16], [rmreg, rm_arg]) +addop("movaps", [bs8(0x0f), bs("0010100"), swapargs, xmm] + + rmmod(rmreg, rm_arg) + [bs_opmode32], [rmreg, rm_arg]) +addop("movaps", [bs8(0x0f), bs("0010100"), swapargs, xmm] + + rmmod(rmreg, rm_arg) + [bs_opmode64], [rmreg, rm_arg]) + +mn_x86.bintree = factor_one_bit(mn_x86.bintree) +# mn_x86.bintree = factor_fields_all(mn_x86.bintree) +""" +mod reg r/m + XX XXX XXX + +""" + + +def print_size(e): + print e, e.size + return e diff --git a/miasm2/arch/x86/disasm.py b/miasm2/arch/x86/disasm.py new file mode 100644 index 00000000..7185a973 --- /dev/null +++ b/miasm2/arch/x86/disasm.py @@ -0,0 +1,51 @@ +from miasm2.core.asmbloc import asm_constraint, asm_label, disasmEngine +from miasm2.expression.expression import ExprId +from arch import mn_x86 + + +def cb_x86_callpop(mn, attrib, pool_bin, cur_bloc, offsets_to_dis, symbol_pool): + """ + 1000: call 1005 + 1005: pop + """ + if len(cur_bloc.lines) < 1: + return + l = cur_bloc.lines[-1] + if l.name != 'CALL': + return + dst = l.args[0] + if not (isinstance(dst, ExprId) and isinstance(dst.name, asm_label)): + return + if dst.name.offset != l.offset + l.l: + return + l.name = 'PUSH' + # cur_bloc.bto.pop() + cur_bloc.bto[0].c_bto = asm_constraint.c_next + + +cb_x86_funcs = [cb_x86_callpop] + + +def cb_x86_disasm(mn, attrib, pool_bin, cur_bloc, offsets_to_dis, symbol_pool): + for func in cb_x86_funcs: + func(mn, attrib, pool_bin, cur_bloc, offsets_to_dis, symbol_pool) + + +class dis_x86(disasmEngine): + attrib = None + + def __init__(self, bs=None, **kwargs): + super(dis_x86, self).__init__(mn_x86, self.attrib, bs, **kwargs) + self.dis_bloc_callback = cb_x86_disasm + + +class dis_x86_16(dis_x86): + attrib = 16 + + +class dis_x86_32(dis_x86): + attrib = 32 + + +class dis_x86_64(dis_x86): + attrib = 64 diff --git a/miasm2/arch/x86/ira.py b/miasm2/arch/x86/ira.py new file mode 100644 index 00000000..04cb4cca --- /dev/null +++ 
b/miasm2/arch/x86/ira.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python +#-*- coding:utf-8 -*- + +from miasm2.expression.expression import * +from miasm2.core.graph import DiGraph +from miasm2.ir.ir import ir, irbloc +from miasm2.ir.analysis import ira +from miasm2.arch.x86.sem import ir_x86_16, ir_x86_32, ir_x86_64 + + +class ir_a_x86_16(ir_x86_16, ira): + + def __init__(self, symbol_pool=None): + ir_x86_16.__init__(self, symbol_pool) + self.ret_reg = self.arch.regs.AX + + # for test XXX TODO + def set_dead_regs(self, b): + b.rw[-1][1].add(self.arch.regs.zf) + b.rw[-1][1].add(self.arch.regs.of) + b.rw[-1][1].add(self.arch.regs.pf) + b.rw[-1][1].add(self.arch.regs.cf) + b.rw[-1][1].add(self.arch.regs.nf) + b.rw[-1][1].add(self.arch.regs.af) + + def get_out_regs(self, b): + return set([self.ret_reg, self.sp]) + + def add_unused_regs(self): + leaves = [self.blocs[n] for n in self.g.leafs()] + for b in leaves: + self.set_dead_regs(b) + + def call_effects(self, ad): + irs = [[ExprAff(self.ret_reg, ExprOp('call_func_ret', ad, self.sp)), + ExprAff(self.sp, ExprOp('call_func_stack', ad, self.sp)), + ]] + return irs + + def post_add_bloc(self, bloc, ir_blocs): + ir.post_add_bloc(self, bloc, ir_blocs) + if not bloc.lines: + return + l = bloc.lines[-1] + sub_call_dst = None + if not l.is_subcall(): + return + sub_call_dst = l.args[0] + if self.ExprIsLabel(sub_call_dst): + sub_call_dst = sub_call_dst.name + for b in ir_blocs: + l = b.lines[-1] + sub_call_dst = None + if not l.is_subcall(): + continue + sub_call_dst = l.args[0] + if self.ExprIsLabel(sub_call_dst): + sub_call_dst = sub_call_dst.name + lbl = bloc.get_next() + new_lbl = self.gen_label() + irs = self.call_effects(l.args[0]) + nbloc = irbloc(new_lbl, ExprId(lbl, size=self.pc.size), irs) + nbloc.lines = [l] + self.blocs[new_lbl] = nbloc + b.dst = ExprId(new_lbl, size=self.pc.size) + return + + +class ir_a_x86_32(ir_x86_32, ir_a_x86_16): + + def __init__(self, symbol_pool=None): + ir_x86_32.__init__(self, symbol_pool) + 
self.ret_reg = self.arch.regs.EAX + + +class ir_a_x86_64(ir_x86_64, ir_a_x86_16): + + def __init__(self, symbol_pool=None): + ir_x86_64.__init__(self, symbol_pool) + self.ret_reg = self.arch.regs.RAX + + def call_effects(self, ad): + irs = [[ExprAff(self.ret_reg, ExprOp('call_func_ret', ad, self.sp, + self.arch.regs.RCX, + self.arch.regs.RDX, + self.arch.regs.R8, + self.arch.regs.R9, + )), + ExprAff(self.sp, ExprOp('call_func_stack', ad, self.sp)), + ]] + return irs diff --git a/miasm2/arch/x86/regs.py b/miasm2/arch/x86/regs.py new file mode 100644 index 00000000..532b4f0c --- /dev/null +++ b/miasm2/arch/x86/regs.py @@ -0,0 +1,405 @@ +from miasm2.expression.expression import * +from miasm2.core.cpu import reg_info + + +IP = ExprId('IP', 16) +EIP = ExprId('EIP', 32) +RIP = ExprId('RIP', 64) +exception_flags = ExprId('exception_flags', 32) + +# GP + + +regs08_str = ["AL", "CL", "DL", "BL", "AH", "CH", "DH", "BH"] + \ + ["R%dB" % (i + 8) for i in xrange(8)] +regs08_expr = [ExprId(x, 8) for x in regs08_str] + +regs08_64_str = ["AL", "CL", "DL", "BL", "SPL", "BPL", "SIL", "DIL"] + \ + ["R%dB" % (i + 8) for i in xrange(8)] +regs08_64_expr = [ExprId(x, 8) for x in regs08_64_str] + + +regs16_str = ["AX", "CX", "DX", "BX", "SP", "BP", "SI", "DI"] + \ + ["R%dW" % (i + 8) for i in xrange(8)] +regs16_expr = [ExprId(x, 16) for x in regs16_str] + +regs32_str = ["EAX", "ECX", "EDX", "EBX", "ESP", "EBP", "ESI", "EDI"] + \ + ["R%dD" % (i + 8) for i in xrange(8)] +regs32_expr = [ExprId(x, 32) for x in regs32_str] + +regs64_str = ["RAX", "RCX", "RDX", "RBX", "RSP", "RBP", "RSI", "RDI", + "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15", + "RIP"] +regs64_expr = [ExprId(x, 64) for x in regs64_str] + + +regs_xmm_str = ["XMM%d" % i for i in xrange(16)] +regs_xmm_expr = [ExprId(x, 128) for x in regs_xmm_str] + +regs_mm_str = ["MM%d" % i for i in xrange(16)] +regs_mm_expr = [ExprId(x, 64) for x in regs_mm_str] + + +gpregs08 = reg_info(regs08_str, regs08_expr) +gpregs08_64 = 
reg_info(regs08_64_str, regs08_64_expr) +gpregs16 = reg_info(regs16_str, regs16_expr) +gpregs32 = reg_info(regs32_str, regs32_expr) +gpregs64 = reg_info(regs64_str, regs64_expr) + +gpregs_xmm = reg_info(regs_xmm_str, regs_xmm_expr) +gpregs_mm = reg_info(regs_mm_str, regs_mm_expr) + +r08_eax = reg_info([regs08_str[0]], [regs08_expr[0]]) +r16_eax = reg_info([regs16_str[0]], [regs16_expr[0]]) +r32_eax = reg_info([regs32_str[0]], [regs32_expr[0]]) +r64_eax = reg_info([regs64_str[0]], [regs64_expr[0]]) + +r08_ecx = reg_info([regs08_str[1]], [regs08_expr[1]]) + +r_eax_all = reg_info( + [regs08_str[0], regs16_str[0], regs32_str[0], regs64_str[0]], + [regs08_expr[0], regs16_expr[0], regs32_expr[0], regs64_expr[0]]) +r_edx_all = reg_info( + [regs08_str[2], regs16_str[2], regs32_str[2], regs64_str[2]], + [regs08_expr[2], regs16_expr[2], regs32_expr[2], regs64_expr[2]]) + +r16_edx = reg_info([regs16_str[2]], [regs16_expr[2]]) + + +selectr_str = ["ES", "CS", "SS", "DS", "FS", "GS"] +selectr_expr = [ExprId(x, 16) for x in selectr_str] +segmreg = reg_info(selectr_str, selectr_expr) + +crregs32_str = ["CR%d" % i for i in xrange(8)] +crregs32_expr = [ExprId(x, 32) for x in crregs32_str] +crregs = reg_info(crregs32_str, crregs32_expr) + + +drregs32_str = ["DR%d" % i for i in xrange(8)] +drregs32_expr = [ExprId(x, 32) for x in drregs32_str] +drregs = reg_info(drregs32_str, drregs32_expr) + + +fltregs32_str = ["ST(%d)" % i for i in xrange(8)] +fltregs32_expr = [ExprId(x, 64) for x in fltregs32_str] +fltregs = reg_info(fltregs32_str, fltregs32_expr) + +r_st_all = reg_info(['ST'], + [ExprId('ST', 64)]) + +r_cs_all = reg_info(['CS'], + [ExprId('CS', 16)]) +r_ds_all = reg_info(['DS'], + [ExprId('DS', 16)]) +r_es_all = reg_info(['ES'], + [ExprId('ES', 16)]) +r_ss_all = reg_info(['SS'], + [ExprId('SS', 16)]) +r_fs_all = reg_info(['FS'], + [ExprId('FS', 16)]) +r_gs_all = reg_info(['GS'], + [ExprId('GS', 16)]) + + +AL = regs08_expr[0] +CL = regs08_expr[1] +DL = regs08_expr[2] +BL = 
regs08_expr[3] +AH = regs08_expr[4] +CH = regs08_expr[5] +DH = regs08_expr[6] +BH = regs08_expr[7] +R8B = regs08_expr[8] +R9B = regs08_expr[9] +R10B = regs08_expr[10] +R11B = regs08_expr[11] +R12B = regs08_expr[12] +R13B = regs08_expr[13] +R14B = regs08_expr[14] +R15B = regs08_expr[15] +R15B = regs08_expr[15] + +SPL = regs08_64_expr[4] +BPL = regs08_64_expr[5] +SIL = regs08_64_expr[6] +DIL = regs08_64_expr[7] + + +AX = regs16_expr[0] +CX = regs16_expr[1] +DX = regs16_expr[2] +BX = regs16_expr[3] +SP = regs16_expr[4] +BP = regs16_expr[5] +SI = regs16_expr[6] +DI = regs16_expr[7] +R8W = regs16_expr[8] +R9W = regs16_expr[9] +R10W = regs16_expr[10] +R11W = regs16_expr[11] +R12W = regs16_expr[12] +R13W = regs16_expr[13] +R14W = regs16_expr[14] +R15W = regs16_expr[15] + + +EAX = regs32_expr[0] +ECX = regs32_expr[1] +EDX = regs32_expr[2] +EBX = regs32_expr[3] +ESP = regs32_expr[4] +EBP = regs32_expr[5] +ESI = regs32_expr[6] +EDI = regs32_expr[7] +R8D = regs32_expr[8] +R9D = regs32_expr[9] +R10D = regs32_expr[10] +R11D = regs32_expr[11] +R12D = regs32_expr[12] +R13D = regs32_expr[13] +R14D = regs32_expr[14] +R15D = regs32_expr[15] + + +RAX = regs64_expr[0] +RCX = regs64_expr[1] +RDX = regs64_expr[2] +RBX = regs64_expr[3] +RSP = regs64_expr[4] +RBP = regs64_expr[5] +RSI = regs64_expr[6] +RDI = regs64_expr[7] +R8 = regs64_expr[8] +R9 = regs64_expr[9] +R10 = regs64_expr[10] +R11 = regs64_expr[11] +R12 = regs64_expr[12] +R13 = regs64_expr[13] +R14 = regs64_expr[14] +R15 = regs64_expr[15] + + +reg_zf = 'zf' +reg_nf = 'nf' +reg_pf = 'pf' +reg_of = 'of' +reg_cf = 'cf' +reg_tf = 'tf' +reg_if = 'i_f' +reg_df = 'df' +reg_af = 'af' +reg_iopl = 'iopl_f' +reg_nt = 'nt' +reg_rf = 'rf' +reg_vm = 'vm' +reg_ac = 'ac' +reg_vif = 'vif' +reg_vip = 'vip' +reg_id = 'i_d' + + +reg_es = "ES" +reg_cs = "CS" +reg_ss = "SS" +reg_ds = "DS" +reg_fs = "FS" +reg_gs = "GS" + +reg_dr0 = 'DR0' +reg_dr1 = 'DR1' +reg_dr2 = 'DR2' +reg_dr3 = 'DR3' +reg_dr4 = 'DR4' +reg_dr5 = 'DR5' +reg_dr6 = 'DR6' +reg_dr7 = 
'DR7' + +reg_cr0 = 'CR0' +reg_cr1 = 'CR1' +reg_cr2 = 'CR2' +reg_cr3 = 'CR3' +reg_cr4 = 'CR4' +reg_cr5 = 'CR5' +reg_cr6 = 'CR6' +reg_cr7 = 'CR7' + +reg_mm0 = 'MM0' +reg_mm1 = 'MM1' +reg_mm2 = 'MM2' +reg_mm3 = 'MM3' +reg_mm4 = 'MM4' +reg_mm5 = 'MM5' +reg_mm6 = 'MM6' +reg_mm7 = 'MM7' + + +reg_tsc1 = "tsc1" +reg_tsc2 = "tsc2" + +reg_float_c0 = 'float_c0' +reg_float_c1 = 'float_c1' +reg_float_c2 = 'float_c2' +reg_float_c3 = 'float_c3' +reg_float_stack_ptr = "float_stack_ptr" +reg_float_control = 'reg_float_control' +reg_float_eip = 'reg_float_eip' +reg_float_cs = 'reg_float_cs' +reg_float_address = 'reg_float_address' +reg_float_ds = 'reg_float_ds' + + +reg_float_st0 = 'float_st0' +reg_float_st1 = 'float_st1' +reg_float_st2 = 'float_st2' +reg_float_st3 = 'float_st3' +reg_float_st4 = 'float_st4' +reg_float_st5 = 'float_st5' +reg_float_st6 = 'float_st6' +reg_float_st7 = 'float_st7' + + +dr0 = ExprId(reg_dr0) +dr1 = ExprId(reg_dr1) +dr2 = ExprId(reg_dr2) +dr3 = ExprId(reg_dr3) +dr4 = ExprId(reg_dr4) +dr5 = ExprId(reg_dr5) +dr6 = ExprId(reg_dr6) +dr7 = ExprId(reg_dr7) + +cr0 = ExprId(reg_cr0) +cr1 = ExprId(reg_cr1) +cr2 = ExprId(reg_cr2) +cr3 = ExprId(reg_cr3) +cr4 = ExprId(reg_cr4) +cr5 = ExprId(reg_cr5) +cr6 = ExprId(reg_cr6) +cr7 = ExprId(reg_cr7) + +mm0 = ExprId(reg_mm0, 64) +mm1 = ExprId(reg_mm1, 64) +mm2 = ExprId(reg_mm2, 64) +mm3 = ExprId(reg_mm3, 64) +mm4 = ExprId(reg_mm4, 64) +mm5 = ExprId(reg_mm5, 64) +mm6 = ExprId(reg_mm6, 64) +mm7 = ExprId(reg_mm7, 64) + + +# tmp1= ExprId(reg_tmp1) +zf = ExprId(reg_zf, size=1) +nf = ExprId(reg_nf, size=1) +pf = ExprId(reg_pf, size=1) +of = ExprId(reg_of, size=1) +cf = ExprId(reg_cf, size=1) +tf = ExprId(reg_tf, size=1) +i_f = ExprId(reg_if, size=1) +df = ExprId(reg_df, size=1) +af = ExprId(reg_af, size=1) +iopl = ExprId(reg_iopl, size=2) +nt = ExprId(reg_nt, size=1) +rf = ExprId(reg_rf, size=1) +vm = ExprId(reg_vm, size=1) +ac = ExprId(reg_ac, size=1) +vif = ExprId(reg_vif, size=1) +vip = ExprId(reg_vip, size=1) +i_d = 
ExprId(reg_id, size=1) + +ES = ExprId(reg_es, size=16) +CS = ExprId(reg_cs, size=16) +SS = ExprId(reg_ss, size=16) +DS = ExprId(reg_ds, size=16) +FS = ExprId(reg_fs, size=16) +GS = ExprId(reg_gs, size=16) + +tsc1 = ExprId(reg_tsc1, size=32) +tsc2 = ExprId(reg_tsc2, size=32) + +float_c0 = ExprId(reg_float_c0, size=1) +float_c1 = ExprId(reg_float_c1, size=1) +float_c2 = ExprId(reg_float_c2, size=1) +float_c3 = ExprId(reg_float_c3, size=1) +float_stack_ptr = ExprId(reg_float_stack_ptr, size=3) +float_control = ExprId(reg_float_control, 16) +float_eip = ExprId(reg_float_eip) +float_cs = ExprId(reg_float_cs, size=16) +float_address = ExprId(reg_float_address) +float_ds = ExprId(reg_float_ds, size=16) + +float_st0 = ExprId(reg_float_st0, 64) +float_st1 = ExprId(reg_float_st1, 64) +float_st2 = ExprId(reg_float_st2, 64) +float_st3 = ExprId(reg_float_st3, 64) +float_st4 = ExprId(reg_float_st4, 64) +float_st5 = ExprId(reg_float_st5, 64) +float_st6 = ExprId(reg_float_st6, 64) +float_st7 = ExprId(reg_float_st7, 64) + +EAX_init = ExprId('EAX_init') +EBX_init = ExprId('EBX_init') +ECX_init = ExprId('ECX_init') +EDX_init = ExprId('EDX_init') +ESI_init = ExprId('ESI_init') +EDI_init = ExprId('EDI_init') +ESP_init = ExprId('ESP_init') +EBP_init = ExprId('EBP_init') + + +RAX_init = ExprId('RAX_init', 64) +RBX_init = ExprId('RBX_init', 64) +RCX_init = ExprId('RCX_init', 64) +RDX_init = ExprId('RDX_init', 64) +RSI_init = ExprId('RSI_init', 64) +RDI_init = ExprId('RDI_init', 64) +RSP_init = ExprId('RSP_init', 64) +RBP_init = ExprId('RBP_init', 64) + + +all_regs_ids = [ + AL, CL, DL, BL, AH, CH, DH, BH, + R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B, R15B, + SPL, BPL, SIL, DIL, + AX, CX, DX, BX, SP, BP, SI, DI, + R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W, + IP, + EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI, + R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D, + EIP, + + RAX, RBX, RCX, RDX, RSP, RBP, RIP, RSI, RDI, + R8, R9, R10, R11, R12, R13, R14, R15, R15, + zf, nf, pf, of, cf, af, df, + tf, 
i_f, iopl, nt, rf, vm, ac, vif, vip, i_d, + float_control, float_eip, float_cs, float_address, float_ds, + tsc1, tsc2, + ES, CS, SS, DS, FS, GS, + float_st0, float_st1, float_st2, float_st3, + float_st4, float_st5, float_st6, float_st7, + float_c0, float_c1, float_c2, float_c3, + cr0, cr3, + dr0, dr1, dr2, dr3, dr4, dr5, dr6, dr7, + float_stack_ptr, + mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7, + + exception_flags, +] + fltregs32_expr + +all_regs_ids_byname = dict([(x.name, x) for x in all_regs_ids]) + +all_regs_ids_init = [ExprId("%s_init" % x.name, x.size) for x in all_regs_ids] + +regs_init = {} +for i, r in enumerate(all_regs_ids): + all_regs_ids_init[i].is_term = True + regs_init[r] = all_regs_ids_init[i] + + +mRAX = {16: AX, 32: EAX, 64: RAX} +mRBX = {16: BX, 32: EBX, 64: RBX} +mRCX = {16: CX, 32: ECX, 64: RCX} +mRDX = {16: DX, 32: EDX, 64: RDX} +mRSI = {16: SI, 32: ESI, 64: RSI} +mRDI = {16: DI, 32: EDI, 64: RDI} +mRBP = {16: BP, 32: EBP, 64: RBP} +mRSP = {16: SP, 32: ESP, 64: RSP} +mRIP = {16: IP, 32: EIP, 64: RIP} diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py new file mode 100644 index 00000000..4b8a357b --- /dev/null +++ b/miasm2/arch/x86/sem.py @@ -0,0 +1,3029 @@ +# +# Copyright (C) 2011 EADS France, Fabrice Desclaux <fabrice.desclaux@eads.net> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# + +from miasm2.expression.expression import * +from miasm2.expression.simplifications import expr_simp +from miasm2.arch.x86.regs import * +from miasm2.arch.x86.arch import mn_x86, repeat_mn, replace_regs +from miasm2.core.asmbloc import expr_is_int_or_label +from miasm2.ir.ir import ir, irbloc +import math +import struct +from regs import * + +# interrupt with eip update after instr +EXCEPT_SOFT_BP = (1 << 1) +EXCEPT_INT_XX = (1 << 2) + +EXCEPT_BREAKPOINT_INTERN = (1 << 10) + +EXCEPT_NUM_UPDT_EIP = (1 << 11) +# interrupt with eip at instr +EXCEPT_UNK_MEM_AD = (1 << 12) +EXCEPT_THROW_SEH = (1 << 13) +EXCEPT_UNK_EIP = (1 << 14) +EXCEPT_ACCESS_VIOL = (1 << 14) +EXCEPT_INT_DIV_BY_ZERO = (1 << 16) +EXCEPT_PRIV_INSN = (1 << 17) +EXCEPT_ILLEGAL_INSN = (1 << 18) +EXCEPT_UNK_MNEMO = (1 << 19) + + +""" +http://www.emulators.com/docs/nx11_flags.htm + +CF(A+B) = (((A XOR B) XOR D) < 0) XOR (((A XOR D) AND NOT (A XOR B)) < 0) +CF(A-B) = (((A XOR B) XOR D) < 0) XOR (((A XOR D) AND (A XOR B)) < 0) + +OF(A+B) = ((A XOR D) AND NOT (A XOR B)) < 0 +OF(A-B) = ((A XOR D) AND (A XOR B)) < 0 +""" + + +float_list = [ + float_st0, + float_st1, + float_st2, + float_st3, + float_st4, + float_st5, + float_st6, + float_st7, +] + + +# XXX TODO make default check against 0 or not 0 (same eq as in C) + + +def update_flag_zf(a): + return [ExprAff(zf, ExprCond(a, ExprInt_from(zf, 0), ExprInt_from(zf, 1)))] + + +def update_flag_nf(a): + return [ExprAff(nf, a.msb())] + + +def update_flag_pf(a): + return [ExprAff(pf, ExprOp('parity', a & ExprInt_from(a, 0xFF)))] + + +def update_flag_af(a): + return [ExprAff(af, ExprCond((a & ExprInt_from(a, 0x10)), + ExprInt_from(af, 1), ExprInt_from(af, 0)))] + + +def update_flag_znp(a): + e = [] + e += update_flag_zf(a) + e += update_flag_nf(a) + e 
+= update_flag_pf(a) + return e + + +def update_flag_logic(a): + e = [] + e += update_flag_znp(a) + e.append(ExprAff(of, ExprInt_from(of, 0))) + e.append(ExprAff(cf, ExprInt_from(cf, 0))) + return e + + +def update_flag_arith(a): + e = [] + e += update_flag_znp(a) + return e + + +def check_ops_msb(a, b, c): + if not a or not b or not c or a != b or a != c: + raise ValueError('bad ops size %s %s %s' % (a, b, c)) + + +def arith_flag(a, b, c): + a_s, b_s, c_s = a.size, b.size, c.size + check_ops_msb(a_s, b_s, c_s) + a_s, b_s, c_s = a.msb(), b.msb(), c.msb() + return a_s, b_s, c_s + +# checked: ok for adc add because b & c before +cf + + +def update_flag_add_cf(a, b, c): + return ExprAff(cf, (((a ^ b) ^ c) ^ ((a ^ c) & (~(a ^ b)))).msb()) + + +def update_flag_add_of(a, b, c): + return ExprAff(of, (((a ^ c) & (~(a ^ b)))).msb()) + + +# checked: ok for sbb add because b & c before +cf +def update_flag_sub_cf(a, b, c): + return ExprAff(cf, (((a ^ b) ^ c) ^ ((a ^ c) & (a ^ b))).msb()) + + +def update_flag_sub_of(a, b, c): + return ExprAff(of, (((a ^ c) & (a ^ b))).msb()) + +# z = x+y (+cf?) + + +def update_flag_add(x, y, z): + e = [] + e.append(update_flag_add_cf(x, y, z)) + e.append(update_flag_add_of(x, y, z)) + return e + +# z = x-y (+cf?) 
+ + +def update_flag_sub(x, y, z): + e = [] + e.append(update_flag_sub_cf(x, y, z)) + e.append(update_flag_sub_of(x, y, z)) + return e + + +def set_float_cs_eip(instr): + e = [] + # XXX TODO check float updt + e.append(ExprAff(float_eip, ExprInt_from(float_eip, instr.offset))) + e.append(ExprAff(float_cs, CS)) + return e + + +def mov(ir, instr, a, b): + if a in [ES, CS, SS, DS, FS, GS]: + b = b[:a.size] + if b in [ES, CS, SS, DS, FS, GS]: + b = b.zeroExtend(a.size) + e = [ExprAff(a, b)] + return None, e, [] + + +def xchg(ir, instr, a, b): + e = [] + e.append(ExprAff(a, b)) + e.append(ExprAff(b, a)) + return None, e, [] + + +def movzx(ir, instr, a, b): + e = [ExprAff(a, b.zeroExtend(a.size))] + return None, e, [] + + +def movsx(ir, instr, a, b): + e = [ExprAff(a, b.signExtend(a.size))] + return None, e, [] + + +def lea(ir, instr, a, b): + src = b.arg + if src.size > a.size: + src = src[:a.size] + e = [ExprAff(a, src)] + return None, e, [] + + +def add(ir, instr, a, b): + e = [] + c = a + b + e += update_flag_arith(c) + e += update_flag_af(c) + e += update_flag_add(a, b, c) + e.append(ExprAff(a, c)) + return None, e, [] + + +def xadd(ir, instr, a, b): + e = [] + c = a + b + e += update_flag_arith(c) + e += update_flag_af(c) + e += update_flag_add(b, a, c) + e.append(ExprAff(b, a)) + e.append(ExprAff(a, c)) + return None, e, [] + + +def adc(ir, instr, a, b): + e = [] + c = a + (b + ExprCompose([(ExprInt_fromsize(a.size - 1, 0), 1, a.size), + (cf, 0, 1)])) + e += update_flag_arith(c) + e += update_flag_af(c) + e += update_flag_add(a, b, c) + e.append(ExprAff(a, c)) + return None, e, [] + + +def sub(ir, instr, a, b): + e = [] + c = a - b + e += update_flag_arith(c) + e += update_flag_af(c) + e += update_flag_sub(a, b, c) + e.append(ExprAff(a, c)) + return None, e, [] + +# a-(b+cf) + + +def sbb(ir, instr, a, b): + e = [] + c = a - (b + ExprCompose([(ExprInt_fromsize(a.size - 1, 0), 1, a.size), + (cf, 0, 1)])) + e += update_flag_arith(c) + e += update_flag_af(c) + e += 
update_flag_sub(a, b, c) + e.append(ExprAff(a, c)) + return None, e, [] + + +def neg(ir, instr, b): + e = [] + a = ExprInt_from(b, 0) + + c = a - b + e += update_flag_arith(c) + e += update_flag_sub(a, b, c) + e += update_flag_af(c) + e.append(ExprAff(b, c)) + return None, e, [] + + +def l_not(ir, instr, b): + e = [] + c = ~b + e.append(ExprAff(b, c)) + return None, e, [] + + +def l_cmp(ir, instr, a, b): + e = [] + c = a - b + e += update_flag_arith(c) + e += update_flag_sub(a, b, c) + e += update_flag_af(c) + return None, e, [] + + +def xor(ir, instr, a, b): + e = [] + c = a ^ b + e += update_flag_logic(c) + e.append(ExprAff(a, c)) + return None, e, [] + + +def l_or(ir, instr, a, b): + e = [] + c = a | b + e += update_flag_logic(c) + e.append(ExprAff(a, c)) + return None, e, [] + + +def l_and(ir, instr, a, b): + e = [] + c = a & b + e += update_flag_logic(c) + e.append(ExprAff(a, c)) + return None, e, [] + + +def l_test(ir, instr, a, b): + e = [] + c = a & b + e += update_flag_logic(c) + return None, e, [] + + +def l_rol(ir, instr, a, b): + e = [] + b = b.zeroExtend(a.size) + c = ExprOp('<<<', a, b) + + new_cf = c[:1] + e.append(ExprAff(cf, new_cf)) + # hack (only valid if b=1) + e.append(ExprAff(of, c.msb() ^ new_cf)) + e.append(ExprAff(a, c)) + return None, e, [] + + +def l_ror(ir, instr, a, b): + e = [] + b = b.zeroExtend(a.size) + c = ExprOp('>>>', a, b) + + e.append(ExprAff(cf, c.msb())) + # hack (only valid if b=1): when count == 1: a = msb-1(dest) + e.append(ExprAff(of, (c ^ a).msb())) + e.append(ExprAff(a, c)) + return None, e, [] + + +def rcl(ir, instr, a, b): + e = [] + b = b.zeroExtend(a.size) + c = ExprOp('<<<c_rez', a, b, cf.zeroExtend(a.size)) + new_cf = ExprOp('<<<c_cf', a, b, cf.zeroExtend(a.size))[:1] + + e.append(ExprAff(cf, new_cf)) + # hack (only valid if b=1) + e.append(ExprAff(of, c.msb() ^ new_cf)) + e.append(ExprAff(a, c)) + return None, e, [] + + +def rcr(ir, instr, a, b): + e = [] + b = b.zeroExtend(a.size) + c = ExprOp('>>>c_rez', a, b, 
def sar(ir, instr, a, b):
    """Build the IR for SAR: arithmetic right shift of `a` by `b`.

    The count is normalised by get_shift(). When it is a constant, the
    assignments are returned directly (or nothing at all for a count of 0).
    Otherwise the shift is emitted in an extra irbloc that is only reached
    when the count is non-zero.

    Returns the (dst, assignments, extra irblocs) triple.
    """
    shifter = get_shift(a, b)
    c = ExprOp('a>>', a, shifter)

    lbl_do = ExprId(ir.gen_label(), instr.mode)
    lbl_skip = ExprId(ir.get_next_label(instr), instr.mode)

    # CF receives the last bit shifted out: bit 0 of a >> (count - 1)
    new_cf = ExprOp('a>>', a, (shifter - ExprInt_from(a, 1)))[:1]

    e_do = [
        ExprAff(cf, new_cf),
        ExprAff(of, ExprInt_from(of, 0)),
        ExprAff(a, c),
    ]
    e_do += update_flag_znp(c)

    # Don't generate a conditional shift on a constant count
    if isinstance(shifter, ExprInt):
        if int(shifter.arg) != 0:
            return None, e_do, []
        # A count of 0 leaves both the destination and the flags untouched
        return None, [], []

    return (ExprCond(shifter, lbl_do, lbl_skip),
            [],
            [irbloc(lbl_do.name, lbl_skip, [e_do])])
def shrd_cl(ir, instr, a, b):
    """Build the IR for SHRD with the shift count taken from CL.

    Delegates to shrd() so both forms share one implementation and the
    count goes through get_shift() (width-dependent masking), in the same
    way shld_cl() delegates to shld().

    Returns the (dst, assignments, extra irblocs) triple produced by shrd().
    """
    # CL is the low byte of the count register for the current mode
    return shrd(ir, instr, a, b, mRCX[instr.mode][:8])
(ExprInt_from(a, a.size) - shifter) + ) + + new_cf = (a >> (ExprInt_from(a, a.size) - shifter))[:1] + e.append(ExprAff(cf, ExprCond(shifter, + new_cf, + cf) + ) + ) + # XXX todo: don't update flag if shifter is 0 + e += update_flag_znp(c) + e.append(ExprAff(of, c.msb() ^ new_cf)) + e.append(ExprAff(a, ExprCond(shifter, + c, + a))) + return None, e, [] + + +# XXX todo ### +def cmc(ir, instr): + e = [ExprAff(cf, ExprCond(cf, ExprInt_from(cf, 0), ExprInt_from(cf, 1)))] + return None, e, [] + + +def clc(ir, instr): + e = [ExprAff(cf, ExprInt_from(cf, 0))] + return None, e, [] + + +def stc(ir, instr): + e = [ExprAff(cf, ExprInt_from(cf, 1))] + return None, e, [] + + +def cld(ir, instr): + e = [ExprAff(df, ExprInt_from(df, 0))] + return None, e, [] + + +def std(ir, instr): + e = [ExprAff(df, ExprInt_from(df, 1))] + return None, e, [] + + +def cli(ir, instr): + e = [ExprAff(i_f, ExprInt_from(i_f, 0))] + return None, e, [] + + +def sti(ir, instr): + e = [ExprAff(exception_flags, ExprInt32(EXCEPT_PRIV_INSN))] + e = [] # XXX TODO HACK + return None, e, [] + + +def inc(ir, instr, a): + e = [] + b = ExprInt_from(a, 1) + c = a + b + e += update_flag_arith(c) + e += update_flag_af(c) + + e.append(update_flag_add_of(a, b, c)) + e.append(ExprAff(a, c)) + return None, e, [] + + +def dec(ir, instr, a): + e = [] + b = ExprInt_from(a, -1) + c = a + b + e += update_flag_arith(c) + e += update_flag_af(c) + + e.append(update_flag_add_of(a, b, c)) + e.append(ExprAff(a, c)) + return None, e, [] + + +def push(ir, instr, a): + e = [] + s = instr.mode + size = instr.v_opmode() + opmode, admode = s, instr.v_admode() + # special case segment regs + if a in [ES, CS, SS, DS, FS, GS]: + pass + if not s in [16, 32, 64]: + raise ValueError('bad size stacker!') + if isinstance(a, ExprInt): + a = ExprInt_fromsize(s, a.arg) + + c = mRSP[instr.mode][:s] - ExprInt_fromsize(s, s / 8) + e.append(ExprAff(mRSP[instr.mode][:s], c)) + # we sub vopmode to stack, but mem access is arg size wide + if 
ir.do_stk_segm: + c = ExprOp('segm', SS, c) + e.append(ExprAff(ExprMem(c, a.size), a)) + return None, e, [] + + +def pop(ir, instr, a): + e = [] + s = instr.mode + size = instr.v_opmode() + opmode, admode = s, instr.v_admode() + # special case segment regs + if a in [ES, CS, SS, DS, FS, GS]: + s = admode + if not s in [16, 32, 64]: + raise ValueError('bad size stacker!') + new_esp = mRSP[instr.mode][:s] + ExprInt_fromsize(s, s / 8) + e.append(ExprAff(mRSP[instr.mode][:s], new_esp)) + # XXX FIX XXX for pop [esp] + if isinstance(a, ExprMem): + a = a.replace_expr({mRSP[instr.mode]: new_esp}) + c = mRSP[instr.mode][:s] + if ir.do_stk_segm: + c = ExprOp('segm', SS, c) + e.append(ExprAff(a, ExprMem(c, a.size))) + return None, e, [] + + +def sete(ir, instr, a): + e = [] + e.append(ExprAff(a, ExprCond(zf, ExprInt_from(a, 1), ExprInt_from(a, 0)))) + return None, e, [] + + +def setnz(ir, instr, a): + e = [] + e.append(ExprAff(a, ExprCond(zf, ExprInt_from(a, 0), ExprInt_from(a, 1)))) + return None, e, [] + + +def setl(ir, instr, a): + e = [] + e.append( + ExprAff(a, ExprCond(nf - of, ExprInt_from(a, 1), ExprInt_from(a, 0)))) + return None, e, [] + + +def setg(ir, instr, a): + e = [] + a0 = ExprInt_from(a, 0) + a1 = ExprInt_from(a, 1) + e.append(ExprAff(a, ExprCond(zf, a0, a1) & ExprCond(nf - of, a0, a1))) + return None, e, [] + + +def setge(ir, instr, a): + e = [] + e.append( + ExprAff(a, ExprCond(nf - of, ExprInt_from(a, 0), ExprInt_from(a, 1)))) + return None, e, [] + + +def seta(ir, instr, a): + e = [] + e.append(ExprAff(a, ExprCond(cf | zf, + ExprInt_from(a, 0), + ExprInt_from(a, 1)))) + + return None, e, [] + + +def setae(ir, instr, a): + e = [] + e.append(ExprAff(a, ExprCond(cf, ExprInt_from(a, 0), ExprInt_from(a, 1)))) + return None, e, [] + + +def setb(ir, instr, a): + e = [] + e.append(ExprAff(a, ExprCond(cf, ExprInt_from(a, 1), ExprInt_from(a, 0)))) + return None, e, [] + + +def setbe(ir, instr, a): + e = [] + e.append(ExprAff(a, ExprCond(cf | zf, + ExprInt_from(a, 
def setna(ir, instr, a):
    """Build the IR for SETNA: `a` = 1 if not above (CF or ZF set), else 0.

    Same condition as setbe(): "not above" is "below or equal".

    Returns the (dst, assignments, extra irblocs) triple.
    """
    e = []
    e.append(ExprAff(a, ExprCond(cf | zf,
                                 ExprInt_from(a, 1),
                                 ExprInt_from(a, 0))))
    return None, e, []
def cmps(ir, instr, size):
    """Build the IR for CMPS on `size`-bit elements.

    The flags are produced by l_cmp() from [xSI] - [xDI]. Both index
    registers are then advanced by size / 8 bytes in one of two extra
    irblocs: the incrementing one when df is 0, the decrementing one when
    df is set.

    Returns (ExprCond(df, decrement label, increment label),
             flag assignments, [increment irbloc, decrement irbloc]).
    """
    lbl_df_0 = ExprId(ir.gen_label(), instr.mode)
    lbl_df_1 = ExprId(ir.gen_label(), instr.mode)
    lbl_next = ExprId(ir.get_next_label(instr), instr.mode)

    s = instr.v_admode()
    src1 = ExprMem(mRSI[instr.mode][:s], size)
    src2 = ExprMem(mRDI[instr.mode][:s], size)

    # First source operand is the xSI element, second is the xDI element
    dummy, e, extra = l_cmp(ir, instr, src1, src2)

    step = size // 8

    e0 = []
    e0.append(ExprAff(src2.arg, src2.arg + ExprInt_from(src2.arg, step)))
    e0.append(ExprAff(src1.arg, src1.arg + ExprInt_from(src1.arg, step)))
    e0 = irbloc(lbl_df_0.name, lbl_next, [e0])

    e1 = []
    e1.append(ExprAff(src2.arg, src2.arg - ExprInt_from(src2.arg, step)))
    e1.append(ExprAff(src1.arg, src1.arg - ExprInt_from(src1.arg, step)))
    e1 = irbloc(lbl_df_1.name, lbl_next, [e1])

    return ExprCond(df, lbl_df_1, lbl_df_0), e, [e0, e1]
regs = [nt, ExprInt1(0), rf, vm, ac, vif, vip, i_d] + elif s == 16: + regs = [nt, ExprInt1(0)] + else: + raise ValueError('unk size') + for i in xrange(len(regs)): + args.append((regs[i], i + 14, i + 15)) + if s == 32: + args.append((ExprInt_fromsize(10, 0), 22, 32)) + return ExprCompose(args) + + +def pushfd(ir, instr): + return push(ir, instr, compose_eflag()) + + +def pushfw(ir, instr): + return push(ir, instr, compose_eflag(16)) + + +def popfd(ir, instr): + tmp = ExprMem(mRSP[instr.mode]) + e = [] + e.append(ExprAff(cf, ExprSlice(tmp, 0, 1))) + e.append(ExprAff(pf, ExprSlice(tmp, 2, 3))) + e.append(ExprAff(af, ExprSlice(tmp, 4, 5))) + e.append(ExprAff(zf, ExprSlice(tmp, 6, 7))) + e.append(ExprAff(nf, ExprSlice(tmp, 7, 8))) + e.append(ExprAff(tf, ExprSlice(tmp, 8, 9))) + e.append(ExprAff(i_f, ExprSlice(tmp, 9, 10))) + e.append(ExprAff(df, ExprSlice(tmp, 10, 11))) + e.append(ExprAff(of, ExprSlice(tmp, 11, 12))) + e.append(ExprAff(iopl, ExprSlice(tmp, 12, 14))) + e.append(ExprAff(nt, ExprSlice(tmp, 14, 15))) + e.append(ExprAff(rf, ExprSlice(tmp, 16, 17))) + e.append(ExprAff(vm, ExprSlice(tmp, 17, 18))) + e.append(ExprAff(ac, ExprSlice(tmp, 18, 19))) + e.append(ExprAff(vif, ExprSlice(tmp, 19, 20))) + e.append(ExprAff(vip, ExprSlice(tmp, 20, 21))) + e.append(ExprAff(i_d, ExprSlice(tmp, 21, 22))) + e.append(ExprAff(mRSP[instr.mode], mRSP[instr.mode] + ExprInt32(4))) + e.append(ExprAff(exception_flags, + ExprCond(ExprSlice(tmp, 8, 9), + ExprInt32(EXCEPT_SOFT_BP), + exception_flags + ) + ) + ) + return None, e, [] + + +def popfw(ir, instr): + tmp = ExprMem(esp) + e = [] + e.append(ExprAff(cf, ExprSlice(tmp, 0, 1))) + e.append(ExprAff(pf, ExprSlice(tmp, 2, 3))) + e.append(ExprAff(af, ExprSlice(tmp, 4, 5))) + e.append(ExprAff(zf, ExprSlice(tmp, 6, 7))) + e.append(ExprAff(nf, ExprSlice(tmp, 7, 8))) + e.append(ExprAff(tf, ExprSlice(tmp, 8, 9))) + e.append(ExprAff(i_f, ExprSlice(tmp, 9, 10))) + e.append(ExprAff(df, ExprSlice(tmp, 10, 11))) + e.append(ExprAff(of, 
ExprSlice(tmp, 11, 12))) + e.append(ExprAff(iopl, ExprSlice(tmp, 12, 14))) + e.append(ExprAff(nt, ExprSlice(tmp, 14, 15))) + e.append(ExprAff(esp, esp + ExprInt32(2))) + return None, e, [] + + +def pushad(ir, instr): + e = [] + s = instr.v_opmode() + opmode, admode = s, instr.v_admode() + if not s in [16, 32, 64]: + raise ValueError('bad size stacker!') + + regs = [ + mRAX[instr.mode][:s], mRCX[instr.mode][ + :s], mRDX[instr.mode][:s], mRBX[instr.mode][:s], + mRSP[instr.mode][:s], mRBP[instr.mode][:s], + mRSI[instr.mode][:s], mRDI[instr.mode][:s]] + + for i in xrange(len(regs)): + c = mRSP[instr.mode][:s] + ExprInt_fromsize(s, -(s / 8) * (i + 1)) + e.append(ExprAff(ExprMem(c, s), regs[i])) + e.append(ExprAff(mRSP[instr.mode][:s], c)) + return None, e, [] + + +def popad(ir, instr): + e = [] + s = instr.v_opmode() + opmode, admode = s, instr.v_admode() + if not s in [16, 32, 64]: + raise ValueError('bad size stacker!') + regs = [ + mRAX[instr.mode][:s], mRCX[instr.mode][ + :s], mRDX[instr.mode][:s], mRBX[instr.mode][:s], + mRSP[instr.mode][:s], mRBP[instr.mode][:s], + mRSI[instr.mode][:s], mRDI[instr.mode][:s]] + myesp = mRSP[instr.mode][:s] + regs.reverse() + for i in xrange(len(regs)): + if regs[i] == myesp: + continue + c = myesp + ExprInt_from(myesp, ((s / 8) * i)) + e.append(ExprAff(regs[i], ExprMem(c, s))) + + c = myesp + ExprInt_from(myesp, ((s / 8) * (i + 1))) + e.append(ExprAff(myesp, c)) + + return None, e, [] + + +def call(ir, instr, dst): + e = [] + # opmode, admode = instr.opmode, instr.admode + s = dst.size + meip = mRIP[instr.mode] + opmode, admode = s, instr.v_admode() + myesp = mRSP[instr.mode][:opmode] + n = ExprId(ir.get_next_label(instr), instr.mode) + + c = myesp + ExprInt_fromsize(s, (-s / 8)) + e.append(ExprAff(myesp, c)) + if ir.do_stk_segm: + c = ExprOp('segm', SS, c) + e.append(ExprAff(ExprMem(c, size=s), n)) + e.append(ExprAff(meip, dst.zeroExtend(instr.mode))) + if not expr_is_int_or_label(dst): + dst = meip + return dst, e, [] + + +def 
ret(ir, instr, a=None): + e = [] + s = instr.mode + meip = mRIP[instr.mode] + opmode, admode = instr.v_opmode(), instr.v_admode() + s = opmode + myesp = mRSP[instr.mode][:s] + + if a is None: + a = ExprInt_fromsize(s, 0) + e.append(ExprAff(myesp, (myesp + (ExprInt_fromsize(s, (s / 8)))))) + else: + a = a.zeroExtend(s) + e.append(ExprAff(myesp, (myesp + (ExprInt_fromsize(s, (s / 8)) + a)))) + c = myesp + if ir.do_stk_segm: + c = ExprOp('segm', SS, c) + e.append(ExprAff(meip, ExprMem(c, size=s).zeroExtend(s))) + return meip, e, [] + + +def retf(ir, instr, a=None): + e = [] + s = instr.mode + meip = mRIP[instr.mode] + opmode, admode = instr.v_opmode(), instr.v_admode() + if a is None: + a = ExprInt_fromsize(s, 0) + s = opmode + myesp = mRSP[instr.mode][:s] + + a = a.zeroExtend(s) + + e.append(ExprAff(myesp, (myesp + (ExprInt_fromsize(s, (s / 8)) + a)))) + + c = myesp + if ir.do_stk_segm: + c = ExprOp('segm', SS, c) + e.append(ExprAff(meip, ExprMem(c, size=s).zeroExtend(s))) + # e.append(ExprAff(meip, ExprMem(c, size = s))) + c = myesp + ExprInt_fromsize(s, (s / 8)) + if ir.do_stk_segm: + c = ExprOp('segm', SS, c) + e.append(ExprAff(CS, ExprMem(c, size=16))) + + return meip, e, [] + + +def leave(ir, instr): + opmode, admode = instr.v_opmode(), instr.v_admode() + s = opmode + myesp = mRSP[instr.mode] + + e = [] + e.append(ExprAff(mRBP[s], ExprMem(mRBP[instr.mode], size=s))) + e.append(ExprAff(myesp, + ExprInt_fromsize(instr.mode, instr.mode / 8) + mRBP[instr.mode])) + return None, e, [] + + +def enter(ir, instr, a, b): + opmode, admode = instr.v_opmode(), instr.v_admode() + s = opmode + myesp = mRSP[instr.mode][:s] + myebp = mRBP[instr.mode][:s] + + a = a.zeroExtend(s) + + e = [] + esp_tmp = myesp - ExprInt_fromsize(s, s / 8) + e.append(ExprAff(ExprMem(esp_tmp, + size=s), + myebp)) + e.append(ExprAff(myebp, esp_tmp)) + e.append(ExprAff(myesp, myesp - (a + ExprInt_fromsize(s, s / 8)))) + return None, e, [] + + +def jmp(ir, instr, dst): + e = [] + meip = mRIP[instr.mode] 
def jmpf(ir, instr, a):
    """Build the IR for a far JMP through a segmented memory operand.

    `a` must be an ExprMem whose address is a 'segm' ExprOp (segment, base).
    The far pointer is read as an offset of instr.mode bits at `base`,
    followed by a 16-bit selector; the offset goes to the instruction
    pointer and the selector to CS.

    Returns (instruction pointer, assignments, extra irblocs).
    """
    e = []
    meip = mRIP[instr.mode]
    assert(isinstance(a, ExprMem) and
           isinstance(a.arg, ExprOp) and
           a.arg.op == "segm")
    segm = a.arg.args[0]
    base = a.arg.args[1]
    s = instr.mode
    # NOTE(review): assumes the offset width equals instr.mode -- confirm
    # for operand-size-prefixed and 64-bit encodings.
    # Offset first (same width as the instruction pointer) ...
    m1 = ExprMem(ExprOp('segm', segm, base), s)
    # ... then the 16-bit selector right after it
    m2 = ExprMem(ExprOp('segm', segm, base + ExprInt_from(base, s // 8)), 16)

    e.append(ExprAff(meip, m1))
    e.append(ExprAff(CS, m2))
    return meip, e, []
+ meip = mRIP[instr.mode] + n = ExprId(ir.get_next_label(instr), instr.mode) + dst_o = ExprCond(pf, n, dst).zeroExtend(instr.mode) + e.append(ExprAff(meip, dst_o)) + return dst_o, e, [] + + +def ja(ir, instr, dst): + e = [] + meip = mRIP[instr.mode] + n = ExprId(ir.get_next_label(instr), instr.mode) + dst_o = ExprCond(cf | zf, n, dst).zeroExtend(instr.mode) + e.append(ExprAff(meip, dst_o)) + return dst_o, e, [] + + +def jae(ir, instr, dst): + e = [] + meip = mRIP[instr.mode] + n = ExprId(ir.get_next_label(instr), instr.mode) + dst_o = ExprCond(cf, n, dst).zeroExtend(instr.mode) + e.append(ExprAff(meip, dst_o)) + return dst_o, e, [] + + +def jb(ir, instr, dst): + e = [] + meip = mRIP[instr.mode] + n = ExprId(ir.get_next_label(instr), instr.mode) + dst_o = ExprCond(cf, dst, n).zeroExtend(instr.mode) + e.append(ExprAff(meip, dst_o)) + return dst_o, e, [] + + +def jbe(ir, instr, dst): + e = [] + meip = mRIP[instr.mode] + n = ExprId(ir.get_next_label(instr), instr.mode) + dst_o = ExprCond(cf | zf, dst, n).zeroExtend(instr.mode) + e.append(ExprAff(meip, dst_o)) + return dst_o, e, [] + + +def jge(ir, instr, dst): + e = [] + meip = mRIP[instr.mode] + n = ExprId(ir.get_next_label(instr), instr.mode) + dst_o = ExprCond(nf - of, n, dst).zeroExtend(instr.mode) + e.append(ExprAff(meip, dst_o)) + return dst_o, e, [] + + +def jg(ir, instr, dst): + e = [] + meip = mRIP[instr.mode] + n = ExprId(ir.get_next_label(instr), instr.mode) + dst_o = ExprCond(zf | (nf - of), n, dst).zeroExtend(instr.mode) + e.append(ExprAff(meip, dst_o)) + return dst_o, e, [] + + +def jl(ir, instr, dst): + e = [] + meip = mRIP[instr.mode] + n = ExprId(ir.get_next_label(instr), instr.mode) + dst_o = ExprCond(nf - of, dst, n).zeroExtend(instr.mode) + e.append(ExprAff(meip, dst_o)) + return dst_o, e, [] + + +def jle(ir, instr, dst): + e = [] + meip = mRIP[instr.mode] + n = ExprId(ir.get_next_label(instr), instr.mode) + dst_o = ExprCond(zf | (nf - of), dst, n).zeroExtend(instr.mode) + e.append(ExprAff(meip, 
dst_o)) + return dst_o, e, [] + + +def js(ir, instr, dst): + e = [] + meip = mRIP[instr.mode] + n = ExprId(ir.get_next_label(instr), instr.mode) + dst_o = ExprCond(nf, dst, n).zeroExtend(instr.mode) + e.append(ExprAff(meip, dst_o)) + return dst_o, e, [] + + +def jns(ir, instr, dst): + e = [] + meip = mRIP[instr.mode] + n = ExprId(ir.get_next_label(instr), instr.mode) + dst_o = ExprCond(nf, n, dst).zeroExtend(instr.mode) + e.append(ExprAff(meip, dst_o)) + return dst_o, e, [] + + +def jo(ir, instr, dst): + e = [] + meip = mRIP[instr.mode] + n = ExprId(ir.get_next_label(instr), instr.mode) + dst_o = ExprCond(of, dst, n).zeroExtend(instr.mode) + e.append(ExprAff(meip, dst_o)) + return dst_o, e, [] + + +def jno(ir, instr, dst): + e = [] + meip = mRIP[instr.mode] + n = ExprId(ir.get_next_label(instr), instr.mode) + dst_o = ExprCond(of, n, dst).zeroExtend(instr.mode) + e.append(ExprAff(meip, dst_o)) + return dst_o, e, [] + + +def loop(ir, instr, dst): + e = [] + meip = mRIP[instr.mode] + s = instr.v_opmode() + opmode, admode = s, instr.v_admode() + myecx = mRCX[instr.mode][:admode] + + n = ExprId(ir.get_next_label(instr), instr.mode) + c = myecx - ExprInt_from(myecx, 1) + e.append(ExprAff(myecx, c)) + e.append(ExprAff(meip, ExprCond(c, dst, n).zeroExtend(instr.mode))) + dst_o = ExprCond(myecx, dst, n).zeroExtend(instr.mode) + return dst_o, e, [] + + +def loopne(ir, instr, dst): + e = [] + meip = mRIP[instr.mode] + s = instr.v_opmode() + opmode, admode = s, instr.v_admode() + myecx = mRCX[instr.mode][:admode] + + n = ExprId(ir.get_next_label(instr), instr.mode) + + c = ExprOp('==', + mRCX[instr.mode][:s] - ExprInt_fromsize(s, 1), + ExprInt_fromsize(s, 0)) ^ ExprInt1(1) + c &= zf ^ ExprInt1(1) + + e.append(ExprAff(myecx, myecx - ExprInt_from(myecx, 1))) + e.append(ExprAff(meip, ExprCond(c, dst, n).zeroExtend(instr.mode))) + + # for dst, ecx has been modified! 
# XXX size to do; eflag
def div(ir, instr, a):
    """Build the IR for unsigned DIV by `a`.

    The dividend is AX for an 8-bit divisor, otherwise the xDX:xAX pair
    (xAX in the low half, xDX in the high half). The quotient goes to
    AL / xAX and the remainder to AH / xDX. Flags are not modelled.

    Returns the (dst, assignments, extra irblocs) triple.
    Raises ValueError for an unsupported divisor size.
    """
    e = []
    s = a.size
    if s == 8:
        b = mRAX[instr.mode][:16]
    elif s == 16:
        s1, s2 = mRDX[instr.mode][:16], mRAX[instr.mode][:16]
        # DX:AX -- low word is AX, high word is DX
        b = ExprCompose([(s2, 0, 16),
                         (s1, 16, 32)])
    elif s == 32:
        s1, s2 = mRDX[instr.mode][:32], mRAX[instr.mode][:32]
        b = ExprCompose([(s2, 0, 32),
                         (s1, 32, 64)])
    elif s == 64:
        s1, s2 = mRDX[instr.mode], mRAX[instr.mode]
        b = ExprCompose([(s2, 0, 64),
                         (s1, 64, 128)])
    else:
        raise ValueError('div arg not impl', a)

    c_d = ExprOp('udiv', b, a.zeroExtend(b.size))
    c_r = ExprOp('umod', b, a.zeroExtend(b.size))

    # if 8 bit div, only ax is affected
    if s == 8:
        e.append(ExprAff(b, ExprCompose([(c_d[:8], 0, 8),
                                         (c_r[:8], 8, 16)])))
    else:
        e.append(ExprAff(s1, c_r[:s]))
        e.append(ExprAff(s2, c_d[:s]))
    return None, e, []
mRDX[instr.mode][:32], mRAX[instr.mode][:32] + b = ExprCompose([(s2, 0, 32), + (s1, 32, 64)]) + else: + raise ValueError('div arg not impl', a) + + c_d = ExprOp('idiv', b, a.signExtend(b.size)) + c_r = ExprOp('imod', b, a.signExtend(b.size)) + + # if 8 bit div, only ax is affected + if s == 8: + e.append(ExprAff(b, ExprCompose([(c_d[:8], 0, 8), + (c_r[:8], 8, 16)]))) + else: + e.append(ExprAff(s1, c_r[:s])) + e.append(ExprAff(s2, c_d[:s])) + return None, e, [] + +# XXX size to do; eflag + + +def mul(ir, instr, a): + e = [] + size = a.size + if a.size in [16, 32, 64]: + result = ExprOp('*', + mRAX[instr.mode][:size].zeroExtend(size * 2), + a.zeroExtend(size * 2)) + e.append(ExprAff(mRAX[instr.mode][:size], result[:size])) + e.append(ExprAff(mRDX[instr.mode][:size], result[size:size * 2])) + + elif a.size == 8: + result = ExprOp('*', + mRAX[instr.mode][:8].zeroExtend(16), + a.zeroExtend(16)) + e.append(ExprAff(mRAX[instr.mode][:16], result)) + else: + raise ValueError('unknow size') + + e.append(ExprAff(of, ExprCond(result[size:size * 2], + ExprInt1(1), + ExprInt1(0)))) + e.append(ExprAff(cf, ExprCond(result[size:size * 2], + ExprInt1(1), + ExprInt1(0)))) + + return None, e, [] + + +def imul(ir, instr, a, b=None, c=None): + e = [] + size = a.size + if b is None: + if size in [16, 32, 64]: + result = ExprOp('*', + mRAX[instr.mode][:size].signExtend(size * 2), + a.signExtend(size * 2)) + e.append(ExprAff(mRAX[instr.mode][:size], result[:size])) + e.append(ExprAff(mRDX[instr.mode][:size], result[size:size * 2])) + elif size == 8: + dst = mRAX[instr.mode][:16] + result = ExprOp('*', + mRAX[instr.mode][:8].signExtend(16), + a.signExtend(16)) + + e.append(ExprAff(dst, result)) + e.append( + ExprAff(cf, ExprCond(result - result[:size].signExtend(size * 2), + ExprInt1(1), + ExprInt1(0)))) + e.append( + ExprAff(of, ExprCond(result - result[:size].signExtend(size * 2), + ExprInt1(1), + ExprInt1(0)))) + + else: + if c is None: + c = b + b = a + result = ExprOp('*', + 
def cqo(ir, instr):
    """Build the IR for CQO: sign-extend RAX into RDX:RAX.

    Follows the same pattern as cwd() and cdq(): the accumulator is
    sign-extended to twice its width, the low half is written back to the
    accumulator and the high half to the data register.

    Returns the (dst, assignments, extra irblocs) triple.
    """
    e = []
    tempRAX = mRAX[instr.mode][:64]
    tempRDX = mRDX[instr.mode][:64]
    c = tempRAX.signExtend(128)
    e.append(ExprAff(tempRAX, c[:64]))
    # High half is the full 64 bits [64, 128)
    e.append(ExprAff(tempRDX, c[64:128]))
    return None, e, []
segm support") + addr = ExprOp('segm', mss, addr) + + b = mRAX[instr.mode][:size] + + e0 = [] + e0.append(ExprAff(addr_o, addr_p)) + e0 = irbloc(lbl_df_0.name, lbl_next, [e0]) + + e1 = [] + e1.append(ExprAff(addr_o, addr_m)) + e1 = irbloc(lbl_df_1.name, lbl_next, [e1]) + + e = [] + e.append(ExprAff(ExprMem(addr, size), b)) + + return ExprCond(df, lbl_df_1, lbl_df_0), e, [e0, e1] + + +def lods(ir, instr, size): + lbl_df_0 = ExprId(ir.gen_label(), instr.mode) + lbl_df_1 = ExprId(ir.gen_label(), instr.mode) + lbl_next = ExprId(ir.get_next_label(instr), instr.mode) + e = [] + s = instr.v_admode() + + addr_o = mRSI[instr.mode][:s] + addr = addr_o + addr_p = addr + ExprInt_from(addr, size / 8) + addr_m = addr - ExprInt_from(addr, size / 8) + if ir.do_str_segm: + mss = DS + if instr.additional_info.g2.value: + raise NotImplementedError("add segm support") + addr = ExprOp('segm', mss, addr) + + b = mRAX[instr.mode][:size] + + e0 = [] + e0.append(ExprAff(addr_o, addr_p)) + e0 = irbloc(lbl_df_0.name, lbl_next, [e0]) + + e1 = [] + e1.append(ExprAff(addr_o, addr_m)) + e1 = irbloc(lbl_df_1.name, lbl_next, [e1]) + + e = [] + e.append(ExprAff(b, ExprMem(addr, size))) + + return ExprCond(df, lbl_df_1, lbl_df_0), e, [e0, e1] + + +def movs(ir, instr, size): + lbl_df_0 = ExprId(ir.gen_label(), instr.mode) + lbl_df_1 = ExprId(ir.gen_label(), instr.mode) + lbl_next = ExprId(ir.get_next_label(instr), instr.mode) + + s = instr.v_admode() + # a = ExprMem(mRDI[instr.mode][:s], size) + # b = ExprMem(mRSI[instr.mode][:s], size) + + a = mRDI[instr.mode][:s] + b = mRSI[instr.mode][:s] + + e = [] + src = b + dst = a + if ir.do_str_segm: + if instr.additional_info.g2.value: + raise NotImplementedError("add segm support") + src = ExprOp('segm', DS, src) + dst = ExprOp('segm', ES, dst) + e.append(ExprAff(ExprMem(dst, size), ExprMem(src, size))) + + e0 = [] + e0.append(ExprAff(a, a + ExprInt_from(a, size / 8))) + e0.append(ExprAff(b, b + ExprInt_from(b, size / 8))) + e0 = irbloc(lbl_df_0.name, 
lbl_next, [e0]) + + e1 = [] + e1.append(ExprAff(a, a - ExprInt_from(a, size / 8))) + e1.append(ExprAff(b, b - ExprInt_from(b, size / 8))) + e1 = irbloc(lbl_df_1.name, lbl_next, [e1]) + + return ExprCond(df, lbl_df_1, lbl_df_0), e, [e0, e1] + + +def float_prev(flt): + if not flt in float_list: + return None + i = float_list.index(flt) + if i == 0: + raise ValueError('broken index') + flt = float_list[i - 1] + return flt + + +def float_pop(avoid_flt=None): + avoid_flt = float_prev(avoid_flt) + e = [] + if avoid_flt != float_st0: + e.append(ExprAff(float_st0, float_st1)) + if avoid_flt != float_st1: + e.append(ExprAff(float_st1, float_st2)) + if avoid_flt != float_st2: + e.append(ExprAff(float_st2, float_st3)) + if avoid_flt != float_st3: + e.append(ExprAff(float_st3, float_st4)) + if avoid_flt != float_st4: + e.append(ExprAff(float_st4, float_st5)) + if avoid_flt != float_st5: + e.append(ExprAff(float_st5, float_st6)) + if avoid_flt != float_st6: + e.append(ExprAff(float_st6, float_st7)) + if avoid_flt != float_st7: + e.append(ExprAff(float_st7, ExprInt_from(float_st7, 0))) + e.append( + ExprAff(float_stack_ptr, float_stack_ptr - ExprInt_fromsize(3, 1))) + return e + +# XXX TODO + + +def fcom(ir, instr, a, b): + e = [] + """ + if isinstance(a, ExprMem): + src = ExprOp('mem_%.2d_to_double'%a.size, a) + else: + src = a + """ + src = b + e.append(ExprAff(float_c0, ExprOp('fcom_c0', a, src.zeroExtend(a.size)))) + e.append(ExprAff(float_c1, ExprOp('fcom_c1', a, src.zeroExtend(a.size)))) + e.append(ExprAff(float_c2, ExprOp('fcom_c2', a, src.zeroExtend(a.size)))) + e.append(ExprAff(float_c3, ExprOp('fcom_c3', a, src.zeroExtend(a.size)))) + + e += set_float_cs_eip(instr) + return None, e, [] + + +def ficom(ir, instr, a): + e = [] + e += set_float_cs_eip(instr) + return None, e, [] + + +def fcomi(ir, instr, a): + # Invalid emulation + InvalidEmulation + + +def fcomip(ir, instr, a): + # Invalid emulation + InvalidEmulation + + +def fucomi(ir, instr, a): + # Invalid emulation + 
InvalidEmulation + + +def fucomip(ir, instr, a): + # Invalid emulation, only read/write analysis is valid + cond = ExprOp('fcomp', float_st0, a) + e = [] + e.append( + ExprAff(zf, ExprCond(cond, ExprInt_from(zf, 0), ExprInt_from(zf, 1)))) + e.append( + ExprAff(pf, ExprCond(cond, ExprInt_from(zf, 0), ExprInt_from(zf, 1)))) + e.append( + ExprAff(cf, ExprCond(cond, ExprInt_from(zf, 0), ExprInt_from(zf, 1)))) + return None, e, [] + + +def fcomp(ir, instr, a, b): + dst, e, extra = fcom(ir, instr, a, b) + e += float_pop() + e += set_float_cs_eip(instr) + return dst, e, extra + + +def fld(ir, instr, a): + if isinstance(a, ExprMem): + src = ExprOp('mem_%.2d_to_double' % a.size, a) + else: + src = a + + e = [] + e.append(ExprAff(float_st7, float_st6)) + e.append(ExprAff(float_st6, float_st5)) + e.append(ExprAff(float_st5, float_st4)) + e.append(ExprAff(float_st4, float_st3)) + e.append(ExprAff(float_st3, float_st2)) + e.append(ExprAff(float_st2, float_st1)) + e.append(ExprAff(float_st1, float_st0)) + e.append(ExprAff(float_st0, src)) + e.append( + ExprAff(float_stack_ptr, float_stack_ptr + ExprInt_fromsize(3, 1))) + + e += set_float_cs_eip(instr) + return None, e, [] + + +def fst(ir, instr, a): + e = [] + if isinstance(a, ExprMem): + src = ExprOp('double_to_mem_%2d' % a.size, float_st0) + else: + src = float_st0 + e.append(ExprAff(a, src)) + + e += set_float_cs_eip(instr) + return None, e, [] + + +def fstp(ir, instr, a): + dst, e, extra = fst(ir, instr, a) + e += float_pop(a) + return dst, e, extra + + +def fist(ir, instr, a): + e = [] + e.append(ExprAff(a, ExprOp('double_to_int_%d' % a.size, float_st0))) + + e += set_float_cs_eip(instr) + return None, e, [] + + +def fistp(ir, instr, a): + dst, e, extra = fist(ir, instr, a) + e += float_pop(a) + return dst, e, extra + + +def fild(ir, instr, a): + # XXXXX + src = ExprOp('int_%.2d_to_double' % a.size, a) + e = [] + e += set_float_cs_eip(instr) + dst, e_fld, extra = fld(ir, instr, src) + e += e_fld + return dst, e, extra + + 
def fldz(ir, instr):
    """Push the constant 0 (as 'int_32_to_double') on the x87 stack."""
    return fld(ir, instr, ExprOp('int_32_to_double', ExprInt32(0)))


def fld1(ir, instr):
    """Push the constant 1 (as 'int_32_to_double') on the x87 stack."""
    return fld(ir, instr, ExprOp('int_32_to_double', ExprInt32(1)))


def fldl2e(ir, instr):
    """Push 1 / ln(2) on the x87 stack, encoded as the raw bits of a double."""
    x = struct.pack('d', 1 / math.log(2))
    x = struct.unpack('Q', x)[0]
    return fld(ir, instr, ExprOp('mem_64_to_double', ExprInt64(x)))


def fldlg2(ir, instr):
    """Push log10(2) on the x87 stack, encoded as the raw bits of a double."""
    x = struct.pack('d', math.log10(2))
    x = struct.unpack('Q', x)[0]
    return fld(ir, instr, ExprOp('mem_64_to_double', ExprInt64(x)))


def _mem_to_double(arg):
    """Wrap a memory operand in 'mem_<size>_to_double'; pass others through."""
    if isinstance(arg, ExprMem):
        return ExprOp('mem_%.2d_to_double' % arg.size, arg)
    return arg


def _float_binop(instr, op, a, b, swap=False):
    """Shared body of the non-popping x87 arithmetic instructions.

    With a single operand the destination is st0 and the operand is the
    source.  The result `op(a, src)` (or `op(src, a)` when `swap` is set) is
    written to `a`.
    """
    if b is None:
        b = a
        a = float_st0
    src = _mem_to_double(b)
    if swap:
        result = ExprOp(op, src, a)
    else:
        result = ExprOp(op, a, src)
    e = [ExprAff(a, result)]
    e += set_float_cs_eip(instr)
    return None, e, []


def _float_binop_pop(instr, op, a, b):
    """Shared body of the popping x87 arithmetic instructions.

    `op(a, src)` is written to the register preceding `a` in the float list
    (`float_prev(a)`), then the register stack is popped.
    """
    if b is None:
        b = a
        a = float_st0
    src = _mem_to_double(b)
    e = [ExprAff(float_prev(a), ExprOp(op, a, src))]
    e += set_float_cs_eip(instr)
    e += float_pop(a)
    return None, e, []


def _float_st0_unary(instr, op):
    """Shared body of the instructions computing `st0 = op(st0)`."""
    e = [ExprAff(float_st0, ExprOp(op, float_st0))]
    e += set_float_cs_eip(instr)
    return None, e, []


def fadd(ir, instr, a, b=None):
    """`a = fadd(a, b)`; with one operand, `st0 = fadd(st0, a)`."""
    return _float_binop(instr, 'fadd', a, b)


def faddp(ir, instr, a, b=None):
    """Add like `fadd`, store in the previous stack slot, then pop."""
    return _float_binop_pop(instr, 'fadd', a, b)


def fninit(ir, instr):
    """Only record the float CS/EIP bookkeeping; FPU reset is not modelled."""
    e = []
    e += set_float_cs_eip(instr)
    return None, e, []


def fnstenv(ir, instr, a):
    """Store part of the x87 environment at the memory operand `a`.

    Written fields, at multiples of `instr.mode / 8` bytes from `a.arg`:
    control word (slot 0), status word (slot 1), float EIP (slot 3),
    float CS (slot 4), float address (slot 5) and float DS (slot 6).
    """
    e = []
    # XXX TODO tag word, ...
    status_word = ExprCompose([(ExprInt8(0), 0, 8),
                               (float_c0, 8, 9),
                               (float_c1, 9, 10),
                               (float_c2, 10, 11),
                               (float_stack_ptr, 11, 14),
                               (float_c3, 14, 15),
                               (ExprInt1(0), 15, 16),
                               ])

    s = instr.mode
    ad = ExprMem(a.arg, size=16)
    e.append(ExprAff(ad, float_control))
    ad = ExprMem(a.arg + ExprInt_from(a.arg, s / 8 * 1), size=16)
    e.append(ExprAff(ad, status_word))
    ad = ExprMem(a.arg + ExprInt_from(a.arg, s / 8 * 3), size=s)
    e.append(ExprAff(ad, float_eip[:s]))
    ad = ExprMem(a.arg + ExprInt_from(a.arg, s / 8 * 4), size=16)
    e.append(ExprAff(ad, float_cs))
    ad = ExprMem(a.arg + ExprInt_from(a.arg, s / 8 * 5), size=s)
    e.append(ExprAff(ad, float_address[:s]))
    ad = ExprMem(a.arg + ExprInt_from(a.arg, s / 8 * 6), size=16)
    e.append(ExprAff(ad, float_ds))
    return None, e, []


def fsub(ir, instr, a, b=None):
    """`a = fsub(a, b)`; with one operand, `st0 = fsub(st0, a)`."""
    return _float_binop(instr, 'fsub', a, b)


def fmul(ir, instr, a, b=None):
    """`a = fmul(a, b)`; with one operand, `st0 = fmul(st0, a)`."""
    return _float_binop(instr, 'fmul', a, b)


def fdiv(ir, instr, a, b=None):
    """`a = fdiv(a, b)`; with one operand, `st0 = fdiv(st0, a)`."""
    return _float_binop(instr, 'fdiv', a, b)


def fdivr(ir, instr, a, b=None):
    """Reversed divide: `a = fdiv(b, a)`; one operand: `st0 = fdiv(a, st0)`."""
    return _float_binop(instr, 'fdiv', a, b, swap=True)


def fdivp(ir, instr, a, b=None):
    """Divide like `fdiv`, store in the previous stack slot, then pop."""
    # Invalid emulation
    return _float_binop_pop(instr, 'fdiv', a, b)


def fmulp(ir, instr, a, b=None):
    """Multiply like `fmul`, store in the previous stack slot, then pop."""
    # Invalid emulation
    return _float_binop_pop(instr, 'fmul', a, b)


def ftan(ir, instr, a):
    """`st0 = ftan(a)`, converting a memory operand to double first."""
    e = [ExprAff(float_st0, ExprOp('ftan', _mem_to_double(a)))]
    e += set_float_cs_eip(instr)
    return None, e, []


def fxch(ir, instr, a):
    """Exchange st0 with `a`."""
    # NOTE(review): for a memory operand the second assignment targets the
    # conversion op rather than the memory cell; presumably only register
    # operands reach this function -- confirm.
    src = _mem_to_double(a)
    e = [ExprAff(float_st0, src),
         ExprAff(src, float_st0)]
    e += set_float_cs_eip(instr)
    return None, e, []


def fptan(ir, instr):
    """Push on the x87 stack: st1 receives ftan(st0), st0 receives 1."""
    e = []
    e.append(ExprAff(float_st7, float_st6))
    e.append(ExprAff(float_st6, float_st5))
    e.append(ExprAff(float_st5, float_st4))
    e.append(ExprAff(float_st4, float_st3))
    e.append(ExprAff(float_st3, float_st2))
    e.append(ExprAff(float_st2, float_st1))
    e.append(ExprAff(float_st1, ExprOp('ftan', float_st0)))
    e.append(ExprAff(float_st0, ExprOp('int_32_to_double', ExprInt32(1))))
    e.append(
        ExprAff(float_stack_ptr, float_stack_ptr + ExprInt_fromsize(3, 1)))
    return None, e, []


def frndint(ir, instr):
    """`st0 = frndint(st0)`."""
    return _float_st0_unary(instr, 'frndint')


def fsin(ir, instr):
    """`st0 = fsin(st0)`."""
    return _float_st0_unary(instr, 'fsin')


def fcos(ir, instr):
    """`st0 = fcos(st0)`."""
    return _float_st0_unary(instr, 'fcos')


def fscale(ir, instr):
    """`st0 = fscale(st0, st1)`."""
    e = [ExprAff(float_st0, ExprOp('fscale', float_st0, float_st1))]
    e += set_float_cs_eip(instr)
    return None, e, []


def f2xm1(ir, instr):
    """`st0 = f2xm1(st0)`."""
    return _float_st0_unary(instr, 'f2xm1')


def fsqrt(ir, instr):
    """`st0 = fsqrt(st0)`."""
    return _float_st0_unary(instr, 'fsqrt')


def fabs(ir, instr):
    """`st0 = fabs(st0)`."""
    return _float_st0_unary(instr, 'fabs')


def fnstsw(ir, instr, dst):
    """Store the x87 status word (c0..c3 and stack pointer) into `dst`."""
    args = [(ExprInt8(0), 0, 8),
            (float_c0, 8, 9),
            (float_c1, 9, 10),
            (float_c2, 10, 11),
            (float_stack_ptr, 11, 14),
            (float_c3, 14, 15),
            (ExprInt1(0), 15, 16)]
    e = [ExprAff(dst, ExprCompose(args))]
    return None, e, []


def fnstcw(ir, instr, a):
    """Store the x87 control word into `a`."""
    e = [ExprAff(a, float_control)]
    return None, e, []


def fldcw(ir, instr, a):
    """Load the x87 control word from `a`."""
    e = [ExprAff(float_control, a)]
    return None, e, []


def fwait(ir, instr):
    """No-op.

    Returns an empty extra-block list (it used to be None) so that callers
    iterating or concatenating the third element keep working.
    """
    return None, [], []


def nop(ir, instr, a=None):
    """No-op; the optional operand is ignored."""
    return None, [], []


def _exception_ir(flag_value):
    """Return the IR tuple that sets `exception_flags` to `flag_value`."""
    e = [ExprAff(exception_flags, ExprInt32(flag_value))]
    return None, e, []


def hlt(ir, instr):
    """Raise EXCEPT_PRIV_INSN through `exception_flags`."""
    return _exception_ir(EXCEPT_PRIV_INSN)


def rdtsc(ir, instr):
    """Increment tsc1, then load RAX from tsc1 and RDX from tsc2."""
    myEAX = mRAX[instr.mode]
    myEDX = mRDX[instr.mode]
    e = [ExprAff(tsc1, tsc1 + ExprInt32(1)),
         ExprAff(myEAX, tsc1),
         ExprAff(myEDX, tsc2)]
    return None, e, []


# XXX TODO
def daa(ir, instr):
    """Not modelled: no-op.

    Returns an empty extra-block list (it used to be None), like `nop`.
    """
    return None, [], []


def aam(ir, instr, a):
    """AL = AL % a, AH = AL / a; upper bits of RAX are preserved."""
    tempAL = mRAX[instr.mode][0:8]
    newEAX = ExprCompose([
        (tempAL % a, 0, 8),
        (tempAL / a, 8, 16),
        (mRAX[instr.mode][16:], 16, mRAX[instr.mode].size),
    ])
    e = [ExprAff(mRAX[instr.mode], newEAX)]
    e += update_flag_arith(newEAX)
    return None, e, []


def aad(ir, instr, a):
    """AL = (AL + AH * a) & 0xFF, AH = 0; upper bits of RAX are preserved."""
    tempAL = mRAX[instr.mode][0:8]
    tempAH = mRAX[instr.mode][8:16]
    newEAX = ExprCompose([
        ((tempAL + (tempAH * a)) & ExprInt8(0xFF), 0, 8),
        (ExprInt8(0), 8, 16),
        (mRAX[instr.mode][16:],
         16, mRAX[instr.mode].size),
    ])
    e = [ExprAff(mRAX[instr.mode], newEAX)]
    e += update_flag_arith(newEAX)
    return None, e, []


def _ascii_adjust(instr, add):
    """Shared body of `aaa` (add=True) and `aas` (add=False).

    The adjust condition is true when `(AL & 0xf) - 9` is positive and
    non-zero, or when `af` is set.  When it holds, AL is moved by 6 and AH
    by 1 (upwards for `aaa`, downwards for `aas`); AL is always masked to
    its low nibble.  `af` and `cf` receive the condition.
    """
    al = mRAX[instr.mode][:8]
    ah = mRAX[instr.mode][8:16]
    c = (al & ExprInt8(0xf)) - ExprInt8(9)

    c = (ExprCond(c.msb(), ExprInt1(0), ExprInt1(1)) &
         ExprCond(c, ExprInt1(1), ExprInt1(0)))

    c |= af & ExprInt1(1)
    if add:
        adjusted_al = al + ExprInt8(6)
        adjusted_ah = ah + ExprInt8(1)
    else:
        adjusted_al = al - ExprInt8(6)
        adjusted_ah = ah - ExprInt8(1)
    # set AL
    m_al = ExprCond(c, adjusted_al & ExprInt8(0xF), al & ExprInt8(0xF))
    m_ah = ExprCond(c, adjusted_ah, ah)

    e = []
    e.append(ExprAff(mRAX[instr.mode], ExprCompose([
        (m_al, 0, 8), (m_ah, 8, 16),
        (mRAX[instr.mode][16:], 16, mRAX[instr.mode].size)])))
    e.append(ExprAff(af, c))
    e.append(ExprAff(cf, c))
    return None, e, []


def aaa(ir, instr, ):
    """ASCII adjust after addition (see `_ascii_adjust`)."""
    return _ascii_adjust(instr, True)


def aas(ir, instr, ):
    """ASCII adjust after subtraction (see `_ascii_adjust`)."""
    return _ascii_adjust(instr, False)


def _bit_scan(ir, instr, op, a, b):
    """Shared body of `bsf`/`bsr`.

    zf is set when `b` is zero.  The destination `a` is only written (with
    `op(b)`) in an extra irbloc that is entered when `b` is non-zero.
    """
    lbl_do = ExprId(ir.gen_label(), instr.mode)
    lbl_skip = ExprId(ir.get_next_label(instr), instr.mode)

    e = [ExprAff(zf, ExprCond(b, ExprInt_from(zf, 0), ExprInt_from(zf, 1)))]

    e_do = [ExprAff(a, ExprOp(op, b))]
    return (ExprCond(b, lbl_do, lbl_skip), e,
            [irbloc(lbl_do.name, lbl_skip, [e_do])])


def bsf(ir, instr, a, b):
    """`a = bsf(b)` when `b` is non-zero; zf reflects `b == 0`."""
    return _bit_scan(ir, instr, 'bsf', a, b)


def bsr(ir, instr, a, b):
    """`a = bsr(b)` when `b` is non-zero; zf reflects `b == 0`."""
    return _bit_scan(ir, instr, 'bsr', a, b)


def arpl(ir, instr, a, b):
    """Not modelled: set `exception_flags` to `1 << 7`."""
    return _exception_ir(1 << 7)


def ins(ir, instr, size):
    """Not modelled: set `exception_flags` to `1 << 7`."""
    return _exception_ir(1 << 7)


def sidt(ir, instr, a):
    """Write fixed default IDT descriptor values to the memory operand `a`.

    Raises ValueError unless `a` is a 32-bit ExprMem.
    """
    e = []
    if not isinstance(a, ExprMem) or a.size != 32:
        raise ValueError('not exprmem 32bit instance!!')
    b = a.arg
    # Parenthesised single-argument form prints the same text on Python 2.
    print("DEFAULT SIDT ADDRESS %s!!" % str(a))
    e.append(ExprAff(ExprMem(b, 32), ExprInt32(0xe40007ff)))
    e.append(
        ExprAff(ExprMem(ExprOp("+", b,
                               ExprInt_from(b, 4)), 16), ExprInt16(0x8245)))
    return None, e, []


def sldt(ir, instr, a):
    """Not modelled: raise EXCEPT_PRIV_INSN through `exception_flags`."""
    # XXX TOOD
    return _exception_ir(EXCEPT_PRIV_INSN)


def _cmov_branch(ir, instr, a, b, cond, move_if_true):
    """Shared body of the branch-based conditional moves.

    The move `a = b` (built by `mov`) lives in an extra irbloc; the returned
    destination enters it when `cond` is true (`move_if_true`) or false.
    """
    lbl_do = ExprId(ir.gen_label(), instr.mode)
    lbl_skip = ExprId(ir.get_next_label(instr), instr.mode)
    dum, e_do, extra_irs = mov(ir, instr, a, b)
    if move_if_true:
        dst = ExprCond(cond, lbl_do, lbl_skip)
    else:
        dst = ExprCond(cond, lbl_skip, lbl_do)
    return dst, [], [irbloc(lbl_do.name, lbl_skip, [e_do])]


def cmovz(ir, instr, a, b):
    """Move `b` into `a` when zf is set."""
    return _cmov_branch(ir, instr, a, b, zf, True)


def cmovnz(ir, instr, a, b):
    """Move `b` into `a` when zf is clear."""
    e = [ExprAff(a, ExprCond(zf, a, b))]
    return None, e, []


def cmovge(ir, instr, a, b):
    """Move `b` into `a` when `nf ^ of` is clear."""
    e = [ExprAff(a, ExprCond(nf ^ of, a, b))]
    return None, e, []


def cmovg(ir, instr, a, b):
    """Move `b` into `a` when `zf | (nf ^ of)` is clear."""
    e = [ExprAff(a, ExprCond(zf | (nf ^ of), a, b))]
    return None, e, []


def cmovl(ir, instr, a, b):
    """Move `b` into `a` when `nf ^ of` is set."""
    e = [ExprAff(a, ExprCond(nf ^ of, b, a))]
    return None, e, []


def cmovle(ir, instr, a, b):
    """Move `b` into `a` when `(nf ^ of) | zf` is set."""
    e = [ExprAff(a, ExprCond((nf ^ of) | zf, b, a))]
    return None, e, []


def cmova(ir, instr, a, b):
    """Move `b` into `a` when `cf | zf` is clear."""
    return _cmov_branch(ir, instr, a, b, cf | zf, False)


def cmovae(ir, instr, a, b):
    """Move `b` into `a` when cf is clear."""
    return _cmov_branch(ir, instr, a, b, cf, False)


def cmovbe(ir, instr, a, b):
    """Move `b` into `a` when `cf | zf` is set."""
    return _cmov_branch(ir, instr, a, b, cf | zf, True)


def cmovb(ir, instr, a, b):
    """Move `b` into `a` when cf is set."""
    return _cmov_branch(ir, instr, a, b, cf, True)


def cmovo(ir, instr, a, b):
    """Move `b` into `a` when of is set."""
    e = [ExprAff(a, ExprCond(of, b, a))]
    return None, e, []


def cmovno(ir, instr, a, b):
    """Move `b` into `a` when of is clear."""
    e = [ExprAff(a, ExprCond(of, a, b))]
    return None, e, []


def cmovs(ir, instr, a, b):
    """Move `b` into `a` when the sign flag is set."""
    # SF is called nf in miasm
    e = [ExprAff(a, ExprCond(nf, b, a))]
    return None, e, []


def cmovns(ir, instr, a, b):
    """Move `b` into `a` when the sign flag is clear."""
    # SF is called nf in miasm
    e = [ExprAff(a, ExprCond(nf, a, b))]
    return None, e, []


def icebp(ir, instr):
    """Raise EXCEPT_PRIV_INSN through `exception_flags`."""
    return _exception_ir(EXCEPT_PRIV_INSN)
# XXX


def l_int(ir, instr, a):
    """Software interrupt: EXCEPT_SOFT_BP for vectors 1 and 3, else
    EXCEPT_INT_XX, written to `exception_flags`."""
    # XXX
    if a.arg in [1, 3]:
        except_int = EXCEPT_SOFT_BP
    else:
        except_int = EXCEPT_INT_XX
    return _exception_ir(except_int)


def l_sysenter(ir, instr):
    """Raise EXCEPT_PRIV_INSN through `exception_flags`."""
    return _exception_ir(EXCEPT_PRIV_INSN)

# XXX
def _priv_insn_ir():
    """Return the IR tuple that raises EXCEPT_PRIV_INSN via exception_flags."""
    e = [ExprAff(exception_flags, ExprInt32(EXCEPT_PRIV_INSN))]
    return None, e, []


def l_out(ir, instr, a, b):
    """Not modelled: raise EXCEPT_PRIV_INSN through `exception_flags`."""
    return _priv_insn_ir()

# XXX


def l_outs(ir, instr, size):
    """Not modelled: raise EXCEPT_PRIV_INSN through `exception_flags`."""
    return _priv_insn_ir()

# XXX actually, xlat performs al = (ds:[e]bx + ZeroExtend(al))


def xlat(ir, instr):
    """AL = byte at [RBX + zero-extended AL]."""
    # NOTE(review): the index is always built 32 bits wide while
    # mRBX[instr.mode] follows the mode -- confirm behaviour outside 32-bit.
    e = []
    a = ExprCompose([(ExprInt_fromsize(24, 0), 8, 32),
                     (mRAX[instr.mode][0:8], 0, 8)])
    b = ExprMem(ExprOp('+', mRBX[instr.mode], a), 8)
    e.append(ExprAff(mRAX[instr.mode][0:8], b))
    return None, e, []


def cpuid(ir, instr):
    """Load RAX, RBX, RCX, RDX from 'cpuid' ops indexed 0..3 on RAX."""
    outputs = (mRAX[instr.mode], mRBX[instr.mode],
               mRCX[instr.mode], mRDX[instr.mode])
    e = [ExprAff(reg, ExprOp('cpuid', mRAX[instr.mode], ExprInt32(index)))
         for index, reg in enumerate(outputs)]
    return None, e, []


def bittest_get(a, b):
    """Return `(word, bit_offset)` addressed by bit index `b` within `a`.

    `b` is zero-extended to the size of `a`.  The bit offset is `b` masked
    with `a.size - 1`.  For a memory operand the word is re-read at
    `a.arg + off_byte`, where `off_byte` is derived from `b >> 3`; for any
    other operand the word is `a` itself.
    """
    b = b.zeroExtend(a.size)
    off_bit = ExprOp('&', b, ExprInt_from(a, a.size - 1))
    if isinstance(a, ExprMem):
        off_byte = ((b >> ExprInt_from(a, 3)) &
                    ExprOp('!', ExprInt_from(a, a.size / 8 - 1)))
        d = ExprMem(a.arg + off_byte, a.size)
    else:
        d = a
    return d, off_bit


def bt(ir, instr, a, b):
    """cf = selected bit of `a`."""
    e = []
    b = b.zeroExtend(a.size)
    d, off_bit = bittest_get(a, b)
    d = d >> off_bit
    e.append(ExprAff(cf, d[:1]))
    return None, e, []


def btc(ir, instr, a, b):
    """cf = selected bit of `a`, then complement that bit."""
    e = []
    d, off_bit = bittest_get(a, b)
    e.append(ExprAff(cf, (d >> off_bit)[:1]))

    m = ExprInt_from(a, 1) << off_bit
    e.append(ExprAff(d, d ^ m))

    return None, e, []


def bts(ir, instr, a, b):
    """cf = selected bit of `a`, then set that bit."""
    e = []
    d, off_bit = bittest_get(a, b)
    e.append(ExprAff(cf, (d >> off_bit)[:1]))
    m = ExprInt_from(a, 1) << off_bit
    e.append(ExprAff(d, d | m))

    return None, e, []


def btr(ir, instr, a, b):
    """cf = selected bit of `a`, then clear that bit."""
    e = []
    d, off_bit = bittest_get(a, b)
    e.append(ExprAff(cf, (d >> off_bit)[:1]))
    m = ~(ExprInt_from(a, 1) << off_bit)
    e.append(ExprAff(d, d & m))

    return None, e, []


def into(ir, instr):
    """Not modelled: no-op.

    Returns an empty extra-block list (it used to be None) so that callers
    iterating or concatenating the third element keep working.
    """
    return None, [], []


def l_in(ir, instr, a, b):
    """Not modelled: raise EXCEPT_PRIV_INSN through `exception_flags`."""
    return _priv_insn_ir()


def cmpxchg(ir, instr, a, b):
    """Compare the accumulator with `a` and exchange.

    `cond` is non-zero when the accumulator (low `a.size` bits of RAX)
    differs from `a`.  If they are equal: zf = 1 and `a` receives `b`.
    Otherwise: zf = 0 and the accumulator receives `a`.

    The previous version had the destination update inverted: it stored `b`
    into `a` when the comparison failed.
    """
    e = []

    c = mRAX[instr.mode][:a.size]
    cond = c - a
    e.append(
        ExprAff(zf, ExprCond(cond, ExprInt_from(zf, 0), ExprInt_from(zf, 1))))
    # equal (cond == 0) -> destination takes the source operand
    e.append(ExprAff(a, ExprCond(cond, a, b)))
    # different (cond != 0) -> accumulator takes the destination value
    e.append(ExprAff(c, ExprCond(cond, a, c)))
    return None, e, []


def _load_far_pointer(a, b, seg_reg):
    """Load `a` and `seg_reg` from the far pointer stored at memory `b`.

    The offset part (`a.size` bits) sits at `b.arg`; the 16-bit selector
    follows it, i.e. at `b.arg + a.size / 8`.  The displacement constant is
    sized like the address.  The previous code always used a displacement
    of 2, sized like `a`, which is only right for a 16-bit destination.
    """
    selector_addr = b.arg + ExprInt_from(b.arg, a.size // 8)
    e = [ExprAff(a, ExprMem(b.arg, size=a.size)),
         ExprAff(seg_reg, ExprMem(selector_addr, size=16))]
    return None, e, []


def lds(ir, instr, a, b):
    """Load a far pointer from `b` into ds:`a`."""
    return _load_far_pointer(a, b, ds)


def les(ir, instr, a, b):
    """Load a far pointer from `b` into es:`a`."""
    return _load_far_pointer(a, b, es)


def lss(ir, instr, a, b):
    """Load a far pointer from `b` into ss:`a`."""
    return _load_far_pointer(a, b, ss)


def lahf(ir, instr):
    """Pack cf, 1, pf, 0, af, 0, zf, nf (bit 0 upwards) into bits 8..15 of
    RAX."""
    regs = [cf, ExprInt1(1), pf, ExprInt1(0), af, ExprInt1(0), zf, nf]
    args = [(reg, i, i + 1) for i, reg in enumerate(regs)]
    e = [ExprAff(mRAX[instr.mode][8:16], ExprCompose(args))]
    return None, e, []


def sahf(ir, instr):
    """Load cf, pf, af, zf, nf from bits 0, 2, 4, 6, 7 of bits 8..15 of RAX."""
    tmp = mRAX[instr.mode][8:16]
    e = [ExprAff(cf, tmp[0:1]),
         ExprAff(pf, tmp[2:3]),
         ExprAff(af, tmp[4:5]),
         ExprAff(zf, tmp[6:7]),
         ExprAff(nf, tmp[7:8])]
    return None, e, []


def lar(ir, instr, a, b):
    """`a = access_segment(b)`, `zf = access_segment_ok(b)`."""
    e = [ExprAff(a, ExprOp('access_segment', b)),
         ExprAff(zf, ExprOp('access_segment_ok', b))]
    return None, e, []


def lsl(ir, instr, a, b):
    """`a = load_segment_limit(b)`, `zf = load_segment_limit_ok(b)`."""
    e = [ExprAff(a, ExprOp('load_segment_limit', b)),
         ExprAff(zf, ExprOp('load_segment_limit_ok', b))]
    return None, e, []


def fclex(ir, instr):
    """Not modelled: no-op (empty extra-block list instead of None)."""
    # XXX TODO
    return None, [], []


def fnclex(ir, instr):
    """Not modelled: no-op (empty extra-block list instead of None)."""
    # XXX TODO
    return None, [], []


def l_str(ir, instr, a):
    """`a = load_tr_segment_selector(0)`."""
    e = [ExprAff(a, ExprOp('load_tr_segment_selector', ExprInt32(0)))]
    return None, e, []


def movd(ir, instr, a, b):
    """Move 32 bits: zero-extend `b` into a 64-bit `a`, else `a = b[0:32]`."""
    e = []
    if a.size == 64:
        e.append(ExprAff(a, ExprCompose([(ExprInt32(0), 32, 64), (b, 0, 32)])))
    else:
        e.append(ExprAff(a, b[0:32]))
    return None, e, []


def xorps(ir, instr, a, b):
    """`a = xorps(a, b)`; a memory `b` is re-read with the size of `a`."""
    e = []
    if isinstance(b, ExprMem):
        b = ExprMem(b.arg, a.size)
    e.append(ExprAff(a, ExprOp('xorps', a, b)))
    return None, e, []


def movaps(ir, instr, a, b):
    """`a = b`; a memory operand is resized to match the other operand."""
    e = []
    if isinstance(a, ExprMem):
        a = ExprMem(a.arg, b.size)
    if isinstance(b, ExprMem):
        b = ExprMem(b.arg, a.size)
    e.append(ExprAff(a, b))
    return None, e, []


def pminsw(ir, instr, a, b):
    """`a` keeps its value when `(a - b)` has its msb set, else takes `b`."""
    e = [ExprAff(a, ExprCond((a - b).msb(), a, b))]
    return None, e, []


def cvtsi2sd(ir, instr, a, b):
    """Low `b.size` bits of `a` receive `cvtsi2sd(b)`."""
    e = [ExprAff(a[:b.size], ExprOp('cvtsi2sd', b))]
    return None, e, []


def movss(ir, instr, a, b):
    """Low `b.size` bits of `a` receive `movss(b)`."""
    e = [ExprAff(a[:b.size], ExprOp('movss', b))]
    return None, e, []


def ucomiss(ir, instr, a, b):
    """Set zf/pf/cf from 'ucomiss_*' ops on the low 32 bits; clear of/af/nf."""
    e = []
    e.append(ExprAff(zf, ExprOp('ucomiss_zf', a[:32], b[:32])))
    e.append(ExprAff(pf, ExprOp('ucomiss_pf', a[:32], b[:32])))
    e.append(ExprAff(cf, ExprOp('ucomiss_cf', a[:32], b[:32])))

    e.append(ExprAff(of, ExprInt1(0)))
    e.append(ExprAff(af, ExprInt1(0)))
    e.append(ExprAff(nf, ExprInt1(0)))

    return None, e, []

# Dispatch table: lower-case mnemonic -> semantic function.  Each function is
# called as f(ir, instr, *args) and returns (dst, instr_ir, extra_ir).
# The duplicated 'jecxz' and 'jo' entries of the previous table (same values)
# are listed only once.
mnemo_func = {'mov': mov,
              'xchg': xchg,
              'movzx': movzx,
              'movsx': movsx,
              'movsxd': movsx,
              'lea': lea,
              'add': add,
              'xadd': xadd,
              'adc': adc,
              'sub': sub,
              'sbb': sbb,
              'neg': neg,
              'not': l_not,
              'cmp': l_cmp,
              'xor': xor,
              'or': l_or,
              'and': l_and,
              'test': l_test,
              'rol': l_rol,
              'ror': l_ror,
              'rcl': rcl,
              'rcr': rcr,
              'sar': sar,
              'shr': shr,
              'shrd_cl': shrd_cl,
              'sal': sal,
              'shl': shl,
              'shld_cl': shld_cl,
              'shld': shld,
              'cmc': cmc,
              'clc': clc,
              'stc': stc,
              'cld': cld,
              'std': std,
              'cli': cli,
              'sti': sti,
              'bsf': bsf,
              'bsr': bsr,
              'inc': inc,
              'dec': dec,
              'push': push,
              'pop': pop,
              'sete': sete,
              'setnz': setnz,
              'setl': setl,
              'setg': setg,
              'setge': setge,
              'seta': seta,
              'setae': setae,
              'setb': setb,
              'setbe': setbe,
              'setns': setns,
              'sets': sets,
              'seto': seto,
              'setp': setp,
              'setpe': setp,
              'setnp': setnp,
              'setpo': setnp,
              'setle': setle,
              'setng': setle,
              'setna': setna,
              'setnbe': setnbe,
              'setno': setno,
              'setnc': setnb,
              'setz': sete,
              'setne': setnz,
              'setnb': setae,
              'setnae': setb,
              'setc': setb,
              'setnge': setl,
              'setnl': setge,
              'setnle': setg,
              'setalc': setalc,
              'bswap': bswap,
              'cmpsb': lambda ir, instr: cmps(ir, instr, 8),
              'cmpsw': lambda ir, instr: cmps(ir, instr, 16),
              'cmpsd': lambda ir, instr: cmps(ir, instr, 32),
              'scasb': lambda ir, instr: scas(ir, instr, 8),
              'scasw': lambda ir, instr: scas(ir, instr, 16),
              'scasd': lambda ir, instr: scas(ir, instr, 32),
              'pushfd': pushfd,
              'pushfw': pushfw,
              'popfd': popfd,
              'popfw': popfw,
              'pushad': pushad,
              'pusha': pushad,
              'popad': popad,
              'popa': popad,
              'call': call,
              'ret': ret,
              'retf': retf,
              'leave': leave,
              'enter': enter,
              'jmp': jmp,
              'jmpf': jmpf,
              'jz': jz,
              'je': jz,
              'jcxz': jcxz,
              'jecxz': jecxz,
              'jrcxz': jrcxz,
              'jnz': jnz,
              'jp': jp,
              'jpe': jp,
              'jnp': jnp,
              'ja': ja,
              'jae': jae,
              'jb': jb,
              'jbe': jbe,
              'jg': jg,
              'jge': jge,
              'jl': jl,
              'jle': jle,
              'js': js,
              'jns': jns,
              'jo': jo,
              'jno': jno,
              'loop': loop,
              'loopne': loopne,
              'loope': loope,
              'div': div,
              'mul': mul,
              'imul': imul,
              'idiv': idiv,

              'cbw': cbw,
              'cwde': cwde,
              'cdqe': cdqe,

              'cwd': cwd,
              'cdq': cdq,
              'cqo': cqo,

              'daa': daa,
              'aam': aam,
              'aad': aad,
              'aaa': aaa,
              'aas': aas,
              'shrd': shrd,
              'stosb': lambda ir, instr: stos(ir, instr, 8),
              'stosw': lambda ir, instr: stos(ir, instr, 16),
              'stosd': lambda ir, instr: stos(ir, instr, 32),
              'stosq': lambda ir, instr: stos(ir, instr, 64),

              'lodsb': lambda ir, instr: lods(ir, instr, 8),
              'lodsw': lambda ir, instr: lods(ir, instr, 16),
              'lodsd': lambda ir, instr: lods(ir, instr, 32),

              'movsb': lambda ir, instr: movs(ir, instr, 8),
              'movsw': lambda ir, instr: movs(ir, instr, 16),
              'movsd': lambda ir, instr: movs(ir, instr, 32),
              'movsq': lambda ir, instr: movs(ir, instr, 64),
              'fcomp': fcomp,
              'nop': nop,
              'fnop': nop,  # XXX
              'hlt': hlt,
              'rdtsc': rdtsc,
              'fst': fst,
              'fstp': fstp,
              'fist': fist,
              'fistp': fistp,
              'fld': fld,
              'fldz': fldz,
              'fld1': fld1,
              'fldl2e': fldl2e,
              'fldlg2': fldlg2,
              'fild': fild,
              'fadd': fadd,
              'fninit': fninit,
              'faddp': faddp,
              'fsub': fsub,
              'fmul': fmul,
              'fmulp': fmulp,
              'fdiv': fdiv,
              'fdivr': fdivr,
              'fdivp': fdivp,
              'fxch': fxch,
              'fptan': fptan,
              'frndint': frndint,
              'fsin': fsin,
              'fcos': fcos,
              'fscale': fscale,
              'f2xm1': f2xm1,
              'fsqrt': fsqrt,
              'fabs': fabs,
              'fnstsw': fnstsw,
              'fnstcw': fnstcw,
              'fldcw': fldcw,
              'fwait': fwait,
              'fnstenv': fnstenv,
              'sidt': sidt,
              'sldt': sldt,
              'arpl': arpl,
              'cmovz': cmovz,
              'cmove': cmovz,
              'cmovnz': cmovnz,
              'cmovge': cmovge,
              'cmovnl': cmovge,
              'cmovg': cmovg,
              'cmovl': cmovl,
              'cmova': cmova,
              'cmovae': cmovae,
              'cmovbe': cmovbe,
              'cmovb': cmovb,
              'cmovnge': cmovl,
              'cmovle': cmovle,
              'cmovng': cmovle,
              'cmovo': cmovo,
              'cmovno': cmovno,
              'cmovs': cmovs,
              'cmovns': cmovns,
              'icebp': icebp,
              'int': l_int,
              'xlat': xlat,
              'bt': bt,
              'cpuid': cpuid,
              'fcom': fcom,
              'ficom': ficom,
              'fcomi': fcomi,
              'fcomip': fcomip,
              'fucomi': fucomi,
              'fucomip': fucomip,
              'insb': lambda ir, instr: ins(ir, instr, 8),
              'insw': lambda ir, instr: ins(ir, instr, 16),
              'insd': lambda ir, instr: ins(ir, instr, 32),
              'btc': btc,
              'bts': bts,
              'btr': btr,
              'into': into,
              'in': l_in,
              'outsb': lambda ir, instr: l_outs(ir, instr, 8),
              'outsw': lambda ir, instr: l_outs(ir, instr, 16),
              'outsd': lambda ir, instr: l_outs(ir, instr, 32),

              'out': l_out,
              "sysenter": l_sysenter,
              "cmpxchg": cmpxchg,
              "lds": lds,
              "les": les,
              "lss": lss,
              "lahf": lahf,
              "sahf": sahf,
              "lar": lar,
              "lsl": lsl,
              "fclex": fclex,
              "fnclex": fnclex,
              "str": l_str,
              "movd": movd,
              "movaps": movaps,
              "xorps": xorps,

              "pminsw": pminsw,
              "cvtsi2sd": cvtsi2sd,
              "movss": movss,

              "ucomiss": ucomiss,
              }


class ir_x86_16(ir):
    """IR translator for x86 in 16-bit mode (base of the 32/64-bit ones)."""

    def __init__(self, symbol_pool=None):
        ir.__init__(self, mn_x86, 16, symbol_pool)
        # Segmentation switches; all disabled by default.
        self.do_stk_segm = False
        self.do_ds_segm = False
        self.do_str_segm = False
        self.do_all_segm = False
        self.pc = IP
        self.sp = SP

    def mod_pc(self, instr, instr_ir, extra_ir):
        """Hook to rewrite PC references; a no-op in this class."""
        pass

    def get_ir(self, instr):
        """Translate `instr` and return `(dst, instr_ir, extra_ir)`.

        Steps:
        * optionally wrap memory operands in a 'segm' op (DS when
          `do_ds_segm`, or the prefix-selected segment when `do_all_segm`);
        * dispatch on the lower-cased mnemonic through `mnemo_func`;
        * let `mod_pc` post-process the result;
        * if the instruction has a repeat prefix (`g1 & 6`) and is listed in
          `repeat_mn`, wrap the semantics in a loop on the count register:
          one irbloc runs the instruction body, another decrements the
          counter and tests the end condition.
        """
        args = instr.args[:]
        my_ss = None
        if self.do_ds_segm:
            my_ss = DS
        if self.do_all_segm and instr.additional_info.g2.value:
            my_ss = {1: CS, 2: SS, 3: DS, 4: ES, 5: FS, 6: GS}[
                instr.additional_info.g2.value]
        if my_ss is not None:
            for i, a in enumerate(args):
                if isinstance(a, ExprMem) and not a.is_op_segm():
                    args[i] = ExprMem(ExprOp('segm', my_ss, a.arg), a.size)

        dst, instr_ir, extra_ir = mnemo_func[
            instr.name.lower()](self, instr, *args)
        # mod_pc used to be called twice in a row here; once is enough.
        self.mod_pc(instr, instr_ir, extra_ir)

        instr.additional_info.except_on_instr = False
        if ((instr.additional_info.g1.value & 6) == 0 or
                instr.name not in repeat_mn):
            return dst, instr_ir, extra_ir
        instr.additional_info.except_on_instr = True
        # get instruction size
        # (value currently unused; the lookup raises KeyError when the
        # mnemonic does not end in B/W/D/Q, so it is kept as is)
        s = {"B": 8, "W": 16, "D": 32, 'Q': 64}[instr.name[-1]]
        size = instr.v_opmode()
        c_reg = mRCX[instr.mode][:size]
        zf_val = None
        # set if zf is tested (cmps, scas)
        for e in instr_ir:
            if e.dst == zf:
                zf_val = e.src

        # end condition: counter == 0, optionally combined with zf
        if zf_val is None:
            c_cond = ExprCond(c_reg, ExprInt1(0), ExprInt1(1))
        elif instr.additional_info.g1.value & 2:  # REPNE
            c_cond = ExprCond(c_reg, ExprInt1(0), ExprInt1(1)) | (zf)
        elif instr.additional_info.g1.value & 4:  # REP
            c_cond = ExprCond(
                c_reg, ExprInt1(0), ExprInt1(1)) | (zf ^ ExprInt1(1))

        # gen while
        lbl_do = ExprId(self.gen_label(), instr.mode)
        lbl_end = ExprId(self.gen_label(), instr.mode)
        lbl_skip = ExprId(self.get_next_label(instr), instr.mode)
        lbl_next = ExprId(self.get_next_label(instr), instr.mode)

        # Extra blocks that used to fall through to the next instruction now
        # go to the loop-condition block instead.
        for b in extra_ir:
            b.dst = b.dst.replace_expr({lbl_next: lbl_end})

        cond_bloc = [ExprAff(c_reg, c_reg - ExprInt_from(c_reg, 1))]
        cond_bloc = irbloc(
            lbl_end.name, ExprCond(c_cond, lbl_skip, lbl_do), [cond_bloc])
        e_do = instr_ir

        c = irbloc(lbl_do.name, dst, [e_do])
        c.except_automod = False
        return ExprCond(c_reg, lbl_do, lbl_skip), [], [cond_bloc, c] + extra_ir

    def expr_fix_regs_for_mode(self, e, mode=64):
        """Return `e` with registers substituted per `replace_regs[mode]`."""
        return e.replace_expr(replace_regs[mode])

    def expraff_fix_regs_for_mode(self, e, mode=64):
        """Return a new ExprAff with both sides fixed for `mode`."""
        dst = self.expr_fix_regs_for_mode(e.dst, mode)
        src = self.expr_fix_regs_for_mode(e.src, mode)
        return ExprAff(dst, src)

    def irbloc_fix_regs_for_mode(self, irbloc, mode=64):
        """Fix, in place, every assignment and the destination of `irbloc`.

        Special case for 64 bits: if the destination is a 32-bit register
        present in `replace_regs[64]`, the assignment is rewritten to target
        the full 64-bit register with the source zero-extended.
        """
        for irs in irbloc.irs:
            for i, e in enumerate(irs):
                if mode == 64:
                    if (isinstance(e.dst, ExprId) and e.dst.size == 32 and
                            e.dst in replace_regs[64]):
                        src = self.expr_fix_regs_for_mode(e.src, mode)
                        dst = replace_regs[64][e.dst].arg
                        e = ExprAff(dst, src.zeroExtend(64))
                irs[i] = self.expr_fix_regs_for_mode(e, mode)
        irbloc.dst = self.expr_fix_regs_for_mode(irbloc.dst, mode)


class ir_x86_32(ir_x86_16):
    """IR translator for x86 in 32-bit mode."""

    def __init__(self, symbol_pool=None):
        ir.__init__(self, mn_x86, 32, symbol_pool)
        self.do_stk_segm = False
        self.do_ds_segm = False
        self.do_str_segm = False
        self.do_all_segm = False
        self.pc = EIP
        self.sp = ESP


class ir_x86_64(ir_x86_16):
    """IR translator for x86 in 64-bit mode."""

    def __init__(self, symbol_pool=None):
        ir.__init__(self, mn_x86, 64, symbol_pool)
        self.do_stk_segm = False
        self.do_ds_segm = False
        self.do_str_segm = False
        self.do_all_segm = False
        self.pc = RIP
        self.sp = RSP

    def _fix_rip(self, instr, irs):
        """Replace RIP, in place, by the address of the next instruction.

        The source of every assignment is rewritten; the destination is
        rewritten too unless it is RIP itself.
        """
        next_addr = {self.pc: ExprInt64(instr.offset + instr.l)}
        for i, x in enumerate(irs):
            if x.dst != self.pc:
                x.dst = x.dst.replace_expr(next_addr)
            irs[i] = ExprAff(x.dst, x.src.replace_expr(next_addr))

    def mod_pc(self, instr, instr_ir, extra_ir):
        """Fix RIP-relative references in `instr_ir` and in every block of
        `extra_ir`."""
        # fix RIP for 64 bit
        self._fix_rip(instr, instr_ir)
        for b in extra_ir:
            for irs in b.irs:
                self._fix_rip(instr, irs)