Diffstat
 -rw-r--r--  miasm2/core/__init__.py    |    0
 -rw-r--r--  miasm2/core/asmbloc.py     | 1412
 -rw-r--r--  miasm2/core/bin_stream.py  |  175
 -rw-r--r--  miasm2/core/cpu.py         | 1804
 -rw-r--r--  miasm2/core/graph.py       |  126
 -rw-r--r--  miasm2/core/interval.py    |  238
 -rw-r--r--  miasm2/core/parse_asm.py   |  237
 -rw-r--r--  miasm2/core/utils.py       |   39
8 files changed, 4031 insertions, 0 deletions
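For context, this commit adds the binary-stream wrappers (bin_stream.py) and the asmbloc machinery (asm_label, asm_bloc, asm_symbol_pool, disasmEngine) that together drive recursive-traversal disassembly. Below is a minimal usage sketch of the API introduced here; the mn_x86 architecture class and the 32-bit attrib value are assumptions taken from miasm2.arch.x86.arch, which is not part of this commit, while every other name is defined in the files added below.

    # Hypothetical driver for the classes added in this commit.
    # mn_x86 / attrib=32 are assumed to exist outside this diff.
    from miasm2.arch.x86.arch import mn_x86
    from miasm2.core.bin_stream import bin_stream_str
    from miasm2.core.asmbloc import disasmEngine, bloc2graph

    # wrap raw bytes (mov eax, 1 ; ret) in a bin_stream
    bs = bin_stream_str("\xb8\x01\x00\x00\x00\xc3")

    # recursive-traversal disassembly starting at offset 0
    mdis = disasmEngine(mn_x86, 32, bs)
    blocs = mdis.dis_multibloc(0)

    for b in blocs:
        print b                      # asm_bloc: label, lines, constraints

    # dump the control-flow graph in dot format
    open("graph.dot", "w").write(bloc2graph(blocs, lines=True))

Each returned asm_bloc carries its asm_label, its decoded instruction lines and a set of asm_constraint edges (c_next / c_to / c_bad), which is what bloc2graph walks to emit the dot graph.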
diff --git a/miasm2/core/__init__.py b/miasm2/core/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/miasm2/core/__init__.py diff --git a/miasm2/core/asmbloc.py b/miasm2/core/asmbloc.py new file mode 100644 index 00000000..945eb990 --- /dev/null +++ b/miasm2/core/asmbloc.py @@ -0,0 +1,1412 @@ +#!/usr/bin/env python +#-*- coding:utf-8 -*- + +import logging +import miasm2.expression.expression as m2_expr +from miasm2.expression.modint import moduint, modint +from miasm2.core.graph import DiGraph +from utils import Disasm_Exception +from miasm2.core.graph import DiGraph +import inspect + +log_asmbloc = logging.getLogger("asmbloc") +console_handler = logging.StreamHandler() +console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) +log_asmbloc.addHandler(console_handler) +log_asmbloc.setLevel(logging.WARNING) + + +def whoami(): + return inspect.stack()[2][3] + + +def is_int(a): + return isinstance(a, int) or isinstance(a, long) or \ + isinstance(a, moduint) or isinstance(a, modint) + + +def expr_is_label(e): + if isinstance(e, m2_expr.ExprId) and isinstance(e.name, asm_label): + return True + return False + + +def expr_is_int_or_label(e): + if isinstance(e, m2_expr.ExprInt): + return True + if isinstance(e, m2_expr.ExprId) and isinstance(e.name, asm_label): + return True + return False + + +class asm_label: + + def __init__(self, name="", offset=None): + # print whoami() + self.fixedblocs = False + if is_int(name): + name = "loc_%.16X" % (int(name) & 0xFFFFFFFFFFFFFFFF) + self.name = name + self.attrib = None + if offset is None: + self.offset = offset + else: + self.offset = int(offset) + self._hash = hash((self.name, self.offset)) + + def __str__(self): + if isinstance(self.offset, (int, long)): + return "%s:0x%08x" % (self.name, self.offset) + else: + return "%s:%s" % (self.name, str(self.offset)) + + def __repr__(self): + rep = '<asmlabel ' + if self.name: + rep += repr(self.name) + ' ' + rep += '>' + return rep + + def __hash__(self): + return self._hash + + def __eq__(self, a): + if isinstance(a, asm_label): + return self._hash == a._hash + else: + return False + + +class asm_raw: + + def __init__(self, raw=""): + self.raw = raw + + def __str__(self): + return repr(self.raw) + + +class asm_constraint(object): + c_to = "c_to" + c_next = "c_next" + c_bad = "c_bad" + + def __init__(self, label=None, c_t=c_to): + self.label = label + self.c_t = c_t + self._hash = hash((self.label, self.c_t)) + + def __str__(self): + return "%s:%s" % (str(self.c_t), str(self.label)) + + def __hash__(self): + return self._hash + + def __eq__(self, a): + if isinstance(a, asm_constraint): + return self._hash == a._hash + else: + return False + + +class asm_constraint_next(asm_constraint): + + def __init__(self, label=None): + super(asm_constraint_next, self).__init__( + label, c_t=asm_constraint.c_next) + + +class asm_constraint_to(asm_constraint): + + def __init__(self, label=None): + super(asm_constraint_to, self).__init__( + label, c_t=asm_constraint.c_to) + + +class asm_constraint_bad(asm_constraint): + + def __init__(self, label=None): + super(asm_constraint_bad, self).__init__( + label, c_t=asm_constraint.c_bad) + + +class asm_bloc: + + def __init__(self, label=None): + self.bto = set() + self.lines = [] + self.label = label + + def __str__(self): + out = [] + out.append(str(self.label)) + for l in self.lines: + out.append(str(l)) + if self.bto: + lbls = ["->"] + for l in self.bto: + if l is None: + lbls.append("Unknown? 
") + else: + lbls.append(str(l) + " ") + lbls = '\t'.join(lbls) + out.append(lbls) + return '\n'.join(out) + + def addline(self, l): + self.lines.append(l) + + def addto(self, c): + assert(type(self.bto) is set) + self.bto.add(c) + + def split(self, offset, l): + log_asmbloc.debug('split at %x' % offset) + i = -1 + offsets = [x.offset for x in self.lines] + if not l.offset in offsets: + log_asmbloc.warning( + 'cannot split bloc at %X ' % offset + + 'middle instruction? default middle') + offsets.sort() + return None + new_bloc = asm_bloc(l) + i = offsets.index(offset) + + self.lines, new_bloc.lines = self.lines[:i], self.lines[i:] + flow_mod_instr = self.get_flow_instr() + log_asmbloc.debug('flow mod %r' % flow_mod_instr) + c = asm_constraint(l, asm_constraint.c_next) + # move dst if flowgraph modifier was in original bloc + # (usecase: split delayslot bloc) + if flow_mod_instr: + for xx in self.bto: + log_asmbloc.debug('lbl %s' % xx) + c_next = set( + [x for x in self.bto if x.c_t == asm_constraint.c_next]) + c_to = [x for x in self.bto if x.c_t != asm_constraint.c_next] + self.bto = set([c] + c_to) + new_bloc.bto = c_next + else: + new_bloc.bto = self.bto + self.bto = set([c]) + return new_bloc + + def get_range(self): + if len(self.lines): + return self.lines[0].offset, self.lines[-1].offset + else: + return 0, 0 + + def get_offsets(self): + return [x.offset for x in self.lines] + + def add_cst(self, offset, c_t, symbol_pool): + if type(offset) in [int, long]: + l = symbol_pool.getby_offset_create(offset) + elif type(offset) is str: + l = symbol_pool.getby_name_create(offset) + elif isinstance(offset, asm_label): + l = offset + else: + raise ValueError('unknown offset type %r' % offset) + c = asm_constraint(l, c_t) + self.bto.add(c) + + def get_flow_instr(self): + if not self.lines: + return None + for i in xrange(-1, -1 - self.lines[0].delayslot - 1, -1): + l = self.lines[i] + if l.splitflow() or l.breakflow(): + raise NotImplementedError('not fully functional') + return l + + def get_subcall_instr(self): + if not self.lines: + return None + for i in xrange(-1, -1 - self.lines[0].delayslot - 1, -1): + l = self.lines[i] + if l.is_subcall(): + return l + + def get_next(self): + for x in self.bto: + if x.c_t == asm_constraint.c_next: + return x.label + return None + + +class asm_symbol_pool: + + def __init__(self, no_collision=True): + self.labels = [] + self.s = {} + self.s_offset = {} + self.no_collision = no_collision + self.label_num = 0 + + def add_label(self, name="", offset=None): + """ + This should be the only method to create new asm_label objects + """ + l = asm_label(name, offset) + collision = None + if l.offset in self.s_offset and l != self.s_offset[l.offset]: + collision = 'offset' + if l.name in self.s and l != self.s[l.name]: + collision = 'name' + if self.no_collision and collision == 'offset': + raise ValueError('symbol %s has same offset as %s' % + (l, self.s_offset[l.offset])) + if self.no_collision and collision == 'name': + raise ValueError( + 'symbol %s has same name as %s' % (l, self.s[l.name])) + self.labels.append(l) + if l.offset is not None: + self.s_offset[l.offset] = l + if l.name != "": + self.s[l.name] = l + return l + + def remove(self, obj): + """ + obj can be an asm_label or an offset + """ + if isinstance(obj, asm_label): + if obj.name in self.s: + del(self.s[obj.name]) + if obj.offset is not None and obj.offset in self.s_offset: + del(self.s_offset[obj.offset]) + else: + offset = int(obj) + if offset in self.s_offset: + obj = self.s_offset[offset] + 
del(self.s_offset[offset]) + if obj.name in self.s: + del(self.s[obj.name]) + + def del_offset(self, l=None): + if l is not None: + if l.offset in self.s_offset: + del(self.s_offset[l.offset]) + l.offset = None + else: + self.s_offset = {} + for l in self.s: + self.s[l].offset = None + + def getby_offset(self, offset): + return self.s_offset.get(offset, None) + + def getby_name(self, name): + return self.s.get(name, None) + + def getby_name_create(self, name): + l = self.getby_name(name) + if l is None: + l = self.add_label(name) + return l + + def getby_offset_create(self, offset): + l = self.getby_offset(offset) + if l is None: + l = self.add_label(offset, offset) + return l + + def rename(self, s, newname): + if not s.name in self.s: + log_asmbloc.warn('unk symb') + return + del(self.s[s.name]) + s.name = newname + self.s[s.name] = s + + def set_offset(self, label, offset): + # Note that there is a special case when the offset is a list + # it happens when offsets are recomputed in resolve_symbol* + if not label in self.labels: + raise ValueError('label %s not in symbol pool' % label) + if not isinstance(label.offset, list) and label.offset in self.s_offset: + del(self.s_offset[label.offset]) + label.offset = offset + if not isinstance(label.offset, list): + self.s_offset[label.offset] = label + + def items(self): + return self.labels[:] + + def __str__(self): + return reduce(lambda x, y: x + str(y) + '\n', self.labels, "") + + def __in__(self, obj): + if obj in self.s: + return True + if obj in self.s_offset: + return True + return False + + def __getitem__(self, item): + if item in self.s: + return self.s[item] + if item in self.s_offset: + return self.s_offset[item] + raise KeyError('unknown symbol %r' % item) + + def __contains__(self, item): + return item in self.s or item in self.s_offset + + def merge(self, symbol_pool): + self.labels += symbol_pool.labels + self.s.update(symbol_pool.s) + self.s_offset.update(symbol_pool.s_offset) + + def gen_label(self): + l = self.add_label("lbl_gen_%.8X" % (self.label_num)) + self.label_num += 1 + return l + + +def dis_bloc(mnemo, pool_bin, cur_bloc, offset, job_done, symbol_pool, + dont_dis=[], split_dis=[ + ], follow_call=False, patch_instr_symb=True, + dontdis_retcall=False, lines_wd=None, + dis_bloc_callback=None, dont_dis_nulstart_bloc=False, + attrib={}): + # pool_bin.offset = offset + lines_cpt = 0 + in_delayslot = False + delayslot_count = mnemo.delayslot + offsets_to_dis = set() + add_next_offset = False + log_asmbloc.debug("dis at %X" % int(offset)) + while not in_delayslot or delayslot_count > 0: + if in_delayslot: + delayslot_count -= 1 + + if offset in dont_dis or (lines_cpt > 0 and offset in split_dis): + cur_bloc.add_cst(offset, asm_constraint.c_next, symbol_pool) + offsets_to_dis.add(offset) + break + + lines_cpt += 1 + if lines_wd is not None and lines_cpt > lines_wd: + # log_asmbloc.warning( "lines watchdog reached at %X"%int(offset)) + break + + if offset in job_done: + cur_bloc.add_cst(offset, asm_constraint.c_next, symbol_pool) + break + + off_i = offset + try: + # print repr(pool_bin.getbytes(offset, 4)) + instr = mnemo.dis(pool_bin, attrib, offset) + except (Disasm_Exception, IOError), e: + log_asmbloc.warning(e) + instr = None + + if instr is None: + log_asmbloc.warning("cannot disasm at %X" % int(off_i)) + cur_bloc.add_cst(off_i, asm_constraint.c_bad, symbol_pool) + break + + # XXX TODO nul start block option + if dont_dis_nulstart_bloc and instr.b.count('\x00') == instr.l: + log_asmbloc.warning("reach nul instr at %X" 
% int(off_i)) + cur_bloc.add_cst(off_i, asm_constraint.c_bad, symbol_pool) + break + + # special case: flow graph modificator in delayslot + if in_delayslot and instr and (instr.splitflow() or instr.breakflow()): + add_next_offset = True + break + + job_done.add(offset) + log_asmbloc.debug("dis at %X" % int(offset)) + + offset += instr.l + log_asmbloc.debug(instr) + log_asmbloc.debug(instr.args) + + cur_bloc.addline(instr) + if not instr.breakflow(): + continue + # test split + if instr.splitflow() and not (instr.is_subcall() and dontdis_retcall): + add_next_offset = True + # cur_bloc.add_cst(n, asm_constraint.c_next, symbol_pool) + pass + if instr.dstflow(): + instr.dstflow2label(symbol_pool) + dst = instr.getdstflow(symbol_pool) + dstn = [] + for d in dst: + if isinstance(d, m2_expr.ExprId) and isinstance(d.name, asm_label): + dstn.append(d.name) + dst = dstn + if (not instr.is_subcall()) or follow_call: + cur_bloc.bto.update( + [asm_constraint(x, asm_constraint.c_to) for x in dst]) + + # get in delayslot mode + in_delayslot = True + delayslot_count = instr.delayslot + + for c in cur_bloc.bto: + if c.c_t == asm_constraint.c_bad: + continue + if isinstance(c.label, asm_label): + offsets_to_dis.add(c.label.offset) + + if add_next_offset: + cur_bloc.add_cst(offset, asm_constraint.c_next, symbol_pool) + offsets_to_dis.add(offset) + + if dis_bloc_callback is not None: + dis_bloc_callback( + mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis, symbol_pool) + # print 'dst', [hex(x) for x in offsets_to_dis] + return offsets_to_dis + + +def split_bloc(mnemo, attrib, pool_bin, blocs, + symbol_pool, more_ref=None, dis_bloc_callback=None): + i = -1 + err = False + if not more_ref: + more_ref = [] + + # get all possible dst + bloc_dst = [symbol_pool.s_offset[x] for x in more_ref] + for b in blocs: + for c in b.bto: + if not isinstance(c.label, asm_label): + continue + if c.c_t == asm_constraint.c_bad: + continue + bloc_dst.append(c.label) + + bloc_dst = [x.offset for x in bloc_dst if x.offset is not None] + + j = -1 + while j < len(blocs) - 1: + j += 1 + cb = blocs[j] + a, b = cb.get_range() + + for off in bloc_dst: + if not (off > a and off <= b): + continue + l = symbol_pool.getby_offset_create(off) + new_b = cb.split(off, l) + log_asmbloc.debug("split bloc %x" % off) + if new_b is None: + log_asmbloc.error("cannot split %x!!" 
% off) + err = True + break + if dis_bloc_callback: + offsets_to_dis = set( + [x.label.offset for x in new_b.bto + if isinstance(x.label, asm_label)]) + dis_bloc_callback( + mnemo, attrib, pool_bin, new_b, offsets_to_dis, + symbol_pool) + blocs.append(new_b) + a, b = cb.get_range() + + """ + if err: + break + """ + return blocs + + +def dis_bloc_all(mnemo, pool_bin, offset, job_done, symbol_pool, dont_dis=[], + split_dis=[], follow_call=False, patch_instr_symb=True, + dontdis_retcall=False, + blocs_wd=None, lines_wd=None, blocs=None, + dis_bloc_callback=None, dont_dis_nulstart_bloc=False, + attrib={}): + log_asmbloc.info("dis bloc all") + if blocs is None: + blocs = [] + todo = [offset] + + bloc_cpt = 0 + while len(todo): + bloc_cpt += 1 + if blocs_wd is not None and bloc_cpt > blocs_wd: + log_asmbloc.debug("blocs watchdog reached at %X" % int(offset)) + break + + n = int(todo.pop(0)) + if n is None: + continue + if n in job_done: + continue + + if n in dont_dis: + continue + dd_flag = False + for dd in dont_dis: + if not isinstance(dd, tuple): + continue + dd_a, dd_b = dd + if dd_a <= n < dd_b: + dd_flag = True + break + if dd_flag: + continue + l = symbol_pool.getby_offset_create(n) + cur_bloc = asm_bloc(l) + todo += dis_bloc(mnemo, pool_bin, cur_bloc, n, job_done, symbol_pool, + dont_dis, split_dis, follow_call, patch_instr_symb, + dontdis_retcall, + dis_bloc_callback=dis_bloc_callback, + lines_wd=lines_wd, + dont_dis_nulstart_bloc=dont_dis_nulstart_bloc, + attrib=attrib) + blocs.append(cur_bloc) + + return split_bloc(mnemo, attrib, pool_bin, blocs, + symbol_pool, dis_bloc_callback=dis_bloc_callback) + + +def bloc2graph(blocs, label=False, lines=True): + # rankdir=LR; + out = """ +digraph asm_graph { +size="80,50"; +node [ +fontsize = "16", +shape = "box" +]; +""" + for b in blocs: + out += '%s [\n' % b.label.name + out += 'label = "' + + out += b.label.name + "\\l\\\n" + if lines: + for l in b.lines: + if label: + out += "%.8X " % l.offset + out += ("%s\\l\\\n" % l).replace('"', '\\"') + out += '"\n];\n' + + for b in blocs: + for n in b.bto: + # print 'xxxx', n.label, n.label.__class__ + # if isinstance(n.label, ExprId): + # print n.label.name, n.label.name.__class__ + if isinstance(n.label, m2_expr.ExprId): + dst, name, cst = b.label.name, n.label.name, n.c_t + # out+='%s -> %s [ label = "%s" ];\n'%(b.label.name, + # n.label.name, n.c_t) + elif isinstance(b.label, asm_label): + dst, name, cst = b.label.name, n.label.name, n.c_t + else: + continue + out += '%s -> %s [ label = "%s" ];\n' % (dst, name, cst) + + out += "}" + return out + + +def conservative_asm(mnemo, mode, instr, symbols, conservative): + """ + Asm instruction; + Try to keep original instruction bytes if it exists + """ + candidates = mnemo.asm(instr, symbols) + if not candidates: + raise ValueError('cannot asm:%s' % str(instr)) + if not hasattr(instr, "b"): + return candidates[0], candidates + if instr.b in candidates: + return instr.b, candidates + if conservative: + for c in candidates: + if len(c) == len(instr.b): + return c, candidates + return candidates[0], candidates + + +def guess_blocs_size(mnemo, mode, blocs, symbols): + """ + Asm and compute max bloc length + """ + for b in blocs: + log_asmbloc.debug('---') + blen = 0 + blen_max = 0 + for instr in b.lines: + if isinstance(instr, asm_raw): + candidates = [instr.raw] + c = instr.raw + data = c + l = len(c) + else: + l = mnemo.max_instruction_len + data = None + instr.data = data + instr.l = l + blen += l + + b.blen = blen + # bloc with max rel values encoded + 
b.blen_max = blen + blen_max + log_asmbloc.info("blen: %d max: %d" % (b.blen, b.blen_max)) + + +def group_blocs(blocs): + """ + this function group asm blocs with next constraints + """ + log_asmbloc.info('group_blocs') + # group adjacent blocs + rest = blocs[:] + groups_bloc = {} + d = dict([(x.label, x) for x in rest]) + log_asmbloc.debug([str(x.label) for x in rest]) + + while rest: + b = [rest.pop()] + # find recursive son + fini = False + while not fini: + fini = True + for c in b[-1].bto: + if c.c_t != asm_constraint.c_next: + continue + if c.label in d and d[c.label] in rest: + b.append(d[c.label]) + rest.remove(d[c.label]) + fini = False + break + # check if son in group: + found_in_group = False + for c in b[-1].bto: + if c.c_t != asm_constraint.c_next: + continue + if c.label in groups_bloc: + b += groups_bloc[c.label] + del(groups_bloc[c.label]) + groups_bloc[b[0].label] = b + found_in_group = True + break + + if not found_in_group: + groups_bloc[b[0].label] = b + + # create max label range for bigbloc + for l in groups_bloc: + l.total_max_l = reduce(lambda x, y: x + y.blen_max, groups_bloc[l], 0) + log_asmbloc.debug(("offset totalmax l", l.offset, l.total_max_l)) + if is_int(l.offset): + hof = hex(int(l.offset)) + else: + hof = l.name + log_asmbloc.debug(("offset totalmax l", hof, l.total_max_l)) + return groups_bloc + + +def gen_free_space_intervals(f, max_offset=0xFFFFFFFF): + interval = {} + offset_label = dict([(x.offset_free, x) for x in f]) + offset_label_order = offset_label.keys() + offset_label_order.sort() + offset_label_order.append(max_offset) + offset_label_order.reverse() + + unfree_stop = 0L + while len(offset_label_order) > 1: + offset = offset_label_order.pop() + offset_end = offset + f[offset_label[offset]] + prev = 0 + if unfree_stop > offset_end: + space = 0 + else: + space = offset_label_order[-1] - offset_end + if space < 0: + space = 0 + interval[offset_label[offset]] = space + if offset_label_order[-1] in offset_label: + prev = offset_label[offset_label_order[-1]] + prev = f[prev] + + interval[offset_label[offset]] = space + + unfree_stop = max( + unfree_stop, offset_end, offset_label_order[-1] + prev) + return interval + + +def add_dont_erase(f, dont_erase=[]): + tmp_symbol_pool = asm_symbol_pool() + for a, b in dont_erase: + l = tmp_symbol_pool.add_label(a, a) + l.offset_free = a + f[l] = b - a + return + + +def gen_non_free_mapping(group_bloc, dont_erase=[]): + non_free_mapping = {} + # calculate free space for bloc placing + for g in group_bloc: + rest_len = 0 + g.fixedblocs = False + # if a label in the group is fixed + diff_offset = 0 + for b in group_bloc[g]: + if not is_int(b.label.offset): + diff_offset += b.blen_max + continue + g.fixedblocs = True + g.offset_free = b.label.offset - diff_offset + break + if g.fixedblocs: + non_free_mapping[g] = g.total_max_l + + log_asmbloc.debug("non free bloc:") + log_asmbloc.debug(non_free_mapping) + add_dont_erase(non_free_mapping, dont_erase) + log_asmbloc.debug("non free more:") + log_asmbloc.debug(non_free_mapping) + return non_free_mapping + + +def resolve_symbol( + group_bloc, symbol_pool, dont_erase=[], max_offset=0xFFFFFFFF): + """ + place all asmblocs + """ + log_asmbloc.info('resolve_symbol') + log_asmbloc.info(str(dont_erase)) + bloc_list = [] + unr_bloc = reduce(lambda x, y: x + group_bloc[y], group_bloc, []) + ending_ad = [] + + non_free_mapping = gen_non_free_mapping(group_bloc, dont_erase) + free_interval = gen_free_space_intervals(non_free_mapping, max_offset) + 
log_asmbloc.debug(free_interval) + + # first big ones + g_tab = [(x.total_max_l, x) for x in group_bloc] + g_tab.sort() + g_tab.reverse() + g_tab = [x[1] for x in g_tab] + + # g_tab => label of grouped blov + # group_bloc => dict of grouped bloc labeled-key + + # first, near callee placing algo + for g in g_tab: + if g.fixedblocs: + continue + finish = False + for x in group_bloc: + if not x in free_interval.keys(): + continue + if free_interval[x] < g.total_max_l: + continue + + for b in group_bloc[x]: + for c in b.bto: + if c.label == g: + tmp = free_interval[x] - g.total_max_l + log_asmbloc.debug( + "consumed %d rest: %d" % (g.total_max_l, int(tmp))) + free_interval[g] = tmp + del(free_interval[x]) + symbol_pool.set_offset( + g, [group_bloc[x][-1].label, group_bloc[x][-1], 1]) + g.fixedblocs = True + finish = True + break + if finish: + break + if finish: + break + + # second, bigger in smaller algo + for g in g_tab: + if g.fixedblocs: + continue + # chose smaller free_interval first + k_tab = [(free_interval[x], x) for x in free_interval] + k_tab.sort() + k_tab = [x[1] for x in k_tab] + # choose free_interval + for k in k_tab: + if g.total_max_l > free_interval[k]: + continue + symbol_pool.set_offset( + g, [group_bloc[k][-1].label, group_bloc[k][-1], 1]) + tmp = free_interval[k] - g.total_max_l + log_asmbloc.debug( + "consumed %d rest: %d" % (g.total_max_l, int(tmp))) + free_interval[g] = tmp + del(free_interval[k]) + + g.fixedblocs = True + break + + while unr_bloc: + # propagate know offset + resolving = False + i = 0 + while i < len(unr_bloc): + if unr_bloc[i].label.offset is None: + i += 1 + continue + resolving = True + log_asmbloc.info("bloc %s resolved" % unr_bloc[i].label) + bloc_list.append((unr_bloc[i], 0)) + g_found = None + for g in g_tab: + if unr_bloc[i] in group_bloc[g]: + if g_found is not None: + raise ValueError('blocin multiple group!!!') + g_found = g + my_group = group_bloc[g_found] + + index = my_group.index(unr_bloc[i]) + if index > 0 and my_group[index - 1] in unr_bloc: + symbol_pool.set_offset( + my_group[index - 1].label, + [unr_bloc[i].label, unr_bloc[i - 1], -1]) + if index < len(my_group) - 1 and my_group[index + 1] in unr_bloc: + symbol_pool.set_offset( + my_group[index + 1].label, + [unr_bloc[i].label, unr_bloc[i], 1]) + del unr_bloc[i] + + if not resolving: + log_asmbloc.warn("cannot resolve symbol! (no symbol fix found)") + else: + continue + + for g in g_tab: + print g + if g.fixedblocs: + print "fixed" + else: + print "not fixed" + raise ValueError('enable to fix bloc') + return bloc_list + + +def calc_symbol_offset(symbol_pool): + s_to_use = set() + + s_dependent = {} + + for label in symbol_pool.items(): + if label.offset is None: + # raise ValueError("symbol missing?", label) + #print "symbol missing?? 
%s" % label + label.offset_g = None + continue + if not is_int(label.offset): + # construct dependant blocs tree + s_d = label.offset[0] + if not s_d in s_dependent: + s_dependent[s_d] = set() + s_dependent[s_d].add(label) + else: + s_to_use.add(label) + label.offset_g = label.offset + + while s_to_use: + label = s_to_use.pop() + if not label in s_dependent: + continue + for l in s_dependent[label]: + if label.offset_g is None: + raise ValueError("unknown symbol: %s" % str(label.name)) + l.offset_g = label.offset_g + l.offset_g[1].blen * l.offset_g[2] + s_to_use.add(l) + + +def asmbloc_final(mnemo, mode, blocs, symbol_pool, symb_reloc_off=None, conservative = False): + log_asmbloc.info("asmbloc_final") + if symb_reloc_off is None: + symb_reloc_off = {} + fini = False + # asm with minimal instr len + # check if dst label are ok to this encoded form + # recompute if not + # TODO XXXX: implement todo list to remove n^high complexity! + while fini is not True: + + fini = True + my_symb_reloc_off = {} + + calc_symbol_offset(symbol_pool) + + symbols = asm_symbol_pool() + for s, v in symbol_pool.s.items(): + symbols.add_label(s, v.offset_g) + # print symbols + # test if bad encoded relative + for b, t in blocs: + + offset_i = 0 + blen = 0 + my_symb_reloc_off[b.label] = [] + for instr in b.lines: + if isinstance(instr, asm_raw): + offset_i += instr.l # len(instr.data) + continue + # if not [True for a in instr.arg if mnemo.has_symb(a)]: + # offset_i+=len(instr.data) + # continue + sav_a = instr.args[:] # [a.expr for a in instr.args] + # print [str(x) for x in sav_a] + args_e = instr.resolve_args_with_symbols(symbols) + for i, e in enumerate(args_e): + # print 'ee', e.size, e + instr.args[i] = e + + instr.offset = b.label.offset_g + offset_i + if instr.dstflow(): + # instr.l = len(instr.data) + instr.fixDstOffset() + """ + lbls = {} + xxx = instr.getdstflow() + if len(xxx) !=1: + raise ValueError('multi dst ?!') + label = mnemo.get_label(xxx[0]) + is_mem = mnemo.is_mem(xxx[0]) + lbls[label.name] = label.offset_g + instr.fixdst(lbls, b.label.offset_g+b.blen, is_mem) + """ + # else: + # instr.arg = [mnemo.fix_symbol(a, symbol_pool) + # for a in instr.arg] + # pass + symbol_reloc_off = [] + old_l = instr.l + c, candidates = conservative_asm( + mnemo, mode, instr, symbol_reloc_off, conservative) + + # print "XXXX", instr + # print candidates + for i, e in enumerate(sav_a): + instr.args[i] = e + + if len(c) != instr.l: + # good len, bad offset...XXX + b.blen = b.blen - old_l + len(c) + instr.data = c + instr.l = len(c) + fini = False + continue + found = False + for cpos, c in enumerate(candidates): + # if len(c) == len(instr.data): + if len(c) == instr.l: + # print 'UPDD', repr(instr.data), repr(c) + # b.blen = b.blen-old_l+len(c) + instr.data = c + instr.l = len(c) + + found = True + break + if not found: + raise ValueError('something wrong in instr.data') + + if cpos < len(symbol_reloc_off): + my_s = symbol_reloc_off[cpos] + else: + my_s = None + + if my_s is not None: + my_symb_reloc_off[b.label].append(offset_i + my_s) + offset_i += instr.l + blen += instr.l + assert(len(instr.data) == instr.l) + # we have fixed all relative values + # recompute good offsets + for label in symbol_pool.items(): + # if label.offset_g is None: + # fdfd + symbol_pool.set_offset(label, label.offset_g) + + for a, b in my_symb_reloc_off.items(): + symb_reloc_off[a] = b + + +def asm_resolve_final(mnemo, mode, blocs, symbol_pool, dont_erase=[], + max_offset=0xFFFFFFFF, + symb_reloc_off=None, constrain_pos=False): + if 
symb_reloc_off is None: + symb_reloc_off = {} + # asmbloc(mnemo, mode, blocs, symbol_pool) + guess_blocs_size(mnemo, mode, blocs, symbol_pool) + bloc_g = group_blocs(blocs) + + resolved_b = resolve_symbol(bloc_g, symbol_pool, dont_erase=dont_erase, + max_offset=max_offset) + + asmbloc_final(mnemo, mode, resolved_b, symbol_pool, symb_reloc_off) + written_bytes = {} + patches = {} + for b, t in resolved_b: + offset = b.label.offset + for i in b.lines: + assert(i.data is not None) + patches[offset] = i.data + for c in range(i.l): + if offset + c in written_bytes: + raise ValueError( + "overlapping bytes in asssembly %X" % int(offset)) + written_bytes[offset + c] = 1 + i.offset = offset + i.l = i.l + offset += i.l + + return resolved_b, patches + + +def blist2graph(ab): + """ + ab: list of asmbloc + return: graph of asmbloc + """ + g = DiGraph() + g.lbl2bloc = {} + for b in ab: + g.lbl2bloc[b.label] = b + g.add_node(b.label) + for x in b.bto: + g.add_edge(b.label, x.label) + return g + + +class basicblocs: + + def __init__(self, ab=[]): + self.blocs = {} + self.g = DiGraph() + self.add_blocs(ab) + + def add(self, b): + self.blocs[b.label] = b + self.g.add_node(b.label) + for dst in b.bto: + if isinstance(dst.label, asm_label): + self.g.add_edge(b.label, dst.label) + + def add_blocs(self, ab): + for b in ab: + self.add(b) + + def get_bad_dst(self): + o = set() + for b in self.blocs.values(): + for c in b.bto: + if c.c_t == asm_constraint.c_bad: + o.add(b) + return o + + +def find_parents(blocs, l): + p = set() + for b in blocs: + if l in [x.label for x in b.bto if isinstance(x.label, asm_label)]: + p.add(b.label) + return p + + +def bloc_blink(blocs): + for b in blocs: + b.parents = find_parents(blocs, b.label) + + +def getbloc_around(blocs, a, level=3, done=None, blocby_label=None): + + if not blocby_label: + blocby_label = {} + for b in blocs: + blocby_label[b.label] = b + if done is None: + done = set() + + done.add(a) + if not level: + return done + for b in a.parents: + b = blocby_label[b] + if b in done: + continue + done.update(getbloc_around(blocs, b, level - 1, done, blocby_label)) + for b in a.bto: + b = blocby_label[b.label] + if b in done: + continue + done.update(getbloc_around(blocs, b, level - 1, done, blocby_label)) + return done + + +def getbloc_parents(blocs, a, level=3, done=None, blocby_label=None): + + if not blocby_label: + blocby_label = {} + for b in blocs: + blocby_label[b.label] = b + if done is None: + done = set() + + done.add(a) + if not level: + return done + for b in a.parents: + b = blocby_label[b] + if b in done: + continue + done.update(getbloc_parents(blocs, b, level - 1, done, blocby_label)) + return done + +# get ONLY level_X parents + + +def getbloc_parents_strict( + blocs, a, level=3, rez=None, done=None, blocby_label=None): + + if not blocby_label: + blocby_label = {} + for b in blocs: + blocby_label[b.label] = b + if rez is None: + rez = set() + if done is None: + done = set() + + done.add(a) + if level == 0: + rez.add(a) + if not level: + return rez + for b in a.parents: + b = blocby_label[b] + if b in done: + continue + rez.update(getbloc_parents_strict( + blocs, b, level - 1, rez, done, blocby_label)) + return rez + + +def bloc_find_path_next(blocs, blocby_label, a, b, path=None): + if path == None: + path = [] + if a == b: + return [path] + + all_path = [] + for x in a.bto: + if x.c_t != asm_constraint.c_next: + continue + if not x.label in blocby_label: + print 'XXX unknown label' + continue + x = blocby_label[x.label] + all_path += 
bloc_find_path_next(blocs, blocby_label, x, b, path + [a]) + # stop if at least one path found + if all_path: + return all_path + return all_path + + +def bloc_merge(blocs, symbol_pool, dont_merge=[]): + i = -1 + """ + # TODO XXXX implement find all path for digraph + + g = blist2graph(blocs) + g.lbl2node = dict([(b.label, b) for b in blocs]) + + while i<len(blocs)-1: + i+=1 + b = blocs[i] + if b.label in dont_merge: + continue + + successors = [x for x in g.successors(b.label)] + predecessors = [x for x in g.predecessors(b.label)] + # if bloc doesn't self ref + if b.label in successors: + continue + # and bloc has only one parent + if len(predecessors) != 1: + continue + # may merge + bpl = predecessors[0] + + # and parent has only one son + p_s = [x for x in g.successors(bpl)] + if len(p_s)!=1: + continue + + bp = g.lbl2node[bpl] + # and parent has not a next constraint yet + found = False + for gpl in g.predecessors(bpl): + gp = g.lbl2node[gpl] + for x in gp.bto: + if x.c_t != asm_constraint.c_next: + continue + if x.label == bpl: + found = True + break + if found: + break + if found: + continue + if bp.lines: + l = bp.lines[-1] + #jmp opt; jcc opt + if l.is_subcall(): + continue + if l.breakflow() and l.dstflow(): + bp.lines.pop() + #merge + #sons = b.bto[:] + + # update parents + for s in b.bto: + if not isinstance(s.label, asm_label): continue + if s.label.name == None: + continue + if not s.label in g.lbl2node: + print "unknown parent XXX" + continue + bs = g.lbl2node[s.label] + for p in g.predecessors(bs.label): + if p == b.label: + bs.parents.discard(p) + bs.parents.add(bp.label) + bp.lines+=b.lines + bp.bto = b.bto + #symbol_pool.remove(b.label) + del(blocs[i]) + i = -1 + + return + """ + blocby_label = {} + for b in blocs: + blocby_label[b.label] = b + b.parents = find_parents(blocs, b.label) + + while i < len(blocs) - 1: + i += 1 + b = blocs[i] + if b.label in dont_merge: + continue + p = set(b.parents) + # if bloc dont self ref + if b.label in p: + continue + # and bloc has only one parent + if len(p) != 1: + continue + # may merge + bpl = p.pop() + # bp = getblocby_label(blocs, bpl) + bp = blocby_label[bpl] + # and parent has only one son + if len(bp.bto) != 1: + continue + """ + and will not create next loop composed of constraint_next from son to + parent + """ + path = bloc_find_path_next(blocs, blocby_label, b, bp) + if path: + continue + if bp.lines: + l = bp.lines[-1] + # jmp opt; jcc opt + if l.is_subcall(): + continue + if l.breakflow() and l.dstflow(): + bp.lines.pop() + # merge + # sons = b.bto[:] + + # update parents + for s in b.bto: + if not isinstance(s.label, asm_label): + continue + if s.label.name == None: + continue + if not s.label in blocby_label: + print "unknown parent XXX" + continue + bs = blocby_label[s.label] + for p in list(bs.parents): + if p == b.label: + bs.parents.discard(p) + bs.parents.add(bp.label) + bp.lines += b.lines + bp.bto = b.bto + # symbol_pool.remove(b.label) + del(blocs[i]) + i = -1 + + +class disasmEngine(object): + + def __init__(self, arch, attrib, bs=None, **kwargs): + self.arch = arch + self.attrib = attrib + self.bs = bs + self.symbol_pool = asm_symbol_pool() + self.dont_dis = [] + self.split_dis = [] + self.follow_call = False + self.patch_instr_symb = True + self.dontdis_retcall = False + self.lines_wd = None + self.blocs_wd = None + self.dis_bloc_callback = None + self.dont_dis_nulstart_bloc = False + self.job_done = set() + self.__dict__.update(kwargs) + + def dis_bloc(self, offset): + job_done = set() + l = 
self.symbol_pool.getby_offset_create(offset) + current_bloc = asm_bloc(l) + dis_bloc(self.arch, self.bs, current_bloc, offset, self.job_done, + self.symbol_pool, + dont_dis=self.dont_dis, split_dis=self.split_dis, + follow_call=self.follow_call, + patch_instr_symb=self.patch_instr_symb, + dontdis_retcall=self.dontdis_retcall, + lines_wd=self.lines_wd, + dis_bloc_callback=self.dis_bloc_callback, + dont_dis_nulstart_bloc=self.dont_dis_nulstart_bloc, + attrib=self.attrib) + return current_bloc + + def dis_multibloc(self, offset, blocs=None): + blocs = dis_bloc_all(self.arch, self.bs, offset, self.job_done, + self.symbol_pool, + dont_dis=self.dont_dis, split_dis=self.split_dis, + follow_call=self.follow_call, + patch_instr_symb=self.patch_instr_symb, + dontdis_retcall=self.dontdis_retcall, + blocs_wd=self.blocs_wd, + lines_wd=self.lines_wd, + blocs=blocs, + dis_bloc_callback=self.dis_bloc_callback, + dont_dis_nulstart_bloc=self.dont_dis_nulstart_bloc, + attrib=self.attrib) + return blocs + diff --git a/miasm2/core/bin_stream.py b/miasm2/core/bin_stream.py new file mode 100644 index 00000000..7ae6d3fa --- /dev/null +++ b/miasm2/core/bin_stream.py @@ -0,0 +1,175 @@ +# +# Copyright (C) 2011 EADS France, Fabrice Desclaux <fabrice.desclaux@eads.net> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+# + + +class bin_stream(object): + + def __init__(self, *args, **kargs): + pass + + def __repr__(self): + return "<%s !!>" % self.__class__.__name__ + + def hexdump(self, offset, l): + return + + def getbytes(self, start, l=1): + return self.bin[start:start + l] + + def getbits(self, start, n): + if not n: + return 0 + o = 0 + if n > self.getlen() * 8: + raise ValueError('not enought bits %r %r' % (n, len(self.bin) * 8)) + while n: + # print 'xxx', n, start + i = start / 8 + c = self.getbytes(i) + if not c: + raise IOError + c = ord(c) + # print 'o', hex(c) + r = 8 - start % 8 + c &= (1 << r) - 1 + # print 'm', hex(c) + l = min(r, n) + # print 'd', r-l + c >>= (r - l) + o <<= l + o |= c + n -= l + start += l + return o + + +class bin_stream_str(bin_stream): + + def __init__(self, bin="", offset=0L, shift=0): + bin_stream.__init__(self) + if offset > len(bin): + raise IOError + self.bin = bin + self.offset = offset + self.shift = shift + self.l = len(bin) + if "is_addr_in" in self.bin.__class__.__dict__: + self.is_addr_in = lambda ad: self.bin.is_addr_in(ad) + + def getbytes(self, start, l=1): + if start + l > self.l: + raise IOError + + return super(bin_stream_str, self).getbytes(start + self.shift, l) + + def readbs(self, l=1): + if self.offset + l > self.l: + raise IOError + self.offset += l + print hex(self.offset + self.shift) + return self.bin[self.offset - l + self.shift:self.offset + self.shift] + + def writebs(self, l=1): + raise ValueError('writebs unsupported') + + def __str__(self): + out = self.bin[self.offset + self.shift:] + return out + + def setoffset(self, val): + self.offset = val + + def __len__(self): + return len(self.bin) - self.offset + self.shift + + def getlen(self): + return len(self.bin) - self.offset + self.shift + + +class bin_stream_file(bin_stream): + + def __init__(self, bin, offset=0L): + bin_stream.__init__(self) + self.bin = bin + self.bin.seek(0, 2) + self.l = self.bin.tell() + self.offset = offset + + def getoffset(self): + return self.bin.tell() + + def setoffset(self, val): + self.bin.seek(val) + offset = property(getoffset, setoffset) + + def readbs(self, l=1): + if self.offset + l > self.l: + raise IOError + return self.bin.read(l) + + def writebs(self, l=1): + if self.offset + l > self.l: + raise IOError + return self.bin.write(l) + + def __str__(self): + return str(self.bin) + + +class bin_stream_pe(bin_stream): + + def __init__(self, bin="", offset=0L): + bin_stream.__init__(self) + # print 'ELF/PE' + self.mylen = len(bin) + if offset > bin.__len__(): + raise IOError + self.bin = bin + self.offset = offset + self.l = bin.__len__() + if "is_addr_in" in self.bin.__class__.__dict__: + self.is_addr_in = lambda ad: self.bin.is_addr_in(ad) + + def getlen(self): + return self.mylen + # s = self.bin.parent.SHList[-1] + # l = self.bin.parent.rva2virt(s.addr+s.size) + # return l + + def readbs(self, l=1): + if self.offset + l > self.l: + raise IOError + self.offset += l + return self.bin(self.offset - l, self.offset) + + def writebs(self, l=1): + raise ValueError('writebs unsupported') + + def getbytes(self, start, l=1): + return self.bin(start, start + l) + + def __str__(self): + out = self.bin[self.offset:] + return out + + def setoffset(self, val): + self.offset = val + + +class bin_stream_elf(bin_stream_pe): + pass diff --git a/miasm2/core/cpu.py b/miasm2/core/cpu.py new file mode 100644 index 00000000..7d672caa --- /dev/null +++ b/miasm2/core/cpu.py @@ -0,0 +1,1804 @@ +#!/usr/bin/env python +#-*- coding:utf-8 -*- + +import re +import struct +import 
logging +from pyparsing import * +from miasm2.expression.expression import * +from miasm2.core import asmbloc +from collections import defaultdict +from bin_stream import bin_stream, bin_stream_str +from utils import Disasm_Exception +from miasm2.expression.simplifications import expr_simp + +log = logging.getLogger("cpuhelper") +console_handler = logging.StreamHandler() +console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) +log.addHandler(console_handler) +log.setLevel(logging.WARN) + +# size2int = {8:ExprInt8, 16:ExprInt16, 32:ExprInt32,64:ExprInt64} + + +class bitobj: + + def __init__(self, s=""): + if not s: + bits = [] + else: + bits = list(bin(int(str(s).encode('hex'), 16))[2:]) + bits = [int(x) for x in bits] + if len(bits) % 8: + bits = [0 for x in xrange(8 - (len(bits) % 8))] + bits + bits = ['0' for x in xrange(len(s) * 8 - len(bits))] + bits + self.bits = bits + self.offset = 0 + + def __len__(self): + return len(self.bits) - self.offset + + def getbits(self, n): + if not n: + return 0 + o = 0 + if n > len(self.bits) - self.offset: + raise ValueError('not enought bits %r %r' % (n, len(self.bits))) + b = self.bits[self.offset:self.offset + n] + b = int("".join([str(x) for x in b]), 2) + self.offset += n + return b + + def putbits(self, b, n): + if not n: + return + bits = list(bin(b)[2:]) + bits = [int(x) for x in bits] + bits = [0 for x in xrange(n - len(bits))] + bits + self.bits += bits + + def tostring(self): + if len(self.bits) % 8: + raise ValueError( + 'num bits must be 8 bit aligned: %d' % len(self.bits)) + b = int("".join([str(x) for x in self.bits]), 2) + b = "%X" % b + b = '0' * (len(self.bits) / 4 - len(b)) + b + b = b.decode('hex') + return b + + def reset(self): + self.offset = 0 + + def copy_state(self): + b = self.__class__() + b.bits = self.bits + b.offset = self.offset + return b + + +def literal_list(l): + l = l[:] + l.sort() + l = l[::-1] + o = Literal(l[0]) + for x in l[1:]: + o |= Literal(x) + return o + + +class reg_info: + + def __init__(self, reg_str, reg_expr): + self.str = reg_str + self.expr = reg_expr + self.parser = literal_list(reg_str).setParseAction(self.reg2expr) + + def reg2expr(self, s): + i = self.str.index(s[0]) + return self.expr[i] + + def expr2regi(self, e): + return self.expr.index(e) + + +def gen_reg(rname, env, sz=32): + """ + Gen reg expr and parser + Equivalent to: + PC = ExprId('PC') + reg_pc_str = ['PC'] + reg_pc_expr = [ExprId(x, sz) for x in reg_pc_str] + regpc = reg_info(reg_pc_str, reg_pc_expr) + + class bs_rname(m_reg): + reg = regi_rname + + bsrname = bs(l=0, cls=(bs_rname,)) + + """ + rnamel = rname.lower() + r = ExprId(rname, sz) + reg_str = [rname] + reg_expr = [r] + regi = reg_info(reg_str, reg_expr) + # define as global val + cname = "bs_" + rnamel + c = type(cname, (m_reg,), {'reg': regi}) + env[rname] = r + env["regi_" + rnamel] = regi + env[cname] = c + env["bs" + rnamel] = bs(l=0, cls=(c,)) + return r, regi + +LPARENTHESIS = Literal("(") +RPARENTHESIS = Literal(")") + + +# + + +def int2expr(t): + v = t[0] + return (ExprInt, v) + + +def parse_op(t): + v = t[0] + return (ExprOp, v) + + +def parse_id(t): + v = t[0] + return (ExprId, v) + + +def ast_parse_op(t): + if len(t) == 1: + return t[0] + if len(t) == 2: + if t[0] in ['-', '+', '!']: + return ExprOp(t[0], t[1]) + if len(t) == 3: + args = [t[0], t[2]] + return ExprOp(t[1], t[0], t[2]) + t = t[::-1] + while len(t) >= 3: + o1, op, o2 = t.pop(), t.pop(), t.pop() + e = ExprOp(op, o1, o2) + t.append(e) + if len(t) != 1: + raise 
NotImplementedError('strange op') + return t[0] + + +def ast_id2expr(a): + return ExprId(a, 32) + + +def ast_int2expr(a): + return ExprInt32(a) + + +def ast_raw2expr(a, my_id2expr, my_int2expr): + assert(isinstance(a, tuple)) + if a[0] is ExprId: + e = my_id2expr(a[1]) + elif a[0] is ExprInt: + e = my_int2expr(a[1]) + elif a[0] is ExprOp: + out = [] + for x in a[1]: + if isinstance(x, tuple): + x = ast_raw2expr(x, my_id2expr, my_int2expr) + out.append(x) + e = ast_parse_op(out) + else: + raise TypeError('unknown type') + return e + + +def ast_get_ids(a): + assert(isinstance(a, tuple)) + if a[0] is ExprId: + return set([a[1]]) + elif a[0] is ExprInt: + return set() + elif a[0] is ExprOp: + out = set() + for x in a[1]: + if isinstance(x, tuple): + out.update(ast_get_ids(x)) + return out + raise TypeError('unknown type') + + +def _extract_ast_core(a): + assert(isinstance(a, tuple)) + if a[0] in [ExprInt, ExprId]: + return a + elif a[0] is ExprOp: + out = [] + for x in a[1]: + if isinstance(x, tuple): + x = _extract_ast_core(x) + out.append(x) + return tuple([a[0]] + [out]) + else: + raise TypeError('unknown type') + + +def extract_ast_core(v, my_id2expr, my_int2expr): + ast_tokens = _extract_ast_core(v) + ids = ast_get_ids(ast_tokens) + # print 'IDS', ids + ids_expr = [my_id2expr(x) for x in ids] + # print 'IDS_expr', ids_expr + sizes = set([i.size for i in ids_expr]) + # print "SIZE", sizes + if len(sizes) == 0: + pass + elif len(sizes) == 1: + size = sizes.pop() + my_int2expr = lambda x: ExprInt_fromsize(size, x) + else: + raise ValueError('multiple sizes in ids') + e = ast_raw2expr(ast_tokens, my_id2expr, my_int2expr) + return e + + +class parse_ast: + + def __init__(self, id2expr, int2expr, extract_ast=extract_ast_core): + self.id2expr = id2expr + self.int2expr = int2expr + self.extract_ast_core = extract_ast + + def __call__(self, v): + v = v[0] + if isinstance(v, Expr): + return v + return self.extract_ast_core(v, self.id2expr, self.int2expr) + + +def neg_int(t): + x = -t[0] + return x + + +integer = Word(nums).setParseAction(lambda s, l, t: int(t[0])) +hex_int = Combine(Literal('0x') + Word(hexnums)).setParseAction( + lambda s, l, t: int(t[0], 16)) + +# str_int = (Optional('-') + (hex_int | integer)) +str_int_pos = (hex_int | integer) +str_int_neg = (Suppress('-') + (hex_int | integer)).setParseAction(neg_int) + +str_int = str_int_pos | str_int_neg +str_int.setParseAction(int2expr) + +logicop = oneOf('& | ^ >> << <<< >>>') +signop = oneOf('+ -') +multop = oneOf('* / %') +plusop = oneOf('+ -') + + +def gen_base_expr(): + variable = Word(alphas + "_$.", alphanums + "_") + variable.setParseAction(parse_id) + operand = str_int | variable + base_expr = operatorPrecedence(operand, + [("!", 1, opAssoc.RIGHT, parse_op), + (logicop, 2, opAssoc.RIGHT, parse_op), + (signop, 1, opAssoc.RIGHT, parse_op), + (multop, 2, opAssoc.LEFT, parse_op), + (plusop, 2, opAssoc.LEFT, parse_op), ] + ) + return variable, operand, base_expr + + +variable, operand, base_expr = gen_base_expr() + +my_var_parser = parse_ast(ast_id2expr, ast_int2expr) +base_expr.setParseAction(my_var_parser) + +# + + +default_prio = 0x1337 + + +def isbin(s): + return re.match('[0-1]+$', s) + + +def int2bin(i, l): + s = '0' * l + bin(i)[2:] + return s[-l:] + + +def myror32(v, r): + return ((v & 0xFFFFFFFFL) >> r) | ((v << (32 - r)) & 0xFFFFFFFFL) + + +def myrol32(v, r): + return ((v & 0xFFFFFFFFL) >> (32 - r)) | ((v << r) & 0xFFFFFFFFL) + + +class bs(object): + all_new_c = {} + prio = default_prio + + def __init__(self, strbits=None, 
l=None, cls=None, + fname=None, order=0, flen=None, **kargs): + if fname is None: + # fname = hex(id((strbits, l, cls, fname, order, flen, kargs))) + # fname = hex(id((strbits, l, fname, order, flen))) + # print str((strbits, l, cls, fname, order, flen, kargs)) + fname = hex(id(str((strbits, l, cls, fname, order, flen, kargs)))) + # print fname + if strbits is None: + strbits = "" # "X"*l + elif l is None: + l = len(strbits) + if strbits and isbin(strbits): + value = int(strbits, 2) + elif 'default_val' in kargs: + value = int(kargs['default_val'], 2) + else: + value = None + allbits = list(strbits) + allbits.reverse() + fbits = 0 + fmask = 0 + while allbits: + a = allbits.pop() + if a == " ": + continue + fbits <<= 1 + fmask <<= 1 + if a in '01': + a = int(a) + fbits |= a + fmask |= 1 + lmask = (1 << l) - 1 + # gen conditional field + # if flen is None: + # flen = lambda mode, v:l + if cls: + for b in cls: + if 'flen' in b.__dict__: + flen = getattr(b, 'flen') + + self.strbits = strbits + self.l = l + self.cls = cls + self.fname = fname + self.order = order + self.lmask = lmask + self.fbits = fbits + self.fmask = fmask + self.flen = flen + self.value = value + self.kargs = kargs + + def __getitem__(self, item): + return getattr(self, item) + + def __repr__(self): + o = self.__class__.__name__ + if self.fname: + o += "_%s" % self.fname + o += "_%(strbits)s" % self + if self.cls: + o += '_' + '_'.join([x.__name__ for x in self.cls]) + return o + + def gen(self, parent): + c_name = 'nbsi' + if self.cls: + c_name += '_' + '_'.join([x.__name__ for x in self.cls]) + bases = list(self.cls) + else: + bases = [] + # bsi added at end of list + # used to use first function of added class + bases += [bsi] + # new_c = type(c_name, tuple(bases), {}) + k = c_name, tuple(bases) + if k in self.all_new_c: + new_c = self.all_new_c[k] + else: + new_c = type(c_name, tuple(bases), {}) + self.all_new_c[k] = new_c + c = new_c(parent, + self.strbits, self.l, self.cls, + self.fname, self.order, self.lmask, self.fbits, + self.fmask, self.value, self.flen, **self.kargs) + return c + + def check_fbits(self, v): + return v & self.fmask == self.fbits + + @classmethod + def flen(cls, v): + raise NotImplementedError('not fully functional') + + +class dum_arg(object): + + def __init__(self, e=None): + self.expr = e + + @staticmethod + def arg2str(e): + return str(e) + + +class bsopt(bs): + + def ispresent(self): + return True + + +class bsi(object): + + def __init__(self, parent, strbits, l, cls, fname, order, + lmask, fbits, fmask, value, flen, **kargs): + self.parent = parent + self.strbits = strbits + self.l = l + self.cls = cls + self.fname = fname + self.order = order + self.lmask = lmask + self.fbits = fbits + self.fmask = fmask + self.flen = flen + self.value = value + self.kargs = kargs + self.__dict__.update(self.kargs) + + def decode(self, v): + self.value = v & self.lmask + return True + + def encode(self): + # self.value = v&self.lmask + return True + + def clone(self): + s = self.__class__(self.parent, + self.strbits, self.l, self.cls, + self.fname, self.order, self.lmask, self.fbits, + self.fmask, self.value, self.flen, **self.kargs) + s.__dict__.update(self.kargs) + if hasattr(self, 'expr'): + s.expr = self.expr + return s + + def __hash__(self): + kargs = [] + for k, v in self.kargs.items(): + if isinstance(v, list): + v = tuple(v) + kargs.append((k, v)) + l = [self.strbits, self.l, self.cls, + self.fname, self.order, self.lmask, self.fbits, + self.fmask, self.value] # + kargs + # l = [self.value] + return 
hash(tuple(l)) + + +class bs_divert(object): + prio = default_prio + + def __init__(self, **kargs): + self.args = kargs + + def __getattr__(self, item): + if item in self.__dict__: + return self.__dict__[item] + elif item in self.args: + return self.args.get(item) + else: + raise AttributeError + + +class bs_name(bs_divert): + prio = 1 + + def divert(self, i, candidates): + out = [] + for candidate in candidates: + cls, name, bases, dct, fields = candidate + for new_name, value in self.args['name'].items(): + nfields = fields[:] + s = int2bin(value, self.args['l']) + args = dict(self.args) + args.update({'strbits': s}) + f = bs(**args) + nfields[i] = f + ndct = dict(dct) + ndct['name'] = new_name + out.append((cls, new_name, bases, ndct, nfields)) + return out + + +class bs_mod_name(bs_divert): + prio = 2 + + def divert(self, i, candidates): + out = [] + for candidate in candidates: + cls, name, bases, dct, fields = candidate + for value, new_name in enumerate(self.args['mn_mod']): + nfields = fields[:] + s = int2bin(value, self.args['l']) + args = dict(self.args) + args.update({'strbits': s}) + f = bs(**args) + nfields[i] = f + ndct = dict(dct) + # new_name = ndct['name'] + new_name + ndct['name'] = self.modname(ndct['name'], value) + # ndct['name'] = new_name + out.append((cls, new_name, bases, ndct, nfields)) + return out + + def modname(self, name, i): + return name + self.args['mn_mod'][i] + + +class bs_cond(bsi): + pass + + +class bs_swapargs(bs_divert): + + def divert(self, i, candidates): + # print candidates + out = [] + for cls, name, bases, dct, fields in candidates: + # args not permuted + ndct = dict(dct) + nfields = fields[:] + # gen fix field + f = gen_bsint(0, self.args['l'], self.args) + nfields[i] = f + out.append((cls, name, bases, ndct, nfields)) + + # args permuted + ndct = dict(dct) + nfields = fields[:] + ap = ndct['args_permut'][:] + a = ap.pop(0) + b = ap.pop(0) + ndct['args_permut'] = [b, a] + ap + # print ndct['args_permut'] + # gen fix field + f = gen_bsint(1, self.args['l'], self.args) + nfields[i] = f + + out.append((cls, name, bases, ndct, nfields)) + return out + + +class m_arg(object): + + def fromstring(self, s, parser_result=None): + if parser_result: + e, start, stop = parser_result[self.parser] + self.expr = e + return start, stop + try: + v, start, stop = self.parser.scanString(s).next() + except StopIteration: + return None, None + self.expr = v[0] + return start, stop + + @staticmethod + def arg2str(e): + return str(e) + + +class m_reg(m_arg): + prio = default_prio + + @property + def parser(self): + return self.reg.parser + + def decode(self, v): + self.expr = self.reg.expr[0] + return True + + def encode(self): + return self.expr == self.reg.expr[0] + + @staticmethod + def arg2str(e): + return str(e) + + +class reg_noarg(object): + reg_info = None + parser = None + + def fromstring(self, s, parser_result=None): + if parser_result: + e, start, stop = parser_result[self.parser] + self.expr = e + return start, stop + try: + v, start, stop = self.parser.scanString(s).next() + except StopIteration: + return None, None + self.expr = v[0] + return start, stop + + @staticmethod + def arg2str(e): + return str(e) + + def decode(self, v): + v = v & self.lmask + if v >= len(self.reg_info.expr): + return False + self.expr = self.reg_info.expr[v] + return True + + def encode(self): + if not self.expr in self.reg_info.expr: + log.debug("cannot encode reg %r" % self.expr) + return False + self.value = self.reg_info.expr.index(self.expr) + if self.value > 
self.lmask: + log.debug("cannot encode field value %x %x" % + (self.value, self.lmask)) + return False + return True + + def check_fbits(self, v): + return v & self.fmask == self.fbits + + +class mn_prefix: + + def __init__(self): + b = None + + +def swap16(v): + return struct.unpack('<H', struct.pack('>H', v))[0] + + +def swap32(v): + return struct.unpack('<I', struct.pack('>I', v))[0] + + +def perm_inv(p): + o = [None for x in xrange(len(p))] + for i, x in enumerate(p): + o[x] = i + return o + + +def gen_bsint(value, l, args): + s = int2bin(value, l) + args = dict(args) + args.update({'strbits': s}) + f = bs(**args) + return f + +total_scans = 0 + + +def branch2nodes(branch, nodes=None): + if nodes is None: + node = [] + for k, v in branch.items(): + if not isinstance(v, dict): + continue + for k2 in v.keys(): + nodes.append((k, k2)) + branch2nodes(v, nodes) + + +def factor_one_bit(tree): + if isinstance(tree, set): + return tree + new_keys = defaultdict(lambda: defaultdict(dict)) + if len(tree) == 1: + return tree + for k, v in tree.items(): + # print k, v + if k == "mn": + new_keys[k] = v + continue + l, fmask, fbits, fname, flen = k + if flen is not None or l <= 1: + new_keys[k] = v + continue + cfmask = fmask >> (l - 1) + nfmask = fmask & ((1 << (l - 1)) - 1) + cfbits = fbits >> (l - 1) + nfbits = fbits & ((1 << (l - 1)) - 1) + ck = 1, cfmask, cfbits, None, flen + nk = l - 1, nfmask, nfbits, fname, flen + # print ck + if nk in new_keys[ck]: + raise NotImplementedError('not fully functional') + new_keys[ck][nk] = v + for k, v in new_keys.items(): + new_keys[k] = factor_one_bit(v) + # try factor sons + if len(new_keys) != 1: + return new_keys + subtree = new_keys.values()[0] + if len(subtree) != 1: + return new_keys + if subtree.keys()[0] == 'mn': + return new_keys + + return new_keys + + +def factor_fields(tree): + if not isinstance(tree, dict): + return tree + if len(tree) != 1: + return tree + # merge + k1, v1 = tree.items()[0] + if k1 == "mn": + return tree + l1, fmask1, fbits1, fname1, flen1 = k1 + if fname1 is not None: + return tree + if flen1 is not None: + return tree + + if not isinstance(v1, dict): + return tree + if len(v1) != 1: + return tree + k2, v2 = v1.items()[0] + if k2 == "mn": + return tree + l2, fmask2, fbits2, fname2, flen2 = k2 + if fname2 is not None: + return tree + if flen2 is not None: + return tree + l = l1 + l2 + fmask = (fmask1 << l2) | fmask2 + fbits = (fbits1 << l2) | fbits2 + fname = fname2 + flen = flen2 + k = l, fmask, fbits, fname, flen + new_keys = {k: v2} + return new_keys + + +def factor_fields_all(tree): + if not isinstance(tree, dict): + return tree + new_keys = {} + for k, v in tree.items(): + v = factor_fields(v) + new_keys[k] = factor_fields_all(v) + return new_keys + + +def factor_tree(tree): + new_keys = {} + i = 1 + min_len = min([x[0] for x in tree.keys()]) + while i < min_len: + + i += 1 + + +def graph_tree(tree): + nodes = [] + branch2nodes(tree, nodes) + + out = """ + digraph G { + """ + for a, b in nodes: + # print a, id(a) + # print b, id(b) + if b == 'mn': + continue + out += "%s -> %s;\n" % (id(a), id(b)) + out += "}" + open('graph.txt', 'w').write(out) + + +def add_candidate_to_tree(tree, c): + branch = tree + for f in c.fields: + if f.l == 0: + continue + # print len(bits), f.l + # if f.flen: + # pass + # print f + node = f.l, f.fmask, f.fbits, f.fname, f.flen + # node = f.strbits, f.l, f.cls, f.fname, f.order, f.lmask, f.fbits, + # f.fmask, f.value#, tuple(f.kargs.items()) + + if not node in branch: + branch[node] = {} + branch 
= branch[node] + if not 'mn' in branch: + branch['mn'] = set() + branch['mn'].add(c) + + +def add_candidate(bases, c): + add_candidate_to_tree(bases[0].bintree, c) + + +def getfieldby_name(fields, fname): + f = filter(lambda x: hasattr(x, 'fname') and x.fname == fname, fields) + if len(f) != 1: + raise ValueError('more than one field with name: %s' % fname) + return f[0] + + +def getfieldindexby_name(fields, fname): + for i, f in enumerate(fields): + if hasattr(f, 'fname') and f.fname == fname: + return f, i + return None + + +class metamn(type): + + def __new__(mcs, name, bases, dct): + if name == "cls_mn" or name.startswith('mn_'): + return type.__new__(mcs, name, bases, dct) + alias = dct.get('alias', False) + # fields = [bm_cond]+dct['fields'] + fields = bases[0].mod_fields(dct['fields']) + # print 'f1', dct['fields'] + # print 'f2', fields + if not 'name' in dct: + dct["name"] = bases[0].getmn(name) + if 'args' in dct: + # special case for permuted arguments + o = [] + p = [] + for i, a in enumerate(dct['args']): + o.append((i, a)) + if a in fields: + p.append((fields.index(a), a)) + p.sort() + p = [x[1] for x in p] + p = [dct['args'].index(x) for x in p] + dct['args_permut'] = perm_inv(p) + # order fields + f_ordered = [x for x in enumerate(fields)] + f_ordered.sort(key=lambda x: (x[1].prio, x[0])) + candidates = bases[0].gen_modes(mcs, name, bases, dct, fields) + for i, fc in f_ordered: + # print fc, isinstance(fc, bs_divert) + if isinstance(fc, bs_divert): + # print 'iiii', fc + candidates = fc.divert(i, candidates) + for cls, name, bases, dct, fields in candidates: + ndct = dict(dct) + fields = [f for f in fields if f] + ndct['fields'] = fields + ndct['mn_len'] = sum([x.l for x in fields]) + c = type.__new__(cls, name, bases, ndct) + c.alias = alias + c.check_mnemo(fields) + c.num = bases[0].num + bases[0].num += 1 + bases[0].all_mn.append(c) + mode = dct['mode'] + # print 'add mnemo', c.name, c.mode, len(bases[0].all_mn_mode[mode]) + # print fields + # if 'args_permut' in dct: + # print dct['args_permut'] + bases[0].all_mn_mode[mode].append(c) + bases[0].all_mn_name[c.name].append(c) + i = c() + i.init_class() + bases[0].all_mn_inst[c].append(i) + add_candidate(bases, c) + # gen byte lookup + off = 0 + o = "" + for f in i.fields_order: + if not isinstance(f, bsi): + raise ValueError('f is not bsi') + if f.l == 0: + continue + # if f.fmask: + o += f.strbits + # print o, len(o) + # fd + return c + + +class instruction(object): + + def __init__(self, name, mode, args, args_str=None, additional_info=None): + self.name = name + self.mode = mode + self.args = args + if args_str is None: + raise NotImplementedError('not fully functional') + self.args_str = args_str + self.additional_info = additional_info + + def gen_args(self, args): + out = ', '.join([str(x) for x in args]) + return out + + def __str__(self): + o = "%-10s " % self.name + args = [] + args_str = self.args_str + if args_str is None: + args_str = [lambda x:str(x) for i in xrange(len(self.args))] + for arg, arg_str in zip(self.args, args_str): + if not isinstance(arg, Expr): + raise ValueError('zarb arg type') + x = arg_str(arg) + args.append(x) + o += self.gen_args(args) + return o + + def resolve_args_with_symbols(self, symbols=None): + if symbols is None: + symbols = {} + args_out = [] + for a in self.args: + e = a + # try to resolve symbols using symbols (0 for default value) + ids = get_expr_ids(e) + fixed_ids = {} + for x in ids: + if isinstance(x.name, asmbloc.asm_label): + name = x.name.name + if not name in 
symbols: + raise ValueError('unresolved symbol! %r' % x) + else: + name = x.name + # special symbol + if name == '$': + value = ExprInt_from(x, self.offset) + fixed_ids[x] = value + continue + if not name in symbols: + continue + if symbols[name].offset is None: + default_size = self.get_symbol_size(x, symbols) + value = ExprInt_fromsize(default_size, 0) # default value + else: + size = x.size + if size is None: + default_size = self.get_symbol_size(x, symbols) + size = default_size + value = ExprInt_fromsize(size, symbols[name].offset) + fixed_ids[x] = value + e = e.replace_expr(fixed_ids) + # print 'replaced e', e, fixed_ids + e = expr_simp(e) + # print 'replaced e simp', e, fixed_ids + args_out.append(e) + # print "args out", [str(x) for x in args_out] + return args_out + + def get_info(self, c): + return + + +class cls_mn(object): + __metaclass__ = metamn + args_symb = [] + instruction = instruction + + @classmethod + def guess_mnemo(cls, bs, mode, pre_dis_info, offset): + candidates = [] + + candidates = set() + + fname_values = pre_dis_info + todo = [(0, dict(fname_values), branch, offset * 8) + for branch in cls.bintree.items()] + cpt = 0 + if hasattr(bs, 'getlen'): + bs_l = bs.getlen() + else: + bs_l = len(bs) + # print fname_values + for bvalo, fname_values, branch, offset_b in todo: + (l, fmask, fbits, fname, flen), vals = branch + cpt += 1 + # print 'len', l, fmask, fbits, fname, flen + if flen is not None: + l = flen(mode, fname_values) + # print 'len', fname, l + if l is not None: + # print fname, hex(bs_l), l + if bs_l * 8 - offset_b < l: + continue + # print hex(offset_b) + v = cls.getbits(bs, offset_b, l) + bval = (bvalo << l) + v + # print 'TEST', bval, fname, offset_b, cpt, (l, fmask, fbits), + # hex(v), hex(v & fmask), hex(fbits), v & fmask == fbits + offset_b += l + if v & fmask != fbits: + continue + if fname is not None and not fname in fname_values: + fname_values[fname] = bval + bval = 0 + # print vals + for nb, v in vals.items(): + if 'mn' in nb: + candidates.update(v) + else: + todo.append((bval, dict(fname_values), (nb, v), offset_b)) + + candidates = [c for c in candidates] # if c.mode == mode] + + if not candidates: + raise Disasm_Exception('cannot disasm (guess) at %X' % offset) + return candidates + + def reset_class(self): + for f in self.fields_order: + if f.strbits and isbin(f.strbits): + # print 'a', + f.value = int(f.strbits, 2) + elif 'default_val' in f.kargs: + # print 'b', + f.value = int(f.kargs['default_val'], 2) + else: + # print 'c', + f.value = None + # print "reset", f.fname, f.value + if f.fname: + # print 'SET asm', f.fname + setattr(self, f.fname, f) + + def init_class(self): + args = [] + fields_order = [] + to_decode = [] + off = 0 + for i, fc in enumerate(self.fields): + f = fc.gen(self) + f.offset = off + off += f.l + fields_order.append(f) + to_decode.append((i, f)) + + if isinstance(f, m_arg): + args.append(f) + # print f, fc.fname + if f.fname: + # print 'SET asm', f.fname + setattr(self, f.fname, f) + # print args + if hasattr(self, 'args_permut'): + args = [args[self.args_permut[i]] + for i in xrange(len(self.args_permut))] + to_decode.sort(key=lambda x: (x[1].order, x[0])) + to_decode = [fields_order.index(f[1]) for f in to_decode] + self.args = args + self.fields_order = fields_order + self.to_decode = to_decode + + def add_pre_dis_info(self, prefix=None): + return True + + @classmethod + def getbits(cls, bs, offset_b, l): + return bs.getbits(offset_b, l) + + @classmethod + def getbytes(cls, bs, offset, l): + return 
bs.getbytes(offset, l) + + @classmethod + def pre_dis(cls, v_o, mode_o, offset): + return {}, v_o, mode_o, offset, 0 + + def post_dis(self): + return self + + @classmethod + def check_mnemo(cls, fields): + pass + + @classmethod + def mod_fields(cls, fields): + return fields + + @classmethod + def dis(cls, bs_o, mode_o, offset=0): + if not isinstance(bs_o, bin_stream): + bs_o = bin_stream_str(bs_o) + loggg = False + # bs_o == 'fg\x11\x90\x00\x00'#False#'\x48\x15\x44\x33\x22\x11'==bs_o + # print 'disfunc', repr(bs_o) + offset_o = offset + # print 'DIS', hex(offset), mode_o#repr(bs_o.bin) + pre_dis_info, bs, mode, offset, prefix_len = cls.pre_dis( + bs_o, mode_o, offset) + candidates = cls.guess_mnemo(bs, mode, pre_dis_info, offset) + # print 'guess', repr(v), mode, prefix.rex_w + out = [] + out_c = [] + # print 'DIS CAND', len(candidates), mode + if hasattr(bs, 'getlen'): + bs_l = bs.getlen() + else: + bs_l = len(bs) + + alias = False + for c in candidates: + # print 'RRR' + if loggg: + print "*" * 40, mode, c.mode + print c.fields + # c.mode_o = mode_o + # off = c.parse_prefix(mode_o, v) + # bits = bin_stream(v)#[:c.mn_len/8]) + + # c = c() + # c.init_class() + c = cls.all_mn_inst[c][0] + # c.init_class() + c.reset_class() + c.mode = mode + # for f in c.fields_order: print f.is_present + + if not c.add_pre_dis_info(pre_dis_info): # = prefix#cls.mnprefix() + continue + # print "zz", c.rex_w.value + """ + if prefix.opmode != c.mp[1]: + continue + if prefix.admode != c.mp[2]: + continue + """ + + args = [] + todo = {} + getok = True + fname_values = dict(pre_dis_info) + offset_b = offset * 8 + # print pre_dis_info + total_l = 0 + for i, f in enumerate(c.fields_order): + # print 'XX', i, f, id(f) + # print 'ZZ', c.rex_x.value + if f.flen is not None: + l = f.flen(mode, fname_values) + else: + l = f.l + # print 'len', l + # print "zz", c.rex_w, c.rex_w.value + if l is not None: + total_l += l + f.l = l + f.is_present = True + if loggg: + print "FIELD", f.__class__, f.fname, offset_b, l + if bs_l * 8 - offset_b < l: + getok = False + break + bv = cls.getbits(bs, offset_b, l) + offset_b += l + if not f.fname in fname_values: + fname_values[f.fname] = bv + todo[i] = bv + else: + f.is_present = False + todo[i] = None + + # print "decode", id(f), f.fname, + # print "l", l, "off", offset_b, "v", todo[i] + # print "zzz", c.rex_w, c.rex_w.value + + if not getok: + continue + + # print 'PRIOdec', [(x[0], x[1].order) for x in c.to_decode] + for i in c.to_decode: + f = c.fields_order[i] + if f.is_present: + # print "zz", f.fname, f.is_present, c.rex_w.value, + # c.rex_b.value, c.rex_x.value + ret = f.decode(todo[i]) + if not ret: + log.debug("cannot decode %r" % (f)) + break + + if not ret: + continue + for a in c.args: + a.expr = expr_simp(a.expr) + # print offset, offset_o, total_l + c.l = prefix_len + total_l / 8 + c.b = cls.getbytes(bs, offset, total_l / 8) + c.offset = offset_o + c = c.post_dis() + if c is None: + continue + c_args = [a.expr for a in c.args] + c_args_str = [] + for a in c.args: + if hasattr(a, 'arg2str'): + c_args_str.append(a.arg2str) + else: + raise NotImplementedError('not fully functional') + c_args_str.append(str) + # c_args_str = [a.arg2str for a in c.args] + instr = cls.instruction(c.name, mode, c_args, c_args_str, + additional_info=c.additional_info()) + instr.l = prefix_len + total_l / 8 + instr.b = cls.getbytes(bs, offset, total_l / 8) + instr.offset = offset_o + instr.get_info(c) + # instr = c.post_dis() + if c.alias: + alias = True + out.append(instr) + out_c.append(c) + 
if not out: + raise Disasm_Exception('cannot disasm at %X' % offset_o) + if len(out) != 1: + if not alias: + log.warning('dis multiple args ret default') + + assert(len(out) == 2) + for i, o in enumerate(out_c): + if o.alias: + return out[i] + raise NotImplementedError('not fully functional') + # for xx in out: + # print xx + # if xx.name == "ADC": + # pass + return out[0] + + @classmethod + def fromstring(cls, s, mode): + global total_scans + name = re.search('(\S+)', s).groups() + if not name: + raise ValueError('cannot find name', s) + name = name[0] + # print "mnemo_name", name + if not name in cls.all_mn_name: + raise ValueError('unknown name', name) + clist = [x for x in cls.all_mn_name[name]] # if x.mode == mode] + out = [] + out_args = [] + parsers = defaultdict(dict) + # print 'ASM CAND', len(clist), name + + for cc in clist: + #""" + # c = cc() + # c.init_class() + #""" + """ + c = cls.all_mn_inst[cc][0] + c.reset_class() + c.mode = mode + """ + for c in cls.get_cls_instance(cc, mode): + args_expr = [] + args_str = s[len(name):].strip(' ') + + start = 0 + cannot_parse = False + len_o = len(args_str) + + for i, f in enumerate(c.args): + start_i = len_o - len(args_str) + # print i, "will parse", repr(args_str) + if type(f.parser) == tuple: + parser = f.parser + else: + parser = (f.parser,) + for p in parser: + if p in parsers[(i, start_i)]: + continue + try: + total_scans += 1 + # print type(p) + v, start, stop = p.scanString(args_str).next() + # print "pp", args_str, v, start, stop + except StopIteration: + v, start, stop = [None], None, None + if start != 0: + v, start, stop = [None], None, None + parsers[(i, start_i)][p] = v[0], start, stop + + start, stop = f.fromstring(args_str, parsers[(i, start_i)]) + # print args_str, start, stop#, f.expr + # if start is not None: print f.expr + if start != 0: + log.debug("cannot fromstring %r" % (args_str)) + cannot_parse = True + # print "cannot_parse1" + break + if f.expr is None: + raise NotImplementedError('not fully functional') + # print "f expr", repr(f.expr) + f.expr = expr_simp(f.expr) + args_expr.append(f.expr) + a = args_str[start:stop] + args_str = args_str[stop:].strip(' ') + if args_str.startswith(','): + args_str = args_str[1:] + args_str = args_str.strip(' ') + if args_str: + # print "cannot_parse", repr(args_str) + cannot_parse = True + if cannot_parse: + continue + # print [x for x in c.args] + # print [str(x) for x in c.args] + """ + try: + c.value() + except Exception, e: + log.debug("cannot encode %r\n%s"%(e, traceback.format_exc())) + cannot_parse = True + if cannot_parse: + continue + """ + out.append(c) + out_args.append(args_expr) + break + + if len(out) == 0: + raise ValueError('cannot fromstring %r' % s) + if len(out) != 1: + log.warning('fromstring multiple args ret default') + # raise ValueError("cannot parse %r (%d cand)"%(s, len(out))) + c = out[0] + c_args = out_args[0] + + c_args_str = [] + for a in c.args: + if hasattr(a, 'arg2str'): + c_args_str.append(a.arg2str) + else: + raise NotImplementedError('not fully functional') + c_args_str.append(str) + + instr = cls.instruction(c.name, mode, c_args, c_args_str, + additional_info=c.additional_info()) + # instruction(name, attrib, args, args_str, additional_info): + # c = c() + # c.init_class() + # re parse instruction + """ + args_str = s[len(name):].strip(' ') + for i, f in enumerate(c.args): + if isinstance(f, m_arg): + start, stop = f.fromstring(args_str) + args_str = args_str[stop:].strip(' ') + if args_str.startswith(','): + args_str = args_str[1:] + 
args_str = args_str.strip(' ') + """ + + return instr + + def dup_info(self, infos): + return + + @classmethod + def get_cls_instance(cls, cc, mode, infos=None): + c = cls.all_mn_inst[cc][0] + + c.reset_class() + c.add_pre_dis_info() + c.dup_info(infos) + + c.mode = mode + yield c + + @classmethod + def asm(cls, instr, symbols=None): + # t = time.time() + """ + Re asm instruction by searching mnemo using name and args. We then + can modify args and get the hex of a modified instruction + """ + clist = cls.all_mn_name[instr.name] + clist = [x for x in clist] # if x.mode == instr.mode] + # print 'ASM CAN', len(clist) + vals = [] + candidates = [] + # print "resolve" + args = instr.resolve_args_with_symbols(symbols) + # print "ok", [str(x) for x in args] + """ + args = [] + for i, f in enumerate(cls.args): + e = f.expr + # try to resolve symbols using symbols (0 for default value) + if symbols: + #print 'origine', e + ids = get_expr_ids(e) + fixed_ids = {} + for x in ids: + if not x.name in symbols: + #print 'not IN', x + continue + if symbols[x.name].offset is None: + value = ExprInt32(0) # default value + else: + value = ExprInt_fromsize(x.size, symbols[x.name].offset) + fixed_ids[x] = value + e = e.replace_expr(fixed_ids) + #print 'replaced e', e, fixed_ids + e = expr_simp(e) + #print 'replaced e simp', e, fixed_ids + args.append(e) + """ + for cc in clist: + # if cc.mode != cls.mode: + # continue + """ + c = c() + c.init_class() + """ + for c in cls.get_cls_instance( + cc, instr.mode, instr.additional_info): + + # c = cls.all_mn_inst[cc][0] + # c = cc() + # c.init_class() + + cannot_parse = False + if len(c.args) != len(instr.args): + continue + # print c.mode, c.mp, c.fields[6:] + # print "eee", c.fields + # print [str(x.expr) for x in cls.args] + # only fix args expr + for i in xrange(len(c.args)): + c.args[i].expr = args[i] + # print 'ARGS', [str(x) for x in args] + # for a in c.args: + # print a.expr, + # print + # print instr.mode + v = c.value(instr.mode) + if not v: + log.debug("cannot encode %r" % (c)) + cannot_parse = True + if cannot_parse: + continue + vals += v + candidates.append((c, v)) + if len(vals) == 0: + raise ValueError('cannot asm %r %r' % + (instr.name, [str(x) for x in instr.args])) + if len(vals) != 1: + log.debug('asm multiple args ret default') + # raise ValueError("cannot parse %r (%d cand)"%(s, len(out))) + """ + for x in out: + print repr(x.value()) + print [str(a.expr) for a in x.args] + """ + vals = cls.filter_asm_candidates(instr, candidates) + # vals = list(set(vals)) + # vals.sort(key=lambda x:len(x)) + # dt = time.time() - t + # print 'TIME', dt, str(cls) + + return vals + + @classmethod + def filter_asm_candidates(cls, instr, candidates): + o = [] + for c, v in candidates: + o += v + o.sort(key=lambda x: len(x)) + return o + + def value(self, mode): + # print 'PRIOenc', [(x, self.fields_order[x].order) for x in + # self.to_decode[::-1]] + todo = [(0, [(x, self.fields_order[x]) for x in self.to_decode[::-1]])] + # print todo + result = [] + done = [] + cpt = 0 + + # print 'VALUE'#, self.fields[6:] + while todo: + index, to_decode = todo.pop() + # TEST XXX + for i, f in to_decode: + setattr(self, f.fname, f) + # print 'todo:', len(todo), index, to_decode + # print "OOOOOOO" + # if (index, hash(tuple(to_decode))) in done: + if (index, [x[1].value for x in to_decode]) in done: + # print 'skip', to_decode + continue + done.append((index, [x[1].value for x in to_decode])) + + # done.append((index, to_decode)) + cpt += 1 + can_encode = True + for i, f in 
to_decode[index:]: + # print 'before', f.value, repr(f) + ret = f.encode() + # print 'encode', len(todo), index, f.fname, f.value, f.l, ret + # print 'ret', ret + if not ret: + log.debug('cannot encode %r' % f) + can_encode = False + break + index += 1 + if ret is True: + continue + + # print ret, index + gcpt = 0 + for i in ret: + gcpt += 1 + o = [] + if ((index, [xx[1].value for xx in to_decode]) in todo or + (index, [xx[1].value for xx in to_decode]) in done): + raise NotImplementedError('not fully functional') + continue + for p, f in to_decode: + fnew = f.clone() + o.append((p, fnew)) + todo.append((index, o)) + can_encode = False + # print 'gcpt', gcpt + break + if not can_encode: + continue + result.append(to_decode) + # print 'CPT', cpt + # print "HEX", len(result), result + return self.decoded2bytes(result) + + def encodefields(self, decoded): + bits = bitobj() + for p, f in decoded: + setattr(self, f.fname, f) + + if f.value is None: + continue + bits.putbits(f.value, f.l) + # if f.l: + # print f.l, hex(f.value), len(bits.bits), bits.bits + xx = bits.tostring() + return bits.tostring() + + def decoded2bytes(self, result): + if not result: + return [] + + out = [] + for decoded in result: + decoded.sort() + # print [f.value for p, f in decoded] + o = self.encodefields(decoded) + if o is None: + continue + out.append(o) + out = list(set(out)) + return out + + def gen_args(self, args): + out = ', '.join([str(x) for x in args]) + return out + + def args2str(self): + args = [] + for arg in self.args: + # XXX todo test + if not (isinstance(arg, Expr) or isinstance(arg.expr, Expr)): + raise ValueError('zarb arg type') + x = str(arg) + args.append(x) + return args + + def __str__(self): + o = "%-10s " % self.name + args = [] + for arg in self.args: + # XXX todo test + if not (isinstance(arg, Expr) or isinstance(arg.expr, Expr)): + raise ValueError('zarb arg type') + x = str(arg) + args.append(x) + + o += self.gen_args(args) + return o + + def parse_prefix(self, v): + return 0 + + def set_dst_symbol(self, symbol_pool): + dst = self.getdstflow(symbol_pool) + args = [] + for d in dst: + if isinstance(d, ExprInt): + l = symbol_pool.getby_offset_create(int(d.arg)) + # print l + a = ExprId(l.name, d.size) + else: + a = d + args.append(a) + self.args_symb = args + + def getdstflow(self, symbol_pool): + return [self.args[0].expr] + + +class imm_noarg(object): + # parser = str_int + intsize = 32 + intmask = (1 << intsize) - 1 + # expr2int = lambda self,x:int(self.expr.arg&self.lmask) + + def int2expr(self, v): + if (v & ~self.intmask) != 0: + return None + return ExprInt_fromsize(self.intsize, v) + + def expr2int(self, e): + if not isinstance(e, ExprInt): + return None + v = int(e.arg) + # print "testimm2", hex(v), hex(self.intmask) + if v & ~self.intmask != 0: + return None + return v + + def fromstring(self, s, parser_result=None): + if parser_result: + e, start, stop = parser_result[self.parser] + else: + try: + e, start, stop = self.parser.scanString(s).next() + except StopIteration: + return None, None + if e is None: + return None, None + # print 'fromstring', hex(e), self.int2expr + assert(isinstance(e, Expr)) + if isinstance(e, tuple): + self.expr = self.int2expr(e[1]) + elif isinstance(e, Expr): + self.expr = e + else: + raise TypeError('zarb expr') + if self.expr is None: + log.debug('cannot fromstring int %r' % s) + return None, None + return start, stop + + def decodeval(self, v): + return v + + def encodeval(self, v): + if v > self.lmask: + return False + return v + + @staticmethod + 
def arg2str(e): + return str(e) + + def decode(self, v): + v = v & self.lmask + v = self.decodeval(v) + e = self.int2expr(v) + if not e: + return False + self.expr = e + return True + + def encode(self): + v = self.expr2int(self.expr) + if v is None: + return False + v = self.encodeval(v) + if v is False: + return False + self.value = v + return True + + +class imm08_noarg(object): + int2expr = lambda self, x: ExprInt08(x) + + +class imm16_noarg(object): + int2expr = lambda self, x: ExprInt16(x) + + +class imm32_noarg(object): + int2expr = lambda self, x: ExprInt32(x) + + +class imm64_noarg(object): + int2expr = lambda self, x: ExprInt64(x) + + +class int32_noarg(imm_noarg): + intsize = 32 + intmask = (1 << intsize) - 1 + + def decode(self, v): + v = sign_ext(v, self.l, self.intsize) + v = self.decodeval(v) + self.expr = self.int2expr(v) + return True + + def encode(self): + if not isinstance(self.expr, ExprInt): + return False + v = int(self.expr.arg) + if sign_ext(v & self.lmask, self.l, self.intsize) != v: + return False + v = self.encodeval(v & self.lmask) + self.value = v & self.lmask + return True + + +def swap_uint(size, i): + if size == 8: + return i & 0xff + elif size == 16: + return struct.unpack('<H', struct.pack('>H', i & 0xffff))[0] + elif size == 32: + return struct.unpack('<I', struct.pack('>I', i & 0xffffffff))[0] + elif size == 64: + return struct.unpack('<Q', struct.pack('>Q', i & 0xffffffffffffffff))[0] + raise ValueError('unknown int len %r' % size) + + +def swap_sint(size, i): + if size == 8: + return i + elif size == 16: + return struct.unpack('<h', struct.pack('>H', i & 0xffff))[0] + elif size == 32: + return struct.unpack('<i', struct.pack('>I', i & 0xffffffff))[0] + elif size == 64: + return struct.unpack('<q', struct.pack('>Q', i & 0xffffffffffffffff))[0] + raise ValueError('unknown int len %r' % size) + + +def sign_ext(v, s_in, s_out): + assert(s_in <= s_out) + v &= (1 << s_in) - 1 + sign_in = v & (1 << (s_in - 1)) + if not sign_in: + return v + m = (1 << (s_out)) - 1 + m ^= (1 << s_in) - 1 + v |= m + return v diff --git a/miasm2/core/graph.py b/miasm2/core/graph.py new file mode 100644 index 00000000..47047269 --- /dev/null +++ b/miasm2/core/graph.py @@ -0,0 +1,126 @@ +class DiGraph: + + def __init__(self): + self._nodes = set() + self._edges = [] + self._nodes_to = {} + self._nodes_from = {} + + def __repr__(self): + out = [] + for n in self._nodes: + out.append(str(n)) + for a, b in self._edges: + out.append("%s -> %s" % (a, b)) + return '\n'.join(out) + + def nodes(self): + return self._nodes + + def edges(self): + return self._edges + + def add_node(self, n): + if n in self._nodes: + return + self._nodes.add(n) + self._nodes_to[n] = [] + self._nodes_from[n] = [] + + def add_edge(self, a, b): + if not a in self._nodes: + self.add_node(a) + if not b in self._nodes: + self.add_node(b) + self._edges.append((a, b)) + self._nodes_to[a].append((a, b)) + self._nodes_from[b].append((a, b)) + + def add_uniq_edge(self, a, b): + if (a, b) in self._edges: + return + else: + self.add_edge(a, b) + + def del_edge(self, a, b): + self._edges.remove((a, b)) + self._nodes_to[a].remove((a, b)) + self._nodes_from[b].remove((a, b)) + + def predecessors_iter(self, n): + if not n in self._nodes_from: + raise StopIteration + for a, _ in self._nodes_from[n]: + yield a + + def predecessors(self, n): + return [x for x in self.predecessors_iter(n)] + + def successors_iter(self, n): + if not n in self._nodes_to: + raise StopIteration + for _, b in self._nodes_to[n]: + yield b + + def 
successors(self, n): + return [x for x in self.successors_iter(n)] + + def leaves_iter(self): + for n in self._nodes: + if len(self._nodes_to[n]) == 0: + yield n + + def leaves(self): + return [x for x in self.leaves_iter()] + + def roots_iter(self): + for n in self._nodes: + if len(self._nodes_from[n]) == 0: + yield n + + def roots(self): + return [x for x in self.roots_iter()] + + def find_path(self, a, b, cycles_count=0, done=None): + if done is None: + done = {} + if b in done and done[b] > cycles_count: + return [[]] + if a == b: + return [[a]] + out = [] + for n in self.predecessors(b): + done_n = dict(done) + done_n[b] = done_n.get(b, 0) + 1 + for path in self.find_path(a, n, cycles_count, done_n): + if path and path[0] == a: + out.append(path + [b]) + return out + + def node2str(self, n): + return str(n) + + def edge2str(self, a, b): + return "" + + def dot(self): + out = """ +digraph asm_graph { +graph [ +splines=polyline, +]; +node [ +fontsize = "16", +shape = "box" +]; +""" + for n in self.nodes(): + out += '%s [label="%s"];\n' % ( + hash(n) & 0xFFFFFFFFFFFFFFFF, self.node2str(n)) + + for a, b in self.edges(): + out += '%s -> %s [label="%s"]\n' % (hash(a) & 0xFFFFFFFFFFFFFFFF, + hash(b) & 0xFFFFFFFFFFFFFFFF, + self.edge2str(a, b)) + out += "}" + return out diff --git a/miasm2/core/interval.py b/miasm2/core/interval.py new file mode 100644 index 00000000..cd2a793e --- /dev/null +++ b/miasm2/core/interval.py @@ -0,0 +1,238 @@ +INT_EQ = 0 +INT_B_IN_A = 1 +INT_A_IN_B = -1 +INT_DISJOIN = 2 +INT_JOIN = 3 +INT_JOIN_AB = 4 +INT_JOIN_BA = 5 + +# 0 => eq +# 1 => b in a +# -1 => a in b +# 2 => disjoin +# 3 => join +# 4 => join a,b touch +# 5 => join b,a touch + + +def cmp_interval(a, b): + if a == b: + return INT_EQ + a1, a2 = a + b1, b2 = b + if a1 <= b1 and a2 >= b2: + return INT_B_IN_A + if b1 <= a1 and b2 >= a2: + return INT_A_IN_B + if a2 + 1 == b1: + return INT_JOIN_AB + if b2 + 1 == a1: + return INT_JOIN_BA + if a1 > b2 + 1 or b1 > a2 + 1: + return INT_DISJOIN + return INT_JOIN + +# interval is: [a, b] + + +class interval: + + def __init__(self, a=None): + if a is None: + a = [] + if isinstance(a, interval): + a = a.intervals + self.is_cannon = False + self.intervals = a + self.cannon() + + def __iter__(self): + for x in self.intervals: + yield x + + @classmethod + def cannon_list(cls, tmp): + """ + Return a cannonizes list of intervals + """ + tmp = sorted([x for x in tmp if x[0] <= x[1]]) + out = [] + if not tmp: + return out + out.append(tmp.pop()) + while tmp: + x = tmp.pop() + rez = cmp_interval(out[-1], x) + # print out[-1], x, rez + if rez == INT_EQ: + continue + elif rez == INT_DISJOIN: + out.append(x) + elif rez == INT_B_IN_A: + continue + elif rez in [INT_JOIN, INT_JOIN_AB, INT_JOIN_BA, INT_A_IN_B]: + u, v = x + while out and cmp_interval(out[-1], (u, v)) in [ + INT_JOIN, INT_JOIN_AB, INT_JOIN_BA, INT_A_IN_B]: + u = min(u, out[-1][0]) + v = max(v, out[-1][1]) + out.pop() + out.append((u, v)) + else: + raise ValueError('unknown state', rez) + return out[::-1] + + def cannon(self): + if self.is_cannon is True: + return + self.intervals = interval.cannon_list(self.intervals) + self.is_cannon = True + + def __repr__(self): + if self.intervals: + o = " U ".join(["[0x%X 0x%X]" % (x[0], x[1]) + for x in self.intervals]) + else: + o = "[]" + return o + + def __contains__(self, i): + if isinstance(i, interval): + for x in self.intervals: + is_out = True + for y in i.intervals: + if cmp_interval(x, y) in [INT_EQ, INT_B_IN_A]: + is_out = False + break + if is_out: + return False + 
return True + else: + for x in self.intervals: + if x[0] <= i <= x[1]: + return True + return False + + def __eq__(self, i): + return self.intervals == i.intervals + + def __add__(self, i): + if isinstance(i, interval): + i = i.intervals + i = interval(self.intervals + i) + return i + + def __sub__(self, v): + to_test = self.intervals[:] + i = -1 + to_del = v.intervals[:] + while i < len(to_test) - 1: + i += 1 + x = to_test[i] + if x[0] > x[1]: + del(to_test[i]) + i -= 1 + continue + + while to_del and to_del[0][1] < x[0]: + del(to_del[0]) + + for y in to_del: + if y[0] > x[1]: + break + rez = cmp_interval(x, y) + if rez == INT_DISJOIN: + continue + elif rez == INT_EQ: + del(to_test[i]) + i -= 1 + break + elif rez == INT_A_IN_B: + del(to_test[i]) + i -= 1 + break + elif rez == INT_B_IN_A: + del(to_test[i]) + i1 = (x[0], y[0] - 1) + i2 = (y[1] + 1, x[1]) + to_test[i:i] = [i1, i2] + i -= 1 + break + elif rez in [INT_JOIN_AB, INT_JOIN_BA]: + continue + elif rez == INT_JOIN: + del(to_test[i]) + if x[0] < y[0]: + to_test[i:i] = [(x[0], y[0] - 1)] + else: + to_test[i:i] = [(y[1] + 1, x[1])] + i -= 1 + break + else: + raise ValueError('unknown state', rez) + return interval(to_test) + + def __and__(self, v): + out = [] + for x in self.intervals: + # print "x", x + if x[0] > x[1]: + continue + for y in v.intervals: + # print 'y', y + rez = cmp_interval(x, y) + # print x, y, rez + if rez == INT_DISJOIN: + continue + elif rez == INT_EQ: + out.append(x) + continue + elif rez == INT_A_IN_B: + out.append(x) + continue + elif rez == INT_B_IN_A: + out.append(y) + continue + elif rez == INT_JOIN_AB: + continue + elif rez == INT_JOIN_BA: + continue + elif rez == INT_JOIN: + if x[0] < y[0]: + out.append((y[0], x[1])) + else: + out.append((x[0], y[1])) + continue + else: + raise ValueError('unknown state', rez) + return interval(out) + + def hull(self): + if not self.intervals: + return None, None + return self.intervals[0][0], self.intervals[-1][1] + + def show(self, img_x=1350, img_y=20, dry_run=False): + """ + show image representing the itnerval + """ + try: + import Image + import ImageDraw + except ImportError: + print 'cannot import python PIL imaging' + return + + img = Image.new('RGB', (img_x, img_y), (100, 100, 100)) + draw = ImageDraw.Draw(img) + i_min, i_max = self.hull() + + print hex(i_min), hex(i_max) + + def addr2x(addr): + return (addr - i_min) * img_x / (i_max - i_min) + for a, b in self.intervals: + draw.rectangle((addr2x(a), 0, addr2x(b), img_y), (200, 0, 0)) + + if dry_run is False: + img.show() diff --git a/miasm2/core/parse_asm.py b/miasm2/core/parse_asm.py new file mode 100644 index 00000000..c8967dab --- /dev/null +++ b/miasm2/core/parse_asm.py @@ -0,0 +1,237 @@ +#!/usr/bin/env python +#-*- coding:utf-8 -*- + +import re +import struct +import miasm2.expression.expression as m2_expr +from miasm2.core.asmbloc import * + +declarator = {'byte': 'B', + 'word': 'H', + 'dword': 'I', + 'qword': 'Q', + 'long': 'I', 'zero': 'I', + } + + +def guess_next_new_label(symbol_pool, gen_label_index=0): + i = 0 + gen_name = "loc_%.8X" + while True: + name = gen_name % i + l = symbol_pool.getby_name(name) + if l is None: + return symbol_pool.add_label(name) + i += 1 + + +def parse_txt(mnemo, attrib, txt, symbol_pool=None, gen_label_index=0): + if symbol_pool is None: + symbol_pool = asm_symbol_pool() + + lines_text = [] + lines_data = [] + lines_bss = [] + + lines = lines_text + # parse each line + for line in txt.split('\n'): + # empty + if re.match(r'\s*$', line): + continue + # comment + if 
re.match(r'\s*;\S*', line): + continue + # labels to forget + r = re.match(r'\s*\.LF[BE]\d\s*:', line) + if r: + continue + # label beginning with .L + r = re.match(r'\s*(\.L\S+)\s*:', line) + if r: + l = r.groups()[0] + l = symbol_pool.getby_name_create(l) + lines.append(l) + continue + # directive + if re.match(r'\s*\.', line): + r = re.match(r'\s*\.(\S+)', line) + directive = r.groups()[0] + if directive == 'text': + lines = lines_text + continue + if directive == 'data': + lines = lines_data + continue + if directive == 'bss': + lines = lines_bss + continue + if directive in ['string', 'ascii']: + # XXX HACK + line = line.replace(r'\n', '\n').replace(r'\r', '\r') + raw = line[line.find(r'"') + 1:line.rfind(r"'")] + if directive == 'string': + raw += "\x00" + lines.append(asm_raw(raw)) + continue + if directive == 'ustring': + # XXX HACK + line = line.replace(r'\n', '\n').replace(r'\r', '\r') + raw = line[line.find(r'"') + 1:line.rfind(r"'")] + "\x00" + raw = "".join(map(lambda x: x + '\x00', raw)) + lines.append(asm_raw(raw)) + continue + if directive in declarator: + data_raw = line[r.end():].split() + try: + data_int = [] + for b in data_raw: + if re.search(r'0x', b): + data_int.append(int(b, 16)) + else: + data_int.append(int(b) % (1 << 32)) + raw = reduce(lambda x, y: x + struct.pack( + declarator[directive], y), data_int, "") + except ValueError: + raw = line + lines.append(asm_raw(raw)) + continue + if directive == 'comm': + # TODO + continue + if directive == 'split': # custom command + lines.append(asm_raw(line.strip())) + continue + if directive == 'dontsplit': # custom command + lines.append(asm_raw(line.strip())) + continue + if directive in ['file', 'intel_syntax', 'globl', 'local', + 'type', 'size', 'align', 'ident', 'section']: + continue + if directive[0:4] == 'cfi_': + continue + + raise ValueError("unknown directive %s" % str(directive)) + + # label + r = re.match(r'\s*(\S+)\s*:', line) + if r: + l = r.groups()[0] + l = symbol_pool.getby_name_create(l) + lines.append(l) + continue + + # code + if ';' in line: + line = line[:line.find(';')] + line = line.strip(' ').strip('\t') + instr = mnemo.fromstring(line, attrib) + if instr.dstflow(): + instr.dstflow2label(symbol_pool) + lines.append(instr) + + log_asmbloc.info("___pre asm oki___") + # make blocs + # gen_label_index = 0 + + blocs_sections = [] + bloc_num = 0 + for lines in [lines_text, lines_data, lines_bss]: + state = 0 + i = 0 + blocs = [] + blocs_sections.append(blocs) + bloc_to_nlink = None + block_may_link = False + while i < len(lines): + # print 'DEAL', lines[i], state + # no current bloc + if state == 0: + if not isinstance(lines[i], asm_label): + l = guess_next_new_label(symbol_pool) + lines[i:i] = [l] + else: + l = lines[i] + b = asm_bloc(l) + b.bloc_num = bloc_num + bloc_num += 1 + blocs.append(b) + state = 1 + i += 1 + if bloc_to_nlink: + # print 'nlink!' + bloc_to_nlink.addto( + asm_constraint(b.label, asm_constraint.c_next)) + bloc_to_nlink = None + + # in bloc + elif state == 1: + # asm_raw + if isinstance(lines[i], asm_raw): + if lines[i].raw.startswith('.split'): + state = 0 + block_may_link = False + i += 1 + elif lines[i].raw.startswith('.dontsplit'): + # print 'dontsplit' + state = 1 + block_may_link = True + i += 1 + else: + b.addline(lines[i]) + i += 1 + # asm_label + elif isinstance(lines[i], asm_label): + if block_may_link: + # print 'nlink!' 
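+                        # the current block can fall through to this label,
+                        # so record an explicit c_next constraint to it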
+ b.addto( + asm_constraint(lines[i], asm_constraint.c_next)) + block_may_link = False + state = 0 + # instruction + else: + b.addline(lines[i]) + if lines[i].dstflow(): + ''' + mydst = lines[i].args + if len(mydst)==1 and mnemo.get_symbols(mydst[0]): + arg = dict(mydst[0]) + symbs = mnemo.get_symbols(arg) + """ + TODO XXX redo this (as many miasm parts) + """ + l = symbs[0][0] + lines[i].setdstflow([l]) + b.addto(asm_constraint(l, asm_constraint.c_to)) + ''' + for x in lines[i].getdstflow(symbol_pool): + if not isinstance(x, m2_expr.ExprId): + continue + if x in mnemo.regs.all_regs_ids: + continue + b.addto(asm_constraint(x, asm_constraint.c_to)) + + # TODO XXX redo this really + + if not lines[i].breakflow() and i + 1 < len(lines): + if isinstance(lines[i + 1], asm_label): + l = lines[i + 1] + else: + l = guess_next_new_label(symbol_pool) + lines[i + 1:i + 1] = [l] + else: + state = 0 + + if lines[i].splitflow(): + bloc_to_nlink = b + if not lines[i].breakflow() or lines[i].splitflow(): + block_may_link = True + else: + block_may_link = False + + i += 1 + + for b in blocs_sections[0]: + log_asmbloc.info(b) + + return blocs_sections, symbol_pool diff --git a/miasm2/core/utils.py b/miasm2/core/utils.py new file mode 100644 index 00000000..44d1e8e2 --- /dev/null +++ b/miasm2/core/utils.py @@ -0,0 +1,39 @@ +import struct + +upck8 = lambda x: struct.unpack('B', x)[0] +upck16 = lambda x: struct.unpack('H', x)[0] +upck32 = lambda x: struct.unpack('I', x)[0] +upck64 = lambda x: struct.unpack('Q', x)[0] +pck16 = lambda x: struct.pack('H', x) +pck32 = lambda x: struct.pack('I', x) +pck64 = lambda x: struct.pack('Q', x) + + +class Disasm_Exception(Exception): + pass + + +def hexdump(src, length=16): + FILTER = ''.join( + [(len(repr(chr(x))) == 3) and chr(x) or '.' for x in range(256)]) + lines = [] + for c in xrange(0, len(src), length): + chars = src[c:c + length] + hex = ' '.join(["%02x" % ord(x) for x in chars]) + printable = ''.join( + ["%s" % ((ord(x) <= 127 and FILTER[ord(x)]) or '.') for x in chars]) + lines.append("%04x %-*s %s\n" % (c, length * 3, hex, printable)) + print ''.join(lines) + +# stackoverflow.com/questions/2912231 + +import collections + + +class keydefaultdict(collections.defaultdict): + + def __missing__(self, key): + if self.default_factory is None: + raise KeyError(key) + value = self[key] = self.default_factory(key) + return value |
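
As a quick sanity check of the standalone helpers introduced by this commit, the sketch below (not part of the patch) exercises miasm2.core.graph.DiGraph and miasm2.core.interval.interval. It assumes the miasm2 package is importable with the layout shown in the diffstat above and, like the rest of the code, targets Python 2; expected outputs are shown as comments.

# Usage sketch, not part of this diff.
from miasm2.core.graph import DiGraph
from miasm2.core.interval import interval

# Build a tiny control-flow-like graph: entry -> loop -> exit, with a self loop.
g = DiGraph()
g.add_uniq_edge('entry', 'loop')
g.add_uniq_edge('loop', 'loop')
g.add_uniq_edge('loop', 'exit')
print g.roots()                         # ['entry']
print g.leaves()                        # ['exit']
print g.find_path('entry', 'exit')      # [['entry', 'loop', 'exit']]

# Intervals are canonized on construction: overlapping ranges are merged.
i = interval([(0x1000, 0x1fff), (0x1800, 0x2fff)])
print i                                 # [0x1000 0x2FFF]
print 0x1234 in i                       # True
print i - interval([(0x2000, 0x2fff)])  # [0x1000 0x1FFF]

Both classes are self-contained (no dependency on the cpu.py machinery), which is why they can be smoke-tested in isolation like this.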