diff options
Diffstat (limited to 'miasm2/core')
| -rw-r--r-- | miasm2/core/asmblock.py | 104 | ||||
| -rw-r--r-- | miasm2/core/cpu.py | 59 | ||||
| -rw-r--r-- | miasm2/core/parse_asm.py | 4 | ||||
| -rw-r--r-- | miasm2/core/sembuilder.py | 28 |
4 files changed, 118 insertions, 77 deletions
diff --git a/miasm2/core/asmblock.py b/miasm2/core/asmblock.py index f763d85f..9764590a 100644 --- a/miasm2/core/asmblock.py +++ b/miasm2/core/asmblock.py @@ -1,11 +1,13 @@ #-*- coding:utf-8 -*- import logging -import inspect import warnings from collections import namedtuple -import miasm2.expression.expression as m2_expr +from miasm2.expression.expression import ExprId, ExprInt, ExprLoc, \ + get_expr_labels +from miasm2.core.asmblock import AsmSymbolPool +from miasm2.expression.expression import LocKey from miasm2.expression.simplifications import expr_simp from miasm2.expression.modint import moduint, modint from miasm2.core.utils import Disasm_Exception, pck @@ -25,20 +27,13 @@ def is_int(a): isinstance(a, moduint) or isinstance(a, modint) -def expr_is_label(e): - return isinstance(e, m2_expr.ExprId) and isinstance(e.name, AsmLabel) - - -def expr_is_int_or_label(e): - return isinstance(e, m2_expr.ExprInt) or \ - (isinstance(e, m2_expr.ExprId) and isinstance(e.name, AsmLabel)) - - class AsmLabel(object): "Stand for an assembly label" - def __init__(self, name="", offset=None): + def __init__(self, loc_key, name="", offset=None): + assert isinstance(loc_key, LocKey) + self.loc_key = loc_key self.fixedblocs = False if is_int(name): name = "loc_%.16X" % (int(name) & 0xFFFFFFFFFFFFFFFF) @@ -77,6 +72,9 @@ class AsmRaw(object): def __str__(self): return repr(self.raw) + def to_string(self, symbol_pool): + return str(self) + class asm_raw(AsmRaw): @@ -363,6 +361,11 @@ class AsmSymbolPool(object): self._name2label = {} self._offset2label = {} self._label_num = 0 + self._loc_key_to_label = {} + + def loc_key_to_label(self, label_index): + assert isinstance(label_index, LocKey) + return self._loc_key_to_label.get(label_index.key, None) def add_label(self, name, offset=None): """ @@ -370,7 +373,7 @@ class AsmSymbolPool(object): @name: label's name @offset: (optional) label's offset """ - label = AsmLabel(name, offset) + label = AsmLabel(LocKey(self._label_num), name, offset) # Test for collisions if (label.offset in self._offset2label and @@ -383,6 +386,9 @@ class AsmSymbolPool(object): (label, self._name2label[label.name])) self._labels.add(label) + self._label_num += 1 + self._loc_key_to_label[label.loc_key.key] = label + if label.offset is not None: self._offset2label[label.offset] = label if label.name != "": @@ -480,6 +486,19 @@ class AsmSymbolPool(object): self._name2label.update(symbol_pool._name2label) self._offset2label.update(symbol_pool._offset2label) + def canonize_to_exprloc(self, expr): + """ + If expr is ExprInt, return ExprLoc with corresponding loc_key + Else, return expr + + @expr: Expr instance + """ + if expr.is_int(): + label = self.getby_offset_create(int(expr)) + ret = ExprLoc(label.loc_key, expr.size) + return ret + return expr + def gen_label(self): """Generate a new unpinned label""" label = self.add_label("lbl_gen_%.8X" % (self._label_num)) @@ -511,7 +530,9 @@ class AsmCFG(DiGraph): AsmCFGPending = namedtuple("AsmCFGPending", ["waiter", "constraint"]) - def __init__(self, *args, **kwargs): + def __init__(self, symbol_pool=None, *args, **kwargs): + if symbol_pool is None: + raise DeprecationWarning("AsmCFG needs a non empty symbol_pool") super(AsmCFG, self).__init__(*args, **kwargs) # Edges -> constraint self.edges2constraint = {} @@ -519,6 +540,15 @@ class AsmCFG(DiGraph): self._pendings = {} # Label2block built on the fly self._label2block = {} + # symbol_pool + self.symbol_pool = symbol_pool + + + def copy(self): + """Copy the current graph instance""" + graph = self.__class__(self.symbol_pool) + return graph + self + # Compatibility with old list API def append(self, *args, **kwargs): @@ -639,9 +669,9 @@ class AsmCFG(DiGraph): if self._dot_offset: yield [self.DotCellDescription(text="%.8X" % line.offset, attr={}), - self.DotCellDescription(text=str(line), attr={})] + self.DotCellDescription(text=line.to_string(self.symbol_pool), attr={})] else: - yield self.DotCellDescription(text=str(line), attr={}) + yield self.DotCellDescription(text=line.to_string(self.symbol_pool), attr={}) def node_attr(self, node): if isinstance(node, AsmBlockBad): @@ -784,7 +814,7 @@ class AsmCFG(DiGraph): if len(pred_next) > 1: raise RuntimeError("Too many next constraints for bloc %r" "(%s)" % (block.label, - map(lambda x: x.label, pred_next))) + [x.label for x in pred_next])) def guess_blocks_size(self, mnemo): """Asm and compute max block size @@ -971,13 +1001,13 @@ def conservative_asm(mnemo, instr, symbols, conservative): def fix_expr_val(expr, symbols): """Resolve an expression @expr using @symbols""" def expr_calc(e): - if isinstance(e, m2_expr.ExprId): + if isinstance(e, ExprId): s = symbols._name2label[e.name] - e = m2_expr.ExprInt(s.offset, e.size) + e = ExprInt(s.offset, e.size) return e result = expr.visit(expr_calc) result = expr_simp(result) - if not isinstance(result, m2_expr.ExprInt): + if not isinstance(result, ExprInt): raise RuntimeError('Cannot resolve symbol %s' % expr) return result @@ -1222,12 +1252,11 @@ def get_block_labels(block): if isinstance(instr, AsmRaw): if isinstance(instr.raw, list): for expr in instr.raw: - symbols.update(m2_expr.get_expr_ids(expr)) + symbols.update(get_expr_labels(expr)) else: for arg in instr.args: - symbols.update(m2_expr.get_expr_ids(arg)) - labels = filter_exprid_label(symbols) - return labels + symbols.update(get_expr_labels(arg)) + return symbols def assemble_block(mnemo, block, symbol_pool, conservative=False): @@ -1285,7 +1314,8 @@ def asmblock_final(mnemo, blocks, blockChains, symbol_pool, conservative=False): lbl2block = {block.label: block for block in blocks} blocks_using_label = {} for block in blocks: - labels = get_block_labels(block) + exprlocs = get_block_labels(block) + labels = set(symbol_pool.loc_key_to_label(expr.loc_key) for expr in exprlocs) for label in labels: blocks_using_label.setdefault(label, set()).add(block) @@ -1544,21 +1574,19 @@ class disasmEngine(object): # test split if instr.splitflow() and not (instr.is_subcall() and self.dontdis_retcall): add_next_offset = True - pass if instr.dstflow(): instr.dstflow2label(self.symbol_pool) - dst = instr.getdstflow(self.symbol_pool) - dstn = [] - for d in dst: - if isinstance(d, m2_expr.ExprId) and \ - isinstance(d.name, AsmLabel): - dstn.append(d.name) - if d.name.offset in self.dont_dis_retcall_funcs: - add_next_offset = False - dst = dstn + destinations = instr.getdstflow(self.symbol_pool) + known_dsts = [] + for dst in destinations: + if not dst.is_label(): + continue + label = self.symbol_pool.loc_key_to_label(dst.loc_key) + known_dsts.append(label) + if label.offset in self.dont_dis_retcall_funcs: + add_next_offset = False if (not instr.is_subcall()) or self.follow_call: - cur_block.bto.update( - [AsmConstraint(x, AsmConstraint.c_to) for x in dst]) + cur_block.bto.update([AsmConstraint(label, AsmConstraint.c_to) for label in known_dsts]) # get in delayslot mode in_delayslot = True @@ -1608,7 +1636,7 @@ class disasmEngine(object): log_asmblock.info("dis bloc all") job_done = set() if blocks is None: - blocks = AsmCFG() + blocks = AsmCFG(self.symbol_pool) todo = [offset] bloc_cpt = 0 diff --git a/miasm2/core/cpu.py b/miasm2/core/cpu.py index d75b99cf..8ea96e22 100644 --- a/miasm2/core/cpu.py +++ b/miasm2/core/cpu.py @@ -8,13 +8,12 @@ from collections import defaultdict import pyparsing import miasm2.expression.expression as m2_expr -from miasm2.core import asmblock from miasm2.core.bin_stream import bin_stream, bin_stream_str from miasm2.core.utils import Disasm_Exception from miasm2.expression.simplifications import expr_simp -from miasm2.core.asm_ast import AstNode, AstInt, AstId, AstMem, AstOp +from miasm2.core.asm_ast import AstNode, AstInt, AstId, AstOp log = logging.getLogger("cpuhelper") console_handler = logging.StreamHandler() @@ -985,18 +984,24 @@ class instruction(object): self.mode = mode self.args = args self.additional_info = additional_info + self.offset = None + self.l = None + self.b = None def gen_args(self, args): out = ', '.join([str(x) for x in args]) return out def __str__(self): + return self.to_string() + + def to_string(self, symbol_pool=None): o = "%-10s " % self.name args = [] for i, arg in enumerate(self.args): if not isinstance(arg, m2_expr.Expr): raise ValueError('zarb arg type') - x = self.arg2str(arg, pos = i) + x = self.arg2str(arg, i, symbol_pool) args.append(x) o += self.gen_args(args) return o @@ -1011,40 +1016,38 @@ class instruction(object): if symbols is None: symbols = {} args_out = [] - for a in self.args: - e = a + for expr in self.args: # try to resolve symbols using symbols (0 for default value) - ids = m2_expr.get_expr_ids(e) - fixed_ids = {} - for x in ids: - if isinstance(x.name, asmblock.AsmLabel): - name = x.name.name - # special symbol $ - if name == '$': - fixed_ids[x] = self.get_asm_offset(x) - continue - if name == '_': - fixed_ids[x] = self.get_asm_next_offset(x) - continue - if not name in symbols: - raise ValueError('unresolved symbol! %r' % x) - else: - name = x.name - if not name in symbols: + labels = m2_expr.get_expr_labels(expr) + fixed_expr = {} + for exprloc in labels: + label = symbols.loc_key_to_label(exprloc.loc_key) + name = label.name + # special symbols + if name == '$': + fixed_expr[exprloc] = self.get_asm_offset(exprloc) + continue + if name == '_': + fixed_expr[exprloc] = self.get_asm_next_offset(exprloc) continue + if not name in symbols: + raise ValueError('Unresolved symbol: %r' % exprloc) + if symbols[name].offset is None: raise ValueError('The offset of label "%s" cannot be ' 'determined' % name) else: - size = x.size + # Fix symbol with its offset + size = exprloc.size if size is None: - default_size = self.get_symbol_size(x, symbols) + default_size = self.get_symbol_size(exprloc, symbols) size = default_size value = m2_expr.ExprInt(symbols[name].offset, size) - fixed_ids[x] = value - e = e.replace_expr(fixed_ids) - e = expr_simp(e) - args_out.append(e) + fixed_expr[exprloc] = value + + expr = expr.replace_expr(fixed_expr) + expr = expr_simp(expr) + args_out.append(expr) return args_out def get_info(self, c): diff --git a/miasm2/core/parse_asm.py b/miasm2/core/parse_asm.py index df419680..07155fd9 100644 --- a/miasm2/core/parse_asm.py +++ b/miasm2/core/parse_asm.py @@ -4,7 +4,7 @@ import re import miasm2.expression.expression as m2_expr import miasm2.core.asmblock as asmblock from miasm2.core.cpu import instruction, base_expr -from miasm2.core.asm_ast import AstInt, AstId, AstMem, AstOp +from miasm2.core.asm_ast import AstInt, AstId, AstOp declarator = {'byte': 8, 'word': 16, @@ -237,7 +237,7 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None): cur_block = None state = STATE_NO_BLOC i = 0 - blocks = asmblock.AsmCFG() + blocks = asmblock.AsmCFG(symbol_pool) block_to_nlink = None delayslot = 0 while i < len(lines): diff --git a/miasm2/core/sembuilder.py b/miasm2/core/sembuilder.py index 8d6d3e07..68b4439f 100644 --- a/miasm2/core/sembuilder.py +++ b/miasm2/core/sembuilder.py @@ -142,11 +142,19 @@ class SemBuilder(object): def _create_labels(lbl_else=False): """Return the AST standing for label creations @lbl_else (optional): if set, create a label 'lbl_else'""" - lbl_end = "lbl_end = ExprId(ir.get_next_label(instr), ir.IRDst.size)" + lbl_end = "lbl_end = ir.get_next_label(instr)" + lbl_end_expr = "lbl_end_expr = ExprLoc(lbl_end.loc_key, ir.IRDst.size)" out = ast.parse(lbl_end).body - out += ast.parse("lbl_if = ExprId(ir.gen_label(), ir.IRDst.size)").body + out += ast.parse(lbl_end_expr).body + lbl_if = "lbl_if = ir.gen_label()" + lbl_if_expr = "lbl_if_expr = ExprLoc(lbl_if.loc_key, ir.IRDst.size)" + out += ast.parse(lbl_if).body + out += ast.parse(lbl_if_expr).body if lbl_else: - out += ast.parse("lbl_else = ExprId(ir.gen_label(), ir.IRDst.size)").body + lbl_else = "lbl_else = ir.gen_label()" + lbl_else_expr = "lbl_else_expr = ExprLoc(lbl_else.loc_key, ir.IRDst.size)" + out += ast.parse(lbl_else).body + out += ast.parse(lbl_else_expr).body return out def _parse_body(self, body, argument_names): @@ -200,9 +208,9 @@ class SemBuilder(object): cond = statement.test real_body += self._create_labels(lbl_else=True) - lbl_end = ast.Name(id='lbl_end', ctx=ast.Load()) - lbl_if = ast.Name(id='lbl_if', ctx=ast.Load()) - lbl_else = ast.Name(id='lbl_else', ctx=ast.Load()) \ + lbl_end = ast.Name(id='lbl_end_expr', ctx=ast.Load()) + lbl_if = ast.Name(id='lbl_if_expr', ctx=ast.Load()) + lbl_else = ast.Name(id='lbl_else_expr', ctx=ast.Load()) \ if statement.orelse else lbl_end dst = ast.Call(func=ast.Name(id='ExprCond', ctx=ast.Load()), @@ -261,9 +269,11 @@ class SemBuilder(object): ## Replace the block with a call to 'IRBlock' - lbl_if_name = ast.Attribute(value=ast.Name(id=lbl_name, - ctx=ast.Load()), - attr='name', ctx=ast.Load()) + lbl_if_name = value= ast.Attribute( + value=ast.Name(id=lbl_name, ctx=ast.Load()), + attr="loc_key", + ctx=ast.Load() + ) assignblks = ast.List(elts=[assignblk], ctx=ast.Load()) |