Diffstat (limited to 'miasm2/core')
| -rw-r--r-- | miasm2/core/asmblock.py   | 953 |
| -rw-r--r-- | miasm2/core/cpu.py        |  95 |
| -rw-r--r-- | miasm2/core/graph.py      |   7 |
| -rw-r--r-- | miasm2/core/locationdb.py | 453 |
| -rw-r--r-- | miasm2/core/parse_asm.py  | 105 |
| -rw-r--r-- | miasm2/core/sembuilder.py |  52 |
6 files changed, 1041 insertions, 624 deletions
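The diff below migrates miasm2/core from the AsmLabel/AsmSymbolPool API to LocKey/LocationDB. As a quick orientation, here is a minimal usage sketch of the new API. Every call is taken from the docstring and methods added in miasm2/core/locationdb.py further down; the surrounding script is illustrative only and not part of the commit.

```python
# Minimal sketch of the new location API introduced by this change
# (Python 2, matching the codebase).
from miasm2.core.locationdb import LocationDB

loc_db = LocationDB()

# A location may carry an optional offset and any number of names
loc_a = loc_db.add_location()                   # anonymous location
loc_b = loc_db.add_location(offset=0x1234)      # pinned to an offset
loc_c = loc_db.add_location(name="first_name")  # named location
loc_db.add_location_name(loc_c, "second_name")
loc_db.set_location_offset(loc_c, 0x5678)

# Lookups replace the old AsmSymbolPool getters
assert loc_db.get_location_offset(loc_b) == 0x1234
assert loc_db.get_offset_location(0x1234) == loc_b
assert loc_db.get_name_location("first_name") == loc_c

# get_or_create_offset_location() replaces getby_offset_create()
loc_d = loc_db.get_or_create_offset_location(0x4000)

# Human-readable form, as used by the new *.to_string(loc_db) helpers
print loc_db.pretty_str(loc_c)
```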
diff --git a/miasm2/core/asmblock.py b/miasm2/core/asmblock.py index 8740aeb7..c8af4056 100644 --- a/miasm2/core/asmblock.py +++ b/miasm2/core/asmblock.py @@ -1,16 +1,18 @@ #-*- coding:utf-8 -*- import logging -import inspect import warnings from collections import namedtuple -import miasm2.expression.expression as m2_expr +from miasm2.expression.expression import ExprId, ExprInt, ExprLoc, \ + get_expr_locs +from miasm2.expression.expression import LocKey from miasm2.expression.simplifications import expr_simp from miasm2.expression.modint import moduint, modint from miasm2.core.utils import Disasm_Exception, pck from miasm2.core.graph import DiGraph, DiGraphSimplifier, MatchGraphJoker from miasm2.core.interval import interval +from miasm2.core.locationdb import LocationDB log_asmblock = logging.getLogger("asmblock") @@ -25,49 +27,6 @@ def is_int(a): isinstance(a, moduint) or isinstance(a, modint) -def expr_is_label(e): - return isinstance(e, m2_expr.ExprId) and isinstance(e.name, AsmLabel) - - -def expr_is_int_or_label(e): - return isinstance(e, m2_expr.ExprInt) or \ - (isinstance(e, m2_expr.ExprId) and isinstance(e.name, AsmLabel)) - - -class AsmLabel(object): - - "Stand for an assembly label" - - def __init__(self, name="", offset=None): - self.fixedblocs = False - if is_int(name): - name = "loc_%.16X" % (int(name) & 0xFFFFFFFFFFFFFFFF) - self.name = name - self.attrib = None - if offset is None: - self.offset = None - else: - self.offset = int(offset) - - def __str__(self): - if isinstance(self.offset, (int, long)): - return "%s:0x%08x" % (self.name, self.offset) - else: - return "%s:%s" % (self.name, str(self.offset)) - - def __repr__(self): - rep = '<%s ' % self.__class__.__name__ - if self.name: - rep += repr(self.name) + ' ' - rep += '>' - return rep - - -class asm_label(AsmLabel): - - def __init__(self, name="", offset=None): - warnings.warn('DEPRECATION WARNING: use "AsmLabel" instead of "asm_label"') - super(asm_label, self).__init__(name, offset) class AsmRaw(object): @@ -77,6 +36,9 @@ class AsmRaw(object): def __str__(self): return repr(self.raw) + def to_string(self, loc_db): + return str(self) + class asm_raw(AsmRaw): @@ -89,76 +51,115 @@ class AsmConstraint(object): c_to = "c_to" c_next = "c_next" - def __init__(self, label, c_t=c_to): + def __init__(self, loc_key, c_t=c_to): # Sanity check - assert isinstance(label, AsmLabel) + assert isinstance(loc_key, LocKey) - self.label = label + self.loc_key = loc_key self.c_t = c_t + def get_label(self): + warnings.warn('DEPRECATION WARNING: use ".loc_key" instead of ".label"') + return self.loc_key + + def set_label(self, loc_key): + warnings.warn('DEPRECATION WARNING: use ".loc_key" instead of ".label"') + self.loc_key = loc_key + + label = property(get_label, set_label) + + def to_string(self, loc_db=None): + if loc_db is None: + return "%s:%s" % (self.c_t, self.loc_key) + else: + return "%s:%s" % ( + self.c_t, + loc_db.pretty_str(self.loc_key) + ) + def __str__(self): - return "%s:%s" % (str(self.c_t), str(self.label)) + return self.to_string() class asm_constraint(AsmConstraint): - def __init__(self, label, c_t=AsmConstraint.c_to): + def __init__(self, loc_key, c_t=AsmConstraint.c_to): warnings.warn('DEPRECATION WARNING: use "AsmConstraint" instead of "asm_constraint"') - super(asm_constraint, self).__init__(label, c_t) + super(asm_constraint, self).__init__(loc_key, c_t) class AsmConstraintNext(AsmConstraint): - def __init__(self, label): + def __init__(self, loc_key): super(AsmConstraintNext, self).__init__( - label, 
c_t=AsmConstraint.c_next) + loc_key, + c_t=AsmConstraint.c_next + ) class asm_constraint_next(AsmConstraint): - def __init__(self, label): + def __init__(self, loc_key): warnings.warn('DEPRECATION WARNING: use "AsmConstraintNext" instead of "asm_constraint_next"') - super(asm_constraint_next, self).__init__(label) + super(asm_constraint_next, self).__init__(loc_key) class AsmConstraintTo(AsmConstraint): - def __init__(self, label): + def __init__(self, loc_key): super(AsmConstraintTo, self).__init__( - label, c_t=AsmConstraint.c_to) + loc_key, + c_t=AsmConstraint.c_to + ) class asm_constraint_to(AsmConstraint): - def __init__(self, label): + def __init__(self, loc_key): warnings.warn('DEPRECATION WARNING: use "AsmConstraintTo" instead of "asm_constraint_to"') - super(asm_constraint_to, self).__init__(label) + super(asm_constraint_to, self).__init__(loc_key) class AsmBlock(object): - def __init__(self, label, alignment=1): - assert isinstance(label, AsmLabel) + def __init__(self, loc_key, alignment=1): + assert isinstance(loc_key, LocKey) + self.bto = set() self.lines = [] - self.label = label + self._loc_key = loc_key self.alignment = alignment - def __str__(self): + def get_label(self): + warnings.warn('DEPRECATION WARNING: use ".loc_key" instead of ".label"') + return self.loc_key + + loc_key = property(lambda self:self._loc_key) + label = property(get_label) + + + def to_string(self, loc_db=None): out = [] - out.append(str(self.label)) - for l in self.lines: - out.append(str(l)) + if loc_db is None: + out.append(str(self.loc_key)) + else: + out.append(loc_db.pretty_str(self.loc_key)) + + for instr in self.lines: + out.append(instr.to_string(loc_db)) if self.bto: lbls = ["->"] - for l in self.bto: - if l is None: + for dst in self.bto: + if dst is None: lbls.append("Unknown? ") else: - lbls.append(str(l) + " ") + lbls.append(dst.to_string(loc_db) + " ") lbls = '\t'.join(lbls) out.append(lbls) return '\n'.join(out) + def __str__(self): + return self.to_string() + def addline(self, l): self.lines.append(l) @@ -166,23 +167,25 @@ class AsmBlock(object): assert isinstance(self.bto, set) self.bto.add(c) - def split(self, offset, l): + def split(self, loc_db, offset): + loc_key = loc_db.get_or_create_offset_location(offset) log_asmblock.debug('split at %x', offset) i = -1 offsets = [x.offset for x in self.lines] - if not l.offset in offsets: + offset = loc_db.get_location_offset(loc_key) + if offset not in offsets: log_asmblock.warning( 'cannot split bloc at %X ' % offset + 'middle instruction? 
default middle') offsets.sort() return None - new_bloc = AsmBlock(l) + new_bloc = AsmBlock(loc_key) i = offsets.index(offset) self.lines, new_bloc.lines = self.lines[:i], self.lines[i:] flow_mod_instr = self.get_flow_instr() log_asmblock.debug('flow mod %r', flow_mod_instr) - c = AsmConstraint(l, AsmConstraint.c_next) + c = AsmConstraint(loc_key, AsmConstraint.c_next) # move dst if flowgraph modifier was in original bloc # (usecase: split delayslot bloc) if flow_mod_instr: @@ -209,16 +212,14 @@ class AsmBlock(object): def get_offsets(self): return [x.offset for x in self.lines] - def add_cst(self, offset, c_t, symbol_pool): - if isinstance(offset, (int, long)): - l = symbol_pool.getby_offset_create(offset) - elif isinstance(offset, str): - l = symbol_pool.getby_name_create(offset) - elif isinstance(offset, AsmLabel): - l = offset - else: - raise ValueError('unknown offset type %r' % offset) - c = AsmConstraint(l, c_t) + def add_cst(self, loc_key, constraint_type): + """ + Add constraint between current block and block at @loc_key + @loc_key: LocKey instance of constraint target + @constraint_type: AsmConstraint c_to/c_next + """ + assert isinstance(loc_key, LocKey) + c = AsmConstraint(loc_key, constraint_type) self.bto.add(c) def get_flow_instr(self): @@ -244,9 +245,9 @@ class AsmBlock(object): return None def get_next(self): - for x in self.bto: - if x.c_t == AsmConstraint.c_next: - return x.label + for constraint in self.bto: + if constraint.c_t == AsmConstraint.c_next: + return constraint.loc_key return None @staticmethod @@ -279,7 +280,7 @@ class AsmBlock(object): # destination -> associated constraints dests = {} for constraint in self.bto: - dests.setdefault(constraint.label, set()).add(constraint) + dests.setdefault(constraint.loc_key, set()).add(constraint) self.bto = set(self._filter_constraint(constraints) for constraints in dests.itervalues()) @@ -287,9 +288,9 @@ class AsmBlock(object): class asm_bloc(object): - def __init__(self, label, alignment=1): + def __init__(self, loc_key, alignment=1): warnings.warn('DEPRECATION WARNING: use "AsmBlock" instead of "asm_bloc"') - super(asm_bloc, self).__init__(label, alignment) + super(asm_bloc, self).__init__(loc_key, alignment) class AsmBlockBad(AsmBlock): @@ -297,23 +298,35 @@ class AsmBlockBad(AsmBlock): """Stand for a *bad* ASM block (malformed, unreachable, not disassembled, ...)""" - ERROR_TYPES = {-1: "Unknown error", - 0: "Unable to disassemble", - 1: "Null starting block", - 2: "Address forbidden by dont_dis", - } - def __init__(self, label=None, alignment=1, errno=-1, *args, **kwargs): + ERROR_UNKNOWN = -1 + ERROR_CANNOT_DISASM = 0 + ERROR_NULL_STARTING_BLOCK = 1 + ERROR_FORBIDDEN = 2 + ERROR_IO = 3 + + + ERROR_TYPES = { + ERROR_UNKNOWN: "Unknown error", + ERROR_CANNOT_DISASM: "Unable to disassemble", + ERROR_NULL_STARTING_BLOCK: "Null starting block", + ERROR_FORBIDDEN: "Address forbidden by dont_dis", + ERROR_IO: "IOError", + } + + def __init__(self, loc_key=None, alignment=1, errno=ERROR_UNKNOWN, *args, **kwargs): """Instanciate an AsmBlock_bad. 
- @label, @alignement: same as AsmBlock.__init__ + @loc_key, @alignement: same as AsmBlock.__init__ @errno: (optional) specify a error type associated with the block """ - super(AsmBlockBad, self).__init__(label, alignment, *args, **kwargs) + super(AsmBlockBad, self).__init__(loc_key, alignment, *args, **kwargs) self._errno = errno + errno = property(lambda self: self._errno) + def __str__(self): error_txt = self.ERROR_TYPES.get(self._errno, self._errno) - return "\n".join([str(self.label), + return "\n".join([str(self.loc_key), "\tBad block: %s" % error_txt]) def addline(self, *args, **kwargs): @@ -328,157 +341,21 @@ class AsmBlockBad(AsmBlock): class asm_block_bad(AsmBlockBad): - def __init__(self, label=None, alignment=1, errno=-1, *args, **kwargs): + def __init__(self, loc_key=None, alignment=1, errno=-1, *args, **kwargs): warnings.warn('DEPRECATION WARNING: use "AsmBlockBad" instead of "asm_block_bad"') - super(asm_block_bad, self).__init__(label, alignment, *args, **kwargs) - - -class AsmSymbolPool(object): - """ - Store symbols. + super(asm_block_bad, self).__init__(loc_key, alignment, *args, **kwargs) - A symbol links a name to an (optional) offset - - Rules and limitations: - - two different symbols cannot have the same offset - - two different symbols cannot have the same name - - symbols manipulation (comparison, creation ...) can only be done on - symbols generated by the same symbol pool - """ - - def __init__(self): - self._labels = set() - self._name2label = {} - self._offset2label = {} - self._label_num = 0 - - def add_label(self, name, offset=None): - """ - Create and add a label to the symbol_pool - @name: label's name - @offset: (optional) label's offset - """ - label = AsmLabel(name, offset) - - # Test for collisions - if (label.offset in self._offset2label and - label != self._offset2label[label.offset]): - raise ValueError('symbol %s has same offset as %s' % - (label, self._offset2label[label.offset])) - if (label.name in self._name2label and - label != self._name2label[label.name]): - raise ValueError('symbol %s has same name as %s' % - (label, self._name2label[label.name])) - - self._labels.add(label) - if label.offset is not None: - self._offset2label[label.offset] = label - if label.name != "": - self._name2label[label.name] = label - return label - - def remove_label(self, label): - """ - Delete a @label - """ - self._name2label.pop(label.name, None) - self._offset2label.pop(label.offset, None) - if label in self._labels: - self._labels.remove(label) - - def del_label_offset(self, label): - """Unpin the @label from its offset""" - self._offset2label.pop(label.offset, None) - label.offset = None - - def getby_offset(self, offset): - """Retrieve label using its @offset""" - return self._offset2label.get(offset, None) - - def getby_name(self, name): - """Retrieve label using its @name""" - return self._name2label.get(name, None) - - def getby_name_create(self, name): - """Get a label from its @name, create it if it doesn't exist""" - label = self.getby_name(name) - if label is None: - label = self.add_label(name) - return label - - def getby_offset_create(self, offset): - """Get a label from its @offset, create it if it doesn't exist""" - label = self.getby_offset(offset) - if label is None: - label = self.add_label(offset, offset) - return label - - def rename_label(self, label, newname): - """Rename the @label name to @newname""" - if newname in self._name2label: - raise ValueError('Symbol already known') - self._name2label.pop(label.name, None) - label.name = 
newname - self._name2label[label.name] = label - - def set_offset(self, label, offset): - """Pin the @label from at @offset - Note that there is a special case when the offset is a list - it happens when offsets are recomputed in resolve_symbol* - """ - if label is None: - raise ValueError('label should not be None') - if not label.name in self._name2label: - raise ValueError('label %s not in symbol pool' % label) - if offset is not None and offset in self._offset2label: - raise ValueError('Conflict in label %s' % label) - self._offset2label.pop(label.offset, None) - label.offset = offset - if is_int(label.offset): - self._offset2label[label.offset] = label - - @property - def labels(self): - """Return all labels""" - return self._labels - - @property - def items(self): - """Return all labels""" - warnings.warn('DEPRECATION WARNING: use "labels" instead of "items"') - return list(self._labels) - - - def __str__(self): - return reduce(lambda x, y: x + str(y) + '\n', self._labels, "") - - def __getitem__(self, item): - if item in self._name2label: - return self._name2label[item] - if item in self._offset2label: - return self._offset2label[item] - raise KeyError('unknown symbol %r' % item) - - def __contains__(self, item): - return item in self._name2label or item in self._offset2label - - def merge(self, symbol_pool): - """Merge with another @symbol_pool""" - self._labels.update(symbol_pool.labels) - self._name2label.update(symbol_pool._name2label) - self._offset2label.update(symbol_pool._offset2label) - - def gen_label(self): - """Generate a new unpinned label""" - label = self.add_label("lbl_gen_%.8X" % (self._label_num)) - self._label_num += 1 - return label +class AsmSymbolPool(LocationDB): + """[DEPRECATED API] use 'LocationDB' instead""" + def __init__(self, *args, **kwargs): + warnings.warn("Deprecated API, use 'LocationDB' instead") + super(AsmSymbolPool, self).__init__(*args, **kwargs) class asm_symbol_pool(AsmSymbolPool): def __init__(self): - warnings.warn('DEPRECATION WARNING: use "AsmSymbolPool" instead of "asm_symbol_pool"') + warnings.warn('DEPRECATION WARNING: use "LocationDB" instead of "asm_symbol_pool"') super(asm_symbol_pool, self).__init__() @@ -491,7 +368,7 @@ class AsmCFG(DiGraph): Specialized the .dot export and force the relation between block to be uniq, and associated with a constraint. - Offer helpers on AsmCFG management, such as research by label, sanity + Offer helpers on AsmCFG management, such as research by loc_key, sanity checking and mnemonic size guessing. """ @@ -499,14 +376,23 @@ class AsmCFG(DiGraph): AsmCFGPending = namedtuple("AsmCFGPending", ["waiter", "constraint"]) - def __init__(self, *args, **kwargs): + def __init__(self, loc_db=None, *args, **kwargs): super(AsmCFG, self).__init__(*args, **kwargs) # Edges -> constraint self.edges2constraint = {} - # Expected AsmLabel -> set( (src, dst), constraint ) + # Expected LocKey -> set( (src, dst), constraint ) self._pendings = {} - # Label2block built on the fly - self._label2block = {} + # Loc_Key2block built on the fly + self._loc_key_to_block = {} + # loc_db + self.loc_db = loc_db + + + def copy(self): + """Copy the current graph instance""" + graph = self.__class__(self.loc_db) + return graph + self + # Compatibility with old list API def append(self, *args, **kwargs): @@ -518,121 +404,170 @@ class AsmCFG(DiGraph): def __getitem__(self, *args, **kwargs): raise DeprecationWarning("Order of AsmCFG elements is not reliable") + def __contains__(self, _): + """ + DEPRECATED. 
Use: + - loc_key in AsmCFG.nodes() to test loc_key existence + """ + raise RuntimeError("DEPRECATED") + def __iter__(self): - """Iterator on AsmBlock composing the current graph""" - return iter(self._nodes) + """ + DEPRECATED. Use: + - AsmCFG.blocks() to iter on blocks + - loc_key in AsmCFG.nodes() to test loc_key existence + """ + raise RuntimeError("DEPRECATED") def __len__(self): """Return the number of blocks in AsmCFG""" return len(self._nodes) + blocks = property(lambda x:x._loc_key_to_block.itervalues()) + # Manage graph with associated constraints def add_edge(self, src, dst, constraint): """Add an edge to the graph - @src: AsmBlock instance, source - @dst: AsmBlock instance, destination + @src: LocKey instance, source + @dst: LocKey instance, destination @constraint: constraint associated to this edge """ # Sanity check - assert (src, dst) not in self.edges2constraint + assert isinstance(src, LocKey) + assert isinstance(dst, LocKey) + known_cst = self.edges2constraint.get((src, dst), None) + if known_cst is not None: + assert known_cst == constraint + return # Add the edge to src.bto if needed - if dst.label not in [cons.label for cons in src.bto]: - src.bto.add(AsmConstraint(dst.label, constraint)) + block_src = self.loc_key_to_block(src) + if block_src: + if dst not in [cons.loc_key for cons in block_src.bto]: + block_src.bto.add(AsmConstraint(dst, constraint)) # Add edge self.edges2constraint[(src, dst)] = constraint super(AsmCFG, self).add_edge(src, dst) def add_uniq_edge(self, src, dst, constraint): - """Add an edge from @src to @dst if it doesn't already exist""" - if (src not in self._nodes_succ or - dst not in self._nodes_succ[src]): - self.add_edge(src, dst, constraint) + """ + Synonym for `add_edge` + """ + self.add_edge(src, dst, constraint) def del_edge(self, src, dst): """Delete the edge @src->@dst and its associated constraint""" + src_blk = self.loc_key_to_block(src) + dst_blk = self.loc_key_to_block(dst) + assert src_blk is not None + assert dst_blk is not None # Delete from src.bto - to_remove = [cons for cons in src.bto if cons.label == dst.label] + to_remove = [cons for cons in src_blk.bto if cons.loc_key == dst] if to_remove: assert len(to_remove) == 1 - src.bto.remove(to_remove[0]) + src_blk.bto.remove(to_remove[0]) # Del edge del self.edges2constraint[(src, dst)] super(AsmCFG, self).del_edge(src, dst) - def add_node(self, block): - """Add the block @block to the current instance, if it is not already in + def del_block(self, block): + super(AsmCFG, self).del_node(block.loc_key) + del self._loc_key_to_block[block.loc_key] + + + def add_node(self, node): + assert isinstance(node, LocKey) + return super(AsmCFG, self).add_node(node) + + def add_block(self, block): + """ + Add the block @block to the current instance, if it is not already in @block: AsmBlock instance Edges will be created for @block.bto, if destinations are already in this instance. If not, they will be resolved when adding these aforementionned destinations. `self.pendings` indicates which blocks are not yet resolved. 
+ """ - status = super(AsmCFG, self).add_node(block) + status = super(AsmCFG, self).add_node(block.loc_key) + if not status: return status # Update waiters - if block.label in self._pendings: - for bblpend in self._pendings[block.label]: - self.add_edge(bblpend.waiter, block, bblpend.constraint) - del self._pendings[block.label] + if block.loc_key in self._pendings: + for bblpend in self._pendings[block.loc_key]: + self.add_edge(bblpend.waiter.loc_key, block.loc_key, bblpend.constraint) + del self._pendings[block.loc_key] # Synchronize edges with block destinations - self._label2block[block.label] = block + self._loc_key_to_block[block.loc_key] = block + for constraint in block.bto: - dst = self._label2block.get(constraint.label, - None) + dst = self._loc_key_to_block.get(constraint.loc_key, + None) if dst is None: # Block is yet unknown, add it to pendings to_add = self.AsmCFGPending(waiter=block, constraint=constraint.c_t) - self._pendings.setdefault(constraint.label, + self._pendings.setdefault(constraint.loc_key, set()).add(to_add) else: # Block is already in known nodes - self.add_edge(block, dst, constraint.c_t) + self.add_edge(block.loc_key, dst.loc_key, constraint.c_t) return status - def del_node(self, block): - super(AsmCFG, self).del_node(block) - del self._label2block[block.label] - def merge(self, graph): """Merge with @graph, taking in account constraints""" - # -> add_edge(x, y, constraint) - for node in graph._nodes: + # Add known blocks + for block in graph.blocks: + self.add_block(block) + # Add nodes not already in it (ie. not linked to a block) + for node in graph.nodes(): self.add_node(node) + # -> add_edge(x, y, constraint) for edge in graph._edges: - # Use "_uniq_" beacause the edge can already exist due to add_node - self.add_uniq_edge(*edge, constraint=graph.edges2constraint[edge]) + # May fail if there is an incompatibility in edges constraints + # between the two graphs + self.add_edge(*edge, constraint=graph.edges2constraint[edge]) + def node2lines(self, node): - yield self.DotCellDescription(text=str(node.label.name), + if self.loc_db is None: + loc_key_name = str(node) + else: + loc_key_name = self.loc_db.pretty_str(node) + yield self.DotCellDescription(text=loc_key_name, attr={'align': 'center', 'colspan': 2, 'bgcolor': 'grey'}) - - if isinstance(node, AsmBlockBad): - yield [self.DotCellDescription( - text=node.ERROR_TYPES.get(node._errno, - node._errno), - attr={})] + block = self._loc_key_to_block.get(node, None) + if block is None: raise StopIteration - for line in node.lines: + if isinstance(block, AsmBlockBad): + yield [ + self.DotCellDescription( + text=block.ERROR_TYPES.get(block._errno, + block._errno + ), + attr={}) + ] + raise StopIteration + for line in block.lines: if self._dot_offset: yield [self.DotCellDescription(text="%.8X" % line.offset, attr={}), - self.DotCellDescription(text=str(line), attr={})] + self.DotCellDescription(text=line.to_string(self.loc_db), attr={})] else: - yield self.DotCellDescription(text=str(line), attr={}) + yield self.DotCellDescription(text=line.to_string(self.loc_db), attr={}) def node_attr(self, node): - if isinstance(node, AsmBlockBad): + block = self._loc_key_to_block.get(node, None) + if isinstance(block, AsmBlockBad): return {'style': 'filled', 'fillcolor': 'red'} return {} @@ -658,22 +593,19 @@ class AsmCFG(DiGraph): # Helpers @property def pendings(self): - """Dictionary of label -> set(AsmCFGPending instance) indicating - which label are missing in the current instance. 
- A label is missing if a block which is already in nodes has constraints + """Dictionary of loc_key -> set(AsmCFGPending instance) indicating + which loc_key are missing in the current instance. + A loc_key is missing if a block which is already in nodes has constraints with him (thanks to its .bto) and the corresponding block is not yet in nodes """ return self._pendings - def _build_label2block(self): - self._label2block = {block.label: block - for block in self._nodes} - - def label2block(self, label): - """Return the block corresponding to label @label - @label: AsmLabel instance or ExprId(AsmLabel) instance""" - return self._label2block[label] + def label2block(self, loc_key): + """Return the block corresponding to loc_key @loc_key + @loc_key: LocKey instance""" + warnings.warn('DEPRECATION WARNING: use "loc_key_to_block" instead of "label2block"') + return self.loc_key_to_block(loc_key) def rebuild_edges(self): """Consider blocks '.bto' and rebuild edges according to them, ie: @@ -684,20 +616,25 @@ class AsmCFG(DiGraph): This method should be called if a block's '.bto' in nodes have been modified without notifying this instance to resynchronize edges. """ - self._build_label2block() - for block in self._nodes: + for block in self.blocks: edges = [] # Rebuild edges from bto for constraint in block.bto: - dst = self._label2block.get(constraint.label, - None) + dst = self._loc_key_to_block.get(constraint.loc_key, + None) if dst is None: # Missing destination, add to pendings - self._pendings.setdefault(constraint.label, - set()).add(self.AsmCFGPending(block, - constraint.c_t)) + self._pendings.setdefault( + constraint.loc_key, + set() + ).add( + self.AsmCFGPending( + block, + constraint.c_t + ) + ) continue - edge = (block, dst) + edge = (block.loc_key, dst.loc_key) edges.append(edge) if edge in self._edges: # Already known edge, constraint may have changed @@ -707,43 +644,52 @@ class AsmCFG(DiGraph): self.add_edge(edge[0], edge[1], constraint.c_t) # Remove useless edges - for succ in self.successors(block): - edge = (block, succ) + for succ in self.successors(block.loc_key): + edge = (block.loc_key, succ) if edge not in edges: self.del_edge(*edge) def get_bad_blocks(self): """Iterator on AsmBlockBad elements""" # A bad asm block is always a leaf - for block in self.leaves(): + for loc_key in self.leaves(): + block = self._loc_key_to_block.get(loc_key, None) if isinstance(block, AsmBlockBad): yield block def get_bad_blocks_predecessors(self, strict=False): - """Iterator on block with an AsmBlockBad destination - @strict: (optional) if set, return block with only bad + """Iterator on loc_keys with an AsmBlockBad destination + @strict: (optional) if set, return loc_key with only bad successors """ # Avoid returning the same block done = set() for badblock in self.get_bad_blocks(): - for predecessor in self.predecessors_iter(badblock): + for predecessor in self.predecessors_iter(badblock.loc_key): if predecessor not in done: if (strict and - not all(isinstance(block, AsmBlockBad) + not all(isinstance(self._loc_key_to_block.get(block, None), AsmBlockBad) for block in self.successors_iter(predecessor))): continue yield predecessor done.add(predecessor) def getby_offset(self, offset): - """Return block containing @offset""" - for block in self: + """Return asmblock containing @offset""" + for block in self.blocks: if block.lines[0].offset <= offset < \ (block.lines[-1].offset + block.lines[-1].l): return block return None + def loc_key_to_block(self, loc_key): + """ + Return the asmblock 
corresponding to loc_key @loc_key, None if unknown + loc_key + @loc_key: LocKey instance + """ + return self._loc_key_to_block.get(loc_key, None) + def sanity_check(self): """Do sanity checks on blocks' constraints: * no pendings @@ -752,33 +698,37 @@ class AsmCFG(DiGraph): """ if len(self._pendings) != 0: - raise RuntimeError("Some blocks are missing: %s" % map(str, - self._pendings.keys())) + raise RuntimeError("Some blocks are missing: %s" % map( + str, + self._pendings.keys() + )) next_edges = {edge: constraint for edge, constraint in self.edges2constraint.iteritems() if constraint == AsmConstraint.c_next} - for block in self._nodes: + for loc_key in self._nodes: + if loc_key not in self._loc_key_to_block: + raise RuntimeError("Not supported yet: every node must have a corresponding AsmBlock") # No next constraint to self - if (block, block) in next_edges: + if (loc_key, loc_key) in next_edges: raise RuntimeError('Bad constraint: self in next') # No multiple next constraint to same block - pred_next = list(pblock - for (pblock, dblock) in next_edges - if dblock == block) + pred_next = list(ploc_key + for (ploc_key, dloc_key) in next_edges + if dloc_key == loc_key) if len(pred_next) > 1: raise RuntimeError("Too many next constraints for bloc %r" - "(%s)" % (block.label, - map(lambda x: x.label, pred_next))) + "(%s)" % (loc_key, + pred_next)) def guess_blocks_size(self, mnemo): """Asm and compute max block size Add a 'size' and 'max_size' attribute on each block @mnemo: metamn instance""" - for block in self._nodes: + for block in self.blocks: size = 0 for instr in block.lines: if isinstance(instr, AsmRaw): @@ -812,24 +762,26 @@ class AsmCFG(DiGraph): block.max_size = size log_asmblock.info("size: %d max: %d", block.size, block.max_size) - def apply_splitting(self, symbol_pool, dis_block_callback=None, **kwargs): + def apply_splitting(self, loc_db, dis_block_callback=None, **kwargs): """Consider @self' bto destinations and split block in @self if one of these destinations jumps in the middle of this block. - In order to work, they must be only one block in @self per label in - @symbol_pool (which is true if @self come from the same disasmEngine). + In order to work, they must be only one block in @self per loc_key in + @loc_db (which is true if @self come from the same disasmEngine). 
- @symbol_pool: AsmSymbolPool instance associated with @self'labels + @loc_db: LocationDB instance associated with @self'loc_keys @dis_block_callback: (optional) if set, this callback will be called on new block destinations @kwargs: (optional) named arguments to pass to dis_block_callback """ # Get all possible destinations not yet resolved, with a resolved # offset - block_dst = [label.offset - for label in self.pendings - if label.offset is not None] + block_dst = [] + for loc_key in self.pendings: + offset = loc_db.get_location_offset(loc_key) + if offset is not None: + block_dst.append(offset) - todo = self.nodes().copy() + todo = set(self.blocks) rebuild_needed = False while todo: @@ -841,9 +793,9 @@ class AsmCFG(DiGraph): if not (off > range_start and off < range_stop): continue - # `cur_block` must be splitted at offset `off` - label = symbol_pool.getby_offset_create(off) - new_b = cur_block.split(off, label) + # `cur_block` must be splitted at offset `off`from miasm2.core.locationdb import LocationDB + + new_b = cur_block.split(loc_db, off) log_asmblock.debug("Split block %x", off) if new_b is None: log_asmblock.error("Cannot split %x!!", off) @@ -852,22 +804,24 @@ class AsmCFG(DiGraph): # Remove pending from cur_block # Links from new_b will be generated in rebuild_edges for dst in new_b.bto: - if dst.label not in self.pendings: + if dst.loc_key not in self.pendings: continue - self.pendings[dst.label] = set(pending for pending in self.pendings[dst.label] - if pending.waiter != cur_block) + self.pendings[dst.loc_key] = set(pending for pending in self.pendings[dst.loc_key] + if pending.waiter != cur_block) # The new block destinations may need to be disassembled if dis_block_callback: - offsets_to_dis = set(constraint.label.offset - for constraint in new_b.bto) + offsets_to_dis = set( + self.loc_db.get_location_offset(constraint.loc_key) + for constraint in new_b.bto + ) dis_block_callback(cur_bloc=new_b, offsets_to_dis=offsets_to_dis, - symbol_pool=symbol_pool, **kwargs) + loc_db=loc_db, **kwargs) # Update structure rebuild_needed = True - self.add_node(new_b) + self.add_block(new_b) # The new block must be considered todo.add(new_b) @@ -879,18 +833,18 @@ class AsmCFG(DiGraph): def __str__(self): out = [] - for node in self.nodes(): - out.append(str(node)) - for nodeA, nodeB in self.edges(): - out.append("%s -> %s" % (nodeA.label, nodeB.label)) + for block in self.blocks: + out.append(str(block)) + for loc_key_a, loc_key_b in self.edges(): + out.append("%s -> %s" % (loc_key_a, loc_key_b)) return '\n'.join(out) def __repr__(self): return "<%s %s>" % (self.__class__.__name__, hex(id(self))) # Out of _merge_blocks to be computed only once -_acceptable_block = lambda block: (not isinstance(block, AsmBlockBad) and - len(block.lines) > 0) +_acceptable_block = lambda graph, loc_key: (not isinstance(graph.loc_key_to_block(loc_key), AsmBlockBad) and + len(graph.loc_key_to_block(loc_key).lines) > 0) _parent = MatchGraphJoker(restrict_in=False, filt=_acceptable_block) _son = MatchGraphJoker(restrict_out=False, filt=_acceptable_block) _expgraph = _parent >> _son @@ -906,7 +860,9 @@ def _merge_blocks(dg, graph): for match in _expgraph.match(graph): # Get matching blocks - block, succ = match[_parent], match[_son] + lbl_block, lbl_succ = match[_parent], match[_son] + block = graph.loc_key_to_block(lbl_block) + succ = graph.loc_key_to_block(lbl_succ) # Ignore already deleted blocks if (block in to_ignore or @@ -926,11 +882,11 @@ def _merge_blocks(dg, graph): # Merge block block.lines += 
succ.lines - for nextb in graph.successors_iter(succ): - graph.add_edge(block, nextb, graph.edges2constraint[(succ, nextb)]) + for nextb in graph.successors_iter(lbl_succ): + graph.add_edge(lbl_block, nextb, graph.edges2constraint[(lbl_succ, nextb)]) - graph.del_node(succ) - to_ignore.add(succ) + graph.del_block(succ) + to_ignore.add(lbl_succ) bbl_simplifier = DiGraphSimplifier() @@ -959,34 +915,40 @@ def conservative_asm(mnemo, instr, symbols, conservative): def fix_expr_val(expr, symbols): """Resolve an expression @expr using @symbols""" def expr_calc(e): - if isinstance(e, m2_expr.ExprId): - s = symbols._name2label[e.name] - e = m2_expr.ExprInt(s.offset, e.size) + if isinstance(e, ExprId): + # Example: + # toto: + # .dword label + loc_key = symbols.get_name_location(e.name) + offset = symbols.get_location_offset(loc_key) + e = ExprInt(offset, e.size) return e result = expr.visit(expr_calc) result = expr_simp(result) - if not isinstance(result, m2_expr.ExprInt): + if not isinstance(result, ExprInt): raise RuntimeError('Cannot resolve symbol %s' % expr) return result -def fix_label_offset(symbol_pool, label, offset, modified): - """Fix the @label offset to @offset. If the @offset has changed, add @label +def fix_loc_offset(loc_db, loc_key, offset, modified): + """ + Fix the @loc_key offset to @offset. If the @offset has changed, add @loc_key to @modified - @symbol_pool: current symbol_pool + @loc_db: current loc_db """ - if label.offset == offset: + loc_offset = loc_db.get_location_offset(loc_key) + if loc_offset == offset: return - symbol_pool.set_offset(label, offset) - modified.add(label) + loc_db.set_location_offset(loc_key, offset, force=True) + modified.add(loc_key) class BlockChain(object): """Manage blocks linked with an asm_constraint_next""" - def __init__(self, symbol_pool, blocks): - self.symbol_pool = symbol_pool + def __init__(self, loc_db, blocks): + self.loc_db = loc_db self.blocks = blocks self.place() @@ -998,7 +960,8 @@ class BlockChain(object): def _set_pinned_block_idx(self): self.pinned_block_idx = None for i, block in enumerate(self.blocks): - if is_int(block.label.offset): + loc_key = block.loc_key + if self.loc_db.get_location_offset(loc_key) is not None: if self.pinned_block_idx is not None: raise ValueError("Multiples pinned block detected") self.pinned_block_idx = i @@ -1016,7 +979,8 @@ class BlockChain(object): if not self.pinned: return - offset_base = self.blocks[self.pinned_block_idx].label.offset + loc = self.blocks[self.pinned_block_idx].loc_key + offset_base = self.loc_db.get_location_offset(loc) assert(offset_base % self.blocks[self.pinned_block_idx].alignment == 0) self.offset_min = offset_base @@ -1036,48 +1000,48 @@ class BlockChain(object): self.place() return [self] - def fix_blocks(self, modified_labels): + def fix_blocks(self, modified_loc_keys): """Propagate a pinned to its blocks' neighbour - @modified_labels: store new pinned labels""" + @modified_loc_keys: store new pinned loc_keys""" if not self.pinned: raise ValueError('Trying to fix unpinned block') # Propagate offset to blocks before pinned block pinned_block = self.blocks[self.pinned_block_idx] - offset = pinned_block.label.offset + offset = self.loc_db.get_location_offset(pinned_block.loc_key) if offset % pinned_block.alignment != 0: raise RuntimeError('Bad alignment') for block in self.blocks[:self.pinned_block_idx - 1:-1]: new_offset = offset - block.size new_offset = new_offset - new_offset % pinned_block.alignment - fix_label_offset(self.symbol_pool, - block.label, - new_offset, - 
modified_labels) + fix_loc_offset(self.loc_db, + block.loc_key, + new_offset, + modified_loc_keys) # Propagate offset to blocks after pinned block - offset = pinned_block.label.offset + pinned_block.size + offset = self.loc_db.get_location_offset(pinned_block.loc_key) + pinned_block.size last_block = pinned_block for block in self.blocks[self.pinned_block_idx + 1:]: offset += (- offset) % last_block.alignment - fix_label_offset(self.symbol_pool, - block.label, - offset, - modified_labels) + fix_loc_offset(self.loc_db, + block.loc_key, + offset, + modified_loc_keys) offset += block.size last_block = block - return modified_labels + return modified_loc_keys class BlockChainWedge(object): """Stand for wedges between blocks""" - def __init__(self, symbol_pool, offset, size): - self.symbol_pool = symbol_pool + def __init__(self, loc_db, offset, size): + self.loc_db = loc_db self.offset = offset self.max_size = size self.offset_min = offset @@ -1086,23 +1050,22 @@ class BlockChainWedge(object): def merge(self, chain): """Best effort merge two block chains Return the list of resulting blockchains""" - self.symbol_pool.set_offset(chain.blocks[0].label, self.offset_max) + self.loc_db.set_location_offset(chain.blocks[0].loc_key, self.offset_max) chain.place() return [self, chain] -def group_constrained_blocks(symbol_pool, blocks): +def group_constrained_blocks(loc_db, asmcfg): """ - Return the BlockChains list built from grouped asm blocks linked by + Return the BlockChains list built from grouped blocks in asmcfg linked by asm_constraint_next - @blocks: a list of asm block + @asmcfg: an AsmCfg instance """ log_asmblock.info('group_constrained_blocks') - # Group adjacent blocks - remaining_blocks = list(blocks) + # Group adjacent asmcfg + remaining_blocks = list(asmcfg.blocks) known_block_chains = {} - lbl2block = {block.label: block for block in blocks} while remaining_blocks: # Create a new block chain @@ -1111,10 +1074,10 @@ def group_constrained_blocks(symbol_pool, blocks): # Find sons in remainings blocks linked with a next constraint while True: # Get next block - next_label = block_list[-1].get_next() - if next_label is None or next_label not in lbl2block: + next_loc_key = block_list[-1].get_next() + if next_loc_key is None or asmcfg.loc_key_to_block(next_loc_key) is None: break - next_block = lbl2block[next_label] + next_block = asmcfg.loc_key_to_block(next_loc_key) # Add the block at the end of the current chain if next_block not in remaining_blocks: @@ -1123,15 +1086,15 @@ def group_constrained_blocks(symbol_pool, blocks): remaining_blocks.remove(next_block) # Check if son is in a known block group - if next_label is not None and next_label in known_block_chains: - block_list += known_block_chains[next_label] - del known_block_chains[next_label] + if next_loc_key is not None and next_loc_key in known_block_chains: + block_list += known_block_chains[next_loc_key] + del known_block_chains[next_loc_key] - known_block_chains[block_list[0].label] = block_list + known_block_chains[block_list[0].loc_key] = block_list out_block_chains = [] - for label in known_block_chains: - chain = BlockChain(symbol_pool, known_block_chains[label]) + for loc_key in known_block_chains: + chain = BlockChain(loc_db, known_block_chains[loc_key]) out_block_chains.append(chain) return out_block_chains @@ -1151,7 +1114,7 @@ def get_blockchains_address_interval(blockChains, dst_interval): return allocated_interval -def resolve_symbol(blockChains, symbol_pool, dst_interval=None): +def resolve_symbol(blockChains, loc_db, 
dst_interval=None): """Place @blockChains in the @dst_interval""" log_asmblock.info('resolve_symbol') @@ -1169,7 +1132,7 @@ def resolve_symbol(blockChains, symbol_pool, dst_interval=None): # Add wedge in forbidden intervals for start, stop in forbidden_interval.intervals: wedge = BlockChainWedge( - symbol_pool, offset=start, size=stop + 1 - start) + loc_db, offset=start, size=stop + 1 - start) pinned_chains.append(wedge) # Try to place bigger blockChains first @@ -1198,28 +1161,22 @@ def resolve_symbol(blockChains, symbol_pool, dst_interval=None): return [chain for chain in fixed_chains if isinstance(chain, BlockChain)] -def filter_exprid_label(exprs): - """Extract labels from list of ExprId @exprs""" - return set(expr.name for expr in exprs if isinstance(expr.name, AsmLabel)) - - -def get_block_labels(block): - """Extract labels used by @block""" +def get_block_loc_keys(block): + """Extract loc_keys used by @block""" symbols = set() for instr in block.lines: if isinstance(instr, AsmRaw): if isinstance(instr.raw, list): for expr in instr.raw: - symbols.update(m2_expr.get_expr_ids(expr)) + symbols.update(get_expr_locs(expr)) else: for arg in instr.args: - symbols.update(m2_expr.get_expr_ids(arg)) - labels = filter_exprid_label(symbols) - return labels + symbols.update(get_expr_locs(arg)) + return symbols -def assemble_block(mnemo, block, symbol_pool, conservative=False): - """Assemble a @block using @symbol_pool +def assemble_block(mnemo, block, loc_db, conservative=False): + """Assemble a @block using @loc_db @conservative: (optional) use original bytes when possible """ offset_i = 0 @@ -1230,7 +1187,7 @@ def assemble_block(mnemo, block, symbol_pool, conservative=False): # Fix special AsmRaw data = "" for expr in instr.raw: - expr_int = fix_expr_val(expr, symbol_pool) + expr_int = fix_expr_val(expr, loc_db) data += pck[expr_int.size](expr_int.arg) instr.data = data @@ -1240,16 +1197,16 @@ def assemble_block(mnemo, block, symbol_pool, conservative=False): # Assemble an instruction saved_args = list(instr.args) - instr.offset = block.label.offset + offset_i + instr.offset = loc_db.get_location_offset(block.loc_key) + offset_i # Replace instruction's arguments by resolved ones - instr.args = instr.resolve_args_with_symbols(symbol_pool) + instr.args = instr.resolve_args_with_symbols(loc_db) if instr.dstflow(): instr.fixDstOffset() old_l = instr.l - cached_candidate, _ = conservative_asm(mnemo, instr, symbol_pool, + cached_candidate, _ = conservative_asm(mnemo, instr, loc_db, conservative) # Restore original arguments @@ -1263,19 +1220,19 @@ def assemble_block(mnemo, block, symbol_pool, conservative=False): offset_i += instr.l -def asmblock_final(mnemo, blocks, blockChains, symbol_pool, conservative=False): - """Resolve and assemble @blockChains using @symbol_pool until fixed point is +def asmblock_final(mnemo, asmcfg, blockChains, loc_db, conservative=False): + """Resolve and assemble @blockChains using @loc_db until fixed point is reached""" log_asmblock.debug("asmbloc_final") # Init structures - lbl2block = {block.label: block for block in blocks} - blocks_using_label = {} - for block in blocks: - labels = get_block_labels(block) - for label in labels: - blocks_using_label.setdefault(label, set()).add(block) + blocks_using_loc_key = {} + for block in asmcfg.blocks: + exprlocs = get_block_loc_keys(block) + loc_keys = set(expr.loc_key for expr in exprlocs) + for loc_key in loc_keys: + blocks_using_loc_key.setdefault(loc_key, set()).add(block) block2chain = {} for chain in blockChains: @@ 
-1283,25 +1240,26 @@ def asmblock_final(mnemo, blocks, blockChains, symbol_pool, conservative=False): block2chain[block] = chain # Init worklist - blocks_to_rework = set(blocks) + blocks_to_rework = set(asmcfg.blocks) # Fix and re-assemble blocks until fixed point is reached while True: # Propagate pinned blocks into chains - modified_labels = set() + modified_loc_keys = set() for chain in blockChains: - chain.fix_blocks(modified_labels) + chain.fix_blocks(modified_loc_keys) - for label in modified_labels: + for loc_key in modified_loc_keys: # Retrive block with modified reference - if label in lbl2block: - blocks_to_rework.add(lbl2block[label]) + mod_block = asmcfg.loc_key_to_block(loc_key) + if mod_block is not None: + blocks_to_rework.add(mod_block) - # Enqueue blocks referencing a modified label - if label not in blocks_using_label: + # Enqueue blocks referencing a modified loc_key + if loc_key not in blocks_using_loc_key: continue - for block in blocks_using_label[label]: + for block in blocks_using_loc_key[loc_key]: blocks_to_rework.add(block) # No more work @@ -1310,33 +1268,36 @@ def asmblock_final(mnemo, blocks, blockChains, symbol_pool, conservative=False): while blocks_to_rework: block = blocks_to_rework.pop() - assemble_block(mnemo, block, symbol_pool, conservative) + assemble_block(mnemo, block, loc_db, conservative) -def asmbloc_final(mnemo, blocks, blockChains, symbol_pool, conservative=False): - """Resolve and assemble @blockChains using @symbol_pool until fixed point is +def asmbloc_final(mnemo, blocks, blockChains, loc_db, conservative=False): + """Resolve and assemble @blockChains using @loc_db until fixed point is reached""" warnings.warn('DEPRECATION WARNING: use "asmblock_final" instead of "asmbloc_final"') - asmblock_final(mnemo, blocks, blockChains, symbol_pool, conservative) + asmblock_final(mnemo, blocks, blockChains, loc_db, conservative) -def asm_resolve_final(mnemo, blocks, symbol_pool, dst_interval=None): - """Resolve and assemble @blocks using @symbol_pool into interval +def asm_resolve_final(mnemo, asmcfg, loc_db, dst_interval=None): + """Resolve and assemble @asmcfg using @loc_db into interval @dst_interval""" - blocks.sanity_check() + asmcfg.sanity_check() - blocks.guess_blocks_size(mnemo) - blockChains = group_constrained_blocks(symbol_pool, blocks) + asmcfg.guess_blocks_size(mnemo) + blockChains = group_constrained_blocks(loc_db, asmcfg) resolved_blockChains = resolve_symbol( - blockChains, symbol_pool, dst_interval) + blockChains, + loc_db, + dst_interval + ) - asmblock_final(mnemo, blocks, resolved_blockChains, symbol_pool) + asmblock_final(mnemo, asmcfg, resolved_blockChains, loc_db) patches = {} output_interval = interval() - for block in blocks: - offset = block.label.offset + for block in asmcfg.blocks: + offset = loc_db.get_location_offset(block.loc_key) for instr in block.lines: if not instr.data: # Empty line @@ -1375,7 +1336,7 @@ class disasmEngine(object): - blocs_wd: maximum number of distinct disassembled block + callback(arch, attrib, pool_bin, cur_bloc, offsets_to_dis, - symbol_pool) + loc_db) - dis_block_callback: callback after each new disassembled block """ @@ -1389,7 +1350,7 @@ class disasmEngine(object): self.arch = arch self.attrib = attrib self.bin_stream = bin_stream - self.symbol_pool = AsmSymbolPool() + self.loc_db = LocationDB() # Setup options self.dont_dis = [] @@ -1421,6 +1382,10 @@ class disasmEngine(object): warnings.warn("""DEPRECATION WARNING: "dis_bloc_callback" use dis_block_callback.""") self.dis_block_callback = 
function + @property + def symbol_pool(self): + warnings.warn("""DEPRECATION WARNING: use 'loc_db'""") + return self.loc_db # Deprecated job_done = property(get_job_done, set_job_done) @@ -1439,8 +1404,8 @@ class disasmEngine(object): delayslot_count = self.arch.delayslot offsets_to_dis = set() add_next_offset = False - label = self.symbol_pool.getby_offset_create(offset) - cur_block = AsmBlock(label) + loc_key = self.loc_db.get_or_create_offset_location(offset) + cur_block = AsmBlock(loc_key) log_asmblock.debug("dis at %X", int(offset)) while not in_delayslot or delayslot_count > 0: if in_delayslot: @@ -1450,17 +1415,17 @@ class disasmEngine(object): if not cur_block.lines: job_done.add(offset) # Block is empty -> bad block - cur_block = AsmBlockBad(label, errno=2) + cur_block = AsmBlockBad(loc_key, errno=AsmBlockBad.ERROR_FORBIDDEN) else: # Block is not empty, stop the desassembly pass and add a # constraint to the next block - cur_block.add_cst(offset, AsmConstraint.c_next, - self.symbol_pool) + loc_key_cst = self.loc_db.get_or_create_offset_location(offset) + cur_block.add_cst(loc_key_cst, AsmConstraint.c_next) break if lines_cpt > 0 and offset in self.split_dis: - cur_block.add_cst(offset, AsmConstraint.c_next, - self.symbol_pool) + loc_key_cst = self.loc_db.get_or_create_offset_location(offset) + cur_block.add_cst(loc_key_cst, AsmConstraint.c_next) offsets_to_dis.add(offset) break @@ -1470,28 +1435,35 @@ class disasmEngine(object): break if offset in job_done: - cur_block.add_cst(offset, AsmConstraint.c_next, - self.symbol_pool) + loc_key_cst = self.loc_db.get_or_create_offset_location(offset) + cur_block.add_cst(loc_key_cst, AsmConstraint.c_next) break off_i = offset + error = None try: instr = self.arch.dis(self.bin_stream, self.attrib, offset) - except (Disasm_Exception, IOError), e: + except Disasm_Exception as e: + log_asmblock.warning(e) + instr = None + error = AsmBlockBad.ERROR_CANNOT_DISASM + except IOError as e: log_asmblock.warning(e) instr = None + error = AsmBlockBad.ERROR_IO + if instr is None: log_asmblock.warning("cannot disasm at %X", int(off_i)) if not cur_block.lines: job_done.add(offset) # Block is empty -> bad block - cur_block = AsmBlockBad(label, errno=0) + cur_block = AsmBlockBad(loc_key, errno=error) else: # Block is not empty, stop the desassembly pass and add a # constraint to the next block - cur_block.add_cst(off_i, AsmConstraint.c_next, - self.symbol_pool) + loc_key_cst = self.loc_db.get_or_create_offset_location(off_i) + cur_block.add_cst(loc_key_cst, AsmConstraint.c_next) break # XXX TODO nul start block option @@ -1499,12 +1471,12 @@ class disasmEngine(object): log_asmblock.warning("reach nul instr at %X", int(off_i)) if not cur_block.lines: # Block is empty -> bad block - cur_block = AsmBlockBad(label, errno=1) + cur_block = AsmBlockBad(loc_key, errno=AsmBlockBad.ERROR_NULL_STARTING_BLOCK) else: # Block is not empty, stop the desassembly pass and add a # constraint to the next block - cur_block.add_cst(off_i, AsmConstraint.c_next, - self.symbol_pool) + loc_key_cst = self.loc_db.get_or_create_offset_location(off_i) + cur_block.add_cst(loc_key_cst, AsmConstraint.c_next) break # special case: flow graph modificator in delayslot @@ -1525,31 +1497,32 @@ class disasmEngine(object): # test split if instr.splitflow() and not (instr.is_subcall() and self.dontdis_retcall): add_next_offset = True - pass if instr.dstflow(): - instr.dstflow2label(self.symbol_pool) - dst = instr.getdstflow(self.symbol_pool) - dstn = [] - for d in dst: - if isinstance(d, 
m2_expr.ExprId) and \ - isinstance(d.name, AsmLabel): - dstn.append(d.name) - if d.name.offset in self.dont_dis_retcall_funcs: - add_next_offset = False - dst = dstn + instr.dstflow2label(self.loc_db) + destinations = instr.getdstflow(self.loc_db) + known_dsts = [] + for dst in destinations: + if not dst.is_loc(): + continue + loc_key = dst.loc_key + loc_key_offset = self.loc_db.get_location_offset(loc_key) + known_dsts.append(loc_key) + if loc_key_offset in self.dont_dis_retcall_funcs: + add_next_offset = False if (not instr.is_subcall()) or self.follow_call: - cur_block.bto.update( - [AsmConstraint(x, AsmConstraint.c_to) for x in dst]) + cur_block.bto.update([AsmConstraint(loc_key, AsmConstraint.c_to) for loc_key in known_dsts]) # get in delayslot mode in_delayslot = True delayslot_count = instr.delayslot for c in cur_block.bto: - offsets_to_dis.add(c.label.offset) + loc_key_offset = self.loc_db.get_location_offset(c.loc_key) + offsets_to_dis.add(loc_key_offset) if add_next_offset: - cur_block.add_cst(offset, AsmConstraint.c_next, self.symbol_pool) + loc_key_cst = self.loc_db.get_or_create_offset_location(offset) + cur_block.add_cst(loc_key_cst, AsmConstraint.c_next) offsets_to_dis.add(offset) # Fix multiple constraints @@ -1559,7 +1532,9 @@ class disasmEngine(object): self.dis_block_callback(mn=self.arch, attrib=self.attrib, pool_bin=self.bin_stream, cur_bloc=cur_block, offsets_to_dis=offsets_to_dis, - symbol_pool=self.symbol_pool) + loc_db=self.loc_db, + # Deprecated API + symbol_pool=self.loc_db) return cur_block, offsets_to_dis def dis_block(self, offset): @@ -1589,7 +1564,7 @@ class disasmEngine(object): log_asmblock.info("dis bloc all") job_done = set() if blocks is None: - blocks = AsmCFG() + blocks = AsmCFG(self.loc_db) todo = [offset] bloc_cpt = 0 @@ -1605,9 +1580,9 @@ class disasmEngine(object): continue cur_block, nexts = self._dis_block(target_offset, job_done) todo += nexts - blocks.add_node(cur_block) + blocks.add_block(cur_block) - blocks.apply_splitting(self.symbol_pool, + blocks.apply_splitting(self.loc_db, dis_block_callback=self.dis_block_callback, mn=self.arch, attrib=self.attrib, pool_bin=self.bin_stream) diff --git a/miasm2/core/cpu.py b/miasm2/core/cpu.py index 6c3de8a7..1326d08b 100644 --- a/miasm2/core/cpu.py +++ b/miasm2/core/cpu.py @@ -8,13 +8,12 @@ from collections import defaultdict import pyparsing import miasm2.expression.expression as m2_expr -from miasm2.core import asmblock from miasm2.core.bin_stream import bin_stream, bin_stream_str from miasm2.core.utils import Disasm_Exception from miasm2.expression.simplifications import expr_simp -from miasm2.core.asm_ast import AstNode, AstInt, AstId, AstMem, AstOp +from miasm2.core.asm_ast import AstNode, AstInt, AstId, AstOp log = logging.getLogger("cpuhelper") console_handler = logging.StreamHandler() @@ -672,7 +671,7 @@ class bs_swapargs(bs_divert): class m_arg(object): - def fromstring(self, text, symbol_pool, parser_result=None): + def fromstring(self, text, loc_db, parser_result=None): if parser_result: e, start, stop = parser_result[self.parser] self.expr = e @@ -682,11 +681,11 @@ class m_arg(object): except StopIteration: return None, None arg = v[0] - expr = self.asm_ast_to_expr(arg, symbol_pool) + expr = self.asm_ast_to_expr(arg, loc_db) self.expr = expr return start, stop - def asm_ast_to_expr(self, arg, symbol_pool): + def asm_ast_to_expr(self, arg, loc_db): raise NotImplementedError("Virtual") @@ -709,7 +708,7 @@ class reg_noarg(object): reg_info = None parser = None - def fromstring(self, text, 
symbol_pool, parser_result=None): + def fromstring(self, text, loc_db, parser_result=None): if parser_result: e, start, stop = parser_result[self.parser] self.expr = e @@ -719,7 +718,7 @@ class reg_noarg(object): except StopIteration: return None, None arg = v[0] - expr = self.parses_to_expr(arg, symbol_pool) + expr = self.parses_to_expr(arg, loc_db) self.expr = expr return start, stop @@ -985,18 +984,24 @@ class instruction(object): self.mode = mode self.args = args self.additional_info = additional_info + self.offset = None + self.l = None + self.b = None def gen_args(self, args): out = ', '.join([str(x) for x in args]) return out def __str__(self): + return self.to_string() + + def to_string(self, loc_db=None): o = "%-10s " % self.name args = [] for i, arg in enumerate(self.args): if not isinstance(arg, m2_expr.Expr): raise ValueError('zarb arg type') - x = self.arg2str(arg, pos = i) + x = self.arg2str(arg, i, loc_db) args.append(x) o += self.gen_args(args) return o @@ -1011,40 +1016,40 @@ class instruction(object): if symbols is None: symbols = {} args_out = [] - for a in self.args: - e = a + for expr in self.args: # try to resolve symbols using symbols (0 for default value) - ids = m2_expr.get_expr_ids(e) - fixed_ids = {} - for x in ids: - if isinstance(x.name, asmblock.AsmLabel): - name = x.name.name - # special symbol $ - if name == '$': - fixed_ids[x] = self.get_asm_offset(x) - continue - if name == '_': - fixed_ids[x] = self.get_asm_next_offset(x) - continue - if not name in symbols: - raise ValueError('unresolved symbol! %r' % x) - else: - name = x.name - if not name in symbols: + loc_keys = m2_expr.get_expr_locs(expr) + fixed_expr = {} + for exprloc in loc_keys: + loc_key = exprloc.loc_key + names = symbols.get_location_names(loc_key) + # special symbols + if '$' in names: + fixed_expr[exprloc] = self.get_asm_offset(exprloc) continue - if symbols[name].offset is None: - raise ValueError('The offset of label "%s" cannot be ' - 'determined' % name) + if '_' in names: + fixed_expr[exprloc] = self.get_asm_next_offset(exprloc) + continue + if not names: + raise ValueError('Unresolved symbol: %r' % exprloc) + + offset = symbols.get_location_offset(loc_key) + if offset is None: + raise ValueError( + 'The offset of loc_key "%s" cannot be determined' % name + ) else: - size = x.size + # Fix symbol with its offset + size = exprloc.size if size is None: - default_size = self.get_symbol_size(x, symbols) + default_size = self.get_symbol_size(exprloc, symbols) size = default_size - value = m2_expr.ExprInt(symbols[name].offset, size) - fixed_ids[x] = value - e = e.replace_expr(fixed_ids) - e = expr_simp(e) - args_out.append(e) + value = m2_expr.ExprInt(offset, size) + fixed_expr[exprloc] = value + + expr = expr.replace_expr(fixed_expr) + expr = expr_simp(expr) + args_out.append(expr) return args_out def get_info(self, c): @@ -1275,7 +1280,7 @@ class cls_mn(object): return out[0] @classmethod - def fromstring(cls, text, symbol_pool, mode = None): + def fromstring(cls, text, loc_db, mode = None): global total_scans name = re.search('(\S+)', text).groups() if not name: @@ -1315,11 +1320,11 @@ class cls_mn(object): if start != 0: v, start, stop = [None], None, None if v != [None]: - v = f.asm_ast_to_expr(v[0], symbol_pool) + v = f.asm_ast_to_expr(v[0], loc_db) if v is None: v, start, stop = [None], None, None parsers[(i, start_i)][p] = v, start, stop - start, stop = f.fromstring(args_str, symbol_pool, parsers[(i, start_i)]) + start, stop = f.fromstring(args_str, loc_db, parsers[(i, start_i)]) if 
start != 0: log.debug("cannot fromstring %r", args_str) cannot_parse = True @@ -1524,12 +1529,12 @@ class cls_mn(object): def parse_prefix(self, v): return 0 - def set_dst_symbol(self, symbol_pool): - dst = self.getdstflow(symbol_pool) + def set_dst_symbol(self, loc_db): + dst = self.getdstflow(loc_db) args = [] for d in dst: if isinstance(d, m2_expr.ExprInt): - l = symbol_pool.getby_offset_create(int(d)) + l = loc_db.get_or_create_offset_location(int(d)) a = m2_expr.ExprId(l.name, d.size) else: @@ -1537,7 +1542,7 @@ class cls_mn(object): args.append(a) self.args_symb = args - def getdstflow(self, symbol_pool): + def getdstflow(self, loc_db): return [self.args[0].expr] @@ -1558,7 +1563,7 @@ class imm_noarg(object): return None return v - def fromstring(self, text, symbol_pool, parser_result=None): + def fromstring(self, text, loc_db, parser_result=None): if parser_result: e, start, stop = parser_result[self.parser] else: diff --git a/miasm2/core/graph.py b/miasm2/core/graph.py index ce17fc75..d88f8721 100644 --- a/miasm2/core/graph.py +++ b/miasm2/core/graph.py @@ -715,11 +715,12 @@ class MatchGraphJoker(object): matched node must be the same than the joker node in the associated MatchGraph @restrict_out: (optional) counterpart of @restrict_in for successors - @filt: (optional) function(node) -> boolean for filtering candidate node + @filt: (optional) function(graph, node) -> boolean for filtering + candidate node @name: (optional) helper for displaying the current joker """ if filt is None: - filt = lambda node: True + filt = lambda graph, node: True self.filt = filt if name is None: name = str(id(self)) @@ -816,7 +817,7 @@ class MatchGraph(DiGraph): return False # Check lambda filtering - if not expected.filt(candidate): + if not expected.filt(graph, candidate): return False # Check arity diff --git a/miasm2/core/locationdb.py b/miasm2/core/locationdb.py new file mode 100644 index 00000000..39c1c99a --- /dev/null +++ b/miasm2/core/locationdb.py @@ -0,0 +1,453 @@ +import warnings + +from miasm2.expression.expression import LocKey, ExprLoc +from miasm2.expression.modint import moduint, modint + + +def is_int(a): + return isinstance(a, (int, long, moduint, modint)) + + +class LocationDB(object): + """ + LocationDB is a "database" of information associated to location. + + An entry in a LocationDB is uniquely identified with a LocKey. 
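# Illustrative sketch, not part of the patch, for the graph.py change above:
# the MatchGraphJoker filter callback now receives the graph being matched in
# addition to the candidate node, so a predicate can inspect the candidate's
# context (here, its out-degree).
from miasm2.core.graph import MatchGraphJoker

dispatcher = MatchGraphJoker(
    name="dispatcher",
    # keep only candidates with more than two successors in the matched graph
    filt=lambda graph, node: len(graph.successors(node)) > 2,
)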
+ Additionnal information which can be associated with a LocKey are: + - an offset (uniq per LocationDB) + - several names (each are uniqs per LocationDB) + + As a schema: + loc_key 1 <-> 0..1 offset + 1 <-> 0..n name + + >>> loc_db = LocationDB() + # Add a location with no additionnal information + >>> loc_key1 = loc_db.add_location() + # Add a location with an offset + >>> loc_key2 = loc_db.add_location(offset=0x1234) + # Add a location with several names + >>> loc_key3 = loc_db.add_location(name="first_name") + >>> loc_db.add_location_name(loc_key3, "second_name") + # Associate an offset to an existing location + >>> loc_db.set_location_offset(loc_key3, 0x5678) + # Remove a name from an existing location + >>> loc_db.remove_location_name(loc_key3, "second_name") + + # Get back offset + >>> loc_db.get_location_offset(loc_key1) + None + >>> loc_db.get_location_offset(loc_key2) + 0x1234 + + # Display a location + >>> loc_db.pretty_str(loc_key1) + loc_key_1 + >>> loc_db.pretty_str(loc_key2) + loc_1234 + >>> loc_db.pretty_str(loc_key3) + first_name + """ + + def __init__(self): + # Known LocKeys + self._loc_keys = set() + + # Association tables + self._loc_key_to_offset = {} + self._loc_key_to_names = {} + self._name_to_loc_key = {} + self._offset_to_loc_key = {} + + # Counter for new LocKey generation + self._loc_key_num = 0 + + def get_location_offset(self, loc_key): + """ + Return the offset of @loc_key if any, None otherwise. + @loc_key: LocKey instance + """ + assert isinstance(loc_key, LocKey) + return self._loc_key_to_offset.get(loc_key) + + def get_location_names(self, loc_key): + """ + Return the frozenset of names associated to @loc_key + @loc_key: LocKey instance + """ + assert isinstance(loc_key, LocKey) + return frozenset(self._loc_key_to_names.get(loc_key, set())) + + def get_name_location(self, name): + """ + Return the LocKey of @name if any, None otherwise. + @name: target name + """ + return self._name_to_loc_key.get(name) + + def get_or_create_name_location(self, name): + """ + Return the LocKey of @name if any, create one otherwise. + @name: target name + """ + loc_key = self._name_to_loc_key.get(name) + if loc_key is not None: + return loc_key + return self.add_location(name=name) + + def get_offset_location(self, offset): + """ + Return the LocKey of @offset if any, None otherwise. + @name: target offset + """ + return self._offset_to_loc_key.get(offset) + + def get_or_create_offset_location(self, offset): + """ + Return the LocKey of @offset if any, create one otherwise. 
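# Illustrative sketch, not part of the patch: lookup versus get-or-create
# behaviour of the accessors defined above.
from miasm2.core.locationdb import LocationDB

loc_db = LocationDB()
assert loc_db.get_offset_location(0x1000) is None      # lookup only, no side effect
loc_a = loc_db.get_or_create_offset_location(0x1000)   # creates the location
loc_b = loc_db.get_or_create_offset_location(0x1000)   # returns the same LocKey
assert loc_a is loc_b
assert loc_db.get_location_offset(loc_a) == 0x1000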
+ @offset: target offset + """ + loc_key = self._offset_to_loc_key.get(offset) + if loc_key is not None: + return loc_key + return self.add_location(offset=offset) + + def add_location_name(self, loc_key, name): + """Associate a name @name to a given @loc_key + @name: str instance + @loc_key: LocKey instance + """ + assert loc_key in self._loc_keys + already_existing_loc = self._name_to_loc_key.get(name) + if already_existing_loc is not None and already_existing_loc != loc_key: + raise KeyError("%r is already associated to a different loc_key " + "(%r)" % (name, already_existing_loc)) + self._loc_key_to_names.setdefault(loc_key, set()).add(name) + self._name_to_loc_key[name] = loc_key + + def remove_location_name(self, loc_key, name): + """Disassociate a name @name from a given @loc_key + Fail if @name is not already associated to @loc_key + @name: str instance + @loc_key: LocKey instance + """ + assert loc_key in self._loc_keys + already_existing_loc = self._name_to_loc_key.get(name) + if already_existing_loc is None: + raise KeyError("%r is not already associated" % name) + if already_existing_loc != loc_key: + raise KeyError("%r is already associated to a different loc_key " + "(%r)" % (name, already_existing_loc)) + del self._name_to_loc_key[name] + self._loc_key_to_names[loc_key].remove(name) + + def set_location_offset(self, loc_key, offset, force=False): + """Associate the offset @offset to an LocKey @loc_key + + If @force is set, override silently. Otherwise, if an offset is already + associated to @loc_key, an error will be raised + """ + assert loc_key in self._loc_keys + already_existing_loc = self.get_offset_location(offset) + if already_existing_loc is not None and already_existing_loc != loc_key: + raise KeyError("%r is already associated to a different loc_key " + "(%r)" % (offset, already_existing_loc)) + already_existing_off = self._loc_key_to_offset.get(loc_key) + if (already_existing_off is not None and + already_existing_off != offset): + if not force: + raise ValueError( + "%r already has an offset (0x%x). Use 'force=True'" + " for silent overriding" % ( + loc_key, already_existing_off + )) + else: + self.unset_location_offset(loc_key) + self._offset_to_loc_key[offset] = loc_key + self._loc_key_to_offset[loc_key] = offset + + def unset_location_offset(self, loc_key): + """Disassociate LocKey @loc_key's offset + + Fail if there is already no offset associate with it + @loc_key: LocKey + """ + assert loc_key in self._loc_keys + already_existing_off = self._loc_key_to_offset.get(loc_key) + if already_existing_off is None: + raise ValueError("%r already has no offset" % (loc_key)) + del self._offset_to_loc_key[already_existing_off] + del self._loc_key_to_offset[loc_key] + + def consistency_check(self): + """Ensure internal structures are consistent with each others""" + assert set(self._loc_key_to_names).issubset(self._loc_keys) + assert set(self._loc_key_to_offset).issubset(self._loc_keys) + assert self._loc_key_to_offset == {v: k for k, v in self._offset_to_loc_key.iteritems()} + assert reduce( + lambda x, y:x.union(y), + self._loc_key_to_names.itervalues(), + set(), + ) == set(self._name_to_loc_key) + for name, loc_key in self._name_to_loc_key.iteritems(): + assert name in self._loc_key_to_names[loc_key] + + def add_location(self, name=None, offset=None, strict=True): + """Add a new location in the locationDB. Returns the corresponding LocKey. + If @name is set, also associate a name to this new location. 
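# Illustrative sketch, not part of the patch: attaching several names and an
# offset to a single location with the methods defined above.
from miasm2.core.locationdb import LocationDB

loc_db = LocationDB()
loc = loc_db.add_location(name="main")
loc_db.add_location_name(loc, "entry_point")             # a location may carry several names
loc_db.set_location_offset(loc, 0x401000)
loc_db.set_location_offset(loc, 0x401010, force=True)    # silent override
assert loc_db.get_location_names(loc) == frozenset(["main", "entry_point"])
assert loc_db.get_location_offset(loc) == 0x401010
loc_db.remove_location_name(loc, "entry_point")
loc_db.unset_location_offset(loc)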
+ If @offset is set, also associate an offset to this new location. + + Strict mode (set by @strict, default): + If a location with @offset or @name already exists, an error will be + raised. + Otherwise: + If a location with @offset or @name already exists, the corresponding + LocKey will be returned. + """ + + # Deprecation handling + if is_int(name): + assert offset is None or offset == name + warnings.warn("Deprecated API: use 'add_location(offset=)' instead." + " An additionnal 'name=' can be provided to also " + "associate a name (there is no more default name)") + offset = name + name = None + + # Argument cleaning + offset_loc_key = None + if offset is not None: + offset = int(offset) + offset_loc_key = self.get_offset_location(offset) + + # Test for collisions + name_loc_key = None + if name is not None: + name_loc_key = self.get_name_location(name) + + if strict: + if name_loc_key is not None: + raise ValueError("An entry for %r already exists (%r), and " + "strict mode is enabled" % ( + name, name_loc_key + )) + if offset_loc_key is not None: + raise ValueError("An entry for 0x%x already exists (%r), and " + "strict mode is enabled" % ( + offset, offset_loc_key + )) + else: + # Non-strict mode + if name_loc_key is not None: + known_offset = self.get_offset_location(name_loc_key) + if known_offset != offset: + raise ValueError( + "Location with name '%s' already have an offset: 0x%x " + "(!= 0x%x)" % (name, offset, known_offset) + ) + # Name already known, same offset -> nothing to do + return name_loc_key + + elif offset_loc_key is not None: + if name is not None: + # This is an error. Check for already known name are checked above + raise ValueError( + "Location with offset 0x%x already exists." + "To add a name to this location, use the dedicated API" + "'add_location_name(%r, %r)'" % ( + offset_loc_key, + name + )) + # Offset already known, no name specified + return offset_loc_key + + # No collision, this is a brand new location + loc_key = LocKey(self._loc_key_num) + self._loc_key_num += 1 + self._loc_keys.add(loc_key) + + if offset is not None: + assert offset not in self._offset_to_loc_key + self._offset_to_loc_key[offset] = loc_key + self._loc_key_to_offset[loc_key] = offset + + if name is not None: + self._name_to_loc_key[name] = loc_key + self._loc_key_to_names[loc_key] = set([name]) + + return loc_key + + def remove_location(self, loc_key): + """ + Delete the location corresponding to @loc_key + @loc_key: LocKey instance + """ + assert isinstance(loc_key, LocKey) + if loc_key not in self._loc_keys: + raise KeyError("Unknown loc_key %r" % loc_key) + names = self._loc_key_to_names.pop(loc_key, []) + for name in names: + del self._name_to_loc_key[name] + offset = self._loc_key_to_offset.pop(loc_key, None) + self._offset_to_loc_key.pop(offset, None) + self._loc_keys.remove(loc_key) + + def pretty_str(self, loc_key): + """Return a human readable version of @loc_key, according to information + available in this LocationDB instance""" + names = self.get_location_names(loc_key) + if names: + return ",".join(names) + offset = self.get_location_offset(loc_key) + if offset is not None: + return "loc_%x" % offset + return str(loc_key) + + @property + def loc_keys(self): + """Return all loc_keys""" + return self._loc_keys + + @property + def names(self): + """Return all known names""" + return self._name_to_loc_key.keys() + + @property + def offsets(self): + """Return all known offsets""" + return self._offset_to_loc_key.keys() + + def __str__(self): + out = [] + for loc_key in 
self._loc_keys: + names = self.get_location_names(loc_key) + offset = self.get_location_offset(loc_key) + out.append("%s: %s - %s" % ( + loc_key, + "0x%x" % offset if offset is not None else None, + ",".join(names) + )) + return "\n".join(out) + + def merge(self, location_db): + """Merge with another LocationDB @location_db + + WARNING: old reference to @location_db information (such as LocKeys) + must be retrieved from the updated version of this instance. The + dedicated "get_*" APIs may be used for this task + """ + # A simple merge is not doable here, because LocKey will certainly + # collides + + for foreign_loc_key in location_db.loc_keys: + foreign_names = location_db.get_location_names(foreign_loc_key) + foreign_offset = location_db.get_location_offset(foreign_loc_key) + if foreign_names: + init_name = list(foreign_names)[0] + else: + init_name = None + loc_key = self.add_location(offset=foreign_offset, name=init_name, + strict=False) + cur_names = self.get_location_names(loc_key) + for name in foreign_names: + if name not in cur_names and name != init_name: + self.add_location_name(loc_key, name=name) + + def canonize_to_exprloc(self, expr): + """ + If expr is ExprInt, return ExprLoc with corresponding loc_key + Else, return expr + + @expr: Expr instance + """ + if expr.is_int(): + loc_key = self.get_or_create_offset_location(int(expr)) + ret = ExprLoc(loc_key, expr.size) + return ret + return expr + + # Deprecated APIs + @property + def items(self): + """Return all loc_keys""" + warnings.warn('DEPRECATION WARNING: use "loc_keys" instead of "items"') + return list(self._loc_keys) + + def __getitem__(self, item): + warnings.warn('DEPRECATION WARNING: use "get_name_location" or ' + '"get_offset_location"') + if item in self._name_to_loc_key: + return self._name_to_loc_key[item] + if item in self._offset_to_loc_key: + return self._offset_to_loc_key[item] + raise KeyError('unknown symbol %r' % item) + + def __contains__(self, item): + warnings.warn('DEPRECATION WARNING: use "get_name_location" or ' + '"get_offset_location", or ".offsets" or ".names"') + return item in self._name_to_loc_key or item in self._offset_to_loc_key + + def loc_key_to_name(self, loc_key): + """[DEPRECATED API], see 'get_location_names'""" + warnings.warn("Deprecated API: use 'get_location_names'") + return sorted(self.get_location_names(loc_key))[0] + + def loc_key_to_offset(self, loc_key): + """[DEPRECATED API], see 'get_location_offset'""" + warnings.warn("Deprecated API: use 'get_location_offset'") + return self.get_location_offset(loc_key) + + def remove_loc_key(self, loc_key): + """[DEPRECATED API], see 'remove_location'""" + warnings.warn("Deprecated API: use 'remove_location'") + self.remove_location(loc_key) + + def del_loc_key_offset(self, loc_key): + """[DEPRECATED API], see 'unset_location_offset'""" + warnings.warn("Deprecated API: use 'unset_location_offset'") + self.unset_location_offset(loc_key) + + def getby_offset(self, offset): + """[DEPRECATED API], see 'get_offset_location'""" + warnings.warn("Deprecated API: use 'get_offset_location'") + return self.get_offset_location(offset) + + def getby_name(self, name): + """[DEPRECATED API], see 'get_name_location'""" + warnings.warn("Deprecated API: use 'get_name_location'") + return self.get_name_location(name) + + def getby_offset_create(self, offset): + """[DEPRECATED API], see 'get_or_create_offset_location'""" + warnings.warn("Deprecated API: use 'get_or_create_offset_location'") + return self.get_or_create_offset_location(offset) + + def 
getby_name_create(self, name): + """[DEPRECATED API], see 'get_or_create_name_location'""" + warnings.warn("Deprecated API: use 'get_or_create_name_location'") + return self.get_or_create_name_location(name) + + def rename_location(self, loc_key, newname): + """[DEPRECATED API], see 'add_name_location' and 'remove_location_name' + """ + warnings.warn("Deprecated API: use 'add_location_name' and " + "'remove_location_name'") + for name in self.get_location_names(loc_key): + self.remove_location_name(loc_key, name) + self.add_location_name(loc_key, name) + + def set_offset(self, loc_key, offset): + """[DEPRECATED API], see 'set_location_offset'""" + warnings.warn("Deprecated API: use 'set_location_offset'") + self.set_location_offset(loc_key, offset, force=True) + + def gen_loc_key(self): + """[DEPRECATED API], see 'add_location'""" + warnings.warn("Deprecated API: use 'add_location'") + return self.add_location() + + def str_loc_key(self, loc_key): + """[DEPRECATED API], see 'pretty_str'""" + warnings.warn("Deprecated API: use 'pretty_str'") + return self.pretty_str(loc_key) diff --git a/miasm2/core/parse_asm.py b/miasm2/core/parse_asm.py index df419680..7efa17d0 100644 --- a/miasm2/core/parse_asm.py +++ b/miasm2/core/parse_asm.py @@ -1,10 +1,11 @@ #-*- coding:utf-8 -*- import re -import miasm2.expression.expression as m2_expr +from miasm2.expression.expression import ExprId, ExprInt, ExprOp, ExprLoc, \ + LocKey import miasm2.core.asmblock as asmblock from miasm2.core.cpu import instruction, base_expr -from miasm2.core.asm_ast import AstInt, AstId, AstMem, AstOp +from miasm2.core.asm_ast import AstInt, AstId, AstOp declarator = {'byte': 8, 'word': 16, @@ -59,72 +60,47 @@ class DirectiveDontSplit(Directive): pass -def guess_next_new_label(symbol_pool): +def guess_next_new_label(loc_db): """Generate a new label - @symbol_pool: the AsmSymbolPool instance""" + @loc_db: the LocationDB instance""" i = 0 gen_name = "loc_%.8X" while True: name = gen_name % i - label = symbol_pool.getby_name(name) + label = loc_db.get_name_location(name) if label is None: - return symbol_pool.add_label(name) + return loc_db.add_location(name) i += 1 -def replace_expr_labels(expr, symbol_pool, replace_id): - """Create AsmLabel of the expression @expr in the @symbol_pool - Update @replace_id""" - - if not (isinstance(expr, m2_expr.ExprId) and - isinstance(expr.name, asmblock.AsmLabel)): - return expr - - old_lbl = expr.name - new_lbl = symbol_pool.getby_name_create(old_lbl.name) - replace_id[expr] = m2_expr.ExprId(new_lbl, expr.size) - return replace_id[expr] - - -def replace_orphan_labels(instr, symbol_pool): - """Link orphan labels used by @instr to the @symbol_pool""" - - for i, arg in enumerate(instr.args): - replace_id = {} - arg.visit(lambda e: replace_expr_labels(e, - symbol_pool, - replace_id)) - instr.args[i] = instr.args[i].replace_expr(replace_id) - - STATE_NO_BLOC = 0 STATE_IN_BLOC = 1 -def asm_ast_to_expr_with_size(arg, symbol_pool, size): +def asm_ast_to_expr_with_size(arg, loc_db, size): if isinstance(arg, AstId): - return m2_expr.ExprId(arg.name, size) + return ExprId(arg.name, size) if isinstance(arg, AstOp): - args = [asm_ast_to_expr_with_size(tmp, symbol_pool, size) for tmp in arg.args] - return m2_expr.ExprOp(arg.op, *args) + args = [asm_ast_to_expr_with_size(tmp, loc_db, size) for tmp in arg.args] + return ExprOp(arg.op, *args) if isinstance(arg, AstInt): - return m2_expr.ExprInt(arg.value, size) + return ExprInt(arg.value, size) return None -def parse_txt(mnemo, attrib, txt, symbol_pool=None): - 
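# Illustrative sketch, not part of the patch: the deprecated wrappers above
# keep old AsmSymbolPool-style call sites alive (they emit warnings); new code
# is expected to use the explicit accessors directly.
from miasm2.core.locationdb import LocationDB

loc_db = LocationDB()
# old spelling (still works, warns): loc_db.getby_name_create("my_label")
loc = loc_db.get_or_create_name_location("my_label")
print loc_db.pretty_str(loc)                  # -> my_label
# old spelling (still works, warns): loc_db.getby_offset_create(0x1234)
loc2 = loc_db.get_or_create_offset_location(0x1234)
print loc_db.pretty_str(loc2)                 # -> loc_1234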
"""Parse an assembly listing. Returns a couple (blocks, symbol_pool), where - blocks is a list of asm_bloc and symbol_pool the associated AsmSymbolPool +def parse_txt(mnemo, attrib, txt, loc_db=None): + """Parse an assembly listing. Returns a couple (asmcfg, loc_db), where + asmcfg is an AsmCfg instance and loc_db the associated LocationDB @mnemo: architecture used @attrib: architecture attribute @txt: assembly listing - @symbol_pool: (optional) the AsmSymbolPool instance used to handle labels + @loc_db: (optional) the LocationDB instance used to handle labels of the listing """ - if symbol_pool is None: - symbol_pool = asmblock.AsmSymbolPool() + if loc_db is None: + loc_db = asmblock.LocationDB() C_NEXT = asmblock.AsmConstraint.c_next C_TO = asmblock.AsmConstraint.c_to @@ -145,7 +121,7 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None): match_re = LABEL_RE.match(line) if match_re: label_name = match_re.group(1) - label = symbol_pool.getby_name_create(label_name) + label = loc_db.get_or_create_name_location(label_name) lines.append(label) continue # directive @@ -182,7 +158,7 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None): for element in data_raw: element = element.strip() element_parsed = base_expr.parseString(element)[0] - element_expr = asm_ast_to_expr_with_size(element_parsed, symbol_pool, size) + element_expr = asm_ast_to_expr_with_size(element_parsed, loc_db, size) expr_list.append(element_expr) raw_data = asmblock.AsmRaw(expr_list) @@ -214,7 +190,7 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None): match_re = LABEL_RE.match(line) if match_re: label_name = match_re.group(1) - label = symbol_pool.getby_name_create(label_name) + label = loc_db.get_or_create_name_location(label_name) lines.append(label) continue @@ -222,22 +198,19 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None): if ';' in line: line = line[:line.find(';')] line = line.strip(' ').strip('\t') - instr = mnemo.fromstring(line, symbol_pool, attrib) - - # replace orphan AsmLabel with labels from symbol_pool - replace_orphan_labels(instr, symbol_pool) + instr = mnemo.fromstring(line, loc_db, attrib) if instr.dstflow(): - instr.dstflow2label(symbol_pool) + instr.dstflow2label(loc_db) lines.append(instr) asmblock.log_asmblock.info("___pre asm oki___") - # make blocks + # make asmcfg cur_block = None state = STATE_NO_BLOC i = 0 - blocks = asmblock.AsmCFG() + asmcfg = asmblock.AsmCFG(loc_db) block_to_nlink = None delayslot = 0 while i < len(lines): @@ -256,21 +229,24 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None): block_to_nlink = None i += 1 continue - elif not isinstance(line, asmblock.AsmLabel): + elif not isinstance(line, LocKey): # First line must be a label. If it's not the case, generate # it. 
- label = guess_next_new_label(symbol_pool) - cur_block = asmblock.AsmBlock(label, alignment=mnemo.alignment) + loc = guess_next_new_label(loc_db) + cur_block = asmblock.AsmBlock(loc, alignment=mnemo.alignment) else: cur_block = asmblock.AsmBlock(line, alignment=mnemo.alignment) i += 1 # Generate the current bloc - blocks.add_node(cur_block) + asmcfg.add_block(cur_block) state = STATE_IN_BLOC if block_to_nlink: block_to_nlink.addto( - asmblock.AsmConstraint(cur_block.label, - C_NEXT)) + asmblock.AsmConstraint( + cur_block.loc_key, + C_NEXT + ) + ) block_to_nlink = None continue @@ -287,10 +263,11 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None): elif isinstance(line, asmblock.AsmRaw): cur_block.addline(line) block_to_nlink = cur_block - elif isinstance(line, asmblock.AsmLabel): + elif isinstance(line, LocKey): if block_to_nlink: cur_block.addto( - asmblock.AsmConstraint(line, C_NEXT)) + asmblock.AsmConstraint(line, C_NEXT) + ) block_to_nlink = None state = STATE_NO_BLOC continue @@ -304,8 +281,8 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None): if delayslot: raise RuntimeError("Cannot have breakflow in delayslot") if line.dstflow(): - for dst in line.getdstflow(symbol_pool): - if not isinstance(dst, m2_expr.ExprId): + for dst in line.getdstflow(loc_db): + if not isinstance(dst, ExprId): continue if dst in mnemo.regs.all_regs_ids: continue @@ -319,10 +296,10 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None): raise RuntimeError("unknown class %s" % line.__class__) i += 1 - for block in blocks: + for block in asmcfg.blocks: # Fix multiple constraints block.fix_constraints() # Log block asmblock.log_asmblock.info(block) - return blocks, symbol_pool + return asmcfg, loc_db diff --git a/miasm2/core/sembuilder.py b/miasm2/core/sembuilder.py index 8d6d3e07..ab1af953 100644 --- a/miasm2/core/sembuilder.py +++ b/miasm2/core/sembuilder.py @@ -139,14 +139,22 @@ class SemBuilder(object): return self._functions.copy() @staticmethod - def _create_labels(lbl_else=False): + def _create_labels(loc_else=False): """Return the AST standing for label creations - @lbl_else (optional): if set, create a label 'lbl_else'""" - lbl_end = "lbl_end = ExprId(ir.get_next_label(instr), ir.IRDst.size)" - out = ast.parse(lbl_end).body - out += ast.parse("lbl_if = ExprId(ir.gen_label(), ir.IRDst.size)").body - if lbl_else: - out += ast.parse("lbl_else = ExprId(ir.gen_label(), ir.IRDst.size)").body + @loc_else (optional): if set, create a label 'loc_else'""" + loc_end = "loc_end = ir.get_next_loc_key(instr)" + loc_end_expr = "loc_end_expr = ExprLoc(loc_end, ir.IRDst.size)" + out = ast.parse(loc_end).body + out += ast.parse(loc_end_expr).body + loc_if = "loc_if = ir.loc_db.add_location()" + loc_if_expr = "loc_if_expr = ExprLoc(loc_if, ir.IRDst.size)" + out += ast.parse(loc_if).body + out += ast.parse(loc_if_expr).body + if loc_else: + loc_else = "loc_else = ir.loc_db.add_location()" + loc_else_expr = "loc_else_expr = ExprLoc(loc_else, ir.IRDst.size)" + out += ast.parse(loc_else).body + out += ast.parse(loc_else_expr).body return out def _parse_body(self, body, argument_names): @@ -195,20 +203,20 @@ class SemBuilder(object): real_body.append(statement) elif isinstance(statement, ast.If): - # Create jumps : ir.IRDst = lbl_if if cond else lbl_end + # Create jumps : ir.IRDst = loc_if if cond else loc_end # if .. else .. 
are also handled cond = statement.test - real_body += self._create_labels(lbl_else=True) + real_body += self._create_labels(loc_else=True) - lbl_end = ast.Name(id='lbl_end', ctx=ast.Load()) - lbl_if = ast.Name(id='lbl_if', ctx=ast.Load()) - lbl_else = ast.Name(id='lbl_else', ctx=ast.Load()) \ - if statement.orelse else lbl_end + loc_end = ast.Name(id='loc_end_expr', ctx=ast.Load()) + loc_if = ast.Name(id='loc_if_expr', ctx=ast.Load()) + loc_else = ast.Name(id='loc_else_expr', ctx=ast.Load()) \ + if statement.orelse else loc_end dst = ast.Call(func=ast.Name(id='ExprCond', ctx=ast.Load()), args=[cond, - lbl_if, - lbl_else], + loc_if, + loc_else], keywords=[], starargs=None, kwargs=None) @@ -230,10 +238,10 @@ class SemBuilder(object): kwargs=None)) # Create the new blocks - elements = [(statement.body, 'lbl_if')] + elements = [(statement.body, 'loc_if')] if statement.orelse: - elements.append((statement.orelse, 'lbl_else')) - for content, lbl_name in elements: + elements.append((statement.orelse, 'loc_else')) + for content, loc_name in elements: sub_blocks, sub_body = self._parse_body(content, argument_names) if len(sub_blocks) > 1: @@ -242,7 +250,7 @@ class SemBuilder(object): ## Close the last block jmp_end = ast.Call(func=ast.Name(id='ExprAff', ctx=ast.Load()), - args=[IRDst, lbl_end], + args=[IRDst, loc_end], keywords=[], starargs=None, kwargs=None) @@ -261,16 +269,14 @@ class SemBuilder(object): ## Replace the block with a call to 'IRBlock' - lbl_if_name = ast.Attribute(value=ast.Name(id=lbl_name, - ctx=ast.Load()), - attr='name', ctx=ast.Load()) + loc_if_name = ast.Name(id=loc_name, ctx=ast.Load()) assignblks = ast.List(elts=[assignblk], ctx=ast.Load()) sub_blocks[-1] = ast.Call(func=ast.Name(id='IRBlock', ctx=ast.Load()), - args=[lbl_if_name, + args=[loc_if_name, assignblks], keywords=[], starargs=None, |
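# Illustrative sketch, not part of the patch: the shape of the code the
# SemBuilder now generates for an "if cond:" statement, written out by hand.
# "ir", "instr" and "cond" stand for the IR descriptor, current instruction
# and condition expression available inside a semantic function; the calls
# mirror the AST snippets built above.
from miasm2.expression.expression import ExprCond, ExprLoc

loc_end = ir.get_next_loc_key(instr)
loc_end_expr = ExprLoc(loc_end, ir.IRDst.size)
loc_if = ir.loc_db.add_location()
loc_if_expr = ExprLoc(loc_if, ir.IRDst.size)
# IRDst now receives ExprLoc destinations instead of label-backed ExprIds
dst = ExprCond(cond, loc_if_expr, loc_end_expr)
# the "then" body is then wrapped as IRBlock(loc_if, [ ...AssignBlocks... ])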