diff options
| author | serpilliere <serpilliere@users.noreply.github.com> | 2018-07-03 14:34:41 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2018-07-03 14:34:41 +0200 |
| commit | c0afde4d7c8ff51eaa31e4a074b9e06f080b3169 (patch) | |
| tree | 967c5856dcf37980cf80a2ea48cc7ee0e008e1bd /miasm2/core | |
| parent | c7ca6a23768178dd49c8fe97d7c7b1e0be02cd2e (diff) | |
| parent | d314460a5a19be1f3334baedf0105d9b72fc8620 (diff) | |
| download | miasm-c0afde4d7c8ff51eaa31e4a074b9e06f080b3169.tar.gz miasm-c0afde4d7c8ff51eaa31e4a074b9e06f080b3169.zip | |
Merge pull request #787 from commial/feature/locationdb
LocationDB
Diffstat (limited to '')
| -rw-r--r-- | miasm2/core/asmblock.py | 404 | ||||
| -rw-r--r-- | miasm2/core/cpu.py | 40 | ||||
| -rw-r--r-- | miasm2/core/locationdb.py | 453 | ||||
| -rw-r--r-- | miasm2/core/parse_asm.py | 42 | ||||
| -rw-r--r-- | miasm2/core/sembuilder.py | 4 |
5 files changed, 593 insertions, 350 deletions
diff --git a/miasm2/core/asmblock.py b/miasm2/core/asmblock.py index 08ff25e9..c8af4056 100644 --- a/miasm2/core/asmblock.py +++ b/miasm2/core/asmblock.py @@ -12,6 +12,7 @@ from miasm2.expression.modint import moduint, modint from miasm2.core.utils import Disasm_Exception, pck from miasm2.core.graph import DiGraph, DiGraphSimplifier, MatchGraphJoker from miasm2.core.interval import interval +from miasm2.core.locationdb import LocationDB log_asmblock = logging.getLogger("asmblock") @@ -35,7 +36,7 @@ class AsmRaw(object): def __str__(self): return repr(self.raw) - def to_string(self, symbol_pool): + def to_string(self, loc_db): return str(self) @@ -67,13 +68,13 @@ class AsmConstraint(object): label = property(get_label, set_label) - def to_string(self, symbol_pool=None): - if symbol_pool is None: + def to_string(self, loc_db=None): + if loc_db is None: return "%s:%s" % (self.c_t, self.loc_key) else: return "%s:%s" % ( self.c_t, - symbol_pool.str_loc_key(self.loc_key) + loc_db.pretty_str(self.loc_key) ) def __str__(self): @@ -136,22 +137,22 @@ class AsmBlock(object): label = property(get_label) - def to_string(self, symbol_pool=None): + def to_string(self, loc_db=None): out = [] - if symbol_pool is None: + if loc_db is None: out.append(str(self.loc_key)) else: - out.append(symbol_pool.str_loc_key(self.loc_key)) + out.append(loc_db.pretty_str(self.loc_key)) for instr in self.lines: - out.append(instr.to_string(symbol_pool)) + out.append(instr.to_string(loc_db)) if self.bto: lbls = ["->"] for dst in self.bto: if dst is None: lbls.append("Unknown? ") else: - lbls.append(dst.to_string(symbol_pool) + " ") + lbls.append(dst.to_string(loc_db) + " ") lbls = '\t'.join(lbls) out.append(lbls) return '\n'.join(out) @@ -166,12 +167,12 @@ class AsmBlock(object): assert isinstance(self.bto, set) self.bto.add(c) - def split(self, symbol_pool, offset): - loc_key = symbol_pool.getby_offset_create(offset) + def split(self, loc_db, offset): + loc_key = loc_db.get_or_create_offset_location(offset) log_asmblock.debug('split at %x', offset) i = -1 offsets = [x.offset for x in self.lines] - offset = symbol_pool.loc_key_to_offset(loc_key) + offset = loc_db.get_location_offset(loc_key) if offset not in offsets: log_asmblock.warning( 'cannot split bloc at %X ' % offset + @@ -344,235 +345,17 @@ class asm_block_bad(AsmBlockBad): warnings.warn('DEPRECATION WARNING: use "AsmBlockBad" instead of "asm_block_bad"') super(asm_block_bad, self).__init__(loc_key, alignment, *args, **kwargs) +class AsmSymbolPool(LocationDB): + """[DEPRECATED API] use 'LocationDB' instead""" -class AsmSymbolPool(object): - """ - Store symbols. - - A symbol links a name to an (optional) offset - - Rules and limitations: - - two different symbols cannot have the same offset - - two different symbols cannot have the same name - - symbols manipulation (comparison, creation ...) can only be done on - symbols generated by the same symbol pool - """ - - def __init__(self): - self._loc_keys = set() - - self._loc_key_to_offset = {} - self._loc_key_to_name = {} - - self._name_to_loc_key = {} - self._offset_to_loc_key = {} - - self._loc_key_num = 0 - - def loc_key_to_offset(self, loc_key): - """ - Return offset of @loc_key, None otherwise. - @loc_key: LocKey instance - """ - assert isinstance(loc_key, LocKey) - return self._loc_key_to_offset.get(loc_key) - - def loc_key_to_name(self, loc_key): - """ - Return name of @loc_key. - @loc_key: LocKey instance - """ - assert isinstance(loc_key, LocKey) - return self._loc_key_to_name[loc_key] - - def add_location(self, name, offset=None): - """ - Create and add a location to the symbol_pool - @name: loc_key's name (never empty). If offset is None and name is int, - generate loc_key with generic name and name as offset - @offset: (optional) loc_key's offset - """ - - if is_int(name): - assert offset is None or offset == name - offset = name - name = "loc_%.16X" % (int(name) & 0xFFFFFFFFFFFFFFFF) - if offset is not None: - offset = int(offset) - - assert name - - # Test for collisions - known_loc_key = self.getby_name(name) - if known_loc_key is not None: - known_offset = self.loc_key_to_offset(known_loc_key) - if known_offset != offset: - raise ValueError( - 'symbol %s with different offset %s %s' % ( - name, offset, known_offset - ) - ) - return known_loc_key - - elif self.getby_offset(offset) is not None: - raise ValueError( - 'offset %s with different names %s' % ( - offset, - name - ) - ) - - loc_key = LocKey(self._loc_key_num) - self._loc_key_num += 1 - - self._loc_keys.add(loc_key) - - if offset is not None: - assert offset not in self._offset_to_loc_key - self._offset_to_loc_key[offset] = loc_key - self._loc_key_to_offset[loc_key] = offset - - self._name_to_loc_key[name] = loc_key - self._loc_key_to_name[loc_key] = name - return loc_key - - def remove_loc_key(self, loc_key): - """ - Delete a @loc_key - """ - name = self._loc_key_to_name.pop(loc_key, None) - self._name_to_loc_key.pop(name, None) - - offset = self._loc_key_to_offset.pop(loc_key, None) - self._offset_to_loc_key.pop(offset, None) - - self._loc_keys.remove(loc_key) - - def del_loc_key_offset(self, loc_key): - """Unpin the @loc_key from its offset""" - offset = self._loc_keys_to_offset.pop(loc_key) - self._offset_to_loc_key.pop(offset, None) - - def getby_offset(self, offset): - """ - Retrieve loc_key using its @offset, None otherwise. - @offset: int - """ - return self._offset_to_loc_key.get(offset) - - def getby_name(self, name): - """ - Retrieve loc_key using its @name, None otherwise. - @name: str - """ - return self._name_to_loc_key.get(name) - - def getby_name_create(self, name): - """Get a loc_key from its @name, create it if it doesn't exist""" - loc_key = self.getby_name(name) - if loc_key is None: - loc_key = self.add_location(name) - return loc_key - - def getby_offset_create(self, offset): - """Get a loc_key from its @offset, create it if it doesn't exist""" - loc_key = self.getby_offset(offset) - if loc_key is None: - loc_key = self.add_location(offset) - return loc_key - - def rename_location(self, loc_key, newname): - """Rename the @loc_key name to @newname""" - if newname in self._name_to_loc_key: - raise ValueError('Symbol already known') - name = self._loc_key_to_name[loc_key] - assert name is not None - self._name_to_loc_key.pop(name) - self._loc_key_to_name[loc_key] = newname - - def set_offset(self, loc_key, offset): - """Pin the @loc_key to an @offset - Note that there is a special case when the offset is a list - it happens when offsets are recomputed in resolve_symbol* - """ - assert isinstance(loc_key, LocKey) - assert offset not in self._offset_to_loc_key - if loc_key not in self._loc_keys: - raise ValueError('Foreign loc_key %s' % loc_key) - - old_offset = self._loc_key_to_offset.pop(loc_key, None) - self._offset_to_loc_key.pop(old_offset, None) - - self._loc_key_to_offset[loc_key] = offset - self._offset_to_loc_key[offset] = loc_key - - @property - def loc_keys(self): - """Return all loc_keys""" - return self._loc_keys - - @property - def items(self): - """Return all loc_keys""" - warnings.warn('DEPRECATION WARNING: use "loc_keys" instead of "items"') - return list(self._loc_keys) - - def __str__(self): - return "".join("%s\n" % loc_key for loc_key in self._loc_keys) - - def __getitem__(self, item): - warnings.warn('DEPRECATION WARNING: use "getby_name" or "getby_offset"') - if item in self._name_to_loc_key: - return self._name_to_loc_key[item] - if item in self._offset_to_loc_key: - return self._offset_to_loc_key[item] - raise KeyError('unknown symbol %r' % item) - - def __contains__(self, item): - warnings.warn('DEPRECATION WARNING: use "getby_name" or "getby_offset"') - return item in self._name_to_loc_key or item in self._offset_to_loc_key - - def merge(self, symbol_pool): - """Merge with another @symbol_pool""" - self._loc_keys.update(symbol_pool.loc_keys) - self._name_to_loc_key.update(symbol_pool._name_to_loc_key) - self._offset_to_loc_key.update(symbol_pool._offset_to_loc_key) - - def canonize_to_exprloc(self, expr): - """ - If expr is ExprInt, return ExprLoc with corresponding loc_key - Else, return expr - - @expr: Expr instance - """ - if expr.is_int(): - loc_key = self.getby_offset_create(int(expr)) - ret = ExprLoc(loc_key, expr.size) - return ret - return expr - - def gen_loc_key(self): - """Generate a new unpinned loc_key""" - loc_key = self.add_location("lbl_gen_%.8X" % (self._loc_key_num)) - return loc_key - - def str_loc_key(self, loc_key): - name = self.loc_key_to_name(loc_key) - offset = self.loc_key_to_offset(loc_key) - if name is None: - name = str(loc_key) - if offset is not None: - offset = hex(offset) - out = name - if offset is not None: - out = "%s:%s" % (out, offset) - return out - + def __init__(self, *args, **kwargs): + warnings.warn("Deprecated API, use 'LocationDB' instead") + super(AsmSymbolPool, self).__init__(*args, **kwargs) class asm_symbol_pool(AsmSymbolPool): def __init__(self): - warnings.warn('DEPRECATION WARNING: use "AsmSymbolPool" instead of "asm_symbol_pool"') + warnings.warn('DEPRECATION WARNING: use "LocationDB" instead of "asm_symbol_pool"') super(asm_symbol_pool, self).__init__() @@ -593,7 +376,7 @@ class AsmCFG(DiGraph): AsmCFGPending = namedtuple("AsmCFGPending", ["waiter", "constraint"]) - def __init__(self, symbol_pool=None, *args, **kwargs): + def __init__(self, loc_db=None, *args, **kwargs): super(AsmCFG, self).__init__(*args, **kwargs) # Edges -> constraint self.edges2constraint = {} @@ -601,13 +384,13 @@ class AsmCFG(DiGraph): self._pendings = {} # Loc_Key2block built on the fly self._loc_key_to_block = {} - # symbol_pool - self.symbol_pool = symbol_pool + # loc_db + self.loc_db = loc_db def copy(self): """Copy the current graph instance""" - graph = self.__class__(self.symbol_pool) + graph = self.__class__(self.loc_db) return graph + self @@ -754,10 +537,10 @@ class AsmCFG(DiGraph): def node2lines(self, node): - if self.symbol_pool is None: + if self.loc_db is None: loc_key_name = str(node) else: - loc_key_name = self.symbol_pool.str_loc_key(node) + loc_key_name = self.loc_db.pretty_str(node) yield self.DotCellDescription(text=loc_key_name, attr={'align': 'center', 'colspan': 2, @@ -778,9 +561,9 @@ class AsmCFG(DiGraph): if self._dot_offset: yield [self.DotCellDescription(text="%.8X" % line.offset, attr={}), - self.DotCellDescription(text=line.to_string(self.symbol_pool), attr={})] + self.DotCellDescription(text=line.to_string(self.loc_db), attr={})] else: - yield self.DotCellDescription(text=line.to_string(self.symbol_pool), attr={}) + yield self.DotCellDescription(text=line.to_string(self.loc_db), attr={}) def node_attr(self, node): block = self._loc_key_to_block.get(node, None) @@ -979,13 +762,13 @@ class AsmCFG(DiGraph): block.max_size = size log_asmblock.info("size: %d max: %d", block.size, block.max_size) - def apply_splitting(self, symbol_pool, dis_block_callback=None, **kwargs): + def apply_splitting(self, loc_db, dis_block_callback=None, **kwargs): """Consider @self' bto destinations and split block in @self if one of these destinations jumps in the middle of this block. In order to work, they must be only one block in @self per loc_key in - @symbol_pool (which is true if @self come from the same disasmEngine). + @loc_db (which is true if @self come from the same disasmEngine). - @symbol_pool: AsmSymbolPool instance associated with @self'loc_keys + @loc_db: LocationDB instance associated with @self'loc_keys @dis_block_callback: (optional) if set, this callback will be called on new block destinations @kwargs: (optional) named arguments to pass to dis_block_callback @@ -994,7 +777,7 @@ class AsmCFG(DiGraph): # offset block_dst = [] for loc_key in self.pendings: - offset = symbol_pool.loc_key_to_offset(loc_key) + offset = loc_db.get_location_offset(loc_key) if offset is not None: block_dst.append(offset) @@ -1010,8 +793,9 @@ class AsmCFG(DiGraph): if not (off > range_start and off < range_stop): continue - # `cur_block` must be splitted at offset `off` - new_b = cur_block.split(symbol_pool, off) + # `cur_block` must be splitted at offset `off`from miasm2.core.locationdb import LocationDB + + new_b = cur_block.split(loc_db, off) log_asmblock.debug("Split block %x", off) if new_b is None: log_asmblock.error("Cannot split %x!!", off) @@ -1028,12 +812,12 @@ class AsmCFG(DiGraph): # The new block destinations may need to be disassembled if dis_block_callback: offsets_to_dis = set( - self.symbol_pool.loc_key_to_offset(constraint.loc_key) + self.loc_db.get_location_offset(constraint.loc_key) for constraint in new_b.bto ) dis_block_callback(cur_bloc=new_b, offsets_to_dis=offsets_to_dis, - symbol_pool=symbol_pool, **kwargs) + loc_db=loc_db, **kwargs) # Update structure rebuild_needed = True @@ -1135,8 +919,8 @@ def fix_expr_val(expr, symbols): # Example: # toto: # .dword label - loc_key = symbols.getby_name(e.name) - offset = symbols.loc_key_to_offset(loc_key) + loc_key = symbols.get_name_location(e.name) + offset = symbols.get_location_offset(loc_key) e = ExprInt(offset, e.size) return e result = expr.visit(expr_calc) @@ -1146,16 +930,16 @@ def fix_expr_val(expr, symbols): return result -def fix_loc_offset(symbol_pool, loc_key, offset, modified): +def fix_loc_offset(loc_db, loc_key, offset, modified): """ Fix the @loc_key offset to @offset. If the @offset has changed, add @loc_key to @modified - @symbol_pool: current symbol_pool + @loc_db: current loc_db """ - loc_offset = symbol_pool.loc_key_to_offset(loc_key) + loc_offset = loc_db.get_location_offset(loc_key) if loc_offset == offset: return - symbol_pool.set_offset(loc_key, offset) + loc_db.set_location_offset(loc_key, offset, force=True) modified.add(loc_key) @@ -1163,8 +947,8 @@ class BlockChain(object): """Manage blocks linked with an asm_constraint_next""" - def __init__(self, symbol_pool, blocks): - self.symbol_pool = symbol_pool + def __init__(self, loc_db, blocks): + self.loc_db = loc_db self.blocks = blocks self.place() @@ -1177,7 +961,7 @@ class BlockChain(object): self.pinned_block_idx = None for i, block in enumerate(self.blocks): loc_key = block.loc_key - if self.symbol_pool.loc_key_to_offset(loc_key) is not None: + if self.loc_db.get_location_offset(loc_key) is not None: if self.pinned_block_idx is not None: raise ValueError("Multiples pinned block detected") self.pinned_block_idx = i @@ -1196,7 +980,7 @@ class BlockChain(object): return loc = self.blocks[self.pinned_block_idx].loc_key - offset_base = self.symbol_pool.loc_key_to_offset(loc) + offset_base = self.loc_db.get_location_offset(loc) assert(offset_base % self.blocks[self.pinned_block_idx].alignment == 0) self.offset_min = offset_base @@ -1225,25 +1009,25 @@ class BlockChain(object): # Propagate offset to blocks before pinned block pinned_block = self.blocks[self.pinned_block_idx] - offset = self.symbol_pool.loc_key_to_offset(pinned_block.loc_key) + offset = self.loc_db.get_location_offset(pinned_block.loc_key) if offset % pinned_block.alignment != 0: raise RuntimeError('Bad alignment') for block in self.blocks[:self.pinned_block_idx - 1:-1]: new_offset = offset - block.size new_offset = new_offset - new_offset % pinned_block.alignment - fix_loc_offset(self.symbol_pool, + fix_loc_offset(self.loc_db, block.loc_key, new_offset, modified_loc_keys) # Propagate offset to blocks after pinned block - offset = self.symbol_pool.loc_key_to_offset(pinned_block.loc_key) + pinned_block.size + offset = self.loc_db.get_location_offset(pinned_block.loc_key) + pinned_block.size last_block = pinned_block for block in self.blocks[self.pinned_block_idx + 1:]: offset += (- offset) % last_block.alignment - fix_loc_offset(self.symbol_pool, + fix_loc_offset(self.loc_db, block.loc_key, offset, modified_loc_keys) @@ -1256,8 +1040,8 @@ class BlockChainWedge(object): """Stand for wedges between blocks""" - def __init__(self, symbol_pool, offset, size): - self.symbol_pool = symbol_pool + def __init__(self, loc_db, offset, size): + self.loc_db = loc_db self.offset = offset self.max_size = size self.offset_min = offset @@ -1266,12 +1050,12 @@ class BlockChainWedge(object): def merge(self, chain): """Best effort merge two block chains Return the list of resulting blockchains""" - self.symbol_pool.set_offset(chain.blocks[0].loc_key, self.offset_max) + self.loc_db.set_location_offset(chain.blocks[0].loc_key, self.offset_max) chain.place() return [self, chain] -def group_constrained_blocks(symbol_pool, asmcfg): +def group_constrained_blocks(loc_db, asmcfg): """ Return the BlockChains list built from grouped blocks in asmcfg linked by asm_constraint_next @@ -1310,7 +1094,7 @@ def group_constrained_blocks(symbol_pool, asmcfg): out_block_chains = [] for loc_key in known_block_chains: - chain = BlockChain(symbol_pool, known_block_chains[loc_key]) + chain = BlockChain(loc_db, known_block_chains[loc_key]) out_block_chains.append(chain) return out_block_chains @@ -1330,7 +1114,7 @@ def get_blockchains_address_interval(blockChains, dst_interval): return allocated_interval -def resolve_symbol(blockChains, symbol_pool, dst_interval=None): +def resolve_symbol(blockChains, loc_db, dst_interval=None): """Place @blockChains in the @dst_interval""" log_asmblock.info('resolve_symbol') @@ -1348,7 +1132,7 @@ def resolve_symbol(blockChains, symbol_pool, dst_interval=None): # Add wedge in forbidden intervals for start, stop in forbidden_interval.intervals: wedge = BlockChainWedge( - symbol_pool, offset=start, size=stop + 1 - start) + loc_db, offset=start, size=stop + 1 - start) pinned_chains.append(wedge) # Try to place bigger blockChains first @@ -1391,8 +1175,8 @@ def get_block_loc_keys(block): return symbols -def assemble_block(mnemo, block, symbol_pool, conservative=False): - """Assemble a @block using @symbol_pool +def assemble_block(mnemo, block, loc_db, conservative=False): + """Assemble a @block using @loc_db @conservative: (optional) use original bytes when possible """ offset_i = 0 @@ -1403,7 +1187,7 @@ def assemble_block(mnemo, block, symbol_pool, conservative=False): # Fix special AsmRaw data = "" for expr in instr.raw: - expr_int = fix_expr_val(expr, symbol_pool) + expr_int = fix_expr_val(expr, loc_db) data += pck[expr_int.size](expr_int.arg) instr.data = data @@ -1413,16 +1197,16 @@ def assemble_block(mnemo, block, symbol_pool, conservative=False): # Assemble an instruction saved_args = list(instr.args) - instr.offset = symbol_pool.loc_key_to_offset(block.loc_key) + offset_i + instr.offset = loc_db.get_location_offset(block.loc_key) + offset_i # Replace instruction's arguments by resolved ones - instr.args = instr.resolve_args_with_symbols(symbol_pool) + instr.args = instr.resolve_args_with_symbols(loc_db) if instr.dstflow(): instr.fixDstOffset() old_l = instr.l - cached_candidate, _ = conservative_asm(mnemo, instr, symbol_pool, + cached_candidate, _ = conservative_asm(mnemo, instr, loc_db, conservative) # Restore original arguments @@ -1436,8 +1220,8 @@ def assemble_block(mnemo, block, symbol_pool, conservative=False): offset_i += instr.l -def asmblock_final(mnemo, asmcfg, blockChains, symbol_pool, conservative=False): - """Resolve and assemble @blockChains using @symbol_pool until fixed point is +def asmblock_final(mnemo, asmcfg, blockChains, loc_db, conservative=False): + """Resolve and assemble @blockChains using @loc_db until fixed point is reached""" log_asmblock.debug("asmbloc_final") @@ -1484,36 +1268,36 @@ def asmblock_final(mnemo, asmcfg, blockChains, symbol_pool, conservative=False): while blocks_to_rework: block = blocks_to_rework.pop() - assemble_block(mnemo, block, symbol_pool, conservative) + assemble_block(mnemo, block, loc_db, conservative) -def asmbloc_final(mnemo, blocks, blockChains, symbol_pool, conservative=False): - """Resolve and assemble @blockChains using @symbol_pool until fixed point is +def asmbloc_final(mnemo, blocks, blockChains, loc_db, conservative=False): + """Resolve and assemble @blockChains using @loc_db until fixed point is reached""" warnings.warn('DEPRECATION WARNING: use "asmblock_final" instead of "asmbloc_final"') - asmblock_final(mnemo, blocks, blockChains, symbol_pool, conservative) + asmblock_final(mnemo, blocks, blockChains, loc_db, conservative) -def asm_resolve_final(mnemo, asmcfg, symbol_pool, dst_interval=None): - """Resolve and assemble @asmcfg using @symbol_pool into interval +def asm_resolve_final(mnemo, asmcfg, loc_db, dst_interval=None): + """Resolve and assemble @asmcfg using @loc_db into interval @dst_interval""" asmcfg.sanity_check() asmcfg.guess_blocks_size(mnemo) - blockChains = group_constrained_blocks(symbol_pool, asmcfg) + blockChains = group_constrained_blocks(loc_db, asmcfg) resolved_blockChains = resolve_symbol( blockChains, - symbol_pool, + loc_db, dst_interval ) - asmblock_final(mnemo, asmcfg, resolved_blockChains, symbol_pool) + asmblock_final(mnemo, asmcfg, resolved_blockChains, loc_db) patches = {} output_interval = interval() for block in asmcfg.blocks: - offset = symbol_pool.loc_key_to_offset(block.loc_key) + offset = loc_db.get_location_offset(block.loc_key) for instr in block.lines: if not instr.data: # Empty line @@ -1552,7 +1336,7 @@ class disasmEngine(object): - blocs_wd: maximum number of distinct disassembled block + callback(arch, attrib, pool_bin, cur_bloc, offsets_to_dis, - symbol_pool) + loc_db) - dis_block_callback: callback after each new disassembled block """ @@ -1566,7 +1350,7 @@ class disasmEngine(object): self.arch = arch self.attrib = attrib self.bin_stream = bin_stream - self.symbol_pool = AsmSymbolPool() + self.loc_db = LocationDB() # Setup options self.dont_dis = [] @@ -1598,6 +1382,10 @@ class disasmEngine(object): warnings.warn("""DEPRECATION WARNING: "dis_bloc_callback" use dis_block_callback.""") self.dis_block_callback = function + @property + def symbol_pool(self): + warnings.warn("""DEPRECATION WARNING: use 'loc_db'""") + return self.loc_db # Deprecated job_done = property(get_job_done, set_job_done) @@ -1616,7 +1404,7 @@ class disasmEngine(object): delayslot_count = self.arch.delayslot offsets_to_dis = set() add_next_offset = False - loc_key = self.symbol_pool.getby_offset_create(offset) + loc_key = self.loc_db.get_or_create_offset_location(offset) cur_block = AsmBlock(loc_key) log_asmblock.debug("dis at %X", int(offset)) while not in_delayslot or delayslot_count > 0: @@ -1631,12 +1419,12 @@ class disasmEngine(object): else: # Block is not empty, stop the desassembly pass and add a # constraint to the next block - loc_key_cst = self.symbol_pool.getby_offset_create(offset) + loc_key_cst = self.loc_db.get_or_create_offset_location(offset) cur_block.add_cst(loc_key_cst, AsmConstraint.c_next) break if lines_cpt > 0 and offset in self.split_dis: - loc_key_cst = self.symbol_pool.getby_offset_create(offset) + loc_key_cst = self.loc_db.get_or_create_offset_location(offset) cur_block.add_cst(loc_key_cst, AsmConstraint.c_next) offsets_to_dis.add(offset) break @@ -1647,7 +1435,7 @@ class disasmEngine(object): break if offset in job_done: - loc_key_cst = self.symbol_pool.getby_offset_create(offset) + loc_key_cst = self.loc_db.get_or_create_offset_location(offset) cur_block.add_cst(loc_key_cst, AsmConstraint.c_next) break @@ -1674,7 +1462,7 @@ class disasmEngine(object): else: # Block is not empty, stop the desassembly pass and add a # constraint to the next block - loc_key_cst = self.symbol_pool.getby_offset_create(off_i) + loc_key_cst = self.loc_db.get_or_create_offset_location(off_i) cur_block.add_cst(loc_key_cst, AsmConstraint.c_next) break @@ -1687,7 +1475,7 @@ class disasmEngine(object): else: # Block is not empty, stop the desassembly pass and add a # constraint to the next block - loc_key_cst = self.symbol_pool.getby_offset_create(off_i) + loc_key_cst = self.loc_db.get_or_create_offset_location(off_i) cur_block.add_cst(loc_key_cst, AsmConstraint.c_next) break @@ -1710,14 +1498,14 @@ class disasmEngine(object): if instr.splitflow() and not (instr.is_subcall() and self.dontdis_retcall): add_next_offset = True if instr.dstflow(): - instr.dstflow2label(self.symbol_pool) - destinations = instr.getdstflow(self.symbol_pool) + instr.dstflow2label(self.loc_db) + destinations = instr.getdstflow(self.loc_db) known_dsts = [] for dst in destinations: if not dst.is_loc(): continue loc_key = dst.loc_key - loc_key_offset = self.symbol_pool.loc_key_to_offset(loc_key) + loc_key_offset = self.loc_db.get_location_offset(loc_key) known_dsts.append(loc_key) if loc_key_offset in self.dont_dis_retcall_funcs: add_next_offset = False @@ -1729,11 +1517,11 @@ class disasmEngine(object): delayslot_count = instr.delayslot for c in cur_block.bto: - loc_key_offset = self.symbol_pool.loc_key_to_offset(c.loc_key) + loc_key_offset = self.loc_db.get_location_offset(c.loc_key) offsets_to_dis.add(loc_key_offset) if add_next_offset: - loc_key_cst = self.symbol_pool.getby_offset_create(offset) + loc_key_cst = self.loc_db.get_or_create_offset_location(offset) cur_block.add_cst(loc_key_cst, AsmConstraint.c_next) offsets_to_dis.add(offset) @@ -1744,7 +1532,9 @@ class disasmEngine(object): self.dis_block_callback(mn=self.arch, attrib=self.attrib, pool_bin=self.bin_stream, cur_bloc=cur_block, offsets_to_dis=offsets_to_dis, - symbol_pool=self.symbol_pool) + loc_db=self.loc_db, + # Deprecated API + symbol_pool=self.loc_db) return cur_block, offsets_to_dis def dis_block(self, offset): @@ -1774,7 +1564,7 @@ class disasmEngine(object): log_asmblock.info("dis bloc all") job_done = set() if blocks is None: - blocks = AsmCFG(self.symbol_pool) + blocks = AsmCFG(self.loc_db) todo = [offset] bloc_cpt = 0 @@ -1792,7 +1582,7 @@ class disasmEngine(object): todo += nexts blocks.add_block(cur_block) - blocks.apply_splitting(self.symbol_pool, + blocks.apply_splitting(self.loc_db, dis_block_callback=self.dis_block_callback, mn=self.arch, attrib=self.attrib, pool_bin=self.bin_stream) diff --git a/miasm2/core/cpu.py b/miasm2/core/cpu.py index 80f81aff..dc6fc392 100644 --- a/miasm2/core/cpu.py +++ b/miasm2/core/cpu.py @@ -671,7 +671,7 @@ class bs_swapargs(bs_divert): class m_arg(object): - def fromstring(self, text, symbol_pool, parser_result=None): + def fromstring(self, text, loc_db, parser_result=None): if parser_result: e, start, stop = parser_result[self.parser] self.expr = e @@ -681,11 +681,11 @@ class m_arg(object): except StopIteration: return None, None arg = v[0] - expr = self.asm_ast_to_expr(arg, symbol_pool) + expr = self.asm_ast_to_expr(arg, loc_db) self.expr = expr return start, stop - def asm_ast_to_expr(self, arg, symbol_pool): + def asm_ast_to_expr(self, arg, loc_db): raise NotImplementedError("Virtual") @@ -708,7 +708,7 @@ class reg_noarg(object): reg_info = None parser = None - def fromstring(self, text, symbol_pool, parser_result=None): + def fromstring(self, text, loc_db, parser_result=None): if parser_result: e, start, stop = parser_result[self.parser] self.expr = e @@ -718,7 +718,7 @@ class reg_noarg(object): except StopIteration: return None, None arg = v[0] - expr = self.parses_to_expr(arg, symbol_pool) + expr = self.parses_to_expr(arg, loc_db) self.expr = expr return start, stop @@ -995,13 +995,13 @@ class instruction(object): def __str__(self): return self.to_string() - def to_string(self, symbol_pool=None): + def to_string(self, loc_db=None): o = "%-10s " % self.name args = [] for i, arg in enumerate(self.args): if not isinstance(arg, m2_expr.Expr): raise ValueError('zarb arg type') - x = self.arg2str(arg, i, symbol_pool) + x = self.arg2str(arg, i, loc_db) args.append(x) o += self.gen_args(args) return o @@ -1022,18 +1022,18 @@ class instruction(object): fixed_expr = {} for exprloc in loc_keys: loc_key = exprloc.loc_key - name = symbols.loc_key_to_name(loc_key) + names = symbols.get_location_names(loc_key) # special symbols - if name == '$': + if '$' in names: fixed_expr[exprloc] = self.get_asm_offset(exprloc) continue - if name == '_': + if '_' in names: fixed_expr[exprloc] = self.get_asm_next_offset(exprloc) continue - if symbols.getby_name(name) is None: + if not names: raise ValueError('Unresolved symbol: %r' % exprloc) - offset = symbols.loc_key_to_offset(loc_key) + offset = symbols.get_location_offset(loc_key) if offset is None: raise ValueError( 'The offset of loc_key "%s" cannot be determined' % name @@ -1280,7 +1280,7 @@ class cls_mn(object): return out[0] @classmethod - def fromstring(cls, text, symbol_pool, mode = None): + def fromstring(cls, text, loc_db, mode = None): global total_scans name = re.search('(\S+)', text).groups() if not name: @@ -1320,11 +1320,11 @@ class cls_mn(object): if start != 0: v, start, stop = [None], None, None if v != [None]: - v = f.asm_ast_to_expr(v[0], symbol_pool) + v = f.asm_ast_to_expr(v[0], loc_db) if v is None: v, start, stop = [None], None, None parsers[(i, start_i)][p] = v, start, stop - start, stop = f.fromstring(args_str, symbol_pool, parsers[(i, start_i)]) + start, stop = f.fromstring(args_str, loc_db, parsers[(i, start_i)]) if start != 0: log.debug("cannot fromstring %r", args_str) cannot_parse = True @@ -1529,12 +1529,12 @@ class cls_mn(object): def parse_prefix(self, v): return 0 - def set_dst_symbol(self, symbol_pool): - dst = self.getdstflow(symbol_pool) + def set_dst_symbol(self, loc_db): + dst = self.getdstflow(loc_db) args = [] for d in dst: if isinstance(d, m2_expr.ExprInt): - l = symbol_pool.getby_offset_create(int(d)) + l = loc_db.get_or_create_offset_location(int(d)) a = m2_expr.ExprId(l.name, d.size) else: @@ -1542,7 +1542,7 @@ class cls_mn(object): args.append(a) self.args_symb = args - def getdstflow(self, symbol_pool): + def getdstflow(self, loc_db): return [self.args[0].expr] @@ -1563,7 +1563,7 @@ class imm_noarg(object): return None return v - def fromstring(self, text, symbol_pool, parser_result=None): + def fromstring(self, text, loc_db, parser_result=None): if parser_result: e, start, stop = parser_result[self.parser] else: diff --git a/miasm2/core/locationdb.py b/miasm2/core/locationdb.py new file mode 100644 index 00000000..39c1c99a --- /dev/null +++ b/miasm2/core/locationdb.py @@ -0,0 +1,453 @@ +import warnings + +from miasm2.expression.expression import LocKey, ExprLoc +from miasm2.expression.modint import moduint, modint + + +def is_int(a): + return isinstance(a, (int, long, moduint, modint)) + + +class LocationDB(object): + """ + LocationDB is a "database" of information associated to location. + + An entry in a LocationDB is uniquely identified with a LocKey. + Additionnal information which can be associated with a LocKey are: + - an offset (uniq per LocationDB) + - several names (each are uniqs per LocationDB) + + As a schema: + loc_key 1 <-> 0..1 offset + 1 <-> 0..n name + + >>> loc_db = LocationDB() + # Add a location with no additionnal information + >>> loc_key1 = loc_db.add_location() + # Add a location with an offset + >>> loc_key2 = loc_db.add_location(offset=0x1234) + # Add a location with several names + >>> loc_key3 = loc_db.add_location(name="first_name") + >>> loc_db.add_location_name(loc_key3, "second_name") + # Associate an offset to an existing location + >>> loc_db.set_location_offset(loc_key3, 0x5678) + # Remove a name from an existing location + >>> loc_db.remove_location_name(loc_key3, "second_name") + + # Get back offset + >>> loc_db.get_location_offset(loc_key1) + None + >>> loc_db.get_location_offset(loc_key2) + 0x1234 + + # Display a location + >>> loc_db.pretty_str(loc_key1) + loc_key_1 + >>> loc_db.pretty_str(loc_key2) + loc_1234 + >>> loc_db.pretty_str(loc_key3) + first_name + """ + + def __init__(self): + # Known LocKeys + self._loc_keys = set() + + # Association tables + self._loc_key_to_offset = {} + self._loc_key_to_names = {} + self._name_to_loc_key = {} + self._offset_to_loc_key = {} + + # Counter for new LocKey generation + self._loc_key_num = 0 + + def get_location_offset(self, loc_key): + """ + Return the offset of @loc_key if any, None otherwise. + @loc_key: LocKey instance + """ + assert isinstance(loc_key, LocKey) + return self._loc_key_to_offset.get(loc_key) + + def get_location_names(self, loc_key): + """ + Return the frozenset of names associated to @loc_key + @loc_key: LocKey instance + """ + assert isinstance(loc_key, LocKey) + return frozenset(self._loc_key_to_names.get(loc_key, set())) + + def get_name_location(self, name): + """ + Return the LocKey of @name if any, None otherwise. + @name: target name + """ + return self._name_to_loc_key.get(name) + + def get_or_create_name_location(self, name): + """ + Return the LocKey of @name if any, create one otherwise. + @name: target name + """ + loc_key = self._name_to_loc_key.get(name) + if loc_key is not None: + return loc_key + return self.add_location(name=name) + + def get_offset_location(self, offset): + """ + Return the LocKey of @offset if any, None otherwise. + @name: target offset + """ + return self._offset_to_loc_key.get(offset) + + def get_or_create_offset_location(self, offset): + """ + Return the LocKey of @offset if any, create one otherwise. + @offset: target offset + """ + loc_key = self._offset_to_loc_key.get(offset) + if loc_key is not None: + return loc_key + return self.add_location(offset=offset) + + def add_location_name(self, loc_key, name): + """Associate a name @name to a given @loc_key + @name: str instance + @loc_key: LocKey instance + """ + assert loc_key in self._loc_keys + already_existing_loc = self._name_to_loc_key.get(name) + if already_existing_loc is not None and already_existing_loc != loc_key: + raise KeyError("%r is already associated to a different loc_key " + "(%r)" % (name, already_existing_loc)) + self._loc_key_to_names.setdefault(loc_key, set()).add(name) + self._name_to_loc_key[name] = loc_key + + def remove_location_name(self, loc_key, name): + """Disassociate a name @name from a given @loc_key + Fail if @name is not already associated to @loc_key + @name: str instance + @loc_key: LocKey instance + """ + assert loc_key in self._loc_keys + already_existing_loc = self._name_to_loc_key.get(name) + if already_existing_loc is None: + raise KeyError("%r is not already associated" % name) + if already_existing_loc != loc_key: + raise KeyError("%r is already associated to a different loc_key " + "(%r)" % (name, already_existing_loc)) + del self._name_to_loc_key[name] + self._loc_key_to_names[loc_key].remove(name) + + def set_location_offset(self, loc_key, offset, force=False): + """Associate the offset @offset to an LocKey @loc_key + + If @force is set, override silently. Otherwise, if an offset is already + associated to @loc_key, an error will be raised + """ + assert loc_key in self._loc_keys + already_existing_loc = self.get_offset_location(offset) + if already_existing_loc is not None and already_existing_loc != loc_key: + raise KeyError("%r is already associated to a different loc_key " + "(%r)" % (offset, already_existing_loc)) + already_existing_off = self._loc_key_to_offset.get(loc_key) + if (already_existing_off is not None and + already_existing_off != offset): + if not force: + raise ValueError( + "%r already has an offset (0x%x). Use 'force=True'" + " for silent overriding" % ( + loc_key, already_existing_off + )) + else: + self.unset_location_offset(loc_key) + self._offset_to_loc_key[offset] = loc_key + self._loc_key_to_offset[loc_key] = offset + + def unset_location_offset(self, loc_key): + """Disassociate LocKey @loc_key's offset + + Fail if there is already no offset associate with it + @loc_key: LocKey + """ + assert loc_key in self._loc_keys + already_existing_off = self._loc_key_to_offset.get(loc_key) + if already_existing_off is None: + raise ValueError("%r already has no offset" % (loc_key)) + del self._offset_to_loc_key[already_existing_off] + del self._loc_key_to_offset[loc_key] + + def consistency_check(self): + """Ensure internal structures are consistent with each others""" + assert set(self._loc_key_to_names).issubset(self._loc_keys) + assert set(self._loc_key_to_offset).issubset(self._loc_keys) + assert self._loc_key_to_offset == {v: k for k, v in self._offset_to_loc_key.iteritems()} + assert reduce( + lambda x, y:x.union(y), + self._loc_key_to_names.itervalues(), + set(), + ) == set(self._name_to_loc_key) + for name, loc_key in self._name_to_loc_key.iteritems(): + assert name in self._loc_key_to_names[loc_key] + + def add_location(self, name=None, offset=None, strict=True): + """Add a new location in the locationDB. Returns the corresponding LocKey. + If @name is set, also associate a name to this new location. + If @offset is set, also associate an offset to this new location. + + Strict mode (set by @strict, default): + If a location with @offset or @name already exists, an error will be + raised. + Otherwise: + If a location with @offset or @name already exists, the corresponding + LocKey will be returned. + """ + + # Deprecation handling + if is_int(name): + assert offset is None or offset == name + warnings.warn("Deprecated API: use 'add_location(offset=)' instead." + " An additionnal 'name=' can be provided to also " + "associate a name (there is no more default name)") + offset = name + name = None + + # Argument cleaning + offset_loc_key = None + if offset is not None: + offset = int(offset) + offset_loc_key = self.get_offset_location(offset) + + # Test for collisions + name_loc_key = None + if name is not None: + name_loc_key = self.get_name_location(name) + + if strict: + if name_loc_key is not None: + raise ValueError("An entry for %r already exists (%r), and " + "strict mode is enabled" % ( + name, name_loc_key + )) + if offset_loc_key is not None: + raise ValueError("An entry for 0x%x already exists (%r), and " + "strict mode is enabled" % ( + offset, offset_loc_key + )) + else: + # Non-strict mode + if name_loc_key is not None: + known_offset = self.get_offset_location(name_loc_key) + if known_offset != offset: + raise ValueError( + "Location with name '%s' already have an offset: 0x%x " + "(!= 0x%x)" % (name, offset, known_offset) + ) + # Name already known, same offset -> nothing to do + return name_loc_key + + elif offset_loc_key is not None: + if name is not None: + # This is an error. Check for already known name are checked above + raise ValueError( + "Location with offset 0x%x already exists." + "To add a name to this location, use the dedicated API" + "'add_location_name(%r, %r)'" % ( + offset_loc_key, + name + )) + # Offset already known, no name specified + return offset_loc_key + + # No collision, this is a brand new location + loc_key = LocKey(self._loc_key_num) + self._loc_key_num += 1 + self._loc_keys.add(loc_key) + + if offset is not None: + assert offset not in self._offset_to_loc_key + self._offset_to_loc_key[offset] = loc_key + self._loc_key_to_offset[loc_key] = offset + + if name is not None: + self._name_to_loc_key[name] = loc_key + self._loc_key_to_names[loc_key] = set([name]) + + return loc_key + + def remove_location(self, loc_key): + """ + Delete the location corresponding to @loc_key + @loc_key: LocKey instance + """ + assert isinstance(loc_key, LocKey) + if loc_key not in self._loc_keys: + raise KeyError("Unknown loc_key %r" % loc_key) + names = self._loc_key_to_names.pop(loc_key, []) + for name in names: + del self._name_to_loc_key[name] + offset = self._loc_key_to_offset.pop(loc_key, None) + self._offset_to_loc_key.pop(offset, None) + self._loc_keys.remove(loc_key) + + def pretty_str(self, loc_key): + """Return a human readable version of @loc_key, according to information + available in this LocationDB instance""" + names = self.get_location_names(loc_key) + if names: + return ",".join(names) + offset = self.get_location_offset(loc_key) + if offset is not None: + return "loc_%x" % offset + return str(loc_key) + + @property + def loc_keys(self): + """Return all loc_keys""" + return self._loc_keys + + @property + def names(self): + """Return all known names""" + return self._name_to_loc_key.keys() + + @property + def offsets(self): + """Return all known offsets""" + return self._offset_to_loc_key.keys() + + def __str__(self): + out = [] + for loc_key in self._loc_keys: + names = self.get_location_names(loc_key) + offset = self.get_location_offset(loc_key) + out.append("%s: %s - %s" % ( + loc_key, + "0x%x" % offset if offset is not None else None, + ",".join(names) + )) + return "\n".join(out) + + def merge(self, location_db): + """Merge with another LocationDB @location_db + + WARNING: old reference to @location_db information (such as LocKeys) + must be retrieved from the updated version of this instance. The + dedicated "get_*" APIs may be used for this task + """ + # A simple merge is not doable here, because LocKey will certainly + # collides + + for foreign_loc_key in location_db.loc_keys: + foreign_names = location_db.get_location_names(foreign_loc_key) + foreign_offset = location_db.get_location_offset(foreign_loc_key) + if foreign_names: + init_name = list(foreign_names)[0] + else: + init_name = None + loc_key = self.add_location(offset=foreign_offset, name=init_name, + strict=False) + cur_names = self.get_location_names(loc_key) + for name in foreign_names: + if name not in cur_names and name != init_name: + self.add_location_name(loc_key, name=name) + + def canonize_to_exprloc(self, expr): + """ + If expr is ExprInt, return ExprLoc with corresponding loc_key + Else, return expr + + @expr: Expr instance + """ + if expr.is_int(): + loc_key = self.get_or_create_offset_location(int(expr)) + ret = ExprLoc(loc_key, expr.size) + return ret + return expr + + # Deprecated APIs + @property + def items(self): + """Return all loc_keys""" + warnings.warn('DEPRECATION WARNING: use "loc_keys" instead of "items"') + return list(self._loc_keys) + + def __getitem__(self, item): + warnings.warn('DEPRECATION WARNING: use "get_name_location" or ' + '"get_offset_location"') + if item in self._name_to_loc_key: + return self._name_to_loc_key[item] + if item in self._offset_to_loc_key: + return self._offset_to_loc_key[item] + raise KeyError('unknown symbol %r' % item) + + def __contains__(self, item): + warnings.warn('DEPRECATION WARNING: use "get_name_location" or ' + '"get_offset_location", or ".offsets" or ".names"') + return item in self._name_to_loc_key or item in self._offset_to_loc_key + + def loc_key_to_name(self, loc_key): + """[DEPRECATED API], see 'get_location_names'""" + warnings.warn("Deprecated API: use 'get_location_names'") + return sorted(self.get_location_names(loc_key))[0] + + def loc_key_to_offset(self, loc_key): + """[DEPRECATED API], see 'get_location_offset'""" + warnings.warn("Deprecated API: use 'get_location_offset'") + return self.get_location_offset(loc_key) + + def remove_loc_key(self, loc_key): + """[DEPRECATED API], see 'remove_location'""" + warnings.warn("Deprecated API: use 'remove_location'") + self.remove_location(loc_key) + + def del_loc_key_offset(self, loc_key): + """[DEPRECATED API], see 'unset_location_offset'""" + warnings.warn("Deprecated API: use 'unset_location_offset'") + self.unset_location_offset(loc_key) + + def getby_offset(self, offset): + """[DEPRECATED API], see 'get_offset_location'""" + warnings.warn("Deprecated API: use 'get_offset_location'") + return self.get_offset_location(offset) + + def getby_name(self, name): + """[DEPRECATED API], see 'get_name_location'""" + warnings.warn("Deprecated API: use 'get_name_location'") + return self.get_name_location(name) + + def getby_offset_create(self, offset): + """[DEPRECATED API], see 'get_or_create_offset_location'""" + warnings.warn("Deprecated API: use 'get_or_create_offset_location'") + return self.get_or_create_offset_location(offset) + + def getby_name_create(self, name): + """[DEPRECATED API], see 'get_or_create_name_location'""" + warnings.warn("Deprecated API: use 'get_or_create_name_location'") + return self.get_or_create_name_location(name) + + def rename_location(self, loc_key, newname): + """[DEPRECATED API], see 'add_name_location' and 'remove_location_name' + """ + warnings.warn("Deprecated API: use 'add_location_name' and " + "'remove_location_name'") + for name in self.get_location_names(loc_key): + self.remove_location_name(loc_key, name) + self.add_location_name(loc_key, name) + + def set_offset(self, loc_key, offset): + """[DEPRECATED API], see 'set_location_offset'""" + warnings.warn("Deprecated API: use 'set_location_offset'") + self.set_location_offset(loc_key, offset, force=True) + + def gen_loc_key(self): + """[DEPRECATED API], see 'add_location'""" + warnings.warn("Deprecated API: use 'add_location'") + return self.add_location() + + def str_loc_key(self, loc_key): + """[DEPRECATED API], see 'pretty_str'""" + warnings.warn("Deprecated API: use 'pretty_str'") + return self.pretty_str(loc_key) diff --git a/miasm2/core/parse_asm.py b/miasm2/core/parse_asm.py index 3b97cbb6..7efa17d0 100644 --- a/miasm2/core/parse_asm.py +++ b/miasm2/core/parse_asm.py @@ -60,16 +60,16 @@ class DirectiveDontSplit(Directive): pass -def guess_next_new_label(symbol_pool): +def guess_next_new_label(loc_db): """Generate a new label - @symbol_pool: the AsmSymbolPool instance""" + @loc_db: the LocationDB instance""" i = 0 gen_name = "loc_%.8X" while True: name = gen_name % i - label = symbol_pool.getby_name(name) + label = loc_db.get_name_location(name) if label is None: - return symbol_pool.add_location(name) + return loc_db.add_location(name) i += 1 @@ -77,30 +77,30 @@ STATE_NO_BLOC = 0 STATE_IN_BLOC = 1 -def asm_ast_to_expr_with_size(arg, symbol_pool, size): +def asm_ast_to_expr_with_size(arg, loc_db, size): if isinstance(arg, AstId): return ExprId(arg.name, size) if isinstance(arg, AstOp): - args = [asm_ast_to_expr_with_size(tmp, symbol_pool, size) for tmp in arg.args] + args = [asm_ast_to_expr_with_size(tmp, loc_db, size) for tmp in arg.args] return ExprOp(arg.op, *args) if isinstance(arg, AstInt): return ExprInt(arg.value, size) return None -def parse_txt(mnemo, attrib, txt, symbol_pool=None): - """Parse an assembly listing. Returns a couple (asmcfg, symbol_pool), where - asmcfg is an AsmCfg instance and symbol_pool the associated AsmSymbolPool +def parse_txt(mnemo, attrib, txt, loc_db=None): + """Parse an assembly listing. Returns a couple (asmcfg, loc_db), where + asmcfg is an AsmCfg instance and loc_db the associated LocationDB @mnemo: architecture used @attrib: architecture attribute @txt: assembly listing - @symbol_pool: (optional) the AsmSymbolPool instance used to handle labels + @loc_db: (optional) the LocationDB instance used to handle labels of the listing """ - if symbol_pool is None: - symbol_pool = asmblock.AsmSymbolPool() + if loc_db is None: + loc_db = asmblock.LocationDB() C_NEXT = asmblock.AsmConstraint.c_next C_TO = asmblock.AsmConstraint.c_to @@ -121,7 +121,7 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None): match_re = LABEL_RE.match(line) if match_re: label_name = match_re.group(1) - label = symbol_pool.getby_name_create(label_name) + label = loc_db.get_or_create_name_location(label_name) lines.append(label) continue # directive @@ -158,7 +158,7 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None): for element in data_raw: element = element.strip() element_parsed = base_expr.parseString(element)[0] - element_expr = asm_ast_to_expr_with_size(element_parsed, symbol_pool, size) + element_expr = asm_ast_to_expr_with_size(element_parsed, loc_db, size) expr_list.append(element_expr) raw_data = asmblock.AsmRaw(expr_list) @@ -190,7 +190,7 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None): match_re = LABEL_RE.match(line) if match_re: label_name = match_re.group(1) - label = symbol_pool.getby_name_create(label_name) + label = loc_db.get_or_create_name_location(label_name) lines.append(label) continue @@ -198,10 +198,10 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None): if ';' in line: line = line[:line.find(';')] line = line.strip(' ').strip('\t') - instr = mnemo.fromstring(line, symbol_pool, attrib) + instr = mnemo.fromstring(line, loc_db, attrib) if instr.dstflow(): - instr.dstflow2label(symbol_pool) + instr.dstflow2label(loc_db) lines.append(instr) asmblock.log_asmblock.info("___pre asm oki___") @@ -210,7 +210,7 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None): cur_block = None state = STATE_NO_BLOC i = 0 - asmcfg = asmblock.AsmCFG(symbol_pool) + asmcfg = asmblock.AsmCFG(loc_db) block_to_nlink = None delayslot = 0 while i < len(lines): @@ -232,7 +232,7 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None): elif not isinstance(line, LocKey): # First line must be a label. If it's not the case, generate # it. - loc = guess_next_new_label(symbol_pool) + loc = guess_next_new_label(loc_db) cur_block = asmblock.AsmBlock(loc, alignment=mnemo.alignment) else: cur_block = asmblock.AsmBlock(line, alignment=mnemo.alignment) @@ -281,7 +281,7 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None): if delayslot: raise RuntimeError("Cannot have breakflow in delayslot") if line.dstflow(): - for dst in line.getdstflow(symbol_pool): + for dst in line.getdstflow(loc_db): if not isinstance(dst, ExprId): continue if dst in mnemo.regs.all_regs_ids: @@ -302,4 +302,4 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None): # Log block asmblock.log_asmblock.info(block) - return asmcfg, symbol_pool + return asmcfg, loc_db diff --git a/miasm2/core/sembuilder.py b/miasm2/core/sembuilder.py index 530685db..ab1af953 100644 --- a/miasm2/core/sembuilder.py +++ b/miasm2/core/sembuilder.py @@ -146,12 +146,12 @@ class SemBuilder(object): loc_end_expr = "loc_end_expr = ExprLoc(loc_end, ir.IRDst.size)" out = ast.parse(loc_end).body out += ast.parse(loc_end_expr).body - loc_if = "loc_if = ir.symbol_pool.gen_loc_key()" + loc_if = "loc_if = ir.loc_db.add_location()" loc_if_expr = "loc_if_expr = ExprLoc(loc_if, ir.IRDst.size)" out += ast.parse(loc_if).body out += ast.parse(loc_if_expr).body if loc_else: - loc_else = "loc_else = ir.symbol_pool.gen_loc_key()" + loc_else = "loc_else = ir.loc_db.add_location()" loc_else_expr = "loc_else_expr = ExprLoc(loc_else, ir.IRDst.size)" out += ast.parse(loc_else).body out += ast.parse(loc_else_expr).body |