Diffstat (limited to 'miasm2/analysis')
-rw-r--r--  miasm2/analysis/binary.py        |  22
-rw-r--r--  miasm2/analysis/cst_propag.py    |  42
-rw-r--r--  miasm2/analysis/data_analysis.py | 137
-rw-r--r--  miasm2/analysis/data_flow.py     |  62
-rw-r--r--  miasm2/analysis/debugging.py     |   2
-rw-r--r--  miasm2/analysis/depgraph.py      | 214
-rw-r--r--  miasm2/analysis/disasm_cb.py     |  43
-rw-r--r--  miasm2/analysis/dse.py           |  81
-rw-r--r--  miasm2/analysis/sandbox.py       |  11
9 files changed, 271 insertions, 343 deletions
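The hunks below migrate miasm2/analysis from the AsmSymbolPool / label API to the LocationDB / loc_key API, and thread the IR CFG (ircfg) explicitly instead of storing blocks on the IR descriptor (ir_arch). The following is a minimal sketch of the resulting call pattern, assuming only the post-refactoring names visible in the hunks (Container.loc_db, new_ircfg, add_asmblock_to_ircfg, the two-argument dead_simp); the file path and the use of the container entry point are placeholders, not part of this change:

    from miasm2.analysis.binary import Container
    from miasm2.analysis.machine import Machine
    from miasm2.analysis.data_flow import dead_simp

    # Parse the target and reuse its LocationDB (replaces the deprecated symbol_pool)
    cont = Container.from_stream(open("target.bin"))           # placeholder path
    machine = Machine(cont.arch)
    mdis = machine.dis_engine(cont.bin_stream, loc_db=cont.loc_db)
    asmcfg = mdis.dis_multiblock(cont.entry_point)

    # The IR CFG is now a separate object, built from the IR descriptor
    ir_arch = machine.ira(cont.loc_db)
    ircfg = ir_arch.new_ircfg()
    for block in asmcfg.blocks:
        ir_arch.add_asmblock_to_ircfg(block, ircfg)

    # dead_simp now takes both the IR descriptor and the IR CFG
    dead_simp(ir_arch, ircfg)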
diff --git a/miasm2/analysis/binary.py b/miasm2/analysis/binary.py index 6073e126..16e573bb 100644 --- a/miasm2/analysis/binary.py +++ b/miasm2/analysis/binary.py @@ -1,8 +1,9 @@ import logging +import warnings from miasm2.core.bin_stream import bin_stream_str, bin_stream_elf, bin_stream_pe from miasm2.jitter.csts import PAGE_READ -from miasm2.core.asmblock import AsmSymbolPool +from miasm2.core.locationdb import LocationDB log = logging.getLogger("binary") @@ -94,7 +95,7 @@ class Container(object): self._bin_stream = None self._entry_point = None self._arch = None - self._symbol_pool = AsmSymbolPool() + self._loc_db = LocationDB() # Launch parsing self.parse(*args, **kwargs) @@ -120,10 +121,15 @@ class Container(object): return self._arch @property - def symbol_pool(self): - "AsmSymbolPool instance preloaded with container symbols (if any)" - return self._symbol_pool + def loc_db(self): + "LocationDB instance preloaded with container symbols (if any)" + return self._loc_db + @property + def symbol_pool(self): + "[DEPRECATED API]" + warnings.warn("Deprecated API: use 'loc_db'") + return self.loc_db ## Format dependent classes class ContainerPE(Container): @@ -202,14 +208,16 @@ class ContainerELF(Container): offset = symb.value if offset == 0: continue + if not name: + continue try: - self._symbol_pool.add_label(name, offset) + self._loc_db.add_location(name, offset) except ValueError: # Two symbols points on the same offset log.warning("Same offset (%s) for %s and %s", (hex(offset), name, - self._symbol_pool.getby_offset(offset))) + self._loc_db.get_offset_location(offset))) continue diff --git a/miasm2/analysis/cst_propag.py b/miasm2/analysis/cst_propag.py index 18829627..7f74324f 100644 --- a/miasm2/analysis/cst_propag.py +++ b/miasm2/analysis/cst_propag.py @@ -17,21 +17,20 @@ class SymbExecState(SymbolicExecutionEngine): """ State manager for SymbolicExecution """ - def __init__(self, ir_arch, state): + def __init__(self, ir_arch, ircfg, state): super(SymbExecState, self).__init__(ir_arch, {}) self.set_state(state) -def add_state(ir_arch, todo, states, addr, state): +def add_state(ircfg, todo, states, addr, state): """ Add or merge the computed @state for the block at @addr. Update @todo - @ir_arch: IR instance @todo: modified block set @states: dictionnary linking a label to its entering state. 
@addr: address of the concidered block @state: computed state """ - addr = ir_arch.get_label(addr) + addr = ircfg.get_loc_key(addr) todo.add(addr) if addr not in states: states[addr] = state @@ -67,7 +66,8 @@ class SymbExecStateFix(SymbolicExecutionEngine): # Function used to test if an Expression is considered as a constant is_expr_cst = lambda _, ir_arch, expr: is_expr_cst(ir_arch, expr) - def __init__(self, ir_arch, state, cst_propag_link): + def __init__(self, ir_arch, ircfg, state, cst_propag_link): + self.ircfg = ircfg super(SymbExecStateFix, self).__init__(ir_arch, {}) self.set_state(state) self.cst_propag_link = cst_propag_link @@ -108,14 +108,14 @@ class SymbExecStateFix(SymbolicExecutionEngine): for arg in assignblk.instr.args: new_arg = self.propag_expr_cst(arg) links[new_arg] = arg - self.cst_propag_link[(irb.label, index)] = links + self.cst_propag_link[(irb.loc_key, index)] = links self.eval_updt_assignblk(assignblk) assignblks.append(AssignBlock(new_assignblk, assignblk.instr)) - self.ir_arch.blocks[irb.label] = IRBlock(irb.label, assignblks) + self.ircfg.blocks[irb.loc_key] = IRBlock(irb.loc_key, assignblks) -def compute_cst_propagation_states(ir_arch, init_addr, init_infos): +def compute_cst_propagation_states(ir_arch, ircfg, init_addr, init_infos): """ Propagate "constant expressions" in a function. The attribute "constant expression" is true if the expression is based on @@ -128,7 +128,7 @@ def compute_cst_propagation_states(ir_arch, init_addr, init_infos): done = set() state = SymbExecState.StateEngine(init_infos) - lbl = ir_arch.get_label(init_addr) + lbl = ircfg.get_loc_key(init_addr) todo = set([lbl]) states = {lbl: state} @@ -140,11 +140,11 @@ def compute_cst_propagation_states(ir_arch, init_addr, init_infos): if (lbl, state) in done: continue done.add((lbl, state)) - if lbl not in ir_arch.blocks: + if lbl not in ircfg.blocks: continue - symbexec_engine = SymbExecState(ir_arch, state) - addr = symbexec_engine.run_block_at(lbl) + symbexec_engine = SymbExecState(ir_arch, ircfg, state) + addr = symbexec_engine.run_block_at(ircfg, lbl) symbexec_engine.del_mem_above_stack(ir_arch.sp) for dst in possible_values(addr): @@ -153,14 +153,16 @@ def compute_cst_propagation_states(ir_arch, init_addr, init_infos): LOG_CST_PROPAG.warning('Bad destination: %s', value) continue elif value.is_int(): - value = ir_arch.get_label(value) - add_state(ir_arch, todo, states, value, - symbexec_engine.get_state()) + value = ircfg.get_loc_key(value) + add_state( + ircfg, todo, states, value, + symbexec_engine.get_state() + ) return states -def propagate_cst_expr(ir_arch, addr, init_infos): +def propagate_cst_expr(ir_arch, ircfg, addr, init_infos): """ Propagate "constant expressions" in a @ir_arch. The attribute "constant expression" is true if the expression is based on @@ -172,11 +174,11 @@ def propagate_cst_expr(ir_arch, addr, init_infos): Returns a mapping between replaced Expression and their new values. 
""" - states = compute_cst_propagation_states(ir_arch, addr, init_infos) + states = compute_cst_propagation_states(ir_arch, ircfg, addr, init_infos) cst_propag_link = {} for lbl, state in states.iteritems(): - if lbl not in ir_arch.blocks: + if lbl not in ircfg.blocks: continue - symbexec = SymbExecStateFix(ir_arch, state, cst_propag_link) - symbexec.eval_updt_irblock(ir_arch.blocks[lbl]) + symbexec = SymbExecStateFix(ir_arch, ircfg, state, cst_propag_link) + symbexec.eval_updt_irblock(ircfg.blocks[lbl]) return cst_propag_link diff --git a/miasm2/analysis/data_analysis.py b/miasm2/analysis/data_analysis.py index bceb0bd8..9c21fd51 100644 --- a/miasm2/analysis/data_analysis.py +++ b/miasm2/analysis/data_analysis.py @@ -4,12 +4,11 @@ from miasm2.ir.symbexec import SymbolicExecutionEngine def get_node_name(label, i, n): - # n_name = "%s_%d_%s"%(label.name, i, n) n_name = (label, i, n) return n_name -def intra_block_flow_raw(ir_arch, flow_graph, irb, in_nodes, out_nodes): +def intra_block_flow_raw(ir_arch, ircfg, flow_graph, irb, in_nodes, out_nodes): """ Create data flow for an irbloc using raw IR expressions """ @@ -27,7 +26,7 @@ def intra_block_flow_raw(ir_arch, flow_graph, irb, in_nodes, out_nodes): continue for n in all_mems: - node_n_w = get_node_name(irb.label, i, n) + node_n_w = get_node_name(irb.loc_key, i, n) if not n in nodes_r: continue o_r = n.arg.get_r(mem_read=False, cst_read=True) @@ -35,7 +34,7 @@ def intra_block_flow_raw(ir_arch, flow_graph, irb, in_nodes, out_nodes): if n_r in current_nodes: node_n_r = current_nodes[n_r] else: - node_n_r = get_node_name(irb.label, i, n_r) + node_n_r = get_node_name(irb.loc_key, i, n_r) current_nodes[n_r] = node_n_r in_nodes[n_r] = node_n_r flow_graph.add_uniq_edge(node_n_r, node_n_w) @@ -46,80 +45,40 @@ def intra_block_flow_raw(ir_arch, flow_graph, irb, in_nodes, out_nodes): if n_r in current_nodes: node_n_r = current_nodes[n_r] else: - node_n_r = get_node_name(irb.label, i, n_r) + node_n_r = get_node_name(irb.loc_key, i, n_r) current_nodes[n_r] = node_n_r in_nodes[n_r] = node_n_r flow_graph.add_node(node_n_r) - node_n_w = get_node_name(irb.label, i + 1, node_w) + node_n_w = get_node_name(irb.loc_key, i + 1, node_w) out_nodes[node_w] = node_n_w flow_graph.add_node(node_n_w) flow_graph.add_uniq_edge(node_n_r, node_n_w) -def intra_block_flow_symbexec(ir_arch, flow_graph, irb, in_nodes, out_nodes): - """ - Create data flow for an irbloc using symbolic execution - """ - current_nodes = {} - - symbols_init = dict(ir_arch.arch.regs.regs_init) - - sb = SymbolicExecutionEngine(ir_arch, dict(symbols_init)) - sb.emulbloc(irb) - # print "*"*40 - # print irb - # print sb.dump_id() - # print sb.dump_mem() - - for n_w in sb.symbols: - # print n_w - v = sb.symbols[n_w] - if n_w in symbols_init and symbols_init[n_w] == v: - continue - read_values = v.get_r(cst_read=True) - # print n_w, v, [str(x) for x in read_values] - node_n_w = get_node_name(irb.label, len(irb), n_w) - for n_r in read_values: - if n_r in current_nodes: - node_n_r = current_nodes[n_r] - else: - node_n_r = get_node_name(irb.label, 0, n_r) - current_nodes[n_r] = node_n_r - in_nodes[n_r] = node_n_r - - out_nodes[n_w] = node_n_w - flow_graph.add_uniq_edge(node_n_r, node_n_w) - - -def inter_block_flow_link(ir_arch, flow_graph, irb_in_nodes, irb_out_nodes, todo, link_exec_to_data): +def inter_block_flow_link(ir_arch, ircfg, flow_graph, irb_in_nodes, irb_out_nodes, todo, link_exec_to_data): lbl, current_nodes, exec_nodes = todo - # print 'TODO' - # print lbl - # print [(str(x[0]), str(x[1])) for 
x in current_nodes] current_nodes = dict(current_nodes) # link current nodes to bloc in_nodes - if not lbl in ir_arch.blocks: + if not lbl in ircfg.blocks: print "cannot find bloc!!", lbl return set() - irb = ir_arch.blocks[lbl] - # pp(('IN', lbl, [(str(x[0]), str(x[1])) for x in current_nodes.items()])) + irb = ircfg.blocks[lbl] to_del = set() - for n_r, node_n_r in irb_in_nodes[irb.label].items(): + for n_r, node_n_r in irb_in_nodes[irb.loc_key].items(): if not n_r in current_nodes: continue - # print 'add link', current_nodes[n_r], node_n_r flow_graph.add_uniq_edge(current_nodes[n_r], node_n_r) to_del.add(n_r) # if link exec to data, all nodes depends on exec nodes if link_exec_to_data: for n_x_r in exec_nodes: - for n_r, node_n_r in irb_in_nodes[irb.label].items(): + for n_r, node_n_r in irb_in_nodes[irb.loc_key].items(): if not n_x_r in current_nodes: continue if isinstance(n_r, ExprInt): @@ -127,18 +86,16 @@ def inter_block_flow_link(ir_arch, flow_graph, irb_in_nodes, irb_out_nodes, todo flow_graph.add_uniq_edge(current_nodes[n_x_r], node_n_r) # update current nodes using bloc out_nodes - for n_w, node_n_w in irb_out_nodes[irb.label].items(): + for n_w, node_n_w in irb_out_nodes[irb.loc_key].items(): current_nodes[n_w] = node_n_w # get nodes involved in exec flow x_nodes = tuple(sorted(list(irb.dst.get_r()))) todo = set() - for lbl_dst in ir_arch.graph.successors(irb.label): + for lbl_dst in ircfg.successors(irb.loc_key): todo.add((lbl_dst, tuple(current_nodes.items()), x_nodes)) - # pp(('OUT', lbl, [(str(x[0]), str(x[1])) for x in current_nodes.items()])) - return todo @@ -150,36 +107,29 @@ def create_implicit_flow(ir_arch, flow_graph, irb_in_nodes, irb_out_ndes): while todo: lbl = todo.pop() irb = ir_arch.blocks[lbl] - for lbl_son in ir_arch.graph.successors(irb.label): + for lbl_son in ir_arch.graph.successors(irb.loc_key): if not lbl_son in ir_arch.blocks: print "cannot find bloc!!", lbl continue irb_son = ir_arch.blocks[lbl_son] - for n_r in irb_in_nodes[irb_son.label]: - if n_r in irb_out_nodes[irb.label]: + for n_r in irb_in_nodes[irb_son.loc_key]: + if n_r in irb_out_nodes[irb.loc_key]: continue if not isinstance(n_r, ExprId): continue - # print "###", n_r - # print "###", irb - # print "###", 'OUT', [str(x) for x in irb.out_nodes] - # print "###", irb_son - # print "###", 'IN', [str(x) for x in irb_son.in_nodes] - - node_n_w = irb.label, len(irb), n_r - irb_out_nodes[irb.label][n_r] = node_n_w - if not n_r in irb_in_nodes[irb.label]: - irb_in_nodes[irb.label][n_r] = irb.label, 0, n_r - node_n_r = irb_in_nodes[irb.label][n_r] - # print "###", node_n_r - for lbl_p in ir_arch.graph.predecessors(irb.label): + node_n_w = irb.loc_key, len(irb), n_r + irb_out_nodes[irb.loc_key][n_r] = node_n_w + if not n_r in irb_in_nodes[irb.loc_key]: + irb_in_nodes[irb.loc_key][n_r] = irb.loc_key, 0, n_r + node_n_r = irb_in_nodes[irb.loc_key][n_r] + for lbl_p in ir_arch.graph.predecessors(irb.loc_key): todo.add(lbl_p) flow_graph.add_uniq_edge(node_n_r, node_n_w) -def inter_block_flow(ir_arch, flow_graph, irb_0, irb_in_nodes, irb_out_nodes, link_exec_to_data=True): +def inter_block_flow(ir_arch, ircfg, flow_graph, irb_0, irb_in_nodes, irb_out_nodes, link_exec_to_data=True): todo = set() done = set() @@ -190,7 +140,7 @@ def inter_block_flow(ir_arch, flow_graph, irb_0, irb_in_nodes, irb_out_nodes, li if state in done: continue done.add(state) - out = inter_block_flow_link(ir_arch, flow_graph, irb_in_nodes, irb_out_nodes, state, link_exec_to_data) + out = inter_block_flow_link(ir_arch, ircfg, 
flow_graph, irb_in_nodes, irb_out_nodes, state, link_exec_to_data) todo.update(out) @@ -220,46 +170,11 @@ class symb_exec_func: b = self.ir_arch.get_block(ad) if b is None: raise ValueError("unknown bloc! %s" % ad) - """ - dead = b.dead[0] - for d in dead: - if d in variables: - del(variables[d]) - """ variables = variables.items() s = parent, ad, tuple(sorted(variables)) - """ - state_var = s[1] - if s in self.states_var_done: - print 'skip state' - return - if not ad in self.stateby_ad: - self.stateby_ad[ad] = set() - self.stateby_ad[ad].add(state_var) - - """ self.todo.add(s) - """ - if not ad in self.cpt: - self.cpt[ad] = 0 - """ - """ - def get_next_min(self): - state_by_ad = {} - for state in self.todo: - ad = state[1] - if not ad in state_by_ad: - state_by_ad[ad] = [] - state_by_ad[ad].append(state) - print "XX", [len(x) for x in state_by_ad.values()] - state_by_ad = state_by_ad.items() - state_by_ad.sort(key=lambda x:len(x[1])) - state_by_ad.reverse() - return state_by_ad.pop()[1][0] - """ - def get_next_state(self): state = self.todo.pop() return state @@ -273,16 +188,10 @@ class symb_exec_func: self.total_done += 1 print 'CPT', self.total_done while self.todo: - # if self.total_done>20: - # self.get_next_min() - # state = self.todo.pop() state = self.get_next_state() parent, ad, s = state self.states_done.add(state) self.states_var_done.add(state) - # if s in self.states_var_done: - # print "state done" - # continue sb = SymbolicExecutionEngine(self.ir_arch, dict(s)) diff --git a/miasm2/analysis/data_flow.py b/miasm2/analysis/data_flow.py index d9f61c56..9e5203a6 100644 --- a/miasm2/analysis/data_flow.py +++ b/miasm2/analysis/data_flow.py @@ -29,16 +29,16 @@ class ReachingDefinitions(dict): { (block, index): { lvalue: set((block, index)) } } """ - ir_a = None + ircfg = None - def __init__(self, ir_a): + def __init__(self, ircfg): super(ReachingDefinitions, self).__init__() - self.ir_a = ir_a + self.ircfg = ircfg self.compute() def get_definitions(self, block_lbl, assignblk_index): """Returns the dict { lvalue: set((def_block_lbl, def_index)) } - associated with self.ir_a.@block.assignblks[@assignblk_index] + associated with self.ircfg.@block.assignblks[@assignblk_index] or {} if it is not yet computed """ return self.get((block_lbl, assignblk_index), {}) @@ -48,7 +48,7 @@ class ReachingDefinitions(dict): modified = True while modified: modified = False - for block in self.ir_a.blocks.itervalues(): + for block in self.ircfg.blocks.itervalues(): modified |= self.process_block(block) def process_block(self, block): @@ -57,15 +57,15 @@ class ReachingDefinitions(dict): the assignblk in block @block. 
""" predecessor_state = {} - for pred_lbl in self.ir_a.graph.predecessors(block.label): - pred = self.ir_a.blocks[pred_lbl] + for pred_lbl in self.ircfg.predecessors(block.loc_key): + pred = self.ircfg.blocks[pred_lbl] for lval, definitions in self.get_definitions(pred_lbl, len(pred)).iteritems(): predecessor_state.setdefault(lval, set()).update(definitions) - modified = self.get((block.label, 0)) != predecessor_state + modified = self.get((block.loc_key, 0)) != predecessor_state if not modified: return False - self[(block.label, 0)] = predecessor_state + self[(block.loc_key, 0)] = predecessor_state for index in xrange(len(block)): modified |= self.process_assignblock(block, index) @@ -80,13 +80,13 @@ class ReachingDefinitions(dict): """ assignblk = block[assignblk_index] - defs = self.get_definitions(block.label, assignblk_index).copy() + defs = self.get_definitions(block.loc_key, assignblk_index).copy() for lval in assignblk: - defs.update({lval: set([(block.label, assignblk_index)])}) + defs.update({lval: set([(block.loc_key, assignblk_index)])}) - modified = self.get((block.label, assignblk_index + 1)) != defs + modified = self.get((block.loc_key, assignblk_index + 1)) != defs if modified: - self[(block.label, assignblk_index + 1)] = defs + self[(block.loc_key, assignblk_index + 1)] = defs return modified @@ -126,7 +126,7 @@ class DiGraphDefUse(DiGraph): # For dot display self._filter_node = None self._dot_offset = None - self._blocks = reaching_defs.ir_a.blocks + self._blocks = reaching_defs.ircfg.blocks super(DiGraphDefUse, self).__init__(*args, **kwargs) self._compute_def_use(reaching_defs, @@ -149,9 +149,9 @@ class DiGraphDefUse(DiGraph): def _compute_def_use_block(self, block, reaching_defs, deref_mem=False): for index, assignblk in enumerate(block): - assignblk_reaching_defs = reaching_defs.get_definitions(block.label, index) + assignblk_reaching_defs = reaching_defs.get_definitions(block.loc_key, index) for lval, expr in assignblk.iteritems(): - self.add_node(AssignblkNode(block.label, index, lval)) + self.add_node(AssignblkNode(block.loc_key, index, lval)) read_vars = expr.get_r(mem_read=deref_mem) if deref_mem and lval.is_mem(): @@ -159,7 +159,7 @@ class DiGraphDefUse(DiGraph): for read_var in read_vars: for reach in assignblk_reaching_defs.get(read_var, set()): self.add_data_edge(AssignblkNode(reach[0], reach[1], read_var), - AssignblkNode(block.label, index, lval)) + AssignblkNode(block.loc_key, index, lval)) def del_edge(self, src, dst): super(DiGraphDefUse, self).del_edge(src, dst) @@ -189,7 +189,7 @@ class DiGraphDefUse(DiGraph): yield self.DotCellDescription(text="", attr={}) -def dead_simp_useful_assignblks(defuse, reaching_defs): +def dead_simp_useful_assignblks(irarch, defuse, reaching_defs): """Mark useful statements using previous reach analysis and defuse Source : Kennedy, K. (1979). A survey of data flow analysis techniques. 
@@ -200,13 +200,13 @@ def dead_simp_useful_assignblks(defuse, reaching_defs): PRE: compute_reach(self) """ - ir_a = reaching_defs.ir_a + ircfg = reaching_defs.ircfg useful = set() - for block_lbl, block in ir_a.blocks.iteritems(): - successors = ir_a.graph.successors(block_lbl) + for block_lbl, block in ircfg.blocks.iteritems(): + successors = ircfg.successors(block_lbl) for successor in successors: - if successor not in ir_a.blocks: + if successor not in ircfg.blocks: keep_all_definitions = True break else: @@ -217,7 +217,7 @@ def dead_simp_useful_assignblks(defuse, reaching_defs): valid_definitions = reaching_defs.get_definitions(block_lbl, len(block)) for lval, definitions in valid_definitions.iteritems(): - if (lval in ir_a.get_out_regs(block) + if (lval in irarch.get_out_regs(block) or keep_all_definitions): for definition in definitions: useful.add(AssignblkNode(definition[0], definition[1], lval)) @@ -226,7 +226,7 @@ def dead_simp_useful_assignblks(defuse, reaching_defs): for index, assignblk in enumerate(block): for lval, rval in assignblk.iteritems(): if (lval.is_mem() - or ir_a.IRDst == lval + or irarch.IRDst == lval or rval.is_function_call()): useful.add(AssignblkNode(block_lbl, index, lval)) @@ -235,7 +235,7 @@ def dead_simp_useful_assignblks(defuse, reaching_defs): for parent in defuse.reachable_parents(node): yield parent -def dead_simp(ir_a): +def dead_simp(irarch, ircfg): """ Remove useless affectations. @@ -245,21 +245,21 @@ def dead_simp(ir_a): Source : Kennedy, K. (1979). A survey of data flow analysis techniques. IBM Thomas J. Watson Research Division, page 43 - @ir_a: IntermediateRepresentation instance + @ircfg: IntermediateRepresentation instance """ modified = False - reaching_defs = ReachingDefinitions(ir_a) + reaching_defs = ReachingDefinitions(ircfg) defuse = DiGraphDefUse(reaching_defs, deref_mem=True) - useful = set(dead_simp_useful_assignblks(defuse, reaching_defs)) - for block in ir_a.blocks.itervalues(): + useful = set(dead_simp_useful_assignblks(irarch, defuse, reaching_defs)) + for block in ircfg.blocks.itervalues(): irs = [] for idx, assignblk in enumerate(block): new_assignblk = dict(assignblk) for lval in assignblk: - if AssignblkNode(block.label, idx, lval) not in useful: + if AssignblkNode(block.loc_key, idx, lval) not in useful: del new_assignblk[lval] modified = True irs.append(AssignBlock(new_assignblk, assignblk.instr)) - ir_a.blocks[block.label] = IRBlock(block.label, irs) + ircfg.blocks[block.loc_key] = IRBlock(block.loc_key, irs) return modified diff --git a/miasm2/analysis/debugging.py b/miasm2/analysis/debugging.py index fc03eb17..6b88f00a 100644 --- a/miasm2/analysis/debugging.py +++ b/miasm2/analysis/debugging.py @@ -273,7 +273,7 @@ class DebugCmd(cmd.Cmd, object): def add_breakpoints(self, bp_addr): for addr in bp_addr: - addr = int(addr, 0) + addr = int(addr, 0) good = True for i, dbg_obj in enumerate(self.dbg.bp_list): diff --git a/miasm2/analysis/depgraph.py b/miasm2/analysis/depgraph.py index f7949c88..93b3edb5 100644 --- a/miasm2/analysis/depgraph.py +++ b/miasm2/analysis/depgraph.py @@ -1,8 +1,8 @@ """Provide dependency graph""" -import miasm2.expression.expression as m2_expr +from miasm2.expression.expression import ExprInt, ExprLoc, ExprAff from miasm2.core.graph import DiGraph -from miasm2.core.asmblock import AsmLabel, expr_is_int_or_label, expr_is_label +from miasm2.core.locationdb import LocationDB from miasm2.expression.simplifications import expr_simp from miasm2.ir.symbexec import SymbolicExecutionEngine from miasm2.ir.ir 
import IRBlock, AssignBlock @@ -20,23 +20,23 @@ class DependencyNode(object): """Node elements of a DependencyGraph A dependency node stands for the dependency on the @element at line number - @line_nb in the IRblock named @label, *before* the evaluation of this + @line_nb in the IRblock named @loc_key, *before* the evaluation of this line. """ - __slots__ = ["_label", "_element", "_line_nb", "_hash"] + __slots__ = ["_loc_key", "_element", "_line_nb", "_hash"] - def __init__(self, label, element, line_nb): + def __init__(self, loc_key, element, line_nb): """Create a dependency node with: - @label: AsmLabel instance + @loc_key: LocKey instance @element: Expr instance @line_nb: int """ - self._label = label + self._loc_key = loc_key self._element = element self._line_nb = line_nb self._hash = hash( - (self._label, self._element, self._line_nb)) + (self._loc_key, self._element, self._line_nb)) def __hash__(self): """Returns a hash of @self to uniquely identify @self""" @@ -46,7 +46,7 @@ class DependencyNode(object): """Returns True if @self and @depnode are equals.""" if not isinstance(depnode, self.__class__): return False - return (self.label == depnode.label and + return (self.loc_key == depnode.loc_key and self.element == depnode.element and self.line_nb == depnode.line_nb) @@ -55,13 +55,13 @@ class DependencyNode(object): if not isinstance(node, self.__class__): return cmp(self.__class__, node.__class__) - return cmp((self.label, self.element, self.line_nb), - (node.label, node.element, node.line_nb)) + return cmp((self.loc_key, self.element, self.line_nb), + (node.loc_key, node.element, node.line_nb)) def __str__(self): """Returns a string representation of DependencyNode""" return "<%s %s %s %s>" % (self.__class__.__name__, - self.label.name, self.element, + self.loc_key, self.element, self.line_nb) def __repr__(self): @@ -69,9 +69,9 @@ class DependencyNode(object): return self.__str__() @property - def label(self): + def loc_key(self): "Name of the current IRBlock" - return self._label + return self._loc_key @property def element(self): @@ -90,9 +90,9 @@ class DependencyState(object): Store intermediate depnodes states during dependencygraph analysis """ - def __init__(self, label, pending, line_nb=None): - self.label = label - self.history = [label] + def __init__(self, loc_key, pending, line_nb=None): + self.loc_key = loc_key + self.history = [loc_key] self.pending = {k: set(v) for k, v in pending.iteritems()} self.line_nb = line_nb self.links = set() @@ -101,22 +101,22 @@ class DependencyState(object): self._graph = None def __repr__(self): - return "<State: %r (%r) (%r)>" % (self.label, + return "<State: %r (%r) (%r)>" % (self.loc_key, self.pending, self.links) - def extend(self, label): + def extend(self, loc_key): """Return a copy of itself, with itself in history - @label: AsmLabel instance for the new DependencyState's label + @loc_key: LocKey instance for the new DependencyState's loc_key """ - new_state = self.__class__(label, self.pending) + new_state = self.__class__(loc_key, self.pending) new_state.links = set(self.links) - new_state.history = self.history + [label] + new_state.history = self.history + [loc_key] return new_state def get_done_state(self): """Returns immutable object representing current state""" - return (self.label, frozenset(self.links)) + return (self.loc_key, frozenset(self.links)) def as_graph(self): """Generates a Digraph of dependencies""" @@ -157,7 +157,7 @@ class DependencyState(object): @line_nb: the element's line """ - depnode = 
DependencyNode(self.label, element, line_nb) + depnode = DependencyNode(self.loc_key, element, line_nb) if not self.pending[element]: # Create start node self.links.add((depnode, None)) @@ -175,14 +175,14 @@ class DependencyState(object): @future_pending: the future dependencies """ - depnode = DependencyNode(self.label, element, line_nb) + depnode = DependencyNode(self.loc_key, element, line_nb) # Update pending, add link to unfollowed nodes for dependency in dependencies: if not dependency.follow: # Add non followed dependencies to the dependency graph parent = DependencyNode( - self.label, dependency.element, line_nb) + self.loc_key, dependency.element, line_nb) self.links.add((parent, depnode)) continue # Create future pending between new dependency and the current @@ -194,15 +194,15 @@ class DependencyResult(DependencyState): """Container and methods for DependencyGraph results""" - def __init__(self, ira, initial_state, state, inputs): + def __init__(self, ircfg, initial_state, state, inputs): self.initial_state = initial_state - self.label = state.label + self.loc_key = state.loc_key self.history = state.history self.pending = state.pending self.line_nb = state.line_nb self.inputs = inputs self.links = state.links - self._ira = ira + self._ircfg = ircfg # Init lazy elements self._graph = None @@ -212,7 +212,7 @@ class DependencyResult(DependencyState): def unresolved(self): """Set of nodes whose dependencies weren't found""" return set(element for element in self.pending - if element != self._ira.IRDst) + if element != self._ircfg.IRDst) @property def relevant_nodes(self): @@ -225,17 +225,17 @@ class DependencyResult(DependencyState): return output @property - def relevant_labels(self): - """List of labels containing nodes influencing inputs. + def relevant_loc_keys(self): + """List of loc_keys containing nodes influencing inputs. The history order is preserved.""" - # Get used labels - used_labels = set(depnode.label for depnode in self.relevant_nodes) + # Get used loc_keys + used_loc_keys = set(depnode.loc_key for depnode in self.relevant_nodes) # Keep history order output = [] - for label in self.history: - if label in used_labels: - output.append(label) + for loc_key in self.history: + if loc_key in used_loc_keys: + output.append(loc_key) return output @@ -255,7 +255,7 @@ class DependencyResult(DependencyState): assignblks = [] line2elements = {} for depnode in self.relevant_nodes: - if depnode.label != irb.label: + if depnode.loc_key != irb.loc_key: continue line2elements.setdefault(depnode.line_nb, set()).add(depnode.element) @@ -266,40 +266,42 @@ class DependencyResult(DependencyState): assignmnts = {} for element in elements: if element in irb[line_nb]: - # constants, label, ... are not in destination + # constants, loc_key, ... are not in destination assignmnts[element] = irb[line_nb][element] assignblks.append(AssignBlock(assignmnts)) - return IRBlock(irb.label, assignblks) + return IRBlock(irb.loc_key, assignblks) - def emul(self, ctx=None, step=False): + def emul(self, ir_arch, ctx=None, step=False): """Symbolic execution of relevant nodes according to the history Return the values of inputs nodes' elements + @ir_arch: IntermediateRepresentation instance @ctx: (optional) Initial context as dictionnary @step: (optional) Verbose execution Warning: The emulation is not sound if the inputs nodes depend on loop variant. 
""" # Init - ctx_init = self._ira.arch.regs.regs_init + ctx_init = {} if ctx is not None: ctx_init.update(ctx) assignblks = [] # Build a single affectation block according to history - last_index = len(self.relevant_labels) - for index, label in enumerate(reversed(self.relevant_labels), 1): - if index == last_index and label == self.initial_state.label: + last_index = len(self.relevant_loc_keys) + for index, loc_key in enumerate(reversed(self.relevant_loc_keys), 1): + if index == last_index and loc_key == self.initial_state.loc_key: line_nb = self.initial_state.line_nb else: line_nb = None - assignblks += self.irblock_slice(self._ira.blocks[label], + assignblks += self.irblock_slice(self._ircfg.blocks[loc_key], line_nb).assignblks # Eval the block - temp_label = AsmLabel("Temp") - symb_exec = SymbolicExecutionEngine(self._ira, ctx_init) - symb_exec.eval_updt_irblock(IRBlock(temp_label, assignblks), step=step) + loc_db = LocationDB() + temp_loc = loc_db.get_or_create_name_location("Temp") + symb_exec = SymbolicExecutionEngine(ir_arch, ctx_init) + symb_exec.eval_updt_irblock(IRBlock(temp_loc, assignblks), step=step) # Return only inputs values (others could be wrongs) return {element: symb_exec.symbols[element] @@ -314,30 +316,31 @@ class DependencyResultImplicit(DependencyResult): # Z3 Solver instance _solver = None - unsat_expr = m2_expr.ExprAff(m2_expr.ExprInt(0, 1), - m2_expr.ExprInt(1, 1)) + unsat_expr = ExprAff(ExprInt(0, 1), ExprInt(1, 1)) def _gen_path_constraints(self, translator, expr, expected): """Generate path constraint from @expr. Handle special case with - generated labels + generated loc_keys """ out = [] - expected_is_label = expr_is_label(expected) + expected = self._ircfg.loc_db.canonize_to_exprloc(expected) + expected_is_loc_key = expected.is_loc() for consval in possible_values(expr): - if (expected_is_label and - consval.value != expected): + value = self._ircfg.loc_db.canonize_to_exprloc(consval.value) + if expected_is_loc_key and value != expected: continue - if (not expected_is_label and - expr_is_label(consval.value)): + if not expected_is_loc_key and value.is_loc_key(): continue conds = z3.And(*[translator.from_expr(cond.to_constraint()) for cond in consval.constraints]) - if expected != consval.value: - conds = z3.And(conds, - translator.from_expr( - m2_expr.ExprAff(consval.value, - expected))) + if expected != value: + conds = z3.And( + conds, + translator.from_expr( + ExprAff(value, + expected)) + ) out.append(conds) if out: @@ -348,35 +351,33 @@ class DependencyResultImplicit(DependencyResult): conds = translator.from_expr(self.unsat_expr) return conds - def emul(self, ctx=None, step=False): + def emul(self, ir_arch, ctx=None, step=False): # Init - ctx_init = self._ira.arch.regs.regs_init + ctx_init = {} if ctx is not None: ctx_init.update(ctx) solver = z3.Solver() - symb_exec = SymbolicExecutionEngine(self._ira, ctx_init) + symb_exec = SymbolicExecutionEngine(ir_arch, ctx_init) history = self.history[::-1] history_size = len(history) translator = Translator.to_language("z3") - size = self._ira.IRDst.size + size = self._ircfg.IRDst.size - for hist_nb, label in enumerate(history, 1): - if hist_nb == history_size and label == self.initial_state.label: + for hist_nb, loc_key in enumerate(history, 1): + if hist_nb == history_size and loc_key == self.initial_state.loc_key: line_nb = self.initial_state.line_nb else: line_nb = None - irb = self.irblock_slice(self._ira.blocks[label], line_nb) + irb = self.irblock_slice(self._ircfg.blocks[loc_key], line_nb) # Emul the 
block and get back destination dst = symb_exec.eval_updt_irblock(irb, step=step) # Add constraint if hist_nb < history_size: - next_label = history[hist_nb] - expected = symb_exec.eval_expr(m2_expr.ExprId(next_label, - size)) - solver.add( - self._gen_path_constraints(translator, dst, expected)) + next_loc_key = history[hist_nb] + expected = symb_exec.eval_expr(ExprLoc(next_loc_key, size)) + solver.add(self._gen_path_constraints(translator, dst, expected)) # Save the solver self._solver = solver @@ -412,17 +413,17 @@ class FollowExpr(object): return '%s(%r, %r)' % (self.__class__.__name__, self.follow, self.element) @staticmethod - def to_depnodes(follow_exprs, label, line): + def to_depnodes(follow_exprs, loc_key, line): """Build a set of FollowExpr(DependencyNode) from the @follow_exprs set of FollowExpr @follow_exprs: set of FollowExpr - @label: AsmLabel instance + @loc_key: LocKey instance @line: integer """ dependencies = set() for follow_expr in follow_exprs: dependencies.add(FollowExpr(follow_expr.follow, - DependencyNode(label, + DependencyNode(loc_key, follow_expr.element, line))) return dependencies @@ -446,12 +447,12 @@ class DependencyGraph(object): *explicitely* or *implicitely* involved in the equation of given element. """ - def __init__(self, ira, implicit=False, apply_simp=True, follow_mem=True, + def __init__(self, ircfg, + implicit=False, apply_simp=True, follow_mem=True, follow_call=True): - """Create a DependencyGraph linked to @ira - The IRA graph must have been computed + """Create a DependencyGraph linked to @ircfg - @ira: IRAnalysis instance + @ircfg: DiGraphIR instance @implicit: (optional) Track IRDst for each block in the resulting path Following arguments define filters used to generate dependencies @@ -460,7 +461,7 @@ class DependencyGraph(object): @follow_call: (optional) Track through "call" """ # Init - self._ira = ira + self._ircfg = ircfg self._implicit = implicit # Create callback filters. The order is relevant. 
@@ -470,7 +471,7 @@ class DependencyGraph(object): self._cb_follow.append(lambda exprs: self._follow_exprs(exprs, follow_mem, follow_call)) - self._cb_follow.append(self._follow_nolabel) + self._cb_follow.append(self._follow_no_loc_key) @staticmethod def _follow_simp_expr(exprs): @@ -491,11 +492,11 @@ class DependencyGraph(object): @follow: set of nodes to follow @nofollow: set of nodes not to follow """ - if isinstance(expr, m2_expr.ExprId): + if expr.is_id(): follow.add(expr) - elif isinstance(expr, m2_expr.ExprInt): + elif expr.is_int(): nofollow.add(expr) - elif isinstance(expr, m2_expr.ExprMem): + elif expr.is_mem(): follow.add(expr) return expr @@ -508,7 +509,7 @@ class DependencyGraph(object): @follow_mem: force the visit of memory sub expressions @follow_call: force the visit of call sub expressions """ - if not follow_mem and isinstance(expr, m2_expr.ExprMem): + if not follow_mem and expr.is_mem(): nofollow.add(expr) return False if not follow_call and expr.is_function_call(): @@ -530,12 +531,13 @@ class DependencyGraph(object): return follow, nofollow @staticmethod - def _follow_nolabel(exprs): - """Do not follow labels""" + def _follow_no_loc_key(exprs): + """Do not follow loc_keys""" follow = set() for expr in exprs: - if not expr_is_int_or_label(expr): - follow.add(expr) + if expr.is_int() or expr.is_loc(): + continue + follow.add(expr) return follow, set() @@ -562,7 +564,7 @@ class DependencyGraph(object): if dst not in state.pending: continue # Track IRDst in implicit mode only - if dst == self._ira.IRDst and not self._implicit: + if dst == self._ircfg.IRDst and not self._implicit: continue assert dst not in node_resolved node_resolved.add(dst) @@ -580,25 +582,25 @@ class DependencyGraph(object): """Follow dependencies tracked in @state in the current irbloc @state: instance of DependencyState""" - irb = self._ira.blocks[state.label] + irb = self._ircfg.blocks[state.loc_key] line_nb = len(irb) if state.line_nb is None else state.line_nb for cur_line_nb, assignblk in reversed(list(enumerate(irb[:line_nb]))): self._track_exprs(state, assignblk, cur_line_nb) - def get(self, label, elements, line_nb, heads): + def get(self, loc_key, elements, line_nb, heads): """Compute the dependencies of @elements at line number @line_nb in - the block named @label in the current IRA, before the execution of + the block named @loc_key in the current DiGraphIR, before the execution of this line. 
Dependency check stop if one of @heads is reached - @label: AsmLabel instance + @loc_key: LocKey instance @element: set of Expr instances @line_nb: int - @heads: set of AsmLabel instances + @heads: set of LocKey instances Return an iterator on DiGraph(DependencyNode) """ # Init the algorithm inputs = {element: set() for element in elements} - initial_state = DependencyState(label, inputs, line_nb) + initial_state = DependencyState(loc_key, inputs, line_nb) todo = set([initial_state]) done = set() dpResultcls = DependencyResultImplicit if self._implicit else DependencyResult @@ -611,27 +613,27 @@ class DependencyGraph(object): continue done.add(done_state) if (not state.pending or - state.label in heads or - not self._ira.graph.predecessors(state.label)): - yield dpResultcls(self._ira, initial_state, state, elements) + state.loc_key in heads or + not self._ircfg.predecessors(state.loc_key)): + yield dpResultcls(self._ircfg, initial_state, state, elements) if not state.pending: continue if self._implicit: # Force IRDst to be tracked, except in the input block - state.pending[self._ira.IRDst] = set() + state.pending[self._ircfg.IRDst] = set() # Propagate state to parents - for pred in self._ira.graph.predecessors_iter(state.label): + for pred in self._ircfg.predecessors_iter(state.loc_key): todo.add(state.extend(pred)) def get_from_depnodes(self, depnodes, heads): """Alias for the get() method. Use the attributes of @depnodes as argument. - PRE: Labels and lines of depnodes have to be equals + PRE: Loc_Keys and lines of depnodes have to be equals @depnodes: set of DependencyNode instances - @heads: set of AsmLabel instances + @heads: set of LocKey instances """ lead = list(depnodes)[0] elements = set(depnode.element for depnode in depnodes) - return self.get(lead.label, elements, lead.line_nb, heads) + return self.get(lead.loc_key, elements, lead.line_nb, heads) diff --git a/miasm2/analysis/disasm_cb.py b/miasm2/analysis/disasm_cb.py index e759e313..d3278cb4 100644 --- a/miasm2/analysis/disasm_cb.py +++ b/miasm2/analysis/disasm_cb.py @@ -2,10 +2,9 @@ from miasm2.expression.expression import ExprInt, ExprId, ExprMem, match_expr from miasm2.expression.simplifications import expr_simp -from miasm2.core.asmblock \ - import AsmSymbolPool, AsmConstraintNext, AsmConstraintTo +from miasm2.core.asmblock import AsmConstraintNext, AsmConstraintTo +from miasm2.core.locationdb import LocationDB from miasm2.core.utils import upck32 -# from miasm2.core.graph import DiGraph def get_ira(mnemo, attrib): @@ -22,21 +21,19 @@ def get_ira(mnemo, attrib): def arm_guess_subcall( - mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis, symbol_pool): + mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis, loc_db): ira = get_ira(mnemo, attrib) - sp = AsmSymbolPool() + sp = LocationDB() ir_arch = ira(sp) + ircfg = ira.new_ircfg() print '###' print cur_bloc - ir_arch.add_block(cur_bloc) + ir_arch.add_asmblock_to_ircfg(cur_bloc, ircfg) - ir_blocks = ir_arch.blocks.values() - # flow_graph = DiGraph() + ir_blocks = ircfg.blocks.values() to_add = set() for irblock in ir_blocks: - # print 'X'*40 - # print irblock pc_val = None lr_val = None for exprs in irblock: @@ -53,43 +50,35 @@ def arm_guess_subcall( l = cur_bloc.lines[-1] if lr_val.arg != l.offset + l.l: continue - # print 'IS CALL!' 
- l = symbol_pool.getby_offset_create(int(lr_val)) + l = loc_db.get_or_create_offset_location(int(lr_val)) c = AsmConstraintNext(l) to_add.add(c) offsets_to_dis.add(int(lr_val)) - # if to_add: - # print 'R'*70 for c in to_add: - # print c cur_bloc.addto(c) def arm_guess_jump_table( - mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis, symbol_pool): + mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis, loc_db): ira = get_ira(mnemo, attrib) jra = ExprId('jra') jrb = ExprId('jrb') - sp = AsmSymbolPool() + sp = LocationDB() ir_arch = ira(sp) - ir_arch.add_block(cur_bloc) + ircfg = ira.new_ircfg() + ir_arch.add_asmblock_to_ircfg(cur_bloc, ircfg) - ir_blocks = ir_arch.blocks.values() + ir_blocks = ircfg.blocks.values() for irblock in ir_blocks: - # print 'X'*40 - # print irblock pc_val = None - # lr_val = None for exprs in irblock: for e in exprs: if e.dst == ir_arch.pc: pc_val = e.src - # if e.dst == mnemo.regs.LR: - # lr_val = e.src if pc_val is None: continue if not isinstance(pc_val, ExprMem): @@ -124,7 +113,7 @@ def arm_guess_jump_table( for ad in addrs: offsets_to_dis.add(ad) - l = symbol_pool.getby_offset_create(ad) + l = loc_db.get_or_create_offset_location(ad) c = AsmConstraintTo(l) cur_bloc.addto(c) @@ -132,6 +121,6 @@ guess_funcs = [] def guess_multi_cb( - mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis, symbol_pool): + mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis, loc_db): for f in guess_funcs: - f(mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis, symbol_pool) + f(mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis, loc_db) diff --git a/miasm2/analysis/dse.py b/miasm2/analysis/dse.py index 427a8bd0..0c01610f 100644 --- a/miasm2/analysis/dse.py +++ b/miasm2/analysis/dse.py @@ -56,15 +56,14 @@ except ImportError: z3 = None from miasm2.expression.expression import ExprMem, ExprInt, ExprCompose, \ - ExprAff, ExprId + ExprAff, ExprId, ExprLoc, LocKey from miasm2.core.bin_stream import bin_stream_vm -from miasm2.core.asmblock import expr_is_label from miasm2.jitter.emulatedsymbexec import EmulatedSymbExec from miasm2.expression.expression_helper import possible_values from miasm2.ir.translators import Translator from miasm2.analysis.expression_range import expr_range from miasm2.analysis.modularintervals import ModularIntervals - +from miasm2.core.locationdb import LocationDB DriftInfo = namedtuple("DriftInfo", ["symbol", "computed", "expected"]) @@ -72,7 +71,7 @@ class DriftException(Exception): """Raised when the emulation drift from the reference engine""" def __init__(self, info): - super(Exception, self).__init__() + super(DriftException, self).__init__() self.info = info def __str__(self): @@ -150,10 +149,12 @@ class DSEEngine(object): def __init__(self, machine): self.machine = machine + self.loc_db = LocationDB() self.handler = {} # addr -> callback(DSEEngine instance) self.instrumentation = {} # addr -> callback(DSEEngine instance) self.addr_to_cacheblocks = {} # addr -> {label -> IRBlock} - self.ir_arch = self.machine.ir() # corresponding IR + self.ir_arch = self.machine.ir(loc_db=self.loc_db) # corresponding IR + self.ircfg = self.ir_arch.new_ircfg() # corresponding IR # Defined after attachment self.jitter = None # Jitload (concrete execution) @@ -165,20 +166,24 @@ class DSEEngine(object): """Prepare the environment for attachment with a jitter""" # Disassembler self.mdis = self.machine.dis_engine(bin_stream_vm(self.jitter.vm), - lines_wd=1) + lines_wd=1, + loc_db=self.loc_db) # Symbexec engine ## Prepare symbexec engines self.symb = self.SYMB_ENGINE(self.jitter.cpu, 
self.jitter.vm, self.ir_arch, {}) self.symb.enable_emulated_simplifications() - self.symb_concrete = EmulatedSymbExec(self.jitter.cpu, self.jitter.vm, - self.ir_arch, {}) + self.symb_concrete = EmulatedSymbExec( + self.jitter.cpu, self.jitter.vm, + self.ir_arch, {} + ) ## Update registers value - self.symb.symbols[self.ir_arch.IRDst] = ExprInt(getattr(self.jitter.cpu, - self.ir_arch.pc.name), - self.ir_arch.IRDst.size) + self.symb.symbols[self.ir_arch.IRDst] = ExprInt( + getattr(self.jitter.cpu, self.ir_arch.pc.name), + self.ir_arch.IRDst.size + ) # Avoid memory write self.symb.func_write = None @@ -188,7 +193,7 @@ class DSEEngine(object): self.jitter.exec_cb = self.callback # Clean jit cache to avoid multi-line basic blocks already jitted - self.jitter.jit.lbl2jitbloc.clear() + self.jitter.jit.clear_jitted_blocks() def attach(self, emulator): """Attach the DSE to @emulator @@ -215,9 +220,9 @@ class DSEEngine(object): self.prepare() def handle(self, cur_addr): - """Handle destination + r"""Handle destination @cur_addr: Expr of the next address in concrete execution - /!\ cur_addr may be a lbl_gen + /!\ cur_addr may be a loc_key In this method, self.symb is in the "just before branching" state """ @@ -295,6 +300,9 @@ class DSEEngine(object): # Call callbacks associated to the current address cur_addr = self.jitter.pc + if isinstance(cur_addr, LocKey): + lbl = self.ir_arch.loc_db.loc_key_to_label(cur_addr) + cur_addr = lbl.offset if cur_addr in self.handler: self.handler[cur_addr](self) @@ -312,24 +320,24 @@ class DSEEngine(object): # Get IR blocks if cur_addr in self.addr_to_cacheblocks: - self.ir_arch.blocks.clear() - self.ir_arch.blocks.update(self.addr_to_cacheblocks[cur_addr]) + self.ircfg.blocks.clear() + self.ircfg.blocks.update(self.addr_to_cacheblocks[cur_addr]) else: ## Reset cache structures - self.ir_arch.blocks.clear()# = {} + self.ircfg.blocks.clear()# = {} ## Update current state asm_block = self.mdis.dis_block(cur_addr) - self.ir_arch.add_block(asm_block) - self.addr_to_cacheblocks[cur_addr] = dict(self.ir_arch.blocks) + self.ir_arch.add_asmblock_to_ircfg(asm_block, self.ircfg) + self.addr_to_cacheblocks[cur_addr] = dict(self.ircfg.blocks) # Emulate the current instruction self.symb.reset_modified() # Is the symbolic execution going (potentially) to jump on a lbl_gen? 
- if len(self.ir_arch.blocks) == 1: - next_addr = self.symb.run_at(cur_addr) + if len(self.ircfg.blocks) == 1: + self.symb.run_at(self.ircfg, cur_addr) else: # Emulation could stuck in generated IR blocks # But concrete execution callback is not enough precise to obtain @@ -339,11 +347,16 @@ class DSEEngine(object): # Update the concrete execution self._update_state_from_concrete_symb(self.symb_concrete) while True: - next_addr_concrete = self.symb_concrete.run_block_at(cur_addr) - self.symb.run_block_at(cur_addr) - if not(expr_is_label(next_addr_concrete) and - next_addr_concrete.name.offset is None): + next_addr_concrete = self.symb_concrete.run_block_at( + self.ircfg, cur_addr + ) + self.symb.run_block_at(self.ircfg, cur_addr) + + if not (isinstance(next_addr_concrete, ExprLoc) and + self.ir_arch.loc_db.get_location_offset( + next_addr_concrete.loc_key + ) is None): # Not a lbl_gen, exit break @@ -351,6 +364,7 @@ class DSEEngine(object): self.handle(next_addr_concrete) cur_addr = next_addr_concrete + # At this stage, symbolic engine is one instruction after the concrete # engine @@ -428,7 +442,7 @@ class DSEEngine(object): symbexec.symbols[reg] = value def update_state_from_concrete(self, cpu=True, mem=False): - """Update the symbolic state with concrete values from the concrete + r"""Update the symbolic state with concrete values from the concrete engine @cpu: (optional) if set, update registers' value @@ -596,13 +610,19 @@ class DSEPathConstraint(DSEEngine): self.cur_solver.add(self.z3_trans.from_expr(cons)) def handle(self, cur_addr): + cur_addr = self.ir_arch.loc_db.canonize_to_exprloc(cur_addr) symb_pc = self.eval_expr(self.ir_arch.IRDst) possibilities = possible_values(symb_pc) cur_path_constraint = set() # path_constraint for the concrete path if len(possibilities) == 1: - assert next(iter(possibilities)).value == cur_addr + dst = next(iter(possibilities)).value + dst = self.ir_arch.loc_db.canonize_to_exprloc(dst) + assert dst == cur_addr else: for possibility in possibilities: + target_addr = self.ir_arch.loc_db.canonize_to_exprloc( + possibility.value + ) path_constraint = set() # Set of ExprAff for the possible path # Get constraint associated to the possible path @@ -642,11 +662,11 @@ class DSEPathConstraint(DSEEngine): "address 0x%x" % address) path_constraint.add(ExprAff(expr_mem, value)) - if possibility.value == cur_addr: + if target_addr == cur_addr: # Add path constraint cur_path_constraint = path_constraint - elif self.produce_solution(possibility.value): + elif self.produce_solution(target_addr): # Looking for a new solution self.cur_solver.push() for cons in path_constraint: @@ -657,8 +677,7 @@ class DSEPathConstraint(DSEEngine): result = self.cur_solver.check() if result == z3.sat: model = self.cur_solver.model() - self.handle_solution(model, possibility.value) + self.handle_solution(model, target_addr) self.cur_solver.pop() self.handle_correct_destination(cur_addr, cur_path_constraint) - diff --git a/miasm2/analysis/sandbox.py b/miasm2/analysis/sandbox.py index e77b1669..b1147adb 100644 --- a/miasm2/analysis/sandbox.py +++ b/miasm2/analysis/sandbox.py @@ -57,16 +57,15 @@ class Sandbox(object): cls.__init__(self, **kwargs) # Logging options - if self.options.singlestep: - self.jitter.jit.log_mn = True - self.jitter.jit.log_regs = True + self.jitter.set_trace_log( + trace_instr=self.options.singlestep, + trace_regs=self.options.singlestep, + trace_new_blocks=self.options.dumpblocs + ) if not self.options.quiet_function_calls: log_func.setLevel(logging.INFO) - if 
self.options.dumpblocs: - self.jitter.jit.log_newbloc = True - @classmethod def parser(cls, *args, **kwargs): """
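Likewise, DependencyGraph is now built from the IR CFG rather than the IRA instance, and DependencyResult.emul() takes the IR descriptor explicitly. The sketch below is illustrative only, reusing ir_arch/ircfg from the snippet above; the address, register (x86 EAX) and line index are placeholder assumptions:

    from miasm2.analysis.depgraph import DependencyGraph

    dg = DependencyGraph(ircfg, implicit=False)                # was DependencyGraph(ira)
    target_loc = ircfg.loc_db.get_offset_location(0x401000)    # hypothetical block address
    elements = set([ir_arch.arch.regs.EAX])                    # assumed x86 target register
    for sol in dg.get(target_loc, elements, 0, set(ircfg.heads())):
        print sol.emul(ir_arch)                                # emul() now requires ir_arch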