diff options
Diffstat (limited to 'miasm2')
| -rw-r--r-- | miasm2/analysis/cst_propag.py | 40 | ||||
| -rw-r--r-- | miasm2/analysis/data_analysis.py | 15 | ||||
| -rw-r--r-- | miasm2/analysis/data_flow.py | 42 | ||||
| -rw-r--r-- | miasm2/analysis/depgraph.py | 49 | ||||
| -rw-r--r-- | miasm2/analysis/disasm_cb.py | 10 | ||||
| -rw-r--r-- | miasm2/analysis/dse.py | 40 | ||||
| -rw-r--r-- | miasm2/arch/arm/sem.py | 18 | ||||
| -rw-r--r-- | miasm2/arch/mips32/ira.py | 8 | ||||
| -rw-r--r-- | miasm2/arch/ppc/ira.py | 32 | ||||
| -rw-r--r-- | miasm2/ir/analysis.py | 47 | ||||
| -rw-r--r-- | miasm2/ir/ir.py | 604 | ||||
| -rw-r--r-- | miasm2/ir/symbexec.py | 21 | ||||
| -rw-r--r-- | miasm2/ir/symbexec_top.py | 1 | ||||
| -rw-r--r-- | miasm2/ir/symbexec_types.py | 6 | ||||
| -rw-r--r-- | miasm2/jitter/jitcore.py | 4 | ||||
| -rw-r--r-- | miasm2/jitter/jitcore_python.py | 9 | ||||
| -rw-r--r-- | miasm2/jitter/jitload.py | 5 |
17 files changed, 508 insertions, 443 deletions
diff --git a/miasm2/analysis/cst_propag.py b/miasm2/analysis/cst_propag.py index 4b5d7834..7f74324f 100644 --- a/miasm2/analysis/cst_propag.py +++ b/miasm2/analysis/cst_propag.py @@ -17,21 +17,20 @@ class SymbExecState(SymbolicExecutionEngine): """ State manager for SymbolicExecution """ - def __init__(self, ir_arch, state): + def __init__(self, ir_arch, ircfg, state): super(SymbExecState, self).__init__(ir_arch, {}) self.set_state(state) -def add_state(ir_arch, todo, states, addr, state): +def add_state(ircfg, todo, states, addr, state): """ Add or merge the computed @state for the block at @addr. Update @todo - @ir_arch: IR instance @todo: modified block set @states: dictionnary linking a label to its entering state. @addr: address of the concidered block @state: computed state """ - addr = ir_arch.get_loc_key(addr) + addr = ircfg.get_loc_key(addr) todo.add(addr) if addr not in states: states[addr] = state @@ -67,7 +66,8 @@ class SymbExecStateFix(SymbolicExecutionEngine): # Function used to test if an Expression is considered as a constant is_expr_cst = lambda _, ir_arch, expr: is_expr_cst(ir_arch, expr) - def __init__(self, ir_arch, state, cst_propag_link): + def __init__(self, ir_arch, ircfg, state, cst_propag_link): + self.ircfg = ircfg super(SymbExecStateFix, self).__init__(ir_arch, {}) self.set_state(state) self.cst_propag_link = cst_propag_link @@ -112,10 +112,10 @@ class SymbExecStateFix(SymbolicExecutionEngine): self.eval_updt_assignblk(assignblk) assignblks.append(AssignBlock(new_assignblk, assignblk.instr)) - self.ir_arch.blocks[irb.loc_key] = IRBlock(irb.loc_key, assignblks) + self.ircfg.blocks[irb.loc_key] = IRBlock(irb.loc_key, assignblks) -def compute_cst_propagation_states(ir_arch, init_addr, init_infos): +def compute_cst_propagation_states(ir_arch, ircfg, init_addr, init_infos): """ Propagate "constant expressions" in a function. The attribute "constant expression" is true if the expression is based on @@ -128,7 +128,7 @@ def compute_cst_propagation_states(ir_arch, init_addr, init_infos): done = set() state = SymbExecState.StateEngine(init_infos) - lbl = ir_arch.get_loc_key(init_addr) + lbl = ircfg.get_loc_key(init_addr) todo = set([lbl]) states = {lbl: state} @@ -140,11 +140,11 @@ def compute_cst_propagation_states(ir_arch, init_addr, init_infos): if (lbl, state) in done: continue done.add((lbl, state)) - if lbl not in ir_arch.blocks: + if lbl not in ircfg.blocks: continue - symbexec_engine = SymbExecState(ir_arch, state) - addr = symbexec_engine.run_block_at(lbl) + symbexec_engine = SymbExecState(ir_arch, ircfg, state) + addr = symbexec_engine.run_block_at(ircfg, lbl) symbexec_engine.del_mem_above_stack(ir_arch.sp) for dst in possible_values(addr): @@ -153,14 +153,16 @@ def compute_cst_propagation_states(ir_arch, init_addr, init_infos): LOG_CST_PROPAG.warning('Bad destination: %s', value) continue elif value.is_int(): - value = ir_arch.get_loc_key(value) - add_state(ir_arch, todo, states, value, - symbexec_engine.get_state()) + value = ircfg.get_loc_key(value) + add_state( + ircfg, todo, states, value, + symbexec_engine.get_state() + ) return states -def propagate_cst_expr(ir_arch, addr, init_infos): +def propagate_cst_expr(ir_arch, ircfg, addr, init_infos): """ Propagate "constant expressions" in a @ir_arch. The attribute "constant expression" is true if the expression is based on @@ -172,11 +174,11 @@ def propagate_cst_expr(ir_arch, addr, init_infos): Returns a mapping between replaced Expression and their new values. """ - states = compute_cst_propagation_states(ir_arch, addr, init_infos) + states = compute_cst_propagation_states(ir_arch, ircfg, addr, init_infos) cst_propag_link = {} for lbl, state in states.iteritems(): - if lbl not in ir_arch.blocks: + if lbl not in ircfg.blocks: continue - symbexec = SymbExecStateFix(ir_arch, state, cst_propag_link) - symbexec.eval_updt_irblock(ir_arch.blocks[lbl]) + symbexec = SymbExecStateFix(ir_arch, ircfg, state, cst_propag_link) + symbexec.eval_updt_irblock(ircfg.blocks[lbl]) return cst_propag_link diff --git a/miasm2/analysis/data_analysis.py b/miasm2/analysis/data_analysis.py index aa1c0d1a..9c21fd51 100644 --- a/miasm2/analysis/data_analysis.py +++ b/miasm2/analysis/data_analysis.py @@ -8,7 +8,7 @@ def get_node_name(label, i, n): return n_name -def intra_block_flow_raw(ir_arch, flow_graph, irb, in_nodes, out_nodes): +def intra_block_flow_raw(ir_arch, ircfg, flow_graph, irb, in_nodes, out_nodes): """ Create data flow for an irbloc using raw IR expressions """ @@ -58,15 +58,16 @@ def intra_block_flow_raw(ir_arch, flow_graph, irb, in_nodes, out_nodes): flow_graph.add_uniq_edge(node_n_r, node_n_w) -def inter_block_flow_link(ir_arch, flow_graph, irb_in_nodes, irb_out_nodes, todo, link_exec_to_data): + +def inter_block_flow_link(ir_arch, ircfg, flow_graph, irb_in_nodes, irb_out_nodes, todo, link_exec_to_data): lbl, current_nodes, exec_nodes = todo current_nodes = dict(current_nodes) # link current nodes to bloc in_nodes - if not lbl in ir_arch.blocks: + if not lbl in ircfg.blocks: print "cannot find bloc!!", lbl return set() - irb = ir_arch.blocks[lbl] + irb = ircfg.blocks[lbl] to_del = set() for n_r, node_n_r in irb_in_nodes[irb.loc_key].items(): if not n_r in current_nodes: @@ -92,7 +93,7 @@ def inter_block_flow_link(ir_arch, flow_graph, irb_in_nodes, irb_out_nodes, todo x_nodes = tuple(sorted(list(irb.dst.get_r()))) todo = set() - for lbl_dst in ir_arch.graph.successors(irb.loc_key): + for lbl_dst in ircfg.successors(irb.loc_key): todo.add((lbl_dst, tuple(current_nodes.items()), x_nodes)) return todo @@ -128,7 +129,7 @@ def create_implicit_flow(ir_arch, flow_graph, irb_in_nodes, irb_out_ndes): flow_graph.add_uniq_edge(node_n_r, node_n_w) -def inter_block_flow(ir_arch, flow_graph, irb_0, irb_in_nodes, irb_out_nodes, link_exec_to_data=True): +def inter_block_flow(ir_arch, ircfg, flow_graph, irb_0, irb_in_nodes, irb_out_nodes, link_exec_to_data=True): todo = set() done = set() @@ -139,7 +140,7 @@ def inter_block_flow(ir_arch, flow_graph, irb_0, irb_in_nodes, irb_out_nodes, li if state in done: continue done.add(state) - out = inter_block_flow_link(ir_arch, flow_graph, irb_in_nodes, irb_out_nodes, state, link_exec_to_data) + out = inter_block_flow_link(ir_arch, ircfg, flow_graph, irb_in_nodes, irb_out_nodes, state, link_exec_to_data) todo.update(out) diff --git a/miasm2/analysis/data_flow.py b/miasm2/analysis/data_flow.py index e780f70c..9e5203a6 100644 --- a/miasm2/analysis/data_flow.py +++ b/miasm2/analysis/data_flow.py @@ -29,16 +29,16 @@ class ReachingDefinitions(dict): { (block, index): { lvalue: set((block, index)) } } """ - ir_a = None + ircfg = None - def __init__(self, ir_a): + def __init__(self, ircfg): super(ReachingDefinitions, self).__init__() - self.ir_a = ir_a + self.ircfg = ircfg self.compute() def get_definitions(self, block_lbl, assignblk_index): """Returns the dict { lvalue: set((def_block_lbl, def_index)) } - associated with self.ir_a.@block.assignblks[@assignblk_index] + associated with self.ircfg.@block.assignblks[@assignblk_index] or {} if it is not yet computed """ return self.get((block_lbl, assignblk_index), {}) @@ -48,7 +48,7 @@ class ReachingDefinitions(dict): modified = True while modified: modified = False - for block in self.ir_a.blocks.itervalues(): + for block in self.ircfg.blocks.itervalues(): modified |= self.process_block(block) def process_block(self, block): @@ -57,8 +57,8 @@ class ReachingDefinitions(dict): the assignblk in block @block. """ predecessor_state = {} - for pred_lbl in self.ir_a.graph.predecessors(block.loc_key): - pred = self.ir_a.blocks[pred_lbl] + for pred_lbl in self.ircfg.predecessors(block.loc_key): + pred = self.ircfg.blocks[pred_lbl] for lval, definitions in self.get_definitions(pred_lbl, len(pred)).iteritems(): predecessor_state.setdefault(lval, set()).update(definitions) @@ -126,7 +126,7 @@ class DiGraphDefUse(DiGraph): # For dot display self._filter_node = None self._dot_offset = None - self._blocks = reaching_defs.ir_a.blocks + self._blocks = reaching_defs.ircfg.blocks super(DiGraphDefUse, self).__init__(*args, **kwargs) self._compute_def_use(reaching_defs, @@ -189,7 +189,7 @@ class DiGraphDefUse(DiGraph): yield self.DotCellDescription(text="", attr={}) -def dead_simp_useful_assignblks(defuse, reaching_defs): +def dead_simp_useful_assignblks(irarch, defuse, reaching_defs): """Mark useful statements using previous reach analysis and defuse Source : Kennedy, K. (1979). A survey of data flow analysis techniques. @@ -200,13 +200,13 @@ def dead_simp_useful_assignblks(defuse, reaching_defs): PRE: compute_reach(self) """ - ir_a = reaching_defs.ir_a + ircfg = reaching_defs.ircfg useful = set() - for block_lbl, block in ir_a.blocks.iteritems(): - successors = ir_a.graph.successors(block_lbl) + for block_lbl, block in ircfg.blocks.iteritems(): + successors = ircfg.successors(block_lbl) for successor in successors: - if successor not in ir_a.blocks: + if successor not in ircfg.blocks: keep_all_definitions = True break else: @@ -217,7 +217,7 @@ def dead_simp_useful_assignblks(defuse, reaching_defs): valid_definitions = reaching_defs.get_definitions(block_lbl, len(block)) for lval, definitions in valid_definitions.iteritems(): - if (lval in ir_a.get_out_regs(block) + if (lval in irarch.get_out_regs(block) or keep_all_definitions): for definition in definitions: useful.add(AssignblkNode(definition[0], definition[1], lval)) @@ -226,7 +226,7 @@ def dead_simp_useful_assignblks(defuse, reaching_defs): for index, assignblk in enumerate(block): for lval, rval in assignblk.iteritems(): if (lval.is_mem() - or ir_a.IRDst == lval + or irarch.IRDst == lval or rval.is_function_call()): useful.add(AssignblkNode(block_lbl, index, lval)) @@ -235,7 +235,7 @@ def dead_simp_useful_assignblks(defuse, reaching_defs): for parent in defuse.reachable_parents(node): yield parent -def dead_simp(ir_a): +def dead_simp(irarch, ircfg): """ Remove useless affectations. @@ -245,14 +245,14 @@ def dead_simp(ir_a): Source : Kennedy, K. (1979). A survey of data flow analysis techniques. IBM Thomas J. Watson Research Division, page 43 - @ir_a: IntermediateRepresentation instance + @ircfg: IntermediateRepresentation instance """ modified = False - reaching_defs = ReachingDefinitions(ir_a) + reaching_defs = ReachingDefinitions(ircfg) defuse = DiGraphDefUse(reaching_defs, deref_mem=True) - useful = set(dead_simp_useful_assignblks(defuse, reaching_defs)) - for block in ir_a.blocks.itervalues(): + useful = set(dead_simp_useful_assignblks(irarch, defuse, reaching_defs)) + for block in ircfg.blocks.itervalues(): irs = [] for idx, assignblk in enumerate(block): new_assignblk = dict(assignblk) @@ -261,5 +261,5 @@ def dead_simp(ir_a): del new_assignblk[lval] modified = True irs.append(AssignBlock(new_assignblk, assignblk.instr)) - ir_a.blocks[block.loc_key] = IRBlock(block.loc_key, irs) + ircfg.blocks[block.loc_key] = IRBlock(block.loc_key, irs) return modified diff --git a/miasm2/analysis/depgraph.py b/miasm2/analysis/depgraph.py index 11476f79..93b3edb5 100644 --- a/miasm2/analysis/depgraph.py +++ b/miasm2/analysis/depgraph.py @@ -194,7 +194,7 @@ class DependencyResult(DependencyState): """Container and methods for DependencyGraph results""" - def __init__(self, ira, initial_state, state, inputs): + def __init__(self, ircfg, initial_state, state, inputs): self.initial_state = initial_state self.loc_key = state.loc_key self.history = state.history @@ -202,7 +202,7 @@ class DependencyResult(DependencyState): self.line_nb = state.line_nb self.inputs = inputs self.links = state.links - self._ira = ira + self._ircfg = ircfg # Init lazy elements self._graph = None @@ -212,7 +212,7 @@ class DependencyResult(DependencyState): def unresolved(self): """Set of nodes whose dependencies weren't found""" return set(element for element in self.pending - if element != self._ira.IRDst) + if element != self._ircfg.IRDst) @property def relevant_nodes(self): @@ -272,9 +272,10 @@ class DependencyResult(DependencyState): return IRBlock(irb.loc_key, assignblks) - def emul(self, ctx=None, step=False): + def emul(self, ir_arch, ctx=None, step=False): """Symbolic execution of relevant nodes according to the history Return the values of inputs nodes' elements + @ir_arch: IntermediateRepresentation instance @ctx: (optional) Initial context as dictionnary @step: (optional) Verbose execution Warning: The emulation is not sound if the inputs nodes depend on loop @@ -293,13 +294,13 @@ class DependencyResult(DependencyState): line_nb = self.initial_state.line_nb else: line_nb = None - assignblks += self.irblock_slice(self._ira.blocks[loc_key], + assignblks += self.irblock_slice(self._ircfg.blocks[loc_key], line_nb).assignblks # Eval the block loc_db = LocationDB() temp_loc = loc_db.get_or_create_name_location("Temp") - symb_exec = SymbolicExecutionEngine(self._ira, ctx_init) + symb_exec = SymbolicExecutionEngine(ir_arch, ctx_init) symb_exec.eval_updt_irblock(IRBlock(temp_loc, assignblks), step=step) # Return only inputs values (others could be wrongs) @@ -322,10 +323,10 @@ class DependencyResultImplicit(DependencyResult): generated loc_keys """ out = [] - expected = self._ira.loc_db.canonize_to_exprloc(expected) + expected = self._ircfg.loc_db.canonize_to_exprloc(expected) expected_is_loc_key = expected.is_loc() for consval in possible_values(expr): - value = self._ira.loc_db.canonize_to_exprloc(consval.value) + value = self._ircfg.loc_db.canonize_to_exprloc(consval.value) if expected_is_loc_key and value != expected: continue if not expected_is_loc_key and value.is_loc_key(): @@ -350,24 +351,24 @@ class DependencyResultImplicit(DependencyResult): conds = translator.from_expr(self.unsat_expr) return conds - def emul(self, ctx=None, step=False): + def emul(self, ir_arch, ctx=None, step=False): # Init ctx_init = {} if ctx is not None: ctx_init.update(ctx) solver = z3.Solver() - symb_exec = SymbolicExecutionEngine(self._ira, ctx_init) + symb_exec = SymbolicExecutionEngine(ir_arch, ctx_init) history = self.history[::-1] history_size = len(history) translator = Translator.to_language("z3") - size = self._ira.IRDst.size + size = self._ircfg.IRDst.size for hist_nb, loc_key in enumerate(history, 1): if hist_nb == history_size and loc_key == self.initial_state.loc_key: line_nb = self.initial_state.line_nb else: line_nb = None - irb = self.irblock_slice(self._ira.blocks[loc_key], line_nb) + irb = self.irblock_slice(self._ircfg.blocks[loc_key], line_nb) # Emul the block and get back destination dst = symb_exec.eval_updt_irblock(irb, step=step) @@ -446,12 +447,12 @@ class DependencyGraph(object): *explicitely* or *implicitely* involved in the equation of given element. """ - def __init__(self, ira, implicit=False, apply_simp=True, follow_mem=True, + def __init__(self, ircfg, + implicit=False, apply_simp=True, follow_mem=True, follow_call=True): - """Create a DependencyGraph linked to @ira - The IRA graph must have been computed + """Create a DependencyGraph linked to @ircfg - @ira: IRAnalysis instance + @ircfg: DiGraphIR instance @implicit: (optional) Track IRDst for each block in the resulting path Following arguments define filters used to generate dependencies @@ -460,7 +461,7 @@ class DependencyGraph(object): @follow_call: (optional) Track through "call" """ # Init - self._ira = ira + self._ircfg = ircfg self._implicit = implicit # Create callback filters. The order is relevant. @@ -563,7 +564,7 @@ class DependencyGraph(object): if dst not in state.pending: continue # Track IRDst in implicit mode only - if dst == self._ira.IRDst and not self._implicit: + if dst == self._ircfg.IRDst and not self._implicit: continue assert dst not in node_resolved node_resolved.add(dst) @@ -581,7 +582,7 @@ class DependencyGraph(object): """Follow dependencies tracked in @state in the current irbloc @state: instance of DependencyState""" - irb = self._ira.blocks[state.loc_key] + irb = self._ircfg.blocks[state.loc_key] line_nb = len(irb) if state.line_nb is None else state.line_nb for cur_line_nb, assignblk in reversed(list(enumerate(irb[:line_nb]))): @@ -589,7 +590,7 @@ class DependencyGraph(object): def get(self, loc_key, elements, line_nb, heads): """Compute the dependencies of @elements at line number @line_nb in - the block named @loc_key in the current IRA, before the execution of + the block named @loc_key in the current DiGraphIR, before the execution of this line. Dependency check stop if one of @heads is reached @loc_key: LocKey instance @element: set of Expr instances @@ -613,17 +614,17 @@ class DependencyGraph(object): done.add(done_state) if (not state.pending or state.loc_key in heads or - not self._ira.graph.predecessors(state.loc_key)): - yield dpResultcls(self._ira, initial_state, state, elements) + not self._ircfg.predecessors(state.loc_key)): + yield dpResultcls(self._ircfg, initial_state, state, elements) if not state.pending: continue if self._implicit: # Force IRDst to be tracked, except in the input block - state.pending[self._ira.IRDst] = set() + state.pending[self._ircfg.IRDst] = set() # Propagate state to parents - for pred in self._ira.graph.predecessors_iter(state.loc_key): + for pred in self._ircfg.predecessors_iter(state.loc_key): todo.add(state.extend(pred)) def get_from_depnodes(self, depnodes, heads): diff --git a/miasm2/analysis/disasm_cb.py b/miasm2/analysis/disasm_cb.py index bb8223e8..d3278cb4 100644 --- a/miasm2/analysis/disasm_cb.py +++ b/miasm2/analysis/disasm_cb.py @@ -26,11 +26,12 @@ def arm_guess_subcall( sp = LocationDB() ir_arch = ira(sp) + ircfg = ira.new_ircfg() print '###' print cur_bloc - ir_arch.add_block(cur_bloc) + ir_arch.add_asmblock_to_ircfg(cur_bloc, ircfg) - ir_blocks = ir_arch.blocks.values() + ir_blocks = ircfg.blocks.values() to_add = set() for irblock in ir_blocks: pc_val = None @@ -68,9 +69,10 @@ def arm_guess_jump_table( sp = LocationDB() ir_arch = ira(sp) - ir_arch.add_block(cur_bloc) + ircfg = ira.new_ircfg() + ir_arch.add_asmblock_to_ircfg(cur_bloc, ircfg) - ir_blocks = ir_arch.blocks.values() + ir_blocks = ircfg.blocks.values() for irblock in ir_blocks: pc_val = None for exprs in irblock: diff --git a/miasm2/analysis/dse.py b/miasm2/analysis/dse.py index 87d11e0a..0c01610f 100644 --- a/miasm2/analysis/dse.py +++ b/miasm2/analysis/dse.py @@ -63,6 +63,7 @@ from miasm2.expression.expression_helper import possible_values from miasm2.ir.translators import Translator from miasm2.analysis.expression_range import expr_range from miasm2.analysis.modularintervals import ModularIntervals +from miasm2.core.locationdb import LocationDB DriftInfo = namedtuple("DriftInfo", ["symbol", "computed", "expected"]) @@ -148,10 +149,12 @@ class DSEEngine(object): def __init__(self, machine): self.machine = machine + self.loc_db = LocationDB() self.handler = {} # addr -> callback(DSEEngine instance) self.instrumentation = {} # addr -> callback(DSEEngine instance) self.addr_to_cacheblocks = {} # addr -> {label -> IRBlock} - self.ir_arch = self.machine.ir() # corresponding IR + self.ir_arch = self.machine.ir(loc_db=self.loc_db) # corresponding IR + self.ircfg = self.ir_arch.new_ircfg() # corresponding IR # Defined after attachment self.jitter = None # Jitload (concrete execution) @@ -159,8 +162,6 @@ class DSEEngine(object): self.symb_concrete = None # Concrete SymbExec for path desambiguisation self.mdis = None # DisasmEngine - self.loc_db = self.ir_arch.loc_db - def prepare(self): """Prepare the environment for attachment with a jitter""" # Disassembler @@ -173,13 +174,16 @@ class DSEEngine(object): self.symb = self.SYMB_ENGINE(self.jitter.cpu, self.jitter.vm, self.ir_arch, {}) self.symb.enable_emulated_simplifications() - self.symb_concrete = EmulatedSymbExec(self.jitter.cpu, self.jitter.vm, - self.ir_arch, {}) + self.symb_concrete = EmulatedSymbExec( + self.jitter.cpu, self.jitter.vm, + self.ir_arch, {} + ) ## Update registers value - self.symb.symbols[self.ir_arch.IRDst] = ExprInt(getattr(self.jitter.cpu, - self.ir_arch.pc.name), - self.ir_arch.IRDst.size) + self.symb.symbols[self.ir_arch.IRDst] = ExprInt( + getattr(self.jitter.cpu, self.ir_arch.pc.name), + self.ir_arch.IRDst.size + ) # Avoid memory write self.symb.func_write = None @@ -316,24 +320,24 @@ class DSEEngine(object): # Get IR blocks if cur_addr in self.addr_to_cacheblocks: - self.ir_arch.blocks.clear() - self.ir_arch.blocks.update(self.addr_to_cacheblocks[cur_addr]) + self.ircfg.blocks.clear() + self.ircfg.blocks.update(self.addr_to_cacheblocks[cur_addr]) else: ## Reset cache structures - self.ir_arch.blocks.clear()# = {} + self.ircfg.blocks.clear()# = {} ## Update current state asm_block = self.mdis.dis_block(cur_addr) - self.ir_arch.add_block(asm_block) - self.addr_to_cacheblocks[cur_addr] = dict(self.ir_arch.blocks) + self.ir_arch.add_asmblock_to_ircfg(asm_block, self.ircfg) + self.addr_to_cacheblocks[cur_addr] = dict(self.ircfg.blocks) # Emulate the current instruction self.symb.reset_modified() # Is the symbolic execution going (potentially) to jump on a lbl_gen? - if len(self.ir_arch.blocks) == 1: - self.symb.run_at(cur_addr) + if len(self.ircfg.blocks) == 1: + self.symb.run_at(self.ircfg, cur_addr) else: # Emulation could stuck in generated IR blocks # But concrete execution callback is not enough precise to obtain @@ -344,8 +348,10 @@ class DSEEngine(object): self._update_state_from_concrete_symb(self.symb_concrete) while True: - next_addr_concrete = self.symb_concrete.run_block_at(cur_addr) - self.symb.run_block_at(cur_addr) + next_addr_concrete = self.symb_concrete.run_block_at( + self.ircfg, cur_addr + ) + self.symb.run_block_at(self.ircfg, cur_addr) if not (isinstance(next_addr_concrete, ExprLoc) and self.ir_arch.loc_db.get_location_offset( diff --git a/miasm2/arch/arm/sem.py b/miasm2/arch/arm/sem.py index a3d12514..00250157 100644 --- a/miasm2/arch/arm/sem.py +++ b/miasm2/arch/arm/sem.py @@ -1572,8 +1572,10 @@ class ir_arml(IntermediateRepresentation): assignments = [] loc = loc_do - split = self.add_instr_to_irblock(block, instr, assignments, - irblocks, gen_pc_updt) + split = self.add_instr_to_current_state( + instr, block, assignments, + irblocks, gen_pc_updt + ) if split: raise NotImplementedError("Unsupported instr in IT block (%s)" % instr) @@ -1587,7 +1589,7 @@ class ir_arml(IntermediateRepresentation): ir_blocks_all.append(irblocks) return index, ir_blocks_all - def add_block(self, block, gen_pc_updt=False): + def add_asmblock_to_ircfg(self, block, ircfg, gen_pc_updt=False): """ Add a native block to the current IR @block: native assembly block @@ -1613,8 +1615,10 @@ class ir_arml(IntermediateRepresentation): label = None continue - split = self.add_instr_to_irblock(block, instr, assignments, - ir_blocks_all, gen_pc_updt) + split = self.add_instr_to_current_state( + instr, block, assignments, + ir_blocks_all, gen_pc_updt + ) if split: ir_blocks_all.append(IRBlock(label, assignments)) label = None @@ -1622,9 +1626,9 @@ class ir_arml(IntermediateRepresentation): if label is not None: ir_blocks_all.append(IRBlock(label, assignments)) - new_ir_blocks_all = self.post_add_block(block, ir_blocks_all) + new_ir_blocks_all = self.post_add_asmblock_to_ircfg(block, ircfg, ir_blocks_all) for irblock in new_ir_blocks_all: - self.blocks[irblock.loc_key] = irblock + ircfg.add_irblock(irblock) return new_ir_blocks_all diff --git a/miasm2/arch/mips32/ira.py b/miasm2/arch/mips32/ira.py index 53c2c6b3..3caa8b12 100644 --- a/miasm2/arch/mips32/ira.py +++ b/miasm2/arch/mips32/ira.py @@ -10,12 +10,8 @@ class ir_a_mips32l(ir_mips32l, ira): ir_mips32l.__init__(self, loc_db) self.ret_reg = self.arch.regs.V0 - def pre_add_instr(self, block, instr, assignments, ir_blocks_all, gen_pc_updt): - # Avoid adding side effects, already done in post_add_bloc - return False - - def post_add_block(self, block, ir_blocks): - IntermediateRepresentation.post_add_block(self, block, ir_blocks) + def post_add_asmblock_to_ircfg(self, block, ircfg, ir_blocks): + IntermediateRepresentation.post_add_asmblock_to_ircfg(self, block, ircfg, ir_blocks) new_irblocks = [] for irb in ir_blocks: pc_val = None diff --git a/miasm2/arch/ppc/ira.py b/miasm2/arch/ppc/ira.py index 76a979ae..a30f972d 100644 --- a/miasm2/arch/ppc/ira.py +++ b/miasm2/arch/ppc/ira.py @@ -35,14 +35,30 @@ class ir_a_ppc32b(ir_ppc32b, ira): instr )] - def pre_add_instr(self, block, instr, assignments, ir_blocks_all, gen_pc_update): - """Replace function call with corresponding call effects, - inside the IR block""" - if not instr.is_subcall(): - return False - call_effects = self.call_effects(instr.getdstflow(None)[0], instr) - assignments+= call_effects - return True + def add_instr_to_current_state(self, instr, block, assignments, ir_blocks_all, gen_pc_updt): + """ + Add the IR effects of an instruction to the current state. + + @instr: native instruction + @block: native block source + @assignments: list of current AssignBlocks + @ir_blocks_all: list of additional effects + @gen_pc_updt: insert PC update effects between instructions + """ + if instr.is_subcall(): + call_effects = self.call_effects(instr.getdstflow(None)[0], instr) + assignments+= call_effects + return True + + if gen_pc_updt is not False: + self.gen_pc_update(assignments, instr) + + assignblk, ir_blocks_extra = self.instr2ir(instr) + assignments.append(assignblk) + ir_blocks_all += ir_blocks_extra + if ir_blocks_extra: + return True + return False def sizeof_char(self): return 8 diff --git a/miasm2/ir/analysis.py b/miasm2/ir/analysis.py index fc0c81c9..962b9889 100644 --- a/miasm2/ir/analysis.py +++ b/miasm2/ir/analysis.py @@ -3,11 +3,11 @@ import warnings import logging -from miasm2.ir.symbexec import SymbolicExecutionEngine from miasm2.ir.ir import IntermediateRepresentation, AssignBlock -from miasm2.expression.expression import ExprAff, ExprOp +from miasm2.expression.expression import ExprOp from miasm2.analysis.data_flow import dead_simp as new_dead_simp_imp + log = logging.getLogger("analysis") console_handler = logging.StreamHandler() console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) @@ -27,6 +27,7 @@ class ira(IntermediateRepresentation): class ira_x86_16(ir_x86_16, ira) """ + ret_reg = None def call_effects(self, addr, instr): """Default modelisation of a function call to @addr. This may be used to: @@ -44,14 +45,36 @@ class ira(IntermediateRepresentation): instr) return [assignblk] - def pre_add_instr(self, block, instr, assignments, ir_blocks_all, gen_pc_update): - """Replace function call with corresponding call effects, - inside the IR block""" - if not instr.is_subcall(): - return False - call_effects = self.call_effects(instr.args[0], instr) - assignments+= call_effects - return True + def add_instr_to_current_state(self, instr, block, assignments, ir_blocks_all, gen_pc_updt): + """ + Add the IR effects of an instruction to the current state. + If the instruction is a function call, replace the original IR by a + model of the sub function + + Returns a bool: + * True if the current assignments list must be split + * False in other cases. + + @instr: native instruction + @block: native block source + @assignments: current irbloc + @ir_blocks_all: list of additional effects + @gen_pc_updt: insert PC update effects between instructions + """ + if instr.is_subcall(): + call_effects = self.call_effects(instr.args[0], instr) + assignments+= call_effects + return True + + if gen_pc_updt is not False: + self.gen_pc_update(assignments, instr) + + assignblk, ir_blocks_extra = self.instr2ir(instr) + assignments.append(assignblk) + ir_blocks_all += ir_blocks_extra + if ir_blocks_extra: + return True + return False def sizeof_char(self): "Return the size of a char in bits" @@ -73,7 +96,7 @@ class ira(IntermediateRepresentation): "Return the size of a void* in bits" raise NotImplementedError("Abstract method") - def dead_simp(self): + def dead_simp(self, ircfg): """Deprecated: See miasm2.analysis.data_flow.dead_simp()""" warnings.warn('DEPRECATION WARNING: Please use miasm2.analysis.data_flow.dead_simp(ira) instead of ira.dead_simp()') - new_dead_simp_imp(self) + new_dead_simp_imp(self, ircfg) diff --git a/miasm2/ir/ir.py b/miasm2/ir/ir.py index 73c184dd..bf9b4e9a 100644 --- a/miasm2/ir/ir.py +++ b/miasm2/ir/ir.py @@ -23,10 +23,24 @@ from itertools import chain import miasm2.expression.expression as m2_expr from miasm2.expression.expression_helper import get_missing_interval -from miasm2.core.asmblock import AsmBlock, AsmConstraint, AsmBlockBad -from miasm2.core.locationdb import LocationDB +from miasm2.core.asmblock import AsmBlock, AsmConstraint from miasm2.core.graph import DiGraph + +def _expr_loc_to_symb(expr, loc_db): + if not expr.is_loc(): + return expr + if loc_db is None: + name = str(expr) + else: + names = loc_db.get_location_names(expr.loc_key) + if not names: + name = loc_db.pretty_str(expr.loc_key) + else: + # Use only one name for readability + name = sorted(names)[0] + return m2_expr.ExprId(name, expr.size) + class AssignBlock(object): """Represent parallel IR assignment, such as: EAX = EBX @@ -256,6 +270,15 @@ class AssignBlock(object): new_assignblk[new_dst] = new_src return AssignBlock(irs=new_assignblk, instr=self.instr) + def to_string(self, loc_db=None): + out = [] + for dst, src in self.iteritems(): + new_src = src.visit(lambda expr:_expr_loc_to_symb(expr, loc_db)) + new_dst = dst.visit(lambda expr:_expr_loc_to_symb(expr, loc_db)) + line = "%s = %s" % (new_dst, new_src) + out.append(line) + out.append("") + return "\n".join(out) class IRBlock(object): """Intermediate representation block object. @@ -386,6 +409,22 @@ class IRBlock(object): assignblks.append(AssignBlock(new_assignblk, assignblk.instr)) return IRBlock(self.loc_key, assignblks) + def to_string(self, loc_db=None): + out = [] + if loc_db is None: + node_name = "%s:" % self.loc_key + else: + names = loc_db.get_location_names(self.loc_key) + if not names: + node_name = "%s:" % loc_db.pretty_str(self.loc_key) + else: + node_name = "".join("%s:\n" % name for name in names) + out.append(node_name) + + for i, assignblk in enumerate(self): + out.append(assignblk.to_string(loc_db)) + return '\n'.join(out) + class irbloc(IRBlock): """ @@ -402,27 +441,40 @@ class DiGraphIR(DiGraph): """DiGraph for IR instances""" - def __init__(self, blocks, loc_db=None, *args, **kwargs): + def __init__(self, irdst, loc_db, blocks=None, *args, **kwargs): """Instanciate a DiGraphIR + @loc_db: LocationDB instance @blocks: IR blocks """ self.loc_db = loc_db + if blocks is None: + blocks = {} self._blocks = blocks + self._irdst = irdst super(DiGraphIR, self).__init__(*args, **kwargs) - def _expr_loc_to_symb(self, expr): - if not expr.is_loc(): - return expr - if self.loc_db is None: - name = str(expr) - else: - names = self.loc_db.get_location_names(expr.loc_key) - if not names: - name = self.loc_db.pretty_str(expr.loc_key) - else: - # Use only one name for readability - name = sorted(names)[0] - return m2_expr.ExprId(name, expr.size) + @property + def IRDst(self): + return self._irdst + + @property + def blocks(self): + return self._blocks + + def add_irblock(self, irblock): + """ + Add the @irblock to the current DiGraphIR + @irblock: IRBlock instance + """ + self.blocks[irblock.loc_key] = irblock + self.add_node(irblock.loc_key) + + for dst in self.dst_trackback(irblock): + if dst.is_int(): + dst_loc_key = self.loc_db.get_or_create_offset_location(int(dst)) + dst = m2_expr.ExprLoc(dst_loc_key, irblock.dst.size) + if dst.is_loc(): + self.add_uniq_edge(irblock.loc_key, dst.loc_key) def node2lines(self, node): if self.loc_db is None: @@ -446,8 +498,9 @@ class DiGraphIR(DiGraph): raise StopIteration for i, assignblk in enumerate(self._blocks[node]): for dst, src in assignblk.iteritems(): - new_src = src.visit(self._expr_loc_to_symb) - new_dst = dst.visit(self._expr_loc_to_symb) + + new_src = src.visit(lambda expr:_expr_loc_to_symb(expr, self.loc_db)) + new_dst = dst.visit(lambda expr:_expr_loc_to_symb(expr, self.loc_db)) line = "%s = %s" % (new_dst, new_src) if self._dot_offset: yield [self.DotCellDescription(text="%-4d" % i, attr={}), @@ -482,49 +535,6 @@ class DiGraphIR(DiGraph): self._dot_offset = offset return super(DiGraphIR, self).dot() - -class IntermediateRepresentation(object): - """ - Intermediate representation object - - Allow native assembly to intermediate representation traduction - """ - - def __init__(self, arch, attrib, loc_db=None): - if loc_db is None: - loc_db = LocationDB() - self.loc_db = loc_db - self.blocks = {} - self.pc = arch.getpc(attrib) - self.sp = arch.getsp(attrib) - self.arch = arch - self.attrib = attrib - # Lazy structure - self._graph = None - - @property - def blocs(self): - warnings.warn('DEPRECATION WARNING: use ".blocks" instead of ".blocs"') - return self.blocks - - @property - def symbol_pool(self): - warnings.warn('DEPRECATION WARNING: use ".loc_db" instead of ".symbol_pool"') - return self.loc_db - - def get_ir(self, instr): - raise NotImplementedError("Abstract Method") - - def instr2ir(self, instr): - ir_bloc_cur, extra_irblocks = self.get_ir(instr) - for index, irb in enumerate(extra_irblocks): - irs = [] - for assignblk in irb: - irs.append(AssignBlock(assignblk, instr)) - extra_irblocks[index] = IRBlock(irb.loc_key, irs) - assignblk = AssignBlock(ir_bloc_cur, instr) - return assignblk, extra_irblocks - def get_loc_key(self, addr): """Transforms an ExprId/ExprInt/loc_key/int into a loc_key @addr: an ExprId/ExprInt/loc_key/int""" @@ -562,22 +572,6 @@ class IntermediateRepresentation(object): return None return self.blocks.get(loc_key, None) - def get_bloc(self, addr): - """ - DEPRECATED function - Use get_block instead of get_block - """ - warnings.warn('DEPRECATION WARNING: use "get_block" instead of "get_bloc"') - return self.get_block(addr) - - def add_instr(self, line, loc_key=None, gen_pc_updt=False): - if loc_key is None: - loc_key = self.loc_db.add_location() - block = AsmBlock(loc_key) - block.lines = [line] - self.add_block(block, gen_pc_updt) - return loc_key - def getby_offset(self, offset): out = set() for irb in self.blocks.values(): @@ -589,175 +583,6 @@ class IntermediateRepresentation(object): out.add(irb) return out - def gen_pc_update(self, assignments, instr): - offset = m2_expr.ExprInt(instr.offset, self.pc.size) - assignments.append(AssignBlock({self.pc:offset}, instr)) - - def pre_add_instr(self, block, instr, assignments, ir_blocks_all, gen_pc_updt): - """Function called before adding an instruction from the the native @block to - the current irbloc. - - Returns a couple. The first element is the new irblock. The second the a - bool: - * True if the current irblock must be split - * False in other cases. - - @block: native block source - @instr: native instruction - @irb_cur: current irbloc - @ir_blocks_all: list of additional effects - @gen_pc_updt: insert PC update effects between instructions - - """ - - return False - - def add_instr_to_irblock(self, block, instr, assignments, ir_blocks_all, gen_pc_updt): - """ - Add the IR effects of an instruction to the current irblock. - - Returns a couple. The first element is the new irblock. The second the a - bool: - * True if the current irblock must be split - * False in other cases. - - @block: native block source - @instr: native instruction - @irb_cur: current irbloc - @ir_blocks_all: list of additional effects - @gen_pc_updt: insert PC update effects between instructions - """ - - split = self.pre_add_instr(block, instr, assignments, ir_blocks_all, gen_pc_updt) - if split: - return True - - assignblk, ir_blocks_extra = self.instr2ir(instr) - - if gen_pc_updt is not False: - self.gen_pc_update(assignments, instr) - - assignments.append(assignblk) - ir_blocks_all += ir_blocks_extra - if ir_blocks_extra: - return True - return False - - def add_block(self, block, gen_pc_updt=False): - """ - Add a native block to the current IR - @block: native assembly block - @gen_pc_updt: insert PC update effects between instructions - """ - - loc_key = block.loc_key - ir_blocks_all = [] - - assignments = [] - for instr in block.lines: - if loc_key is None: - assignments = [] - loc_key = self.get_loc_key_for_instr(instr) - split = self.add_instr_to_irblock(block, instr, assignments, - ir_blocks_all, gen_pc_updt) - if split: - ir_blocks_all.append(IRBlock(loc_key, assignments)) - loc_key = None - assignments = [] - if loc_key is not None: - ir_blocks_all.append(IRBlock(loc_key, assignments)) - - new_ir_blocks_all = self.post_add_block(block, ir_blocks_all) - for irblock in new_ir_blocks_all: - self.blocks[irblock.loc_key] = irblock - return new_ir_blocks_all - - def add_bloc(self, block, gen_pc_updt=False): - """ - DEPRECATED function - Use add_block instead of add_block - """ - warnings.warn('DEPRECATION WARNING: use "add_block" instead of "add_bloc"') - return self.add_block(block, gen_pc_updt) - - def expr_fix_regs_for_mode(self, expr, *args, **kwargs): - return expr - - def expraff_fix_regs_for_mode(self, expr, *args, **kwargs): - return expr - - def irbloc_fix_regs_for_mode(self, irblock, *args, **kwargs): - return irblock - - def is_pc_written(self, block): - """Return the first Assignblk of the @blockin which PC is written - @block: IRBlock instance""" - all_pc = self.arch.pc.values() - for assignblk in block: - if assignblk.dst in all_pc: - return assignblk - return None - - def set_empty_dst_to_next(self, block, ir_blocks): - for index, irblock in enumerate(ir_blocks): - if irblock.dst is not None: - continue - next_loc_key = block.get_next() - if next_loc_key is None: - loc_key = None - if block.lines: - line = block.lines[-1] - if line.offset is not None: - loc_key = self.loc_db.get_or_create_offset_location(line.offset + line.l) - if loc_key is None: - loc_key = self.loc_db.add_location() - block.add_cst(loc_key, AsmConstraint.c_next) - else: - loc_key = next_loc_key - dst = m2_expr.ExprLoc(loc_key, self.pc.size) - if irblock.assignblks: - instr = irblock.assignblks[-1].instr - else: - instr = None - assignblk = AssignBlock({self.IRDst: dst}, instr) - ir_blocks[index] = IRBlock(irblock.loc_key, list(irblock.assignblks) + [assignblk]) - - def post_add_block(self, block, ir_blocks): - self.set_empty_dst_to_next(block, ir_blocks) - - new_irblocks = [] - for irblock in ir_blocks: - new_irblock = self.irbloc_fix_regs_for_mode(irblock, self.attrib) - self.blocks[irblock.loc_key] = new_irblock - new_irblocks.append(new_irblock) - # Forget graph if any - self._graph = None - return new_irblocks - - def post_add_bloc(self, block, ir_blocks): - """ - DEPRECATED function - Use post_add_block instead of post_add_bloc - """ - warnings.warn('DEPRECATION WARNING: use "post_add_block" instead of "post_add_bloc"') - return self.post_add_block(block, ir_blocks) - - def get_loc_key_for_instr(self, instr): - """Returns the loc_key associated to an instruction - @instr: current instruction""" - return self.loc_db.get_or_create_offset_location(instr.offset) - - def gen_loc_key_and_expr(self, size): - """ - Return a loc_key and it's corresponding ExprLoc - @size: size of expression - """ - loc_key = self.loc_db.add_location() - return loc_key, m2_expr.ExprLoc(loc_key, size) - - def get_next_loc_key(self, instr): - loc_key = self.loc_db.get_or_create_offset_location(instr.offset + instr.l) - return loc_key def simplify(self, simplifier): """ @@ -775,11 +600,11 @@ class IntermediateRepresentation(object): self.blocks[loc_key] = IRBlock(loc_key, assignblks) return modified - def replace_expr_in_ir(self, bloc, rep): - for assignblk in bloc: + def replace_expr_in_ir(self, block, replaced): + for assignblk in block: for dst, src in assignblk.items(): del assignblk[dst] - assignblk[dst.replace_expr(rep)] = src.replace_expr(rep) + assignblk[dst.replace_expr(replaced)] = src.replace_expr(replaced) def get_rw(self, regs_ids=None): """ @@ -836,29 +661,6 @@ class IntermediateRepresentation(object): return done - def _gen_graph(self): - """ - Gen irbloc digraph - """ - self._graph = DiGraphIR(self.blocks, self.loc_db) - for lbl, block in self.blocks.iteritems(): - assert isinstance(lbl, m2_expr.LocKey) - self._graph.add_node(lbl) - for dst in self.dst_trackback(block): - if dst.is_int(): - dst_lbl = self.loc_db.get_or_create_offset_location(int(dst)) - dst = m2_expr.ExprLoc(dst_lbl.loc_key, self.pc.size) - if dst.is_loc(): - self._graph.add_edge(lbl, dst.loc_key) - - @property - def graph(self): - """Get a DiGraph representation of current IR instance. - Lazy property, building the graph on-demand""" - if self._graph is None: - self._gen_graph() - return self._graph - def remove_empty_assignblks(self): modified = False for loc_key, block in self.blocks.iteritems(): @@ -886,7 +688,7 @@ class IntermediateRepresentation(object): if len(assignblk) > 1: continue assert set(assignblk.keys()) == set([self.IRDst]) - if len(self.graph.successors(block.loc_key)) != 1: + if len(self.successors(block.loc_key)) != 1: continue if not assignblk[self.IRDst].is_loc(): continue @@ -901,7 +703,7 @@ class IntermediateRepresentation(object): for loc_key in jmp_blocks: block = self.blocks[loc_key] dst_loc_key = block.dst - parents = self.graph.predecessors(block.loc_key) + parents = self.predecessors(block.loc_key) for lbl in parents: parent = self.blocks.get(lbl, None) if parent is None: @@ -910,24 +712,24 @@ class IntermediateRepresentation(object): if dst.is_id(block.loc_key): dst = m2_expr.ExprLoc(dst_loc_key, dst.size) - self.graph.discard_edge(lbl, block.loc_key) - self.graph.discard_edge(block.loc_key, dst_loc_key) + self.discard_edge(lbl, block.loc_key) + self.discard_edge(block.loc_key, dst_loc_key) - self.graph.add_uniq_edge(lbl, dst_loc_key) + self.add_uniq_edge(lbl, dst_loc_key) modified = True elif dst.is_cond(): src1, src2 = dst.src1, dst.src2 if src1.is_id(block.loc_key): dst = m2_expr.ExprCond(dst.cond, m2_expr.ExprLoc(dst_loc_key, dst.size), dst.src2) - self.graph.discard_edge(lbl, block.loc_key) - self.graph.discard_edge(block.loc_key, dst_loc_key) - self.graph.add_uniq_edge(lbl, dst_loc_key) + self.discard_edge(lbl, block.loc_key) + self.discard_edge(block.loc_key, dst_loc_key) + self.add_uniq_edge(lbl, dst_loc_key) modified = True if src2.is_id(block.loc_key): dst = m2_expr.ExprCond(dst.cond, dst.src1, m2_expr.ExprLoc(dst_loc_key, dst.size)) - self.graph.discard_edge(lbl, block.loc_key) - self.graph.discard_edge(block.loc_key, dst_loc_key) - self.graph.add_uniq_edge(lbl, dst_loc_key) + self.discard_edge(lbl, block.loc_key) + self.discard_edge(block.loc_key, dst_loc_key) + self.add_uniq_edge(lbl, dst_loc_key) modified = True if dst.src1 == dst.src2: dst = dst.src1 @@ -938,9 +740,9 @@ class IntermediateRepresentation(object): # Remove unlinked useless nodes for loc_key in jmp_blocks: - if (len(self.graph.predecessors(loc_key)) == 0 and - len(self.graph.successors(loc_key)) == 0): - self.graph.del_node(loc_key) + if (len(self.predecessors(loc_key)) == 0 and + len(self.successors(loc_key)) == 0): + self.del_node(loc_key) del self.blocks[loc_key] return modified @@ -950,21 +752,21 @@ class IntermediateRepresentation(object): parent """ modified = False - todo = set(self.graph.nodes()) + todo = set(self.nodes()) while todo: block = todo.pop() - sons = self.graph.successors(block) + sons = self.successors(block) if len(sons) != 1: continue son = list(sons)[0] - if self.graph.predecessors(son) != [block]: + if self.predecessors(son) != [block]: continue if block not in self.blocks: continue if son not in self.blocks: continue # Block has one son, son has one parent => merge - assignblks =[] + assignblks = [] for assignblk in self.blocks[block]: if self.IRDst not in assignblk: assignblks.append(assignblk) @@ -978,13 +780,13 @@ class IntermediateRepresentation(object): assignblks += self.blocks[son].assignblks new_block = IRBlock(block, assignblks) - self.graph.discard_edge(block, son) + self.discard_edge(block, son) - for lson in self.graph.successors(son): - self.graph.add_uniq_edge(block, lson) - self.graph.discard_edge(son, lson) + for lson in self.successors(son): + self.add_uniq_edge(block, lson) + self.discard_edge(son, lson) del self.blocks[son] - self.graph.del_node(son) + self.del_node(son) self.blocks[block] = new_block todo.add(block) @@ -992,6 +794,218 @@ class IntermediateRepresentation(object): return modified +class IntermediateRepresentation(object): + """ + Intermediate representation object + + Allow native assembly to intermediate representation traduction + """ + + def __init__(self, arch, attrib, loc_db): + self.pc = arch.getpc(attrib) + self.sp = arch.getsp(attrib) + self.arch = arch + self.attrib = attrib + self.loc_db = loc_db + self.IRDst = None + + def get_ir(self, instr): + raise NotImplementedError("Abstract Method") + + def new_ircfg(self, *args, **kwargs): + """ + Return a new instance of DiGraphIR + """ + return DiGraphIR(self.IRDst, self.loc_db, *args, **kwargs) + + def new_ircfg_from_asmcfg(self, asmcfg, *args, **kwargs): + """ + Return a new instance of DiGraphIR from an @asmcfg + @asmcfg: AsmCFG instance + """ + + ircfg = DiGraphIR(self.IRDst, self.loc_db, *args, **kwargs) + for block in asmcfg.blocks: + self.add_asmblock_to_ircfg(block, ircfg) + return ircfg + + def instr2ir(self, instr): + ir_bloc_cur, extra_irblocks = self.get_ir(instr) + for index, irb in enumerate(extra_irblocks): + irs = [] + for assignblk in irb: + irs.append(AssignBlock(assignblk, instr)) + extra_irblocks[index] = IRBlock(irb.loc_key, irs) + assignblk = AssignBlock(ir_bloc_cur, instr) + return assignblk, extra_irblocks + + def add_instr_to_ircfg(self, instr, ircfg, loc_key=None, gen_pc_updt=False): + """ + Add the native instruction @instr to the @ircfg + @instr: instruction instance + @ircfg: IRCFG instance + @loc_key: loc_key instance of the instruction destination + @gen_pc_updt: insert PC update effects between instructions + """ + + if loc_key is None: + offset = getattr(instr, "offset", None) + loc_key = self.loc_db.add_location(offset=offset) + block = AsmBlock(loc_key) + block.lines = [instr] + self.add_asmblock_to_ircfg(block, ircfg, gen_pc_updt) + return loc_key + + def gen_pc_update(self, assignments, instr): + offset = m2_expr.ExprInt(instr.offset, self.pc.size) + assignments.append(AssignBlock({self.pc:offset}, instr)) + + def add_instr_to_current_state(self, instr, block, assignments, ir_blocks_all, gen_pc_updt): + """ + Add the IR effects of an instruction to the current state. + + Returns a bool: + * True if the current assignments list must be split + * False in other cases. + + @instr: native instruction + @block: native block source + @assignments: list of current AssignBlocks + @ir_blocks_all: list of additional effects + @gen_pc_updt: insert PC update effects between instructions + """ + if gen_pc_updt is not False: + self.gen_pc_update(assignments, instr) + + assignblk, ir_blocks_extra = self.instr2ir(instr) + assignments.append(assignblk) + ir_blocks_all += ir_blocks_extra + if ir_blocks_extra: + return True + return False + + def add_asmblock_to_ircfg(self, block, ircfg, gen_pc_updt=False): + """ + Add a native block to the current IR + @block: native assembly block + @ircfg: DiGraphIR instance + @gen_pc_updt: insert PC update effects between instructions + """ + + loc_key = block.loc_key + ir_blocks_all = [] + + assignments = [] + for instr in block.lines: + if loc_key is None: + assignments = [] + loc_key = self.get_loc_key_for_instr(instr) + split = self.add_instr_to_current_state( + instr, block, assignments, + ir_blocks_all, gen_pc_updt + ) + if split: + ir_blocks_all.append(IRBlock(loc_key, assignments)) + loc_key = None + assignments = [] + if loc_key is not None: + ir_blocks_all.append(IRBlock(loc_key, assignments)) + + new_ir_blocks_all = self.post_add_asmblock_to_ircfg(block, ircfg, ir_blocks_all) + for irblock in new_ir_blocks_all: + ircfg.add_irblock(irblock) + return new_ir_blocks_all + + def add_block(self, block, gen_pc_updt=False): + """ + DEPRECATED function + Use add_block instead of add_block + """ + warnings.warn("""DEPRECATION WARNING + ircfg is now out of IntermediateRepresentation + Use: + ircfg = ir_arch.new_ircfg() + ir_arch.add_asmblock_to_ircfg(block, ircfg) + """) + raise RuntimeError("API Deprecated") + + def add_bloc(self, block, gen_pc_updt=False): + """ + DEPRECATED function + Use add_block instead of add_block + """ + self.add_block(block, gen_pc_updt) + + def get_next_loc_key(self, instr): + loc_key = self.loc_db.get_or_create_offset_location(instr.offset + instr.l) + return loc_key + + def get_loc_key_for_instr(self, instr): + """Returns the loc_key associated to an instruction + @instr: current instruction""" + return self.loc_db.get_or_create_offset_location(instr.offset) + + def gen_loc_key_and_expr(self, size): + """ + Return a loc_key and it's corresponding ExprLoc + @size: size of expression + """ + loc_key = self.loc_db.add_location() + return loc_key, m2_expr.ExprLoc(loc_key, size) + + def expr_fix_regs_for_mode(self, expr, *args, **kwargs): + return expr + + def expraff_fix_regs_for_mode(self, expr, *args, **kwargs): + return expr + + def irbloc_fix_regs_for_mode(self, irblock, *args, **kwargs): + return irblock + + def is_pc_written(self, block): + """Return the first Assignblk of the @blockin which PC is written + @block: IRBlock instance""" + all_pc = self.arch.pc.values() + for assignblk in block: + if assignblk.dst in all_pc: + return assignblk + return None + + def set_empty_dst_to_next(self, block, ir_blocks): + for index, irblock in enumerate(ir_blocks): + if irblock.dst is not None: + continue + next_loc_key = block.get_next() + if next_loc_key is None: + loc_key = None + if block.lines: + line = block.lines[-1] + if line.offset is not None: + loc_key = self.loc_db.get_or_create_offset_location(line.offset + line.l) + if loc_key is None: + loc_key = self.loc_db.add_location() + block.add_cst(loc_key, AsmConstraint.c_next) + else: + loc_key = next_loc_key + dst = m2_expr.ExprLoc(loc_key, self.pc.size) + if irblock.assignblks: + instr = irblock.assignblks[-1].instr + else: + instr = None + assignblk = AssignBlock({self.IRDst: dst}, instr) + ir_blocks[index] = IRBlock(irblock.loc_key, list(irblock.assignblks) + [assignblk]) + + def post_add_asmblock_to_ircfg(self, block, ircfg, ir_blocks): + self.set_empty_dst_to_next(block, ir_blocks) + + new_irblocks = [] + for irblock in ir_blocks: + new_irblock = self.irbloc_fix_regs_for_mode(irblock, self.attrib) + ircfg.add_irblock(new_irblock) + new_irblocks.append(new_irblock) + return new_irblocks + + class ir(IntermediateRepresentation): """ DEPRECATED object diff --git a/miasm2/ir/symbexec.py b/miasm2/ir/symbexec.py index d137e71f..9ab455da 100644 --- a/miasm2/ir/symbexec.py +++ b/miasm2/ir/symbexec.py @@ -3,9 +3,8 @@ import logging from collections import MutableMapping from miasm2.expression.expression import ExprOp, ExprId, ExprLoc, ExprInt, \ - ExprMem, ExprCompose, ExprSlice, ExprCond, ExprAff + ExprMem, ExprCompose, ExprSlice, ExprCond from miasm2.expression.simplifications import expr_simp -from miasm2.core import asmblock from miasm2.ir.ir import AssignBlock log = logging.getLogger("symbexec") @@ -15,14 +14,14 @@ log.addHandler(console_handler) log.setLevel(logging.INFO) -def get_block(ir_arch, mdis, addr): +def get_block(ir_arch, ircfg, mdis, addr): """Get IRBlock at address @addr""" - loc_key = ir_arch.get_or_create_loc_key(addr) - if loc_key not in ir_arch.blocks: + loc_key = ircfg.get_or_create_loc_key(addr) + if not loc_key in ircfg.blocks: offset = mdis.loc_db.get_location_offset(loc_key) block = mdis.dis_block(offset) - ir_arch.add_block(block) - irblock = ir_arch.get_block(loc_key) + ir_arch.add_asmblock_to_ircfg(block, ircfg) + irblock = ircfg.get_block(loc_key) if irblock is None: raise LookupError('No block found at that address: %s' % ir_arch.loc_db.pretty_str(loc_key)) return irblock @@ -1055,18 +1054,18 @@ class SymbolicExecutionEngine(object): return dst - def run_block_at(self, addr, step=False): + def run_block_at(self, ircfg, addr, step=False): """ Symbolic execution of the block at @addr @addr: address to execute (int or ExprInt or label) @step: display intermediate steps """ - irblock = self.ir_arch.get_block(addr) + irblock = ircfg.get_block(addr) if irblock is not None: addr = self.eval_updt_irblock(irblock, step=step) return addr - def run_at(self, addr, lbl_stop=None, step=False): + def run_at(self, ircfg, addr, lbl_stop=None, step=False): """ Symbolic execution starting at @addr @addr: address to execute (int or ExprInt or label) @@ -1074,7 +1073,7 @@ class SymbolicExecutionEngine(object): @step: display intermediate steps """ while True: - irblock = self.ir_arch.get_block(addr) + irblock = ircfg.get_block(addr) if irblock is None: break if irblock.loc_key == lbl_stop: diff --git a/miasm2/ir/symbexec_top.py b/miasm2/ir/symbexec_top.py index 5fe12996..f5ecb566 100644 --- a/miasm2/ir/symbexec_top.py +++ b/miasm2/ir/symbexec_top.py @@ -2,7 +2,6 @@ from miasm2.ir.symbexec import SymbolicExecutionEngine, StateEngine from miasm2.expression.simplifications import expr_simp from miasm2.expression.expression import ExprId, ExprInt, ExprSlice,\ ExprMem, ExprCond, ExprCompose, ExprOp -from miasm2.core import asmblock TOPSTR = "TOP" diff --git a/miasm2/ir/symbexec_types.py b/miasm2/ir/symbexec_types.py index fedd25bc..349d55a6 100644 --- a/miasm2/ir/symbexec_types.py +++ b/miasm2/ir/symbexec_types.py @@ -1,9 +1,6 @@ from miasm2.ir.symbexec import SymbolicExecutionEngine, StateEngine from miasm2.expression.simplifications import expr_simp -from miasm2.expression.expression import ExprId, ExprInt, ExprSlice,\ - ExprMem, ExprCond, ExprCompose, ExprOp - -from miasm2.core.ctypesmngr import CTypeId +from miasm2.expression.expression import ExprId, ExprMem class SymbolicStateCTypes(StateEngine): @@ -88,7 +85,6 @@ class SymbExecCType(SymbolicExecutionEngine): @assignblk: AssignBlock instance """ pool_out = {} - eval_cache = {} for dst, src in assignblk.iteritems(): objcs = self.chandler.expr_to_types(src, self.symbols) if isinstance(dst, ExprMem): diff --git a/miasm2/jitter/jitcore.py b/miasm2/jitter/jitcore.py index a26d4c9f..fc5cf35e 100644 --- a/miasm2/jitter/jitcore.py +++ b/miasm2/jitter/jitcore.py @@ -24,7 +24,6 @@ from miasm2.core.utils import BoundedDict from miasm2.expression.expression import LocKey from miasm2.jitter.csts import * - class JitCore(object): "JiT management. This is an abstract class" @@ -42,6 +41,7 @@ class JitCore(object): """ # Arch related self.ir_arch = ir_arch + self.ircfg = self.ir_arch.new_ircfg() self.arch_name = "%s%s" % (self.ir_arch.arch.name, self.ir_arch.attrib) # Structures for block tracking @@ -126,7 +126,7 @@ class JitCore(object): """Add a block to JiT and JiT it. @block: asm_bloc to add """ - irblocks = self.ir_arch.add_block(block, gen_pc_updt = True) + irblocks = self.ir_arch.add_asmblock_to_ircfg(block, self.ircfg, gen_pc_updt = True) block.blocks = irblocks self.jit_irblocks(block.loc_key, irblocks) diff --git a/miasm2/jitter/jitcore_python.py b/miasm2/jitter/jitcore_python.py index fa751a68..61bd98d0 100644 --- a/miasm2/jitter/jitcore_python.py +++ b/miasm2/jitter/jitcore_python.py @@ -4,7 +4,6 @@ import miasm2.jitter.csts as csts from miasm2.expression.simplifications import ExpressionSimplifier from miasm2.jitter.emulatedsymbexec import EmulatedSymbExec - ################################################################################ # Python jitter Core # ################################################################################ @@ -18,12 +17,16 @@ class JitCore_Python(jitcore.JitCore): def __init__(self, ir_arch, bin_stream): super(JitCore_Python, self).__init__(ir_arch, bin_stream) self.ir_arch = ir_arch + self.ircfg = self.ir_arch.new_ircfg() # CPU & VM (None for now) will be set later expr_simp = ExpressionSimplifier() expr_simp.enable_passes(ExpressionSimplifier.PASS_COMMONS) - self.symbexec = self.SymbExecClass(None, None, self.ir_arch, {}, - sb_expr_simp=expr_simp) + self.symbexec = self.SymbExecClass( + None, None, + self.ir_arch, {}, + sb_expr_simp=expr_simp + ) self.symbexec.enable_emulated_simplifications() def set_cpu_vm(self, cpu, vm): diff --git a/miasm2/jitter/jitload.py b/miasm2/jitter/jitload.py index 5f8b4ad6..288e737a 100644 --- a/miasm2/jitter/jitload.py +++ b/miasm2/jitter/jitload.py @@ -205,8 +205,11 @@ class Jitter(object): self.cpu = jcore.JitCpu() self.ir_arch = ir_arch self.bs = bin_stream_vm(self.vm) + self.ircfg = self.ir_arch.new_ircfg() - self.symbexec = EmulatedSymbExec(self.cpu, self.vm, self.ir_arch, {}) + self.symbexec = EmulatedSymbExec( + self.cpu, self.vm, self.ir_arch, {} + ) self.symbexec.reset_regs() try: |