diff options
| -rw-r--r-- | example/disasm/full.py | 175 | ||||
| -rw-r--r-- | example/ida/graph_ir.py | 122 | ||||
| -rw-r--r-- | miasm2/analysis/data_flow.py | 364 | ||||
| -rw-r--r-- | miasm2/analysis/simplifier.py | 303 | ||||
| -rw-r--r-- | miasm2/analysis/ssa.py | 51 | ||||
| -rw-r--r-- | miasm2/arch/arm/arch.py | 2 | ||||
| -rw-r--r-- | miasm2/arch/arm/sem.py | 2 | ||||
| -rw-r--r-- | miasm2/arch/x86/regs.py | 11 | ||||
| -rw-r--r-- | miasm2/arch/x86/sem.py | 9 | ||||
| -rw-r--r-- | miasm2/jitter/arch/JitCore_x86.c | 18 | ||||
| -rw-r--r-- | miasm2/jitter/arch/JitCore_x86.h | 3 | ||||
| -rw-r--r-- | test/analysis/unssa.py | 52 | ||||
| -rw-r--r-- | test/ir/reduce_graph.py | 41 |
13 files changed, 745 insertions, 408 deletions
diff --git a/example/disasm/full.py b/example/disasm/full.py index 42d50216..5161a299 100644 --- a/example/disasm/full.py +++ b/example/disasm/full.py @@ -6,17 +6,13 @@ from miasm2.analysis.binary import Container from miasm2.core.asmblock import log_asmblock, AsmCFG from miasm2.core.interval import interval from miasm2.analysis.machine import Machine -from miasm2.analysis.data_flow import dead_simp, DiGraphDefUse, \ - ReachingDefinitions, merge_blocks, remove_empty_assignblks, \ - PropagateExpr, replace_stack_vars, load_from_int, \ - del_unused_edges +from miasm2.analysis.data_flow import dead_simp, \ + DiGraphDefUse, ReachingDefinitions, \ + replace_stack_vars, load_from_int, del_unused_edges from miasm2.expression.simplifications import expr_simp from miasm2.analysis.ssa import SSADiGraph -from miasm2.analysis.outofssa import UnSSADiGraph -from miasm2.analysis.data_flow import DiGraphLivenessSSA from miasm2.ir.ir import AssignBlock, IRBlock - - +from miasm2.analysis.simplifier import IRCFGSimplifierCommon, IRCFGSimplifierSSA log = logging.getLogger("dis") console_handler = logging.StreamHandler() @@ -207,7 +203,6 @@ open('lines.dot', 'w').write('\n'.join([str(l) for l in all_lines])) log.info('total lines %s' % total_l) - if args.propagexpr: args.gen_ir = True @@ -240,6 +235,9 @@ if args.gen_ir: ir_arch.blocks = {} ir_arch_a.blocks = {} + + head = list(entry_points)[0] + for ad, asmcfg in all_funcs_blocks.items(): log.info("generating IR... %x" % ad) for block in asmcfg.blocks: @@ -257,8 +255,9 @@ if args.gen_ir: print block if args.simplify > 0: - log.info("dead simp...") - dead_simp(ir_arch_a, ircfg_a) + log.info("Simplify...") + ircfg_simplifier = IRCFGSimplifierCommon(ir_arch_a) + ircfg_simplifier.simplify(ircfg_a, head) log.info("ok...") if args.defuse: @@ -270,30 +269,14 @@ if args.gen_ir: out = ircfg_a.dot() open('graph_irflow.dot', 'w').write(out) - if args.simplify > 1: - - ircfg_a.simplify(expr_simp) - modified = True - while modified: - modified = False - modified |= dead_simp(ir_arch_a, ircfg_a) - modified |= remove_empty_assignblks(ircfg_a) - - open('graph_irflow_reduced.dot', 'w').write(ircfg_a.dot()) - if args.ssa and not args.propagexpr: if len(entry_points) != 1: raise RuntimeError("Your graph should have only one head") - head = list(entry_points)[0] ssa = SSADiGraph(ircfg_a) ssa.transform(head) - open("ssa.dot", "wb").write(ircfg_a.dot()) - - - if args.propagexpr: class IRAOutRegs(ira): def get_out_regs(self, block): @@ -324,8 +307,6 @@ if args.propagexpr: - ir_arch_a = IRAOutRegs(mdis.loc_db) - def is_addr_ro_variable(bs, addr, size): """ Return True if address at @addr is a read-only variable. @@ -341,118 +322,32 @@ if args.propagexpr: return False return True + ir_arch_a = IRAOutRegs(mdis.loc_db) - ir_arch_a.ssa_var = {} - index = 0 - modified = True - ssa_forbidden_regs = set([ - ir_arch_a.pc, - ir_arch_a.IRDst, - ir_arch_a.arch.regs.exception_flags - ]) - head = list(entry_points)[0] - heads = set([head]) - all_ssa_vars = {} + class CustomIRCFGSimplifierSSA(IRCFGSimplifierSSA): + def do_simplify(self, ssa, head): + modified = super(CustomIRCFGSimplifierSSA, self).do_simplify(ssa, head) + if args.loadint: + modified |= load_from_int(ssa.graph, bs, is_addr_ro_variable) - propagate_expr = PropagateExpr() - ssa_variable_to_expr = {} + def simplify(self, ircfg, head): + ssa = self.ircfg_to_ssa(ircfg, head) + ssa = self.do_simplify_loop(ssa, head) + ircfg = self.ssa_to_unssa(ssa, head) - while modified: - ssa = SSADiGraph(ircfg_a) - ssa.immutable_ids.update(ssa_forbidden_regs) - ssa.ssa_variable_to_expr.update(all_ssa_vars) - ssa.transform(head) - all_ssa_vars.update(ssa.ssa_variable_to_expr) - - if args.verbose > 3: - open("ssa_%d.dot" % index, "wb").write(ircfg_a.dot()) - - ir_arch_a.ssa_var.update(ssa.ssa_variable_to_expr) - if args.verbose > 3: - open("ssa_orig.dot", "wb").write(ircfg_a.dot()) - - while modified: - log.debug('Loop %d', index) - index += 1 - modified = False - if args.verbose > 3: - open('tmp_before_%d.dot' % index, 'w').write(ircfg_a.dot()) - modified |= propagate_expr.propagate(ssa, head) - if args.verbose > 3: - open('tmp_adter_%d.dot' % index, 'w').write(ircfg_a.dot()) - modified |= ircfg_a.simplify(expr_simp) - if args.verbose > 3: - open('tmp_simp_%d.dot' % index, 'w').write(ircfg_a.dot()) - simp_modified = True - while simp_modified: - index += 1 - if args.verbose > 3: - open('tmp_before_%d.dot' % index, 'w').write(ircfg_a.dot()) - simp_modified = False - log.info("dead simp...") - simp_modified |= dead_simp(ir_arch_a, ircfg_a) - log.info("ok...") - - index += 1 - if args.verbose > 3: - open('tmp_after_%d.dot' % index, 'w').write(ircfg_a.dot()) - simp_modified |= remove_empty_assignblks(ircfg_a) - simp_modified |= del_unused_edges(ircfg_a, heads) - simp_modified |= merge_blocks(ircfg_a, heads) - - if args.loadint: - simp_modified |= load_from_int(ircfg_a, bs, is_addr_ro_variable) - modified |= simp_modified - index += 1 - if args.verbose > 3: - open('stack_%d.dot' % index, 'w').write(ircfg_a.dot()) - if args.stack2var: - modified |= replace_stack_vars(ir_arch_a, ssa) - - if args.verbose > 3: - open('final_pre.dot', 'w').write(ircfg_a.dot()) - - if args.verbose > 3: - open('final_merge.dot', 'w').write(ircfg_a.dot()) - ssa = SSADiGraph(ircfg_a) - ssa.immutable_ids.update(ssa_forbidden_regs) - ssa.ssa_variable_to_expr.update(all_ssa_vars) - ssa.transform(head) - print '*'*80, "Remove phi" - if args.verbose > 3: - open('final_ssa.dot', 'w').write(ircfg_a.dot()) - - cfg_liveness = DiGraphLivenessSSA(ircfg_a) - cfg_liveness.init_var_info(ir_arch_a) - cfg_liveness.compute_liveness() - - unssa = UnSSADiGraph(ssa, head, cfg_liveness) - - if args.verbose > 3: - open('final_no_phi.dot', 'w').write(ircfg_a.dot()) - - modified = True - while modified: - log.debug('Loop %d', index) - index += 1 - modified = False - modified |= ircfg_a.simplify(expr_simp) - if args.verbose > 3: - open('tmp_simp_%d.dot' % index, 'w').write(ircfg_a.dot()) - simp_modified = True - while simp_modified: - index += 1 - if args.verbose > 3: - open('tmp_before_%d.dot' % index, 'w').write(ircfg_a.dot()) - simp_modified = False - simp_modified |= dead_simp(ir_arch_a, ircfg_a) - index += 1 - if args.verbose > 3: - open('tmp_after_%d.dot' % index, 'w').write(ircfg_a.dot()) - simp_modified |= remove_empty_assignblks(ircfg_a) - simp_modified |= merge_blocks(ircfg_a, heads) - modified |= simp_modified - index += 1 - - open('final.dot', 'w').write(ircfg_a.dot()) + if args.stack2var: + replace_stack_vars(self.ir_arch, ircfg) + + ircfg_simplifier = IRCFGSimplifierCommon(self.ir_arch) + ircfg_simplifier.simplify(ircfg, head) + return ircfg + + + + + head = list(entry_points)[0] + ir_arch_a = IRAOutRegs(mdis.loc_db) + simplifier = CustomIRCFGSimplifierSSA(ir_arch_a) + ircfg = simplifier.simplify(ircfg_a, head) + open('final.dot', 'w').write(ircfg.dot()) diff --git a/example/ida/graph_ir.py b/example/ida/graph_ir.py index b04979ac..8026174d 100644 --- a/example/ida/graph_ir.py +++ b/example/ida/graph_ir.py @@ -10,15 +10,9 @@ from miasm2.core.asmblock import is_int from miasm2.core.bin_stream_ida import bin_stream_ida from miasm2.expression.simplifications import expr_simp from miasm2.ir.ir import IRBlock, AssignBlock - -from miasm2.analysis.ssa import SSADiGraph, UnSSADiGraph, DiGraphLivenessSSA - -from miasm2.analysis.data_flow import dead_simp, \ - merge_blocks, remove_empty_assignblks, \ - PropagateExpr, load_from_int - - +from miasm2.analysis.data_flow import load_from_int from utils import guess_machine, expr2colorstr +from miasm2.analysis.simplifier import IRCFGSimplifierCommon, IRCFGSimplifierSSA @@ -250,15 +244,11 @@ def build_graph(start_addr, type_graph, simplify=False, dontmodstack=True, loadi title = "Miasm IR graph" + head = list(entry_points)[0] + if simplify: - dead_simp(ir_arch, ircfg) - ircfg.simplify(expr_simp) - modified = True - while modified: - modified = False - modified |= dead_simp(ir_arch, ircfg) - modified |= remove_empty_assignblks(ircfg) - modified |= merge_blocks(ircfg, entry_points) + ircfg_simplifier = IRCFGSimplifierCommon(ir_arch) + ircfg_simplifier.simplify(ircfg, head) title += " (simplified)" if type_graph == TYPE_GRAPH_IR: @@ -266,8 +256,6 @@ def build_graph(start_addr, type_graph, simplify=False, dontmodstack=True, loadi graph.Show() return - head = list(entry_points)[0] - class IRAOutRegs(ira): def get_out_regs(self, block): @@ -298,86 +286,38 @@ def build_graph(start_addr, type_graph, simplify=False, dontmodstack=True, loadi new_irblock = IRBlock(irblock.loc_key, assignblks) ircfg.blocks[loc] = new_irblock - ir_arch = IRAOutRegs(mdis.loc_db) - ir_arch.ssa_var = {} - modified = True - ssa_forbidden_regs = set([ - ir_arch.pc, - ir_arch.IRDst, - ir_arch.arch.regs.exception_flags - ]) - head = list(entry_points)[0] - heads = set([head]) - all_ssa_vars = {} - - propagate_expr = PropagateExpr() + class CustomIRCFGSimplifierSSA(IRCFGSimplifierSSA): + def do_simplify(self, ssa, head): + modified = super(CustomIRCFGSimplifierSSA, self).do_simplify(ssa, head) + if loadint: + modified |= load_from_int(ssa.graph, bs, is_addr_ro_variable) + return modified - ssa = SSADiGraph(ircfg) - ssa.immutable_ids.update(ssa_forbidden_regs) - ssa.ssa_variable_to_expr.update(all_ssa_vars) - ssa.transform(head) - all_ssa_vars.update(ssa.ssa_variable_to_expr) + def simplify(self, ircfg, head): + ssa = self.ircfg_to_ssa(ircfg, head) + ssa = self.do_simplify_loop(ssa, head) - ir_arch.ssa_var.update(ssa.ssa_variable_to_expr) + if type_graph == TYPE_GRAPH_IRSSA: + ret = ssa.graph + elif type_graph == TYPE_GRAPH_IRSSAUNSSA: + ircfg = self.ssa_to_unssa(ssa, head) + ircfg_simplifier = IRCFGSimplifierCommon(self.ir_arch) + ircfg_simplifier.simplify(ircfg, head) + ret = ircfg + else: + raise ValueError("Unknown option") + return ret - if simplify: - while modified: - ssa = SSADiGraph(ircfg) - ssa.immutable_ids.update(ssa_forbidden_regs) - ssa.ssa_variable_to_expr.update(all_ssa_vars) - ssa.transform(head) - all_ssa_vars.update(ssa.ssa_variable_to_expr) - - ir_arch.ssa_var.update(ssa.ssa_variable_to_expr) - - while modified: - modified = False - modified |= propagate_expr.propagate(ssa, head) - modified |= ircfg.simplify(expr_simp) - simp_modified = True - while simp_modified: - simp_modified = False - simp_modified |= dead_simp(ir_arch, ircfg) - simp_modified |= remove_empty_assignblks(ircfg) - simp_modified |= load_from_int(ircfg, bs, is_addr_ro_variable) - modified |= simp_modified - - - ssa = SSADiGraph(ircfg) - ssa.immutable_ids.update(ssa_forbidden_regs) - ssa.ssa_variable_to_expr.update(all_ssa_vars) - ssa.transform(head) - all_ssa_vars.update(ssa.ssa_variable_to_expr) - - if type_graph == TYPE_GRAPH_IRSSA: - graph = GraphMiasmIR(ssa.graph, title, None) - graph.Show() - return + head = list(entry_points)[0] + simplifier = CustomIRCFGSimplifierSSA(ir_arch) + ircfg = simplifier.simplify(ircfg, head) + open('final.dot', 'w').write(ircfg.dot()) - if type_graph == TYPE_GRAPH_IRSSAUNSSA: - - cfg_liveness = DiGraphLivenessSSA(ssa.graph) - cfg_liveness.init_var_info(ir_arch) - cfg_liveness.compute_liveness() - - UnSSADiGraph(ssa, head, cfg_liveness) - if simplify: - modified = True - while modified: - modified = False - modified |= ssa.graph.simplify(expr_simp) - simp_modified = True - while simp_modified: - simp_modified = False - simp_modified |= dead_simp(ir_arch, ssa.graph) - simp_modified |= remove_empty_assignblks(ssa.graph) - simp_modified |= merge_blocks(ssa.graph, heads) - modified |= simp_modified - graph = GraphMiasmIR(ssa.graph, title, None) - graph.Show() + graph = GraphMiasmIR(ircfg, title, None) + graph.Show() def function_graph_ir(): # Get settings diff --git a/miasm2/analysis/data_flow.py b/miasm2/analysis/data_flow.py index 4bf64e25..2201a088 100644 --- a/miasm2/analysis/data_flow.py +++ b/miasm2/analysis/data_flow.py @@ -11,6 +11,7 @@ from miasm2.expression.expression_helper import possible_values from miasm2.analysis.ssa import get_phi_sources_parent_block, \ irblock_has_phi + class ReachingDefinitions(dict): """ Computes for each assignblock the set of reaching definitions. @@ -291,6 +292,7 @@ def _test_merge_next_block(ircfg, loc_key): return None if son not in ircfg.blocks: return None + return son @@ -328,14 +330,16 @@ def _do_merge_blocks(ircfg, loc_key, son_loc_key): ircfg.blocks[loc_key] = new_block -def _test_jmp_only(ircfg, loc_key): +def _test_jmp_only(ircfg, loc_key, heads): """ If irblock at @loc_key sets only IRDst to an ExprLoc, return the corresponding loc_key target. + Avoid creating predecssors for heads LocKeys None in other cases. @ircfg: IRCFG instance @loc_key: LocKey instance of the candidate irblock + @heads: LocKey heads of the graph """ @@ -347,11 +351,20 @@ def _test_jmp_only(ircfg, loc_key): items = dict(irblock.assignblks[0]).items() if len(items) != 1: return None + if len(ircfg.successors(loc_key)) != 1: + return None + # Don't create predecessors on heads dst, src = items[0] assert dst.is_id("IRDst") if not src.is_loc(): return None - return src.loc_key + dst = src.loc_key + if loc_key in heads: + predecessors = set(ircfg.predecessors(dst)) + predecessors.difference_update(set([loc_key])) + if predecessors: + return None + return dst def _relink_block_node(ircfg, loc_key, son_loc_key, replace_dct): @@ -396,7 +409,6 @@ def _remove_to_son(ircfg, loc_key, son_loc_key): # Unlink block destinations ircfg.del_edge(loc_key, son_loc_key) - del ircfg.blocks[loc_key] replace_dct = { ExprLoc(loc_key, ircfg.IRDst.size):ExprLoc(son_loc_key, ircfg.IRDst.size) @@ -404,6 +416,9 @@ def _remove_to_son(ircfg, loc_key, son_loc_key): _relink_block_node(ircfg, loc_key, son_loc_key, replace_dct) + ircfg.del_node(loc_key) + del ircfg.blocks[loc_key] + return True @@ -433,7 +448,6 @@ def _remove_to_parent(ircfg, loc_key, son_loc_key): ircfg.blocks[son_loc_key] = new_irblock - del ircfg.blocks[son_loc_key] ircfg.add_irblock(new_irblock) replace_dct = { @@ -442,10 +456,14 @@ def _remove_to_parent(ircfg, loc_key, son_loc_key): _relink_block_node(ircfg, son_loc_key, loc_key, replace_dct) + + ircfg.del_node(son_loc_key) + del ircfg.blocks[son_loc_key] + return True -def merge_blocks(ircfg, loc_key_entries): +def merge_blocks(ircfg, heads): """ This function modifies @ircfg to apply the following transformations: - group an irblock with its son if the irblock has one and only one son and @@ -457,10 +475,12 @@ def merge_blocks(ircfg, loc_key_entries): IRDst with a given label, this irblock is dropped and its son becomes the head. References are fixed + This function avoid creating predecessors on heads + Return True if at least an irblock has been modified @ircfg: IRCFG instance - @loc_key_entries: loc_key to keep + @heads: loc_key to keep """ modified = False @@ -470,15 +490,15 @@ def merge_blocks(ircfg, loc_key_entries): # Test merge block son = _test_merge_next_block(ircfg, loc_key) - if son is not None and son not in loc_key_entries: + if son is not None and son not in heads: _do_merge_blocks(ircfg, loc_key, son) todo.add(loc_key) modified = True continue # Test jmp only block - son = _test_jmp_only(ircfg, loc_key) - if son is not None and loc_key not in loc_key_entries: + son = _test_jmp_only(ircfg, loc_key, heads) + if son is not None and loc_key not in heads: ret = _remove_to_son(ircfg, loc_key, son) modified |= ret if ret: @@ -487,7 +507,7 @@ def merge_blocks(ircfg, loc_key_entries): # Test head jmp only block if (son is not None and - son not in loc_key_entries and + son not in heads and son in ircfg.blocks): # jmp only test done previously ret = _remove_to_parent(ircfg, loc_key, son) @@ -510,17 +530,19 @@ def remove_empty_assignblks(ircfg): modified = False for loc_key, block in ircfg.blocks.iteritems(): irs = [] + block_modified = False for assignblk in block: if len(assignblk): irs.append(assignblk) else: - modified = True - ircfg.blocks[loc_key] = IRBlock(loc_key, irs) - + block_modified = True + if block_modified: + new_irblock = IRBlock(loc_key, irs) + ircfg.blocks[loc_key] = new_irblock + modified = True return modified - class SSADefUse(DiGraph): """ Generate DefUse information from SSA transformation @@ -528,33 +550,28 @@ class SSADefUse(DiGraph): """ def add_var_def(self, node, src): - lbl, index, dst = node - index2dst = self._links.setdefault(lbl, {}) - dst2src = index2dst.setdefault(index, {}) - dst2src[dst] = src + index2dst = self._links.setdefault(node.label, {}) + dst2src = index2dst.setdefault(node.index, {}) + dst2src[node.var] = src def add_def_node(self, def_nodes, node, src): - lbl, index, dst = node - if dst.is_id(): - def_nodes[dst] = node + if node.var.is_id(): + def_nodes[node.var] = node def add_use_node(self, use_nodes, node, src): - lbl, index, dst = node sources = set() - if dst.is_mem(): - sources.update(dst.ptr.get_r(mem_read=True)) + if node.var.is_mem(): + sources.update(node.var.ptr.get_r(mem_read=True)) sources.update(src.get_r(mem_read=True)) for source in sources: if not source.is_mem(): use_nodes.setdefault(source, set()).add(node) def get_node_target(self, node): - lbl, index, reg = node - return self._links[lbl][index][reg] + return self._links[node.label][node.index][node.var] def set_node_target(self, node, src): - lbl, index, reg = node - self._links[lbl][index][reg] = src + self._links[node.label][node.index][node.var] = src @classmethod def from_ssa(cls, ssa): @@ -575,7 +592,7 @@ class SSADefUse(DiGraph): continue for index, assignblk in enumerate(block): for dst, src in assignblk.iteritems(): - node = lbl, index, dst + node = AssignblkNode(lbl, index, dst) graph.add_var_def(node, src) graph.add_def_node(def_nodes, node, src) graph.add_use_node(use_nodes, node, src) @@ -622,73 +639,58 @@ def expr_has_mem(expr): return expr_test_visit(expr, expr_has_mem_test) -def expr_has_call_test(expr, result): - if result: - # Don't analyse if we already found a candidate - return False - if expr.is_op() and expr.op.startswith("call"): - result.add(expr) - return False - return True - - -def expr_has_call(expr): +class PropagateThroughExprId(object): """ - Return True if expr contains at least one "call" operator - @expr: Expr instance + Propagate expressions though ExprId """ - return expr_test_visit(expr, expr_has_call_test) - - -class PropagateExpr(object): - - def assignblk_is_propagation_barrier(self, assignblk): - for dst, src in assignblk.iteritems(): - if expr_has_call(src): - return True - if dst.is_mem(): - return True - return False def has_propagation_barrier(self, assignblks): + """ + Return True if propagation cannot cross the @assignblks + @assignblks: list of AssignBlock to check + """ for assignblk in assignblks: for dst, src in assignblk.iteritems(): - if expr_has_call(src): + if src.is_function_call(): return True if dst.is_mem(): return True return False - def is_mem_written(self, ssa, node, successor): - loc_a, index_a, reg_a = node - loc_b, index_b, reg_b = successor - block_b = ssa.graph.blocks[loc_b] + def is_mem_written(self, ssa, node_a, node_b): + """ + Return True if memory is written at least once between @node_a and + @node_b - nodes_to_do = self.compute_reachable_nodes_from_a_to_b(ssa.graph, loc_a, loc_b) + @node: AssignblkNode representing the start position + @successor: AssignblkNode representing the end position + """ + block_b = ssa.graph.blocks[node_b.label] + nodes_to_do = self.compute_reachable_nodes_from_a_to_b(ssa.graph, node_a.label, node_b.label) - if loc_a == loc_b: + if node_a.label == node_b.label: # src is dst - assert nodes_to_do == set([loc_a]) - if self.has_propagation_barrier(block_b.assignblks[index_a:index_b]): + assert nodes_to_do == set([node_a.label]) + if self.has_propagation_barrier(block_b.assignblks[node_a.index:node_b.index]): return True else: - # Check everyone but loc_a and loc_b - for loc in nodes_to_do - set([loc_a, loc_b]): + # Check everyone but node_a.label and node_b.label + for loc in nodes_to_do - set([node_a.label, node_b.label]): block = ssa.graph.blocks[loc] if self.has_propagation_barrier(block.assignblks): return True - # Check loc_a partially - block_a = ssa.graph.blocks[loc_a] - if self.has_propagation_barrier(block_a.assignblks[index_a:]): + # Check node_a.label partially + block_a = ssa.graph.blocks[node_a.label] + if self.has_propagation_barrier(block_a.assignblks[node_a.index:]): return True - if nodes_to_do.intersection(ssa.graph.successors(loc_b)): - # There is a path from loc_b to loc_b => Check loc_b fully + if nodes_to_do.intersection(ssa.graph.successors(node_b.label)): + # There is a path from node_b.label to node_b.label => Check node_b.label fully if self.has_propagation_barrier(block_b.assignblks): return True else: - # Check loc_b partially - if self.has_propagation_barrier(block_b.assignblks[:index_b]): + # Check node_b.label partially + if self.has_propagation_barrier(block_b.assignblks[:node_b.index]): return True return False @@ -699,45 +701,104 @@ class PropagateExpr(object): def propagation_allowed(self, ssa, to_replace, node_a, node_b): """ - Return True if we can replace @node source into @node_b + Return True if we can replace @node_a source present in @to_replace into + @node_b + + @node_a: AssignblkNode position + @node_b: AssignblkNode position """ - loc_a, index_a, reg_a = node_a - if not expr_has_mem(to_replace[reg_a]): + if not expr_has_mem(to_replace[node_a.var]): return True if self.is_mem_written(ssa, node_a, node_b): return False return True - def propagate(self, ssa, head): + + def get_var_definitions(self, ssa): + """ + Return a dictionary linking variable to its assignment location + @ssa: SSADiGraph instance + """ + ircfg = ssa.graph + def_dct = {} + for node in ircfg.nodes(): + for index, assignblk in enumerate(ircfg.blocks[node]): + for dst, src in assignblk.iteritems(): + if not dst.is_id(): + continue + if dst in ssa.immutable_ids: + continue + assert dst not in def_dct + def_dct[dst] = node, index + return def_dct + + def phi_has_identical_sources(self, ssa, def_dct, var): + """ + If phi operation has identical source values, return it; else None + @ssa: SSADiGraph instance + @def_dct: dictionary linking variable to its assignment location + @var: Phi destination variable + """ + loc_key, index = def_dct[var] + sources = ssa.graph.blocks[loc_key][index][var] + assert sources.is_op('Phi') + sources_values = set() + for src in sources.args: + assert src in def_dct + loc_key, index = def_dct[src] + value = ssa.graph.blocks[loc_key][index][src] + sources_values.add(value) + if len(sources_values) != 1: + return None + return list(sources_values)[0] + + def get_candidates(self, ssa, head, max_expr_depth): + def_dct = self.get_var_definitions(ssa) defuse = SSADefUse.from_ssa(ssa) to_replace = {} node_to_reg = {} for node in defuse.nodes(): - lbl, index, reg = node + if node.var in ssa.immutable_ids: + continue src = defuse.get_node_target(node) - if expr_has_call(src): + if max_expr_depth is not None and len(str(src)) > max_expr_depth: continue - if src.is_op('Phi'): + if src.is_function_call(): + continue + if node.var.is_mem(): continue - if reg.is_mem(): + if src.is_op('Phi'): + ret = self.phi_has_identical_sources(ssa, def_dct, node.var) + if ret: + to_replace[node.var] = ret + node_to_reg[node] = node.var continue - to_replace[reg] = src - node_to_reg[node] = reg + to_replace[node.var] = src + node_to_reg[node] = node.var + return node_to_reg, to_replace, defuse + def propagate(self, ssa, head, max_expr_depth=None): + """ + Do expression propagation + @ssa: SSADiGraph instance + @head: the head location of the graph + @max_expr_depth: the maximum allowed depth of an expression + """ + node_to_reg, to_replace, defuse = self.get_candidates(ssa, head, max_expr_depth) modified = False for node, reg in node_to_reg.iteritems(): for successor in defuse.successors(node): if not self.propagation_allowed(ssa, to_replace, node, successor): continue - loc_a, index_a, reg_a = node - loc_b, index_b, reg_b = successor - block = ssa.graph.blocks[loc_b] + node_a = node + node_b = successor + block = ssa.graph.blocks[node_b.label] - replace = {reg_a: to_replace[reg_a]} + replace = {node_a.var: to_replace[node_a.var]} # Replace assignblks = list(block) - assignblk = block[index_b] + assignblk = block[node_b.index] out = {} for dst, src in assignblk.iteritems(): if src.is_op('Phi'): @@ -745,8 +806,7 @@ class PropagateExpr(object): continue if src.is_mem(): - ptr = src.ptr - ptr = ptr.replace_expr(replace) + ptr = src.ptr.replace_expr(replace) new_src = ExprMem(ptr, src.size) else: new_src = src.replace_expr(replace) @@ -754,8 +814,7 @@ class PropagateExpr(object): if dst.is_id(): new_dst = dst elif dst.is_mem(): - ptr = dst.ptr - ptr = ptr.replace_expr(replace) + ptr = dst.ptr.replace_expr(replace) new_dst = ExprMem(ptr, dst.size) else: new_dst = dst.replace_expr(replace) @@ -765,9 +824,107 @@ class PropagateExpr(object): modified = True out[new_dst] = new_src out = AssignBlock(out, assignblk.instr) - assignblks[index_b] = out + assignblks[node_b.index] = out new_block = IRBlock(block.loc_key, assignblks) ssa.graph.blocks[block.loc_key] = new_block + + return modified + + + +class PropagateExprIntThroughExprId(PropagateThroughExprId): + """ + Propagate ExprInt though ExprId: classic constant propagation + This is a sub family of PropagateThroughExprId. + It reduces leaves in expressions of a program. + """ + + def get_candidates(self, ssa, head, max_expr_depth): + defuse = SSADefUse.from_ssa(ssa) + + to_replace = {} + node_to_reg = {} + for node in defuse.nodes(): + src = defuse.get_node_target(node) + if not src.is_int(): + continue + if src.is_function_call(): + continue + if node.var.is_mem(): + continue + to_replace[node.var] = src + node_to_reg[node] = node.var + return node_to_reg, to_replace, defuse + + def propagation_allowed(self, ssa, to_replace, node_a, node_b): + """ + Propagating ExprInt is always ok + """ + return True + + +class PropagateThroughExprMem(object): + """ + Propagate through ExprMem in very simple cases: + - if no memory write between source and target + - if source does not contain any memory reference + """ + + def propagate(self, ssa, head, max_expr_depth=None): + ircfg = ssa.graph + todo = set() + modified = False + for block in ircfg.blocks.itervalues(): + for i, assignblk in enumerate(block): + for dst, src in assignblk.iteritems(): + if not dst.is_mem(): + continue + if expr_has_mem(src): + continue + todo.add((block.loc_key, i + 1, dst, src)) + ptr = dst.ptr + for size in xrange(8, dst.size, 8): + todo.add((block.loc_key, i + 1, ExprMem(ptr, size), src[:size])) + + while todo: + loc_key, index, mem_dst, mem_src = todo.pop() + block = ircfg.blocks[loc_key] + assignblks = list(block) + block_modified = False + for i in xrange(index, len(block)): + assignblk = block[i] + write_mem = False + assignblk_modified = False + out = dict(assignblk) + out_new = {} + for dst, src in out.iteritems(): + if dst.is_mem(): + write_mem = True + ptr = dst.ptr.replace_expr({mem_dst:mem_src}) + dst = ExprMem(ptr, dst.size) + src = src.replace_expr({mem_dst:mem_src}) + out_new[dst] = src + if out != out_new: + assignblk_modified = True + + if assignblk_modified: + assignblks[i] = AssignBlock(out_new, assignblk.instr) + block_modified = True + if write_mem: + break + else: + # If no memory written, we may propagate to sons + # if son has only parent + for successor in ircfg.successors(loc_key): + predecessors = ircfg.predecessors(successor) + if len(predecessors) != 1: + continue + todo.add((successor, 0, mem_dst, mem_src)) + + if block_modified: + modified = True + new_block = IRBlock(block.loc_key, assignblks) + ircfg.blocks[block.loc_key] = new_block return modified @@ -832,15 +989,15 @@ def check_expr_below_stack(ir_arch_a, expr): return True -def retrieve_stack_accesses(ir_arch_a, ssa): +def retrieve_stack_accesses(ir_arch_a, ircfg): """ Walk the ssa graph and find stack based variables. Return a dictionary linking stack base address to its size/name @ir_arch_a: ira instance - @ssa: SSADiGraph instance + @ircfg: IRCFG instance """ stack_vars = set() - for block in ssa.graph.blocks.itervalues(): + for block in ircfg.blocks.itervalues(): for assignblk in block: for dst, src in assignblk.iteritems(): stack_vars.update(get_stack_accesses(ir_arch_a, dst)) @@ -906,18 +1063,23 @@ def replace_mem_stack_vars(expr, base_to_info): return expr.visit(lambda expr:fix_stack_vars(expr, base_to_info)) -def replace_stack_vars(ir_arch_a, ssa): +def replace_stack_vars(ir_arch_a, ircfg): """ Try to replace stack based memory accesses by variables. + + Hypothesis: the input ircfg must have all it's accesses to stack explicitly + done through the stack register, ie every aliases on those variables is + resolved. + WARNING: may fail @ir_arch_a: ira instance - @ssa: SSADiGraph instance + @ircfg: IRCFG instance """ - base_to_info = retrieve_stack_accesses(ir_arch_a, ssa) + base_to_info = retrieve_stack_accesses(ir_arch_a, ircfg) modified = False - for block in ssa.graph.blocks.itervalues(): + for block in ircfg.blocks.itervalues(): assignblks = [] for assignblk in block: out = {} @@ -932,7 +1094,7 @@ def replace_stack_vars(ir_arch_a, ssa): out = AssignBlock(out, assignblk.instr) assignblks.append(out) new_block = IRBlock(block.loc_key, assignblks) - ssa.graph.blocks[block.loc_key] = new_block + ircfg.blocks[block.loc_key] = new_block return modified @@ -975,7 +1137,7 @@ def load_from_int(ir_arch, bs, is_addr_ro_variable): """ modified = False - for label, block in ir_arch.blocks.iteritems(): + for block in ir_arch.blocks.itervalues(): assignblks = list() for assignblk in block: out = {} diff --git a/miasm2/analysis/simplifier.py b/miasm2/analysis/simplifier.py new file mode 100644 index 00000000..ca8e74fb --- /dev/null +++ b/miasm2/analysis/simplifier.py @@ -0,0 +1,303 @@ +""" +Apply simplification passes to an IR cfg +""" + +import logging +from functools import wraps +from miasm2.analysis.ssa import SSADiGraph +from miasm2.analysis.outofssa import UnSSADiGraph +from miasm2.analysis.data_flow import DiGraphLivenessSSA +from miasm2.expression.simplifications import expr_simp +from miasm2.analysis.data_flow import dead_simp, \ + merge_blocks, remove_empty_assignblks, \ + PropagateExprIntThroughExprId, PropagateThroughExprId, \ + PropagateThroughExprMem, del_unused_edges + + +log = logging.getLogger("simplifier") +console_handler = logging.StreamHandler() +console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) +log.addHandler(console_handler) +log.setLevel(logging.WARNING) + + +def fix_point(func): + @wraps(func) + def ret_func(self, ircfg, head): + log.debug('[%s]: start', func.func_name) + has_been_modified = False + modified = True + while modified: + modified = func(self, ircfg, head) + has_been_modified |= modified + log.debug( + '[%s]: stop %r', + func.func_name, + has_been_modified + ) + return has_been_modified + return ret_func + + +class IRCFGSimplifier(object): + """ + Simplify an IRCFG + This class applies passes until reaching a fix point + """ + + def __init__(self, ir_arch): + self.ir_arch = ir_arch + self.init_passes() + + def init_passes(self): + """ + Init the array of simplification passes + """ + self.passes = [] + + @fix_point + def simplify(self, ircfg, head): + """ + Apply passes until reaching a fix point + Return True if the graph has been modified + + @ircfg: IRCFG instance to simplify + @head: Location instance of the ircfg head + """ + modified = False + for simplify_pass in self.passes: + modified |= simplify_pass(ircfg, head) + return modified + + def __call__(self, ircfg, head): + return self.simplify(ircfg, head) + + +class IRCFGSimplifierCommon(IRCFGSimplifier): + """ + Simplify an IRCFG + This class applies following passes until reaching a fix point: + - simplify_ircfg + - do_dead_simp_ircfg + """ + def __init__(self, ir_arch, expr_simp=expr_simp): + self.expr_simp = expr_simp + super(IRCFGSimplifierCommon, self).__init__(ir_arch) + + def init_passes(self): + self.passes = [ + self.simplify_ircfg, + self.do_dead_simp_ircfg, + ] + + @fix_point + def simplify_ircfg(self, ircfg, _head): + """ + Apply self.expr_simp on the @ircfg until reaching fix point + Return True if the graph has been modified + + @ircfg: IRCFG instance to simplify + """ + modified = ircfg.simplify(self.expr_simp) + return modified + + @fix_point + def do_dead_simp_ircfg(self, ircfg, head): + """ + Apply: + - dead_simp + - remove_empty_assignblks + - merge_blocks + on the @ircfg until reaching fix point + Return True if the graph has been modified + + @ircfg: IRCFG instance to simplify + @head: Location instance of the ircfg head + """ + modified = dead_simp(self.ir_arch, ircfg) + modified |= remove_empty_assignblks(ircfg) + modified |= merge_blocks(ircfg, set([head])) + return modified + + +class IRCFGSimplifierSSA(IRCFGSimplifierCommon): + """ + Simplify an IRCFG. + The IRCF is first transformed in SSA, then apply transformations passes + and apply out-of-ssa. Final passes of IRcfgSimplifier are applied + + This class apply following pass until reaching a fix point: + - do_propagate_int + - do_propagate_mem + - do_propagate_expr + - do_dead_simp_ssa + """ + + def __init__(self, ir_arch, expr_simp=expr_simp): + super(IRCFGSimplifierSSA, self).__init__(ir_arch, expr_simp) + + self.ir_arch.ssa_var = {} + self.all_ssa_vars = {} + + self.ssa_forbidden_regs = self.get_forbidden_regs() + + self.propag_int = PropagateExprIntThroughExprId() + self.propag_expr = PropagateThroughExprId() + self.propag_mem = PropagateThroughExprMem() + + def get_forbidden_regs(self): + """ + Return a set of immutable register during SSA transformation + """ + regs = set( + [ + self.ir_arch.pc, + self.ir_arch.IRDst, + self.ir_arch.arch.regs.exception_flags + ] + ) + return regs + + def init_passes(self): + """ + Init the array of simplification passes + """ + self.passes = [ + self.simplify_ssa, + self.do_propagate_int, + self.do_propagate_mem, + self.do_propagate_expr, + self.do_dead_simp_ssa, + ] + + def ircfg_to_ssa(self, ircfg, head): + """ + Apply the SSA transformation to @ircfg using it's @head + + @ircfg: IRCFG instance to simplify + @head: Location instance of the ircfg head + """ + ssa = SSADiGraph(ircfg) + ssa.immutable_ids.update(self.ssa_forbidden_regs) + ssa.ssa_variable_to_expr.update(self.all_ssa_vars) + ssa.transform(head) + self.all_ssa_vars.update(ssa.ssa_variable_to_expr) + self.ir_arch.ssa_var.update(ssa.ssa_variable_to_expr) + return ssa + + def ssa_to_unssa(self, ssa, head): + """ + Apply the out-of-ssa transformation to @ssa using it's @head + + @ssa: SSADiGraph instance + @head: Location instance of the graph head + """ + cfg_liveness = DiGraphLivenessSSA(ssa.graph) + cfg_liveness.init_var_info(self.ir_arch) + cfg_liveness.compute_liveness() + + UnSSADiGraph(ssa, head, cfg_liveness) + return ssa.graph + + @fix_point + def simplify_ssa(self, ssa, _head): + """ + Apply self.expr_simp on the @ssa.graph until reaching fix point + Return True if the graph has been modified + + @ssa: SSADiGraph instance + """ + modified = ssa.graph.simplify(self.expr_simp) + return modified + + @fix_point + def do_propagate_int(self, ssa, head): + """ + Constant propagation in the @ssa graph + @head: Location instance of the graph head + """ + modified = self.propag_int.propagate(ssa, head) + modified |= ssa.graph.simplify(self.expr_simp) + modified |= del_unused_edges(ssa.graph, set([head])) + return modified + + @fix_point + def do_propagate_mem(self, ssa, head): + """ + Propagation of expression based on ExprInt/ExprId in the @ssa graph + @head: Location instance of the graph head + """ + modified = self.propag_mem.propagate(ssa, head) + modified |= ssa.graph.simplify(self.expr_simp) + modified |= del_unused_edges(ssa.graph, set([head])) + return modified + + @fix_point + def do_propagate_expr(self, ssa, head): + """ + Expressions propagation through ExprId in the @ssa graph + @head: Location instance of the graph head + """ + modified = self.propag_expr.propagate(ssa, head) + modified |= ssa.graph.simplify(self.expr_simp) + modified |= del_unused_edges(ssa.graph, set([head])) + return modified + + @fix_point + def do_dead_simp_ssa(self, ssa, head): + """ + Apply: + - dead_simp + - remove_empty_assignblks + - del_unused_edges + - merge_blocks + on the @ircfg until reaching fix point + Return True if the graph has been modified + + @ircfg: IRCFG instance to simplify + @head: Location instance of the ircfg head + """ + modified = dead_simp(self.ir_arch, ssa.graph) + modified |= remove_empty_assignblks(ssa.graph) + modified |= del_unused_edges(ssa.graph, set([head])) + modified |= merge_blocks(ssa.graph, set([head])) + return modified + + def do_simplify(self, ssa, head): + """ + Apply passes until reaching a fix point + Return True if the graph has been modified + """ + return super(IRCFGSimplifierSSA, self).simplify(ssa, head) + + def do_simplify_loop(self, ssa, head): + """ + Apply do_simplify until reaching a fix point + SSA is updated between each do_simplify + Return True if the graph has been modified + """ + modified = True + while modified: + modified = self.do_simplify(ssa, head) + # Update ssa structs + ssa = self.ircfg_to_ssa(ssa.graph, head) + return ssa + + def simplify(self, ircfg, head): + """ + Apply SSA transformation to @ircfg + Apply passes until reaching a fix point + Apply out-of-ssa transformation + Apply post simplification passes + + Updated simplified IRCFG instance and return it + + @ircfg: IRCFG instance to simplify + @head: Location instance of the ircfg head + """ + ssa = self.ircfg_to_ssa(ircfg, head) + ssa = self.do_simplify_loop(ssa, head) + ircfg = self.ssa_to_unssa(ssa, head) + ircfg_simplifier = IRCFGSimplifierCommon(self.ir_arch) + ircfg_simplifier.simplify(ircfg, head) + return ircfg diff --git a/miasm2/analysis/ssa.py b/miasm2/analysis/ssa.py index ea3189eb..501cdd5e 100644 --- a/miasm2/analysis/ssa.py +++ b/miasm2/analysis/ssa.py @@ -1,9 +1,46 @@ from collections import deque -from miasm2.expression.expression import ExprId, ExprAssign, ExprOp, get_expr_ids +from miasm2.expression.expression import ExprId, ExprAssign, ExprOp, \ + ExprLoc, get_expr_ids from miasm2.ir.ir import AssignBlock, IRBlock +def sanitize_graph_head(ircfg, head): + """ + In multiple algorithm, the @head of the ircfg may not have predecessors. + The function transform the @ircfg in order to ensure this property + @ircfg: IRCFG instance + @head: the location of the graph's head + """ + + if not ircfg.predecessors(head): + return + original_edges = ircfg.predecessors(head) + sub_head = ircfg.loc_db.add_location() + + # Duplicate graph, replacing references to head by sub_head + replaced_expr = { + ExprLoc(head, ircfg.IRDst.size): + ExprLoc(sub_head, ircfg.IRDst.size) + } + ircfg.simplify( + lambda expr:expr.replace_expr(replaced_expr) + ) + # Duplicate head block + ircfg.add_irblock(IRBlock(sub_head, list(ircfg.blocks[head]))) + + # Remove original head block + ircfg.del_node(head) + + for src in original_edges: + ircfg.add_edge(src, sub_head) + + # Create new head, jumping to sub_head + assignblk = AssignBlock({ircfg.IRDst:ExprLoc(sub_head, ircfg.IRDst.size)}) + new_irblock = IRBlock(head, [assignblk]) + ircfg.add_irblock(new_irblock) + + class SSA(object): """ Generic class for static single assignment (SSA) transformation @@ -366,6 +403,7 @@ class SSADiGraph(SSA): def transform(self, head): """Transforms into SSA""" + sanitize_graph_head(self.graph, head) self._init_variable_defs(head) self._place_phi(head) self._rename(head) @@ -595,10 +633,11 @@ class SSADiGraph(SSA): # Replace non modified node used in phi with new variable self.ircfg.simplify(lambda expr:expr.replace_expr(var_to_newname)) - irblock = self.ircfg.blocks[head] - assignblks = list(irblock) - assignblks[0:0] = [AssignBlock(newname_to_var, assignblks[0].instr)] - self.ircfg.blocks[head] = IRBlock(head, assignblks) + if newname_to_var: + irblock = self.ircfg.blocks[head] + assignblks = list(irblock) + assignblks[0:0] = [AssignBlock(newname_to_var, assignblks[0].instr)] + self.ircfg.blocks[head] = IRBlock(head, assignblks) # Updt structure for loc_key in self._phinodes: @@ -612,7 +651,6 @@ class SSADiGraph(SSA): for newname, var in newname_to_var.iteritems(): self.ssa_to_location[newname] = head, 0 self.ssa_variable_to_expr[newname] = var - self.immutable_ids.add(newname) self.expressions[newname] = var @@ -718,7 +756,6 @@ class UnSSADiGraph(object): self.insert_parallel_copy() self.replace_merge_sets() self.remove_assign_eq() - remove_empty_assignblks(self.ssa.graph) def insert_parallel_copy(self): """ diff --git a/miasm2/arch/arm/arch.py b/miasm2/arch/arm/arch.py index e7054b51..c50748e4 100644 --- a/miasm2/arch/arm/arch.py +++ b/miasm2/arch/arm/arch.py @@ -1694,10 +1694,12 @@ armop("sxtb", [bs('01101010'), bs('1111'), rd, rot_rm, bs('00'), bs('0111'), rm_ armop("sxth", [bs('01101011'), bs('1111'), rd, rot_rm, bs('00'), bs('0111'), rm_noarg]) armop("rev", [bs('01101011'), bs('1111'), rd, bs('1111'), bs('0011'), rm]) +armop("rev16", [bs('01101011'), bs('1111'), rd, bs('1111'), bs('1011'), rm]) armop("pld", [bs8(0xF5), bs_addi, bs_rw, bs('01'), mem_rn_imm, bs('1111'), imm12_off]) armop("isb", [bs8(0xF5), bs8(0x7F), bs8(0xF0), bs8(0x6F)]) +armop("nop", [bs8(0xE3), bs8(0x20), bs8(0xF0), bs8(0)]) class arm_widthm1(arm_imm, m_arg): def decode(self, v): diff --git a/miasm2/arch/arm/sem.py b/miasm2/arch/arm/sem.py index 3e017d1f..782a99fb 100644 --- a/miasm2/arch/arm/sem.py +++ b/miasm2/arch/arm/sem.py @@ -1493,6 +1493,7 @@ mnemo_condm0 = {'add': add, 'mla': mla, 'ldr': ldr, 'ldrd': ldrd, + 'ldrsb': ldrsb, 'str': l_str, 'strd': l_strd, 'b': b, @@ -1549,7 +1550,6 @@ mnemo_condm1 = {'adds': add, 'blx': blx, 'ldrb': ldrb, - 'ldrsb': ldrsb, 'ldsb': ldrsb, 'strb': strb, } diff --git a/miasm2/arch/x86/regs.py b/miasm2/arch/x86/regs.py index 84590c75..ef1095e2 100644 --- a/miasm2/arch/x86/regs.py +++ b/miasm2/arch/x86/regs.py @@ -235,9 +235,7 @@ reg_mm5 = 'MM5' reg_mm6 = 'MM6' reg_mm7 = 'MM7' - -reg_tsc1 = "tsc1" -reg_tsc2 = "tsc2" +reg_tsc = "tsc" reg_float_c0 = 'float_c0' reg_float_c1 = 'float_c1' @@ -321,8 +319,7 @@ DS = ExprId(reg_ds, size=16) FS = ExprId(reg_fs, size=16) GS = ExprId(reg_gs, size=16) -tsc1 = ExprId(reg_tsc1, size=32) -tsc2 = ExprId(reg_tsc2, size=32) +tsc = ExprId(reg_tsc, size=64) float_c0 = ExprId(reg_float_c0, size=1) float_c1 = ExprId(reg_float_c1, size=1) @@ -388,7 +385,7 @@ all_regs_ids = [ zf, nf, pf, of, cf, af, df, tf, i_f, iopl, nt, rf, vm, ac, vif, vip, i_d, float_control, float_eip, float_cs, float_address, float_ds, - tsc1, tsc2, + tsc, ES, CS, SS, DS, FS, GS, float_st0, float_st1, float_st2, float_st3, float_st4, float_st5, float_st6, float_st7, @@ -411,7 +408,7 @@ all_regs_ids_no_alias = [ zf, nf, pf, of, cf, af, df, tf, i_f, iopl, nt, rf, vm, ac, vif, vip, i_d, float_control, float_eip, float_cs, float_address, float_ds, - tsc1, tsc2, + tsc, ES, CS, SS, DS, FS, GS, float_st0, float_st1, float_st2, float_st3, float_st4, float_st5, float_st6, float_st7, diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py index b2ef5a43..d03a7cd4 100644 --- a/miasm2/arch/x86/sem.py +++ b/miasm2/arch/x86/sem.py @@ -3040,12 +3040,9 @@ def hlt(_, instr): def rdtsc(_, instr): e = [] - e.append(m2_expr.ExprAssign(tsc1, tsc1 + m2_expr.ExprInt(1, 32))) - e.append(m2_expr.ExprAssign(tsc2, tsc2 + m2_expr.ExprCond(tsc1 - tsc1.mask, - m2_expr.ExprInt(0, 32), - m2_expr.ExprInt(1, 32)))) - e.append(m2_expr.ExprAssign(mRAX[32], tsc1)) - e.append(m2_expr.ExprAssign(mRDX[32], tsc2)) + e.append(m2_expr.ExprAssign(tsc, tsc + m2_expr.ExprInt(1, 64))) + e.append(m2_expr.ExprAssign(mRAX[32], tsc[:32])) + e.append(m2_expr.ExprAssign(mRDX[32], tsc[32:])) return e, [] diff --git a/miasm2/jitter/arch/JitCore_x86.c b/miasm2/jitter/arch/JitCore_x86.c index fa47b324..a13b6881 100644 --- a/miasm2/jitter/arch/JitCore_x86.c +++ b/miasm2/jitter/arch/JitCore_x86.c @@ -74,8 +74,7 @@ reg_dict gpreg_dict[] = { {.name = "XMM14", .offset = offsetof(vm_cpu_t, XMM14), .size = 128}, {.name = "XMM15", .offset = offsetof(vm_cpu_t, XMM15), .size = 128}, - {.name = "tsc1", .offset = offsetof(vm_cpu_t, tsc1), .size = 32}, - {.name = "tsc2", .offset = offsetof(vm_cpu_t, tsc2), .size = 32}, + {.name = "tsc", .offset = offsetof(vm_cpu_t, tsc), .size = 64}, {.name = "exception_flags", .offset = offsetof(vm_cpu_t, exception_flags), .size = 32}, {.name = "interrupt_num", .offset = offsetof(vm_cpu_t, interrupt_num), .size = 32}, @@ -156,8 +155,7 @@ PyObject* cpu_get_gpreg(JitCpu* self) get_reg_bn(XMM14, 128); get_reg_bn(XMM15, 128); - get_reg(tsc1); - get_reg(tsc2); + get_reg(tsc); return dict; } @@ -266,8 +264,7 @@ PyObject* cpu_set_gpreg(JitCpu* self, PyObject *args) PyObject * cpu_init_regs(JitCpu* self) { memset(self->cpu, 0, sizeof(vm_cpu_t)); - ((vm_cpu_t*)self->cpu)->tsc1 = 0x22222222; - ((vm_cpu_t*)self->cpu)->tsc2 = 0x11111111; + ((vm_cpu_t*)self->cpu)->tsc = 0x1122334455667788ULL; ((vm_cpu_t*)self->cpu)->i_f = 1; Py_INCREF(Py_None); return Py_None; @@ -662,8 +659,7 @@ getset_reg_bn(XMM13, 128); getset_reg_bn(XMM14, 128); getset_reg_bn(XMM15, 128); -getset_reg_u32(tsc1); -getset_reg_u32(tsc2); +getset_reg_u64(tsc); getset_reg_u32(exception_flags); getset_reg_u32(interrupt_num); @@ -754,8 +750,7 @@ PyObject* get_gpreg_offset_all(void) get_reg_off(XMM14); get_reg_off(XMM15); - get_reg_off(tsc1); - get_reg_off(tsc2); + get_reg_off(tsc); get_reg_off(interrupt_num); get_reg_off(exception_flags); @@ -859,8 +854,7 @@ static PyGetSetDef JitCpu_getseters[] = { {"XMM14", (getter)JitCpu_get_XMM14, (setter)JitCpu_set_XMM14, "XMM14", NULL}, {"XMM15", (getter)JitCpu_get_XMM15, (setter)JitCpu_set_XMM15, "XMM15", NULL}, - {"tsc1", (getter)JitCpu_get_tsc1, (setter)JitCpu_set_tsc1, "tsc1", NULL}, - {"tsc2", (getter)JitCpu_get_tsc2, (setter)JitCpu_set_tsc2, "tsc2", NULL}, + {"tsc", (getter)JitCpu_get_tsc, (setter)JitCpu_set_tsc, "tsc", NULL}, {"exception_flags", (getter)JitCpu_get_exception_flags, (setter)JitCpu_set_exception_flags, "exception_flags", NULL}, {"interrupt_num", (getter)JitCpu_get_interrupt_num, (setter)JitCpu_set_interrupt_num, "interrupt_num", NULL}, diff --git a/miasm2/jitter/arch/JitCore_x86.h b/miasm2/jitter/arch/JitCore_x86.h index 6d86d6b8..27d94d7c 100644 --- a/miasm2/jitter/arch/JitCore_x86.h +++ b/miasm2/jitter/arch/JitCore_x86.h @@ -80,8 +80,7 @@ typedef struct { unsigned int reg_float_ds; - uint64_t tsc1; - uint64_t tsc2; + uint64_t tsc; uint16_t ES; diff --git a/test/analysis/unssa.py b/test/analysis/unssa.py index ae9566ee..a796f3b6 100644 --- a/test/analysis/unssa.py +++ b/test/analysis/unssa.py @@ -1,14 +1,10 @@ """ Test cases for dead code elimination""" -from pdb import pm -from pprint import pprint as pp from miasm2.expression.expression import ExprId, ExprInt, ExprAssign, ExprMem, \ - ExprCond, ExprOp, ExprLoc + ExprCond, ExprLoc from miasm2.core.locationdb import LocationDB -from miasm2.analysis.data_flow import DiGraphLivenessSSA, dead_simp, PropagateExpr +from miasm2.analysis.simplifier import IRCFGSimplifierSSA from miasm2.ir.analysis import ira from miasm2.ir.ir import IRCFG, IRBlock, AssignBlock -from miasm2.analysis.ssa import SSADiGraph -from miasm2.analysis.outofssa import UnSSADiGraph loc_db = LocationDB() @@ -595,6 +591,18 @@ def add_out_reg_end(ir_arch_a, ircfg_a): ir_arch_a = IRAOutRegs(loc_db) +class CustomIRCFGSimplifierSSA(IRCFGSimplifierSSA): + def get_forbidden_regs(self): + """ + Return a set of immutable register during SSA transformation + """ + regs = set( + [ + self.ir_arch.pc, + self.ir_arch.IRDst, + ] + ) + return regs for test_nb, ircfg in enumerate( [ @@ -620,34 +628,8 @@ for test_nb, ircfg in enumerate( # SSA head = LBL0 - ssa = SSADiGraph(ircfg) - ssa.transform(head) - - ir_arch_a.ssa_var = ssa.ssa_variable_to_expr - dead_simp(ir_arch_a, ssa.graph) - - open("ssa_%d.dot" % test_nb, "wb").write(ssa.graph.dot()) - - - - # Un SSA - ir_arch_a.ssa_var = ssa.ssa_variable_to_expr - - propagate_expr = PropagateExpr() - modified = True - while modified: - modified = False - modified |= propagate_expr.propagate(ssa, head) - - open('tmp_%d.dot' % test_nb, 'w').write(ssa.graph.dot()) - - cfg_liveness = DiGraphLivenessSSA(ssa.graph) - cfg_liveness.init_var_info(ir_arch_a) - cfg_liveness.compute_liveness() - - open('liveness_%d.dot' % test_nb, 'w').write(cfg_liveness.dot()) - - unssa = UnSSADiGraph(ssa, head, cfg_liveness) - open('final_%d.dot' % test_nb, 'w').write(unssa.ssa.graph.dot()) + simplifier = CustomIRCFGSimplifierSSA(ir_arch_a) + ircfg = simplifier(ircfg, head) + open('final_%d.dot' % test_nb, 'w').write(ircfg.dot()) # XXX TODO: add real regression test diff --git a/test/ir/reduce_graph.py b/test/ir/reduce_graph.py index 29a3501f..75ff3410 100644 --- a/test/ir/reduce_graph.py +++ b/test/ir/reduce_graph.py @@ -321,17 +321,27 @@ G4_RES_IRB0 = gen_irblock( LBL0, [ [ + ExprAssign(IRDst, ExprLoc(LBL1, 32)), + ] + ] +) + + +G4_RES_IRB1 = gen_irblock( + LBL1, + [ + [ ExprAssign(A, C), ], [ ExprAssign(D, A), - ExprAssign(IRDst, ExprLoc(LBL0, 32)), + ExprAssign(IRDst, ExprLoc(LBL1, 32)), ] ] ) -for irb in [G4_RES_IRB0 ]: +for irb in [G4_RES_IRB0, G4_RES_IRB1 ]: G4_RES.add_irblock(irb) @@ -389,15 +399,25 @@ for irb in [G5_IRB0, G5_IRB1, G5_IRB2, G5_IRB3]: # Result G5_RES = IRA.new_ircfg() + G5_RES_IRB0 = gen_irblock( LBL0, [ [ + ExprAssign(IRDst, ExprLoc(LBL1, 32)), + ] + ] +) + +G5_RES_IRB1 = gen_irblock( + LBL1, + [ + [ ExprAssign(A, C), ], [ ExprAssign(D, A), - ExprAssign(IRDst, ExprCond(C, ExprLoc(LBL0, 32), ExprLoc(LBL3, 32))), + ExprAssign(IRDst, ExprCond(C, ExprLoc(LBL1, 32), ExprLoc(LBL3, 32))), ] ] ) @@ -413,7 +433,7 @@ G5_RES_IRB3 = gen_irblock( ] ) -for irb in [G5_RES_IRB0, G5_RES_IRB3 ]: +for irb in [G5_RES_IRB0, G5_RES_IRB1, G5_RES_IRB3 ]: G5_RES.add_irblock(irb) @@ -605,14 +625,23 @@ G8_RES_IRB0 = gen_irblock( LBL0, [ [ + ExprAssign(IRDst, ExprLoc(LBL1, 32)), + ] + ] +) + +G8_RES_IRB1 = gen_irblock( + LBL1, + [ + [ ExprAssign(A, C), - ExprAssign(IRDst, ExprLoc(LBL0, 32)), + ExprAssign(IRDst, ExprLoc(LBL1, 32)), ] ] ) -for irb in [G8_RES_IRB0]: +for irb in [G8_RES_IRB0, G8_RES_IRB1]: G8_RES.add_irblock(irb) |