diff options
30 files changed, 973 insertions, 359 deletions
diff --git a/.appveyor.yml b/.appveyor.yml index 5a6b3b38..fb565570 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -33,7 +33,7 @@ build_script: test_script: - cmd: cd c:\projects\miasm\test - - "%PYTHON%\\python.exe test_all.py" + - "%PYTHON%\\python.exe -W error test_all.py" after_test: - cmd: chdir diff --git a/.travis.yml b/.travis.yml index f5c55368..ee4f0ed5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -26,9 +26,7 @@ before_script: - pip install -r optional_requirements.txt # codespell - "pip install codespell && git ls-files | xargs codespell --ignore-words=.codespell_ignore 2>/dev/null" -# turn deprecation warning into RuntimeError -- "find . -name '*.py' | xargs sed -i 's/warnings\\.warn(/raise RuntimeError(/g'" # install - python setup.py build build_ext - python setup.py install -script: cd test && python test_all.py $MIASM_TEST_EXTRA_ARG && git ls-files -o --exclude-standard +script: cd test && python -W error test_all.py $MIASM_TEST_EXTRA_ARG && git ls-files -o --exclude-standard diff --git a/example/disasm/full.py b/example/disasm/full.py index cc7f9be8..42d50216 100644 --- a/example/disasm/full.py +++ b/example/disasm/full.py @@ -8,9 +8,12 @@ from miasm2.core.interval import interval from miasm2.analysis.machine import Machine from miasm2.analysis.data_flow import dead_simp, DiGraphDefUse, \ ReachingDefinitions, merge_blocks, remove_empty_assignblks, \ - PropagateExpr, replace_stack_vars, load_from_int + PropagateExpr, replace_stack_vars, load_from_int, \ + del_unused_edges from miasm2.expression.simplifications import expr_simp -from miasm2.analysis.ssa import SSADiGraph, UnSSADiGraph, DiGraphLivenessSSA +from miasm2.analysis.ssa import SSADiGraph +from miasm2.analysis.outofssa import UnSSADiGraph +from miasm2.analysis.data_flow import DiGraphLivenessSSA from miasm2.ir.ir import AssignBlock, IRBlock @@ -45,9 +48,9 @@ parser.add_argument('-l', "--dontdis-retcall", action="store_true", help="If set, disassemble only call destinations") parser.add_argument('-s', "--simplify", action="count", help="Apply simplifications rules (liveness, graph simplification, ...)") -parser.add_argument('-o', "--shiftoffset", default=0, +parser.add_argument("--base-address", default=0, type=lambda x: int(x, 0), - help="Shift input binary by an offset") + help="Base address of the input binary") parser.add_argument('-a', "--try-disasm-all", action="store_true", help="Try to disassemble the whole binary") parser.add_argument('-i', "--image", action="store_true", @@ -79,10 +82,10 @@ if args.verbose: log.info('Load binary') if args.rawbinary: cont = Container.fallback_container(open(args.filename, "rb").read(), - vm=None, addr=args.shiftoffset) + vm=None, addr=args.base_address) else: with open(args.filename, "rb") as fdesc: - cont = Container.from_stream(fdesc, addr=args.shiftoffset) + cont = Container.from_stream(fdesc, addr=args.base_address) default_addr = cont.entry_point bs = cont.bin_stream @@ -395,6 +398,9 @@ if args.propagexpr: if args.verbose > 3: open('tmp_after_%d.dot' % index, 'w').write(ircfg_a.dot()) simp_modified |= remove_empty_assignblks(ircfg_a) + simp_modified |= del_unused_edges(ircfg_a, heads) + simp_modified |= merge_blocks(ircfg_a, heads) + if args.loadint: simp_modified |= load_from_int(ircfg_a, bs, is_addr_ro_variable) modified |= simp_modified diff --git a/example/ida/ctype_propagation.py b/example/ida/ctype_propagation.py index 1c883214..61bc747f 100644 --- a/example/ida/ctype_propagation.py +++ b/example/ida/ctype_propagation.py @@ -156,13 +156,10 @@ class SymbExecCTypeFix(SymbExecCType): def __init__(self, ir_arch, symbols, chandler, cst_propag_link, - func_read=None, func_write=None, sb_expr_simp=expr_simp): super(SymbExecCTypeFix, self).__init__(ir_arch, symbols, chandler, - func_read=func_read, - func_write=func_write, sb_expr_simp=expr_simp) self.cst_propag_link = cst_propag_link diff --git a/example/jitter/trace.py b/example/jitter/trace.py index aeb4c775..e1683450 100644 --- a/example/jitter/trace.py +++ b/example/jitter/trace.py @@ -24,14 +24,14 @@ def instr_hook(jitter): class ESETrackMemory(EmulatedSymbExec): """Emulated symb exec with memory access tracking""" - def _func_read(self, expr_mem): - value = super(ESETrackMemory, self)._func_read(expr_mem) + def mem_read(self, expr_mem): + value = super(ESETrackMemory, self).mem_read(expr_mem) print "Read %s: %s" % (expr_mem, value) return value - def _func_write(self, symb_exec, dest, data): + def mem_write(self, dest, data): print "Write %s: %s" % (dest, data) - return super(ESETrackMemory, self)._func_write(symb_exec, dest, data) + return super(ESETrackMemory, self).mem_write(dest, data) # Parse arguments parser = Sandbox_Linux_arml.parser(description="Tracer") diff --git a/miasm2/analysis/binary.py b/miasm2/analysis/binary.py index 90d71369..93bd74b2 100644 --- a/miasm2/analysis/binary.py +++ b/miasm2/analysis/binary.py @@ -68,7 +68,7 @@ class Container(object): """Instantiate a container and parse the binary @stream: stream to use as binary @vm: (optional) VmMngr instance to link with the executable - @addr: (optional) Shift to apply before parsing the binary. If set, + @addr: (optional) Base address of the parsed binary. If set, force the unknown format """ return Container.from_string(stream.read(), *args, **kwargs) @@ -215,7 +215,7 @@ class ContainerUnknown(Container): "Container abstraction for unknown format" def parse(self, data, vm=None, addr=0, **kwargs): - self._bin_stream = bin_stream_str(data, shift=addr) + self._bin_stream = bin_stream_str(data, base_address=addr) if vm is not None: vm.add_memory_page(addr, PAGE_READ, diff --git a/miasm2/analysis/data_flow.py b/miasm2/analysis/data_flow.py index 799b17d0..4bf64e25 100644 --- a/miasm2/analysis/data_flow.py +++ b/miasm2/analysis/data_flow.py @@ -4,9 +4,12 @@ from collections import namedtuple from miasm2.core.graph import DiGraph from miasm2.ir.ir import AssignBlock, IRBlock from miasm2.expression.expression import ExprLoc, ExprMem, ExprId, ExprInt,\ - ExprAssign + ExprAssign, ExprOp from miasm2.expression.simplifications import expr_simp from miasm2.core.interval import interval +from miasm2.expression.expression_helper import possible_values +from miasm2.analysis.ssa import get_phi_sources_parent_block, \ + irblock_has_phi class ReachingDefinitions(dict): """ @@ -1212,3 +1215,201 @@ class DiGraphLivenessIRA(DiGraphLiveness): var_out = ir_arch_a.get_out_regs(irblock) irblock_liveness = self.blocks[node] irblock_liveness.infos[-1].var_out = var_out + + +def discard_phi_sources(ircfg, deleted_vars): + """ + Remove phi sources in @ircfg belonging to @deleted_vars set + @ircfg: IRCFG instance in ssa form + @deleted_vars: unused phi sources + """ + for block in ircfg.blocks.values(): + if not block.assignblks: + continue + assignblk = block[0] + todo = {} + modified = False + for dst, src in assignblk.iteritems(): + if not src.is_op('Phi'): + todo[dst] = src + continue + srcs = set(expr for expr in src.args if expr not in deleted_vars) + assert(srcs) + if len(srcs) > 1: + todo[dst] = srcs + continue + todo[dst] = srcs.pop() + modified = True + if not modified: + continue + assignblks = list(block) + assignblk = dict(assignblk) + assignblk.update(todo) + assignblk = AssignBlock(assignblk, assignblks[0].instr) + assignblks[0] = assignblk + new_irblock = IRBlock(block.loc_key, assignblks) + ircfg.blocks[block.loc_key] = new_irblock + return True + + +def get_unreachable_nodes(ircfg, edges_to_del, heads): + """ + Return the unreachable nodes starting from heads and the associated edges to + be deleted. + + @ircfg: IRCFG instance + @edges_to_del: edges already marked as deleted + heads: locations of graph heads + """ + todo = set(heads) + visited_nodes = set() + new_edges_to_del = set() + while todo: + node = todo.pop() + if node in visited_nodes: + continue + visited_nodes.add(node) + for successor in ircfg.successors(node): + if (node, successor) not in edges_to_del: + todo.add(successor) + all_nodes = set(ircfg.nodes()) + nodes_to_del = all_nodes.difference(visited_nodes) + for node in nodes_to_del: + for successor in ircfg.successors(node): + if successor not in nodes_to_del: + # Frontier: link from a deleted node to a living node + new_edges_to_del.add((node, successor)) + return nodes_to_del, new_edges_to_del + + +def update_phi_with_deleted_edges(ircfg, edges_to_del): + """ + Update phi which have a source present in @edges_to_del + @ssa: IRCFG instance in ssa form + @edges_to_del: edges to delete + """ + + modified = False + blocks = dict(ircfg.blocks) + for loc_src, loc_dst in edges_to_del: + block = ircfg.blocks[loc_dst] + assert block.assignblks + assignblks = list(block) + assignblk = assignblks[0] + out = {} + for dst, phi_sources in assignblk.iteritems(): + if not phi_sources.is_op('Phi'): + out = assignblk + break + var_to_parents = get_phi_sources_parent_block( + ircfg, + loc_dst, + phi_sources.args + ) + to_keep = set(phi_sources.args) + for src in phi_sources.args: + parents = var_to_parents[src] + if loc_src in parents: + to_keep.discard(src) + modified = True + assert to_keep + if len(to_keep) == 1: + out[dst] = to_keep.pop() + else: + out[dst] = ExprOp('Phi', *to_keep) + assignblk = AssignBlock(out, assignblks[0].instr) + assignblks[0] = assignblk + new_irblock = IRBlock(loc_dst, assignblks) + blocks[block.loc_key] = new_irblock + + for loc_key, block in blocks.iteritems(): + ircfg.blocks[loc_key] = block + return modified + + +def del_unused_edges(ircfg, heads): + """ + Delete non accessible edges in the @ircfg graph. + @ircfg: IRCFG instance in ssa form + @heads: location of the heads of the graph + """ + + deleted_vars = set() + modified = False + edges_to_del_1 = set() + for node in ircfg.nodes(): + successors = set(ircfg.successors(node)) + block = ircfg.blocks[node] + dst = block.dst + possible_dsts = set(solution.value for solution in possible_values(dst)) + if not all(dst.is_loc() for dst in possible_dsts): + continue + possible_dsts = set(dst.loc_key for dst in possible_dsts) + if len(possible_dsts) == len(successors): + continue + dsts_to_del = successors.difference(possible_dsts) + for dst in dsts_to_del: + edges_to_del_1.add((node, dst)) + + # Remove edges and update phi accordingly + # Two cases here: + # - edge is directly linked to a phi node + # - edge is indirect linked to a phi node + nodes_to_del, edges_to_del_2 = get_unreachable_nodes(ircfg, edges_to_del_1, heads) + modified |= update_phi_with_deleted_edges(ircfg, edges_to_del_1.union(edges_to_del_2)) + + for src, dst in edges_to_del_1.union(edges_to_del_2): + ircfg.del_edge(src, dst) + for node in nodes_to_del: + block = ircfg.blocks[node] + ircfg.del_node(node) + for assignblock in block: + for dst in assignblock: + deleted_vars.add(dst) + + if deleted_vars: + modified |= discard_phi_sources(ircfg, deleted_vars) + + return modified + + +class DiGraphLivenessSSA(DiGraphLivenessIRA): + """ + DiGraph representing variable liveness is a SSA graph + """ + def __init__(self, ircfg): + super(DiGraphLivenessSSA, self).__init__(ircfg) + + self.loc_key_to_phi_parents = {} + for irblock in self.blocks.values(): + if not irblock_has_phi(irblock): + continue + out = {} + for sources in irblock[0].itervalues(): + var_to_parents = get_phi_sources_parent_block(self, irblock.loc_key, sources.args) + for var, var_parents in var_to_parents.iteritems(): + out.setdefault(var, set()).update(var_parents) + self.loc_key_to_phi_parents[irblock.loc_key] = out + + def back_propagate_to_parent(self, todo, node, parent): + parent_block = self.blocks[parent] + cur_block = self.blocks[node] + irblock = self.ircfg.blocks[node] + if cur_block.infos[0].var_in == parent_block.infos[-1].var_out: + return + var_info = cur_block.infos[0].var_in.union(parent_block.infos[-1].var_out) + + if irblock_has_phi(irblock): + # Remove phi special case + out = set() + phi_sources = self.loc_key_to_phi_parents[irblock.loc_key] + for var in var_info: + if var not in phi_sources: + out.add(var) + continue + if parent in phi_sources[var]: + out.add(var) + var_info = out + + parent_block.infos[-1].var_out = var_info + todo.add(parent) diff --git a/miasm2/analysis/dse.py b/miasm2/analysis/dse.py index 3a986537..5eb924d7 100644 --- a/miasm2/analysis/dse.py +++ b/miasm2/analysis/dse.py @@ -104,25 +104,25 @@ class ESETrackModif(EmulatedSymbExec): self.dse_memory_to_expr = None # function(addr) -> Expr used to # symbolize - def _func_read(self, expr_mem): + def mem_read(self, expr_mem): if not expr_mem.ptr.is_int(): return expr_mem dst_addr = int(expr_mem.ptr) - if not self.dse_memory_range: - # Trivial case (optimization) - return super(ESETrackModif, self)._func_read(expr_mem) - # Split access in atomic accesses out = [] for addr in xrange(dst_addr, dst_addr + (expr_mem.size / 8)): if addr in self.dse_memory_range: # Symbolize memory access out.append(self.dse_memory_to_expr(addr)) + continue + atomic_access = ExprMem(ExprInt(addr, expr_mem.ptr.size), 8) + if atomic_access in self.symbols: + out.append( super(EmulatedSymbExec, self).mem_read(atomic_access)) else: # Get concrete value atomic_access = ExprMem(ExprInt(addr, expr_mem.ptr.size), 8) - out.append(super(ESETrackModif, self)._func_read(atomic_access)) + out.append(super(ESETrackModif, self).mem_read(atomic_access)) if len(out) == 1: # Trivial case (optimization) @@ -131,6 +131,10 @@ class ESETrackModif(EmulatedSymbExec): # Simplify for constant merging (ex: {ExprInt(1, 8), ExprInt(2, 8)}) return self.expr_simp(ExprCompose(*out)) + def mem_write(self, expr, data): + # Call Symbolic mem_write (avoid side effects on vm) + return super(EmulatedSymbExec, self).mem_write(expr, data) + def reset_modified(self): """Reset modified expression tracker""" self.modified_expr.clear() @@ -140,6 +144,14 @@ class ESETrackModif(EmulatedSymbExec): self.modified_expr.add(dst) +class ESENoVMSideEffects(EmulatedSymbExec): + """ + Do EmulatedSymbExec without modifying memory + """ + def mem_write(self, expr, data): + return super(EmulatedSymbExec, self).mem_write(expr, data) + + class DSEEngine(object): """Dynamic Symbolic Execution Engine @@ -174,12 +186,10 @@ class DSEEngine(object): self.symb = self.SYMB_ENGINE(self.jitter.cpu, self.jitter.vm, self.ir_arch, {}) self.symb.enable_emulated_simplifications() - self.symb_concrete = EmulatedSymbExec( + self.symb_concrete = ESENoVMSideEffects( self.jitter.cpu, self.jitter.vm, self.ir_arch, {} ) - ### Avoid side effects on jitter while using 'symb_concrete' - self.symb_concrete.func_write = None ## Update registers value self.symb.symbols[self.ir_arch.IRDst] = ExprInt( @@ -187,9 +197,6 @@ class DSEEngine(object): self.ir_arch.IRDst.size ) - # Avoid memory write - self.symb.func_write = None - # Activate callback on each instr self.jitter.jit.set_options(max_exec_per_call=1, jit_maxline=1) self.jitter.exec_cb = self.callback diff --git a/miasm2/analysis/outofssa.py b/miasm2/analysis/outofssa.py new file mode 100644 index 00000000..6355aeb2 --- /dev/null +++ b/miasm2/analysis/outofssa.py @@ -0,0 +1,411 @@ +from miasm2.expression.expression import ExprId +from miasm2.ir.ir import IRBlock, AssignBlock +from miasm2.analysis.ssa import get_phi_sources_parent_block, \ + irblock_has_phi + + +class Varinfo(object): + """Store liveness information for a variable""" + __slots__ = ["live_index", "loc_key", "index"] + + def __init__(self, live_index, loc_key, index): + self.live_index = live_index + self.loc_key = loc_key + self.index = index + + +class UnSSADiGraph(object): + """ + Implements unssa algorithm + Revisiting Out-of-SSA Translation for Correctness, Code Quality, and + Efficiency + """ + + def __init__(self, ssa, head, cfg_liveness): + self.cfg_liveness = cfg_liveness + self.ssa = ssa + self.head = head + + # Set of created variables + self.copy_vars = set() + # Virtual parallel copies + + # On loc_key's Phi node dst -> set((parent, src)) + self.phi_parent_sources = {} + # On loc_key's Phi node, loc_key -> set(Phi dsts) + self.phi_destinations = {} + # Phi's dst -> new var + self.phi_new_var = {} + # For a new_var representing dst: + # new_var -> set(parents of Phi's src in dst = Phi(src,...)) + self.new_var_to_srcs_parents = {} + # new_var -> set(variables to be coalesced with, named "merge_set") + self.merge_state = {} + + # Launch the algorithm in several steps + self.isolate_phi_nodes_block() + self.init_phis_merge_state() + self.order_ssa_var_dom() + self.aggressive_coalesce_block() + self.insert_parallel_copy() + self.replace_merge_sets() + self.remove_assign_eq() + + def insert_parallel_copy(self): + """ + Naive Out-of-SSA from CSSA (without coalescing for now) + - Replace Phi + - Create room for parallel copies in Phi's parents + """ + ircfg = self.ssa.graph + + for irblock in ircfg.blocks.values(): + if not irblock_has_phi(irblock): + continue + + # Replace Phi with Phi's dst = new_var + parallel_copies = {} + for dst in self.phi_destinations[irblock.loc_key]: + new_var = self.phi_new_var[dst] + parallel_copies[dst] = new_var + + assignblks = list(irblock) + assignblks[0] = AssignBlock(parallel_copies, irblock[0].instr) + new_irblock = IRBlock(irblock.loc_key, assignblks) + ircfg.blocks[irblock.loc_key] = new_irblock + + # Insert new_var = src in each Phi's parent, at the end of the block + parent_to_parallel_copies = {} + parallel_copies = {} + for dst in irblock[0]: + new_var = self.phi_new_var[dst] + for parent, src in self.phi_parent_sources[dst]: + parent_to_parallel_copies.setdefault(parent, {})[new_var] = src + + for parent, parallel_copies in parent_to_parallel_copies.iteritems(): + parent = ircfg.blocks[parent] + assignblks = list(parent) + assignblks.append(AssignBlock(parallel_copies, parent[-1].instr)) + new_irblock = IRBlock(parent.loc_key, assignblks) + ircfg.blocks[parent.loc_key] = new_irblock + + def create_copy_var(self, var): + """ + Generate a new var standing for @var + @var: variable to replace + """ + new_var = ExprId('var%d' % len(self.copy_vars), var.size) + self.copy_vars.add(new_var) + return new_var + + def isolate_phi_nodes_block(self): + """ + Init structures and virtually insert parallel copy before/after each phi + node + """ + ircfg = self.ssa.graph + for irblock in ircfg.blocks.itervalues(): + if not irblock_has_phi(irblock): + continue + for dst, sources in irblock[0].iteritems(): + assert sources.is_op('Phi') + new_var = self.create_copy_var(dst) + self.phi_new_var[dst] = new_var + + var_to_parents = get_phi_sources_parent_block( + self.ssa.graph, + irblock.loc_key, + sources.args + ) + + for src in sources.args: + parents = var_to_parents[src] + self.new_var_to_srcs_parents.setdefault(new_var, set()).update(parents) + for parent in parents: + self.phi_parent_sources.setdefault(dst, set()).add((parent, src)) + + self.phi_destinations[irblock.loc_key] = set(irblock[0]) + + def init_phis_merge_state(self): + """ + Generate trivial coalescing of phi variable and itself + """ + for phi_new_var in self.phi_new_var.itervalues(): + self.merge_state.setdefault(phi_new_var, set([phi_new_var])) + + def order_ssa_var_dom(self): + """Compute dominance order of each ssa variable""" + ircfg = self.ssa.graph + + # compute dominator tree + dominator_tree = ircfg.compute_dominator_tree(self.head) + + # variable -> Varinfo + self.var_to_varinfo = {} + # live_index can later be used to compare dominance of AssignBlocks + live_index = 0 + + # walk in DFS over the dominator tree + for loc_key in dominator_tree.walk_depth_first_forward(self.head): + irblock = ircfg.blocks[loc_key] + + # Create live index for phi new vars + # They do not exist in the graph yet, so index is set to None + if irblock_has_phi(irblock): + for dst in irblock[0]: + if not dst.is_id(): + continue + new_var = self.phi_new_var[dst] + self.var_to_varinfo[new_var] = Varinfo(live_index, loc_key, None) + + live_index += 1 + + # Create live index for remaining assignments + for index, assignblk in enumerate(irblock): + used = False + for dst in assignblk: + if not dst.is_id(): + continue + if dst in self.ssa.immutable_ids: + # Will not be considered by the current algo, ignore it + # (for instance, IRDst) + continue + + assert dst not in self.var_to_varinfo + self.var_to_varinfo[dst] = Varinfo(live_index, loc_key, index) + used = True + if used: + live_index += 1 + + + def ssa_def_dominates(self, node_a, node_b): + """ + Return living index order of @node_a and @node_b + @node_a: Varinfo instance + @node_b: Varinfo instance + """ + ret = self.var_to_varinfo[node_a].live_index <= self.var_to_varinfo[node_b].live_index + return ret + + def merge_set_sort(self, merge_set): + """ + Return a sorted list of (live_index, var) from @merge_set in dominance + order + @merge_set: set of coalescing variables + """ + return sorted( + (self.var_to_varinfo[var].live_index, var) + for var in merge_set + ) + + def ssa_def_is_live_at(self, node_a, node_b, parent): + """ + Return True if @node_a is live during @node_b definition + If @parent is None, this is a liveness test for a post phi variable; + Else, it is a liveness test for a variable source of the phi node + + @node_a: Varinfo instance + @node_b: Varinfo instance + @parent: Optional parent location of the phi source + """ + loc_key_b, index_b = self.var_to_varinfo[node_b].loc_key, self.var_to_varinfo[node_b].index + if parent and index_b is None: + index_b = 0 + if node_a not in self.new_var_to_srcs_parents: + # node_a is not a new var (it is a "classic" var) + # -> use a basic liveness test + liveness_b = self.cfg_liveness.blocks[loc_key_b].infos[index_b] + return node_a in liveness_b.var_out + + for def_loc_key in self.new_var_to_srcs_parents[node_a]: + # Consider node_a as defined at the end of its parents blocks + # and compute liveness check accordingly + + if def_loc_key == parent: + # Same path as node_a definition, so SSA ensure b cannot be live + # on this path (otherwise, a Phi would already happen earlier) + continue + liveness_end_block = self.cfg_liveness.blocks[def_loc_key].infos[-1] + if node_b in liveness_end_block.var_out: + return True + return False + + def merge_nodes_interfere(self, node_a, node_b, parent): + """ + Return True if @node_a and @node_b interfere + @node_a: variable + @node_b: variable + @parent: Optional parent location of the phi source for liveness tests + + Interference check is: is x live at y definition (or reverse) + TODO: add Value-based interference improvement + """ + if self.var_to_varinfo[node_a].live_index == self.var_to_varinfo[node_b].live_index: + # Defined in the same AssignBlock -> interfere + return True + + if self.var_to_varinfo[node_a].live_index < self.var_to_varinfo[node_b].live_index: + return self.ssa_def_is_live_at(node_a, node_b, parent) + return self.ssa_def_is_live_at(node_b, node_a, parent) + + def merge_sets_interfere(self, merge_a, merge_b, parent): + """ + Return True if no variable in @merge_a and @merge_b interferes. + + Implementation of "Algorithm 2: Check intersection in a set of variables" + + @merge_a: a dom ordered list of equivalent variables + @merge_b: a dom ordered list of equivalent variables + @parent: Optional parent location of the phi source for liveness tests + """ + if merge_a == merge_b: + # No need to consider interference if equal + return False + + merge_a_list = self.merge_set_sort(merge_a) + merge_b_list = self.merge_set_sort(merge_b) + dom = [] + while merge_a_list or merge_b_list: + if not merge_a_list: + _, current = merge_b_list.pop(0) + elif not merge_b_list: + _, current = merge_a_list.pop(0) + else: + # compare live_indexes (standing for dominance) + if merge_a_list[-1] < merge_b_list[-1]: + _, current = merge_a_list.pop(0) + else: + _, current = merge_b_list.pop(0) + while dom and not self.ssa_def_dominates(dom[-1], current): + dom.pop() + + # Don't test node in same merge_set + if ( + # Is stack not empty? + dom and + # Trivial non-interference if dom.top() and current come + # from the same merge set + not (dom[-1] in merge_a and current in merge_a) and + not (dom[-1] in merge_b and current in merge_b) and + # Actually test for interference + self.merge_nodes_interfere(current, dom[-1], parent) + ): + return True + dom.append(current) + return False + + def aggressive_coalesce_parallel_copy(self, parallel_copies, parent): + """ + Try to coalesce variables each dst/src couple together from + @parallel_copies + + @parallel_copies: a dictionary representing dst/src parallel + assignments. + @parent: Optional parent location of the phi source for liveness tests + """ + for dst, src in parallel_copies.iteritems(): + dst_merge = self.merge_state.setdefault(dst, set([dst])) + src_merge = self.merge_state.setdefault(src, set([src])) + if not self.merge_sets_interfere(dst_merge, src_merge, parent): + dst_merge.update(src_merge) + for node in dst_merge: + self.merge_state[node] = dst_merge + + def aggressive_coalesce_block(self): + """Try to coalesce phi var with their pre/post variables""" + + ircfg = self.ssa.graph + + # Run coalesce on the post phi parallel copy + for irblock in ircfg.blocks.values(): + if not irblock_has_phi(irblock): + continue + parallel_copies = {} + for dst in self.phi_destinations[irblock.loc_key]: + parallel_copies[dst] = self.phi_new_var[dst] + self.aggressive_coalesce_parallel_copy(parallel_copies, None) + + # Run coalesce on the pre phi parallel copy + + # Stand for the virtual parallel copies at the end of Phi's block + # parents + parent_to_parallel_copies = {} + for dst in irblock[0]: + new_var = self.phi_new_var[dst] + for parent, src in self.phi_parent_sources[dst]: + parent_to_parallel_copies.setdefault(parent, {})[new_var] = src + + for parent, parallel_copies in parent_to_parallel_copies.iteritems(): + self.aggressive_coalesce_parallel_copy(parallel_copies, parent) + + def get_best_merge_set_name(self, merge_set): + """ + For a given @merge_set, prefer an original SSA variable instead of a + created copy. In other case, take a random name. + @merge_set: set of equivalent expressions + """ + if not merge_set: + raise RuntimeError("Merge set should not be empty") + for var in merge_set: + if var not in self.copy_vars: + return var + # Get random name + return var + + + def replace_merge_sets(self): + """ + In the graph, replace all variables from merge state by their + representative variable + """ + replace = {} + merge_sets = set() + + # Elect representative for merge sets + merge_set_to_name = {} + for merge_set in self.merge_state.itervalues(): + frozen_merge_set = frozenset(merge_set) + merge_sets.add(frozen_merge_set) + var_name = self.get_best_merge_set_name(merge_set) + merge_set_to_name[frozen_merge_set] = var_name + + # Generate replacement of variable by their representative + for merge_set in merge_sets: + var_name = merge_set_to_name[merge_set] + merge_set = list(merge_set) + for var in merge_set: + replace[var] = var_name + + self.ssa.graph.simplify(lambda x: x.replace_expr(replace)) + + def remove_phi(self): + """ + Remove phi operators in @ifcfg + @ircfg: IRDiGraph instance + """ + + for irblock in self.ssa.graph.blocks.values(): + assignblks = list(irblock) + out = {} + for dst, src in assignblks[0].iteritems(): + if src.is_op('Phi'): + assert set([dst]) == set(src.args) + continue + out[dst] = src + assignblks[0] = AssignBlock(out, assignblks[0].instr) + self.ssa.graph.blocks[irblock.loc_key] = IRBlock(irblock.loc_key, assignblks) + + def remove_assign_eq(self): + """ + Remove trivial expressions (a=a) in the current graph + """ + for irblock in self.ssa.graph.blocks.values(): + assignblks = list(irblock) + for i, assignblk in enumerate(assignblks): + out = {} + for dst, src in assignblk.iteritems(): + if dst == src: + continue + out[dst] = src + assignblks[i] = AssignBlock(out, assignblk.instr) + self.ssa.graph.blocks[irblock.loc_key] = IRBlock(irblock.loc_key, assignblks) diff --git a/miasm2/analysis/ssa.py b/miasm2/analysis/ssa.py index b68eef7c..ea3189eb 100644 --- a/miasm2/analysis/ssa.py +++ b/miasm2/analysis/ssa.py @@ -1,6 +1,5 @@ from collections import deque -from miasm2.analysis.data_flow import DiGraphLivenessIRA, remove_empty_assignblks from miasm2.expression.expression import ExprId, ExprAssign, ExprOp, get_expr_ids from miasm2.ir.ir import AssignBlock, IRBlock @@ -791,7 +790,6 @@ class UnSSADiGraph(object): for src in sources.args: parents = var_to_parents[src] self.new_var_to_srcs_parents.setdefault(new_var, set()).update(parents) - parent_dsts = set((parent, src) for parent in parents) for parent in parents: self.phi_parent_sources.setdefault(dst, set()).add((parent, src)) @@ -1080,47 +1078,3 @@ class UnSSADiGraph(object): out[dst] = src assignblks[i] = AssignBlock(out, assignblk.instr) self.ssa.graph.blocks[irblock.loc_key] = IRBlock(irblock.loc_key, assignblks) - - -class DiGraphLivenessSSA(DiGraphLivenessIRA): - """ - DiGraph representing variable liveness is a SSA graph - """ - def __init__(self, ircfg): - super(DiGraphLivenessSSA, self).__init__(ircfg) - - self.loc_key_to_phi_parents = {} - for irblock in self.blocks.values(): - if not irblock_has_phi(irblock): - continue - out = {} - for sources in irblock[0].itervalues(): - var_to_parents = get_phi_sources_parent_block(self, irblock.loc_key, sources.args) - for var, var_parents in var_to_parents.iteritems(): - out.setdefault(var, set()).update(var_parents) - self.loc_key_to_phi_parents[irblock.loc_key] = out - - def back_propagate_to_parent(self, todo, node, parent): - parent_block = self.blocks[parent] - cur_block = self.blocks[node] - irblock = self.ircfg.blocks[node] - if cur_block.infos[0].var_in == parent_block.infos[-1].var_out: - return - var_info = cur_block.infos[0].var_in.union(parent_block.infos[-1].var_out) - - if irblock_has_phi(irblock): - # Remove phi special case - out = set() - phi_sources = self.loc_key_to_phi_parents[irblock.loc_key] - for var in var_info: - if var not in phi_sources: - out.add(var) - continue - if parent in phi_sources[var]: - out.add(var) - var_info = out - - parent_block.infos[-1].var_out = var_info - todo.add(parent) - - diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py index 7f9d50e6..b625647e 100644 --- a/miasm2/arch/x86/arch.py +++ b/miasm2/arch/x86/arch.py @@ -470,7 +470,7 @@ class instruction_x86(instruction): return self.name in ['CALL'] def dstflow2label(self, loc_db): - if self.additional_info.g1.value & 6 and self.name in repeat_mn: + if self.additional_info.g1.value & 14 and self.name in repeat_mn: return expr = self.args[0] if not expr.is_int(): @@ -512,7 +512,7 @@ class instruction_x86(instruction): return self.name in ['CALL'] def getdstflow(self, loc_db): - if self.additional_info.g1.value & 6 and self.name in repeat_mn: + if self.additional_info.g1.value & 14 and self.name in repeat_mn: addr = int(self.offset) loc_key = loc_db.get_or_create_offset_location(addr) return [ExprLoc(loc_key, self.v_opmode())] @@ -549,7 +549,10 @@ class instruction_x86(instruction): if self.additional_info.g1.value & 2: if getattr(self.additional_info.prefixed, 'default', "") != "\xF2": o = "REPNE %s" % o - if self.additional_info.g1.value & 4: + if self.additional_info.g1.value & 8: + if getattr(self.additional_info.prefixed, 'default', "") != "\xF3": + o = "REP %s" % o + elif self.additional_info.g1.value & 4: if getattr(self.additional_info.prefixed, 'default', "") != "\xF3": o = "REPE %s" % o return o @@ -677,12 +680,15 @@ class mn_x86(cls_mn): if prefix == "LOCK": pref |= 1 text = new_s - elif prefix == "REPNE": + elif prefix == "REPNE" or prefix == "REPNZ": pref |= 2 text = new_s - elif prefix == "REPE": + elif prefix == "REPE" or prefix == "REPZ": pref |= 4 text = new_s + elif prefix == "REP": + pref |= 8 + text = new_s c = super(mn_x86, cls).fromstring(text, loc_db, mode) c.additional_info.g1.value = pref return c @@ -713,7 +719,7 @@ class mn_x86(cls_mn): elif c == '\xf2': pre_dis_info['g1'] = 2 elif c == '\xf3': - pre_dis_info['g1'] = 4 + pre_dis_info['g1'] = 12 elif c == '\x2e': pre_dis_info['g2'] = 1 @@ -728,20 +734,20 @@ class mn_x86(cls_mn): elif c == '\x65': pre_dis_info['g2'] = 6 - elif mode == 64 and c in '@ABCDEFGHIJKLMNO': - x = ord(c) - pre_dis_info['rex_p'] = 1 - pre_dis_info['rex_w'] = (x >> 3) & 1 - pre_dis_info['rex_r'] = (x >> 2) & 1 - pre_dis_info['rex_x'] = (x >> 1) & 1 - pre_dis_info['rex_b'] = (x >> 0) & 1 - offset += 1 - break else: - c = '' break pre_dis_info['prefix'] += c offset += 1 + if mode == 64 and c in '@ABCDEFGHIJKLMNO': + x = ord(c) + pre_dis_info['rex_p'] = 1 + pre_dis_info['rex_w'] = (x >> 3) & 1 + pre_dis_info['rex_r'] = (x >> 2) & 1 + pre_dis_info['rex_x'] = (x >> 1) & 1 + pre_dis_info['rex_b'] = (x >> 0) & 1 + offset += 1 + elif pre_dis_info.get('g1', None) == 12 and c in ['\xa6', '\xa7', '\xae', '\xaf']: + pre_dis_info['g1'] = 4 return pre_dis_info, v, mode, offset, offset - offset_o @classmethod @@ -856,7 +862,7 @@ class mn_x86(cls_mn): if hasattr(self, 'no_xmm_pref'): return None v = "\xf2" + v - if self.g1.value & 4: + if self.g1.value & 12: if hasattr(self, 'no_xmm_pref'): return None v = "\xf3" + v @@ -895,7 +901,7 @@ class mn_x86(cls_mn): out = [] for c, v in candidates: if (hasattr(c, 'no_xmm_pref') and - (c.g1.value & 2 or c.g1.value & 4 or c.opmode)): + (c.g1.value & 2 or c.g1.value & 4 or c.g1.value & 8 or c.opmode)): continue if hasattr(c, "fopmode") and v_opmode(c) != c.fopmode.mode: continue @@ -4607,6 +4613,8 @@ addop("maskmovdqu", [bs8(0x0f), bs8(0xf7), pref_66] + addop("emms", [bs8(0x0f), bs8(0x77)]) +addop("endbr64", [pref_f3, bs8(0x0f), bs8(0x1e), bs8(0xfa)]) +addop("endbr32", [pref_f3, bs8(0x0f), bs8(0x1e), bs8(0xfb)]) mn_x86.bintree = factor_one_bit(mn_x86.bintree) # mn_x86.bintree = factor_fields_all(mn_x86.bintree) diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py index 862240e5..b2ef5a43 100644 --- a/miasm2/arch/x86/sem.py +++ b/miasm2/arch/x86/sem.py @@ -1321,7 +1321,7 @@ def _tpl_eflags(tmp): def popfw(ir, instr): - tmp = ir.ExprMem(mRSP[instr.mode], 32) + tmp = ir.ExprMem(mRSP[instr.mode], 16) e = _tpl_eflags(tmp) e.append( m2_expr.ExprAssign(mRSP[instr.mode], mRSP[instr.mode] + m2_expr.ExprInt(2, mRSP[instr.mode].size))) @@ -4938,6 +4938,14 @@ def emms(ir, instr): # Implemented as a NOP return [], [] +def endbr64(ir, instr): + # Implemented as a NOP + return [], [] + +def endbr32(ir, instr): + # Implemented as a NOP + return [], [] + # Common value without too many option, 0x1fa0 STMXCSR_VALUE = 0x1fa0 def stmxcsr(ir, instr, dst): @@ -5584,6 +5592,8 @@ mnemo_func = {'mov': mov, "movmskpd": movmskpd, "stmxcsr": stmxcsr, "ldmxcsr": ldmxcsr, + "endbr64": endbr64, + "endbr32": endbr32, } @@ -5669,9 +5679,9 @@ class ir_x86_16(IntermediateRepresentation): # end condition if zf_val is None: c_cond = cond_dec - elif instr.additional_info.g1.value & 2: # REPNE + elif instr.additional_info.g1.value & 2: # REPNE and REPNZ c_cond = cond_dec | zf - elif instr.additional_info.g1.value & 4: # REP + elif instr.additional_info.g1.value & 12: # REPE, REP and REPZ c_cond = cond_dec | (zf ^ m2_expr.ExprInt(1, 1)) # gen while diff --git a/miasm2/core/bin_stream.py b/miasm2/core/bin_stream.py index 8bd59467..af31a52c 100644 --- a/miasm2/core/bin_stream.py +++ b/miasm2/core/bin_stream.py @@ -160,63 +160,73 @@ class bin_stream(object): class bin_stream_str(bin_stream): - def __init__(self, input_str="", offset=0L, shift=0): + def __init__(self, input_str="", offset=0L, base_address=0, shift=None): bin_stream.__init__(self) + if shift is not None: + raise DeprecationWarning("use base_address instead of shift") self.bin = input_str self.offset = offset - self.shift = shift + self.base_address = base_address self.l = len(input_str) def _getbytes(self, start, l=1): - if start + l + self.shift > self.l: + if start + l - self.base_address > self.l: raise IOError("not enough bytes in str") + if start - self.base_address < 0: + raise IOError("Negative offset") - return super(bin_stream_str, self)._getbytes(start + self.shift, l) + return super(bin_stream_str, self)._getbytes(start - self.base_address, l) def readbs(self, l=1): - if self.offset + l + self.shift > self.l: + if self.offset + l - self.base_address > self.l: raise IOError("not enough bytes in str") + if self.offset - self.base_address < 0: + raise IOError("Negative offset") self.offset += l - return self.bin[self.offset - l + self.shift:self.offset + self.shift] + return self.bin[self.offset - l - self.base_address:self.offset - self.base_address] def __str__(self): - out = self.bin[self.offset + self.shift:] + out = self.bin[self.offset - self.base_address:] return out def setoffset(self, val): self.offset = val def getlen(self): - return self.l - (self.offset + self.shift) + return self.l - (self.offset - self.base_address) class bin_stream_file(bin_stream): - def __init__(self, binary, offset=0L, shift=0): + def __init__(self, binary, offset=0L, base_address=0, shift=None): bin_stream.__init__(self) + if shift is not None: + raise DeprecationWarning("use base_address instead of shift") self.bin = binary self.bin.seek(0, 2) - self.shift = shift + self.base_address = base_address self.l = self.bin.tell() self.offset = offset def getoffset(self): - return self.bin.tell() - self.shift + return self.bin.tell() + self.base_address def setoffset(self, val): - self.bin.seek(val + self.shift) + self.bin.seek(val - self.base_address) offset = property(getoffset, setoffset) def readbs(self, l=1): - if self.offset + l + self.shift > self.l: + if self.offset + l - self.base_address > self.l: raise IOError("not enough bytes in file") + if self.offset - self.base_address < 0: + raise IOError("Negative offset") return self.bin.read(l) def __str__(self): return str(self.bin) def getlen(self): - return self.l - (self.offset + self.shift) + return self.l - (self.offset - self.base_address) class bin_stream_container(bin_stream): @@ -236,6 +246,8 @@ class bin_stream_container(bin_stream): def readbs(self, l=1): if self.offset + l > self.l: raise IOError("not enough bytes") + if self.offset < 0: + raise IOError("Negative offset") self.offset += l return self.bin.virt.get(self.offset - l, self.offset) diff --git a/miasm2/core/graph.py b/miasm2/core/graph.py index f61d1e67..e385b044 100644 --- a/miasm2/core/graph.py +++ b/miasm2/core/graph.py @@ -143,6 +143,14 @@ class DiGraph(object): return [x for x in self.heads_iter()] def find_path(self, src, dst, cycles_count=0, done=None): + """ + Searches for paths from @src to @dst + @src: loc_key of basic block from which it should start + @dst: loc_key of basic block where it should stop + @cycles_count: maximum number of times a basic block can be processed + @done: dictionary of already processed loc_keys, it's value is number of times it was processed + @out: list of paths from @src to @dst + """ if done is None: done = {} if dst in done and done[dst] > cycles_count: @@ -157,6 +165,33 @@ class DiGraph(object): if path and path[0] == src: out.append(path + [dst]) return out + + def find_path_from_src(self, src, dst, cycles_count=0, done=None): + """ + This function does the same as function find_path. + But it searches the paths from src to dst, not vice versa like find_path. + This approach might be more efficient in some cases. + @src: loc_key of basic block from which it should start + @dst: loc_key of basic block where it should stop + @cycles_count: maximum number of times a basic block can be processed + @done: dictionary of already processed loc_keys, it's value is number of times it was processed + @out: list of paths from @src to @dst + """ + + if done is None: + done = {} + if src == dst: + return [[src]] + if src in done and done[src] > cycles_count: + return [[]] + out = [] + for node in self.successors(src): + done_n = dict(done) + done_n[src] = done_n.get(src, 0) + 1 + for path in self.find_path_from_src(node, dst, cycles_count, done_n): + if path and path[len(path)-1] == dst: + out.append([src] + path) + return out def nodeid(self, node): """ diff --git a/miasm2/core/locationdb.py b/miasm2/core/locationdb.py index b6e60794..4c5da29e 100644 --- a/miasm2/core/locationdb.py +++ b/miasm2/core/locationdb.py @@ -204,6 +204,22 @@ class LocationDB(object): for name, loc_key in self._name_to_loc_key.iteritems(): assert name in self._loc_key_to_names[loc_key] + def find_free_name(self, name): + """ + If @name is not known in DB, return it + Else append an index to it corresponding to the next unknown name + + @name: string + """ + if self.get_name_location(name) is None: + return name + i = 0 + while True: + new_name = "%s_%d" % (name, i) + if self.get_name_location(new_name) is None: + return new_name + i += 1 + def add_location(self, name=None, offset=None, strict=True): """Add a new location in the locationDB. Returns the corresponding LocKey. If @name is set, also associate a name to this new location. @@ -252,7 +268,10 @@ class LocationDB(object): # Non-strict mode if name_loc_key is not None: known_offset = self.get_offset_location(name_loc_key) - if known_offset != offset: + if known_offset is None: + if offset is not None: + self.set_location_offset(name_loc_key, offset) + elif known_offset != offset: raise ValueError( "Location with name '%s' already have an offset: 0x%x " "(!= 0x%x)" % (name, offset, known_offset) diff --git a/miasm2/expression/simplifications.py b/miasm2/expression/simplifications.py index 8ea9c41f..483331a6 100644 --- a/miasm2/expression/simplifications.py +++ b/miasm2/expression/simplifications.py @@ -55,6 +55,7 @@ class ExpressionSimplifier(object): simplifications_common.simp_zeroext_and_cst_eq_cst, simplifications_common.simp_test_signext_inf, simplifications_common.simp_test_zeroext_inf, + simplifications_common.simp_cond_inf_eq_unsigned_zero, ], @@ -67,6 +68,7 @@ class ExpressionSimplifier(object): m2_expr.ExprCond: [ simplifications_common.simp_cond, simplifications_common.simp_cond_zeroext, + simplifications_common.simp_cond_add, # CC op simplifications_common.simp_cond_flag, simplifications_common.simp_cmp_int_arg, @@ -75,11 +77,13 @@ class ExpressionSimplifier(object): simplifications_common.simp_x_and_cst_eq_cst, simplifications_common.simp_cond_logic_ext, simplifications_common.simp_cond_sign_bit, + simplifications_common.simp_cond_eq_1_0, ], m2_expr.ExprMem: [simplifications_common.simp_mem], } + # Heavy passes PASS_HEAVY = {} @@ -193,8 +197,6 @@ class ExpressionSimplifier(object): expr_simp = ExpressionSimplifier() expr_simp.enable_passes(ExpressionSimplifier.PASS_COMMONS) - - expr_simp_high_to_explicit = ExpressionSimplifier() expr_simp_high_to_explicit.enable_passes(ExpressionSimplifier.PASS_HIGH_TO_EXPLICIT) diff --git a/miasm2/expression/simplifications_common.py b/miasm2/expression/simplifications_common.py index 00b14554..a4b7c61e 100644 --- a/miasm2/expression/simplifications_common.py +++ b/miasm2/expression/simplifications_common.py @@ -909,6 +909,7 @@ def simp_cmp_int(expr_simp, expr): """ ({X, 0} == int) => X == int[:] X + int1 == int2 => X == int2-int1 + X ^ int1 == int2 => X == int1^int2 """ if (expr.is_op(TOK_EQUAL) and expr.args[1].is_int() and @@ -922,28 +923,42 @@ def simp_cmp_int(expr_simp, expr): expr = expr_simp( ExprOp(TOK_EQUAL, src, new_int) ) - elif (expr.is_op() and - expr.op in [ - TOK_EQUAL, - ] and - expr.args[1].is_int() and - expr.args[0].is_op("+") and - expr.args[0].args[-1].is_int()): - # X + int1 == int2 => X == int2-int1 - # WARNING: - # X - 0x10 <=u 0x20 gives X in [0x10 0x30] - # which is not equivalet to A <=u 0x10 - - left, right = expr.args - left, int_diff = left.args[:-1], left.args[-1] - if len(left) == 1: - left = left[0] - else: - left = ExprOp('+', *left) - new_int = expr_simp(right - int_diff) - expr = expr_simp( - ExprOp(expr.op, left, new_int), - ) + elif not expr.is_op(TOK_EQUAL): + return expr + assert len(expr.args) == 2 + + left, right = expr.args + if left.is_int() and not right.is_int(): + left, right = right, left + if not right.is_int(): + return expr + if not (left.is_op() and left.op in ['+', '^']): + return expr + if not left.args[-1].is_int(): + return expr + # X + int1 == int2 => X == int2-int1 + # WARNING: + # X - 0x10 <=u 0x20 gives X in [0x10 0x30] + # which is not equivalet to A <=u 0x10 + + left_orig = left + left, last_int = left.args[:-1], left.args[-1] + + if len(left) == 1: + left = left[0] + else: + left = ExprOp(left.op, *left) + + if left_orig.op == "+": + new_int = expr_simp(right - last_int) + elif left_orig.op == '^': + new_int = expr_simp(right ^ last_int) + else: + raise RuntimeError("Unsupported operator") + + expr = expr_simp( + ExprOp(TOK_EQUAL, left, new_int), + ) return expr @@ -1375,6 +1390,59 @@ def simp_cond_sign_bit(_, expr): return ExprCond(cond, expr.src1, expr.src2) +def simp_cond_add(expr_s, expr): + """ + (a+b)?X:Y => (a == b)?Y:X + (a^b)?X:Y => (a == b)?Y:X + """ + cond = expr.cond + if not cond.is_op(): + return expr + if cond.op not in ['+', '^']: + return expr + if len(cond.args) != 2: + return expr + arg1, arg2 = cond.args + if cond.is_op('+'): + new_cond = ExprOp('==', arg1, expr_s(-arg2)) + elif cond.is_op('^'): + new_cond = ExprOp('==', arg1, arg2) + else: + raise ValueError('Bad case') + return ExprCond(new_cond, expr.src2, expr.src1) + + +def simp_cond_eq_1_0(expr_s, expr): + """ + (a == b)?ExprInt(1, 1):ExprInt(0, 1) => a == b + (a <s b)?ExprInt(1, 1):ExprInt(0, 1) => a == b + ... + """ + cond = expr.cond + if not cond.is_op(): + return expr + if cond.op not in [ + TOK_EQUAL, + TOK_INF_SIGNED, TOK_INF_EQUAL_SIGNED, + TOK_INF_UNSIGNED, TOK_INF_EQUAL_UNSIGNED + ]: + return expr + if expr.src1 != ExprInt(1, 1) or expr.src2 != ExprInt(0, 1): + return expr + return cond + + +def simp_cond_inf_eq_unsigned_zero(expr_s, expr): + """ + (a <=u 0) => a == 0 + """ + if not expr.is_op(TOK_INF_EQUAL_UNSIGNED): + return expr + if not expr.args[1].is_int(0): + return expr + return ExprOp(TOK_EQUAL, expr.args[0], expr.args[1]) + + def simp_test_signext_inf(expr_s, expr): """A.signExt() <s int => A <s int[:]""" if not (expr.is_op(TOK_INF_SIGNED) or expr.is_op(TOK_INF_EQUAL_SIGNED)): diff --git a/miasm2/expression/simplifications_explicit.py b/miasm2/expression/simplifications_explicit.py index 4c5dde3e..00892201 100644 --- a/miasm2/expression/simplifications_explicit.py +++ b/miasm2/expression/simplifications_explicit.py @@ -155,13 +155,5 @@ def simp_flags(_, expr): op_nf, = args return ~op_nf - elif expr.is_op(TOK_EQUAL): - arg1, arg2 = args - return ExprCond( - arg1 - arg2, - ExprInt(0, expr.size), - ExprInt(1, expr.size), - ) - return expr diff --git a/miasm2/ir/symbexec.py b/miasm2/ir/symbexec.py index 1347e0e5..f54ee2a5 100644 --- a/miasm2/ir/symbexec.py +++ b/miasm2/ir/symbexec.py @@ -1,4 +1,3 @@ -import warnings import logging from collections import MutableMapping @@ -154,6 +153,9 @@ class MemArray(MutableMapping): """Mask offset""" return self._mask + def __contains__(self, offset): + return offset in self._offset_to_expr + def __getitem__(self, offset): assert 0 <= offset <= self._mask return self._offset_to_expr.__getitem__(offset) @@ -725,14 +727,6 @@ class SymbolMngr(object): """Variables of the current state""" return list(self) - def get(self, expr, default=None): - """Deprecated version of read""" - warnings.warn('DEPRECATION WARNING: use "read(self, expr)" instead of get') - ret = self.read(expr) - if default is not None and ret == expr: - return default - return ret - def merge_ptr_read(known, ptrs): """ @@ -805,8 +799,6 @@ class SymbolicExecutionEngine(object): StateEngine = SymbolicState def __init__(self, ir_arch, state=None, - func_read=None, - func_write=None, sb_expr_simp=expr_simp_explicit): self.expr_to_visitor = { @@ -828,13 +820,6 @@ class SymbolicExecutionEngine(object): for dst, src in state.iteritems(): self.symbols.write(dst, src) - if func_read: - warnings.warn('DEPRECATION WARNING: override function "mem_read(self, expr)" instead of func_read') - if func_write: - warnings.warn('DEPRECATION WARNING: override function "mem_write(self, dsr, src)" instead of func_write') - - self.func_read = func_read - self.func_write = func_write self.ir_arch = ir_arch self.expr_simp = sb_expr_simp @@ -1114,34 +1099,6 @@ class SymbolicExecutionEngine(object): return ret - def _resolve_mem_parts(self, expr): - """For a given ExprMem @expr, get known/unknown parts from the store. - @expr: ExprMem instance - - Return a list of (known, value) where known is a bool representing if - the value has been resolved from the store or not. - """ - - # Extract known parts in symbols - assert expr.size % 8 == 0 - ptr = expr.ptr - known = [] - ptrs = [] - for index in xrange(expr.size / 8): - offset = self.expr_simp(ptr + ExprInt(index, ptr.size)) - ptrs.append(offset) - mem = ExprMem(offset, 8) - known.append(mem in self.symbols) - - reads = merge_ptr_read(known, ptrs) - out = [] - for is_known, ptr_value, size in reads: - mem = ExprMem(ptr_value, size) - if is_known: - mem = self.symbols.read(mem) - out.append((is_known, mem)) - return out - def mem_read(self, expr): """ [DEV]: Override to modify the effective memory reads @@ -1149,21 +1106,7 @@ class SymbolicExecutionEngine(object): Read symbolic value at ExprMem @expr @expr: ExprMem """ - - parts = self._resolve_mem_parts(expr) - - out = [] - for known, part in parts: - if not known and part.is_mem() and self.func_read is not None: - ret = self.func_read(part) - else: - ret = part - - out.append(ret) - ret = self.expr_simp(ExprCompose(*out)) - - assert ret.size == expr.size - return ret + return self.symbols.read(expr) def mem_write(self, dst, src): """ @@ -1173,106 +1116,4 @@ class SymbolicExecutionEngine(object): @dst: destination ExprMem @src: source Expression """ - if self.func_write is not None: - self.func_write(self, dst, src) - else: - self.symbols.write(dst, src) - - - # Deprecated methods - - def apply_expr_on_state(self, expr, cache): - """Deprecated version of eval_expr""" - warnings.warn('DEPRECATION WARNING: use "eval_expr" instead of apply_expr_on_state') - - if cache is None: - cache = {} - ret = self.eval_expr(expr, eval_cache=cache) - return ret - - def modified_mems(self, init_state=None): - """Deprecated version of modified(ids=False)""" - warnings.warn('DEPRECATION WARNING: use "modified(self, ids=False)" instead of modified_mems') - for mem in self.modified(init_state=init_state, ids=False): - yield mem - - def modified_regs(self, init_state=None): - """Deprecated version of modified(mems=False)""" - warnings.warn('DEPRECATION WARNING: use "modified(self, mems=False)" instead of modified_regs') - for reg in self.modified(init_state=init_state, mems=False): - yield reg - - def dump_id(self): - """Deprecated version of dump(mems=False)""" - warnings.warn('DEPRECATION WARNING: use "dump(self, mems=False)" instead of dump_id') - self.dump(mems=False) - - def dump_mem(self): - """Deprecated version of dump(ids=False)""" - warnings.warn('DEPRECATION WARNING: use "dump(self, ids=False)" instead of dump_mem') - self.dump(ids=False) - - def eval_ir_expr(self, assignblk): - """Deprecated version of eval_ir_expr(self, assignblk)""" - warnings.warn('DEPRECATION WARNING: use "eval_assignblk(self, assignblk)" instead of eval_ir_expr') - return self.eval_assignblk(assignblk).iteritems() - - def eval_ir(self, assignblk): - """Deprecated version of eval_updt_assignblk(self, assignblk)""" - warnings.warn('DEPRECATION WARNING: use "eval_assignblk(self, assignblk)" instead of eval_ir') - return self.eval_updt_assignblk(assignblk) - - def emulbloc(self, irb, step=False): - """Deprecated version of eval_updt_irblock(self, irb, step=False)""" - warnings.warn('DEPRECATION WARNING: use "eval_updt_irblock(self, irb, step=False)" instead of emulbloc') - return self.eval_updt_irblock(irb, step) - - def emul_ir_bloc(self, _, addr, step=False): - """Deprecated version of run_block_at""" - warnings.warn('DEPRECATION WARNING: use "run_block_at(self, addr, step=False)" instead of emul_ir_bloc') - return self.run_block_at(addr, step) - - def emul_ir_block(self, addr, step=False): - """Deprecated version of run_block_at""" - warnings.warn('DEPRECATION WARNING: use "run_block_at(self, addr, step=False)" instead of emul_ir_block') - return self.run_block_at(addr, step) - - def emul_ir_blocks(self, addr, lbl_stop=None, step=False): - """Deprecated version of run_at""" - warnings.warn('DEPRECATION WARNING: use "run_at(self, addr, lbl_stop=None, step=False):" instead of emul_ir_blocks') - return self.run_at(addr, lbl_stop, step) - - def emul_ir_blocs(self, _, addr, lbl_stop=None, step=False): - """Deprecated version of run_at""" - warnings.warn('DEPRECATION WARNING: use "run_at(self, addr, lbl_stop=None, step=False):" instead of emul_ir_blocs') - return self.run_at(addr, lbl_stop, step) - - def apply_expr(self, expr): - """Deprecated version of eval_updt_expr""" - warnings.warn('DEPRECATION WARNING: use "eval_updt_expr" instead of apply_expr') - return self.eval_updt_expr(expr) - - def as_assignblock(self): - """Return the current state as an AssignBlock""" - warnings.warn('DEPRECATION WARNING: use "modified(ids=True, mems=True)" instead of as_assignblock') - out = [] - for dst, src in self.modified(ids=True, mems=True): - out.append((dst, src)) - return AssignBlock(dict(out)) - - -class symbexec(SymbolicExecutionEngine): - """ - DEPRECATED object - Use SymbolicExecutionEngine instead of symbexec - """ - - def __init__(self, ir_arch, known_symbols, - func_read=None, - func_write=None, - sb_expr_simp=expr_simp_explicit): - warnings.warn("Deprecated API: use SymbolicExecutionEngine") - super(symbexec, self).__init__(ir_arch, known_symbols, - func_read, - func_write, - sb_expr_simp=sb_expr_simp) + self.symbols.write(dst, src) diff --git a/miasm2/ir/symbexec_top.py b/miasm2/ir/symbexec_top.py index 887ebe59..be48c065 100644 --- a/miasm2/ir/symbexec_top.py +++ b/miasm2/ir/symbexec_top.py @@ -86,13 +86,9 @@ class SymbExecTopNoMem(SymbolicExecutionEngine): StateEngine = SymbolicStateTop def __init__(self, ir_arch, state, regstop, - func_read=None, - func_write=None, sb_expr_simp=expr_simp): known_symbols = dict(state) super(SymbExecTopNoMem, self).__init__(ir_arch, known_symbols, - func_read, - func_write, sb_expr_simp) self.regstop = set(regstop) diff --git a/miasm2/ir/symbexec_types.py b/miasm2/ir/symbexec_types.py index 71e5d6fa..e4f37e3f 100644 --- a/miasm2/ir/symbexec_types.py +++ b/miasm2/ir/symbexec_types.py @@ -66,15 +66,11 @@ class SymbExecCType(SymbolicExecutionEngine): def __init__(self, ir_arch, symbols, chandler, - func_read=None, - func_write=None, sb_expr_simp=expr_simp): self.chandler = chandler super(SymbExecCType, self).__init__(ir_arch, {}, - func_read, - func_write, sb_expr_simp) self.symbols = dict(symbols) diff --git a/miasm2/ir/translators/z3_ir.py b/miasm2/ir/translators/z3_ir.py index 2572ac74..204ee976 100644 --- a/miasm2/ir/translators/z3_ir.py +++ b/miasm2/ir/translators/z3_ir.py @@ -219,7 +219,7 @@ class TranslatorZ3(Translator): ) elif expr.op == "<s": res = z3.If( - z3.SLT(args[0], args[1]), + args[0] < args[1], z3.BitVecVal(1, 1), z3.BitVecVal(0, 1) ) @@ -231,7 +231,7 @@ class TranslatorZ3(Translator): ) elif expr.op == "<=s": res = z3.If( - z3.SLE(args[0], args[1]), + args[0] <= args[1], z3.BitVecVal(1, 1), z3.BitVecVal(0, 1) ) diff --git a/miasm2/jitter/emulatedsymbexec.py b/miasm2/jitter/emulatedsymbexec.py index 15024505..78c02fb1 100644 --- a/miasm2/jitter/emulatedsymbexec.py +++ b/miasm2/jitter/emulatedsymbexec.py @@ -28,8 +28,6 @@ class EmulatedSymbExec(SymbolicExecutionEngine): super(EmulatedSymbExec, self).__init__(*args, **kwargs) self.cpu = cpu self.vm = vm - self.func_read = self._func_read - self.func_write = self._func_write def reset_regs(self): """Set registers value to 0. Ignore register aliases""" @@ -37,13 +35,13 @@ class EmulatedSymbExec(SymbolicExecutionEngine): self.symbols.symbols_id[reg] = m2_expr.ExprInt(0, size=reg.size) # Memory management - def _func_read(self, expr_mem): + def mem_read(self, expr_mem): """Memory read wrapper for symbolic execution @expr_mem: ExprMem""" addr = expr_mem.ptr if not addr.is_int(): - return expr_mem + return super(EmulatedSymbExec, self).mem_read(expr_mem) addr = int(addr) size = expr_mem.size / 8 value = self.cpu.get_mem(addr, size) @@ -54,9 +52,8 @@ class EmulatedSymbExec(SymbolicExecutionEngine): return m2_expr.ExprInt(int(value.encode("hex"), 16), expr_mem.size) - def _func_write(self, symb_exec, dest, data): + def mem_write(self, dest, data): """Memory read wrapper for symbolic execution - @symb_exec: symbexec instance @dest: ExprMem instance @data: Expr instance""" diff --git a/miasm2/jitter/loader/elf.py b/miasm2/jitter/loader/elf.py index b94a9309..17041372 100644 --- a/miasm2/jitter/loader/elf.py +++ b/miasm2/jitter/loader/elf.py @@ -65,6 +65,20 @@ def fill_loc_db_with_symbols(elf, loc_db, base_addr=0): # Get symbol sections symbol_sections = [] for section_header in elf.sh: + if hasattr(section_header, 'symbols'): + for name, sym in section_header.symbols.iteritems(): + if not name or sym.value == 0: + continue + name = loc_db.find_free_name(name) + loc_db.add_location(name, sym.value, strict=False) + + if hasattr(section_header, 'reltab'): + for rel in section_header.reltab: + if not rel.sym or rel.offset == 0: + continue + name = loc_db.find_free_name(rel.sym) + loc_db.add_location(name, rel.offset, strict=False) + if hasattr(section_header, 'symtab'): log.debug("Find %d symbols in %r", len(section_header.symtab), section_header) diff --git a/miasm2/os_dep/win_api_x86_32.py b/miasm2/os_dep/win_api_x86_32.py index 5d6e4765..df679074 100644 --- a/miasm2/os_dep/win_api_x86_32.py +++ b/miasm2/os_dep/win_api_x86_32.py @@ -1490,20 +1490,20 @@ def kernel32_lstrlen(jitter): my_strlen(jitter, whoami(), jitter.get_str_ansi, len) -def my_lstrcat(jitter, funcname, get_str): +def my_lstrcat(jitter, funcname, get_str, set_str): ret_ad, args = jitter.func_args_stdcall(['ptr_str1', 'ptr_str2']) s1 = get_str(args.ptr_str1) s2 = get_str(args.ptr_str2) - jitter.vm.set_mem(args.ptr_str1, s1 + s2) + set_str(args.ptr_str1, s1 + s2) jitter.func_ret_stdcall(ret_ad, args.ptr_str1) def kernel32_lstrcatA(jitter): - my_lstrcat(jitter, whoami(), jitter.get_str_ansi) + my_lstrcat(jitter, whoami(), jitter.get_str_ansi, jitter.set_str_ansi) def kernel32_lstrcatW(jitter): - my_lstrcat(jitter, whoami(), jitter.get_str_unic) + my_lstrcat(jitter, whoami(), jitter.get_str_unic, jitter.set_str_unic) def kernel32_GetUserGeoID(jitter): diff --git a/test/analysis/unssa.py b/test/analysis/unssa.py index 244b5436..ae9566ee 100644 --- a/test/analysis/unssa.py +++ b/test/analysis/unssa.py @@ -2,12 +2,13 @@ from pdb import pm from pprint import pprint as pp from miasm2.expression.expression import ExprId, ExprInt, ExprAssign, ExprMem, \ - ExprCond, ExprOp + ExprCond, ExprOp, ExprLoc from miasm2.core.locationdb import LocationDB -from miasm2.analysis.data_flow import * +from miasm2.analysis.data_flow import DiGraphLivenessSSA, dead_simp, PropagateExpr from miasm2.ir.analysis import ira from miasm2.ir.ir import IRCFG, IRBlock, AssignBlock -from miasm2.analysis.ssa import SSADiGraph, UnSSADiGraph, DiGraphLivenessSSA +from miasm2.analysis.ssa import SSADiGraph +from miasm2.analysis.outofssa import UnSSADiGraph loc_db = LocationDB() diff --git a/test/arch/x86/arch.py b/test/arch/x86/arch.py index b866a325..d2204d77 100644 --- a/test/arch/x86/arch.py +++ b/test/arch/x86/arch.py @@ -2020,7 +2020,7 @@ reg_tests = [ "F2AE"), (m32, "00000000 REPE SCASB", "F3AE"), - (m32, "00000000 REPE LODSD", + (m32, "00000000 REP LODSD", "F3ad"), (m32, "00000000 RET", @@ -3093,6 +3093,11 @@ reg_tests = [ (m32, "00000000 EMMS", "0f77"), + + (m64, "00000000 ENDBR64", + "f30f1efa"), + (m32, "00000000 ENDBR32", + "f30f1efb"), ] diff --git a/test/expr_type/test_chandler.py b/test/expr_type/test_chandler.py index f13b8559..09c588cb 100644 --- a/test/expr_type/test_chandler.py +++ b/test/expr_type/test_chandler.py @@ -512,7 +512,7 @@ exprc2expr = ExprCToExpr(expr_types, types_mngr) mychandler.updt_expr_types(expr_types) -for (expr, result) in tests[4:]: +for (expr, result) in tests: print "*" * 80 print "Native expr:", expr result = set(result) diff --git a/test/expression/simplifications.py b/test/expression/simplifications.py index 5bca3fa9..cc33fc54 100644 --- a/test/expression/simplifications.py +++ b/test/expression/simplifications.py @@ -101,6 +101,10 @@ i3 = ExprInt(3, 32) im1 = ExprInt(-1, 32) im2 = ExprInt(-2, 32) +bi0 = ExprInt(0, 1) +bi1 = ExprInt(1, 1) + + icustom = ExprInt(0x12345678, 32) cc = ExprCond(a, b, c) @@ -490,6 +494,21 @@ to_test = [ ExprOp(TOK_EQUAL, a8, ExprInt(0xFF, 8)) ), + ( + ExprOp(TOK_EQUAL, i2, a + i1), + ExprOp(TOK_EQUAL, a , i1) + ), + + ( + ExprOp(TOK_EQUAL, a ^ i1, i2), + ExprOp(TOK_EQUAL, a , i3) + ), + + ( + ExprOp(TOK_EQUAL, i2, a ^ i1), + ExprOp(TOK_EQUAL, a , i3) + ), + (ExprOp(TOK_INF_SIGNED, i1, i2), ExprInt(1, 1)), (ExprOp(TOK_INF_UNSIGNED, i1, i2), ExprInt(1, 1)), (ExprOp(TOK_INF_EQUAL_SIGNED, i1, i2), ExprInt(1, 1)), @@ -692,6 +711,33 @@ to_test = [ (a8.zeroExtend(32)[2:5], a8[2:5]), + + ( + ExprCond(a + b, a, b), + ExprCond(ExprOp(TOK_EQUAL, a, -b), b, a) + ), + + ( + ExprCond(a + i1, a, b), + ExprCond(ExprOp(TOK_EQUAL, a, im1), b, a) + ), + + + ( + ExprCond(ExprOp(TOK_EQUAL, a, i1), bi1, bi0), + ExprOp(TOK_EQUAL, a, i1) + ), + + ( + ExprCond(ExprOp(TOK_INF_SIGNED, a, i1), bi1, bi0), + ExprOp(TOK_INF_SIGNED, a, i1) + ), + + ( + ExprOp(TOK_INF_EQUAL_UNSIGNED, a, i0), + ExprOp(TOK_EQUAL, a, i0) + ), + ] for e_input, e_check in to_test: diff --git a/test/ir/symbexec.py b/test/ir/symbexec.py index 00ef7c10..4f01ac3c 100755 --- a/test/ir/symbexec.py +++ b/test/ir/symbexec.py @@ -26,28 +26,34 @@ class TestSymbExec(unittest.TestCase): id_d = ExprId('d', 32) id_e = ExprId('e', 64) - sb = SymbolicExecutionEngine(ira, - { - ExprMem(ExprInt(0x4, 32), 8): ExprInt(0x44, 8), - ExprMem(ExprInt(0x5, 32), 8): ExprInt(0x33, 8), - ExprMem(ExprInt(0x6, 32), 8): ExprInt(0x22, 8), - ExprMem(ExprInt(0x7, 32), 8): ExprInt(0x11, 8), + class CustomSymbExec(SymbolicExecutionEngine): + def mem_read(self, expr): + if expr == ExprMem(ExprInt(0x1000, 32), 32): + return id_x + return super(CustomSymbExec, self).mem_read(expr) - ExprMem(ExprInt(0x20, 32), 32): id_x, + sb = CustomSymbExec(ira, + { + ExprMem(ExprInt(0x4, 32), 8): ExprInt(0x44, 8), + ExprMem(ExprInt(0x5, 32), 8): ExprInt(0x33, 8), + ExprMem(ExprInt(0x6, 32), 8): ExprInt(0x22, 8), + ExprMem(ExprInt(0x7, 32), 8): ExprInt(0x11, 8), - ExprMem(ExprInt(0x40, 32), 32): id_x, - ExprMem(ExprInt(0x44, 32), 32): id_a, + ExprMem(ExprInt(0x20, 32), 32): id_x, - ExprMem(ExprInt(0x54, 32), 32): ExprInt(0x11223344, 32), + ExprMem(ExprInt(0x40, 32), 32): id_x, + ExprMem(ExprInt(0x44, 32), 32): id_a, - ExprMem(id_a, 32): ExprInt(0x11223344, 32), - id_a: ExprInt(0, 32), - id_b: ExprInt(0, 32), + ExprMem(ExprInt(0x54, 32), 32): ExprInt(0x11223344, 32), - ExprMem(id_c, 32): ExprMem(id_d + ExprInt(0x4, 32), 32), - ExprMem(id_c + ExprInt(0x4, 32), 32): ExprMem(id_d + ExprInt(0x8, 32), 32), + ExprMem(id_a, 32): ExprInt(0x11223344, 32), + id_a: ExprInt(0, 32), + id_b: ExprInt(0, 32), - }) + ExprMem(id_c, 32): ExprMem(id_d + ExprInt(0x4, 32), 32), + ExprMem(id_c + ExprInt(0x4, 32), 32): ExprMem(id_d + ExprInt(0x8, 32), 32), + + }) self.assertEqual(sb.eval_expr(ExprInt(1, 32)-ExprInt(1, 32)), ExprInt(0, 32)) @@ -101,14 +107,6 @@ class TestSymbExec(unittest.TestCase): self.assertEqual(sb.eval_expr(ExprMem(ExprInt(0x100, 32), 32)), ExprMem(ExprInt(0x100, 32), 32)) self.assertEqual(sb.eval_expr(ExprMem(id_c + ExprInt(0x2, 32), 32)), ExprMem(id_d + ExprInt(0x6, 32), 32)) - ## Func read - def custom_func_read(mem): - if mem == ExprMem(ExprInt(0x1000, 32), 32): - return id_x - return mem - - sb.func_read = custom_func_read - ## Unmodified read self.assertEqual(sb.eval_expr(ExprMem(ExprInt(4, 32), 8)), ExprInt(0x44, 8)) |