about summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorCamille Mougey <commial@gmail.com>2018-07-09 12:59:26 +0200
committerGitHub <noreply@github.com>2018-07-09 12:59:26 +0200
commit17d48de1951c81fc8b5b4184713a971537747227 (patch)
treed6f36c57d1e6d60a24b29234054178b30f2a6f0e
parent89cb925f74a29f1ab6dc2dc2b2d68c4f6a9d4158 (diff)
parent953676d831c314282a9e113f5617ca7a90b09be6 (diff)
downloadmiasm-17d48de1951c81fc8b5b4184713a971537747227.tar.gz
miasm-17d48de1951c81fc8b5b4184713a971537747227.zip
Merge pull request #713 from serpilliere/ssa_transform
Ssa transform
-rw-r--r--example/disasm/full.py13
-rw-r--r--miasm2/analysis/ssa.py574
-rw-r--r--miasm2/core/graph.py16
-rwxr-xr-xtest/test_all.py4
4 files changed, 605 insertions, 2 deletions
diff --git a/example/disasm/full.py b/example/disasm/full.py
index cf52253b..fdd220ca 100644
--- a/example/disasm/full.py
+++ b/example/disasm/full.py
@@ -9,6 +9,7 @@ from miasm2.core.interval import interval
 from miasm2.analysis.machine import Machine
 from miasm2.analysis.data_flow import dead_simp, DiGraphDefUse, ReachingDefinitions
 from miasm2.expression.simplifications import expr_simp
+from miasm2.analysis.ssa import SSAPath, SSADiGraph
 
 log = logging.getLogger("dis")
 console_handler = logging.StreamHandler()
@@ -52,6 +53,8 @@ parser.add_argument('-c', "--rawbinary", default=False, action="store_true",
 parser.add_argument('-d', "--defuse", action="store_true",
                     help="Dump the def-use graph in file 'defuse.dot'."
                     "The defuse is dumped after simplifications if -s option is specified")
+parser.add_argument('-p', "--ssa", action="store_true",
+                    help="Generate the ssa form in  'ssa.dot'.")
 
 args = parser.parse_args()
 
@@ -236,3 +239,13 @@ if args.gen_ir:
             modified |= ircfg_a.merge_blocks()
 
         open('graph_irflow_reduced.dot', 'w').write(ircfg_a.dot())
+
+    if args.ssa:
+        heads = ircfg_a.heads()
+        if len(heads) != 1:
+            raise RuntimeError("Your graph should have only one head")
+        head = list(heads)[0]
+        ssa = SSADiGraph(ircfg_a)
+        ssa.transform(head)
+
+        open("ssa.dot", "wb").write(ssa.graph.dot())
diff --git a/miasm2/analysis/ssa.py b/miasm2/analysis/ssa.py
new file mode 100644
index 00000000..63d0c4fb
--- /dev/null
+++ b/miasm2/analysis/ssa.py
@@ -0,0 +1,574 @@
+from collections import deque
+
+from miasm2.expression.expression import ExprId, ExprAff, ExprOp, get_expr_ids
+from miasm2.ir.ir import AssignBlock, IRBlock
+
+
+
+class SSA(object):
+    """
+    Generic class for static single assignment (SSA) transformation
+
+    Handling of
+    - variable generation
+    - variable renaming
+    - conversion of an IRCFG block into SSA
+
+    Variables will be renamed to <variable>.<index>, whereby the
+    index will be increased in every definition of <variable>.
+
+    Memory expressions are stateless. The addresses are in SSA form,
+    but memory aliasing will occur. For instance, if it holds
+    that RAX == RBX.0 + (-0x8) and
+
+    @64[RBX.0 + (-0x8)] = RDX
+    RCX.0 = @64[RAX],
+
+    then it cannot be tracked that RCX.0 == RDX.
+    """
+
+
+    def __init__(self, ircfg):
+        """
+        Initialises generic class for SSA
+        :param ircfg: instance of IRCFG
+        """
+        # IRCFG instance
+        self.ircfg = ircfg
+
+        # SSA blocks
+        self.blocks = {}
+
+        # stack for RHS
+        self._stack_rhs = {}
+        # stack for LHS
+        self._stack_lhs = {}
+
+        self._ssa_variable_to_expr = {}
+
+        # dict of SSA expressions
+        self.expressions = {}
+
+        # dict of SSA to original location
+        self.ssa_to_location = {}
+
+        # Don't SSA IRDst
+        self.immutable_ids = set([self.ircfg.IRDst])
+
+    def get_regs(self, expr):
+        return get_expr_ids(expr)
+
+    def transform(self, *args, **kwargs):
+        """Transforms into SSA"""
+        raise NotImplementedError("Abstract method")
+
+    def get_block(self, loc_key):
+        """
+        Returns an IRBlock
+        :param loc_key: LocKey instance
+        :return: IRBlock
+        """
+        irblock = self.ircfg.blocks.get(loc_key, None)
+
+        return irblock
+
+    def reverse_variable(self, ssa_var):
+        """
+        Transforms a variable in SSA form into non-SSA form
+        :param ssa_var: ExprId, variable in SSA form
+        :return: ExprId, variable in non-SSA form
+        """
+        expr = self._ssa_variable_to_expr.get(ssa_var, ssa_var)
+        return expr
+
+    def reset(self):
+        """Resets SSA transformation"""
+        self.blocks = {}
+        self.expressions = {}
+        self._stack_rhs = {}
+        self._stack_lhs = {}
+        self.ssa_to_location = {}
+
+    def _gen_var_expr(self, expr, stack):
+        """
+        Generates a variable expression in SSA form
+        :param expr: variable expression which will be translated
+        :param stack: self._stack_rhs or self._stack_lhs
+        :return: variable expression in SSA form
+        """
+        index = stack[expr]
+        name = "%s.%d" % (expr.name, index)
+        ssa_var = ExprId(name, expr.size)
+        self._ssa_variable_to_expr[ssa_var] = expr
+
+        return ssa_var
+
+    def _transform_var_rhs(self, ssa_var):
+        """
+        Transforms a variable on the right hand side into SSA
+        :param ssa_var: variable
+        :return: transformed variable
+        """
+        # variable has never been on the LHS
+        if ssa_var not in self._stack_rhs:
+            return ssa_var
+        # variable has been on the LHS
+        stack = self._stack_rhs
+        return self._gen_var_expr(ssa_var, stack)
+
+    def _transform_var_lhs(self, expr):
+        """
+        Transforms a variable on the left hand side into SSA
+        :param expr: variable
+        :return: transformed variable
+        """
+        # check if variable has already been on the LHS
+        if expr not in self._stack_lhs:
+            self._stack_lhs[expr] = 0
+        # save last value for RHS transformation
+        self._stack_rhs[expr] = self._stack_lhs[expr]
+
+        # generate SSA expression
+        stack = self._stack_lhs
+        ssa_var = self._gen_var_expr(expr, stack)
+
+        return ssa_var
+
+    def _transform_expression_lhs(self, dst):
+        """
+        Transforms an expression on the left hand side into SSA
+        :param dst: expression
+        :return: expression in SSA form
+        """
+        if dst.is_mem():
+            # transform with last RHS instance
+            ssa_var = self._transform_expression_rhs(dst)
+        else:
+            # transform LHS
+            ssa_var = self._transform_var_lhs(dst)
+
+            # increase SSA variable counter
+            self._stack_lhs[dst] += 1
+
+        return ssa_var
+
+    def _transform_expression_rhs(self, src):
+        """
+        Transforms an expression on the right hand side into SSA
+        :param src: expression
+        :return: expression in SSA form
+        """
+        # dissect expression in variables
+        variables = self.get_regs(src)
+        src_ssa = src
+        # transform variables
+        for expr in variables:
+            ssa_var = self._transform_var_rhs(expr)
+            src_ssa = src_ssa.replace_expr({expr: ssa_var})
+
+        return src_ssa
+
+    @staticmethod
+    def _parallel_instructions(assignblk):
+        """
+        Extracts the instruction from a AssignBlock.
+
+        Since instructions in a AssignBlock are evaluated
+        in parallel, memory instructions on the left hand
+        side will be inserted into the start of the list.
+        Then, memory instruction on the LHS will be
+        transformed firstly.
+
+        :param assignblk: assignblock
+        :return: sorted list of expressions
+        """
+        instructions = []
+        for dst in assignblk:
+            # dst = src
+            aff = assignblk.dst2ExprAff(dst)
+            # insert memory expression into start of list
+            if dst.is_mem():
+                instructions.insert(0, aff)
+            else:
+                instructions.append(aff)
+
+        return instructions
+
+    @staticmethod
+    def _convert_block(irblock, ssa_list):
+        """
+        Transforms an IRBlock inplace into SSA
+        :param irblock: IRBlock to be transformed
+        :param ssa_list: list of SSA expressions
+        """
+        # iterator over SSA expressions
+        ssa_iter = iter(ssa_list)
+        new_irs = []
+        # walk over IR blocks' assignblocks
+        for assignblk in irblock.assignblks:
+            # list of instructions
+            instructions = []
+            # insert SSA instructions
+            for _ in assignblk:
+                instructions.append(ssa_iter.next())
+            # replace instructions of assignblock in IRBlock
+            new_irs.append(AssignBlock(instructions, assignblk.instr))
+        return IRBlock(irblock.loc_key, new_irs)
+
+    def _rename_expressions(self, loc_key):
+        """
+        Transforms variables and expressions
+        of an IRBlock into SSA.
+
+        IR representations of an assembly instruction are evaluated
+        in parallel. Thus, RHS and LHS instructions will be performed
+        separately.
+        :param loc_key: IRBlock loc_key
+        """
+        # list of IRBlock's SSA expressions
+        ssa_expressions_block = []
+
+        # retrieve IRBlock
+        irblock = self.get_block(loc_key)
+        if irblock is None:
+            # Incomplete graph
+            return
+
+        # iterate block's IR expressions
+        for index, assignblk in enumerate(irblock.assignblks):
+            # list of parallel instructions
+            instructions = self._parallel_instructions(assignblk)
+            # list for transformed RHS expressions
+            rhs = deque()
+
+            # transform RHS
+            for expr in instructions:
+                src = expr.src
+                src_ssa = self._transform_expression_rhs(src)
+                # save transformed RHS
+                rhs.append(src_ssa)
+
+            # transform LHS
+            for expr in instructions:
+                if expr.dst in self.immutable_ids:
+                    dst_ssa = expr.dst
+                else:
+                    dst_ssa = self._transform_expression_lhs(expr.dst)
+
+                # retrieve corresponding RHS expression
+                src_ssa = rhs.popleft()
+
+                # rebuild SSA expression
+                expr = ExprAff(dst_ssa, src_ssa)
+                self.expressions[dst_ssa] = src_ssa
+                self.ssa_to_location[dst_ssa] = (loc_key, index)
+
+
+                # append ssa expression to list
+                ssa_expressions_block.append(expr)
+
+        # replace blocks IR expressions with corresponding SSA transformations
+        new_irblock = self._convert_block(irblock, ssa_expressions_block)
+        self.ircfg.blocks[loc_key] = new_irblock
+
+
+class SSABlock(SSA):
+    """
+    SSA transformation on block level
+
+    It handles
+    - transformation of a single IRBlock into SSA
+    - reassembling an SSA expression into a non-SSA
+      expression through iterative resolving of the RHS
+    """
+
+    def transform(self, loc_key):
+        """
+        Transforms a block into SSA form
+        :param loc_key: IRBlock loc_key
+        """
+        self._rename_expressions(loc_key)
+
+    def reassemble_expr(self, expr):
+        """
+        Reassembles an expression in SSA form into a solely non-SSA expression
+        :param expr: expression
+        :return: non-SSA expression
+        """
+        # worklist
+        todo = {expr.copy()}
+
+        while todo:
+            # current expression
+            cur = todo.pop()
+            # RHS of current expression
+            cur_rhs = self.expressions[cur]
+
+            # replace cur with RHS in expr
+            expr = expr.replace_expr({cur: cur_rhs})
+
+            # parse ExprIDs on RHS
+            ids_rhs = self.get_regs(cur_rhs)
+
+            # add RHS ids to worklist
+            for id_rhs in ids_rhs:
+                if id_rhs in self.expressions:
+                    todo.add(id_rhs)
+        return expr
+
+
+class SSAPath(SSABlock):
+    """
+    SSA transformation on path level
+
+    It handles
+    - transformation of a path of IRBlocks into SSA
+    """
+
+    def transform(self, path):
+        """
+        Transforms a path into SSA
+        :param path: list of IRBlock loc_key
+        """
+        for block in path:
+            self._rename_expressions(block)
+
+
+class SSADiGraph(SSA):
+    """
+    SSA transformation on DiGraph level
+
+    It handles
+    - transformation of a DiGraph into SSA
+    - generation, insertion and filling of phi nodes
+
+    The implemented SSA form is known as minimal SSA.
+    """
+
+    PHI_STR = 'Phi'
+
+
+    def __init__(self, ircfg):
+        """
+        Initialises SSA class for directed graphs
+        :param ircfg: instance of IRCFG
+        """
+        super(SSADiGraph, self).__init__(ircfg)
+
+        # variable definitions
+        self.defs = {}
+
+        # dict of blocks' phi nodes
+        self._phinodes = {}
+
+        # IRCFG control flow graph
+        self.graph = ircfg
+
+
+    def transform(self, head):
+        """Transforms into SSA"""
+        self._init_variable_defs(head)
+        self._place_phi(head)
+        self._rename(head)
+        self._insert_phi()
+        self._convert_phi()
+
+    def reset(self):
+        """Resets SSA transformation"""
+        super(SSADiGraph, self).reset()
+        self.defs = {}
+        self._phinodes = {}
+
+    def _init_variable_defs(self, head):
+        """
+        Initialises all variable definitions and
+        assigns the corresponding IRBlocks.
+
+        All variable definitions in self.defs contain
+        a set of IRBlocks in which the variable gets assigned
+        """
+
+        for loc_key in self.graph.walk_depth_first_forward(head):
+            irblock = self.get_block(loc_key)
+            if irblock is None:
+                # Incomplete graph
+                continue
+
+            # search for block's IR definitions/destinations
+            for assignblk in irblock.assignblks:
+                for dst in assignblk:
+                    # enforce ExprId
+                    if dst.is_id():
+                        # exclude immutable ids
+                        if dst in self.immutable_ids:
+                            continue
+                        # map variable definition to blocks
+                        self.defs.setdefault(dst, set()).add(irblock.loc_key)
+
+    def _place_phi(self, head):
+        """
+        For all blocks, empty phi functions will be placed for every
+        variable in the block's dominance frontier.
+
+        self.phinodes contains a dict for every block in the
+        dominance frontier. In this dict, each variable
+        definition maps to its corresponding phi function.
+
+        Source: Cytron, Ron, et al.
+        "An efficient method of computing static single assignment form"
+        Proceedings of the 16th ACM SIGPLAN-SIGACT symposium on
+        Principles of programming languages (1989), p. 30
+        """
+        # dominance frontier
+        frontier = self.graph.compute_dominance_frontier(head)
+
+        for variable in self.defs:
+            done = set()
+            todo = set()
+            intodo = set()
+
+            for loc_key in self.defs[variable]:
+                todo.add(loc_key)
+                intodo.add(loc_key)
+
+            while todo:
+                loc_key = todo.pop()
+
+                # walk through block's dominance frontier
+                for node in frontier.get(loc_key, []):
+                    if node in done:
+                        continue
+
+                    # place empty phi functions for a variable
+                    empty_phi = self._gen_empty_phi(variable)
+
+                    # add empty phi node for variable in node
+                    self._phinodes.setdefault(node, {})[variable] = empty_phi.src
+
+                    done.add(node)
+
+                    if node not in intodo:
+                        intodo.add(node)
+                        todo.add(node)
+
+    def _gen_empty_phi(self, expr):
+        """
+        Generates an empty phi function for a variable
+        :param expr: variable
+        :return: ExprAff, empty phi function for expr
+        """
+        phi = ExprId(self.PHI_STR, expr.size)
+        return ExprAff(expr, phi)
+
+    def _fill_phi(self, *args):
+        """
+        Fills a phi function with variables.
+
+        phi(x.1, x.5, x.6)
+
+        :param args: list of ExprId
+        :return: ExprOp
+        """
+        return ExprOp(self.PHI_STR, *set(args))
+
+    def _rename(self, head):
+        """
+        Transforms each variable expression in the CFG into SSA
+        by traversing the dominator tree in depth-first search.
+
+        1. Transform variables of phi functions on LHS into SSA
+        2. Transform all non-phi expressions into SSA
+        3. Update the successor's phi functions' RHS with current SSA variables
+        4. Save current SSA variable stack for successors in the dominator tree
+
+        Source: Cytron, Ron, et al.
+        "An efficient method of computing static single assignment form"
+        Proceedings of the 16th ACM SIGPLAN-SIGACT symposium on
+        Principles of programming languages (1989), p. 31
+        """
+        # compute dominator tree
+        dominator_tree = self.graph.compute_dominator_tree(head)
+
+        # init SSA variable stack
+        stack = [self._stack_rhs]
+
+        # walk in DFS over the dominator tree
+        for loc_key in dominator_tree.walk_depth_first_forward(head):
+            # restore SSA variable stack of the predecessor in the dominator tree
+            self._stack_rhs = stack.pop().copy()
+
+            # Transform variables of phi functions on LHS into SSA
+            self._rename_phi_lhs(loc_key)
+
+            # Transform all non-phi expressions into SSA
+            self._rename_expressions(loc_key)
+
+            # Update the successor's phi functions' RHS with current SSA variables
+            # walk over block's successors in the CFG
+            for successor in self.graph.successors_iter(loc_key):
+                self._rename_phi_rhs(successor)
+
+            # Save current SSA variable stack for successors in the dominator tree
+            for successor in dominator_tree.successors_iter(loc_key):
+                stack.append(self._stack_rhs)
+
+    def _rename_phi_lhs(self, loc_key):
+        """
+        Transforms phi function's expressions of an IRBlock
+        on the left hand side into SSA
+        :param loc_key: IRBlock loc_key
+        """
+        if loc_key in self._phinodes:
+            # create temporary list of phi function assignments for inplace renaming
+            tmp = list(self._phinodes[loc_key])
+
+            # iterate over all block's phi nodes
+            for dst in tmp:
+                # transform variables on LHS inplace
+                self._phinodes[loc_key][self._transform_expression_lhs(dst)] = self._phinodes[loc_key].pop(dst)
+
+    def _rename_phi_rhs(self, successor):
+        """
+        Transforms the right hand side of each successor's phi function
+        into SSA. Each transformed expression of a phi function's
+        right hand side is of the form
+
+        phi(<var>.<index 1>, <var>.<index 2>, ..., <var>.<index n>)
+
+        :param successor: loc_key of block's direct successor in the CFG
+        """
+        # if successor is in block's dominance frontier
+        if successor in self._phinodes:
+            # walk over all variables on LHS
+            for dst, src in self._phinodes[successor].iteritems():
+                # transform variable on RHS in non-SSA form
+                expr = self.reverse_variable(dst)
+                # transform expr into it's SSA form using current stack
+                src_ssa = self._transform_expression_rhs(expr)
+
+                # Add src_ssa to phi args
+                if src.is_id(self.PHI_STR):
+                    # phi function is empty
+                    expr = self._fill_phi(src_ssa)
+                else:
+                    # phi function contains at least one value
+                    expr = self._fill_phi(src_ssa, *src.args)
+
+                # update phi function
+                self._phinodes[successor][dst] = expr
+
+    def _insert_phi(self):
+        """Inserts phi functions into the list of SSA expressions"""
+        for loc_key in self._phinodes:
+            for dst in self._phinodes[loc_key]:
+                self.expressions[dst] = self._phinodes[loc_key][dst]
+
+    def _convert_phi(self):
+        """Inserts corresponding phi functions inplace
+        into IRBlock at the beginning"""
+        for loc_key in self._phinodes:
+            irblock = self.get_block(loc_key)
+            assignblk = AssignBlock(self._phinodes[loc_key])
+            # insert at the beginning
+            new_irs = IRBlock(loc_key, [assignblk] + list(irblock.assignblks))
+            self.ircfg.blocks[loc_key] = new_irs
diff --git a/miasm2/core/graph.py b/miasm2/core/graph.py
index d88f8721..d35148b1 100644
--- a/miasm2/core/graph.py
+++ b/miasm2/core/graph.py
@@ -339,6 +339,22 @@ class DiGraph(object):
                                                 self.successors_iter,
                                                 self.predecessors_iter)
 
+
+
+
+    def compute_dominator_tree(self, head):
+        """
+        Computes the dominator tree of a graph
+        :param head: head of graph
+        :return: DiGraph
+        """
+        idoms = self.compute_immediate_dominators(head)
+        dominator_tree = DiGraph()
+        for node in idoms:
+            dominator_tree.add_edge(idoms[node], node)
+
+        return dominator_tree
+
     @staticmethod
     def _walk_generic_dominator(node, gen_dominators, succ_cb):
         """Generic algorithm to return an iterator of the ordered list of
diff --git a/test/test_all.py b/test/test_all.py
index 40df315c..9b17aa92 100755
--- a/test/test_all.py
+++ b/test/test_all.py
@@ -515,8 +515,8 @@ class ExampleDisasmFull(ExampleDisassembler):
 
     def __init__(self, *args, **kwargs):
         super(ExampleDisasmFull, self).__init__(*args, **kwargs)
-        self.command_line = ["full.py", "-g", "-ss", "-d", "-m"] + self.command_line
-        self.products += ["graph_defuse.dot", "graph_execflow.dot",
+        self.command_line = ["full.py", "-g", "-ss", "-d", "-p", "-m"] + self.command_line
+        self.products += ["graph_defuse.dot", "graph_execflow.dot", "ssa.dot",
                           "graph_irflow.dot", "graph_irflow_raw.dot", "lines.dot", "graph_irflow_reduced.dot"]