#!/usr/bin/env python
#-*- coding:utf-8 -*-

import logging

from miasm2.ir.symbexec import symbexec
from miasm2.ir.ir import ir
from miasm2.expression.expression \
    import ExprAff, ExprCond, ExprId, ExprInt, ExprMem

log = logging.getLogger("analysis")
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s"))
log.addHandler(console_handler)
log.setLevel(logging.WARNING)


class ira(ir):
    """IR Analysis
    This class provides higher level manipulations on IR, such as dead
    instruction removals.

    This class can be used as a common parent with `miasm2.ir.ir::ir` class.
    For instance:
        class ira_x86_16(ir_x86_16, ira)
    """

    def ira_regs_ids(self):
        """Returns ids of all registers used in the IR"""
        return self.arch.regs.all_regs_ids + [self.IRDst]

    def remove_dead_instr(self, irb, useful):
        """Remove dead affectations using previous reaches analysis
        @irb: irbloc instance
        @useful: useful statements from previous reach analysis
        Return True iff the block state has changed
        PRE: compute_reach(self)
        """
        modified = False
        for k, ir in enumerate(irb.irs):
            j = 0
            while j < len(ir):
                cur_instr = ir[j]
                if (isinstance(cur_instr.dst, ExprId)
                    and (irb.label, k, cur_instr) not in useful):
                    del ir[j]
                    modified = True
                else:
                    j += 1
        return modified

    def init_useful_instr(self):
        """Computes a set of triples (block, instruction number, instruction)
        containing initially useful instructions :
          - Instructions affecting final value of return registers
          - Instructions affecting IRDst register
          - Instructions writing in memory
          - Function call instructions
        Return set of intial useful instructions
        """

        useful = set()

        for node in self.graph.nodes():
            if node not in self.blocs:
                continue

            block = self.blocs[node]
            successors = self.graph.successors(node)
            has_son = bool(successors)
            for p_son in successors:
                if p_son not in self.blocs:
                    # Leaf has lost its son: don't remove anything
                    # reaching this block
                    for r in self.ira_regs_ids():
                        useful.update(block.cur_reach[-1][r].union(
                                block.defout[-1][r]))

            # Function call, memory write or IRDst affectation
            for k, ir in enumerate(block.irs):
                for i_cur in ir:
                    if i_cur.src.is_function_call():
                        # /!\ never remove ir calls
                        useful.add((block.label, k, i_cur))
                    if isinstance(i_cur.dst, ExprMem):
                        useful.add((block.label, k, i_cur))
                    useful.update(block.defout[k][self.IRDst])

            # Affecting return registers
            if not has_son:
                for r in self.get_out_regs(block):
                    useful.update(block.defout[-1][r]
                                  if block.defout[-1][r] else
                                  block.cur_reach[-1][r])

        return useful

    def _mark_useful_code(self):
        """Mark useful statements using previous reach analysis

        Source : Kennedy, K. (1979). A survey of data flow analysis techniques.
        IBM Thomas J. Watson Research Division,  Algorithm MK

        Return a set of triplets (block, instruction number, instruction) of
        useful instructions
        PRE: compute_reach(self)

        """

        useful = self.init_useful_instr()
        worklist = useful.copy()
        while worklist:
            elem = worklist.pop()
            useful.add(elem)
            irb, irs_ind, ins = elem

            block = self.blocs[irb]
            instr_defout = block.defout[irs_ind]
            cur_kill = block.cur_kill[irs_ind]
            cur_reach = block.cur_reach[irs_ind]

            # Handle dependencies of used variables in ins
            for reg in ins.get_r(True).intersection(self.ira_regs_ids()):
                worklist.update(
                    cur_reach[reg].difference(useful).difference(
                        cur_kill[reg]
                        if not instr_defout[reg] else
                        set()))
                for _, _, i in instr_defout[reg]:
                    # Loop case (i in defout of current block)
                    if i == ins:
                        worklist.update(cur_reach[reg].difference(useful))
        return useful

    def remove_dead_code(self):
        """Remove dead instructions in each block of the graph using the reach
        analysis .
        Returns True if a block has been modified
        PRE : compute_reach(self)
        """
        useful = self._mark_useful_code()
        modified = False
        for block in self.blocs.values():
            modified |= self.remove_dead_instr(block, useful)
        return modified

    def set_dead_regs(self, b):
        pass

    def add_unused_regs(self):
        pass

    @staticmethod
    def print_set(v_set):
        """Print each triplet contained in a set
        @v_set: set containing triplets elements
        """
        for p in v_set:
            print '    (%s, %s, %s)' % p

    def dump_bloc_state(self, irb):
        print '*'*80
        for k, irs in enumerate(irb.irs):
            for i in xrange(len(irs)):
                print 5*"-"
                print 'instr', k, irs[i]
                print 5*"-"
                for v in self.ira_regs_ids():
                    if irb.cur_reach[k][v]:
                        print 'REACH[%d][%s]' % (k, v)
                        self.print_set(irb.cur_reach[k][v])
                    if irb.cur_kill[k][v]:
                        print 'KILL[%d][%s]' % (k, v)
                        self.print_set(irb.cur_kill[k][v])
                    if irb.defout[k][v]:
                        print 'DEFOUT[%d][%s]' % (k, v)
                        self.print_set(irb.defout[k][v])

    def compute_reach_block(self, irb):
        """Variable influence computation for a single block
        @irb: irbloc instance
        PRE: init_reach()
        """

        reach_block = {key: value.copy()
                      for key, value in irb.cur_reach[0].iteritems()}

        # Compute reach from predecessors
        for n_pred in self.graph.predecessors(irb.label):
            p_block = self.blocs[n_pred]

            # Handle each register definition
            for c_reg in self.ira_regs_ids():
                # REACH(n) = U[p in pred] DEFOUT(p) U REACH(p)\KILL(p)
                pred_through = p_block.defout[-1][c_reg].union(
                    p_block.cur_reach[-1][c_reg].difference(
                        p_block.cur_kill[-1][c_reg]))
                reach_block[c_reg].update(pred_through)

        # If a predecessor has changed
        if reach_block != irb.cur_reach[0]:
            irb.cur_reach[0] = reach_block
            for c_reg in self.ira_regs_ids():
                if irb.defout[0][c_reg]:
                    # KILL(n) = DEFOUT(n) ? REACH(n)\DEFOUT(n) : EMPTY
                    irb.cur_kill[0][c_reg].update(
                        reach_block[c_reg].difference(irb.defout[0][c_reg]))

        # Compute reach and kill for block's instructions
        for i in xrange(1, len(irb.irs)):
            for c_reg in self.ira_regs_ids():
                # REACH(n) = U[p in pred] DEFOUT(p) U REACH(p)\KILL(p)
                pred_through = irb.defout[i - 1][c_reg].union(
                    irb.cur_reach[i - 1][c_reg].difference(
                        irb.cur_kill[i - 1][c_reg]))
                irb.cur_reach[i][c_reg].update(pred_through)
                if irb.defout[i][c_reg]:
                    # KILL(n) = DEFOUT(n) ? REACH(n)\DEFOUT(n) : EMPTY
                    irb.cur_kill[i][c_reg].update(
                        irb.cur_reach[i][c_reg].difference(
                            irb.defout[i][c_reg]))

    def _test_kill_reach_fix(self):
        """Return True iff a fixed point has been reached during reach
        analysis"""

        fixed = True
        for node in self.graph.nodes():
            if node in self.blocs:
                irb = self.blocs[node]
                if (irb.cur_reach != irb.prev_reach or
                    irb.cur_kill != irb.prev_kill):
                    fixed = False
                    irb.prev_reach = irb.cur_reach[:]
                    irb.prev_kill = irb.cur_kill[:]
        return fixed

    def compute_reach(self):
        """
        Compute reach, defout and kill sets until a fixed point is reached.

        Source : Kennedy, K. (1979). A survey of data flow analysis techniques.
        IBM Thomas J. Watson Research Division, page 43
        """
        fixed_point = False
        log.debug('iteration...')
        while not fixed_point:
            for node in self.graph.nodes():
                if node in self.blocs:
                    self.compute_reach_block(self.blocs[node])
            fixed_point = self._test_kill_reach_fix()

    def dead_simp(self):
        """
        This function is used to analyse relation of a * complete function *
        This means the blocks under study represent a solid full function graph.

        Source : Kennedy, K. (1979). A survey of data flow analysis techniques.
        IBM Thomas J. Watson Research Division, page 43
        """
        # Update r/w variables for all irblocs
        self.get_rw(self.ira_regs_ids())
        # Liveness step
        self.compute_reach()
        self.remove_dead_code()
        # Simplify expressions
        self.simplify_blocs()

    def gen_equations(self):
        for irb in self.blocs.values():
            symbols_init = {}
            for r in self.arch.regs.all_regs_ids:
                x = ExprId(r.name, r.size)
                x.is_term = True
                symbols_init[r] = x
            sb = symbexec(self, dict(symbols_init))
            sb.emulbloc(irb)
            eqs = []
            for n_w in sb.symbols:
                v = sb.symbols[n_w]
                if n_w in symbols_init and symbols_init[n_w] == v:
                    continue
                eqs.append(ExprAff(n_w, v))
            print '*' * 40
            print irb
            irb.irs = [eqs]
            irb.lines = [None]

    def sizeof_char(self):
        "Return the size of a char in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_short(self):
        "Return the size of a short in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_int(self):
        "Return the size of an int in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_long(self):
        "Return the size of a long in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_pointer(self):
        "Return the size of a void* in bits"
        raise NotImplementedError("Abstract method")