about summary refs log tree commit diff stats
path: root/miasm2/jitter/codegen.py
diff options
context:
space:
mode:
authorFabrice Desclaux <fabrice.desclaux@cea.fr>2016-07-30 00:12:33 +0200
committerFabrice Desclaux <fabrice.desclaux@cea.fr>2016-08-30 11:08:16 +0200
commitd441330ab409cfb0a8d3e0ebcfccafef54c32cd0 (patch)
treeb0dda66bbed78a243b6d32ec8760dd69ec5e367a /miasm2/jitter/codegen.py
parentf2a9a353b32bf59a06b8738ab05e1d20109f71c9 (diff)
downloadmiasm-d441330ab409cfb0a8d3e0ebcfccafef54c32cd0.tar.gz
miasm-d441330ab409cfb0a8d3e0ebcfccafef54c32cd0.zip
Jitter: code generator rework
Diffstat (limited to 'miasm2/jitter/codegen.py')
-rw-r--r--miasm2/jitter/codegen.py567
1 files changed, 567 insertions, 0 deletions
diff --git a/miasm2/jitter/codegen.py b/miasm2/jitter/codegen.py
new file mode 100644
index 00000000..c1f1e392
--- /dev/null
+++ b/miasm2/jitter/codegen.py
@@ -0,0 +1,567 @@
+import miasm2.expression.expression as m2_expr
+from miasm2.ir.ir import irbloc
+from miasm2.ir.translators import Translator
+from miasm2.core.asmbloc import expr_is_label, asm_block_bad, asm_label
+
+# Miasm to C translator
+translator = Translator.to_language("C")
+
+# Pool of placeholder identifiers standing for prefetched memory reads:
+# 20 ExprId named 'pfmemSS_N' for each access size (8, 16, 32 and 64 bits).
+# PREFETCH_ID is the flat list of every placeholder; PREFETCH_ID_SIZE links
+# an access size to the list of its own placeholders.
+PREFETCH_ID = []
+PREFETCH_ID_SIZE = {}
+for size in [8, 16, 32, 64]:
+    PREFETCH_ID_SIZE[size] = []
+    for i in xrange(20):
+        name = 'pfmem%.2d_%d' % (size, i)
+        c = m2_expr.ExprId(name, size)
+        # globals()[name] = c
+        PREFETCH_ID.append(c)
+        PREFETCH_ID_SIZE[size].append(c)
+
+# All-ones bit mask for each listed size (in bits)
+SIZE_TO_MASK = {x: 2**x - 1 for x in (1, 2, 3, 7, 8, 16, 32, 64)}
+
+# All-ones 64 bit mask
+MASK_INT = 0xffffffffffffffff
+
+
+class Attributes(object):
+
+    """
+    Hold the attributes gathered for the irblocks of an instruction:
+    memory access flags, exception flags, logging options and the
+    instruction itself
+    """
+
+    def __init__(self, log_mn=False, log_regs=False):
+        # Logging options requested by the caller
+        self.log_mn, self.log_regs = log_mn, log_regs
+        # Memory access flags, cleared until the irblocks are analysed
+        self.mem_read = self.mem_write = False
+        # Explicit / operator implied exception flags
+        self.set_exception = self.op_set_exception = False
+        # Instruction these attributes relate to
+        self.instr = None
+
+
+class CGen(object):
+
+    """
+    Translate native assembly block to C
+    """
+
+    # Operator name prefixes considered able to raise an exception at run
+    # time (matched with startswith in is_exception_operator)
+    IMPLICIT_EXCEPTION_OP = set(['umod', 'udiv'])
+
+    # C snippet returning JIT_RET_EXCEPTION when VM_exception_flag, with
+    # EXCEPT_CODE_AUTOMOD masked out, has EXCEPT_DO_NOT_UPDATE_PC bits set.
+    # Placeholders: assigned C lvalue, its value, BlockDst->address value
+    CODE_EXCEPTION_MEM_AT_INSTR = r"""
+    // except fetch mem at instr noauto
+    if ((VM_exception_flag & ~EXCEPT_CODE_AUTOMOD) & EXCEPT_DO_NOT_UPDATE_PC) {
+        %s = %s;
+        BlockDst->address = %s;
+        return JIT_RET_EXCEPTION;
+    }
+    """
+
+    # Same placeholders, triggered by any bit set in VM_exception_flag
+    CODE_EXCEPTION_MEM_POST_INSTR = r"""
+    // except fetch mem post instr
+    if (VM_exception_flag) {
+        %s = %s;
+        BlockDst->address = %s;
+        return JIT_RET_EXCEPTION;
+    }
+    """
+
+    # Same placeholders, triggered by CPU_exception_flag_at_instr
+    CODE_EXCEPTION_AT_INSTR = r"""
+    if (CPU_exception_flag_at_instr) {
+        %s = %s;
+        BlockDst->address = %s;
+        return JIT_RET_EXCEPTION;
+    }
+    """
+
+    # Same placeholders, triggered by CPU_exception_flag
+    CODE_EXCEPTION_POST_INSTR = r"""
+    if (CPU_exception_flag) {
+        %s = %s;
+        BlockDst->address = %s;
+        return JIT_RET_EXCEPTION;
+    }
+    """
+
+    # Unconditional return of JIT_RET_EXCEPTION
+    CODE_RETURN_EXCEPTION = r"""
+    return JIT_RET_EXCEPTION;
+    """
+
+    # Labelled exit returning JIT_RET_NO_EXCEPTION.
+    # Placeholders: C label name, assigned C lvalue, its value,
+    # BlockDst->address value
+    CODE_RETURN_NO_EXCEPTION = r"""
+    %s:
+    %s = %s;
+    BlockDst->address = %s;
+    return JIT_RET_NO_EXCEPTION;
+    """
+
+    # CPU_exception_flag check which sets BlockDst->address to DST_value.
+    # Placeholders: assigned C lvalue, its value
+    CODE_CPU_EXCEPTION_POST_INSTR = r"""
+    if (CPU_exception_flag) {
+        %s = %s;
+        BlockDst->address = DST_value;
+        return JIT_RET_EXCEPTION;
+    }
+    """
+
+    # VM_exception_flag check which sets BlockDst->address to DST_value.
+    # Placeholders: assigned C lvalue, its value
+    CODE_VM_EXCEPTION_POST_INSTR = r"""
+    if (VM_exception_flag) {
+        %s = %s;
+        BlockDst->address = DST_value;
+        return JIT_RET_EXCEPTION;
+    }
+    """
+
+    # Prologue: declares DST_case, DST_value and mycpu, then jumps to the
+    # C label given as placeholder
+    CODE_INIT = r"""
+    int DST_case;
+    unsigned long long DST_value;
+    vm_cpu_t* mycpu = (vm_cpu_t*)jitcpu->cpu;
+
+    goto %s;
+    """
+
+    # Code of a bad block: set CPU_exception_flag to EXCEPT_UNK_MNEMO, then
+    # return JIT_RET_EXCEPTION
+    CODE_BAD_BLOCK = r"""
+    // Unknown mnemonic
+    CPU_exception_flag = EXCEPT_UNK_MNEMO;
+    """ + CODE_RETURN_EXCEPTION
+
+    def __init__(self, ir_arch):
+        """
+        Keep a reference on @ir_arch and on its program counter, then
+        build the identifier to C identifier tables
+        @ir_arch: object giving access to pc, arch, IRDst and symbol_pool
+        """
+        self.ir_arch = ir_arch
+        self.PC = ir_arch.pc
+        self.init_arch_C()
+
+    def init_arch_C(self):
+        """
+        Build the tables linking each register and prefetch identifier to
+        its C form ('mycpu->NAME') and to its pending value form
+        ('mycpu->NAME_new'), then store the C code of PC in C_PC
+        """
+        self.id_to_c_id = dict(
+            (reg, m2_expr.ExprId('mycpu->%s' % reg, reg.size))
+            for reg in self.ir_arch.arch.regs.all_regs_ids + PREFETCH_ID)
+
+        self.id_to_new_c_id = dict(
+            (reg, m2_expr.ExprId('mycpu->%s_new' % reg, reg.size))
+            for reg in self.ir_arch.arch.regs.all_regs_ids + PREFETCH_ID)
+
+        self.C_PC = self.id_to_c(self.PC)
+
+    def dst_to_c(self, src):
+        """
+        Return the C code of the destination @src
+        @src: an Expr instance, or a raw value which is first wrapped in
+        an ExprInt of the size of PC
+        """
+        if isinstance(src, m2_expr.Expr):
+            expr = src
+        else:
+            expr = m2_expr.ExprInt(src, self.PC.size)
+        return self.id_to_c(expr)
+
+    def patch_c_id(self, expr):
+        """Return @expr with the entries of id_to_c_id replaced"""
+        c_ids = self.id_to_c_id
+        return expr.replace_expr(c_ids)
+
+    def patch_c_new_id(self, expr):
+        """Return @expr with the entries of id_to_new_c_id replaced"""
+        new_c_ids = self.id_to_new_c_id
+        return expr.replace_expr(new_c_ids)
+
+    def id_to_c(self, expr):
+        """Return the C code of @expr once patched by patch_c_id"""
+        patched = self.patch_c_id(expr)
+        return translator.from_expr(patched)
+
+    def id_to_cnew(self, expr):
+        """Return the C code of @expr once patched by patch_c_new_id"""
+        patched = self.patch_c_new_id(expr)
+        return translator.from_expr(patched)
+
+    def get_post_instr_label(self, offset):
+        """
+        Return the label named 'lbl_gen_post_instr_XXXXXXXX' (@offset as 8
+        hexadecimal digits), obtained through symbol_pool.getby_name_create
+        """
+        name = "lbl_gen_post_instr_%.8X" % (offset)
+        symbol_pool = self.ir_arch.symbol_pool
+        return symbol_pool.getby_name_create(name)
+
+    def add_label_index(self, dst2index, lbl):
+        """
+        Give @lbl an index in @dst2index, unless it already has one
+        @dst2index: dictionary linking a destination to its index
+        @lbl: destination (label or offset) to register
+        """
+        # Keep the index of an already known destination: overwriting it
+        # would invalidate the index previously handed out for it, and could
+        # later give the same index to two distinct destinations
+        if lbl not in dst2index:
+            dst2index[lbl] = len(dst2index)
+
+    def assignblk_to_irbloc(self, instr, assignblk):
+        """
+        Wrap the head @assignblk of @instr in an irbloc. If @assignblk does
+        not assign IRDst, IRDst is first set to the address following
+        @instr (offset + length)
+        @instr: an instruction instance
+        @assignblk: Assignblk instance
+        """
+        irdst = self.ir_arch.IRDst
+        if irdst not in assignblk:
+            next_addr = instr.offset + instr.l
+            assignblk[irdst] = m2_expr.ExprInt(next_addr, irdst.size)
+
+        label = self.ir_arch.get_instr_label(instr)
+        return irbloc(label, [assignblk])
+
+    def block2assignblks(self, block):
+        """
+        Translate each instruction of @block to its list of irblocks
+        @block: object whose lines attribute holds the instructions
+        Return one list of irblocks per instruction, head irblock first
+        """
+        per_instr = []
+        for instr in block.lines:
+            head, extra = self.ir_arch.instr2ir(instr)
+            # The head irblock has to come first; the order of the others
+            # is not really important
+            irblocks = [self.assignblk_to_irbloc(instr, head)] + extra
+
+            assert all(irblock.dst is not None for irblock in irblocks)
+            per_instr.append(irblocks)
+        return per_instr
+
+    def gen_mem_prefetch(self, assignblk, mems_to_prefetch):
+        """
+        Generate the C lines copying each memory read into its prefetch id
+        @assignblk: Assignblk instance, the lines are stored as C_prefetch
+        @mems_to_prefetch: dictionary linking memory expr to prefetch id
+        Return the generated lines
+        """
+        lines = []
+        for mem, prefetcher in sorted(mems_to_prefetch.iteritems()):
+            c_mem = self.id_to_c(mem)
+            c_prefetcher = self.id_to_c(prefetcher)
+            lines.append('%s = %s;' % (c_prefetcher, c_mem))
+        assignblk.C_prefetch = lines
+        return lines
+
+    def gen_assignments(self, assignblk, prefetchers):
+        """
+        Generate the C code of the assignments of @assignblk and store it
+        on @assignblk as three lists of lines:
+        - C_main: computation of identifier values into their '_new' form
+        - C_mem: memory writes
+        - C_updt: copy of each '_new' form into its identifier
+        The IRDst assignment is skipped.
+        @assignblk: Assignblk instance
+        @prefetchers: dictionary linking memory expr to prefetch id
+        Raise ValueError on a destination which is neither ExprId nor ExprMem
+        """
+        out_main = []
+        out_mem = []
+        out_updt = []
+        # Identifiers assigned by this assignblk, in generation order
+        id_to_update = []
+
+        for dst, src in sorted(assignblk.iteritems()):
+            src = src.replace_expr(prefetchers)
+            if dst is self.ir_arch.IRDst:
+                pass
+            elif isinstance(dst, m2_expr.ExprId):
+                id_to_update.append(dst)
+                str_dst = self.id_to_cnew(dst)
+                if dst in self.ir_arch.arch.regs.regs_flt_expr:
+                    # Dont mask float affectation
+                    out_main.append('%s = (%s);' % (str_dst, self.id_to_c(src)))
+                else:
+                    out_main.append(
+                        '%s = (%s)&0x%X;' % (str_dst, self.id_to_c(src),
+                                             SIZE_TO_MASK[src.size]))
+            elif isinstance(dst, m2_expr.ExprMem):
+                ptr = dst.arg.replace_expr(prefetchers)
+                new_dst = m2_expr.ExprMem(ptr, dst.size)
+                # Turn the translated memory read into a write: rename the
+                # call, drop its last character and append the value
+                str_dst = self.id_to_c(new_dst).replace('MEM_LOOKUP', 'MEM_WRITE')
+                out_mem.append('%s, %s);' % (str_dst[:-1], self.id_to_c(src)))
+            else:
+                raise ValueError("Unknown dst")
+
+        # Copy each '_new' value back to its identifier. IRDst never reaches
+        # id_to_update, it is filtered out in the loop above
+        for dst in id_to_update:
+            out_updt.append('%s = %s;' % (self.id_to_c(dst), self.id_to_cnew(dst)))
+
+        assignblk.C_main = out_main
+        assignblk.C_mem = out_mem
+        assignblk.C_updt = out_updt
+
+    def gen_c_assignblk(self, assignblk):
+        """
+        Link each memory read of @assignblk to a prefetch identifier of the
+        same size, and record on @assignblk whether it reads (mem_read) or
+        writes (mem_write) memory
+        @assignblk: Assignblk instance
+        Return the dictionary linking memory expr to prefetch id
+        """
+        prefetchers = {}
+        # Next unused prefetch identifier, for each access size
+        next_free = dict((size, 0) for size in (8, 16, 32, 64))
+
+        for expr in assignblk.get_r(mem_read=True):
+            if isinstance(expr, m2_expr.ExprMem):
+                slot = next_free[expr.size]
+                next_free[expr.size] = slot + 1
+                prefetchers[expr] = PREFETCH_ID_SIZE[expr.size][slot]
+
+        # The assignblk writes memory if one of its destinations is a memory
+        can_write = any(isinstance(dst, m2_expr.ExprMem)
+                        for dst in assignblk.get_w())
+
+        assignblk.mem_write = can_write
+        # At least one prefetcher means at least one memory read
+        assignblk.mem_read = bool(prefetchers)
+
+        return prefetchers
+
+    def gen_check_memory_exception(self, address):
+        """
+        Return the lines of CODE_EXCEPTION_MEM_AT_INSTR filled for the
+        instruction located at @address
+        """
+        c_addr = self.dst_to_c(address)
+        code = self.CODE_EXCEPTION_MEM_AT_INSTR % (self.C_PC, c_addr, c_addr)
+        return code.split('\n')
+
+    def gen_check_memory_exception_post(self, address):
+        """
+        Return the lines of CODE_EXCEPTION_MEM_POST_INSTR filled for the
+        instruction located at @address
+        """
+        c_addr = self.dst_to_c(address)
+        code = self.CODE_EXCEPTION_MEM_POST_INSTR % (self.C_PC, c_addr, c_addr)
+        return code.split('\n')
+
+    def gen_check_cpu_exception(self, address):
+        """
+        Return the lines of CODE_EXCEPTION_AT_INSTR filled for the
+        instruction located at @address
+        """
+        c_addr = self.dst_to_c(address)
+        code = self.CODE_EXCEPTION_AT_INSTR % (self.C_PC, c_addr, c_addr)
+        return code.split('\n')
+
+    def gen_check_cpu_exception_post(self, address):
+        """
+        Return the lines of CODE_EXCEPTION_POST_INSTR filled for the
+        instruction located at @address
+        """
+        c_addr = self.dst_to_c(address)
+        code = self.CODE_EXCEPTION_POST_INSTR % (self.C_PC, c_addr, c_addr)
+        return code.split('\n')
+
+    def traverse_expr_dst(self, expr, dst2index):
+        """
+        Generate the index of the destination label for the @expr
+        @expr: destination expression (ExprCond, ExprInt, label, ExprId,
+        ExprMem or ExprSlice)
+        @dst2index: dictionary to link label to its index, updated in place
+        Return a couple of C expressions: (index of the destination,
+        value of the destination)
+        Raise RuntimeError if @expr has an unsupported type
+        """
+
+        if isinstance(expr, m2_expr.ExprCond):
+            # Both branches are translated, then selected by the condition
+            cond = self.id_to_c(expr.cond)
+            src1, src1b = self.traverse_expr_dst(expr.src1, dst2index)
+            src2, src2b = self.traverse_expr_dst(expr.src2, dst2index)
+            return ("((%s)?(%s):(%s))" % (cond, src1, src2),
+                    "((%s)?(%s):(%s))" % (cond, src1b, src2b))
+        elif isinstance(expr, m2_expr.ExprInt):
+            offset = int(expr.arg)
+            self.add_label_index(dst2index, offset)
+            return ("%s" % dst2index[offset],
+                    hex(offset))
+        elif expr_is_label(expr):
+            label = expr.name
+            if label.offset is not None:
+                offset = label.offset
+                self.add_label_index(dst2index, offset)
+                return ("%s" % dst2index[offset],
+                        hex(offset))
+            else:
+                # Label without offset: indexed by the label itself
+                self.add_label_index(dst2index, label)
+                return ("%s" % dst2index[label],
+                        "0")
+
+        elif isinstance(expr, (m2_expr.ExprId,
+                               m2_expr.ExprMem,
+                               m2_expr.ExprSlice)):
+            # Destination computed by the generated code: fixed index -1
+            dst2index[expr] = -1
+            return ("-1",
+                    self.id_to_c(expr))
+        else:
+            raise RuntimeError("Unsupported IRDst type %s" % expr)
+
+    def gen_assignblk_dst(self, dst):
+        """
+        Generate the C lines setting DST_case and DST_value for @dst
+        @dst: destination expression
+        Return the lines (the first one is a comment showing the
+        destination to index dictionary) and this dictionary
+        """
+        dst2index = {}
+        c_case, c_value = self.traverse_expr_dst(dst, dst2index)
+        lines = ['// %s' % dst2index,
+                 'DST_case = %s;' % c_case,
+                 'DST_value = %s;' % c_value]
+        return lines, dst2index
+
+    def gen_post_instr_checks(self, attrib, dst):
+        """
+        Generate the C lines checking exceptions after an instruction
+        @attrib: an Attributes instance
+        @dst: destination used as the value assigned in the checks
+        """
+        fill = (self.C_PC, self.dst_to_c(dst))
+        lines = []
+        # Memory has been accessed: check the VM exception flag
+        if attrib.mem_read or attrib.mem_write:
+            lines.extend((self.CODE_VM_EXCEPTION_POST_INSTR % fill).split('\n'))
+        # An exception may have been raised: check the CPU exception flag
+        if attrib.set_exception or attrib.op_set_exception:
+            lines.extend((self.CODE_CPU_EXCEPTION_POST_INSTR % fill).split('\n'))
+
+        return lines
+
+    def gen_pre_code(self, attrib):
+        """
+        Generate the C lines emitted before an instruction: a printf of
+        its offset and text when @attrib.log_mn is set, nothing otherwise
+        @attrib: an Attributes instance
+        """
+        if not attrib.log_mn:
+            return []
+        return ['printf("%.8X %s\\n");' % (attrib.instr.offset,
+                                           attrib.instr)]
+
+    def gen_post_code(self, attrib):
+        """
+        Generate the C lines emitted after an instruction: a call to
+        dump_gpregs when @attrib.log_regs is set, nothing otherwise
+        @attrib: an Attributes instance
+        """
+        if not attrib.log_regs:
+            return []
+        return ['dump_gpregs(jitcpu->cpu);']
+
+    def gen_goto_code(self, attrib, instr_offsets, dst):
+        """
+        Generate the C lines leading to the destination @dst
+        @attrib: an Attributes instance
+        @instr_offsets: list of instructions offsets
+        @dst: an asm_label, or a destination key of the dst2index dictionary
+        """
+        # Label destination: plain goto
+        if isinstance(dst, asm_label):
+            return ['goto %s;' % dst.name]
+
+        # Destination among @instr_offsets: post code and checks, then goto
+        if dst in instr_offsets:
+            lbl = self.ir_arch.symbol_pool.getby_offset_create(dst)
+            return (self.gen_post_code(attrib) +
+                    self.gen_post_instr_checks(attrib, dst) +
+                    ['goto %s;' % lbl.name])
+
+        # Other destination: set BlockDst->address, then return
+        return (self.gen_post_code(attrib) +
+                ['BlockDst->address = DST_value;'] +
+                self.gen_post_instr_checks(attrib, dst) +
+                ['\t\treturn JIT_RET_NO_EXCEPTION;'])
+
+    def gen_dst_goto(self, attrib, instr_offsets, dst2index):
+        """
+        Generate code for possible @dst2index.
+
+        @attrib: an Attributes instance
+        @instr_offsets: list of instructions offsets
+        @dst2index: link from destination to index
+        Return the C lines of the switch on DST_case, or an empty list if
+        @dst2index is empty
+        """
+
+        if not dst2index:
+            return []
+        out = []
+        out.append('switch(DST_case) {')
+        dynamic_done = False
+        for dst, index in sorted(dst2index.iteritems(), key=lambda lblindex: lblindex[1]):
+            if index == -1:
+                # Several destinations may share the index -1: emit this
+                # case only once, C rejects duplicate case values.
+                # NOTE(review): the code kept is the one generated for the
+                # first such destination - confirm this is acceptable
+                if dynamic_done:
+                    continue
+                dynamic_done = True
+            out.append('\tcase %d:' % index)
+            out += self.gen_goto_code(attrib, instr_offsets, dst)
+            out.append('\t\tbreak;')
+        out.append('};')
+        return out
+
+    def gen_c_code(self, assignblk, c_dst):
+        """
+        Assemble the C code of @assignblk from the pieces stored on it
+        (C_prefetch, C_main, C_mem, C_updt), interleaved with the
+        exception checks its flags require.
+        @assignblk: an Assignblk instance
+        @c_dst: irdst C code
+        """
+        out = ["// Prefetch"]
+        out.extend(assignblk.C_prefetch)
+        out.append("// Dst")
+        out.extend(c_dst)
+        out.append("// Main")
+        out.extend(assignblk.C_main)
+
+        out.append("// Check op/mem exceptions")
+        # A prefetch means a memory read: check memory exceptions
+        if assignblk.C_prefetch:
+            out.extend(self.gen_check_memory_exception(assignblk.instr_addr))
+        # An operator may have raised exception flags
+        if assignblk.op_set_exception:
+            out.extend(self.gen_check_cpu_exception(assignblk.instr_addr))
+
+        out.append("// Mem updt")
+        out.extend(assignblk.C_mem)
+
+        out.append("// Check exception Mem write")
+        # Memory writes may raise memory exceptions too
+        if assignblk.mem_write:
+            out.extend(self.gen_check_memory_exception(assignblk.instr_addr))
+
+        out.append("// Updt")
+        out.extend(assignblk.C_updt)
+
+        out.append("// Checks exception")
+        # Exception flags explicitly set by the assignblk
+        if assignblk.set_exception:
+            out.extend(self.gen_check_cpu_exception(assignblk.instr_addr))
+
+        return out
+
+    def is_exception_operator(self, operator):
+        """Return True if @operator starts with one of the
+        IMPLICIT_EXCEPTION_OP names, i.e. can raise a runtime exception"""
+
+        prefixes = tuple(self.IMPLICIT_EXCEPTION_OP)
+        return operator.startswith(prefixes)
+
+    def get_caracteristics(self, irblock):
+        """
+        Compute the characteristics of each assignblk of @irblock and
+        store them on the assignblk: mem_read, mem_write, set_exception
+        and op_set_exception
+        @irblock: an irbloc instance
+        """
+
+        for assignblk in irblock.irs:
+            # Reset the flags before computing them
+            assignblk.mem_read = assignblk.mem_write = False
+            assignblk.op_set_exception = False
+
+            # Explicit exception: the exception flags are assigned
+            regs = self.ir_arch.arch.regs
+            assignblk.set_exception = regs.exception_flags in assignblk
+
+            reads = assignblk.get_r(mem_read=True)
+
+            # Implicit exception: one source uses an exception operator
+            operators = (operator
+                         for elem in assignblk.values()
+                         for operator in m2_expr.get_expr_ops(elem))
+            assignblk.op_set_exception = any(
+                self.is_exception_operator(operator) for operator in operators)
+
+            # Memory is read if one of the read elements is a memory
+            assignblk.mem_read = any(
+                isinstance(expr, m2_expr.ExprMem) for expr in reads)
+            # Memory is written if one of the destinations is a memory
+            assignblk.mem_write = any(
+                isinstance(dst, m2_expr.ExprMem) for dst in assignblk)
+
+    def get_attributes(self, instr, irblocks, log_mn=False, log_regs=False):
+        """
+        Get the characteristics of each irblock of @irblocks. Returns the
+        corresponding attributes object.
+        @instr: the instruction the @irblocks come from
+        @irblocks: a list of irbloc instance
+        @log_mn: generate code to log instructions
+        @log_regs: generate code to log registers states
+        """
+
+        attrib = Attributes(log_mn, log_regs)
+
+        for irblock in irblocks:
+            # A single call computes the flags of every assignblk of the
+            # irblock: no need to run it again for each assignblk
+            self.get_caracteristics(irblock)
+            for assignblk in irblock.irs:
+                attrib.mem_read |= assignblk.mem_read
+                attrib.mem_write |= assignblk.mem_write
+                attrib.set_exception |= assignblk.set_exception
+                attrib.op_set_exception |= assignblk.op_set_exception
+        attrib.instr = instr
+        return attrib
+
+    def gen_bad_block(self):
+        """
+        Return the lines of CODE_BAD_BLOCK, the C code of a bad block
+        """
+        code = self.CODE_BAD_BLOCK
+        return code.split("\n")
+
+    def get_block_post_label(self, block):
+        """
+        Return the label located just after the last instruction of
+        @block, obtained through symbol_pool.getby_offset_create
+        """
+        tail = block.lines[-1]
+        symbol_pool = self.ir_arch.symbol_pool
+        return symbol_pool.getby_offset_create(tail.offset + tail.l)
+
+    def gen_init(self, block):
+        """
+        Generate the init C code for a @block
+        @block: an asm_bloc instance
+        Return the lines of CODE_INIT jumping to the label of the first
+        instruction, and the list of the instruction offsets followed by
+        the offset just after the block
+        """
+
+        instr_offsets = [line.offset for line in block.lines]
+        post_label = self.get_block_post_label(block)
+        instr_offsets.append(post_label.offset)
+
+        first_offset = instr_offsets[0]
+        lbl_start = self.ir_arch.symbol_pool.getby_offset_create(first_offset)
+        code = self.CODE_INIT % lbl_start.name
+        return code.split("\n"), instr_offsets
+
+    def gen_irblock(self, attrib, instr_offsets, instr, irblock):
+        """
+        Generate the C code for an @irblock
+        @attrib: an Attributes instance
+        @instr_offsets: list of instructions offsets
+        @instr: the current instruction to translate
+        @irblock: an irbloc instance
+        """
+
+        lines = []
+        dst2index = None
+        for index, assignblk in enumerate(irblock.irs):
+            # Only the assignblk at dst_linenb carries the destination code
+            c_dst = []
+            if index == irblock.dst_linenb:
+                c_dst, dst2index = self.gen_assignblk_dst(irblock.dst)
+
+            assignblk.instr_addr = instr.offset
+            prefetchers = self.gen_c_assignblk(assignblk)
+            self.gen_mem_prefetch(assignblk, prefetchers)
+            self.gen_assignments(assignblk, prefetchers)
+
+            lines.extend(self.gen_c_code(assignblk, c_dst))
+
+        if not dst2index:
+            return lines
+
+        lines.append("// Set irdst")
+        # Gen goto on irdst set
+        lines.extend(self.gen_dst_goto(attrib, instr_offsets, dst2index))
+        return lines
+
+    def gen_finalize(self, block):
+        """
+        Generate the C code ending @block: the lines of
+        CODE_RETURN_NO_EXCEPTION filled with the label located just after
+        the block and the C code of its offset
+        """
+
+        post_label = self.get_block_post_label(block)
+        c_offset = self.dst_to_c(post_label.offset)
+        fill = (post_label.name, self.C_PC, c_offset, c_offset)
+        return (self.CODE_RETURN_NO_EXCEPTION % fill).split('\n')
+
+    def gen_c(self, block, log_mn=False, log_regs=False):
+        """
+        Generate the C code for the @block and return it as a list of lines
+        @block: the block to translate; the bad block code is returned
+        for an asm_block_bad instance
+        @log_mn: log mnemonics
+        @log_regs: log registers
+        """
+
+        if isinstance(block, asm_block_bad):
+            return self.gen_bad_block()
+
+        irblocks_list = self.block2assignblks(block)
+        lines, instr_offsets = self.gen_init(block)
+
+        for instr, irblocks in zip(block.lines, irblocks_list):
+            attrib = self.get_attributes(instr, irblocks, log_mn, log_regs)
+
+            for index, irblock in enumerate(irblocks):
+                self.ir_arch.irbloc_fix_regs_for_mode(
+                    irblock, self.ir_arch.attrib)
+
+                # C label of the irblock, commented with the instruction
+                header = "%-40s // %.16X %s" % (
+                    str(irblock.label.name) + ":", instr.offset, instr)
+                lines.append(header)
+                # The pre code is emitted once, before the head irblock
+                if index == 0:
+                    lines.extend(self.gen_pre_code(attrib))
+                lines.extend(
+                    self.gen_irblock(attrib, instr_offsets, instr, irblock))
+
+        lines.extend(self.gen_finalize(block))
+        return ['\t' + line for line in lines]