about summary refs log tree commit diff stats
path: root/miasm2/ir/ir2C.py
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--miasm2/ir/ir2C.py601
1 files changed, 601 insertions, 0 deletions
diff --git a/miasm2/ir/ir2C.py b/miasm2/ir/ir2C.py
new file mode 100644
index 00000000..64591b44
--- /dev/null
+++ b/miasm2/ir/ir2C.py
@@ -0,0 +1,601 @@
+from miasm2.expression.expression import *
+from miasm2.expression.simplifications import expr_simp
+from miasm2.core import asmbloc
+import logging
+
+
+log_to_c_h = logging.getLogger("ir_helper")
+console_handler = logging.StreamHandler()
+console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s"))
+log_to_c_h.addHandler(console_handler)
+log_to_c_h.setLevel(logging.WARN)
+
+
+def ExprInt_toC(self):
+    return str(self)
+
+
+def ExprId_toC(self):
+    if isinstance(self.name, asmbloc.asm_label):
+        return "0x%x" % self.name.offset
+    return str(self)
+
+
+def ExprAff_toC(self):
+    return "%s = %s" % (self.dst.toC(), self.src.toC())
+
+
+def ExprCond_toC(self):
+    return "(%s?%s:%s)" % (self.cond.toC(), self.src1.toC(), self.src2.toC())
+
+
+def ExprMem_toC(self):
+    return "MEM_LOOKUP_%.2d(vm_mngr, %s)" % (self._size, self.arg.toC())
+
+
+def ExprOp_toC(self):
+    dct_shift = {'a>>': "right_arith",
+                 '>>': "right_logic",
+                 '<<': "left_logic",
+                 'a<<': "left_logic",
+                 }
+    dct_rot = {'<<<': 'rot_left',
+               '>>>': 'rot_right',
+               }
+    dct_div = {'div8': "div_op",
+               'div16': "div_op",
+               'div32': "div_op",
+               'idiv32': "div_op",  # XXX to test
+               'rem8': "rem_op",
+               'rem16': "rem_op",
+               'rem32': "rem_op",
+               'irem32': "rem_op",  # XXX to test
+               '<<<c_rez': 'rcl_rez_op',
+               '<<<c_cf': 'rcl_cf_op',
+               '>>>c_rez': 'rcr_rez_op',
+               '>>>c_cf': 'rcr_cf_op',
+               }
+    if len(self.args) == 1:
+        if self.op == 'parity':
+            return "parity(%s&0x%x)" % (
+                self.args[0].toC(), size2mask(self.args[0].size))
+        elif self.op == '!':
+            return "(~ %s)&0x%x" % (
+                self.args[0].toC(), size2mask(self.args[0].size))
+        elif self.op in ["hex2bcd", "bcd2hex"]:
+            return "%s_%d(%s)" % (
+                self.op, self.args[0].size, self.args[0].toC())
+        elif (self.op.startswith("double_to_") or
+              self.op.endswith("_to_double")   or
+              self.op.startswith("access_")    or
+              self.op.startswith("load_")      or
+              self.op in ["-", "ftan", "frndint", "f2xm1",
+                "fsin", "fsqrt", "fabs", "fcos"]):
+            return "%s(%s)" % (self.op, self.args[0].toC())
+        else:
+            raise ValueError('unknown op: %r' % self.op)
+    elif len(self.args) == 2:
+        if self.op == "==":
+            return '(((%s&0x%x) == (%s&0x%x))?1:0)' % (
+                self.args[0].toC(), size2mask(self.args[0].size),
+                self.args[1].toC(), size2mask(self.args[1].size))
+        elif self.op in dct_shift:
+            return 'shift_%s_%.2d(%s , %s)' % (dct_shift[self.op],
+                                               self.args[0].size,
+                                               self.args[0].toC(),
+                                               self.args[1].toC())
+        elif self.is_associative():
+            o = ['(%s&0x%x)' % (a.toC(), size2mask(a.size)) for a in self.args]
+            o = str(self.op).join(o)
+            return "((%s)&0x%x)" % (o, size2mask(self.args[0].size))
+        elif self.op in ["%", "/"]:
+            o = ['(%s&0x%x)' % (a.toC(), size2mask(a.size)) for a in self.args]
+            o = str(self.op).join(o)
+            return "((%s)&0x%x)" % (o, size2mask(self.args[0].size))
+        elif self.op in ['-']:
+            return '(((%s&0x%x) %s (%s&0x%x))&0x%x)' % (
+                self.args[0].toC(), size2mask(self.args[0].size),
+                str(self.op),
+                self.args[1].toC(), size2mask(self.args[1].size),
+                size2mask(self.args[0].size))
+        elif self.op in dct_rot:
+            return '(%s(%s, %s, %s) &0x%x)' % (dct_rot[self.op],
+                                               self.args[0].size,
+                                               self.args[0].toC(),
+                                               self.args[1].toC(),
+                                               size2mask(self.args[0].size))
+        elif self.op in ['bsr', 'bsf']:
+            return 'my_%s(%s, %s)' % (self.op,
+                                      self.args[0].toC(),
+                                      self.args[1].toC())
+        elif self.op.startswith('cpuid'):
+            return "%s(%s, %s)" % (
+                self.op, self.args[0].toC(), self.args[1].toC())
+        elif self.op.startswith("fcom"):
+            return "%s(%s, %s)" % (
+                self.op, self.args[0].toC(), self.args[1].toC())
+        elif self.op in ["fadd", "fsub", "fdiv", 'fmul', "fscale"]:
+            return "%s(%s, %s)" % (
+                self.op, self.args[0].toC(), self.args[1].toC())
+        elif self.op == "segm":
+            return "segm2addr(vmcpu, %s, %s)" % (
+                self.args[0].toC(), self.args[1].toC())
+        elif self.op in ['udiv', 'umod', 'idiv', 'imod']:
+            return '%s%d(vmcpu, %s, %s)' % (self.op,
+                                            self.args[0].size,
+                                            self.args[0].toC(),
+                                            self.args[1].toC())
+        elif self.op in ["bcdadd", "bcdadd_cf"]:
+            return "%s_%d(%s, %s)" % (self.op, self.args[0].size,
+                                      self.args[0].toC(),
+                                      self.args[1].toC())
+        else:
+            raise ValueError('unknown op: %r' % self.op)
+    elif len(self.args) == 3 and self.op in dct_div:
+        return '(%s(%s, %s, %s, %s) &0x%x)' % (dct_div[self.op],
+                                               self.args[0].size,
+                                               self.args[0].toC(),
+                                               self.args[1].toC(),
+                                               self.args[2].toC(),
+                                               size2mask(self.args[0].size))
+    elif len(self.args) >= 3 and self.is_associative():  # ?????
+        o = ['(%s&0x%x)' % (a.toC(), size2mask(a.size)) for a in self.args]
+        o = str(self.op).join(o)
+        r = "((%s)&0x%x)" % (o, size2mask(self.args[0].size))
+        return r
+    else:
+        raise NotImplementedError('unknown op: %s' % self)
+
+
+def ExprSlice_toC(self):
+    # XXX check mask for 64 bit & 32 bit compat
+    return "((%s>>%d) & 0x%X)" % (self.arg.toC(),
+                                  self.start,
+                                  (1 << (self.stop - self.start)) - 1)
+
+
+def ExprCompose_toC(self):
+    out = []
+    # XXX check mask for 64 bit & 32 bit compat
+    dst_cast = "uint%d_t" % self.size
+    for x in self.args:
+        out.append("(((%s)(%s & 0x%X)) << %d)" % (dst_cast,
+                                                  x[0].toC(),
+                  (1 << (x[2] - x[1])) - 1,
+            x[1]))
+    out = ' | '.join(out)
+    return '(' + out + ')'
+
+
+ExprInt.toC = ExprInt_toC
+ExprId.toC = ExprId_toC
+ExprAff.toC = ExprAff_toC
+ExprCond.toC = ExprCond_toC
+ExprMem.toC = ExprMem_toC
+ExprOp.toC = ExprOp_toC
+ExprSlice.toC = ExprSlice_toC
+ExprCompose.toC = ExprCompose_toC
+
+prefetch_id = []
+prefetch_id_size = {}
+for size in [8, 16, 32, 64]:
+    prefetch_id_size[size] = []
+    for i in xrange(20):
+        name = 'pfmem%.2d_%d' % (size, i)
+        c = ExprId(name, size)
+        globals()[name] = c
+        prefetch_id.append(c)
+        prefetch_id_size[size].append(c)
+
+
+reg_float_st0 = 'float_st0'
+reg_float_st1 = 'float_st1'
+reg_float_st2 = 'float_st2'
+reg_float_st3 = 'float_st3'
+reg_float_st4 = 'float_st4'
+reg_float_st5 = 'float_st5'
+reg_float_st6 = 'float_st6'
+reg_float_st7 = 'float_st7'
+
+
+float_st0 = ExprId(reg_float_st0, 64)
+float_st1 = ExprId(reg_float_st1, 64)
+float_st2 = ExprId(reg_float_st2, 64)
+float_st3 = ExprId(reg_float_st3, 64)
+float_st4 = ExprId(reg_float_st4, 64)
+float_st5 = ExprId(reg_float_st5, 64)
+float_st6 = ExprId(reg_float_st6, 64)
+float_st7 = ExprId(reg_float_st7, 64)
+
+fltregs32_str = ["ST(%d)" % i for i in xrange(8)]
+fltregs32_expr = [ExprId(x, 64) for x in fltregs32_str]
+
+
+float_id_e = [
+    float_st0,
+    float_st1,
+    float_st2,
+    float_st3,
+    float_st4,
+    float_st5,
+    float_st6,
+    float_st7,
+] + fltregs32_expr
+
+
+def init_arch_C(arch):
+    arch.id2Cid = {}
+    for x in arch.regs.all_regs_ids + prefetch_id:
+        arch.id2Cid[x] = ExprId('vmcpu->' + str(x), x.size)
+    for i in xrange(8):
+        arch.id2Cid[ExprId('ST(%d)' % i, 64)] = ExprId(
+            'vmcpu->' + "float_st%d" % i, 64)
+
+    arch.id2newCid = {}
+
+    for x in arch.regs.all_regs_ids + prefetch_id:
+        arch.id2newCid[x] = ExprId('vmcpu->%s_new' % x, x.size)
+
+    for i in xrange(8):
+        arch.id2newCid[ExprId('ST(%d)' % i, 64)] = ExprId(
+            'vmcpu->' + "float_st%d_new" % i, 64)
+
+
+def patch_c_id(arch, e):
+    return e.replace_expr(arch.id2Cid)
+
+
+def patch_c_new_id(arch, e):
+    return e.replace_expr(arch.id2newCid)
+
+
+mask_int = 0xffffffffffffffff
+
+
+pre_instr_test_exception = r"""
+// pre instruction test exception
+if (vm_mngr->exception_flags) {
+    %s;
+    RETURN_PC;
+}
+"""
+
+
+code_exception_fetch_mem_at_instr = r"""
+// except fetch mem at instr
+if (vm_mngr->exception_flags & EXCEPT_DO_NOT_UPDATE_PC) {
+    %s;
+    RETURN_PC;
+}
+"""
+code_exception_fetch_mem_post_instr = r"""
+// except fetch mem post instr
+if (vm_mngr->exception_flags) {
+    %s;
+    RETURN_PC;
+}
+"""
+
+
+code_exception_fetch_mem_at_instr_noautomod = r"""
+// except fetch mem at instr noauto
+if ((vm_mngr->exception_flags & ~EXCEPT_CODE_AUTOMOD) & EXCEPT_DO_NOT_UPDATE_PC) {
+    %s;
+    RETURN_PC;
+}
+"""
+code_exception_fetch_mem_post_instr_noautomod = r"""
+// except post instr noauto
+if (vm_mngr->exception_flags & ~EXCEPT_CODE_AUTOMOD) {
+    %s;
+    RETURN_PC;
+}
+"""
+
+
+code_exception_at_instr = r"""
+// except at instr
+if (vmcpu->exception_flags && vmcpu->exception_flags > EXCEPT_NUM_UPDT_EIP) {
+    %s;
+    RETURN_PC;
+}
+"""
+
+code_exception_post_instr = r"""
+// except post instr
+if (vmcpu->exception_flags) {
+    if (vmcpu->exception_flags > EXCEPT_NUM_UPDT_EIP)
+      %s;
+    else
+      %s;
+    RETURN_PC;
+}
+"""
+
+
+code_exception_at_instr_noautomod = r"""
+if ((vmcpu->exception_flags & ~EXCEPT_CODE_AUTOMOD) && vmcpu->exception_flags > EXCEPT_NUM_UPDT_EIP) {
+    %s;
+    RETURN_PC;
+}
+"""
+
+code_exception_post_instr_noautomod = r"""
+if (vmcpu->exception_flags & ~EXCEPT_CODE_AUTOMOD) {
+    if (vmcpu->exception_flags > EXCEPT_NUM_UPDT_EIP)
+      %s;
+    else
+      %s;
+    RETURN_PC;
+}
+"""
+
+
+my_size_mask = {1: 1, 2: 3, 3: 7, 7: 0x7f,
+                8: 0xFF,
+                16: 0xFFFF,
+                32: 0xFFFFFFFF,
+                64: 0xFFFFFFFFFFFFFFFFL}
+
+exception_flags = ExprId('exception_flags', 32)
+
+
+def set_pc(my_ir, src):
+    dst = my_ir.jit_pc
+    if not isinstance(src, Expr):
+        src = ExprInt_from(dst, src)
+    e = ExprAff(dst, src.zeroExtend(dst.size))
+    return e
+
+
+def Expr2C(my_ir, l, exprs, gen_exception_code=False):
+    id_to_update = []
+    out = ["// %s" % (l)]
+    out_pc = []
+
+    dst_dict = {}
+    src_mem = {}
+
+    prefect_index = {8: 0, 16: 0, 32: 0, 64: 0}
+    new_expr = []
+
+    e = set_pc(my_ir, l.offset & mask_int)
+    #out.append("%s;" % patch_c_id(my_ir.arch, e).toC())
+
+    pc_is_dst = False
+    fetch_mem = False
+    set_exception_flags = False
+    for e in exprs:
+        assert(isinstance(e, ExprAff))
+        assert(not isinstance(e.dst, ExprOp))
+        if isinstance(e.dst, ExprId):
+            if not e.dst in dst_dict:
+                dst_dict[e.dst] = []
+            dst_dict[e.dst].append(e)
+        else:
+            new_expr.append(e)
+        # test exception flags
+        ops = get_expr_ops(e)
+        if set(['umod', 'udiv']).intersection(ops):
+            set_exception_flags = True
+        if e.dst == exception_flags:
+            set_exception_flags = True
+            # TODO XXX test function whose set exception_flags
+
+        # search mem lookup for generate mem read prefetch
+        rs = e.src.get_r(mem_read=True)
+        for r in rs:
+            if (not isinstance(r, ExprMem)) or r in src_mem:
+                continue
+            fetch_mem = True
+            index = prefect_index[r.size]
+            prefect_index[r.size] += 1
+            pfmem = prefetch_id_size[r.size][index]
+            src_mem[r] = pfmem
+
+    for dst, exs in dst_dict.items():
+        if len(exs) == 1:
+            new_expr += exs
+            continue
+        exs = [expr_simp(x) for x in exs]
+        log_to_c_h.debug('warning: detected multi dst to same id')
+        log_to_c_h.debug('\t'.join([str(x) for x in exs]))
+        new_expr += exs
+    out_mem = []
+
+    # first, generate mem prefetch
+    mem_k = src_mem.keys()
+    mem_k.sort()
+    for k in mem_k:
+        str_src = patch_c_id(my_ir.arch, k).toC()
+        str_dst = patch_c_id(my_ir.arch, src_mem[k]).toC()
+        out.append('%s = %s;' % (str_dst, str_src))
+    src_w_len = {}
+    for k, v in src_mem.items():
+        src_w_len[k] = v
+    for e in new_expr:
+        src, dst = e.src, e.dst
+        # reload src using prefetch
+        src = src.replace_expr(src_w_len)
+        str_src = patch_c_id(my_ir.arch, src).toC()
+        str_dst = patch_c_id(my_ir.arch, dst).toC()
+        if isinstance(dst, ExprId):
+            id_to_update.append(dst)
+            str_dst = patch_c_new_id(my_ir.arch, dst)
+            if dst in float_id_e:
+                # dont mask float affectation
+                out.append('%s = (%s);' % (str_dst, str_src))
+            else:
+                out.append('%s = (%s)&0x%X;' % (str_dst, str_src,
+                                                my_size_mask[src.size]))
+        elif isinstance(dst, ExprMem):
+            fetch_mem = True
+            str_dst = str_dst.replace('MEM_LOOKUP', 'MEM_WRITE')
+            out_mem.append('%s, %s);' % (str_dst[:-1], str_src))
+
+        if e.dst == my_ir.arch.pc[my_ir.attrib]:
+            pc_is_dst = True
+            out_pc += ["RETURN_PC;"]
+
+    # if len(id_to_update) != len(set(id_to_update)):
+    # raise ValueError('Not implemented: multi dst to same id!', str([str(x)
+    # for x in exprs]))
+    out += out_mem
+
+    if gen_exception_code:
+        if fetch_mem:
+            e = set_pc(my_ir, l.offset & mask_int)
+            s1 = "%s" % patch_c_id(my_ir.arch, e).toC()
+            out.append(code_exception_fetch_mem_at_instr_noautomod % s1)
+        if set_exception_flags:
+            e = set_pc(my_ir, l.offset & mask_int)
+            s1 = "%s" % patch_c_id(my_ir.arch, e).toC()
+            out.append(code_exception_at_instr_noautomod % s1)
+
+    for i in id_to_update:
+        out.append('%s = %s;' %
+                   (patch_c_id(my_ir.arch, i), patch_c_new_id(my_ir.arch, i)))
+
+    post_instr = []
+    # test stop exec ####
+    if gen_exception_code:
+        if set_exception_flags:
+            if pc_is_dst:
+                post_instr.append("if (vm_mngr->exception_flags) { " +
+                    "/*pc = 0x%X; */RETURN_PC; }" % (l.offset))
+            else:
+                e = set_pc(my_ir, l.offset & mask_int)
+                s1 = "%s" % patch_c_id(my_ir.arch, e).toC()
+                e = set_pc(my_ir, (l.offset + l.l) & mask_int)
+                s2 = "%s" % patch_c_id(my_ir.arch, e).toC()
+                post_instr.append(
+                    code_exception_post_instr_noautomod % (s1, s2))
+
+        if fetch_mem:
+            if l.additional_info.except_on_instr:
+                offset = l.offset
+            else:
+                offset = l.offset + l.l
+
+            e = set_pc(my_ir, offset & mask_int)
+            s1 = "%s" % patch_c_id(my_ir.arch, e).toC()
+            post_instr.append(
+                code_exception_fetch_mem_post_instr_noautomod % (s1))
+
+    # pc manip after all modifications
+    return out, post_instr, post_instr + out_pc
+
+
+def label2offset(e):
+    if not isinstance(e, ExprId):
+        return e
+    if not isinstance(e.name, asmbloc.asm_label):
+        return e
+    return ExprInt_from(e, e.name.offset)
+
+
+def expr2pyobj(arch, e):
+    if isinstance(e, ExprId):
+        if isinstance(e.name, asmbloc.asm_label):
+            src_c = 'PyString_FromStringAndSize("%s", %d)' % (
+                e.name.name, len(e.name.name))
+        else:
+            src_c = 'PyLong_FromUnsignedLongLong(%s)' % patch_c_id(arch, e)
+    else:
+        raise NotImplementedError('unknown type for e: %s' % type(e))
+    return src_c
+
+
+def ir2C(my_ir, irbloc, lbl_done,
+    gen_exception_code=False, log_mn=False, log_regs=False):
+    out = []
+    # print "TRANS"
+    # print irbloc
+    out.append(["%s:" % irbloc.label.name])
+    assert(len(irbloc.irs) == len(irbloc.lines))
+    for l, exprs in zip(irbloc.lines, irbloc.irs):
+        if l.offset not in lbl_done:
+            e = set_pc(my_ir, l.offset & mask_int)
+            s1 = "%s" % patch_c_id(my_ir.arch, e).toC()
+            out.append([pre_instr_test_exception % (s1)])
+            lbl_done.add(l.offset)
+
+            if log_regs:
+                out.append([r'dump_gpregs(vmcpu);'])
+
+                if log_mn:
+                    out.append(['printf("%.8X %s\\n");' % (l.offset, str(l))])
+        # print l
+        # gen pc update
+        post_instr = ""
+        c_code, post_instr, _ = Expr2C(my_ir, l, exprs, gen_exception_code)
+        out.append(c_code + post_instr)
+    return out
+
+
+def irblocs2C(my_ir, resolvers, label, irblocs,
+    gen_exception_code=False, log_mn=False, log_regs=False):
+    out = []
+    out.append("goto %s;" % label.name)
+    bloc_labels = [x.label for x in irblocs]
+    assert(label in bloc_labels)
+
+    lbl_done = set([None])
+
+    for irbloc in irblocs:
+        # XXXX TEST
+        if irbloc.label.offset is None:
+            b_out = ir2C(my_ir, irbloc, lbl_done, gen_exception_code)
+        else:
+            b_out = ir2C(
+                my_ir, irbloc, lbl_done, gen_exception_code, log_mn, log_regs)
+        for exprs in b_out:
+            for l in exprs:
+                out.append(l)
+        dst = irbloc.dst
+        out.append("")
+        if asmbloc.expr_is_label(dst):
+            if dst.name in bloc_labels:
+                out.append("goto %s;" % dst.name.name)
+            else:
+                resolver = resolvers.get_resolver(dst.name.offset)
+
+                e = set_pc(my_ir, dst.name.offset & mask_int)
+                #out.append("%s;" % patch_c_id(my_ir.arch, e).toC())
+                out.append(resolver.ret())
+        elif isinstance(dst, ExprSlice) and isinstance(dst.arg, ExprId):
+            e = set_pc(my_ir, dst)
+            #out.append("%s;" % patch_c_id(my_ir.arch, e).toC())
+
+            e = patch_c_id(my_ir.arch, dst).toC()
+            out.append("return PyLong_FromUnsignedLongLong(%s);" % e)
+
+        elif isinstance(dst, ExprId):
+            e = set_pc(my_ir, dst)
+            #out.append("%s;" % patch_c_id(my_ir.arch, e).toC())
+
+            e = patch_c_id(my_ir.arch, dst).toC()
+            out.append("return PyLong_FromUnsignedLongLong(%s);" % e)
+        elif isinstance(dst, ExprCond):
+            dst_cond_c = patch_c_id(my_ir.arch, dst.cond).toC()
+            out.append("if (%s)" % dst_cond_c)
+
+            if dst.src1.name in bloc_labels:
+                out.append("    goto %s;" % dst.src1.name.name)
+            else:
+                resolver = resolvers.get_resolver(dst.src1.name.offset)
+                out.append(resolver.ret())
+
+            out.append("else")
+
+            if dst.src2.name in bloc_labels:
+                out.append("    goto %s;" % dst.src2.name.name)
+            else:
+                resolver = resolvers.get_resolver(dst.src2.name.offset)
+                out.append(resolver.ret())
+
+        else:
+            raise NotImplementedError('unknown type for dst: %s' % type(dst))
+    #print '\n'.join(out)
+    return out
+