about summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorserpilliere <serpilliere@users.noreply.github.com>2018-12-20 20:05:55 +0100
committerGitHub <noreply@github.com>2018-12-20 20:05:55 +0100
commit011e1e209b3d00afe3c8e65000209a568c5eb791 (patch)
treed9d50826df2c94bdcc10a2e676cb4739ee685f66
parent75417940371bf1bf95d2f562141f2284d1f77f71 (diff)
parentbff40462030dc01ee651595370479619e15500d2 (diff)
downloadmiasm-011e1e209b3d00afe3c8e65000209a568c5eb791.tar.gz
miasm-011e1e209b3d00afe3c8e65000209a568c5eb791.zip
Merge pull request #904 from commial/feature/export-LLVM
Feature/export llvm
-rw-r--r--example/expression/export_llvm.py83
-rw-r--r--miasm2/jitter/llvmconvert.py223
-rwxr-xr-xtest/test_all.py3
3 files changed, 251 insertions, 58 deletions
diff --git a/example/expression/export_llvm.py b/example/expression/export_llvm.py
new file mode 100644
index 00000000..6f4ed591
--- /dev/null
+++ b/example/expression/export_llvm.py
@@ -0,0 +1,83 @@
+from argparse import ArgumentParser
+from miasm2.analysis.binary import Container
+from miasm2.analysis.machine import Machine
+from miasm2.jitter.llvmconvert import LLVMType, LLVMContext_IRCompilation, LLVMFunction_IRCompilation
+from llvmlite import ir as llvm_ir
+from miasm2.expression.simplifications import expr_simp_high_to_explicit
+
+parser = ArgumentParser("LLVM export example")
+parser.add_argument("target", help="Target binary")
+parser.add_argument("addr", help="Target address")
+parser.add_argument("--architecture", "-a", help="Force architecture")
+args = parser.parse_args()
+
+# This part focuses on getting an IRCFG to transform #
+cont = Container.from_stream(open(args.target))
+machine = Machine(args.architecture if args.architecture else cont.arch)
+ir = machine.ir(cont.loc_db)
+dis = machine.dis_engine(cont.bin_stream, loc_db=cont.loc_db)
+asmcfg = dis.dis_multiblock(int(args.addr, 0))
+ircfg = ir.new_ircfg_from_asmcfg(asmcfg)
+ircfg.simplify(expr_simp_high_to_explicit)
+######################################################
+
+# Instantiate a context and the function to fill
+context = LLVMContext_IRCompilation()
+context.ir_arch = ir
+
+func = LLVMFunction_IRCompilation(context, name="test")
+func.ret_type = llvm_ir.VoidType()
+func.init_fc()
+
+# Here, as an example, we arbitrarily represent registers with global
+# variables. Local allocas are used for the computation during the function,
+# and are finally saved in the aforementioned global variables.
+
+# In other words, for each register:
+# entry:
+#     ...
+#     %reg_val_in = load i32 @REG
+#     %REG = alloca i32
+#     store i32 %reg_val_in, i32* %REG
+#     ...
+# exit:
+#     ...
+#     %reg_val_out = load i32 %REG
+#     store i32 %reg_val_out, i32* @REG
+#     ...
+
+all_regs = set()
+for block in ircfg.blocks.itervalues():
+    for irs in block.assignblks:
+        for dst, src in irs.get_rw(mem_read=True).iteritems():
+            elem = src.union(set([dst]))
+            all_regs.update(x for x in elem
+                            if x.is_id())
+
+reg2glob = {}
+for var in all_regs:
+    # alloca reg = global reg
+    data = context.mod.globals.get(str(var), None)
+    if data is None:
+        data = llvm_ir.GlobalVariable(context.mod,  LLVMType.IntType(var.size), name=str(var))
+    data.initializer = LLVMType.IntType(var.size)(0)
+    value = func.builder.load(data)
+    func.local_vars_pointers[var.name] = func.builder.alloca(llvm_ir.IntType(var.size), name=var.name)
+    func.builder.store(value, func.local_vars_pointers[var.name])
+    reg2glob[var] = data
+
+# IRCFG is imported, without the final "ret void"
+func.from_ircfg(ircfg, append_ret=False)
+
+# Finish the saving of registers (temporary version to global)
+for reg, glob in reg2glob.iteritems():
+    value = func.builder.load(func.local_vars_pointers[reg.name])
+    func.builder.store(value, glob)
+
+# Finish the function
+func.builder.ret_void()
+
+# Get it back
+open("out.ll", "w").write(str(func))
+# The optimized CFG can be seen with:
+# $ opt -O2 -dot-cfg -S out.ll && xdot cfg.test.dot
diff --git a/miasm2/jitter/llvmconvert.py b/miasm2/jitter/llvmconvert.py
index 78402dc7..04dc2d2b 100644
--- a/miasm2/jitter/llvmconvert.py
+++ b/miasm2/jitter/llvmconvert.py
@@ -15,10 +15,10 @@ import os
 from llvmlite import binding as llvm
 from llvmlite import ir as llvm_ir
 from miasm2.expression.expression import ExprId, ExprInt, ExprMem, ExprSlice, \
-    ExprCond, ExprLoc, ExprOp, ExprCompose, LocKey
+    ExprCond, ExprLoc, ExprOp, ExprCompose, LocKey, Expr
 import miasm2.jitter.csts as m2_csts
 import miasm2.core.asmblock as m2_asmblock
-from miasm2.jitter.codegen import CGen
+from miasm2.jitter.codegen import CGen, Attributes
 from miasm2.expression.expression_helper import possible_values
 
 
@@ -114,6 +114,7 @@ class LLVMContext():
         """Create a module, with needed functions"""
         self.mod = llvm_ir.Module(name=name)
         self.add_fc(self.known_fc)
+        self.add_op()
 
     def get_execengine(self):
         "Return the Execution Engine associated with this context"
@@ -140,6 +141,60 @@ class LLVMContext():
             if readonly:
                 fn.attributes.add("readonly")
 
+    def add_op(self):
+        "Add operations functions"
+
+        i8 = LLVMType.IntType(8)
+        p8 = llvm_ir.PointerType(i8)
+        itype = LLVMType.IntType(64)
+        ftype = llvm_ir.FloatType()
+        dtype = llvm_ir.DoubleType()
+        fc = {"llvm.ctpop.i8": {"ret": i8,
+                                "args": [i8]},
+              "llvm.nearbyint.f32": {"ret": ftype,
+                                     "args": [ftype]},
+              "llvm.nearbyint.f64": {"ret": dtype,
+                                     "args": [dtype]},
+              "llvm.trunc.f32": {"ret": ftype,
+                                 "args": [ftype]},
+              "segm2addr": {"ret": itype,
+                            "args": [p8,
+                                     itype,
+                                     itype]},
+              "x86_cpuid": {"ret": itype,
+                        "args": [itype,
+                                 itype]},
+              "fpu_fcom_c0": {"ret": itype,
+                          "args": [dtype,
+                                   dtype]},
+              "fpu_fcom_c1": {"ret": itype,
+                          "args": [dtype,
+                                   dtype]},
+              "fpu_fcom_c2": {"ret": itype,
+                          "args": [dtype,
+                                   dtype]},
+              "fpu_fcom_c3": {"ret": itype,
+                          "args": [dtype,
+                                   dtype]},
+              "llvm.sqrt.f32": {"ret": ftype,
+                                "args": [ftype]},
+              "llvm.sqrt.f64": {"ret": dtype,
+                                "args": [dtype]},
+              "llvm.fabs.f32": {"ret": ftype,
+                                "args": [ftype]},
+              "llvm.fabs.f64": {"ret": dtype,
+                                "args": [dtype]},
+        }
+
+        for k in [8, 16]:
+            fc["bcdadd_%s" % k] = {"ret": LLVMType.IntType(k),
+                                   "args": [LLVMType.IntType(k),
+                                            LLVMType.IntType(k)]}
+            fc["bcdadd_cf_%s" % k] = {"ret": LLVMType.IntType(k),
+                                      "args": [LLVMType.IntType(k),
+                                               LLVMType.IntType(k)]}
+        self.add_fc(fc, readonly=True)
+
 
     def memory_lookup(self, func, addr, size):
         """Perform a memory lookup at @addr of size @size (in bit)"""
@@ -187,7 +242,6 @@ class LLVMContext_JIT(LLVMContext):
         LLVMContext.new_module(self, name)
         self.add_memlookups()
         self.add_get_exceptionflag()
-        self.add_op()
         self.add_log_functions()
 
     def arch_specific(self):
@@ -257,60 +311,6 @@ class LLVMContext_JIT(LLVMContext):
         self.add_fc({"get_exception_flag": {"ret": LLVMType.IntType(64),
                                             "args": [p8]}}, readonly=True)
 
-    def add_op(self):
-        "Add operations functions"
-
-        i8 = LLVMType.IntType(8)
-        p8 = llvm_ir.PointerType(i8)
-        itype = LLVMType.IntType(64)
-        ftype = llvm_ir.FloatType()
-        dtype = llvm_ir.DoubleType()
-        fc = {"llvm.ctpop.i8": {"ret": i8,
-                                "args": [i8]},
-              "llvm.nearbyint.f32": {"ret": ftype,
-                                     "args": [ftype]},
-              "llvm.nearbyint.f64": {"ret": dtype,
-                                     "args": [dtype]},
-              "llvm.trunc.f32": {"ret": ftype,
-                                 "args": [ftype]},
-              "segm2addr": {"ret": itype,
-                            "args": [p8,
-                                     itype,
-                                     itype]},
-              "x86_cpuid": {"ret": itype,
-                        "args": [itype,
-                                 itype]},
-              "fpu_fcom_c0": {"ret": itype,
-                          "args": [dtype,
-                                   dtype]},
-              "fpu_fcom_c1": {"ret": itype,
-                          "args": [dtype,
-                                   dtype]},
-              "fpu_fcom_c2": {"ret": itype,
-                          "args": [dtype,
-                                   dtype]},
-              "fpu_fcom_c3": {"ret": itype,
-                          "args": [dtype,
-                                   dtype]},
-              "llvm.sqrt.f32": {"ret": ftype,
-                                "args": [ftype]},
-              "llvm.sqrt.f64": {"ret": dtype,
-                                "args": [dtype]},
-              "llvm.fabs.f32": {"ret": ftype,
-                                "args": [ftype]},
-              "llvm.fabs.f64": {"ret": dtype,
-                                "args": [dtype]},
-        }
-
-        for k in [8, 16]:
-            fc["bcdadd_%s" % k] = {"ret": LLVMType.IntType(k),
-                                   "args": [LLVMType.IntType(k),
-                                            LLVMType.IntType(k)]}
-            fc["bcdadd_cf_%s" % k] = {"ret": LLVMType.IntType(k),
-                                      "args": [LLVMType.IntType(k),
-                                               LLVMType.IntType(k)]}
-        self.add_fc(fc, readonly=True)
-
     def add_log_functions(self):
         "Add functions for state logging"
 
@@ -482,7 +482,7 @@ class LLVMContext_IRCompilation(LLVMContext):
         return builder.store(value, ptr_casted)
 
 
-class LLVMFunction():
+class LLVMFunction(object):
     """Represent a LLVM function
 
     Implementation note:
@@ -893,6 +893,22 @@ class LLVMFunction():
                 self.update_cache(expr, ret)
                 return ret
 
+            unsigned_cmps = {
+                "==": "==",
+                "<u": "<",
+                "<=u": "<="
+            }
+            if op in unsigned_cmps:
+                op = unsigned_cmps[op]
+                args = [self.add_ir(arg) for arg in expr.args]
+                ret = builder.select(builder.icmp_unsigned(op,
+                                                           args[0],
+                                                           args[1]),
+                                     llvm_ir.IntType(expr.size)(1),
+                                     llvm_ir.IntType(expr.size)(0))
+                self.update_cache(expr, ret)
+                return ret
+
             if op in [">>", "<<", "a>>"]:
                 assert len(expr.args) == 2
                 # Undefined behavior must be enforced to 0
@@ -1661,3 +1677,94 @@ class LLVMFunction():
         engine.finalize_object()
 
         return engine.get_function_address(self.fc.name)
+
+
+class LLVMFunction_IRCompilation(LLVMFunction):
+    """LLVMFunction made for IR export, in conjunction with
+    LLVMContext_IRCompilation.
+
+    This class offers only the basics; decisions must be made by the class
+    user on how actual registers, ABI, etc. are reflected.
+
+
+    Example of use:
+    >>> context = LLVMContext_IRCompilation()
+    >>> context.ir_arch = ir
+    >>>
+    >>> func = LLVMFunction_IRCompilation(context, name="test")
+    >>> func.ret_type = llvm_ir.VoidType()
+    >>> func.init_fc()
+    >>>
+    >>> # Insert additional function initialization here
+    >>> XX = func.builder.alloca(...)
+    >>> func.local_vars_pointers["EAX"] = XX
+    >>> #
+    >>>
+    >>> func.from_ircfg(ircfg)
+    """
+
+    def init_fc(self):
+        super(LLVMFunction_IRCompilation, self).init_fc()
+
+        # Create a global IRDst if not any
+        IRDst = self.llvm_context.ir_arch.IRDst
+        if str(IRDst) not in self.mod.globals:
+            llvm_ir.GlobalVariable(self.mod, LLVMType.IntType(IRDst.size),
+                                   name=str(IRDst))
+
+        # Create an 'exit' basic block, the final leave
+        self.exit_bbl = self.append_basic_block("exit")
+
+    def gen_jump2dst(self, _attrib, _instr_offsets, dst):
+        self.main_stream = False
+
+        if isinstance(dst, Expr):
+            if dst.is_int():
+                loc = self.llvm_context.ir_arch.loc_db.getby_offset_create(int(dst))
+                dst = ExprLoc(loc, dst.size)
+            assert dst.is_loc()
+            bbl = self.get_basic_block_by_loc_key(dst.loc_key)
+            if bbl is not None:
+                # "local" jump, inside this function
+                self.builder.branch(bbl)
+                return
+
+            # extern jump
+            dst = self.add_ir(dst)
+
+        # Emulate indirect jump with:
+        #   @IRDst = dst
+        #   goto exit
+        self.builder.store(dst, self.mod.get_global("IRDst"))
+        self.builder.branch(self.exit_bbl)
+
+    def gen_irblock(self, irblock):
+        instr_attrib = Attributes()
+        attributes = [Attributes() for _ in xrange(len(irblock.assignblks))]
+        instr_offsets = None
+        return super(LLVMFunction_IRCompilation, self).gen_irblock(
+            instr_attrib, attributes, instr_offsets, irblock
+        )
+
+    def from_ircfg(self, ircfg, append_ret=True):
+        # Create basic blocks
+        for loc_key, irblock in ircfg.blocks.iteritems():
+            self.append_basic_block(loc_key)
+
+        # Add IRBlocks
+        for label, irblock in ircfg.blocks.iteritems():
+            self.builder.position_at_end(self.get_basic_block_by_loc_key(label))
+            self.gen_irblock(irblock)
+
+        # Branch the entry BBL on the IRCFG head
+        self.builder.position_at_end(self.entry_bbl)
+        heads = ircfg.heads()
+        assert len(heads) == 1
+        starting_label = list(heads).pop()
+        self.builder.branch(self.get_basic_block_by_loc_key(starting_label))
+
+        # Returns with the builder on the exit block
+        self.builder.position_at_end(self.exit_bbl)
+
+        if append_ret:
+            self.builder.ret_void()
diff --git a/test/test_all.py b/test/test_all.py
index 459d529e..4cb18241 100755
--- a/test/test_all.py
+++ b/test/test_all.py
@@ -675,6 +675,9 @@ testset += ExampleExpression(["expr_c.py"],
 testset += ExampleExpression(["constant_propagation.py",
                               Example.get_sample("simple_test.bin"), "-s", "0"],
                              products=["%s.propag.dot" % Example.get_sample("simple_test.bin")])
+testset += ExampleExpression(["export_llvm.py", "-a", "x86_32", Example.get_sample("simple_test.bin"), "0"],
+                             products=["out.ll"])
+
 
 for script in [["basic_op.py"],
                ["basic_simplification.py"],