diff options
| author | serpilliere <serpilliere@users.noreply.github.com> | 2018-12-20 20:05:55 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2018-12-20 20:05:55 +0100 |
| commit | 011e1e209b3d00afe3c8e65000209a568c5eb791 (patch) | |
| tree | d9d50826df2c94bdcc10a2e676cb4739ee685f66 | |
| parent | 75417940371bf1bf95d2f562141f2284d1f77f71 (diff) | |
| parent | bff40462030dc01ee651595370479619e15500d2 (diff) | |
| download | miasm-011e1e209b3d00afe3c8e65000209a568c5eb791.tar.gz miasm-011e1e209b3d00afe3c8e65000209a568c5eb791.zip | |
Merge pull request #904 from commial/feature/export-LLVM
Feature/export llvm
| -rw-r--r-- | example/expression/export_llvm.py | 83 | ||||
| -rw-r--r-- | miasm2/jitter/llvmconvert.py | 223 | ||||
| -rwxr-xr-x | test/test_all.py | 3 |
3 files changed, 251 insertions, 58 deletions
diff --git a/example/expression/export_llvm.py b/example/expression/export_llvm.py new file mode 100644 index 00000000..6f4ed591 --- /dev/null +++ b/example/expression/export_llvm.py @@ -0,0 +1,83 @@ +from argparse import ArgumentParser +from miasm2.analysis.binary import Container +from miasm2.analysis.machine import Machine +from miasm2.jitter.llvmconvert import LLVMType, LLVMContext_IRCompilation, LLVMFunction_IRCompilation +from llvmlite import ir as llvm_ir +from miasm2.expression.simplifications import expr_simp_high_to_explicit + +parser = ArgumentParser("LLVM export example") +parser.add_argument("target", help="Target binary") +parser.add_argument("addr", help="Target address") +parser.add_argument("--architecture", "-a", help="Force architecture") +args = parser.parse_args() + +# This part focuses on getting an IRCFG to transform # +cont = Container.from_stream(open(args.target)) +machine = Machine(args.architecture if args.architecture else cont.arch) +ir = machine.ir(cont.loc_db) +dis = machine.dis_engine(cont.bin_stream, loc_db=cont.loc_db) +asmcfg = dis.dis_multiblock(int(args.addr, 0)) +ircfg = ir.new_ircfg_from_asmcfg(asmcfg) +ircfg.simplify(expr_simp_high_to_explicit) +###################################################### + +# Instantiate a context and the function to fill +context = LLVMContext_IRCompilation() +context.ir_arch = ir + +func = LLVMFunction_IRCompilation(context, name="test") +func.ret_type = llvm_ir.VoidType() +func.init_fc() + +# Here, as an example, we arbitrarily represent registers with global +# variables. Local allocas are used for the computation during the function, +# and their values are finally saved in the aforementioned global variables. + +# In other words, for each register: +# entry: +# ... +# %reg_val_in = load i32 @REG +# %REG = alloca i32 +# store i32 %reg_val_in, i32* %REG +# ... +# exit: +# ... +# %reg_val_out = load i32 %REG +# store i32 %reg_val_out, i32* @REG +# ... 
+ +all_regs = set() +for block in ircfg.blocks.itervalues(): + for irs in block.assignblks: + for dst, src in irs.get_rw(mem_read=True).iteritems(): + elem = src.union(set([dst])) + all_regs.update(x for x in elem + if x.is_id()) + +reg2glob = {} +for var in all_regs: + # alloca reg = global reg + data = context.mod.globals.get(str(var), None) + if data is None: + data = llvm_ir.GlobalVariable(context.mod, LLVMType.IntType(var.size), name=str(var)) + data.initializer = LLVMType.IntType(var.size)(0) + value = func.builder.load(data) + func.local_vars_pointers[var.name] = func.builder.alloca(llvm_ir.IntType(var.size), name=var.name) + func.builder.store(value, func.local_vars_pointers[var.name]) + reg2glob[var] = data + +# IRCFG is imported, without the final "ret void" +func.from_ircfg(ircfg, append_ret=False) + +# Finish the saving of registers (temporary version to global) +for reg, glob in reg2glob.iteritems(): + value = func.builder.load(func.local_vars_pointers[reg.name]) + func.builder.store(value, glob) + +# Finish the function +func.builder.ret_void() + +# Get it back +open("out.ll", "w").write(str(func)) +# The optimized CFG can be seen with: +# $ opt -O2 -dot-cfg -S out.ll && xdot cfg.test.dot diff --git a/miasm2/jitter/llvmconvert.py b/miasm2/jitter/llvmconvert.py index 78402dc7..04dc2d2b 100644 --- a/miasm2/jitter/llvmconvert.py +++ b/miasm2/jitter/llvmconvert.py @@ -15,10 +15,10 @@ import os from llvmlite import binding as llvm from llvmlite import ir as llvm_ir from miasm2.expression.expression import ExprId, ExprInt, ExprMem, ExprSlice, \ - ExprCond, ExprLoc, ExprOp, ExprCompose, LocKey + ExprCond, ExprLoc, ExprOp, ExprCompose, LocKey, Expr import miasm2.jitter.csts as m2_csts import miasm2.core.asmblock as m2_asmblock -from miasm2.jitter.codegen import CGen +from miasm2.jitter.codegen import CGen, Attributes from miasm2.expression.expression_helper import possible_values @@ -114,6 +114,7 @@ class LLVMContext(): """Create a module, with needed 
functions""" self.mod = llvm_ir.Module(name=name) self.add_fc(self.known_fc) + self.add_op() def get_execengine(self): "Return the Execution Engine associated with this context" @@ -140,6 +141,60 @@ class LLVMContext(): if readonly: fn.attributes.add("readonly") + def add_op(self): + "Add operations functions" + + i8 = LLVMType.IntType(8) + p8 = llvm_ir.PointerType(i8) + itype = LLVMType.IntType(64) + ftype = llvm_ir.FloatType() + dtype = llvm_ir.DoubleType() + fc = {"llvm.ctpop.i8": {"ret": i8, + "args": [i8]}, + "llvm.nearbyint.f32": {"ret": ftype, + "args": [ftype]}, + "llvm.nearbyint.f64": {"ret": dtype, + "args": [dtype]}, + "llvm.trunc.f32": {"ret": ftype, + "args": [ftype]}, + "segm2addr": {"ret": itype, + "args": [p8, + itype, + itype]}, + "x86_cpuid": {"ret": itype, + "args": [itype, + itype]}, + "fpu_fcom_c0": {"ret": itype, + "args": [dtype, + dtype]}, + "fpu_fcom_c1": {"ret": itype, + "args": [dtype, + dtype]}, + "fpu_fcom_c2": {"ret": itype, + "args": [dtype, + dtype]}, + "fpu_fcom_c3": {"ret": itype, + "args": [dtype, + dtype]}, + "llvm.sqrt.f32": {"ret": ftype, + "args": [ftype]}, + "llvm.sqrt.f64": {"ret": dtype, + "args": [dtype]}, + "llvm.fabs.f32": {"ret": ftype, + "args": [ftype]}, + "llvm.fabs.f64": {"ret": dtype, + "args": [dtype]}, + } + + for k in [8, 16]: + fc["bcdadd_%s" % k] = {"ret": LLVMType.IntType(k), + "args": [LLVMType.IntType(k), + LLVMType.IntType(k)]} + fc["bcdadd_cf_%s" % k] = {"ret": LLVMType.IntType(k), + "args": [LLVMType.IntType(k), + LLVMType.IntType(k)]} + self.add_fc(fc, readonly=True) + def memory_lookup(self, func, addr, size): """Perform a memory lookup at @addr of size @size (in bit)""" @@ -187,7 +242,6 @@ class LLVMContext_JIT(LLVMContext): LLVMContext.new_module(self, name) self.add_memlookups() self.add_get_exceptionflag() - self.add_op() self.add_log_functions() def arch_specific(self): @@ -257,60 +311,6 @@ class LLVMContext_JIT(LLVMContext): self.add_fc({"get_exception_flag": {"ret": LLVMType.IntType(64), "args": 
[p8]}}, readonly=True) - def add_op(self): - "Add operations functions" - - i8 = LLVMType.IntType(8) - p8 = llvm_ir.PointerType(i8) - itype = LLVMType.IntType(64) - ftype = llvm_ir.FloatType() - dtype = llvm_ir.DoubleType() - fc = {"llvm.ctpop.i8": {"ret": i8, - "args": [i8]}, - "llvm.nearbyint.f32": {"ret": ftype, - "args": [ftype]}, - "llvm.nearbyint.f64": {"ret": dtype, - "args": [dtype]}, - "llvm.trunc.f32": {"ret": ftype, - "args": [ftype]}, - "segm2addr": {"ret": itype, - "args": [p8, - itype, - itype]}, - "x86_cpuid": {"ret": itype, - "args": [itype, - itype]}, - "fpu_fcom_c0": {"ret": itype, - "args": [dtype, - dtype]}, - "fpu_fcom_c1": {"ret": itype, - "args": [dtype, - dtype]}, - "fpu_fcom_c2": {"ret": itype, - "args": [dtype, - dtype]}, - "fpu_fcom_c3": {"ret": itype, - "args": [dtype, - dtype]}, - "llvm.sqrt.f32": {"ret": ftype, - "args": [ftype]}, - "llvm.sqrt.f64": {"ret": dtype, - "args": [dtype]}, - "llvm.fabs.f32": {"ret": ftype, - "args": [ftype]}, - "llvm.fabs.f64": {"ret": dtype, - "args": [dtype]}, - } - - for k in [8, 16]: - fc["bcdadd_%s" % k] = {"ret": LLVMType.IntType(k), - "args": [LLVMType.IntType(k), - LLVMType.IntType(k)]} - fc["bcdadd_cf_%s" % k] = {"ret": LLVMType.IntType(k), - "args": [LLVMType.IntType(k), - LLVMType.IntType(k)]} - self.add_fc(fc, readonly=True) - def add_log_functions(self): "Add functions for state logging" @@ -482,7 +482,7 @@ class LLVMContext_IRCompilation(LLVMContext): return builder.store(value, ptr_casted) -class LLVMFunction(): +class LLVMFunction(object): """Represent a LLVM function Implementation note: @@ -893,6 +893,22 @@ class LLVMFunction(): self.update_cache(expr, ret) return ret + unsigned_cmps = { + "==": "==", + "<u": "<", + "<=u": "<=" + } + if op in unsigned_cmps: + op = unsigned_cmps[op] + args = [self.add_ir(arg) for arg in expr.args] + ret = builder.select(builder.icmp_unsigned(op, + args[0], + args[1]), + llvm_ir.IntType(expr.size)(1), + llvm_ir.IntType(expr.size)(0)) + self.update_cache(expr, 
ret) + return ret + + if op in [">>", "<<", "a>>"]: assert len(expr.args) == 2 # Undefined behavior must be enforced to 0 @@ -1661,3 +1677,94 @@ class LLVMFunction(): engine.finalize_object() return engine.get_function_address(self.fc.name) + + +class LLVMFunction_IRCompilation(LLVMFunction): + """LLVMFunction made for IR export, in conjunction with + LLVMContext_IRCompilation. + + This class offers only the basics, and decisions must be made by the class + user on how actual registers, ABI, etc. are reflected. + + + Example of use: + >>> context = LLVMContext_IRCompilation() + >>> context.ir_arch = ir + >>> + >>> func = LLVMFunction_IRCompilation(context, name="test") + >>> func.ret_type = llvm_ir.VoidType() + >>> func.init_fc() + >>> + >>> # Insert additional function inits here + >>> XX = func.builder.alloca(...) + >>> func.local_vars_pointers["EAX"] = XX + >>> # + >>> + >>> func.from_ircfg(ircfg) + """ + + def init_fc(self): + super(LLVMFunction_IRCompilation, self).init_fc() + + # Create a global IRDst if none exists yet + IRDst = self.llvm_context.ir_arch.IRDst + if str(IRDst) not in self.mod.globals: + llvm_ir.GlobalVariable(self.mod, LLVMType.IntType(IRDst.size), + name=str(IRDst)) + + # Create an 'exit' basic block, the final leave + self.exit_bbl = self.append_basic_block("exit") + + def gen_jump2dst(self, _attrib, _instr_offsets, dst): + self.main_stream = False + + if isinstance(dst, Expr): + if dst.is_int(): + loc = self.llvm_context.ir_arch.loc_db.getby_offset_create(int(dst)) + dst = ExprLoc(loc, dst.size) + assert dst.is_loc() + bbl = self.get_basic_block_by_loc_key(dst.loc_key) + if bbl is not None: + # "local" jump, inside this function + self.builder.branch(bbl) + return + + # extern jump + dst = self.add_ir(dst) + + # Emulate indirect jump with: + # @IRDst = dst + # goto exit + self.builder.store(dst, self.mod.get_global("IRDst")) + self.builder.branch(self.exit_bbl) + + def gen_irblock(self, irblock): + instr_attrib = Attributes() + attributes = 
[Attributes() for _ in xrange(len(irblock.assignblks))] + instr_offsets = None + return super(LLVMFunction_IRCompilation, self).gen_irblock( + instr_attrib, attributes, instr_offsets, irblock + ) + + def from_ircfg(self, ircfg, append_ret=True): + # Create basic blocks + for loc_key, irblock in ircfg.blocks.iteritems(): + self.append_basic_block(loc_key) + + # Add IRBlocks + for label, irblock in ircfg.blocks.iteritems(): + self.builder.position_at_end(self.get_basic_block_by_loc_key(label)) + self.gen_irblock(irblock) + + # Branch the entry BBL on the IRCFG head + self.builder.position_at_end(self.entry_bbl) + heads = ircfg.heads() + assert len(heads) == 1 + starting_label = list(heads).pop() + self.builder.branch(self.get_basic_block_by_loc_key(starting_label)) + + # Returns with the builder on the exit block + self.builder.position_at_end(self.exit_bbl) + + if append_ret: + self.builder.ret_void() diff --git a/test/test_all.py b/test/test_all.py index 459d529e..4cb18241 100755 --- a/test/test_all.py +++ b/test/test_all.py @@ -675,6 +675,9 @@ testset += ExampleExpression(["expr_c.py"], testset += ExampleExpression(["constant_propagation.py", Example.get_sample("simple_test.bin"), "-s", "0"], products=["%s.propag.dot" % Example.get_sample("simple_test.bin")]) +testset += ExampleExpression(["export_llvm.py", "-a", "x86_32", Example.get_sample("simple_test.bin"), "0"], + products=["out.ll"]) + for script in [["basic_op.py"], ["basic_simplification.py"], |