"""LLVM export example.

Disassemble the code located at a given address of a target binary, lift it
to an IRCFG and prepare an LLVM function that the IRCFG will be translated
into.

Usage: <script> [-a ARCHITECTURE] target addr
"""
from argparse import ArgumentParser

from miasm2.analysis.binary import Container
from miasm2.analysis.machine import Machine
from miasm2.jitter.llvmconvert import (
    LLVMType,
    LLVMContext_IRCompilation,
    LLVMFunction_IRCompilation,
)
from llvmlite import ir as llvm_ir
from miasm2.expression.simplifications import expr_simp_high_to_explicit

parser = ArgumentParser("LLVM export example")
parser.add_argument("target", help="Target binary")
parser.add_argument("addr", help="Target address")
parser.add_argument("--architecture", "-a", help="Force architecture")
args = parser.parse_args()

######################################################
# This part focuses on obtaining an IRCFG to transform
#
# The target is an executable: open it in binary mode, and release the file
# handle as soon as the container has been built.
# NOTE: this relies on Container.from_stream consuming the stream before it
# returns.
with open(args.target, "rb") as target_stream:
    cont = Container.from_stream(target_stream)

# The architecture given on the command line takes precedence over the one
# detected by the container.
machine = Machine(args.architecture if args.architecture else cont.arch)
ir = machine.ir(cont.loc_db)
dis = machine.dis_engine(cont.bin_stream, loc_db=cont.loc_db)
# Base 0 lets the address be given as decimal, hexadecimal (0x...), etc.
asmcfg = dis.dis_multiblock(int(args.addr, 0))
ircfg = ir.new_ircfg_from_asmcfg(asmcfg)
ircfg.simplify(expr_simp_high_to_explicit)
######################################################

# Instantiate a context and the function to fill
context = LLVMContext_IRCompilation()
context.ir_arch = ir

func = LLVMFunction_IRCompilation(context, name="test")
func.ret_type = llvm_ir.VoidType()
func.init_fc()

# Here, as an example, we arbitrarily represent registers with global
# variables. Local allocas are used for the computation during the function,
# and their values are finally saved back to the aforementioned global
# variables.
#
# In other words, for each register:
# entry:
#     ...
#     %reg_val_in = load i32 @REG
#     %REG = alloca i32
#     store i32 %reg_val_in, i32* %REG
#     ...
# exit:
#     ...
#     %reg_val_out = load i32 %REG
#     store i32 %reg_val_out, i32* @REG
#     ...
# Gather every identifier expression (`is_id()`) that appears as a destination
# or as a source of an assignment in the IRCFG; these are the "registers"
# handled below.
all_regs = set()
for block in ircfg.blocks.values():
    for irs in block.assignblks:
        for dst, src in irs.get_rw(mem_read=True).items():
            elem = src.union({dst})
            all_regs.update(x for x in elem if x.is_id())

# Map each register to the global variable backing it
reg2glob = {}
for var in all_regs:
    # alloca reg = global reg
    # Reuse the module-level global if it already exists, else declare it
    data = context.mod.globals.get(str(var), None)
    if data is None:
        data = llvm_ir.GlobalVariable(
            context.mod, LLVMType.IntType(var.size), name=str(var)
        )
    data.initializer = LLVMType.IntType(var.size)(0)

    # Function entry: copy the global's value into a local alloca, which is
    # what the translated code will work on
    value = func.builder.load(data)
    local_ptr = func.builder.alloca(llvm_ir.IntType(var.size), name=var.name)
    func.local_vars_pointers[var.name] = local_ptr
    func.builder.store(value, local_ptr)
    reg2glob[var] = data

# IRCFG is imported, without the final "ret void"
func.from_ircfg(ircfg, append_ret=False)

# Finish the saving of registers (temporary version to global)
for reg, glob in reg2glob.items():
    value = func.builder.load(func.local_vars_pointers[reg.name])
    func.builder.store(value, glob)

# Finish the function
func.builder.ret_void()

# Get it back: dump the textual form of the function, making sure the output
# file is flushed and closed
with open("out.ll", "w") as out_file:
    out_file.write(str(func))

# The optimized CFG can be seen with:
# $ opt -O2 -dot-cfg -S out.ll && xdot cfg.test.dot