1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
|
from argparse import ArgumentParser
from miasm2.analysis.binary import Container
from miasm2.analysis.machine import Machine
from miasm2.jitter.llvmconvert import LLVMType, LLVMContext_IRCompilation, LLVMFunction_IRCompilation
from llvmlite import ir as llvm_ir
from miasm2.expression.simplifications import expr_simp_high_to_explicit
# Command-line interface: the binary to analyse, the address to work on
# (kept as a string here) and an optional architecture override.
# The text is given as `description`: ArgumentParser's first positional
# parameter is `prog`, which would replace the program name in the usage line.
parser = ArgumentParser(description="LLVM export example")
parser.add_argument("target", help="Target binary")
parser.add_argument("addr", help="Target address")
parser.add_argument("--architecture", "-a", help="Force architecture")
args = parser.parse_args()
# This part focuses on obtaining an IRCFG to transform #
# The target is opened in binary mode (it is an executable, not text) and
# closed as soon as the container is built.
# NOTE(review): this relies on Container.from_stream consuming the stream
# eagerly -- confirm against the miasm Container documentation.
with open(args.target, "rb") as fdesc:
    cont = Container.from_stream(fdesc)
# An explicit --architecture takes precedence over the detected one
machine = Machine(args.architecture or cont.arch)
ir = machine.ir(cont.loc_db)
dis = machine.dis_engine(cont.bin_stream, loc_db=cont.loc_db)
# Base 0 lets int() infer the base from the literal's prefix (e.g. 0x...)
asmcfg = dis.dis_multiblock(int(args.addr, 0))
ircfg = ir.new_ircfg_from_asmcfg(asmcfg)
ircfg.simplify(expr_simp_high_to_explicit)
######################################################
# Instantiate a compilation context and the LLVM function to fill
context = LLVMContext_IRCompilation()
# Hand the IR architecture instance (`ir`) to the context
context.ir_arch = ir
# The function is emitted under the name "test"
func = LLVMFunction_IRCompilation(context, name="test")
# The function returns nothing: its return type is void
func.ret_type = llvm_ir.VoidType()
# NOTE(review): presumably this creates the function and its builder
# (`func.builder` is used right after) -- confirm in LLVMFunction.init_fc
func.init_fc()
# Here, as an example, we arbitrarily represent registers with global
# variables. Local allocas are used for the computation during the function,
# and their values are finally saved in the aforementioned global variables.
# In other words, for each register:
# entry:
# ...
# %reg_val_in = load i32 @REG
# %REG = alloca i32
# store i32 %reg_val_in, i32* %REG
# ...
# exit:
# ...
# %reg_val_out = load i32 %REG
# store i32 %reg_val_out, i32* @REG
# ...
# Collect every expression for which is_id() holds among the elements
# reported by get_rw() (keys and associated sets) over the whole IRCFG.
all_regs = set()
for irblock in ircfg.blocks.itervalues():
    for assignblk in irblock.assignblks:
        for written, read in assignblk.get_rw(mem_read=True).iteritems():
            # Consider the key together with the set attached to it
            for expr in read.union(set([written])):
                if expr.is_id():
                    all_regs.add(expr)
# Map each register to the global variable backing it, and emit, for each
# one:  load from the global -> alloca a local -> store the value in the local
reg2glob = {}
for reg in all_regs:
    glob_name = str(reg)
    # Reuse the module's global of that name if any, else declare it
    glob = context.mod.globals.get(glob_name)
    if glob is None:
        glob = llvm_ir.GlobalVariable(context.mod, LLVMType.IntType(reg.size), name=glob_name)
    # The global is zero-initialized
    glob.initializer = LLVMType.IntType(reg.size)(0)
    entry_value = func.builder.load(glob)
    # Local copy used by the function's computations, registered under the
    # register name
    slot = func.builder.alloca(llvm_ir.IntType(reg.size), name=reg.name)
    func.local_vars_pointers[reg.name] = slot
    func.builder.store(entry_value, slot)
    reg2glob[reg] = glob
# Import the IRCFG; the final "ret void" is not appended here
# (append_ret=False) and is added explicitly below
func.from_ircfg(ircfg, append_ret=False)
# Write each register's local (temporary) value back to its global
for reg, glob in reg2glob.iteritems():
    local_slot = func.local_vars_pointers[reg.name]
    func.builder.store(func.builder.load(local_slot), glob)
# Terminate the function
func.builder.ret_void()
# Get it back: dump the textual form of the function to "out.ll".
# The context manager guarantees the file is flushed and closed.
with open("out.ll", "w") as out_file:
    out_file.write(str(func))
# The optimized CFG can be seen with:
# $ opt -O2 -dot-cfg -S out.ll && xdot cfg.test.dot
|