""" This example demonstrates the recovering of possible C types for an arbitrary variable in an assembly code (the types are inferred from the function argument types). It also displays the C code used to access this variable. Input: * definitions of the C types that can be used by the code * layout of structures (packed/not packed) * prototype of the analyzed function Algorithm: The DepGraph of the target variable is computed, which gives possible expressions for this variable. For each DepGraph solution, if the expression depends on typed arguments, the code infers the variable type and displays the C code to access this variable. Here be dragons: For the moment, Miasm can infer C types (and generate C) for simple expressions. To summarize, Miasm only supports accesses that do not involve arithmetic or conditional expressions such as the following: * var1.field * var1[12][4] * *(var1.field->tab[4]) Unsupported forms: * var1 + var2 * var1[var2+4] * var1?var2->field:6 In the following example, we have an explicit cast for "age", from uint16_t to uint64_t, and for "height", from uint32_t to uint64_t. We are adding a naive reduction rule to support such a cast. First, in the type inference engine: ExprCompose(int, 0) => int Then, in the C generator: ExprCompose(var1, 0) => var1 """ from __future__ import print_function import sys from future.utils import viewitems, viewvalues from miasm.analysis.machine import Machine from miasm.analysis.binary import Container from miasm.expression.expression import ExprOp, ExprCompose, ExprId, ExprInt from miasm.analysis.depgraph import DependencyGraph from miasm.arch.x86.ctype import CTypeAMD64_unk from miasm.core.objc import ExprToAccessC, CHandler from miasm.core.objc import CTypesManagerNotPacked from miasm.core.ctypesmngr import CAstTypes, CTypePtr, CTypeStruct from miasm.core.locationdb import LocationDB def find_call(ircfg): """Returns (irb, index) which call""" for irb in viewvalues(ircfg.blocks): out = set() if len(irb) < 2: continue assignblk = irb[-2] for src in viewvalues(assignblk): if not isinstance(src, ExprOp): continue if not src.op.startswith('call_func'): continue out.add((irb.loc_key, len(irb) - 2)) if len(out) != 1: continue loc_key, index = out.pop() yield loc_key, index class MyExprToAccessC(ExprToAccessC): """Custom ExprToAccessC to complete expression traduction to C""" def reduce_compose(self, node, **kwargs): """Custom reduction rule: {XXX, 0} -> XXX""" if not (isinstance(node.expr, ExprCompose) and len(node.expr.args) == 2 and node.expr.args[1].is_int(0)): return None found = [] for subcgenobj in node.args[0].info: found.append(subcgenobj) return found reduction_rules = ExprToAccessC.reduction_rules + [reduce_compose] def get_funcs_arg0(ctx, lifter_model_call, ircfg, lbl_head): """Compute DependencyGraph on the func @lbl_head""" g_dep = DependencyGraph(ircfg, follow_call=False) element = lifter_model_call.arch.regs.RSI for loc_key, index in find_call(ircfg): irb = ircfg.get_block(loc_key) instr = irb[index].instr print('Analysing references from:', hex(instr.offset), instr) g_list = g_dep.get(irb.loc_key, set([element]), index, set([lbl_head])) for dep in g_list: emul_result = dep.emul(lifter_model_call, ctx) value = emul_result[element] yield value class MyCHandler(CHandler): """Custom CHandler to add complementary C handling rules""" exprToAccessC_cls = MyExprToAccessC loc_db = LocationDB() data = open(sys.argv[1], 'rb').read() # Digest C information text = """ struct human { unsigned short age; unsigned int height; char name[50]; }; struct ll_human { struct ll_human* next; struct human human; }; """ base_types = CTypeAMD64_unk() types_ast = CAstTypes() types_ast.add_c_decl(text) types_mngr = CTypesManagerNotPacked(types_ast, base_types) # Analyze binary cont = Container.fallback_container(data, None, addr=0) machine = Machine("x86_64") dis_engine, lifter_model_call = machine.dis_engine, machine.lifter_model_call mdis = dis_engine(cont.bin_stream, loc_db=loc_db) addr_head = 0 asmcfg = mdis.dis_multiblock(addr_head) lbl_head = loc_db.get_offset_location(addr_head) lifter = lifter_model_call(loc_db) ircfg = lifter.new_ircfg_from_asmcfg(asmcfg) open('graph_irflow.dot', 'w').write(ircfg.dot()) # Main function's first argument's type is "struct ll_human*" ptr_llhuman = types_mngr.get_objc(CTypePtr(CTypeStruct('ll_human'))) arg0 = ExprId('ptr', 64) ctx = {lifter.arch.regs.RDI: arg0} expr_types = {arg0: (ptr_llhuman,), ExprInt(0x8A, 64): (ptr_llhuman,)} mychandler = MyCHandler(types_mngr, expr_types) for expr in get_funcs_arg0(ctx, lifter, ircfg, lbl_head): print("Access:", expr) for c_str, ctype in mychandler.expr_to_c_and_types(expr): print('\taccess:', c_str) print('\tc type:', ctype)