diff options
36 files changed, 1968 insertions, 811 deletions
diff --git a/example/expression/access_c.py b/example/expression/access_c.py index 7255e23a..f285eb55 100644 --- a/example/expression/access_c.py +++ b/example/expression/access_c.py @@ -1,16 +1,3 @@ -import sys - -from miasm2.analysis.machine import Machine -from miasm2.analysis.binary import Container -from miasm2.expression.expression import ExprOp, ExprCompose, ExprId -from miasm2.analysis.depgraph import DependencyGraph - -from miasm2.arch.x86.ctype import CTypeAMD64_unk - -from miasm2.core.objc import CTypeAnalyzer, ExprToAccessC, CHandler -from miasm2.core.objc import CTypesManagerNotPacked -from miasm2.core.ctypesmngr import CAstTypes, CTypePtr, CTypeStruct - """ This example demonstrates the recovering of possible C types for an arbitrary @@ -54,6 +41,20 @@ ExprCompose(var1, 0) => var1 """ +import sys + +from miasm2.analysis.machine import Machine +from miasm2.analysis.binary import Container +from miasm2.expression.expression import ExprOp, ExprCompose, ExprId, ExprInt +from miasm2.analysis.depgraph import DependencyGraph + +from miasm2.arch.x86.ctype import CTypeAMD64_unk + +from miasm2.core.objc import ExprToAccessC, CHandler +from miasm2.core.objc import CTypesManagerNotPacked +from miasm2.core.ctypesmngr import CAstTypes, CTypePtr, CTypeStruct + + def find_call(ira): """Returns (irb, index) which call""" @@ -74,24 +75,10 @@ def find_call(ira): yield irb, index -class MyCTypeAnalyzer(CTypeAnalyzer): - """Custom CTypeAnalyzer to complete type analysis""" - - def reduce_compose(self, node, _): - """Custom reduction rule: {XXX, 0} -> typeof(XXX)""" - if not (isinstance(node.expr, ExprCompose) and - len(node.expr.args) == 2 and - node.expr.args[1].is_int(0)): - return None - return node.args[0].info - - reduction_rules = CTypeAnalyzer.reduction_rules + [reduce_compose] - - class MyExprToAccessC(ExprToAccessC): """Custom ExprToAccessC to complete expression traduction to C""" - def reduce_compose(self, node, _): + def reduce_compose(self, node, **kwargs): """Custom reduction rule: {XXX, 0} -> XXX""" if not (isinstance(node.expr, ExprCompose) and len(node.expr.args) == 2 and @@ -123,7 +110,6 @@ def get_funcs_arg0(ctx, ira, lbl_head): class MyCHandler(CHandler): """Custom CHandler to add complementary C handling rules""" - cTypeAnalyzer_cls = MyCTypeAnalyzer exprToAccessC_cls = MyExprToAccessC @@ -170,17 +156,13 @@ open('graph_irflow.dot', 'w').write(ir_arch_a.graph.dot()) ptr_llhuman = types_mngr.get_objc(CTypePtr(CTypeStruct('ll_human'))) arg0 = ExprId('ptr', 64) ctx = {ir_arch_a.arch.regs.RDI: arg0} -expr_types = {arg0.name: ptr_llhuman} +expr_types = {arg0: (ptr_llhuman,), + ExprInt(0x8A, 64): (ptr_llhuman,)} mychandler = MyCHandler(types_mngr, expr_types) for expr in get_funcs_arg0(ctx, ir_arch_a, lbl_head): print "Access:", expr - target_types = mychandler.expr_to_types(expr) - for target_type in target_types: - print '\tType:', target_type - c_strs = mychandler.expr_to_c(expr) - for c_str in c_strs: - print "\tC access:", c_str - print - + for c_str, ctype in mychandler.expr_to_c_and_types(expr): + print '\taccess:', c_str + print '\tc type:', ctype diff --git a/example/expression/constant_propagation.py b/example/expression/constant_propagation.py new file mode 100644 index 00000000..70394580 --- /dev/null +++ b/example/expression/constant_propagation.py @@ -0,0 +1,54 @@ +""" +Example of "constant expression" propagation. +A "constant expression" is an expression based on constants or init regs. + +""" + +from argparse import ArgumentParser + +from miasm2.arch.x86.disasm import dis_x86_32 as dis_engine +from miasm2.analysis.machine import Machine +from miasm2.analysis.binary import Container +from miasm2.analysis.cst_propag import propagate_cst_expr +from miasm2.analysis.data_flow import dead_simp +from miasm2.expression.simplifications import expr_simp + + +parser = ArgumentParser("Constant expression propagation") +parser.add_argument('filename', help="File to analyze") +parser.add_argument('address', help="Starting address for disassembly engine") +parser.add_argument('-s', "--simplify", action="store_true", + help="Apply simplifications rules (liveness, graph simplification, ...)") + +args = parser.parse_args() + + +machine = Machine("x86_32") + +cont = Container.from_stream(open(args.filename)) +ira, dis_engine = machine.ira, machine.dis_engine +mdis = dis_engine(cont.bin_stream) +ir_arch = ira(mdis.symbol_pool) +addr = int(args.address, 0) + + +blocks = mdis.dis_multiblock(addr) +for block in blocks: + ir_arch.add_block(block) + + +init_infos = ir_arch.arch.regs.regs_init +cst_propag_link = propagate_cst_expr(ir_arch, addr, init_infos) + +if args.simplify: + ir_arch.simplify(expr_simp) + modified = True + while modified: + modified = False + modified |= dead_simp(ir_arch) + modified |= ir_arch.remove_empty_assignblks() + modified |= ir_arch.remove_jmp_blocks() + modified |= ir_arch.merge_blocks() + + +open("%s.propag.dot" % args.filename, 'w').write(ir_arch.graph.dot()) diff --git a/example/expression/expr_c.py b/example/expression/expr_c.py index 7adc7b50..ca92153a 100644 --- a/example/expression/expr_c.py +++ b/example/expression/expr_c.py @@ -41,9 +41,8 @@ types_mngr = CTypesManagerNotPacked(types_ast, base_types) ptr_rectangle = types_mngr.get_objc(CTypePtr(CTypeStruct('rectangle'))) ptr = ExprId('ptr', 64) -expr_types = {ptr.name: ptr_rectangle} - -mychandler = CHandler(types_mngr, expr_types) +c_context = {ptr.name: ptr_rectangle} +mychandler = CHandler(types_mngr, {}) # Parse some C accesses c_acceses = ["ptr->width", @@ -55,8 +54,8 @@ c_acceses = ["ptr->width", ] for c_str in c_acceses: - expr = mychandler.c_to_expr(c_str) - c_type = mychandler.c_to_type(c_str) + expr = mychandler.c_to_expr(c_str, c_context) + c_type = mychandler.c_to_type(c_str, c_context) print 'C access:', c_str print '\tExpr:', expr print '\tType:', c_type diff --git a/example/expression/expr_reduce.py b/example/expression/expr_reduce.py index b5fc96c8..bb94ceb9 100644 --- a/example/expression/expr_reduce.py +++ b/example/expression/expr_reduce.py @@ -20,7 +20,7 @@ class StructLookup(ExprReducer): FIELD_A_PTR = "FIELD_A_PTR" FIELD_A = "FIELD_A" - def reduce_int(self, node, _): + def reduce_int(self, node, **kwargs): """ Reduction: int -> CST """ @@ -28,7 +28,7 @@ class StructLookup(ExprReducer): return self.CST return None - def reduce_ptr_struct(self, node, _): + def reduce_ptr_struct(self, node, **kwargs): """ Reduction: ECX -> FIELD_A_PTR """ @@ -36,7 +36,7 @@ class StructLookup(ExprReducer): return self.FIELD_A_PTR return None - def reduce_ptr_plus_int(self, node, _): + def reduce_ptr_plus_int(self, node, **kwargs): """ Reduction: ECX + CST -> FIELD_A_PTR """ @@ -46,7 +46,7 @@ class StructLookup(ExprReducer): return self.FIELD_A_PTR return None - def reduce_cst_op(self, node, _): + def reduce_cst_op(self, node, **kwargs): """ Reduction: CST + CST -> CST """ @@ -56,7 +56,7 @@ class StructLookup(ExprReducer): return self.CST return None - def reduce_at_struct_ptr(self, node, _): + def reduce_at_struct_ptr(self, node, **kwargs): """ Reduction: @FIELD_A_PTR -> FIELD_A """ diff --git a/example/expression/solve_condition_stp.py b/example/expression/solve_condition_stp.py index c9d4c7af..438188ab 100644 --- a/example/expression/solve_condition_stp.py +++ b/example/expression/solve_condition_stp.py @@ -11,7 +11,7 @@ from miasm2.core.bin_stream import bin_stream_str from miasm2.core import asmblock from miasm2.expression.expression import get_rw from miasm2.expression.modint import uint32 -from miasm2.ir.symbexec import SymbolicExecutionEngine +from miasm2.ir.symbexec import SymbolicExecutionEngine, get_block from miasm2.expression.simplifications import expr_simp from miasm2.expression import stp from miasm2.core import parse_asm @@ -30,21 +30,6 @@ if not args: sys.exit(0) -def get_block(ir_arch, mdis, ad): - if isinstance(ad, asmblock.AsmLabel): - l = ad - else: - l = mdis.symbol_pool.getby_offset_create(ad) - if not l in ir_arch.blocks: - ad = l.offset - b = mdis.dis_block(ad) - ir_arch.add_block(b) - b = ir_arch.get_block(l) - if b is None: - raise LookupError('no block found at that address: %s' % l) - return b - - def emul_symb(ir_arch, mdis, states_todo, states_done): while states_todo: ad, symbols, conds = states_todo.pop() diff --git a/example/ida/ctype_propagation.py b/example/ida/ctype_propagation.py index cb342213..7eb209cd 100644 --- a/example/ida/ctype_propagation.py +++ b/example/ida/ctype_propagation.py @@ -10,13 +10,15 @@ from miasm2.expression import expression as m2_expr from miasm2.expression.simplifications import expr_simp from miasm2.analysis.depgraph import DependencyGraph from miasm2.ir.ir import IRBlock, AssignBlock -from miasm2.arch.x86.ctype import CTypeAMD64_unk +from miasm2.arch.x86.ctype import CTypeAMD64_unk, CTypeX86_unk from miasm2.expression.expression import ExprId -from miasm2.core.objc import CTypesManagerNotPacked, CTypeAnalyzer, ExprToAccessC, CHandler +from miasm2.core.objc import CTypesManagerNotPacked, ExprToAccessC, CHandler from miasm2.core.ctypesmngr import CAstTypes from miasm2.expression.expression import ExprMem, ExprId, ExprInt, ExprOp, ExprAff from miasm2.ir.symbexec_types import SymbExecCType from miasm2.expression.parser import str_to_expr +from miasm2.ir.symbexec import SymbolicExecutionEngine, SymbolicState +from miasm2.analysis.cst_propag import add_state, propagate_cst_expr from utils import guess_machine @@ -25,7 +27,6 @@ class TypePropagationForm(ida_kernwin.Form): def __init__(self, ira): self.ira = ira - self.stk_unalias_force = False default_types_info = r"""ExprId("RDX", 64): char *""" archs = ["AMD64_unk", "X86_32_unk"] @@ -35,41 +36,31 @@ class TypePropagationForm(ida_kernwin.Form): BUTTON CANCEL NONE Dependency Graph Settings <##Header file :{headerFile}> -<Architecture/complator:{cbReg}> +<Architecture/complator:{arch}> <Types informations:{strTypesInfo}> -<Unalias stack:{rUnaliasStack}>{cMethod}> +<Unalias stack:{rUnaliasStack}>{cUnalias}> """, { 'headerFile': ida_kernwin.Form.FileInput(swidth=20, open=True), - 'cbReg': ida_kernwin.Form.DropdownListControl( + 'arch': ida_kernwin.Form.DropdownListControl( items=archs, readonly=False, selval=archs[0]), 'strTypesInfo': ida_kernwin.Form.MultiLineTextControl(text=default_types_info, flags=ida_kernwin.Form.MultiLineTextControl.TXTF_FIXEDFONT), - 'cMethod': ida_kernwin.Form.ChkGroupControl(("rUnaliasStack",)), + 'cUnalias': ida_kernwin.Form.ChkGroupControl(("rUnaliasStack",)), }) - self.Compile() + form, args = self.Compile() + form.rUnaliasStack.checked = True - @property - def unalias_stack(self): - return self.cMethod.value & 1 or self.stk_unalias_force - -def get_block(ir_arch, mdis, addr): - """Get IRBlock at address @addr""" - lbl = ir_arch.get_label(addr) - if not lbl in ir_arch.blocks: - block = mdis.dis_block(lbl.offset) - ir_arch.add_block(block) - irblock = ir_arch.get_block(lbl) - if irblock is None: - raise LookupError('No block found at that address: %s' % lbl) - return irblock - - -def get_types_mngr(headerFile): +def get_types_mngr(headerFile, arch): text = open(headerFile).read() - base_types = CTypeAMD64_unk() + if arch == "AMD64_unk": + base_types = CTypeAMD64_unk() + elif arch =="X86_32_unk": + base_types = CTypeX86_unk() + else: + raise NotImplementedError("Unsupported arch") types_ast = CAstTypes() # Add C types definition @@ -79,16 +70,11 @@ def get_types_mngr(headerFile): return types_mngr -class MyCTypeAnalyzer(CTypeAnalyzer): - allow_none_result = True - - class MyExprToAccessC(ExprToAccessC): allow_none_result = True class MyCHandler(CHandler): - cTypeAnalyzer_cls = MyCTypeAnalyzer exprToAccessC_cls = MyExprToAccessC @@ -103,52 +89,89 @@ class TypePropagationEngine(SymbExecCType): class SymbExecCTypeFix(SymbExecCType): + def __init__(self, ir_arch, + symbols, chandler, + cst_propag_link, + func_read=None, func_write=None, + sb_expr_simp=expr_simp): + super(SymbExecCTypeFix, self).__init__(ir_arch, + symbols, + chandler, + func_read=func_read, + func_write=func_write, + sb_expr_simp=expr_simp) + + self.cst_propag_link = cst_propag_link + def emulbloc(self, irb, step=False): """ Symbolic execution of the @irb on the current state @irb: irblock instance @step: display intermediate steps """ + offset2cmt = {} - for assignblk in irb.irs: + for index, assignblk in enumerate(irb.irs): + if set(assignblk) == set([self.ir_arch.IRDst, self.ir_arch.pc]): + # Don't display on jxx + continue instr = assignblk.instr - tmp_rw = assignblk.get_rw() - for dst, src in assignblk.iteritems(): - for arg in set(instr.args).union(set([src])): - if arg in tmp_rw and arg not in tmp_rw.values(): - continue - objc = self.eval_expr(arg) - if objc is None: - continue - if self.is_type_offset(objc): - continue + tmp_r = assignblk.get_r() + tmp_w = assignblk.get_w() + + todo = set() + + # Replace PC with value to match IR args + pc_fixed = {self.ir_arch.pc: m2_expr.ExprInt(instr.offset + instr.l, self.ir_arch.pc.size)} + for arg in tmp_r: + arg = expr_simp(arg.replace_expr(pc_fixed)) + if arg in tmp_w and not arg.is_mem(): + continue + todo.add(arg) + + for expr in todo: + if expr.is_int(): + continue + for c_str, c_type in self.chandler.expr_to_c_and_types(expr, self.symbols): + expr = self.cst_propag_link.get((irb.label, index), {}).get(expr, expr) offset2cmt.setdefault(instr.offset, set()).add( - "%s: %s" % (arg, str(objc))) - self.eval_ir(assignblk) + "\n%s: %s\n%s" % (expr, c_str, c_type)) + self.eval_ir(assignblk) for offset, value in offset2cmt.iteritems(): idc.MakeComm(offset, '\n'.join(value)) + print "%x\n" % offset, '\n'.join(value) return self.eval_expr(self.ir_arch.IRDst) class CTypeEngineFixer(SymbExecCTypeFix): - def __init__(self, ir_arch, types_mngr, state): + def __init__(self, ir_arch, types_mngr, state, cst_propag_link): mychandler = MyCHandler(types_mngr, state.symbols) super(CTypeEngineFixer, self).__init__(ir_arch, state.symbols, - mychandler) + mychandler, + cst_propag_link) -def add_state(ir_arch, todo, states, addr, state): - addr = ir_arch.get_label(addr) - if addr not in states: - states[addr] = state - todo.add(addr) - else: - todo.add(addr) - states[addr] = states[addr].merge(state) +def get_ira_call_fixer(ira): + + class iraCallStackFixer(ira): + + def call_effects(self, ad, instr): + print hex(instr.offset), instr + stk_before = idc.GetSpd(instr.offset) + stk_after = idc.GetSpd(instr.offset + instr.l) + stk_diff = stk_after - stk_before + print hex(stk_diff) + return [AssignBlock([ExprAff(self.ret_reg, ExprOp('call_func_ret', ad)), + ExprAff(self.sp, self.sp + ExprInt(stk_diff, self.sp.size)) + ], + instr + )] + + return iraCallStackFixer def analyse_function(): @@ -159,7 +182,11 @@ def analyse_function(): bs = bin_stream_ida() mdis = dis_engine(bs, dont_dis_nulstart_bloc=True) - ir_arch = ira(mdis.symbol_pool) + + + iraCallStackFixer = get_ira_call_fixer(ira) + ir_arch = iraCallStackFixer(mdis.symbol_pool) + # Get the current function func = ida_funcs.get_func(idc.ScreenEA()) @@ -169,13 +196,20 @@ def analyse_function(): for block in blocks: ir_arch.add_block(block) + # Get settings settings = TypePropagationForm(ir_arch) ret = settings.Execute() if not ret: return - types_mngr = get_types_mngr(settings.headerFile.value) + cst_propag_link = {} + if settings.cUnalias.value: + init_infos = {ir_arch.sp: ir_arch.arch.regs.regs_init[ir_arch.sp] } + cst_propag_link = propagate_cst_expr(ir_arch, addr, init_infos) + + + types_mngr = get_types_mngr(settings.headerFile.value, settings.arch.value) mychandler = MyCHandler(types_mngr, {}) infos_types = {} for line in settings.strTypesInfo.value.split('\n'): @@ -184,14 +218,13 @@ def analyse_function(): expr_str, ctype_str = line.split(':') expr_str, ctype_str = expr_str.strip(), ctype_str.strip() expr = str_to_expr(expr_str) - ast = mychandler.type_analyzer.types_mngr.types_ast.parse_c_type( + ast = mychandler.types_mngr.types_ast.parse_c_type( ctype_str) - ctype = mychandler.type_analyzer.types_mngr.types_ast.ast_parse_declaration(ast.ext[ - 0]) + ctype = mychandler.types_mngr.types_ast.ast_parse_declaration(ast.ext[0]) objc = types_mngr.get_objc(ctype) print '=' * 20 print expr, objc - infos_types[expr] = objc + infos_types[expr] = set([objc]) # Add fake head lbl_real_start = ir_arch.symbol_pool.getby_offset(addr) @@ -220,21 +253,18 @@ def analyse_function(): done.add((lbl, state)) symbexec_engine = TypePropagationEngine(ir_arch, types_mngr, state) - get_block(ir_arch, mdis, lbl) - + assert lbl in ir_arch.blocks addr = symbexec_engine.emul_ir_block(lbl) symbexec_engine.del_mem_above_stack(ir_arch.sp) ir_arch._graph = None sons = ir_arch.graph.successors(lbl) for son in sons: - if son.offset is None: - continue - add_state(ir_arch, todo, states, son.offset, + add_state(ir_arch, todo, states, son, symbexec_engine.get_state()) for lbl, state in states.iteritems(): - symbexec_engine = CTypeEngineFixer(ir_arch, types_mngr, state) + symbexec_engine = CTypeEngineFixer(ir_arch, types_mngr, state, cst_propag_link) addr = symbexec_engine.emul_ir_block(lbl) symbexec_engine.del_mem_above_stack(ir_arch.sp) diff --git a/example/samples/human.S b/example/samples/human.S index 750aa5b7..6cdeab0f 100644 --- a/example/samples/human.S +++ b/example/samples/human.S @@ -1,11 +1,11 @@ ;; Walk a human link list and print its information main: TEST RDI, RDI - JZ end + JZ next PUSH RBX MOV RBX, RDI -loop: +loop_arg: LEA RSI, QWORD PTR [RBX+0x10] LEA RDI, QWORD PTR [name-_+RIP] XOR EAX, EAX @@ -23,9 +23,23 @@ loop: MOV RBX, QWORD PTR [RBX] TEST RBX, RBX - JNZ loop + JNZ loop_arg POP RBX +next: + + + LEA RBX, QWORD PTR [struct_human_ptr-_+RIP] +loop_global: + CMP RBX, 0 + JZ end + + LEA RSI, QWORD PTR [RBX+0x10] + LEA RDI, QWORD PTR [name-_+RIP] + XOR EAX, EAX + CALL printf + MOV RBX, QWORD PTR [RBX] + JMP loop_global end: RET @@ -39,3 +53,5 @@ height: .string "Height: %d\n" name: .string "Name: %s\n" +struct_human_ptr: +.dword 0xdead, 0xcafe diff --git a/miasm2/analysis/cst_propag.py b/miasm2/analysis/cst_propag.py new file mode 100644 index 00000000..d55d7e60 --- /dev/null +++ b/miasm2/analysis/cst_propag.py @@ -0,0 +1,178 @@ +import logging + +from miasm2.ir.symbexec import SymbolicExecutionEngine +from miasm2.expression.expression import ExprMem +from miasm2.expression.expression_helper import possible_values +from miasm2.expression.simplifications import expr_simp +from miasm2.ir.ir import IRBlock, AssignBlock + +LOG_CST_PROPAG = logging.getLogger("cst_propag") +CONSOLE_HANDLER = logging.StreamHandler() +CONSOLE_HANDLER.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) +LOG_CST_PROPAG.addHandler(CONSOLE_HANDLER) +LOG_CST_PROPAG.setLevel(logging.WARNING) + + +class SymbExecState(SymbolicExecutionEngine): + """ + State manager for SymbolicExecution + """ + def __init__(self, ir_arch, state): + super(SymbExecState, self).__init__(ir_arch, {}) + self.set_state(state) + + +def add_state(ir_arch, todo, states, addr, state): + """ + Add or merge the computed @state for the block at @addr. Update @todo + @ir_arch: IR instance + @todo: modified block set + @states: dictionnary linking a label to its entering state. + @addr: address of the concidered block + @state: computed state + """ + addr = ir_arch.get_label(addr) + todo.add(addr) + if addr not in states: + states[addr] = state + else: + states[addr] = states[addr].merge(state) + + +def is_expr_cst(ir_arch, expr): + """Return true if @expr is only composed of ExprInt and init_regs + @ir_arch: IR instance + @expr: Expression to test""" + + elements = expr.get_r(mem_read=True) + for element in elements: + if element.is_mem(): + continue + if element.is_id() and element in ir_arch.arch.regs.all_regs_ids_init: + continue + if element.is_int(): + continue + return False + else: + # Expr is a constant + return True + + +class SymbExecStateFix(SymbolicExecutionEngine): + """ + Emul blocks and replace expressions with their corresponding constant if + any. + + """ + # Function used to test if an Expression is considered as a constant + is_expr_cst = lambda _, ir_arch, expr: is_expr_cst(ir_arch, expr) + + def __init__(self, ir_arch, state, cst_propag_link): + super(SymbExecStateFix, self).__init__(ir_arch, {}) + self.set_state(state) + self.cst_propag_link = cst_propag_link + + def propag_expr_cst(self, expr): + """Propagate consttant expressions in @expr + @expr: Expression to update""" + elements = expr.get_r(mem_read=True) + to_propag = {} + for element in elements: + # Only ExprId can be safely propagated + if not element.is_id(): + continue + value = self.eval_expr(element) + if self.is_expr_cst(self.ir_arch, value): + to_propag[element] = value + return expr_simp(expr.replace_expr(to_propag)) + + def emulbloc(self, irb, step=False): + """ + Symbolic execution of the @irb on the current state + @irb: IRBlock instance + @step: display intermediate steps + """ + assignblks = [] + for index, assignblk in enumerate(irb.irs): + new_assignblk = {} + links = {} + for dst, src in assignblk.iteritems(): + src = self.propag_expr_cst(src) + if dst.is_mem(): + ptr = dst.arg + ptr = self.propag_expr_cst(ptr) + dst = ExprMem(ptr, dst.size) + new_assignblk[dst] = src + + for arg in assignblk.instr.args: + new_arg = self.propag_expr_cst(arg) + links[new_arg] = arg + self.cst_propag_link[(irb.label, index)] = links + + self.eval_ir(assignblk) + assignblks.append(AssignBlock(new_assignblk, assignblk.instr)) + self.ir_arch.blocks[irb.label] = IRBlock(irb.label, assignblks) + + +def compute_cst_propagation_states(ir_arch, init_addr, init_infos): + """ + Propagate "constant expressions" in a function. + The attribute "constant expression" is true if the expression is based on + constants or "init" regs values. + + @ir_arch: IntermediateRepresentation instance + @init_addr: analysis start address + @init_infos: dictionnary linking expressions to their values at @init_addr + """ + + done = set() + state = SymbExecState.StateEngine(init_infos) + lbl = ir_arch.get_label(init_addr) + todo = set([lbl]) + states = {lbl: state} + + while todo: + if not todo: + break + lbl = todo.pop() + state = states[lbl] + if (lbl, state) in done: + continue + done.add((lbl, state)) + symbexec_engine = SymbExecState(ir_arch, state) + + assert lbl in ir_arch.blocks + addr = symbexec_engine.emul_ir_block(lbl) + symbexec_engine.del_mem_above_stack(ir_arch.sp) + + for dst in possible_values(addr): + value = dst.value + if value.is_mem(): + LOG_CST_PROPAG.warning('Bad destination: %s', value) + continue + elif value.is_int(): + value = ir_arch.get_label(value) + add_state(ir_arch, todo, states, value, + symbexec_engine.get_state()) + + return states + + +def propagate_cst_expr(ir_arch, addr, init_infos): + """ + Propagate "constant expressions" in a @ir_arch. + The attribute "constant expression" is true if the expression is based on + constants or "init" regs values. + + @ir_arch: IntermediateRepresentation instance + @addr: analysis start address + @init_infos: dictionnary linking expressions to their values at @init_addr + + Returns a mapping between replaced Expression and their new values. + """ + states = compute_cst_propagation_states(ir_arch, addr, init_infos) + cst_propag_link = {} + for lbl, state in states.iteritems(): + symbexec = SymbExecStateFix(ir_arch, state, cst_propag_link) + symbexec.emulbloc(ir_arch.blocks[lbl]) + return cst_propag_link diff --git a/miasm2/core/ctypesmngr.py b/miasm2/core/ctypesmngr.py index eeffb696..4bd32cb3 100644 --- a/miasm2/core/ctypesmngr.py +++ b/miasm2/core/ctypesmngr.py @@ -118,10 +118,12 @@ class CTypeStruct(CTypeBase): """C type for structure""" def __init__(self, name, fields=None): + assert name is not None self.name = name if fields is None: fields = () - for _, field in fields: + for field_name, field in fields: + assert field_name is not None assert isinstance(field, CTypeBase) self.fields = tuple(fields) super(CTypeStruct, self).__init__() @@ -146,10 +148,12 @@ class CTypeUnion(CTypeBase): """C type for union""" def __init__(self, name, fields=None): + assert name is not None self.name = name if fields is None: fields = [] - for _, field in fields: + for field_name, field in fields: + assert field_name is not None assert isinstance(field, CTypeBase) self.fields = tuple(fields) super(CTypeUnion, self).__init__() @@ -195,7 +199,7 @@ class CTypeFunc(CTypeBase): if type_ret: assert isinstance(type_ret, CTypeBase) if args: - for arg in args: + for arg_name, arg in args: assert isinstance(arg, CTypeBase) args = tuple(args) else: @@ -221,7 +225,7 @@ class CTypeFunc(CTypeBase): return "<Func:%s (%s) %s(%s)>" % (self.type_ret, self.abi, self.name, - ", ".join([str(arg) for arg in self.args])) + ", ".join(["%s %s" % (name, arg) for (name, arg) in self.args])) class CTypeEllipsis(CTypeBase): @@ -292,6 +296,7 @@ class FuncNameIdentifier(c_ast.NodeVisitor): class CAstTypes(object): """Store all defined C types and typedefs""" INTERNAL_PREFIX = "__GENTYPE__" + ANONYMOUS_PREFIX = "__ANONYMOUS__" def __init__(self, knowntypes=None, knowntypedefs=None): if knowntypes is None: @@ -342,10 +347,20 @@ class CAstTypes(object): self.cpt += 1 return self.INTERNAL_PREFIX + "%d" % cpt + def gen_anon_name(self): + """Generate name for anonymous strucs/union""" + cpt = self.cpt + self.cpt += 1 + return self.ANONYMOUS_PREFIX + "%d" % cpt + def is_generated_name(self, name): """Return True if the name is internal""" return name.startswith(self.INTERNAL_PREFIX) + def is_anonymous_name(self, name): + """Return True if the name is anonymous""" + return name.startswith(self.ANONYMOUS_PREFIX) + def add_type(self, type_id, type_obj): """Add new C type @type_id: Type descriptor (CTypeBase instance) @@ -521,7 +536,11 @@ class CAstTypes(object): args = [] if ast.decls: for arg in ast.decls: - args.append((arg.name, self.ast_to_typeid(arg))) + if arg.name is None: + arg_name = self.gen_anon_name() + else: + arg_name = arg.name + args.append((arg_name, self.ast_to_typeid(arg))) decl = CTypeStruct(name, args) return decl @@ -531,7 +550,11 @@ class CAstTypes(object): args = [] if ast.decls: for arg in ast.decls: - args.append((arg.name, self.ast_to_typeid(arg))) + if arg.name is None: + arg_name = self.gen_anon_name() + else: + arg_name = arg.name + args.append((arg_name, self.ast_to_typeid(arg))) decl = CTypeUnion(name, args) return decl @@ -568,7 +591,14 @@ class CAstTypes(object): type_ret = self.ast_to_typeid(ast.type) name, decl_info = self.get_funcname(ast.type) if ast.args: - args = [self.ast_to_typeid(arg) for arg in ast.args.params] + args = [] + for arg in ast.args.params: + typeid = self.ast_to_typeid(arg) + if isinstance(typeid, CTypeEllipsis): + arg_name = None + else: + arg_name = arg.name + args.append((arg_name, typeid)) else: args = [] diff --git a/miasm2/core/objc.py b/miasm2/core/objc.py index 917d0ea9..d6c100ca 100644 --- a/miasm2/core/objc.py +++ b/miasm2/core/objc.py @@ -6,6 +6,7 @@ C helper for Miasm: """ +import warnings from pycparser import c_parser, c_ast from miasm2.expression.expression_reduce import ExprReducer @@ -17,53 +18,123 @@ from miasm2.core.ctypesmngr import CTypeUnion, CTypeStruct, CTypeId, CTypePtr,\ PADDING_TYPE_NAME = "___padding___" +def missing_definition(objtype): + warnings.warn("Null size type: Missing definition? %r" % objtype) + +""" +Display C type +source: "The C Programming Language - 2nd Edition - Ritchie Kernighan.pdf" +p. 124 +""" + +def objc_to_str(objc, result=None): + if result is None: + result = "" + while True: + if isinstance(objc, ObjCArray): + result += "[%d]" % objc.elems + objc = objc.objtype + elif isinstance(objc, ObjCPtr): + if not result and isinstance(objc.objtype, ObjCFunc): + result = objc.objtype.name + if isinstance(objc.objtype, (ObjCPtr, ObjCDecl, ObjCStruct, ObjCUnion)): + result = "*%s" % result + else: + result = "(*%s)" % result + + objc = objc.objtype + elif isinstance(objc, (ObjCDecl, ObjCStruct, ObjCUnion)): + if result: + result = "%s %s" % (objc, result) + else: + result = str(objc) + break + elif isinstance(objc, ObjCFunc): + args_str = [] + for name, arg in objc.args: + args_str.append(objc_to_str(arg, name)) + args = ", ".join(args_str) + result += "(%s)" % args + objc = objc.type_ret + elif isinstance(objc, ObjCInt): + return "int" + elif isinstance(objc, ObjCEllipsis): + return "..." + else: + raise TypeError("Unknown c type") + return result + + class ObjC(object): """Generic ObjC""" - def set_align_size(self, align, size): - """Set C object alignment and size""" + def __init__(self, align, size): + self._align = align + self._size = size - self.align = align - self.size = size + @property + def align(self): + """Alignment (in bytes) of the C object""" + return self._align - def eq_base(self, other): - return (self.__class__ == other.__class__ and - self.align == other.align and - self.size == other.size) + @property + def size(self): + """Size (in bytes) of the C object""" + return self._size + + def cmp_base(self, other): + assert self.__class__ in OBJC_PRIO + assert other.__class__ in OBJC_PRIO + + if OBJC_PRIO[self.__class__] != OBJC_PRIO[other.__class__]: + return cmp(OBJC_PRIO[self.__class__], OBJC_PRIO[other.__class__]) + if self.align != other.align: + return cmp(self.align, other.align) + return cmp(self.size, other.size) + + def __hash__(self): + return hash((self.__class__, self._align, self._size)) + + def __str__(self): + return objc_to_str(self) class ObjCDecl(ObjC): """C Declaration identified""" def __init__(self, name, align, size): - super(ObjCDecl, self).__init__() - self.name, self.align, self.size = name, align, size + super(ObjCDecl, self).__init__(align, size) + self._name = name + + name = property(lambda self: self._name) + + def __hash__(self): + return hash((super(ObjCDecl, self).__hash__(), self._name)) def __repr__(self): return '<%s %s>' % (self.__class__.__name__, self.name) def __str__(self): - return '%s' % (self.name) + return str(self.name) - def __eq__(self, other): - if not self.eq_base(other): - return False - return self.name == other.name + def __cmp__(self, other): + ret = self.cmp_base(other) + if ret: + return ret + return cmp(self.name, other.name) class ObjCInt(ObjC): """C integer""" def __init__(self): - super(ObjCInt, self).__init__() - self.size = None - self.align = None + super(ObjCInt, self).__init__(None, None) def __str__(self): return 'int' - def __eq__(self, other): - return self.eq_base(other) + def __cmp__(self, other): + return self.cmp_base(other) class ObjCPtr(ObjC): @@ -77,37 +148,38 @@ class ObjCPtr(ObjC): @void_p_size: pointer size (in bytes) """ - super(ObjCPtr, self).__init__() + super(ObjCPtr, self).__init__(void_p_align, void_p_size) + self._lock = False + self.objtype = objtype - self.align = void_p_align - self.size = void_p_size + if objtype is None: + self._lock = False + + def get_objtype(self): + assert self._lock is True + return self._objtype + + def set_objtype(self, objtype): + assert self._lock is False + self._lock = True + self._objtype = objtype + + objtype = property(get_objtype, set_objtype) + + def __hash__(self): + # Don't try to hash on an unlocked Ptr (still mutable) + assert self._lock + return hash((super(ObjCPtr, self).__hash__(), hash(self._objtype))) def __repr__(self): return '<%s %r>' % (self.__class__.__name__, self.objtype.__class__) - def __str__(self): - target = self.objtype - if isinstance(target, ObjCDecl): - return "%s *" % target.name - elif isinstance(target, ObjCPtr): - return "%s *" % target - elif isinstance(target, ObjCStruct): - return "struct %s *" % target.name - elif isinstance(target, ObjCUnion): - return "union %s *" % target.name - elif isinstance(target, ObjCArray): - return "%s (*)[%s]" % (target.objtype, target.elems) - elif isinstance(target, ObjCFunc): - args = ", ".join([str(arg) for arg in target.args]) - return "%s (*%s)(%s)" % (target.type_ret, target.name, args) - else: - return '*%s' % (target) - - def __eq__(self, other): - if not self.eq_base(other): - return False - return self.objtype == other.objtype + def __cmp__(self, other): + ret = self.cmp_base(other) + if ret: + return ret + return cmp(self.objtype, other.objtype) class ObjCArray(ObjC): @@ -120,42 +192,42 @@ class ObjCArray(ObjC): @elems: number of elements in the array """ - super(ObjCArray, self).__init__() - self.elems = elems - self.objtype = objtype - self.align = objtype.align - self.size = elems * objtype.size + super(ObjCArray, self).__init__(objtype.align, elems * objtype.size) + self._elems = elems + self._objtype = objtype + + objtype = property(lambda self: self._objtype) + elems = property(lambda self: self._elems) + + def __hash__(self): + return hash((super(ObjCArray, self).__hash__(), self._elems, hash(self._objtype))) def __repr__(self): return '<%r[%d]>' % (self.objtype, self.elems) - def __str__(self): - return '%s[%d]' % (self.objtype, self.elems) - - def __eq__(self, other): - if not self.eq_base(other): - return False - return (self.elems == other.elems and - self.objtype == other.objtype) + def __cmp__(self, other): + ret = self.cmp_base(other) + if ret: + return ret + ret = cmp(self.elems, other.elems) + if ret: + return ret + return cmp(self.objtype, other.objtype) class ObjCStruct(ObjC): """C object for structures""" - def __init__(self, name): - super(ObjCStruct, self).__init__() - self.name = name - self.fields = [] - - def add_field(self, name, objtype, offset, size): - """Add a field into the structure - @name: field name - @objtype: field type - @offset: field offset in the structure - @size: field size - """ + def __init__(self, name, align, size, fields): + super(ObjCStruct, self).__init__(align, size) + self._name = name + self._fields = tuple(fields) - self.fields.append((name, objtype, offset, size)) + name = property(lambda self: self._name) + fields = property(lambda self: self._fields) + + def __hash__(self): + return hash((super(ObjCStruct, self).__hash__(), self._name)) def __repr__(self): out = [] @@ -169,34 +241,25 @@ class ObjCStruct(ObjC): def __str__(self): return 'struct %s' % (self.name) - def __eq__(self, other): - if not (self.eq_base(other) and self.name == other.name): - return False - if len(self.fields) != len(other.fields): - return False - for field_a, field_b in zip(self.fields, other.fields): - if field_a != field_b: - return False - return True - + def __cmp__(self, other): + ret = self.cmp_base(other) + if ret: + return ret + return cmp(self.name, other.name) class ObjCUnion(ObjC): """C object for unions""" - def __init__(self, name): - super(ObjCUnion, self).__init__() - self.name = name - self.fields = [] - - def add_field(self, name, objtype, offset, size): - """Add a field into the structure - @name: field name - @objtype: field type - @offset: field offset in the structure - @size: field size - """ + def __init__(self, name, align, size, fields): + super(ObjCUnion, self).__init__(align, size) + self._name = name + self._fields = tuple(fields) - self.fields.append((name, objtype, offset, size)) + name = property(lambda self: self._name) + fields = property(lambda self: self._fields) + + def __hash__(self): + return hash((super(ObjCUnion, self).__hash__(), self._name)) def __repr__(self): out = [] @@ -210,40 +273,42 @@ class ObjCUnion(ObjC): def __str__(self): return 'union %s' % (self.name) - def __eq__(self, other): - if not (self.eq_base(other) and self.name == other.name): - return False - if len(self.fields) != len(other.fields): - return False - for field_a, field_b in zip(self.fields, other.fields): - if field_a != field_b: - return False - return True - + def __cmp__(self, other): + ret = self.cmp_base(other) + if ret: + return ret + return cmp(self.name, other.name) class ObjCEllipsis(ObjC): """C integer""" def __init__(self): - super(ObjCEllipsis, self).__init__() - self.size = None - self.align = None + super(ObjCEllipsis, self).__init__(None, None) - def __eq__(self, other): - return self.eq_base(other) + align = property(lambda self: self._align) + size = property(lambda self: self._size) + + def __cmp__(self, other): + return self.cmp_base(other) class ObjCFunc(ObjC): """C object for Functions""" def __init__(self, name, abi, type_ret, args, void_p_align, void_p_size): - super(ObjCFunc, self).__init__() - self.name = name - self.abi = abi - self.type_ret = type_ret - self.args = args - self.align = void_p_align - self.size = void_p_size + super(ObjCFunc, self).__init__(void_p_align, void_p_size) + self._name = name + self._abi = abi + self._type_ret = type_ret + self._args = tuple(args) + + args = property(lambda self: self._args) + type_ret = property(lambda self: self._type_ret) + abi = property(lambda self: self._abi) + name = property(lambda self: self._name) + + def __hash__(self): + return hash((super(ObjCFunc, self).__hash__(), hash(self._args), self._name)) def __repr__(self): return "<%s %s>" % (self.__class__.__name__, @@ -254,20 +319,27 @@ class ObjCFunc(ObjC): out.append("Function (%s) %s: (align: %d)" % (self.abi, self.name, self.align)) out.append(" ret: %s" % (str(self.type_ret))) out.append(" Args:") - for arg in self.args: - out.append(" %s" % arg) + for name, arg in self.args: + out.append(" %s %s" % (name, arg)) return '\n'.join(out) - def __eq__(self, other): - if not (self.eq_base(other) and self.name == other.name and - self.type_ret == other.type_ret): - return False - if len(self.args) != len(other.args): - return False - for arg_a, arg_b in zip(self.args, other.args): - if arg_a != arg_b: - return False - return True + def __cmp__(self, other): + ret = self.cmp_base(other) + if ret: + return ret + return cmp(self.name, other.name) + +OBJC_PRIO = { + ObjC: 0, + ObjCDecl:1, + ObjCInt:2, + ObjCPtr:3, + ObjCArray:4, + ObjCStruct:5, + ObjCUnion:6, + ObjCEllipsis:7, + ObjCFunc:8, +} def access_simplifier(expr): @@ -342,6 +414,22 @@ class CGen(object): default_size = 64 + + def __init__(self, ctype): + self._ctype = ctype + + @property + def ctype(self): + """Type (ObjC instance) of the current object""" + return self._ctype + + def __hash__(self): + return hash(self.__class__) + + def __eq__(self, other): + return (self.__class__ == other.__class__ and + self._ctype == other.ctype) + def to_c(self): """Generate corresponding C""" @@ -358,8 +446,20 @@ class CGenInt(CGen): def __init__(self, integer): assert isinstance(integer, (int, long)) - self.integer = integer - self.ctype = ObjCInt() + self._integer = integer + super(CGenInt, self).__init__(ObjCInt()) + + @property + def integer(self): + """Value of the object""" + return self._integer + + def __hash__(self): + return hash((super(CGenInt, self).__hash__(), self._integer)) + + def __eq__(self, other): + return (super(CGenInt, self).__eq__(other) and + self._integer == other.integer) def to_c(self): """Generate corresponding C""" @@ -380,9 +480,21 @@ class CGenId(CGen): """ID of a C object""" def __init__(self, ctype, name): - self.ctype = ctype - self.name = name + self._name = name assert isinstance(name, str) + super(CGenId, self).__init__(ctype) + + @property + def name(self): + """Name of the Id""" + return self._name + + def __hash__(self): + return hash((super(CGenId, self).__hash__(), self._name)) + + def __eq__(self, other): + return (super(CGenId, self).__eq__(other) and + self._name == other.name) def __repr__(self): return "<%s %s>" % (self.__class__.__name__, @@ -413,14 +525,32 @@ class CGenField(CGen): """ def __init__(self, struct, field, fieldtype, void_p_align, void_p_size): - self.struct = struct - self.field = field + self._struct = struct + self._field = field assert isinstance(field, str) if isinstance(fieldtype, ObjCArray): ctype = fieldtype else: ctype = ObjCPtr(fieldtype, void_p_align, void_p_size) - self.ctype = ctype + super(CGenField, self).__init__(ctype) + + @property + def struct(self): + """Structure containing the field""" + return self._struct + + @property + def field(self): + """Field name""" + return self._field + + def __hash__(self): + return hash((super(CGenField, self).__hash__(), self._struct, self._field)) + + def __eq__(self, other): + return (super(CGenField, self).__eq__(other) and + self._struct == other.struct and + self._field == other.field) def to_c(self): """Generate corresponding C""" @@ -467,8 +597,8 @@ class CGenArray(CGen): - X[] => X* """ - def __init__(self, name, element, void_p_align, void_p_size): - ctype = name.ctype + def __init__(self, base, elems, void_p_align, void_p_size): + ctype = base.ctype if isinstance(ctype, ObjCPtr): pass elif isinstance(ctype, ObjCArray) and isinstance(ctype.objtype, ObjCArray): @@ -477,21 +607,39 @@ class CGenArray(CGen): ctype = ObjCPtr(ctype.objtype, void_p_align, void_p_size) else: raise TypeError("Strange case") - self.ctype = ctype - self.name = name - self.element = element + self._base = base + self._elems = elems + super(CGenArray, self).__init__(ctype) + + @property + def base(self): + """Base object supporting the array""" + return self._base + + @property + def elems(self): + """Number of elements in the array""" + return self._elems + + def __hash__(self): + return hash((super(CGenArray, self).__hash__(), self._base, self._elems)) + + def __eq__(self, other): + return (super(CGenField, self).__eq__(other) and + self._base == other.base and + self._elems == other.elems) def __repr__(self): return "<%s %s>" % (self.__class__.__name__, - self.name) + self.base) def to_c(self): """Generate corresponding C""" if isinstance(self.ctype, ObjCPtr): - out_str = "&((%s)[%d])" % (self.name.to_c(), self.element) + out_str = "&((%s)[%d])" % (self.base.to_c(), self.elems) elif isinstance(self.ctype, ObjCArray): - out_str = "(%s)[%d]" % (self.name.to_c(), self.element) + out_str = "(%s)[%d]" % (self.base.to_c(), self.elems) else: raise RuntimeError("Strange case") return out_str @@ -502,12 +650,12 @@ class CGenArray(CGen): if isinstance(self.ctype, ObjCPtr): return ExprOp("addr", ExprOp("[]", - self.name.to_expr(), - ExprInt(self.element, self.default_size))) + self.base.to_expr(), + ExprInt(self.elems, self.default_size))) elif isinstance(self.ctype, ObjCArray): return ExprOp("[]", - self.name.to_expr(), - ExprInt(self.element, self.default_size)) + self.base.to_expr(), + ExprInt(self.elems, self.default_size)) else: raise RuntimeError("Strange case") @@ -522,28 +670,40 @@ class CGenDeref(CGen): - X* => X """ - def __init__(self, mem): - assert isinstance(mem.ctype, ObjCPtr) - self.ctype = mem.ctype.objtype - self.mem = mem + def __init__(self, ptr): + assert isinstance(ptr.ctype, ObjCPtr) + self._ptr = ptr + super(CGenDeref, self).__init__(ptr.ctype.objtype) + + @property + def ptr(self): + """Pointer object""" + return self._ptr + + def __hash__(self): + return hash((super(CGenDeref, self).__hash__(), self._ptr)) + + def __eq__(self, other): + return (super(CGenField, self).__eq__(other) and + self._ptr == other.ptr) def __repr__(self): return "<%s %s>" % (self.__class__.__name__, - self.mem) + self.ptr) def to_c(self): """Generate corresponding C""" - if not isinstance(self.mem.ctype, ObjCPtr): + if not isinstance(self.ptr.ctype, ObjCPtr): raise RuntimeError() - return "*(%s)" % (self.mem.to_c()) + return "*(%s)" % (self.ptr.to_c()) def to_expr(self): """Generate Miasm expression representing the C access""" - if not isinstance(self.mem.ctype, ObjCPtr): + if not isinstance(self.ptr.ctype, ObjCPtr): raise RuntimeError() - return ExprOp("deref", self.mem.to_expr()) + return ExprOp("deref", self.ptr.to_expr()) def ast_get_c_access_expr(ast, expr_types, lvl=0): @@ -615,201 +775,6 @@ def parse_access(c_access): return access -class CTypeAnalyzer(ExprReducer): - """ - Return the C type(s) of a native Miasm expression - """ - - def __init__(self, expr_types, types_mngr, enforce_strict_access=True): - """Init TypeAnalyzer - @expr_types: a dictionnary linking ID names to their types - @types_mngr: types manager - @enforce_strict_access: If false, get type even on expression - pointing to a middle of an object. If true, raise exception if such a - pointer is encountered - """ - - self.expr_types = expr_types - self.types_mngr = types_mngr - self.enforce_strict_access = enforce_strict_access - - def updt_expr_types(self, expr_types): - """Update expr_types - @expr_types: Dictionnary associating name to type - """ - - self.expr_types = expr_types - - CST = ObjCInt() - - def get_typeof(self, base_type, offset, deref, lvl=0): - """Return a list of pointers (or None) on the element at @offset of an - object of type @base_type - - In case of no @deref, stops recursion as soon as we reached the base of - an object. - In other cases, we need to go down to the final dereferenced object - - @base_type: type of main object - @offset: offset (in bytes) of the target sub object - @deref: get type for a pointer or a deref - @lvl: actual recursion level - """ - void_type = self.types_mngr.void_ptr - - if isinstance(base_type, ObjCStruct): - if offset == 0 and not deref: - # In this case, return the struct* - obj = ObjCPtr(base_type, void_type.align, void_type.size) - new_type = [obj] - return new_type - for _, subtype, f_offset, size in base_type.fields: - if not f_offset <= offset < f_offset + size: - continue - new_type = self.get_typeof( - subtype, offset - f_offset, deref, lvl + 1) - break - else: - raise RuntimeError('cannot find struct field') - elif isinstance(base_type, ObjCArray): - sub_offset = offset % (base_type.objtype.size) - element_num = offset / (base_type.objtype.size) - if element_num >= base_type.elems: - return None - if offset == 0 and not deref: - # In this case, return the array - return [base_type] - obj = self.get_typeof( - base_type.objtype, sub_offset, deref, lvl + 1) - new_type = obj - - elif isinstance(base_type, ObjCDecl): - if self.enforce_strict_access and offset != 0: - return [] - obj = ObjCPtr(base_type, void_type.align, void_type.size) - new_type = [obj] - - elif isinstance(base_type, ObjCUnion): - out = [] - if offset == 0 and not deref: - # In this case, return the struct* - obj = ObjCPtr(base_type, void_type.align, void_type.size) - new_type = [obj] - return new_type - for _, objtype, f_offset, size in base_type.fields: - if not f_offset <= offset < f_offset + size: - continue - new_type = self.get_typeof( - objtype, offset - f_offset, deref, lvl + 1) - out += new_type - new_type = out - elif isinstance(base_type, ObjCPtr): - if self.enforce_strict_access: - assert offset % base_type.size == 0 - obj = ObjCPtr(base_type, void_type.align, void_type.size) - new_type = [obj] - else: - raise NotImplementedError("deref type %r" % base_type) - return new_type - - def reduce_id(self, node, _): - """Get type of ExprId""" - if not(isinstance(node.expr, ExprId) and node.expr.name in self.expr_types): - return None - return [self.expr_types[node.expr.name]] - - def reduce_int(self, node, _): - """Get type of ExprInt""" - - if not isinstance(node.expr, ExprInt): - return None - return [self.CST] - - def get_solo_type(self, node): - """Return the type of the @node if it has only one possible type, - different from not None. In othe cases, return None. - """ - if node.info is None or len(node.info) != 1: - return None - return type(node.info[0]) - - def reduce_ptr_plus_cst(self, node, lvl): - """Get type of ptr + CST""" - - if not node.expr.is_op("+") or len(node.args) != 2: - return None - args_types = set([self.get_solo_type(node.args[0]), - self.get_solo_type(node.args[1])]) - if args_types != set([ObjCInt, ObjCPtr]): - return None - arg0, arg1 = node.args - out = [] - ptr_offset = int(arg1.expr) - for info in arg0.info: - ptr_basetype = info.objtype - # Array-like: int* ptr; ptr[1] = X - out += self.get_typeof(ptr_basetype, - ptr_offset % ptr_basetype.size, - False, - lvl) - - return out - - def reduce_cst_op_cst(self, node, _): - """Get type of CST + CST""" - - if not node.expr.is_op("+") or len(node.args) != 2: - return None - if node.args[0] is None or node.args[1] is None: - return None - args_types = set([self.get_solo_type(node.args[0]), - self.get_solo_type(node.args[1])]) - if args_types != set([ObjCInt]): - return None - return [self.CST] - - def reduce_deref(self, node, lvl): - """Get type of a dereferenced expression: - * @NN[ptr<elem>] -> elem (type) - * @64[ptr<ptr<elem>>] -> ptr<elem> - * @32[ptr<struct>] -> struct.00 - """ - - if not isinstance(node.expr, ExprMem): - return None - if node.arg.info is None: - return None - found = [] - for subtype in node.arg.info: - # subtype : ptr<elem> - if not isinstance(subtype, (ObjCPtr, ObjCArray)): - return None - target = subtype.objtype - # target : type(elem) - for ptr_target in self.get_typeof(target, 0, True, lvl): - r_target = ptr_target.objtype - # ptr_target: ptr<elem> - # r_target: elem - if (not(self.enforce_strict_access) or - r_target.size != node.expr.size / 8): - continue - found.append(r_target) - if not found: - return None - return found - - reduction_rules = [reduce_id, reduce_int, - reduce_ptr_plus_cst, reduce_cst_op_cst, - reduce_deref, - ] - - def get_type(self, expr): - """Return the C type(s) of the native Miasm expression @expr - @expr: Miasm expression""" - - return self.reduce(expr) - - class ExprToAccessC(ExprReducer): """ Generate the C access object(s) for a given native Miasm expression @@ -898,109 +863,103 @@ class ExprToAccessC(ExprReducer): OUT: - CGenArray(CGenField(toto, b), 1) """ + if base_type.size == 0: + missing_definition(base_type) + return set() + void_type = self.types_mngr.void_ptr if isinstance(base_type, ObjCStruct): - assert 0 <= offset < base_type.size + if not 0 <= offset < base_type.size: + return set() + if offset == 0 and not deref: # In this case, return the struct* - return [cgenobj] + return set([cgenobj]) - out = [] - for fieldname, subtype, f_offset, size in base_type.fields: - if not f_offset <= offset < f_offset + size: + for fieldname, subtype, field_offset, size in base_type.fields: + if not field_offset <= offset < field_offset + size: continue fieldptr = CGenField(CGenDeref(cgenobj), fieldname, subtype, void_type.align, void_type.size) - ret = self.cgen_access( - fieldptr, subtype, offset - f_offset, deref, lvl + 1) - for sname in ret: - finalobj = sname - out.append(finalobj) - new_type = out + new_type = self.cgen_access(fieldptr, subtype, + offset - field_offset, + deref, lvl + 1) break else: - raise RuntimeError('Cannot find struct field') + return set() elif isinstance(base_type, ObjCArray): + if base_type.objtype.size == 0: + missing_definition(base_type.objtype) + return set() element_num = offset / (base_type.objtype.size) - assert element_num < base_type.elems - f_offset = offset % base_type.objtype.size - cur_objtype = base_type - curobj = cgenobj - subtype = cur_objtype.objtype - if subtype == ObjCArray: - raise NotImplementedError("TODO") - else: - if f_offset != 0: - curobj = CGenArray(curobj, element_num, - void_type.align, void_type.size) - ret = self.cgen_access( - curobj, curobj.ctype.objtype, f_offset, deref, lvl + 1) - else: - curobj = CGenArray(curobj, element_num, - void_type.align, void_type.size) - ret = [curobj] - new_type = ret + field_offset = offset % base_type.objtype.size + if element_num >= base_type.elems: + return set() + if offset == 0 and not deref: + # In this case, return the array + return set([cgenobj]) + + curobj = CGenArray(cgenobj, element_num, + void_type.align, + void_type.size) + if field_offset == 0: + # We point to the start of the sub object, + # return it directly + return set([curobj]) + new_type = self.cgen_access(curobj, base_type.objtype, + field_offset, deref, lvl + 1) + elif isinstance(base_type, ObjCDecl): - if self.enforce_strict_access: - if offset % base_type.size != 0: - return [] + if self.enforce_strict_access and offset % base_type.size != 0: + return set() elem_num = offset / base_type.size nobj = CGenArray(cgenobj, elem_num, void_type.align, void_type.size) - new_type = [(nobj)] + new_type = set([nobj]) elif isinstance(base_type, ObjCUnion): - out = [] if offset == 0 and not deref: # In this case, return the struct* - return [cgenobj] + return set([cgenobj]) - for fieldname, objtype, f_offset, size in base_type.fields: - if not f_offset <= offset < f_offset + size: + out = set() + for fieldname, objtype, field_offset, size in base_type.fields: + if not field_offset <= offset < field_offset + size: continue field = CGenField(CGenDeref(cgenobj), fieldname, objtype, void_type.align, void_type.size) - new_type = self.cgen_access( - field, objtype, offset - f_offset, deref, lvl + 1) - if new_type is None: - continue - for sname in new_type: - finalobj = sname - out.append(finalobj) + out.update(self.cgen_access(field, objtype, + offset - field_offset, + deref, lvl + 1)) new_type = out elif isinstance(base_type, ObjCPtr): elem_num = offset / base_type.size - if self.enforce_strict_access: - assert offset % base_type.size == 0 - + if self.enforce_strict_access and offset % base_type.size != 0: + return set() nobj = CGenArray(cgenobj, elem_num, void_type.align, void_type.size) - new_type = [(nobj)] + new_type = set([nobj]) else: raise NotImplementedError("deref type %r" % base_type) return new_type - def reduce_id(self, node, _): - """Generate access for ExprId""" - - if not (isinstance(node.expr, ExprId) and - node.expr.name in self.expr_types): - return None - - objc = self.expr_types[node.expr.name] - out = CGenId(objc, node.expr.name) - return [out] + def reduce_known_expr(self, node, ctxt, **kwargs): + """Generate access for known expr""" + if node.expr in ctxt: + objcs = ctxt[node.expr] + return set(CGenId(objc, str(node.expr)) for objc in objcs) + return None - def reduce_int(self, node, _): + def reduce_int(self, node, **kwargs): """Generate access for ExprInt""" if not isinstance(node.expr, ExprInt): return None - return [CGenInt(int(node.expr))] + return set([CGenInt(int(node.expr))]) def get_solo_type(self, node): """Return the type of the @node if it has only one possible type, @@ -1008,33 +967,35 @@ class ExprToAccessC(ExprReducer): """ if node.info is None or len(node.info) != 1: return None - return type(node.info[0].ctype) + return type(list(node.info)[0].ctype) - def reduce_op(self, node, lvl): + def reduce_op(self, node, lvl=0, **kwargs): """Generate access for ExprOp""" - if not node.expr.is_op("+") or len(node.args) != 2: return None - args_types = set([self.get_solo_type(node.args[0]), - self.get_solo_type(node.args[1])]) - if args_types != set([ObjCInt, ObjCPtr]): + type_arg1 = self.get_solo_type(node.args[1]) + if type_arg1 != ObjCInt: return None - arg0, arg1 = node.args - out = [] + if arg0.info is None: + return None + void_type = self.types_mngr.void_ptr + out = set() ptr_offset = int(arg1.expr) - for name in arg0.info: - assert isinstance(name.ctype, ObjCPtr) - ptr_basetype = name.ctype.objtype + for info in arg0.info: + if isinstance(info.ctype, ObjCArray): + field_type = info.ctype + elif isinstance(info.ctype, ObjCPtr): + field_type = info.ctype.objtype + else: + continue + target_type = info.ctype.objtype + # Array-like: int* ptr; ptr[1] = X - ret = self.cgen_access(name, - ptr_basetype, - ptr_offset, False, lvl) - for subcgenobj in ret: - out.append(subcgenobj) + out.update(self.cgen_access(info, field_type, ptr_offset, False, lvl)) return out - def reduce_mem(self, node, lvl): + def reduce_mem(self, node, lvl=0, **kwargs): """Generate access for ExprMem: * @NN[ptr<elem>] -> elem (type) * @64[ptr<ptr<elem>>] -> ptr<elem> @@ -1045,44 +1006,61 @@ class ExprToAccessC(ExprReducer): return None if node.arg.info is None: return None - assert isinstance(node.arg.info, list) - found = [] + assert isinstance(node.arg.info, set) + void_type = self.types_mngr.void_ptr + found = set() for subcgenobj in node.arg.info: - if not isinstance(subcgenobj.ctype, ObjCPtr): - return None - target = subcgenobj.ctype.objtype - # target : type(elem) - if isinstance(target, (ObjCStruct, ObjCUnion)): - for finalcgenobj in self.cgen_access(subcgenobj, target, 0, True, lvl): - target = finalcgenobj.ctype.objtype - if not(self.enforce_strict_access) or target.size == node.expr.size / 8: - nobj = CGenDeref(finalcgenobj) - found.append(nobj) - elif isinstance(target, ObjCArray): - final = target.objtype - if not(self.enforce_strict_access) or final.size == node.expr.size / 8: - nobj = CGenDeref(subcgenobj) - found.append(nobj) - - else: - if not(self.enforce_strict_access) or target.size == node.expr.size / 8: - nobj = CGenDeref(subcgenobj) - found.append(nobj) - assert found + if isinstance(subcgenobj.ctype, ObjCArray): + nobj = CGenArray(subcgenobj, 0, + void_type.align, + void_type.size) + target = nobj.ctype.objtype + for finalcgenobj in self.cgen_access(nobj, target, 0, True, lvl): + assert isinstance(finalcgenobj.ctype, ObjCPtr) + if self.enforce_strict_access and finalcgenobj.ctype.objtype.size != node.expr.size / 8: + continue + found.add(CGenDeref(finalcgenobj)) + + elif isinstance(subcgenobj.ctype, ObjCPtr): + target = subcgenobj.ctype.objtype + # target : type(elem) + if isinstance(target, (ObjCStruct, ObjCUnion)): + for finalcgenobj in self.cgen_access(subcgenobj, target, 0, True, lvl): + target = finalcgenobj.ctype.objtype + if self.enforce_strict_access and target.size != node.expr.size / 8: + continue + found.add(CGenDeref(finalcgenobj)) + elif isinstance(target, ObjCArray): + if self.enforce_strict_access and subcgenobj.ctype.size != node.expr.size / 8: + continue + found.update(self.cgen_access(CGenDeref(subcgenobj), target, + 0, False, lvl)) + else: + if self.enforce_strict_access and target.size != node.expr.size / 8: + continue + found.add(CGenDeref(subcgenobj)) + if not found: + return None return found - reduction_rules = [reduce_id, + reduction_rules = [reduce_known_expr, reduce_int, reduce_op, reduce_mem, ] - def get_access(self, expr): + def get_accesses(self, expr, expr_context=None): """Generate C access(es) for the native Miasm expression @expr @expr: native Miasm expression + @expr_context: a dictionnary linking known expressions to their + types. An expression is linked to a tuple of types. """ - - return self.reduce(expr) + if expr_context is None: + expr_context = self.expr_types + ret = self.reduce(expr, ctxt=expr_context) + if ret.info is None: + return set() + return ret.info class ExprCToExpr(ExprReducer): @@ -1145,25 +1123,25 @@ class ExprCToExpr(ExprReducer): CST = "CST" - def reduce_id(self, node, _): - """Reduce ExprId""" - if not isinstance(node.expr, ExprId): - return None - if node.expr.name in self.expr_types: - objc = self.expr_types[node.expr.name] + def reduce_known_expr(self, node, ctxt, **kwargs): + """Reduce known expressions""" + if str(node.expr) in ctxt: + objc = ctxt[str(node.expr)] out = (node.expr, objc) - else: + elif node.expr.is_id(): out = (node.expr, None) + else: + out = None return out - def reduce_int(self, node, _): + def reduce_int(self, node, **kwargs): """Reduce ExprInt""" if not isinstance(node.expr, ExprInt): return None return self.CST - def reduce_op_memberof(self, node, _): + def reduce_op_memberof(self, node, **kwargs): """Reduce -> operator""" if not node.expr.is_op('->'): @@ -1173,6 +1151,8 @@ class ExprCToExpr(ExprReducer): assert isinstance(node.args[1].expr, ExprId) field = node.args[1].expr.name src, src_type = node.args[0].info + if src_type is None: + return None assert isinstance(src_type, (ObjCPtr, ObjCArray)) struct_dst = src_type.objtype assert isinstance(struct_dst, ObjCStruct) @@ -1192,7 +1172,7 @@ class ExprCToExpr(ExprReducer): assert found return out - def reduce_op_field(self, node, _): + def reduce_op_field(self, node, **kwargs): """Reduce field operator (Struct or Union)""" if not node.expr.is_op('field'): @@ -1245,7 +1225,7 @@ class ExprCToExpr(ExprReducer): assert found return out - def reduce_op_array(self, node, _): + def reduce_op_array(self, node, **kwargs): """Reduce array operator""" if not node.expr.is_op('[]'): @@ -1282,7 +1262,7 @@ class ExprCToExpr(ExprReducer): out = (expr, objtype) return out - def reduce_op_addr(self, node, _): + def reduce_op_addr(self, node, **kwargs): """Reduce addr operator""" if not node.expr.is_op('addr'): @@ -1309,7 +1289,7 @@ class ExprCToExpr(ExprReducer): raise NotImplementedError("unk type") return out - def reduce_op_deref(self, node, _): + def reduce_op_deref(self, node, **kwargs): """Reduce deref operator""" if not node.expr.is_op('deref'): @@ -1317,11 +1297,19 @@ class ExprCToExpr(ExprReducer): out = [] src, src_type = node.args[0].info assert isinstance(src_type, (ObjCPtr, ObjCArray)) - size = src_type.objtype.size * 8 - out = (ExprMem(src, size), (src_type.objtype)) + void_type = self.types_mngr.void_ptr + if isinstance(src_type, ObjCPtr): + if isinstance(src_type.objtype, ObjCArray): + size = void_type.size*8 + else: + size = src_type.objtype.size * 8 + out = (ExprMem(src, size), (src_type.objtype)) + else: + size = src_type.objtype.size * 8 + out = (ExprMem(src, size), (src_type.objtype)) return out - reduction_rules = [reduce_id, + reduction_rules = [reduce_known_expr, reduce_int, reduce_op_memberof, reduce_op_field, @@ -1330,14 +1318,17 @@ class ExprCToExpr(ExprReducer): reduce_op_deref, ] - def get_expr(self, expr): + def get_expr(self, expr, c_context): """Translate a Miasm expression @expr (representing a C access) into a - native Miasm expression and its C type - + tuple composed of a native Miasm expression and its C type. @expr: Miasm expression (representing a C access) + @c_context: a dictionnary linking known tokens (strings) to their + types. A token is linked to only one type. """ - - return self.reduce(expr) + ret = self.reduce(expr, ctxt=c_context) + if ret.info is None: + return (None, None) + return ret.info class CTypesManager(object): @@ -1370,22 +1361,22 @@ class CTypesManager(object): out = self.leaf_types.types.get(type_id, None) assert out is not None elif isinstance(type_id, CTypeUnion): - out = ObjCUnion(type_id.name) + args = [] align_max, size_max = 0, 0 for name, field in type_id.fields: objc = self._get_objc(field, resolved, to_fix, lvl + 1) resolved[field] = objc align_max = max(align_max, objc.align) size_max = max(size_max, objc.size) - out.add_field(name, objc, 0, objc.size) + args.append((name, objc, 0, objc.size)) align, size = self.union_compute_align_size(align_max, size_max) - out.set_align_size(align, size) + out = ObjCUnion(type_id.name, align, size, args) elif isinstance(type_id, CTypeStruct): - out = ObjCStruct(type_id.name) align_max, size_max = 0, 0 + args = [] offset, align_max = 0, 1 pad_index = 0 for name, field in type_id.fields: @@ -1398,13 +1389,13 @@ class CTypesManager(object): pad_index += 1 size = new_offset - offset pad_objc = self._get_objc(CTypeArray(self.padding, size), resolved, to_fix, lvl + 1) - out.add_field(pad_name, pad_objc, offset, pad_objc.size) + args.append((pad_name, pad_objc, offset, pad_objc.size)) offset = new_offset - out.add_field(name, objc, offset, objc.size) + args.append((name, objc, offset, objc.size)) offset += objc.size align, size = self.struct_compute_align_size(align_max, offset) - out.set_align_size(align, size) + out = ObjCStruct(type_id.name, align, size, args) elif isinstance(type_id, CTypePtr): target = type_id.target @@ -1434,10 +1425,10 @@ class CTypesManager(object): type_id.type_ret, resolved, to_fix, lvl + 1) resolved[type_id.type_ret] = type_ret args = [] - for arg in type_id.args: + for name, arg in type_id.args: objc = self._get_objc(arg, resolved, to_fix, lvl + 1) resolved[arg] = objc - args.append(objc) + args.append((name, objc)) out = ObjCFunc(type_id.name, type_id.abi, type_ret, args, self.void_ptr.align, self.void_ptr.size) elif isinstance(type_id, CTypeEllipsis): @@ -1486,7 +1477,7 @@ class CTypesManager(object): return True elif isinstance(objc, ObjCFunc): assert self.check_objc(objc.type_ret, done) - for arg in objc.args: + for name, arg in objc.args: assert self.check_objc(arg, done) return True else: @@ -1583,18 +1574,16 @@ class CHandler(object): """ exprCToExpr_cls = ExprCToExpr - cTypeAnalyzer_cls = CTypeAnalyzer exprToAccessC_cls = ExprToAccessC def __init__(self, types_mngr, expr_types, simplify_c=access_simplifier, enforce_strict_access=True): self.exprc2expr = self.exprCToExpr_cls(expr_types, types_mngr) - self.type_analyzer = self.cTypeAnalyzer_cls(expr_types, types_mngr, - enforce_strict_access) self.access_c_gen = self.exprToAccessC_cls(expr_types, types_mngr, enforce_strict_access) + self.types_mngr = types_mngr self.simplify_c = simplify_c self.expr_types = expr_types @@ -1605,41 +1594,75 @@ class CHandler(object): self.expr_types = expr_types self.exprc2expr.updt_expr_types(expr_types) - self.type_analyzer.updt_expr_types(expr_types) self.access_c_gen.updt_expr_types(expr_types) - def expr_to_c(self, expr): - """Convert a Miasm @expr into it's C equivatlent string - @expr: Miasm expression""" + def expr_to_c_access(self, expr, expr_context=None): + """Generate the C access object(s) for a given native Miasm expression. + @expr: Miasm expression + @expr_context: a dictionnary linking known expressions to a set of types + """ + + if expr_context is None: + expr_context = self.expr_types + return self.access_c_gen.get_accesses(expr, expr_context) + + + def expr_to_c_and_types(self, expr, expr_context=None): + """Generate the C access string and corresponding type for a given + native Miasm expression. + @expr_context: a dictionnary linking known expressions to a set of types + """ + + accesses = set() + for access in self.expr_to_c_access(expr, expr_context): + c_str = access_str(access.to_expr().visit(self.simplify_c)) + accesses.add((c_str, access.ctype)) + return accesses + + def expr_to_c(self, expr, expr_context=None): + """Convert a Miasm @expr into it's C equivalent string + @expr_context: a dictionnary linking known expressions to a set of types + """ - expr_access = self.access_c_gen.get_access(expr) - accesses = [access for access in expr_access.info] - accesses_simp = [access_str(access.to_expr().visit(self.simplify_c)) - for access in accesses] - return accesses_simp + return set(access[0] + for access in self.expr_to_c_and_types(expr, expr_context)) - def expr_to_types(self, expr): + def expr_to_types(self, expr, expr_context=None): """Get the possible types of the Miasm @expr - @expr: Miasm expression""" + @expr_context: a dictionnary linking known expressions to a set of types + """ - return self.type_analyzer.get_type(expr).info + return set(access.ctype + for access in self.expr_to_c_access(expr, expr_context)) - def c_to_expr(self, c_str): - """Convert a C string expression to a Miasm expression - @c_str: C string""" + def c_to_expr_and_type(self, c_str, c_context): + """Convert a C string expression to a Miasm expression and it's + corresponding c type + @c_str: C string + @c_context: a dictionnary linking known tokens (strings) to its type. + """ ast = parse_access(c_str) - access_c = ast_get_c_access_expr(ast, self.expr_types) - return self.exprc2expr.get_expr(access_c).info[0] + access_c = ast_get_c_access_expr(ast, c_context) + return self.exprc2expr.get_expr(access_c, c_context) + + def c_to_expr(self, c_str, c_context): + """Convert a C string expression to a Miasm expression + @c_str: C string + @c_context: a dictionnary linking known tokens (strings) to its type. + """ + + expr, _ = self.c_to_expr_and_type(c_str, c_context) + return expr - def c_to_type(self, c_str): + def c_to_type(self, c_str, c_context): """Get the type of a C string expression - @expr: Miasm expression""" + @expr: Miasm expression + @c_context: a dictionnary linking known tokens (strings) to its type. + """ - ast = parse_access(c_str) - access_c = ast_get_c_access_expr(ast, self.expr_types) - ret_type = self.exprc2expr.get_expr(access_c).info[1] - return ret_type + _, ctype = self.c_to_expr_and_type(c_str, c_context) + return ctype class CLeafTypes(object): diff --git a/miasm2/expression/expression_reduce.py b/miasm2/expression/expression_reduce.py index a1be27cc..45386ca2 100644 --- a/miasm2/expression/expression_reduce.py +++ b/miasm2/expression/expression_reduce.py @@ -99,7 +99,7 @@ class ExprReducer(object): raise TypeError("Unknown Expr Type %r", type(expr)) return node - def reduce(self, expr): + def reduce(self, expr, **kwargs): """Returns an ExprNode tree mirroring @expr tree. The ExprNode is computed by applying reduction rules to the expression @expr @@ -107,9 +107,9 @@ class ExprReducer(object): """ node = self.expr2node(expr) - return self.categorize(node, 0) + return self.categorize(node, lvl=0, **kwargs) - def categorize(self, node, lvl=0): + def categorize(self, node, lvl=0, **kwargs): """Recursively apply rules to @node @node: ExprNode to analyze @@ -121,17 +121,17 @@ class ExprReducer(object): if isinstance(expr, (ExprId, ExprInt)): pass elif isinstance(expr, ExprMem): - arg = self.categorize(node.arg, lvl + 1) + arg = self.categorize(node.arg, lvl=lvl + 1, **kwargs) node = ExprNode(ExprMem(arg.expr, expr.size)) node.arg = arg elif isinstance(expr, ExprSlice): - arg = self.categorize(node.arg, lvl + 1) + arg = self.categorize(node.arg, lvl=lvl + 1, **kwargs) node = ExprNode(ExprSlice(arg.expr, expr.start, expr.stop)) node.arg = arg elif isinstance(expr, ExprOp): new_args = [] for arg in node.args: - new_a = self.categorize(arg, lvl + 1) + new_a = self.categorize(arg, lvl=lvl + 1, **kwargs) assert new_a.expr.size == arg.expr.size new_args.append(new_a) node = ExprNode(ExprOp(expr.op, *[x.expr for x in new_args])) @@ -141,27 +141,27 @@ class ExprReducer(object): new_args = [] new_expr_args = [] for arg in node.args: - arg = self.categorize(arg, lvl + 1) + arg = self.categorize(arg, lvl=lvl + 1, **kwargs) new_args.append(arg) new_expr_args.append(arg.expr) new_expr = ExprCompose(*new_expr_args) node = ExprNode(new_expr) node.args = new_args elif isinstance(expr, ExprCond): - cond = self.categorize(node.cond, lvl + 1) - src1 = self.categorize(node.src1, lvl + 1) - src2 = self.categorize(node.src2, lvl + 1) + cond = self.categorize(node.cond, lvl=lvl + 1, **kwargs) + src1 = self.categorize(node.src1, lvl=lvl + 1, **kwargs) + src2 = self.categorize(node.src2, lvl=lvl + 1, **kwargs) node = ExprNode(ExprCond(cond.expr, src1.expr, src2.expr)) node.cond, node.src1, node.src2 = cond, src1, src2 else: raise TypeError("Unknown Expr Type %r", type(expr)) - node.info = self.apply_rules(node, lvl) + node.info = self.apply_rules(node, lvl=lvl, **kwargs) log_reduce.debug("\t" * lvl + "Reduce result: %s %r", node.expr, node.info) return node - def apply_rules(self, node, lvl=0): + def apply_rules(self, node, lvl=0, **kwargs): """Find and apply reduction rules to @node @node: ExprNode to analyse @@ -169,7 +169,8 @@ class ExprReducer(object): """ for rule in self.reduction_rules: - ret = rule(self, node, lvl) + ret = rule(self, node, lvl=lvl, **kwargs) + if ret is not None: log_reduce.debug("\t" * lvl + "Rule found: %r", rule) return ret diff --git a/miasm2/ir/ir.py b/miasm2/ir/ir.py index 603d3fd0..afb6b382 100644 --- a/miasm2/ir/ir.py +++ b/miasm2/ir/ir.py @@ -469,13 +469,15 @@ class IntermediateRepresentation(object): if (isinstance(addr, m2_expr.ExprId) and isinstance(addr.name, AsmLabel)): addr = addr.name - if isinstance(addr, m2_expr.ExprInt): + if isinstance(addr, AsmLabel): + return addr + + try: addr = int(addr) - if isinstance(addr, (int, long)): - addr = self.symbol_pool.getby_offset_create(addr) - elif isinstance(addr, AsmLabel): - addr = self.symbol_pool.getby_name_create(addr.name) - return addr + except (ValueError, TypeError): + return None + + return self.symbol_pool.getby_offset_create(addr) def get_block(self, addr): """Returns the irbloc associated to an ExprId/ExprInt/label/int diff --git a/miasm2/ir/symbexec.py b/miasm2/ir/symbexec.py index 6d6ba630..593ab49a 100644 --- a/miasm2/ir/symbexec.py +++ b/miasm2/ir/symbexec.py @@ -16,6 +16,18 @@ log.addHandler(console_handler) log.setLevel(logging.INFO) +def get_block(ir_arch, mdis, addr): + """Get IRBlock at address @addr""" + lbl = ir_arch.get_label(addr) + if not lbl in ir_arch.blocks: + block = mdis.dis_block(lbl.offset) + ir_arch.add_block(block) + irblock = ir_arch.get_block(lbl) + if irblock is None: + raise LookupError('No block found at that address: %s' % lbl) + return irblock + + class SymbolMngr(object): """ Store registers and memory symbolic values diff --git a/miasm2/ir/symbexec_types.py b/miasm2/ir/symbexec_types.py index 297c0c9e..a8e8bdf2 100644 --- a/miasm2/ir/symbexec_types.py +++ b/miasm2/ir/symbexec_types.py @@ -10,7 +10,10 @@ class SymbolicStateCTypes(StateEngine): """Store C types of symbols""" def __init__(self, symbols): - self._symbols = frozenset(symbols.items()) + tmp = {} + for expr, types in symbols.iteritems(): + tmp[expr] = frozenset(types) + self._symbols = frozenset(tmp.iteritems()) def __hash__(self): return hash((self.__class__, self._symbols)) @@ -34,12 +37,16 @@ class SymbolicStateCTypes(StateEngine): def merge(self, other): """Merge two symbolic states - Only expressions with equal C types in both states are kept. + The resulting types are the union of types of both states. @other: second symbolic state """ - symb_a = self.symbols.items() - symb_b = other.symbols.items() - symbols = dict(set(symb_a).intersection(symb_b)) + symb_a = self.symbols + symb_b = other.symbols + symbols = {} + for expr in set(symb_a).union(set(symb_b)): + ctypes = symb_a.get(expr, set()).union(symb_b.get(expr, set())) + if ctypes: + symbols[expr] = ctypes return self.__class__(symbols) @property @@ -63,137 +70,13 @@ class SymbExecCType(SymbolicExecutionEngine): func_write=None, sb_expr_simp=expr_simp): self.chandler = chandler + super(SymbExecCType, self).__init__(ir_arch, {}, func_read, func_write, sb_expr_simp) self.symbols = dict(symbols) - offset_types = [] - for name in [('int',), ('long',), - ('long', 'long'), - ('char',), ('short',), - - ('unsigned', 'char',), ('unsigned', 'short',), - ('unsigned', 'int',), ('unsigned', 'long',), - ('unsigned', 'long', 'long')]: - objc = self.chandler.type_analyzer.types_mngr.get_objc(CTypeId(*name)) - offset_types.append(objc) - self.offset_types = offset_types - - def is_type_offset(self, objc): - """Return True if @objc is char/short/int/long""" - return objc in self.offset_types - - def get_tpye_int_by_size(self, size): - """Return a char/short/int/long type with the size equal to @size - @size: size in bit""" - - for objc in self.offset_types: - if objc.size == size / 8: - return objc - return None - - def is_offset_list(self, types, size): - """Return the corresponding char/short/int/long type of @size, if every - types in the list @types are type offset - @types: a list of c types - @size: size in bit""" - - for arg_type in types: - if not self.is_type_offset(arg_type): - return None - objc = self.get_tpye_int_by_size(size) - if objc: - return objc - # default size - objc = self.offset_types[0] - return objc - - def apply_expr_on_state_visit_cache(self, expr, state, cache, level=0): - """ - Deep First evaluate nodes: - 1. evaluate node's sons - 2. simplify - """ - - expr = self.expr_simp(expr) - - if expr in cache: - return cache[expr] - elif expr in state: - return state[expr] - elif isinstance(expr, ExprInt): - objc = self.get_tpye_int_by_size(expr.size) - if objc is None: - objc = self.chandler.type_analyzer.types_mngr.get_objc(CTypeId('int')) - return objc - elif isinstance(expr, ExprId): - if expr in state: - return state[expr] - return None - elif isinstance(expr, ExprMem): - ptr = self.apply_expr_on_state_visit_cache(expr.arg, state, cache, level + 1) - if ptr is None: - return None - self.chandler.type_analyzer.expr_types[self.OBJC_INTERNAL] = ptr - ptr_expr = ExprId(self.OBJC_INTERNAL, expr.arg.size) - objcs = self.chandler.expr_to_types(ExprMem(ptr_expr, expr.size)) - if objcs is None: - return None - objc = objcs[0] - return objc - elif isinstance(expr, ExprCond): - src1 = self.apply_expr_on_state_visit_cache(expr.src1, state, cache, level + 1) - src2 = self.apply_expr_on_state_visit_cache(expr.src2, state, cache, level + 1) - types = [src1, src2] - objc = self.is_offset_list(types, expr.size) - if objc: - return objc - return None - elif isinstance(expr, ExprSlice): - objc = self.get_tpye_int_by_size(expr.size) - if objc is None: - # default size - objc = self.offset_types[0] - return objc - elif isinstance(expr, ExprOp): - args = [] - types = [] - for oarg in expr.args: - arg = self.apply_expr_on_state_visit_cache(oarg, state, cache, level + 1) - types.append(arg) - if None in types: - return None - objc = self.is_offset_list(types, expr.size) - if objc: - return objc - # Find Base + int - if expr.op != '+': - return None - args = list(expr.args) - if args[-1].is_int(): - offset = args.pop() - types.pop() - if len(args) == 1: - arg, arg_type = args.pop(), types.pop() - self.chandler.type_analyzer.expr_types[self.OBJC_INTERNAL] = arg_type - ptr_expr = ExprId(self.OBJC_INTERNAL, arg.size) - objc = self.chandler.expr_to_types(ptr_expr + offset) - objc = objc[0] - return objc - return None - elif isinstance(expr, ExprCompose): - types = set() - for oarg in expr.args: - arg = self.apply_expr_on_state_visit_cache(oarg, state, cache, level + 1) - types.add(arg) - objc = self.is_offset_list(types, expr.size) - if objc: - return objc - return None - else: - raise TypeError("Unknown expr type") def get_state(self): """Return the current state of the SymbolicEngine""" @@ -207,22 +90,43 @@ class SymbExecCType(SymbolicExecutionEngine): pool_out = {} eval_cache = {} for dst, src in assignblk.iteritems(): - src = self.eval_expr(src, eval_cache) + objcs = self.chandler.expr_to_types(src, self.symbols) if isinstance(dst, ExprMem): continue elif isinstance(dst, ExprId): - pool_out[dst] = src + pool_out[dst] = frozenset(objcs) else: - raise ValueError("affected zarb", str(dst)) + raise ValueError("Unsupported affectation", str(dst)) return pool_out.iteritems() + def eval_expr(self, expr, eval_cache=None): + return frozenset(self.chandler.expr_to_types(expr, self.symbols)) + def apply_change(self, dst, src): - objc = src - if objc is None and dst in self.symbols: - del self.symbols[dst] + if src is None: + if dst in self.symbols: + del self.symbols[dst] else: - self.symbols[dst] = objc + self.symbols[dst] = src def del_mem_above_stack(self, stack_ptr): """No stack deletion""" return + + def dump_id(self): + """ + Dump modififed registers symbols only + """ + for expr, expr_types in sorted(self.symbols.iteritems()): + if not expr.is_mem(): + print expr + for expr_type in expr_types: + print '\t', expr_type + + def dump_mem(self): + """ + Dump modififed memory symbols + """ + for expr, value in sorted(self.symbols.iteritems()): + if expr.is_mem(): + print expr, value diff --git a/test/expr_type/test_chandler.py b/test/expr_type/test_chandler.py new file mode 100644 index 00000000..8070841d --- /dev/null +++ b/test/expr_type/test_chandler.py @@ -0,0 +1,550 @@ +""" +Regression test for objc +* ast parsed C to C Miasm expression +* C Miasm expression to native expression +* Miasm expression to type +""" + +from miasm2.expression.expression import ExprInt, ExprId, ExprMem +from miasm2.expression.simplifications import expr_simp + +from miasm2.core.objc import parse_access +from miasm2.core.objc import ast_get_c_access_expr +from miasm2.core.objc import ExprCToExpr, ExprToAccessC, CHandler + + +from miasm2.core.ctypesmngr import CTypeStruct, CTypeUnion, CAstTypes, CTypePtr, CTypeId +from miasm2.core.objc import CTypesManagerNotPacked + +from miasm2.arch.x86.ctype import CTypeAMD64_unk + + +text_1 = """ +# 1 "test.h" +typedef enum { + TMP0, + TMP1, + TMP2, + TMP3, +} MyEnum; + + +typedef struct mini_st { + int x; + int y; + short z; +} Mini; + +typedef union mini_un { + int x; + int y; + short z; +} MiniUnion; + +typedef unsigned char block[8]; + +typedef struct mini_st mini_st_struct; + +typedef mini_st_struct mini_st_struct2; + + +typedef struct test_st { + int a; + int b; + int** ptr; + short tab1[4*sizeof(int)]; + short* tab2[2*2+4*4-16/4]; + int* xptr; + int tab3[0x10][0x20]; + + Mini f_mini; + + Mini minitab[0x10]; + + Mini *minitabptr[0x10]; + int (*(tab4[0x20]))[0x10]; + MyEnum myenum; + block chunk; + + union testU{ + int myint; + char mychars[4]; + } myunion; + + union testV{ + int myint; + char mychars[4]; + struct tutu { + int a; + unsigned int b; + } mystruct_x; + } myunion_x; + + + union testW{ + union testX{ + int a; + unsigned int b; + char c; + } u0; + union testX{ + int a; + char b; + } u1; + } myunion_y; + + union { + int a1; + }; + struct { + int a2; + }; + + Mini (*(tab5[0x20]))[0x10]; + int *tab6[4][4][4][4]; + +} Test; + +typedef int (*func)(int, char); + +typedef func ( *(xxx[5]) )[4]; + +typedef union char_int_st { + char a; + int b; +} Char_int; + +typedef int array1[4]; +typedef int array2[4]; +typedef unsigned int array3[4]; +typedef Char_int array4[4]; + +typedef char dummy[((sizeof(char)<<3) >> 1)*4/2 - 1 + 2]; + +struct recurse { + struct recurse* next; + int a; +}; + +int strlen(const char *s); + """ + +text_2 = """ +struct test_context { + int a; + struct test_st test; + int b; +}; + +""" +base_types = CTypeAMD64_unk() +types_ast = CAstTypes() + +# Add C types definition +types_ast.add_c_decl(text_1) +types_ast.add_c_decl(text_2) + + +types_mngr = CTypesManagerNotPacked(types_ast, base_types) + +for type_id, type_desc in types_mngr.types_ast._types.iteritems(): + print type_id + obj = types_mngr.get_objc(type_id) + print obj + print repr(obj) + types_mngr.check_objc(obj) + +for type_id, type_desc in types_mngr.types_ast._typedefs.iteritems(): + print type_id + obj = types_mngr.get_objc(type_id) + print obj + print repr(obj) + types_mngr.check_objc(obj) + +void_ptr = types_mngr.void_ptr + +obj_dummy = types_mngr.get_objc(CTypeId("dummy")) +obj_int = types_mngr.get_objc(CTypeId("int")) +obj_uint = types_mngr.get_objc(CTypeId("unsigned", "int")) +obj_long = types_mngr.get_objc(CTypeId("long")) +obj_array1 = types_mngr.get_objc(CTypeId("array1")) +obj_array2 = types_mngr.get_objc(CTypeId("array2")) +obj_array3 = types_mngr.get_objc(CTypeId("array3")) +obj_array4 = types_mngr.get_objc(CTypeId("array4")) + +obj_charint = types_mngr.get_objc(CTypeUnion("char_int")) + +assert cmp(obj_int, obj_uint) != 0 +assert cmp(obj_int, obj_long) != 0 + +assert cmp(obj_array1, obj_array1) == 0 +assert cmp(obj_array1, obj_array2) == 0 +assert cmp(obj_array1, obj_array3) != 0 +assert cmp(obj_array1, obj_array4) != 0 + +assert cmp(obj_charint, obj_charint) == 0 +assert cmp(obj_charint, obj_uint) != 0 + +obj_test = types_mngr.get_objc(CTypePtr(CTypeId("Test"))) + +ptr_test = ExprId("ptr_Test", 64) +obj_recurse = types_mngr.get_objc(CTypePtr(CTypeStruct("recurse"))) +# Test cmp same recursive object +obj_recurse_bis = types_mngr.get_objc(CTypePtr(CTypeStruct("recurse"))) +assert cmp(obj_recurse, obj_recurse_bis) == 0 + + +set_test = set([obj_recurse, obj_recurse_bis]) +assert len(set_test) == 1 +ptr_recurse = ExprId("ptr_recurse", 64) + + +obj_test_st = types_mngr.get_objc(CTypeStruct("test_st")) +print repr(obj_test_st) +obj_test_context = types_mngr.get_objc(CTypeStruct("test_context")) +print repr(obj_test_context) +assert obj_test_context.size > obj_test_st.size + +assert cmp(obj_test_st, obj_recurse) != 0 + + +expr_types = {ptr_test: set([obj_test]), + ptr_recurse: set([obj_recurse])} + + +c_context = {ptr_test.name: obj_test, + ptr_recurse.name: obj_recurse} + + +tests = [ + ( + ExprMem(ptr_test, 32), + [("int", "(ptr_Test)->a")] + ), + ( + ptr_test, + [('struct test_st *', "ptr_Test")] + ), + ( + ExprMem(ptr_test + ExprInt(0, 64), 32), + [("int", "(ptr_Test)->a")] + ), + ( + ExprMem(ptr_test + ExprInt(8, 64), 64), + [("int **", "(ptr_Test)->ptr")] + ), + ( + ExprMem(ptr_test + ExprInt(8, 64), 64) + ExprInt(8 * 3, 64), + [("int **", "&(((ptr_Test)->ptr)[3])")] + ), + ( + ExprMem(ExprMem(ptr_test + ExprInt(8, 64), 64) + + ExprInt(8 * 3, 64), 64), + [("int *", "((ptr_Test)->ptr)[3]")] + ), + ( + ExprMem( + ExprMem( + ExprMem( + ptr_test + ExprInt(8, 64), + 64) + ExprInt(8 * 3, 64), + 64) + ExprInt(4 * 9, 64), + 32), + [("int", "(((ptr_Test)->ptr)[3])[9]")] + ), + ( + ptr_test + ExprInt(0x10, 64), + [("short [16]", "(ptr_Test)->tab1")] + ), + ( + ptr_test + ExprInt(0x12, 64), + [("short *", "&(((ptr_Test)->tab1)[1])")] + ), + ( + ExprMem(ptr_test + ExprInt(0x10, 64), 16), + [("short", "*((ptr_Test)->tab1)")] + ), + ( + ExprMem(ptr_test + ExprInt(0x10 + 2 * 3, 64), 16), + [("short", "((ptr_Test)->tab1)[3]")] + ), + ( + ExprMem(ptr_test + ExprInt(0xb8 + 4, 64), 32), + [("int", "(((ptr_Test)->tab3)[0])[1]")] + ), + ( + ExprMem(ptr_test + ExprInt(0xb8 + 32 * 4 * 3 + 4 * 7, 64), 32), + [("int", "(((ptr_Test)->tab3)[3])[7]")] + ), + ( + ptr_test + ExprInt(0xb8 + 4, 64), + [("int *", "&((((ptr_Test)->tab3)[0])[1])")] + ), + + # struct of struct + ( + ptr_test + ExprInt(0x8b8, 64), + [("struct mini_st *", '&((ptr_Test)->f_mini)')] + ), + ( + ptr_test + ExprInt(0x8bc, 64), + [("int *", "&(((ptr_Test)->f_mini).y)")] + ), + ( + ExprMem(ptr_test + ExprInt(0x8bc, 64), 32), + [("int", "((ptr_Test)->f_mini).y")] + ), + + # struct of array of struct + ( + ptr_test + ExprInt(0x8c4, 64), + [('struct mini_st [16]', '(ptr_Test)->minitab')] + ), + + ( + ptr_test + ExprInt(0x8c4 + 3 * 4, 64), + [('struct mini_st *', '&(((ptr_Test)->minitab)[1])')] + ), + + ( + ExprMem(ptr_test + ExprInt(0x8c4, 64), 32), + [("int", "((ptr_Test)->minitab)->x")] + ), + + ( + ExprMem(ptr_test + ExprInt(0x8c4 + 12 * 4, 64), 32), + [("int", "(((ptr_Test)->minitab)[4]).x")] + ), + + + ( + ExprMem(ptr_test + ExprInt(0x8c4 + 4, 64), 32), + [("int", "((ptr_Test)->minitab)->y")] + ), + + ( + ExprMem(ptr_test + ExprInt(0x8c4 + 12 * 4 + 4, 64), 32), + [("int", "(((ptr_Test)->minitab)[4]).y")] + ), + + # struct of array of ptr of struct + + ( + ExprMem(ptr_test + ExprInt(0x988 + 8 * 4, 64), 64), + [('struct mini_st *', "((ptr_Test)->minitabptr)[4]")] + ), + + ( + ExprMem( + (ExprMem(ptr_test + ExprInt(0x988 + 8 * 4, 64), 64) + + ExprInt(8, 64)), + 16), + [("short", "(((ptr_Test)->minitabptr)[4])->z")] + ), + + # tab4 + + ( + ptr_test + ExprInt(0xa08, 64), + [("int (*[32])[16]", "(ptr_Test)->tab4")] + ), + + ( + ExprMem(ptr_test + ExprInt(0xa08 + 0x8 * 2, 64), 64), + [("int (*)[16]", "((ptr_Test)->tab4)[2]")] + ), + + ( + ExprMem(ExprMem(ptr_test + ExprInt(0xa08 + 0x8 * 2, 64), 64), 64), + [("int [16]", "*(((ptr_Test)->tab4)[2])")] + ), + + ( + ExprMem(ExprMem(ptr_test + ExprInt(0xa08 + 0x8 * 2, 64), 64), 64) + ExprInt(4 * 5, 64), + [("int *", "&((*(((ptr_Test)->tab4)[2]))[5])")] + ), + + # enum + ( + ExprMem(ptr_test + ExprInt(2824, 64), 32), + [("int", "(ptr_Test)->myenum")] + ), + + # typedef array + ( + ExprMem(ptr_test + ExprInt(2828 + 1, 64), 8), + [("uchar", "((ptr_Test)->chunk)[1]")] + ), + + + # union + ( + ptr_test + ExprInt(2836, 64), + [("union testU *", '&((ptr_Test)->myunion)')] + ), + + ( + ExprMem(ptr_test + ExprInt(2836, 64), 8), + [("char", "*(((ptr_Test)->myunion).mychars)")] + ), + + ( + ExprMem(ptr_test + ExprInt(2836 + 1, 64), 8), + [("char", "(((ptr_Test)->myunion).mychars)[1]")] + ), + + ( + ExprMem(ptr_test + ExprInt(2836, 64), 32), + [("int", "((ptr_Test)->myunion).myint")] + ), + + # union struct + ( + ExprMem(ptr_test + ExprInt(2840, 64), 8), + [("char", "*(((ptr_Test)->myunion_x).mychars)")] + ), + + ( + ExprMem(ptr_test + ExprInt(2840 + 1, 64), 8), + [("char", "(((ptr_Test)->myunion_x).mychars)[1]")] + ), + + ( + ExprMem(ptr_test + ExprInt(2840, 64), 32), + [("int", "((ptr_Test)->myunion_x).myint"), + ("int", "(((ptr_Test)->myunion_x).mystruct_x).a")] + ), + + ( + ptr_test + ExprInt(2840, 64), + [('union testV *', '&((ptr_Test)->myunion_x)')] + ), + + + # union union + ( + ptr_test + ExprInt(2848, 64), + [('union testW *', '&((ptr_Test)->myunion_y)')] + ), + + ( + ExprMem(ptr_test + ExprInt(2848, 64), 32), + [('int', '(((ptr_Test)->myunion_y).u0).a'), + ('uint', '(((ptr_Test)->myunion_y).u0).b'), + ('int', '(((ptr_Test)->myunion_y).u1).a')] + ), + + # recurse + ( + ptr_recurse, + [('struct recurse *', 'ptr_recurse')] + ), + + ( + ptr_recurse + ExprInt(8, 64), + [('int *', '&((ptr_recurse)->a)')] + ), + + ( + ExprMem(ptr_recurse, 64), + [('struct recurse *', '(ptr_recurse)->next')] + ), + + ( + ExprMem(ExprMem(ptr_recurse, 64), 64), + [('struct recurse *', '((ptr_recurse)->next)->next')] + ), + + + ( + ExprMem(ExprMem(ExprMem(ptr_recurse, 64), 64) + ExprInt(8, 64), 32), + [('int', '(((ptr_recurse)->next)->next)->a')] + ), + + + + # tab5 + + ( + ptr_test + ExprInt(0xb30, 64), + [("struct mini_st (*[32])[16]", "(ptr_Test)->tab5")] + ), + + ( + ExprMem(ptr_test + ExprInt(0xb30 + 0x8 * 2, 64), 64), + [("struct mini_st (*)[16]", "((ptr_Test)->tab5)[2]")] + ), + + ( + ExprMem(ExprMem(ptr_test + ExprInt(0xb30 + 0x8 * 2, 64), 64), 64), + [("struct mini_st [16]", "*(((ptr_Test)->tab5)[2])")] + ), + + ( + ExprMem(ExprMem(ptr_test + ExprInt(0xb30 + 0x8 * 2, 64), 64), 64) + ExprInt(12*3 + 8, 64), + [("short *", "&(((*(((ptr_Test)->tab5)[2]))[3]).z)")] + ), + + ( + ExprMem(ExprMem(ExprMem(ptr_test + ExprInt(0xb30 + 0x8 * 2, 64), 64), 64) + + ExprInt(12*3 + 8, 64), 16), + [("short", "((*(((ptr_Test)->tab5)[2]))[3]).z")] + ), + + + # tab 6 + ( + ExprMem(ptr_test + ExprInt(0xc30 + ((((3) * 4 + 2)*4 + 0)*4 + 1)*8, 64), 64), + [("int *", "(((((ptr_Test)->tab6)[3])[2])[0])[1]")] + ), + + ( + ExprMem(ExprMem(ptr_test + ExprInt(0xc30 + ((((3) * 4 + 2)*4 + 0)*4 + 1)*8, 64), 64), 32), + [("int", "*((((((ptr_Test)->tab6)[3])[2])[0])[1])")] + ), + + + +] + +mychandler = CHandler(types_mngr, expr_types) +exprc2expr = ExprCToExpr(expr_types, types_mngr) +mychandler.updt_expr_types(expr_types) + + +for (expr, result) in tests[4:]: + print "*" * 80 + print "Native expr:", expr + result = set(result) + expr_c = mychandler.expr_to_c(expr) + types = mychandler.expr_to_types(expr) + + target_type = mychandler.expr_to_types(expr) + + access_c_gen = ExprToAccessC(expr_types, types_mngr) + computed = set() + for c_str, ctype in mychandler.expr_to_c_and_types(expr): + print c_str, ctype + computed.add((str(ctype), c_str)) + assert computed == result + + + for out_type, out_str in computed: + parsed_expr = mychandler.c_to_expr(out_str, c_context) + parsed_type = mychandler.c_to_type(out_str, c_context) + print "Access expr:", parsed_expr + print "Access type:", parsed_type + + ast = parse_access(out_str) + access_c = ast_get_c_access_expr(ast, c_context) + print "Generated access:", access_c + + parsed_expr_bis, parsed_type_bis = mychandler.exprc2expr.get_expr(access_c, c_context) + assert parsed_expr_bis is not None + assert parsed_expr == parsed_expr_bis + assert parsed_type == parsed_type_bis + + expr_new1 = expr_simp(parsed_expr) + expr_new2 = expr_simp(expr) + print "\t", expr_new1 + assert expr_new1 == expr_new2 diff --git a/test/samples/x86_32/cst_propag/x86_32_sc_1.S b/test/samples/x86_32/cst_propag/x86_32_sc_1.S new file mode 100644 index 00000000..0fe12e04 --- /dev/null +++ b/test/samples/x86_32/cst_propag/x86_32_sc_1.S @@ -0,0 +1,21 @@ +main: + PUSH EBP + MOV EBP, ESP + MOV ECX, 1 + MOV EDX, 2 + LEA ECX, DWORD PTR [ECX+0x4] + LEA EBX, DWORD PTR [ECX+0x1] + CMP CL, 0x1 + JZ test1 + LEA EBX, DWORD PTR [EBX-1] + JMP end +test1: + LEA EBX, DWORD PTR [EBX-1] +end: + MOV EAX, EBX + LEA EAX, DWORD PTR [EAX + EDX] + MOV EDX, DWORD PTR [EBP+0xC] + LEA EAX, DWORD PTR [EAX + EDX] + MOV ESP, EBP + POP EBP + RET diff --git a/test/samples/x86_32/cst_propag/x86_32_sc_10.S b/test/samples/x86_32/cst_propag/x86_32_sc_10.S new file mode 100644 index 00000000..84eae85b --- /dev/null +++ b/test/samples/x86_32/cst_propag/x86_32_sc_10.S @@ -0,0 +1,17 @@ +main: + PUSH EBP + MOV EBP, ESP + MOV ECX, DWORD PTR [ESP+0x8] + INC EBX + CMP CL, 0x1 + JZ test1 + MOV EAX, 8 + JMP end +test1: + MOV EAX, 4 +end: + LEA EBX, DWORD PTR [EAX+EBX] + MOV EAX, EBX + MOV ESP, EBP + POP EBP + RET diff --git a/test/samples/x86_32/cst_propag/x86_32_sc_11.S b/test/samples/x86_32/cst_propag/x86_32_sc_11.S new file mode 100644 index 00000000..272c76da --- /dev/null +++ b/test/samples/x86_32/cst_propag/x86_32_sc_11.S @@ -0,0 +1,11 @@ +main: + MOV ECX, 10 +loop1: + DEC ECX + JNZ less + JMP goon +less: + DEC ECX +goon: + JNZ loop1 + RET diff --git a/test/samples/x86_32/cst_propag/x86_32_sc_12.S b/test/samples/x86_32/cst_propag/x86_32_sc_12.S new file mode 100644 index 00000000..3420b882 --- /dev/null +++ b/test/samples/x86_32/cst_propag/x86_32_sc_12.S @@ -0,0 +1,12 @@ +main: + MOV EBX, 1 + MOV ECX, 2 + CMP EDX, 3 + JNZ test1 + ADD EBX, 1 + JMP goon +test1: + ADD ECX, 1 +goon: + LEA EAX, DWORD PTR [EBX+ECX] + RET diff --git a/test/samples/x86_32/cst_propag/x86_32_sc_13.S b/test/samples/x86_32/cst_propag/x86_32_sc_13.S new file mode 100644 index 00000000..08b9a891 --- /dev/null +++ b/test/samples/x86_32/cst_propag/x86_32_sc_13.S @@ -0,0 +1,7 @@ +main: + MOV ECX, 10 +loop: + DEC ECX + JNZ loop + MOV EAX, ECX + RET diff --git a/test/samples/x86_32/cst_propag/x86_32_sc_14.S b/test/samples/x86_32/cst_propag/x86_32_sc_14.S new file mode 100644 index 00000000..17cad5d7 --- /dev/null +++ b/test/samples/x86_32/cst_propag/x86_32_sc_14.S @@ -0,0 +1,10 @@ +main: + MOV ECX, 10 + MOV EDX, 10 +loop: + INC EDX + DEC EDX + DEC ECX + JNZ loop + MOV EAX, EDX + RET diff --git a/test/samples/x86_32/cst_propag/x86_32_sc_15.S b/test/samples/x86_32/cst_propag/x86_32_sc_15.S new file mode 100644 index 00000000..03a3c121 --- /dev/null +++ b/test/samples/x86_32/cst_propag/x86_32_sc_15.S @@ -0,0 +1,16 @@ +main: + ADD EDI, 1 + MOV ECX, 10 +loop1: + MOV EDX, 10 + INC EDI +loop2: + ADD EDI, 2 + SUB EDI, 2 + SUB EDX, 1 + JNZ loop2 + DEC EDI + SUB ECX, 1 + JNZ loop1 + MOV EAX, EDI + RET diff --git a/test/samples/x86_32/cst_propag/x86_32_sc_16.S b/test/samples/x86_32/cst_propag/x86_32_sc_16.S new file mode 100644 index 00000000..d54e2657 --- /dev/null +++ b/test/samples/x86_32/cst_propag/x86_32_sc_16.S @@ -0,0 +1,17 @@ +main: + MOV EBX, 1 + ADD EDI, 1 + MOV ECX, 10 +loop1: + MOV EDX, 10 + INC EDI +loop2: + LEA EDI, DWORD PTR [EDI+EBX] + LEA EDI, DWORD PTR [EDI-1] + SUB EDX, 1 + JNZ loop2 + DEC EDI + SUB ECX, 1 + JNZ loop1 + MOV EAX, EDI + RET diff --git a/test/samples/x86_32/cst_propag/x86_32_sc_17.S b/test/samples/x86_32/cst_propag/x86_32_sc_17.S new file mode 100644 index 00000000..576c02e2 --- /dev/null +++ b/test/samples/x86_32/cst_propag/x86_32_sc_17.S @@ -0,0 +1,8 @@ +main: + MOV EBX, 1 + ADD EDI, 1 + SUB EBX, EDI + SUB EDI, 1 + ADD EBX, EDI + MOV EAX, EBX + RET diff --git a/test/samples/x86_32/cst_propag/x86_32_sc_18.S b/test/samples/x86_32/cst_propag/x86_32_sc_18.S new file mode 100644 index 00000000..7d29abdb --- /dev/null +++ b/test/samples/x86_32/cst_propag/x86_32_sc_18.S @@ -0,0 +1,33 @@ +main: + PUSH EBP + MOV EBP, ESP + MOV EAX, DWORD PTR [EBP+0x8] + MOV EBX, DWORD PTR [EBP+0xC] + ADD EAX, EBX + TEST EAX, EAX + JNZ test + PUSH 1 + PUSH 2 + PUSH DWORD PTR [EBP+0x10] + CALL func1 + ADD ESP, 0xC + JMP goon +test: + MOV ECX, 10 +loop: + PUSH 1 + PUSH 2 + CALL func2 + ADD ESP, 0x8 + DEC ECX + JNZ loop +goon: + MOV ESP, EBP + POP EBP + RET + + +func1: + RET +func2: + RET diff --git a/test/samples/x86_32/cst_propag/x86_32_sc_19.S b/test/samples/x86_32/cst_propag/x86_32_sc_19.S new file mode 100644 index 00000000..0b0e0837 --- /dev/null +++ b/test/samples/x86_32/cst_propag/x86_32_sc_19.S @@ -0,0 +1,16 @@ +main: + + MOV EBX, 1 + MOV ECX, 1 + CMP EDX, 10 + JZ test1 + ADD EBX, 1 + ADD ECX, 1 + JMP gogon +test1: + ADD EBX, 2 + ADD ECX, 2 +gogon: + ADD EAX, EBX + ADD EAX, ECX + RET diff --git a/test/samples/x86_32/cst_propag/x86_32_sc_2.S b/test/samples/x86_32/cst_propag/x86_32_sc_2.S new file mode 100644 index 00000000..96e10fa6 --- /dev/null +++ b/test/samples/x86_32/cst_propag/x86_32_sc_2.S @@ -0,0 +1,21 @@ +main: + PUSH EBP + MOV EBP, ESP + MOV ECX, DWORD PTR [ESP+0x8] + MOV EDX, DWORD PTR [EBP+0xC] + LEA ECX, DWORD PTR [ECX+0x4] + LEA EBX, DWORD PTR [EBX+0x1] + CMP CL, 0x1 + JZ test1 + LEA EBX, DWORD PTR [EBX-1] + JMP end +test1: + LEA EBX, DWORD PTR [EBX+0x1] +end: + MOV EAX, EBX + LEA EAX, DWORD PTR [EAX + EDX] + MOV EDX, DWORD PTR [EBP+0xC] + LEA EAX, DWORD PTR [EAX + EDX] + MOV ESP, EBP + POP EBP + RET diff --git a/test/samples/x86_32/cst_propag/x86_32_sc_20.S b/test/samples/x86_32/cst_propag/x86_32_sc_20.S new file mode 100644 index 00000000..4f2b82c6 --- /dev/null +++ b/test/samples/x86_32/cst_propag/x86_32_sc_20.S @@ -0,0 +1,19 @@ +main: + MOV EBX, 0 + MOV ECX, 0 + + CMP EDX, 0 + JNZ test1 + JMP goon +test1: + MOV EDX, 1 + LEA EDX, DWORD PTR [EDX+0xF] + LEA EBX, DWORD PTR [EBX+EDX] + MOV EDX, 2 + LEA EDX, DWORD PTR [EDX+0xE] + MOV ECX, EDX + LEA EBX, DWORD PTR [EBX+ECX] + JNZ test1 +goon: + MOV EAX, EBX + RET diff --git a/test/samples/x86_32/cst_propag/x86_32_sc_3.S b/test/samples/x86_32/cst_propag/x86_32_sc_3.S new file mode 100644 index 00000000..46d2afff --- /dev/null +++ b/test/samples/x86_32/cst_propag/x86_32_sc_3.S @@ -0,0 +1,15 @@ +main: + PUSH EBP + MOV EBP, ESP + MOV ECX, DWORD PTR [EBP+0x8] +loop: + SUB ECX, 1 + JZ end + PUSH EDX + POP ESI + JMP loop +end: + MOV EAX, DWORD PTR [ESP+0xC] + MOV ESP, EBP + POP EBP + RET diff --git a/test/samples/x86_32/cst_propag/x86_32_sc_4.S b/test/samples/x86_32/cst_propag/x86_32_sc_4.S new file mode 100644 index 00000000..1f9e82a3 --- /dev/null +++ b/test/samples/x86_32/cst_propag/x86_32_sc_4.S @@ -0,0 +1,15 @@ +main: + PUSH EBP + MOV EBP, ESP + MOV ECX, DWORD PTR [EBP+0x8] +loop: + PUSH EDX + POP ESI + SUB ECX, 1 + JZ end + JMP loop +end: + MOV EAX, DWORD PTR [ESP+0xC] + MOV ESP, EBP + POP EBP + RET diff --git a/test/samples/x86_32/cst_propag/x86_32_sc_5.S b/test/samples/x86_32/cst_propag/x86_32_sc_5.S new file mode 100644 index 00000000..b9d7e08a --- /dev/null +++ b/test/samples/x86_32/cst_propag/x86_32_sc_5.S @@ -0,0 +1,24 @@ +main: + PUSH EBP + MOV EBP, ESP + MOV ECX, DWORD PTR [EBP+0x8] + + SUB ECX, 1 + JZ test1 + SUB ECX, 1 + JZ test2 + SUB ECX, 1 + JZ test3 + + JMP end +test1: + INC EAX +test2: + INC EAX +test3: + INC EAX +end: + INC EAX + MOV ESP, EBP + POP EBP + RET diff --git a/test/samples/x86_32/cst_propag/x86_32_sc_6.S b/test/samples/x86_32/cst_propag/x86_32_sc_6.S new file mode 100644 index 00000000..65fc2b8b --- /dev/null +++ b/test/samples/x86_32/cst_propag/x86_32_sc_6.S @@ -0,0 +1,37 @@ +main: + PUSH EBP + MOV EBP, ESP + MOV ECX, DWORD PTR [EBP+0x8] + + INC EAX + SUB ECX, 1 + JZ test1 + ADD EAX, 1 + JMP go1 +test1: + ADD EAX, 2 +go1: + + + INC EAX + SUB ECX, 1 + JZ test2 + ADD EAX, 0x10 + JMP go2 +test2: + ADD EAX, 0x20 +go2: + + INC EAX + SUB ECX, 1 + JZ test3 + ADD EAX, 0x30 + JMP go3 +test3: + ADD EAX, 0x40 +go3: + + INC EAX + MOV ESP, EBP + POP EBP + RET diff --git a/test/samples/x86_32/cst_propag/x86_32_sc_7.S b/test/samples/x86_32/cst_propag/x86_32_sc_7.S new file mode 100644 index 00000000..96c5fc6e --- /dev/null +++ b/test/samples/x86_32/cst_propag/x86_32_sc_7.S @@ -0,0 +1,16 @@ +main: + PUSH EBP + MOV EBP, ESP + MOV ECX, DWORD PTR [EBP+0x8] + INC EAX + +loop: + INC EAX + DEC EAX + SUB ECX, 1 + JZ loop + + INC EAX + MOV ESP, EBP + POP EBP + RET diff --git a/test/samples/x86_32/cst_propag/x86_32_sc_8.S b/test/samples/x86_32/cst_propag/x86_32_sc_8.S new file mode 100644 index 00000000..5127c0fa --- /dev/null +++ b/test/samples/x86_32/cst_propag/x86_32_sc_8.S @@ -0,0 +1,18 @@ +main: + PUSH EBP + MOV EBP, ESP + MOV ECX, DWORD PTR [EBP+0x8] + INC EAX + +loop: + MOV EDX, 1 + MOV ESI, 1 + ADD EAX, EDX + SUB EAX, ESI + SUB ECX, 1 + JZ loop + + INC EAX + MOV ESP, EBP + POP EBP + RET diff --git a/test/samples/x86_32/cst_propag/x86_32_sc_9.S b/test/samples/x86_32/cst_propag/x86_32_sc_9.S new file mode 100644 index 00000000..0b01305b --- /dev/null +++ b/test/samples/x86_32/cst_propag/x86_32_sc_9.S @@ -0,0 +1,36 @@ +main: + PUSH EBP + MOV EBP, ESP + MOV ECX, 10 ; DWORD PTR [EBP+0x8] + + MOV EBX, 0x1000 ; + INC EBX ; EBX = 0x1001 + MOV EAX, EBX ; EAX = 0x1001 + MOV EBX, 0x10001 ; EBX = 0x10001 + DEC EBX ; EBX = 0x10000 + MOV ESI, EBX ; ESI = 0x10000 + ; + ADD EDI, EAX ; EDI += 0x1001 + ADD EDI, ESI ; EDI += 0x10000 + ;; EDI = EDI + 0x11001 + +loop: + MOV EBX, 0x1000 ; + MOV EAX, EBX ; + MOV EBX, 0x100001 ; + MOV ESI, EBX ; + MUL ESI ; EAX = 0x1000 + MOV EBX, 0x1 ; + ADD EDI, EBX ; EDI += 1 + MOV EBX, 0x1000 ; + ADD EDI, EBX ; EDI += 0x1000 + SUB EDI, EAX ; EDI -= 0x1000 + DEC EDI ; EDI -= 1 + SUB ECX, 1 + JNZ loop + + INC EDI + MOV EAX, EDI + MOV ESP, EBP + POP EBP + RET diff --git a/test/test_all.py b/test/test_all.py index 3da2dbb5..17193d9f 100755 --- a/test/test_all.py +++ b/test/test_all.py @@ -188,8 +188,9 @@ class SemanticTestAsm(RegressionTest): self.command_line = [self.shellcode_script, arch, input_filename, - output_filename, - self.container_dct.get(container, '')] + output_filename] + if container in self.container_dct: + self.command_line.append(self.container_dct[container]) self.products = [output_filename, "graph.dot"] @@ -247,6 +248,11 @@ for script in ["modint.py", ]: testset += RegressionTest([script], base_dir="expression") +## ObjC/CHandler +testset += RegressionTest(["test_chandler.py"], base_dir="expr_type", + tags=[TAGS["cparser"]]) + + ## IR for script in ["symbexec.py", "ir.py", @@ -299,6 +305,17 @@ testset += RegressionTest(["data_flow.py"], base_dir="analysis", for test_nb in xrange(1, 18)) for fname in fnames]) +for i in xrange(1, 21): + input_name = "cst_propag/x86_32_sc_%d" % i + bin_name = "samples/x86_32/%s.bin" % input_name + test_x86_32_cst = SemanticTestAsm("x86_32", None, [input_name]) + testset+= test_x86_32_cst + testset += RegressionTest(["../example/expression/constant_propagation.py", "-s", bin_name, "0"], + depends=[test_x86_32_cst], + products=["%s.propag.dot" % bin_name]) + + + ## Degraph class TestDepgraph(RegressionTest): """Dependency graph test""" @@ -560,6 +577,11 @@ testset += ExampleExpression(["access_c.py", Example.get_sample("human.bin")], testset += ExampleExpression(["expr_c.py"], tags=[TAGS["cparser"]]) + +testset += ExampleExpression(["constant_propagation.py", + Example.get_sample("simple_test.bin"), "-s", "0"], + products=["%s.propag.dot" % Example.get_sample("simple_test.bin")]) + for script in [["basic_op.py"], ["basic_simplification.py"], ["simplification_tools.py"], |