diff options
| -rw-r--r-- | example/ida/ctype_propagation.py | 266 | ||||
| -rw-r--r-- | example/ida/rpyc_ida.py | 25 | ||||
| -rw-r--r-- | miasm2/core/ctypesmngr.py | 22 | ||||
| -rw-r--r-- | miasm2/expression/parser.py | 70 | ||||
| -rw-r--r-- | miasm2/ir/ir.py | 1 | ||||
| -rw-r--r-- | miasm2/ir/symbexec.py | 95 | ||||
| -rw-r--r-- | miasm2/ir/symbexec_top.py | 221 | ||||
| -rw-r--r-- | miasm2/ir/symbexec_types.py | 243 | ||||
| -rw-r--r-- | miasm2/jitter/emulatedsymbexec.py | 6 | ||||
| -rw-r--r-- | miasm2/jitter/llvmconvert.py | 25 | ||||
| -rw-r--r-- | test/arch/x86/unit/asm_test.py | 10 | ||||
| -rw-r--r-- | test/arch/x86/unit/mn_div.py | 17 | ||||
| -rw-r--r-- | test/expression/parser.py | 16 | ||||
| -rwxr-xr-x | test/test_all.py | 3 |
14 files changed, 1002 insertions, 18 deletions
diff --git a/example/ida/ctype_propagation.py b/example/ida/ctype_propagation.py new file mode 100644 index 00000000..e9986522 --- /dev/null +++ b/example/ida/ctype_propagation.py @@ -0,0 +1,266 @@ +import os +import tempfile + +from miasm2.core.bin_stream_ida import bin_stream_ida +from miasm2.expression import expression as m2_expr + +from miasm2.expression.simplifications import expr_simp +from miasm2.analysis.depgraph import DependencyGraph +from miasm2.ir.ir import IRBlock, AssignBlock + +from utils import guess_machine + +from miasm2.arch.x86.ctype import CTypeAMD64_unk +from miasm2.expression.expression import ExprId + +from miasm2.core.objc import CTypesManagerNotPacked, CTypeAnalyzer, ExprToAccessC, CHandler +from miasm2.core.ctypesmngr import CAstTypes +from miasm2.expression.expression import ExprMem, ExprId, ExprInt, ExprOp, ExprAff +from miasm2.ir.symbexec_types import SymbExecCType + +from miasm2.expression.parser import str_to_expr + + +class TypePropagationForm(Form): + + def __init__(self, ira): + + self.ira = ira + self.stk_unalias_force = False + + default_types_info = r"""ExprId("RDX", 64): char * +""" + archs = ["AMD64_unk", "X86_32_unk"] + + Form.__init__(self, +r"""BUTTON YES* Launch +BUTTON CANCEL NONE +Dependency Graph Settings +<##Header file :{headerFile}> +<Architecture/complator:{cbReg}> +<Types informations:{strTypesInfo}> +<Unalias stack:{rUnaliasStack}>{cMethod}> +""", { + 'headerFile': Form.FileInput(swidth=20, open=True), + 'cbReg': Form.DropdownListControl( + items=archs, + readonly=False, + selval=archs[0]), + 'strTypesInfo': Form.MultiLineTextControl(text=default_types_info, + flags=Form.MultiLineTextControl.TXTF_FIXEDFONT), + 'cMethod': Form.ChkGroupControl(("rUnaliasStack",)), + }) + self.Compile() + + @property + def unalias_stack(self): + return self.cMethod.value & 1 or self.stk_unalias_force + + +def get_block(ir_arch, mdis, addr): + """Get IRBlock at address @addr""" + mdis.job_done.clear() + lbl = ir_arch.get_label(addr) + if not lbl in ir_arch.blocks: + block = mdis.dis_bloc(lbl.offset) + ir_arch.add_bloc(block) + irblock = ir_arch.get_bloc(lbl) + if irblock is None: + raise LookupError('No block found at that address: %s' % lbl) + return irblock + + +def get_types_mngr(headerFile): + text = open(headerFile).read() + base_types = CTypeAMD64_unk() + types_ast = CAstTypes() + + # Add C types definition + types_ast.add_c_decl(text) + + types_mngr = CTypesManagerNotPacked(types_ast, base_types) + return types_mngr + + + +# add stack alignment simpl +def simp_stack_align(expr_simp, expr): + if not expr.is_op("&"): + return expr + if len(expr.args) != 2: + return expr + if not expr.args[1].is_int(0xFFFFFFF0): + return expr + if expr.args[0] != ir_arch.arch.regs.ESP_init: + return expr + return expr.args[0] + +expr_simp.enable_passes({m2_expr.ExprOp: [simp_stack_align]}) + +# Init +machine = guess_machine() +mn, dis_engine, ira = machine.mn, machine.dis_engine, machine.ira + +bs = bin_stream_ida() +mdis = dis_engine(bs, dont_dis_nulstart_bloc=True) +ir_arch = ira(mdis.symbol_pool) + +# Get the current function +func = idaapi.get_func(ScreenEA()) +addr = func.startEA +blocks = mdis.dis_multibloc(addr) +# Generate IR +for block in blocks: + ir_arch.add_bloc(block) + + +class MyCTypeAnalyzer(CTypeAnalyzer): + allow_none_result = True + + +class MyExprToAccessC(ExprToAccessC): + allow_none_result = True + + +class MyCHandler(CHandler): + cTypeAnalyzer_cls = MyCTypeAnalyzer + exprToAccessC_cls = MyExprToAccessC + + +# Get settings +settings = TypePropagationForm(ir_arch) +ret = settings.Execute() +if ret: + + ir_arch = ir_arch + + + types_mngr = get_types_mngr(settings.headerFile.value) + mychandler = MyCHandler(types_mngr, {}) + + #print 'Expr', settings.iStr1.value + #print 'Type', settings.iStr2.value + #print 'Header', settings.headerFile.value + print 'InfoTypes', settings.strTypesInfo.value + infos_types = {} + for line in settings.strTypesInfo.value.split('\n'): + if not line: + continue + expr_str, ctype_str = line.split(':') + expr_str, ctype_str = expr_str.strip(), ctype_str.strip() + expr = str_to_expr(expr_str) + ast = mychandler.type_analyzer.types_mngr.types_ast.parse_c_type(ctype_str) + ctype = mychandler.type_analyzer.types_mngr.types_ast.ast_parse_declaration(ast.ext[0]) + objc = types_mngr.get_objc(ctype) + print '='*20 + print expr, objc + infos_types[expr] = objc + + # Add fake head + lbl_real_start = ir_arch.symbol_pool.getby_offset(addr) + lbl_head = ir_arch.symbol_pool.getby_name_create("start") + + first_block = blocks.label2block(lbl_real_start) + + assignblk_head = AssignBlock([ExprAff(ir_arch.IRDst, ExprId(lbl_real_start, ir_arch.IRDst.size)), + ExprAff(ir_arch.sp, ir_arch.arch.regs.regs_init[ir_arch.sp]) + ], first_block.lines[0]) + irb_head = IRBlock(lbl_head, [assignblk_head]) + ir_arch.blocks[lbl_head] = irb_head + ir_arch.graph.add_uniq_edge(lbl_head, lbl_real_start) + + class Engine(SymbExecCType): + def __init__(self, state): + mychandler = MyCHandler(types_mngr, state.infos_types) + super(Engine, self).__init__(ir_arch, + state.symbols, + state.infos_types, + mychandler) + + + def add_state(todo, states, addr, state): + addr = ir_arch.get_label(addr) + if addr not in states: + states[addr] = state + todo.add(addr) + else: + todo.add(addr) + states[addr] = states[addr].merge(state) + + + state = Engine.StateEngine(infos_types, infos_types) + states = {lbl_head:state} + todo = set([lbl_head]) + done = set() + + while todo: + lbl = todo.pop() + state = states[lbl] + if (lbl, state) in done: + continue + done.add((lbl, state)) + symbexec_engine = Engine(state) + + get_block(ir_arch, mdis, lbl) + + addr = symbexec_engine.emul_ir_block(lbl) + symbexec_engine.del_mem_above_stack(ir_arch.sp) + + ir_arch._graph = None + sons = ir_arch.graph.successors(lbl) + for son in sons: + if son.offset is None: + continue + add_state(todo, states, son.offset, symbexec_engine.get_state()) + + + class SymbExecCTypeFix(SymbExecCType): + def emulbloc(self, irb, step=False): + """ + Symbolic execution of the @irb on the current state + @irb: irblock instance + @step: display intermediate steps + """ + offset2cmt = {} + for assignblk in irb.irs: + instr = assignblk.instr + tmp_rw = assignblk.get_rw() + for dst, src in assignblk.iteritems(): + for arg in set(instr.args).union(set([src])): + if arg in tmp_rw and arg not in tmp_rw.values(): + continue + objc = self.eval_expr(arg) + if objc is None: + continue + if self.is_type_offset(objc): + continue + offset2cmt.setdefault(instr.offset, set()).add("%s: %s" % (arg, str(objc))) + self.eval_ir(assignblk) + + for offset, value in offset2cmt.iteritems(): + idc.MakeComm(offset, '\n'.join(value)) + + return self.eval_expr(self.ir_arch.IRDst) + + + class Engine(SymbExecCTypeFix): + def __init__(self, state): + mychandler = MyCHandler(types_mngr, state.infos_types) + super(Engine, self).__init__(ir_arch, + state.symbols, + state.infos_types, + mychandler) + + + for lbl, state in states.iteritems(): + symbexec_engine = Engine(state) + addr = symbexec_engine.emul_ir_block(lbl) + symbexec_engine.del_mem_above_stack(ir_arch.sp) + + + + + + + + diff --git a/example/ida/rpyc_ida.py b/example/ida/rpyc_ida.py new file mode 100644 index 00000000..21faf43a --- /dev/null +++ b/example/ida/rpyc_ida.py @@ -0,0 +1,25 @@ +"""rpyc IDA server""" + +from rpyc.utils.server import OneShotServer +from rpyc.core import SlaveService + + + +def serve_threaded(hostname="localhost", port=4455): + """This will run a rpyc server in IDA, so a custom script client will be + able to access IDA api. + WARNING: IDA will be locked until the client script terminates. + """ + + print 'Running server' + server = OneShotServer(SlaveService, hostname=hostname, + port=port, reuse_addr=True, ipv6=False, + authenticator=None, + auto_register=False) + server.logger.quiet = False + + return server.start() + + +if __name__ == "__main__": + serve_threaded() diff --git a/miasm2/core/ctypesmngr.py b/miasm2/core/ctypesmngr.py index 0c1d55f4..eeffb696 100644 --- a/miasm2/core/ctypesmngr.py +++ b/miasm2/core/ctypesmngr.py @@ -8,14 +8,16 @@ RE_HASH_CMT = re.compile(r'^#\s*\d+.*$', flags=re.MULTILINE) # http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1124.pdf -def c_to_ast(c_str): +def c_to_ast(parser, c_str): """Transform a @c_str into a C ast Note: will ignore lines containing code refs ie: # 23 "miasm.h" + + @parser: pycparser instance + @c_str: c string """ new_str = re.sub(RE_HASH_CMT, "", c_str) - parser = c_parser.CParser() return parser.parse(new_str, filename='<stdin>') @@ -301,6 +303,9 @@ class CAstTypes(object): self._typedefs = dict(knowntypedefs) self.cpt = 0 self.loc_to_decl_info = {} + self.parser = c_parser.CParser() + self._cpt_decl = 0 + self.ast_to_typeid_rules = { c_ast.Struct: self.ast_to_typeid_struct, @@ -458,7 +463,7 @@ class CAstTypes(object): """ c_str = self.digest_decl(c_str) - ast = c_to_ast(c_str) + ast = c_to_ast(self.parser, c_str) self.add_c_decl_from_ast(ast) return ast @@ -693,3 +698,14 @@ class CAstTypes(object): """ for ext in ast.ext: ret = self.ast_parse_declaration(ext) + + def parse_c_type(self, c_str): + """Parse a C string representing a C type and return the associated + Miasm C object. + @c_str: C string of a C type + """ + + new_str = "%s __MIASM_INTERNAL_%s;" % (c_str, self._cpt_decl) + ret = self.parser.cparser.parse(input=new_str, lexer=self.parser.clex) + self._cpt_decl += 1 + return ret diff --git a/miasm2/expression/parser.py b/miasm2/expression/parser.py new file mode 100644 index 00000000..b3f3af1c --- /dev/null +++ b/miasm2/expression/parser.py @@ -0,0 +1,70 @@ +import pyparsing +from miasm2.expression.expression import ExprInt, ExprId, ExprSlice, ExprMem, \ + ExprCond, ExprCompose, ExprOp, ExprAff + +integer = pyparsing.Word(pyparsing.nums).setParseAction(lambda t: + int(t[0])) +hex_word = pyparsing.Literal('0x') + pyparsing.Word(pyparsing.hexnums) +hex_int = pyparsing.Combine(hex_word).setParseAction(lambda t: + int(t[0], 16)) + +str_int_pos = (hex_int | integer) +str_int_neg = (pyparsing.Suppress('-') + \ + (hex_int | integer)).setParseAction(lambda t: -t[0]) + +str_int = str_int_pos | str_int_neg + +STR_EXPRINT = pyparsing.Suppress("ExprInt") +STR_EXPRID = pyparsing.Suppress("ExprId") +STR_EXPRSLICE = pyparsing.Suppress("ExprSlice") +STR_EXPRMEM = pyparsing.Suppress("ExprMem") +STR_EXPRCOND = pyparsing.Suppress("ExprCond") +STR_EXPRCOMPOSE = pyparsing.Suppress("ExprCompose") +STR_EXPROP = pyparsing.Suppress("ExprOp") +STR_EXPRAFF = pyparsing.Suppress("ExprAff") + +STR_COMMA = pyparsing.Suppress(",") +LPARENTHESIS = pyparsing.Suppress("(") +RPARENTHESIS = pyparsing.Suppress(")") + + +string_quote = pyparsing.QuotedString(quoteChar="'", escChar='\\', escQuote='\\') +string_dquote = pyparsing.QuotedString(quoteChar='"', escChar='\\', escQuote='\\') + + +string = string_quote | string_dquote + +expr = pyparsing.Forward() + +expr_int = pyparsing.Group(STR_EXPRINT + LPARENTHESIS + str_int + STR_COMMA + str_int + RPARENTHESIS) +expr_id = pyparsing.Group(STR_EXPRID + LPARENTHESIS + string + STR_COMMA + str_int + RPARENTHESIS) +expr_slice = pyparsing.Group(STR_EXPRSLICE + LPARENTHESIS + expr + STR_COMMA + str_int + STR_COMMA + str_int + RPARENTHESIS) +expr_mem = pyparsing.Group(STR_EXPRMEM + LPARENTHESIS + expr + STR_COMMA + str_int + RPARENTHESIS) +expr_cond = pyparsing.Group(STR_EXPRCOND + LPARENTHESIS + expr + STR_COMMA + expr + STR_COMMA + expr + RPARENTHESIS) +expr_compose = pyparsing.Group(STR_EXPRCOMPOSE + LPARENTHESIS + pyparsing.delimitedList(expr, delim=',') + RPARENTHESIS) +expr_op = pyparsing.Group(STR_EXPROP + LPARENTHESIS + string + STR_COMMA + pyparsing.delimitedList(expr, delim=',') + RPARENTHESIS) +expr_aff = pyparsing.Group(STR_EXPRAFF + LPARENTHESIS + expr + STR_COMMA + expr + RPARENTHESIS) + +expr << (expr_int | expr_id | expr_slice | expr_mem | expr_cond | \ + expr_compose | expr_op | expr_aff) + +expr_int.setParseAction(lambda t: ExprInt(*t[0])) +expr_id.setParseAction(lambda t: ExprId(*t[0])) +expr_slice.setParseAction(lambda t: ExprSlice(*t[0])) +expr_mem.setParseAction(lambda t: ExprMem(*t[0])) +expr_cond.setParseAction(lambda t: ExprCond(*t[0])) +expr_compose.setParseAction(lambda t: ExprCompose(*t[0])) +expr_op.setParseAction(lambda t: ExprOp(*t[0])) +expr_aff.setParseAction(lambda t: ExprAff(*t[0])) + + +def str_to_expr(str_in): + """Parse the @str_in and return the corresponoding Expression + @str_in: repr string of an Expression""" + + try: + value = expr.parseString(str_in) + except: + raise RuntimeError("Cannot parse expression %s" % str_in) + assert len(value) == 1 + return value[0] diff --git a/miasm2/ir/ir.py b/miasm2/ir/ir.py index 2509e901..7c39cf04 100644 --- a/miasm2/ir/ir.py +++ b/miasm2/ir/ir.py @@ -810,6 +810,7 @@ class IntermediateRepresentation(object): if (len(self.graph.predecessors(label)) == 0 and len(self.graph.successors(label)) == 0): self.graph.del_node(label) + del self.blocks[label] return modified def merge_blocks(self): diff --git a/miasm2/ir/symbexec.py b/miasm2/ir/symbexec.py index f9444424..e98744c0 100644 --- a/miasm2/ir/symbexec.py +++ b/miasm2/ir/symbexec.py @@ -97,19 +97,77 @@ class SymbolMngr(object): return new_symbols +class StateEngine(object): + """Stores an Engine state""" + + def merge(self, other): + """Generate a new state, representing the merge of self and @other + @other: a StateEngine instance""" + + raise NotImplementedError("Abstract method") + + +class SymbolicState(StateEngine): + """Stores a SymbolicExecutionEngine state""" + + def __init__(self, dct): + self._symbols = frozenset(dct.items()) + + def __hash__(self): + return hash((self.__class__, self._symbols)) + + def __eq__(self, other): + if self is other: + return True + if self.__class__ != other.__class__: + return False + return self.symbols == other.symbols + + def __iter__(self): + for dst, src in self._symbols: + yield dst, src + + def iteritems(self): + return self.__iter__() + + def merge(self, other): + """Merge two symbolic states + Only equal expressions are kept in both states + @other: second symbolic state + """ + + symb_a = self.symbols + symb_b = other.symbols + intersection = set(symb_a.keys()).intersection(symb_b.keys()) + out = {} + for dst in intersection: + if symb_a[dst] == symb_b[dst]: + out[dst] = symb_a[dst] + return self.__class__(out) + + @property + def symbols(self): + """Return the dictionnary of known symbols""" + return dict(self._symbols) + + class SymbolicExecutionEngine(object): """ Symbolic execution engine Allow IR code emulation in symbolic domain """ - def __init__(self, ir_arch, known_symbols, + StateEngine = SymbolicState + + def __init__(self, ir_arch, state, func_read=None, func_write=None, sb_expr_simp=expr_simp): + self.symbols = SymbolMngr() - for expr, value in known_symbols.items(): - self.symbols[expr] = value + for dst, src in state.iteritems(): + self.symbols[dst] = src + self.func_read = func_read self.func_write = func_write self.ir_arch = ir_arch @@ -190,6 +248,18 @@ class SymbolicExecutionEngine(object): ret = self.expr_simp(self.symbols[ret][:size]) return ret + def get_state(self): + """Return the current state of the SymbolicEngine""" + state = self.StateEngine(dict(self.symbols)) + return state + + def set_state(self, state): + """Restaure the @state of the engine + @state: StateEngine instance + """ + self.symbols = SymbolMngr() + for dst, src in dict(state).iteritems(): + self.symbols[dst] = src def apply_expr_on_state_visit_cache(self, expr, state, cache, level=0): """ @@ -198,38 +268,40 @@ class SymbolicExecutionEngine(object): 2. simplify """ + expr = self.expr_simp(expr) + #print '\t'*level, "Eval:", expr if expr in cache: ret = cache[expr] #print "In cache!", ret - elif isinstance(expr, m2_expr.ExprInt): + elif expr.is_int(): return expr - elif isinstance(expr, m2_expr.ExprId): + elif expr.is_id(): if isinstance(expr.name, asmblock.AsmLabel) and expr.name.offset is not None: ret = m2_expr.ExprInt(expr.name.offset, expr.size) else: ret = state.get(expr, expr) - elif isinstance(expr, m2_expr.ExprMem): + elif expr.is_mem(): ptr = self.apply_expr_on_state_visit_cache(expr.arg, state, cache, level+1) ret = m2_expr.ExprMem(ptr, expr.size) ret = self.get_mem_state(ret) assert expr.size == ret.size - elif isinstance(expr, m2_expr.ExprCond): + elif expr.is_cond(): cond = self.apply_expr_on_state_visit_cache(expr.cond, state, cache, level+1) src1 = self.apply_expr_on_state_visit_cache(expr.src1, state, cache, level+1) src2 = self.apply_expr_on_state_visit_cache(expr.src2, state, cache, level+1) ret = m2_expr.ExprCond(cond, src1, src2) - elif isinstance(expr, m2_expr.ExprSlice): + elif expr.is_slice(): arg = self.apply_expr_on_state_visit_cache(expr.arg, state, cache, level+1) ret = m2_expr.ExprSlice(arg, expr.start, expr.stop) - elif isinstance(expr, m2_expr.ExprOp): + elif expr.is_op(): args = [] for oarg in expr.args: arg = self.apply_expr_on_state_visit_cache(oarg, state, cache, level+1) assert oarg.size == arg.size args.append(arg) ret = m2_expr.ExprOp(expr.op, *args) - elif isinstance(expr, m2_expr.ExprCompose): + elif expr.is_compose(): args = [] for arg in expr.args: args.append(self.apply_expr_on_state_visit_cache(arg, state, cache, level+1)) @@ -390,7 +462,7 @@ class SymbolicExecutionEngine(object): elif isinstance(dst, m2_expr.ExprId): pool_out[dst] = src else: - raise ValueError("affected zarb", str(dst)) + raise ValueError("Unknown destination type", str(dst)) return pool_out.iteritems() @@ -442,6 +514,7 @@ class SymbolicExecutionEngine(object): """ for assignblk in irb.irs: if step: + print 'Instr', assignblk.instr print 'Assignblk:' print assignblk print '_' * 80 diff --git a/miasm2/ir/symbexec_top.py b/miasm2/ir/symbexec_top.py new file mode 100644 index 00000000..fe54c65f --- /dev/null +++ b/miasm2/ir/symbexec_top.py @@ -0,0 +1,221 @@ +from miasm2.ir.symbexec import SymbolicExecutionEngine, StateEngine +from miasm2.expression.simplifications import expr_simp +from miasm2.expression.expression import ExprId, ExprInt, ExprSlice,\ + ExprMem, ExprCond, ExprCompose, ExprOp +from miasm2.core import asmblock + + +TOPSTR = "TOP" + +def exprid_top(expr): + """Return a TOP expression (ExprId("TOP") of size @expr.size + @expr: expression to replace with TOP + """ + return ExprId(TOPSTR, expr.size) + + +class SymbolicStateTop(StateEngine): + + def __init__(self, dct, regstop): + self._symbols = frozenset(dct.items()) + self._regstop = frozenset(regstop) + + def __hash__(self): + return hash((self.__class__, self._symbols, self._regstop)) + + def __str__(self): + out = [] + for dst, src in sorted(self._symbols): + out.append("%s = %s" % (dst, src)) + for dst in self._regstop: + out.append('TOP %s' %dst) + return "\n".join(out) + + def __eq__(self, other): + if self is other: + return True + if self.__class__ != other.__class__: + return False + return (self.symbols == other.symbols and + self.regstop == other.regstop) + + def __iter__(self): + for dst, src in self._symbols: + yield dst, src + + def merge(self, other): + """Merge two symbolic states + Only equal expressions are kept in both states + @other: second symbolic state + """ + symb_a = self.symbols + symb_b = other.symbols + intersection = set(symb_a.keys()).intersection(symb_b.keys()) + diff = set(symb_a.keys()).union(symb_b.keys()).difference(intersection) + symbols = {} + regstop = set() + for dst in diff: + if dst.is_id(): + regstop.add(dst) + for dst in intersection: + if symb_a[dst] == symb_b[dst]: + symbols[dst] = symb_a[dst] + else: + regstop.add(dst) + return self.__class__(symbols, regstop) + + @property + def symbols(self): + """Return the dictionnary of known symbols""" + return dict(self._symbols) + + @property + def regstop(self): + """Return the set of expression with TOP values""" + return self._regstop + +class SymbExecTopNoMem(SymbolicExecutionEngine): + """ + Symbolic execution, include TOP value. + ExprMem are not propagated. + Any computation involving a TOP will generate TOP. + """ + + StateEngine = SymbolicStateTop + + def __init__(self, ir_arch, state, regstop, + func_read=None, + func_write=None, + sb_expr_simp=expr_simp): + known_symbols = dict(state) + super(SymbExecTopNoMem, self).__init__(ir_arch, known_symbols, + func_read, + func_write, + sb_expr_simp) + self.regstop = set(regstop) + + def get_state(self): + """Return the current state of the SymbolicEngine""" + return self.StateEngine(self.symbols, self.regstop) + + def eval_expr(self, expr, eval_cache=None): + if expr in self.regstop: + return exprid_top(expr) + ret = self.apply_expr_on_state(expr, eval_cache) + return ret + + def manage_mem(self, expr, state, cache, level): + ptr = self.apply_expr_on_state_visit_cache(expr.arg, state, cache, level+1) + ret = ExprMem(ptr, expr.size) + ret = self.get_mem_state(ret) + if ret.is_mem() and not ret.arg.is_int() and ret.arg == ptr: + ret = exprid_top(expr) + assert expr.size == ret.size + return ret + + def apply_expr_on_state_visit_cache(self, expr, state, cache, level=0): + """ + Deep First evaluate nodes: + 1. evaluate node's sons + 2. simplify + """ + + if expr in cache: + ret = cache[expr] + elif expr in state: + return state[expr] + elif expr.is_int(): + ret = expr + elif expr.is_id(): + if isinstance(expr.name, asmblock.asm_label) and expr.name.offset is not None: + ret = ExprInt(expr.name.offset, expr.size) + elif expr in self.regstop: + ret = exprid_top(expr) + else: + ret = state.get(expr, expr) + elif expr.is_mem(): + ret = self.manage_mem(expr, state, cache, level) + elif expr.is_cond(): + cond = self.apply_expr_on_state_visit_cache(expr.cond, state, cache, level+1) + src1 = self.apply_expr_on_state_visit_cache(expr.src1, state, cache, level+1) + src2 = self.apply_expr_on_state_visit_cache(expr.src2, state, cache, level+1) + if cond.is_id(TOPSTR) or src1.is_id(TOPSTR) or src2.is_id(TOPSTR): + ret = exprid_top(expr) + else: + ret = ExprCond(cond, src1, src2) + elif expr.is_slice(): + arg = self.apply_expr_on_state_visit_cache(expr.arg, state, cache, level+1) + if arg.is_id(TOPSTR): + ret = exprid_top(expr) + else: + ret = ExprSlice(arg, expr.start, expr.stop) + elif expr.is_op(): + args = [] + for oarg in expr.args: + arg = self.apply_expr_on_state_visit_cache(oarg, state, cache, level+1) + assert oarg.size == arg.size + if arg.is_id(TOPSTR): + return exprid_top(expr) + args.append(arg) + ret = ExprOp(expr.op, *args) + elif expr.is_compose(): + args = [] + for arg in expr.args: + arg = self.apply_expr_on_state_visit_cache(arg, state, cache, level+1) + if arg.is_id(TOPSTR): + return exprid_top(expr) + + args.append(arg) + ret = ExprCompose(*args) + else: + raise TypeError("Unknown expr type") + ret = self.expr_simp(ret) + assert expr.size == ret.size + cache[expr] = ret + return ret + + def apply_change(self, dst, src): + eval_cache = {} + if dst.is_mem(): + # If Write to TOP, forget all memory information + ret = self.eval_expr(dst.arg, eval_cache) + if ret.is_id(TOPSTR): + to_del = set() + for dst_tmp in self.symbols: + if dst_tmp.is_mem(): + to_del.add(dst_tmp) + for dst_to_del in to_del: + del self.symbols[dst_to_del] + return + src_o = self.expr_simp(src) + + # Force update. Ex: + # EBX += 1 (state: EBX = EBX+1) + # EBX -= 1 (state: EBX = EBX, must be updated) + if dst in self.regstop: + self.regstop.discard(dst) + self.symbols[dst] = src_o + + if dst == src_o: + # Avoid useless X = X information + del self.symbols[dst] + + if src_o.is_id(TOPSTR): + if dst in self.symbols: + del self.symbols[dst] + self.regstop.add(dst) + +class SymbExecTop(SymbExecTopNoMem): + """ + Symbolic execution, include TOP value. + ExprMem are propagated. + Any computation involving a TOP will generate TOP. + WARNING: avoid memory aliases here! + """ + + def manage_mem(self, expr, state, cache, level): + ptr = self.apply_expr_on_state_visit_cache(expr.arg, state, cache, level+1) + ret = ExprMem(ptr, expr.size) + ret = self.get_mem_state(ret) + assert expr.size == ret.size + return ret diff --git a/miasm2/ir/symbexec_types.py b/miasm2/ir/symbexec_types.py new file mode 100644 index 00000000..df159939 --- /dev/null +++ b/miasm2/ir/symbexec_types.py @@ -0,0 +1,243 @@ +from miasm2.ir.symbexec import SymbolicExecutionEngine, StateEngine +from miasm2.expression.simplifications import expr_simp +from miasm2.expression.expression import ExprId, ExprInt, ExprSlice,\ + ExprMem, ExprCond, ExprCompose, ExprOp + +from miasm2.core.ctypesmngr import CTypeId + + +class SymbolicStateCTypes(StateEngine): + """Store C types of symbols""" + + def __init__(self, dct, infos_types): + self._symbols = frozenset(dct.items()) + self._infos_types = frozenset(infos_types.items()) + + def __hash__(self): + return hash((self.__class__, self._symbols, self._infos_types)) + + def __str__(self): + out = [] + for dst, src in sorted(self._symbols): + out.append("%s = %s" % (dst, src)) + return "\n".join(out) + + def __eq__(self, other): + if self is other: + return True + if self.__class__ != other.__class__: + return False + return (self.symbols == other.symbols and + self.infos_types == other.infos_types) + + def __iter__(self): + for dst, src in self._symbols: + yield dst, src + + def merge(self, other): + """Merge two symbolic states + Only expressions with equal C types in both states are kept. + @other: second symbolic state + """ + symb_a = self.symbols + symb_b = other.symbols + types_a = set(self.infos_types.items()) + types_b = set(other.infos_types.items()) + intersection = set(symb_a.keys()).intersection(symb_b.keys()) + symbols = {} + infos_types = dict(types_a.intersection(types_b)) + for dst in intersection: + if symb_a[dst] == symb_b[dst]: + symbols[dst] = symb_a[dst] + return self.__class__(symbols, infos_types) + + @property + def symbols(self): + """Return the dictionnary of known symbols'types""" + return dict(self._symbols) + + @property + def infos_types(self): + """Return known types of the state""" + return dict(self._infos_types) + + +class SymbExecCType(SymbolicExecutionEngine): + """Engine of C types propagation + WARNING: avoid memory aliases here! + """ + + StateEngine = SymbolicStateCTypes + OBJC_INTERNAL = "___OBJC___" + + def __init__(self, ir_arch, + symbols, infos_types, + chandler, + func_read=None, + func_write=None, + sb_expr_simp=expr_simp): + self.chandler = chandler + self.infos_types = dict(infos_types) + super(SymbExecCType, self).__init__(ir_arch, + {}, + func_read, + func_write, + sb_expr_simp) + self.symbols = dict(symbols) + offset_types = [] + for name in [('int',), ('long',), + ('long', 'long'), + ('char',), ('short',), + + ('unsigned', 'char',), ('unsigned', 'short',), + ('unsigned', 'int',), ('unsigned', 'long',), + ('unsigned', 'long', 'long')]: + objc = self.chandler.type_analyzer.types_mngr.get_objc(CTypeId(*name)) + offset_types.append(objc) + self.offset_types = offset_types + + def is_type_offset(self, objc): + """Return True if @objc is char/short/int/long""" + return objc in self.offset_types + + def get_tpye_int_by_size(self, size): + """Return a char/short/int/long type with the size equal to @size + @size: size in bit""" + + for objc in self.offset_types: + if objc.size == size / 8: + return objc + return None + + def is_offset_list(self, types, size): + """Return the corresponding char/short/int/long type of @size, if every + types in the list @types are type offset + @types: a list of c types + @size: size in bit""" + + for arg_type in types: + if not self.is_type_offset(arg_type): + return None + objc = self.get_tpye_int_by_size(size) + if objc: + return objc + # default size + objc = self.offset_types[0] + return objc + + def apply_expr_on_state_visit_cache(self, expr, state, cache, level=0): + """ + Deep First evaluate nodes: + 1. evaluate node's sons + 2. simplify + """ + + expr = self.expr_simp(expr) + + if expr in cache: + return cache[expr] + elif expr in state: + return state[expr] + elif isinstance(expr, ExprInt): + objc = self.get_tpye_int_by_size(expr.size) + if objc is None: + objc = self.chandler.type_analyzer.types_mngr.get_objc(CTypeId('int')) + return objc + elif isinstance(expr, ExprId): + if expr in state: + return state[expr] + return None + elif isinstance(expr, ExprMem): + ptr = self.apply_expr_on_state_visit_cache(expr.arg, state, cache, level + 1) + if ptr is None: + return None + self.chandler.type_analyzer.expr_types[self.OBJC_INTERNAL] = ptr + ptr_expr = ExprId(self.OBJC_INTERNAL, expr.arg.size) + objcs = self.chandler.expr_to_types(ExprMem(ptr_expr, expr.size)) + if objcs is None: + return None + objc = objcs[0] + return objc + elif isinstance(expr, ExprCond): + src1 = self.apply_expr_on_state_visit_cache(expr.src1, state, cache, level + 1) + src2 = self.apply_expr_on_state_visit_cache(expr.src2, state, cache, level + 1) + types = [src1, src2] + objc = self.is_offset_list(types, expr.size) + if objc: + return objc + return None + elif isinstance(expr, ExprSlice): + objc = self.get_tpye_int_by_size(expr.size) + if objc is None: + # default size + objc = self.offset_types[0] + return objc + elif isinstance(expr, ExprOp): + args = [] + types = [] + for oarg in expr.args: + arg = self.apply_expr_on_state_visit_cache(oarg, state, cache, level + 1) + types.append(arg) + if None in types: + return None + objc = self.is_offset_list(types, expr.size) + if objc: + return objc + # Find Base + int + if expr.op != '+': + return None + args = list(expr.args) + if args[-1].is_int(): + offset = args.pop() + types.pop() + if len(args) == 1: + arg, arg_type = args.pop(), types.pop() + self.chandler.type_analyzer.expr_types[self.OBJC_INTERNAL] = arg_type + ptr_expr = ExprId(self.OBJC_INTERNAL, arg.size) + objc = self.chandler.expr_to_types(ptr_expr + offset) + objc = objc[0] + return objc + return None + elif isinstance(expr, ExprCompose): + types = set() + for oarg in expr.args: + arg = self.apply_expr_on_state_visit_cache(oarg, state, cache, level + 1) + types.add(arg) + objc = self.is_offset_list(types, expr.size) + if objc: + return objc + return None + else: + raise TypeError("Unknown expr type") + + def get_state(self): + """Return the current state of the SymbolicEngine""" + return self.StateEngine(self.symbols, self.infos_types) + + def eval_ir_expr(self, assignblk): + """ + Evaluate AssignBlock on the current state + @assignblk: AssignBlock instance + """ + pool_out = {} + eval_cache = {} + for dst, src in assignblk.iteritems(): + src = self.eval_expr(src, eval_cache) + if isinstance(dst, ExprMem): + continue + elif isinstance(dst, ExprId): + pool_out[dst] = src + else: + raise ValueError("affected zarb", str(dst)) + return pool_out.iteritems() + + def apply_change(self, dst, src): + objc = src + if objc is None and dst in self.symbols: + del self.symbols[dst] + else: + self.symbols[dst] = objc + + def del_mem_above_stack(self, stack_ptr): + """No stack deletion""" + return diff --git a/miasm2/jitter/emulatedsymbexec.py b/miasm2/jitter/emulatedsymbexec.py index d4a67fe8..97f038dc 100644 --- a/miasm2/jitter/emulatedsymbexec.py +++ b/miasm2/jitter/emulatedsymbexec.py @@ -105,6 +105,8 @@ class EmulatedSymbExec(SymbolicExecutionEngine): """Handle 'segm' operation""" if not expr.is_op_segm(): return expr + if not expr.args[0].is_int(): + return expr segm_nb = int(expr.args[0]) segmaddr = self.cpu.get_segm_base(segm_nb) return e_s(m2_expr.ExprInt(segmaddr, expr.size) + expr.args[1]) @@ -114,7 +116,9 @@ class EmulatedSymbExec(SymbolicExecutionEngine): if expr.op != "cpuid": return expr - a, reg_num = (int(x) for x in expr.args) + if any(not arg.is_int() for arg in expr.args): + return expr + a, reg_num = (int(arg) for arg in expr.args) # Not found error is keeped on purpose return m2_expr.ExprInt(self.cpuid[a][reg_num], expr.size) diff --git a/miasm2/jitter/llvmconvert.py b/miasm2/jitter/llvmconvert.py index ed55aff8..cf6dea31 100644 --- a/miasm2/jitter/llvmconvert.py +++ b/miasm2/jitter/llvmconvert.py @@ -135,13 +135,31 @@ class LLVMContext_JIT(LLVMContext): self.library_filenames = library_filenames self.ir_arch = ir_arch self.arch_specific() + self.load_libraries() LLVMContext.__init__(self, name) self.vmcpu = {} - def new_module(self, name="mod"): - LLVMContext.new_module(self, name) + def load_libraries(self): + # Get LLVM specific functions + name = "libLLVM-%d.%d" % (llvm.llvm_version_info[0], + llvm.llvm_version_info[1], + ) + try: + # On Windows, no need to add ".dll" + self.add_shared_library(name) + except RuntimeError: + try: + # On Linux, ".so" is needed + self.add_shared_library("%s.so" % name) + except RuntimeError: + pass + + # Load additional libraries for lib_fname in self.library_filenames: self.add_shared_library(lib_fname) + + def new_module(self, name="mod"): + LLVMContext.new_module(self, name) self.add_memlookups() self.add_get_exceptionflag() self.add_op() @@ -512,7 +530,8 @@ class LLVMFunction(): Get or create a (LLVM module-)global constant with *name* or *value*. """ - module = self.mod + if name in self.mod.globals: + return self.mod.globals[name] data = llvm_ir.GlobalVariable(self.mod, value.type, name=name) data.global_constant = True data.initializer = value diff --git a/test/arch/x86/unit/asm_test.py b/test/arch/x86/unit/asm_test.py index aba47df1..8a6b215c 100644 --- a/test/arch/x86/unit/asm_test.py +++ b/test/arch/x86/unit/asm_test.py @@ -90,3 +90,13 @@ class Asm_Test_16(Asm_Test): self.myjit.vm.add_memory_page(self.run_addr, PAGE_READ | PAGE_WRITE, self.assembly) self.myjit.push_uint16_t(self.ret_addr) self.myjit.add_breakpoint(self.ret_addr, lambda x:False) + +class Asm_Test_64(Asm_Test): + arch_name = "x86_64" + arch_attrib = 64 + ret_addr = 0x1337beef + + def init_machine(self): + self.myjit.vm.add_memory_page(self.run_addr, PAGE_READ | PAGE_WRITE, self.assembly) + self.myjit.push_uint64_t(self.ret_addr) + self.myjit.add_breakpoint(self.ret_addr, lambda x:False) diff --git a/test/arch/x86/unit/mn_div.py b/test/arch/x86/unit/mn_div.py new file mode 100644 index 00000000..84569607 --- /dev/null +++ b/test/arch/x86/unit/mn_div.py @@ -0,0 +1,17 @@ +import sys +from asm_test import Asm_Test_64 + +class Test_DIV(Asm_Test_64): + TXT = ''' +main: + MOV RAX, 0x8877665544332211 + MOV RBX, 0x11223344556677 + DIV RBX + RET + ''' + def check(self): + assert self.myjit.cpu.RAX == 0x7F7 + assert self.myjit.cpu.RDX == 0x440 + +if __name__ == "__main__": + [test(*sys.argv[1:])() for test in [Test_DIV]] diff --git a/test/expression/parser.py b/test/expression/parser.py new file mode 100644 index 00000000..9c01c8a1 --- /dev/null +++ b/test/expression/parser.py @@ -0,0 +1,16 @@ +from miasm2.expression.parser import str_to_expr +from miasm2.expression.expression import ExprInt, ExprId, ExprSlice, ExprMem, \ + ExprCond, ExprCompose, ExprOp, ExprAff + +for expr_test in [ExprInt(0x12, 32), + ExprId('test', 32), + ExprSlice(ExprInt(0x10, 32), 0, 8), + ExprMem(ExprInt(0x10, 32), 32), + ExprCond(ExprInt(0x10, 32), ExprInt(0x11, 32), ExprInt(0x12, 32)), + ExprCompose(ExprInt(0x10, 16), ExprInt(0x11, 8), ExprInt(0x12, 8)), + ExprInt(0x11, 8) + ExprInt(0x12, 8), + ExprAff(ExprId('EAX', 32), ExprInt(0x12, 32)), + ]: + + print 'Test: %s' % expr_test + assert str_to_expr(repr(expr_test)) == expr_test diff --git a/test/test_all.py b/test/test_all.py index 41954ff1..3da2dbb5 100755 --- a/test/test_all.py +++ b/test/test_all.py @@ -60,6 +60,7 @@ class ArchUnitTest(RegressionTest): # script -> blacklisted jitter blacklist = { "x86/unit/mn_float.py": ["python", "llvm"], + "x86/unit/mn_div.py": ["tcc", "gcc"], } for script in ["x86/sem.py", "x86/unit/mn_strings.py", @@ -80,6 +81,7 @@ for script in ["x86/sem.py", "x86/unit/mn_pushpop.py", "x86/unit/mn_seh.py", "x86/unit/mn_cpuid.py", + "x86/unit/mn_div.py", "arm/arch.py", "arm/sem.py", "aarch64/unit/mn_ubfm.py", @@ -241,6 +243,7 @@ for script in ["modint.py", "simplifications.py", "expression_helper.py", "expr_pickle.py", + "parser.py", ]: testset += RegressionTest([script], base_dir="expression") |