diff options
Diffstat (limited to 'miasm2')
| -rw-r--r-- | miasm2/core/ctypesmngr.py | 22 | ||||
| -rw-r--r-- | miasm2/expression/parser.py | 70 | ||||
| -rw-r--r-- | miasm2/ir/ir.py | 1 | ||||
| -rw-r--r-- | miasm2/ir/symbexec.py | 95 | ||||
| -rw-r--r-- | miasm2/ir/symbexec_top.py | 221 | ||||
| -rw-r--r-- | miasm2/ir/symbexec_types.py | 243 | ||||
| -rw-r--r-- | miasm2/jitter/emulatedsymbexec.py | 6 | ||||
| -rw-r--r-- | miasm2/jitter/llvmconvert.py | 25 |
8 files changed, 665 insertions, 18 deletions
diff --git a/miasm2/core/ctypesmngr.py b/miasm2/core/ctypesmngr.py index 0c1d55f4..eeffb696 100644 --- a/miasm2/core/ctypesmngr.py +++ b/miasm2/core/ctypesmngr.py @@ -8,14 +8,16 @@ RE_HASH_CMT = re.compile(r'^#\s*\d+.*$', flags=re.MULTILINE) # http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1124.pdf -def c_to_ast(c_str): +def c_to_ast(parser, c_str): """Transform a @c_str into a C ast Note: will ignore lines containing code refs ie: # 23 "miasm.h" + + @parser: pycparser instance + @c_str: c string """ new_str = re.sub(RE_HASH_CMT, "", c_str) - parser = c_parser.CParser() return parser.parse(new_str, filename='<stdin>') @@ -301,6 +303,9 @@ class CAstTypes(object): self._typedefs = dict(knowntypedefs) self.cpt = 0 self.loc_to_decl_info = {} + self.parser = c_parser.CParser() + self._cpt_decl = 0 + self.ast_to_typeid_rules = { c_ast.Struct: self.ast_to_typeid_struct, @@ -458,7 +463,7 @@ class CAstTypes(object): """ c_str = self.digest_decl(c_str) - ast = c_to_ast(c_str) + ast = c_to_ast(self.parser, c_str) self.add_c_decl_from_ast(ast) return ast @@ -693,3 +698,14 @@ class CAstTypes(object): """ for ext in ast.ext: ret = self.ast_parse_declaration(ext) + + def parse_c_type(self, c_str): + """Parse a C string representing a C type and return the associated + Miasm C object. + @c_str: C string of a C type + """ + + new_str = "%s __MIASM_INTERNAL_%s;" % (c_str, self._cpt_decl) + ret = self.parser.cparser.parse(input=new_str, lexer=self.parser.clex) + self._cpt_decl += 1 + return ret diff --git a/miasm2/expression/parser.py b/miasm2/expression/parser.py new file mode 100644 index 00000000..b3f3af1c --- /dev/null +++ b/miasm2/expression/parser.py @@ -0,0 +1,70 @@ +import pyparsing +from miasm2.expression.expression import ExprInt, ExprId, ExprSlice, ExprMem, \ + ExprCond, ExprCompose, ExprOp, ExprAff + +integer = pyparsing.Word(pyparsing.nums).setParseAction(lambda t: + int(t[0])) +hex_word = pyparsing.Literal('0x') + pyparsing.Word(pyparsing.hexnums) +hex_int = pyparsing.Combine(hex_word).setParseAction(lambda t: + int(t[0], 16)) + +str_int_pos = (hex_int | integer) +str_int_neg = (pyparsing.Suppress('-') + \ + (hex_int | integer)).setParseAction(lambda t: -t[0]) + +str_int = str_int_pos | str_int_neg + +STR_EXPRINT = pyparsing.Suppress("ExprInt") +STR_EXPRID = pyparsing.Suppress("ExprId") +STR_EXPRSLICE = pyparsing.Suppress("ExprSlice") +STR_EXPRMEM = pyparsing.Suppress("ExprMem") +STR_EXPRCOND = pyparsing.Suppress("ExprCond") +STR_EXPRCOMPOSE = pyparsing.Suppress("ExprCompose") +STR_EXPROP = pyparsing.Suppress("ExprOp") +STR_EXPRAFF = pyparsing.Suppress("ExprAff") + +STR_COMMA = pyparsing.Suppress(",") +LPARENTHESIS = pyparsing.Suppress("(") +RPARENTHESIS = pyparsing.Suppress(")") + + +string_quote = pyparsing.QuotedString(quoteChar="'", escChar='\\', escQuote='\\') +string_dquote = pyparsing.QuotedString(quoteChar='"', escChar='\\', escQuote='\\') + + +string = string_quote | string_dquote + +expr = pyparsing.Forward() + +expr_int = pyparsing.Group(STR_EXPRINT + LPARENTHESIS + str_int + STR_COMMA + str_int + RPARENTHESIS) +expr_id = pyparsing.Group(STR_EXPRID + LPARENTHESIS + string + STR_COMMA + str_int + RPARENTHESIS) +expr_slice = pyparsing.Group(STR_EXPRSLICE + LPARENTHESIS + expr + STR_COMMA + str_int + STR_COMMA + str_int + RPARENTHESIS) +expr_mem = pyparsing.Group(STR_EXPRMEM + LPARENTHESIS + expr + STR_COMMA + str_int + RPARENTHESIS) +expr_cond = pyparsing.Group(STR_EXPRCOND + LPARENTHESIS + expr + STR_COMMA + expr + STR_COMMA + expr + RPARENTHESIS) +expr_compose = pyparsing.Group(STR_EXPRCOMPOSE + LPARENTHESIS + pyparsing.delimitedList(expr, delim=',') + RPARENTHESIS) +expr_op = pyparsing.Group(STR_EXPROP + LPARENTHESIS + string + STR_COMMA + pyparsing.delimitedList(expr, delim=',') + RPARENTHESIS) +expr_aff = pyparsing.Group(STR_EXPRAFF + LPARENTHESIS + expr + STR_COMMA + expr + RPARENTHESIS) + +expr << (expr_int | expr_id | expr_slice | expr_mem | expr_cond | \ + expr_compose | expr_op | expr_aff) + +expr_int.setParseAction(lambda t: ExprInt(*t[0])) +expr_id.setParseAction(lambda t: ExprId(*t[0])) +expr_slice.setParseAction(lambda t: ExprSlice(*t[0])) +expr_mem.setParseAction(lambda t: ExprMem(*t[0])) +expr_cond.setParseAction(lambda t: ExprCond(*t[0])) +expr_compose.setParseAction(lambda t: ExprCompose(*t[0])) +expr_op.setParseAction(lambda t: ExprOp(*t[0])) +expr_aff.setParseAction(lambda t: ExprAff(*t[0])) + + +def str_to_expr(str_in): + """Parse the @str_in and return the corresponoding Expression + @str_in: repr string of an Expression""" + + try: + value = expr.parseString(str_in) + except: + raise RuntimeError("Cannot parse expression %s" % str_in) + assert len(value) == 1 + return value[0] diff --git a/miasm2/ir/ir.py b/miasm2/ir/ir.py index 2509e901..7c39cf04 100644 --- a/miasm2/ir/ir.py +++ b/miasm2/ir/ir.py @@ -810,6 +810,7 @@ class IntermediateRepresentation(object): if (len(self.graph.predecessors(label)) == 0 and len(self.graph.successors(label)) == 0): self.graph.del_node(label) + del self.blocks[label] return modified def merge_blocks(self): diff --git a/miasm2/ir/symbexec.py b/miasm2/ir/symbexec.py index f9444424..e98744c0 100644 --- a/miasm2/ir/symbexec.py +++ b/miasm2/ir/symbexec.py @@ -97,19 +97,77 @@ class SymbolMngr(object): return new_symbols +class StateEngine(object): + """Stores an Engine state""" + + def merge(self, other): + """Generate a new state, representing the merge of self and @other + @other: a StateEngine instance""" + + raise NotImplementedError("Abstract method") + + +class SymbolicState(StateEngine): + """Stores a SymbolicExecutionEngine state""" + + def __init__(self, dct): + self._symbols = frozenset(dct.items()) + + def __hash__(self): + return hash((self.__class__, self._symbols)) + + def __eq__(self, other): + if self is other: + return True + if self.__class__ != other.__class__: + return False + return self.symbols == other.symbols + + def __iter__(self): + for dst, src in self._symbols: + yield dst, src + + def iteritems(self): + return self.__iter__() + + def merge(self, other): + """Merge two symbolic states + Only equal expressions are kept in both states + @other: second symbolic state + """ + + symb_a = self.symbols + symb_b = other.symbols + intersection = set(symb_a.keys()).intersection(symb_b.keys()) + out = {} + for dst in intersection: + if symb_a[dst] == symb_b[dst]: + out[dst] = symb_a[dst] + return self.__class__(out) + + @property + def symbols(self): + """Return the dictionnary of known symbols""" + return dict(self._symbols) + + class SymbolicExecutionEngine(object): """ Symbolic execution engine Allow IR code emulation in symbolic domain """ - def __init__(self, ir_arch, known_symbols, + StateEngine = SymbolicState + + def __init__(self, ir_arch, state, func_read=None, func_write=None, sb_expr_simp=expr_simp): + self.symbols = SymbolMngr() - for expr, value in known_symbols.items(): - self.symbols[expr] = value + for dst, src in state.iteritems(): + self.symbols[dst] = src + self.func_read = func_read self.func_write = func_write self.ir_arch = ir_arch @@ -190,6 +248,18 @@ class SymbolicExecutionEngine(object): ret = self.expr_simp(self.symbols[ret][:size]) return ret + def get_state(self): + """Return the current state of the SymbolicEngine""" + state = self.StateEngine(dict(self.symbols)) + return state + + def set_state(self, state): + """Restaure the @state of the engine + @state: StateEngine instance + """ + self.symbols = SymbolMngr() + for dst, src in dict(state).iteritems(): + self.symbols[dst] = src def apply_expr_on_state_visit_cache(self, expr, state, cache, level=0): """ @@ -198,38 +268,40 @@ class SymbolicExecutionEngine(object): 2. simplify """ + expr = self.expr_simp(expr) + #print '\t'*level, "Eval:", expr if expr in cache: ret = cache[expr] #print "In cache!", ret - elif isinstance(expr, m2_expr.ExprInt): + elif expr.is_int(): return expr - elif isinstance(expr, m2_expr.ExprId): + elif expr.is_id(): if isinstance(expr.name, asmblock.AsmLabel) and expr.name.offset is not None: ret = m2_expr.ExprInt(expr.name.offset, expr.size) else: ret = state.get(expr, expr) - elif isinstance(expr, m2_expr.ExprMem): + elif expr.is_mem(): ptr = self.apply_expr_on_state_visit_cache(expr.arg, state, cache, level+1) ret = m2_expr.ExprMem(ptr, expr.size) ret = self.get_mem_state(ret) assert expr.size == ret.size - elif isinstance(expr, m2_expr.ExprCond): + elif expr.is_cond(): cond = self.apply_expr_on_state_visit_cache(expr.cond, state, cache, level+1) src1 = self.apply_expr_on_state_visit_cache(expr.src1, state, cache, level+1) src2 = self.apply_expr_on_state_visit_cache(expr.src2, state, cache, level+1) ret = m2_expr.ExprCond(cond, src1, src2) - elif isinstance(expr, m2_expr.ExprSlice): + elif expr.is_slice(): arg = self.apply_expr_on_state_visit_cache(expr.arg, state, cache, level+1) ret = m2_expr.ExprSlice(arg, expr.start, expr.stop) - elif isinstance(expr, m2_expr.ExprOp): + elif expr.is_op(): args = [] for oarg in expr.args: arg = self.apply_expr_on_state_visit_cache(oarg, state, cache, level+1) assert oarg.size == arg.size args.append(arg) ret = m2_expr.ExprOp(expr.op, *args) - elif isinstance(expr, m2_expr.ExprCompose): + elif expr.is_compose(): args = [] for arg in expr.args: args.append(self.apply_expr_on_state_visit_cache(arg, state, cache, level+1)) @@ -390,7 +462,7 @@ class SymbolicExecutionEngine(object): elif isinstance(dst, m2_expr.ExprId): pool_out[dst] = src else: - raise ValueError("affected zarb", str(dst)) + raise ValueError("Unknown destination type", str(dst)) return pool_out.iteritems() @@ -442,6 +514,7 @@ class SymbolicExecutionEngine(object): """ for assignblk in irb.irs: if step: + print 'Instr', assignblk.instr print 'Assignblk:' print assignblk print '_' * 80 diff --git a/miasm2/ir/symbexec_top.py b/miasm2/ir/symbexec_top.py new file mode 100644 index 00000000..fe54c65f --- /dev/null +++ b/miasm2/ir/symbexec_top.py @@ -0,0 +1,221 @@ +from miasm2.ir.symbexec import SymbolicExecutionEngine, StateEngine +from miasm2.expression.simplifications import expr_simp +from miasm2.expression.expression import ExprId, ExprInt, ExprSlice,\ + ExprMem, ExprCond, ExprCompose, ExprOp +from miasm2.core import asmblock + + +TOPSTR = "TOP" + +def exprid_top(expr): + """Return a TOP expression (ExprId("TOP") of size @expr.size + @expr: expression to replace with TOP + """ + return ExprId(TOPSTR, expr.size) + + +class SymbolicStateTop(StateEngine): + + def __init__(self, dct, regstop): + self._symbols = frozenset(dct.items()) + self._regstop = frozenset(regstop) + + def __hash__(self): + return hash((self.__class__, self._symbols, self._regstop)) + + def __str__(self): + out = [] + for dst, src in sorted(self._symbols): + out.append("%s = %s" % (dst, src)) + for dst in self._regstop: + out.append('TOP %s' %dst) + return "\n".join(out) + + def __eq__(self, other): + if self is other: + return True + if self.__class__ != other.__class__: + return False + return (self.symbols == other.symbols and + self.regstop == other.regstop) + + def __iter__(self): + for dst, src in self._symbols: + yield dst, src + + def merge(self, other): + """Merge two symbolic states + Only equal expressions are kept in both states + @other: second symbolic state + """ + symb_a = self.symbols + symb_b = other.symbols + intersection = set(symb_a.keys()).intersection(symb_b.keys()) + diff = set(symb_a.keys()).union(symb_b.keys()).difference(intersection) + symbols = {} + regstop = set() + for dst in diff: + if dst.is_id(): + regstop.add(dst) + for dst in intersection: + if symb_a[dst] == symb_b[dst]: + symbols[dst] = symb_a[dst] + else: + regstop.add(dst) + return self.__class__(symbols, regstop) + + @property + def symbols(self): + """Return the dictionnary of known symbols""" + return dict(self._symbols) + + @property + def regstop(self): + """Return the set of expression with TOP values""" + return self._regstop + +class SymbExecTopNoMem(SymbolicExecutionEngine): + """ + Symbolic execution, include TOP value. + ExprMem are not propagated. + Any computation involving a TOP will generate TOP. + """ + + StateEngine = SymbolicStateTop + + def __init__(self, ir_arch, state, regstop, + func_read=None, + func_write=None, + sb_expr_simp=expr_simp): + known_symbols = dict(state) + super(SymbExecTopNoMem, self).__init__(ir_arch, known_symbols, + func_read, + func_write, + sb_expr_simp) + self.regstop = set(regstop) + + def get_state(self): + """Return the current state of the SymbolicEngine""" + return self.StateEngine(self.symbols, self.regstop) + + def eval_expr(self, expr, eval_cache=None): + if expr in self.regstop: + return exprid_top(expr) + ret = self.apply_expr_on_state(expr, eval_cache) + return ret + + def manage_mem(self, expr, state, cache, level): + ptr = self.apply_expr_on_state_visit_cache(expr.arg, state, cache, level+1) + ret = ExprMem(ptr, expr.size) + ret = self.get_mem_state(ret) + if ret.is_mem() and not ret.arg.is_int() and ret.arg == ptr: + ret = exprid_top(expr) + assert expr.size == ret.size + return ret + + def apply_expr_on_state_visit_cache(self, expr, state, cache, level=0): + """ + Deep First evaluate nodes: + 1. evaluate node's sons + 2. simplify + """ + + if expr in cache: + ret = cache[expr] + elif expr in state: + return state[expr] + elif expr.is_int(): + ret = expr + elif expr.is_id(): + if isinstance(expr.name, asmblock.asm_label) and expr.name.offset is not None: + ret = ExprInt(expr.name.offset, expr.size) + elif expr in self.regstop: + ret = exprid_top(expr) + else: + ret = state.get(expr, expr) + elif expr.is_mem(): + ret = self.manage_mem(expr, state, cache, level) + elif expr.is_cond(): + cond = self.apply_expr_on_state_visit_cache(expr.cond, state, cache, level+1) + src1 = self.apply_expr_on_state_visit_cache(expr.src1, state, cache, level+1) + src2 = self.apply_expr_on_state_visit_cache(expr.src2, state, cache, level+1) + if cond.is_id(TOPSTR) or src1.is_id(TOPSTR) or src2.is_id(TOPSTR): + ret = exprid_top(expr) + else: + ret = ExprCond(cond, src1, src2) + elif expr.is_slice(): + arg = self.apply_expr_on_state_visit_cache(expr.arg, state, cache, level+1) + if arg.is_id(TOPSTR): + ret = exprid_top(expr) + else: + ret = ExprSlice(arg, expr.start, expr.stop) + elif expr.is_op(): + args = [] + for oarg in expr.args: + arg = self.apply_expr_on_state_visit_cache(oarg, state, cache, level+1) + assert oarg.size == arg.size + if arg.is_id(TOPSTR): + return exprid_top(expr) + args.append(arg) + ret = ExprOp(expr.op, *args) + elif expr.is_compose(): + args = [] + for arg in expr.args: + arg = self.apply_expr_on_state_visit_cache(arg, state, cache, level+1) + if arg.is_id(TOPSTR): + return exprid_top(expr) + + args.append(arg) + ret = ExprCompose(*args) + else: + raise TypeError("Unknown expr type") + ret = self.expr_simp(ret) + assert expr.size == ret.size + cache[expr] = ret + return ret + + def apply_change(self, dst, src): + eval_cache = {} + if dst.is_mem(): + # If Write to TOP, forget all memory information + ret = self.eval_expr(dst.arg, eval_cache) + if ret.is_id(TOPSTR): + to_del = set() + for dst_tmp in self.symbols: + if dst_tmp.is_mem(): + to_del.add(dst_tmp) + for dst_to_del in to_del: + del self.symbols[dst_to_del] + return + src_o = self.expr_simp(src) + + # Force update. Ex: + # EBX += 1 (state: EBX = EBX+1) + # EBX -= 1 (state: EBX = EBX, must be updated) + if dst in self.regstop: + self.regstop.discard(dst) + self.symbols[dst] = src_o + + if dst == src_o: + # Avoid useless X = X information + del self.symbols[dst] + + if src_o.is_id(TOPSTR): + if dst in self.symbols: + del self.symbols[dst] + self.regstop.add(dst) + +class SymbExecTop(SymbExecTopNoMem): + """ + Symbolic execution, include TOP value. + ExprMem are propagated. + Any computation involving a TOP will generate TOP. + WARNING: avoid memory aliases here! + """ + + def manage_mem(self, expr, state, cache, level): + ptr = self.apply_expr_on_state_visit_cache(expr.arg, state, cache, level+1) + ret = ExprMem(ptr, expr.size) + ret = self.get_mem_state(ret) + assert expr.size == ret.size + return ret diff --git a/miasm2/ir/symbexec_types.py b/miasm2/ir/symbexec_types.py new file mode 100644 index 00000000..df159939 --- /dev/null +++ b/miasm2/ir/symbexec_types.py @@ -0,0 +1,243 @@ +from miasm2.ir.symbexec import SymbolicExecutionEngine, StateEngine +from miasm2.expression.simplifications import expr_simp +from miasm2.expression.expression import ExprId, ExprInt, ExprSlice,\ + ExprMem, ExprCond, ExprCompose, ExprOp + +from miasm2.core.ctypesmngr import CTypeId + + +class SymbolicStateCTypes(StateEngine): + """Store C types of symbols""" + + def __init__(self, dct, infos_types): + self._symbols = frozenset(dct.items()) + self._infos_types = frozenset(infos_types.items()) + + def __hash__(self): + return hash((self.__class__, self._symbols, self._infos_types)) + + def __str__(self): + out = [] + for dst, src in sorted(self._symbols): + out.append("%s = %s" % (dst, src)) + return "\n".join(out) + + def __eq__(self, other): + if self is other: + return True + if self.__class__ != other.__class__: + return False + return (self.symbols == other.symbols and + self.infos_types == other.infos_types) + + def __iter__(self): + for dst, src in self._symbols: + yield dst, src + + def merge(self, other): + """Merge two symbolic states + Only expressions with equal C types in both states are kept. + @other: second symbolic state + """ + symb_a = self.symbols + symb_b = other.symbols + types_a = set(self.infos_types.items()) + types_b = set(other.infos_types.items()) + intersection = set(symb_a.keys()).intersection(symb_b.keys()) + symbols = {} + infos_types = dict(types_a.intersection(types_b)) + for dst in intersection: + if symb_a[dst] == symb_b[dst]: + symbols[dst] = symb_a[dst] + return self.__class__(symbols, infos_types) + + @property + def symbols(self): + """Return the dictionnary of known symbols'types""" + return dict(self._symbols) + + @property + def infos_types(self): + """Return known types of the state""" + return dict(self._infos_types) + + +class SymbExecCType(SymbolicExecutionEngine): + """Engine of C types propagation + WARNING: avoid memory aliases here! + """ + + StateEngine = SymbolicStateCTypes + OBJC_INTERNAL = "___OBJC___" + + def __init__(self, ir_arch, + symbols, infos_types, + chandler, + func_read=None, + func_write=None, + sb_expr_simp=expr_simp): + self.chandler = chandler + self.infos_types = dict(infos_types) + super(SymbExecCType, self).__init__(ir_arch, + {}, + func_read, + func_write, + sb_expr_simp) + self.symbols = dict(symbols) + offset_types = [] + for name in [('int',), ('long',), + ('long', 'long'), + ('char',), ('short',), + + ('unsigned', 'char',), ('unsigned', 'short',), + ('unsigned', 'int',), ('unsigned', 'long',), + ('unsigned', 'long', 'long')]: + objc = self.chandler.type_analyzer.types_mngr.get_objc(CTypeId(*name)) + offset_types.append(objc) + self.offset_types = offset_types + + def is_type_offset(self, objc): + """Return True if @objc is char/short/int/long""" + return objc in self.offset_types + + def get_tpye_int_by_size(self, size): + """Return a char/short/int/long type with the size equal to @size + @size: size in bit""" + + for objc in self.offset_types: + if objc.size == size / 8: + return objc + return None + + def is_offset_list(self, types, size): + """Return the corresponding char/short/int/long type of @size, if every + types in the list @types are type offset + @types: a list of c types + @size: size in bit""" + + for arg_type in types: + if not self.is_type_offset(arg_type): + return None + objc = self.get_tpye_int_by_size(size) + if objc: + return objc + # default size + objc = self.offset_types[0] + return objc + + def apply_expr_on_state_visit_cache(self, expr, state, cache, level=0): + """ + Deep First evaluate nodes: + 1. evaluate node's sons + 2. simplify + """ + + expr = self.expr_simp(expr) + + if expr in cache: + return cache[expr] + elif expr in state: + return state[expr] + elif isinstance(expr, ExprInt): + objc = self.get_tpye_int_by_size(expr.size) + if objc is None: + objc = self.chandler.type_analyzer.types_mngr.get_objc(CTypeId('int')) + return objc + elif isinstance(expr, ExprId): + if expr in state: + return state[expr] + return None + elif isinstance(expr, ExprMem): + ptr = self.apply_expr_on_state_visit_cache(expr.arg, state, cache, level + 1) + if ptr is None: + return None + self.chandler.type_analyzer.expr_types[self.OBJC_INTERNAL] = ptr + ptr_expr = ExprId(self.OBJC_INTERNAL, expr.arg.size) + objcs = self.chandler.expr_to_types(ExprMem(ptr_expr, expr.size)) + if objcs is None: + return None + objc = objcs[0] + return objc + elif isinstance(expr, ExprCond): + src1 = self.apply_expr_on_state_visit_cache(expr.src1, state, cache, level + 1) + src2 = self.apply_expr_on_state_visit_cache(expr.src2, state, cache, level + 1) + types = [src1, src2] + objc = self.is_offset_list(types, expr.size) + if objc: + return objc + return None + elif isinstance(expr, ExprSlice): + objc = self.get_tpye_int_by_size(expr.size) + if objc is None: + # default size + objc = self.offset_types[0] + return objc + elif isinstance(expr, ExprOp): + args = [] + types = [] + for oarg in expr.args: + arg = self.apply_expr_on_state_visit_cache(oarg, state, cache, level + 1) + types.append(arg) + if None in types: + return None + objc = self.is_offset_list(types, expr.size) + if objc: + return objc + # Find Base + int + if expr.op != '+': + return None + args = list(expr.args) + if args[-1].is_int(): + offset = args.pop() + types.pop() + if len(args) == 1: + arg, arg_type = args.pop(), types.pop() + self.chandler.type_analyzer.expr_types[self.OBJC_INTERNAL] = arg_type + ptr_expr = ExprId(self.OBJC_INTERNAL, arg.size) + objc = self.chandler.expr_to_types(ptr_expr + offset) + objc = objc[0] + return objc + return None + elif isinstance(expr, ExprCompose): + types = set() + for oarg in expr.args: + arg = self.apply_expr_on_state_visit_cache(oarg, state, cache, level + 1) + types.add(arg) + objc = self.is_offset_list(types, expr.size) + if objc: + return objc + return None + else: + raise TypeError("Unknown expr type") + + def get_state(self): + """Return the current state of the SymbolicEngine""" + return self.StateEngine(self.symbols, self.infos_types) + + def eval_ir_expr(self, assignblk): + """ + Evaluate AssignBlock on the current state + @assignblk: AssignBlock instance + """ + pool_out = {} + eval_cache = {} + for dst, src in assignblk.iteritems(): + src = self.eval_expr(src, eval_cache) + if isinstance(dst, ExprMem): + continue + elif isinstance(dst, ExprId): + pool_out[dst] = src + else: + raise ValueError("affected zarb", str(dst)) + return pool_out.iteritems() + + def apply_change(self, dst, src): + objc = src + if objc is None and dst in self.symbols: + del self.symbols[dst] + else: + self.symbols[dst] = objc + + def del_mem_above_stack(self, stack_ptr): + """No stack deletion""" + return diff --git a/miasm2/jitter/emulatedsymbexec.py b/miasm2/jitter/emulatedsymbexec.py index d4a67fe8..97f038dc 100644 --- a/miasm2/jitter/emulatedsymbexec.py +++ b/miasm2/jitter/emulatedsymbexec.py @@ -105,6 +105,8 @@ class EmulatedSymbExec(SymbolicExecutionEngine): """Handle 'segm' operation""" if not expr.is_op_segm(): return expr + if not expr.args[0].is_int(): + return expr segm_nb = int(expr.args[0]) segmaddr = self.cpu.get_segm_base(segm_nb) return e_s(m2_expr.ExprInt(segmaddr, expr.size) + expr.args[1]) @@ -114,7 +116,9 @@ class EmulatedSymbExec(SymbolicExecutionEngine): if expr.op != "cpuid": return expr - a, reg_num = (int(x) for x in expr.args) + if any(not arg.is_int() for arg in expr.args): + return expr + a, reg_num = (int(arg) for arg in expr.args) # Not found error is keeped on purpose return m2_expr.ExprInt(self.cpuid[a][reg_num], expr.size) diff --git a/miasm2/jitter/llvmconvert.py b/miasm2/jitter/llvmconvert.py index ed55aff8..cf6dea31 100644 --- a/miasm2/jitter/llvmconvert.py +++ b/miasm2/jitter/llvmconvert.py @@ -135,13 +135,31 @@ class LLVMContext_JIT(LLVMContext): self.library_filenames = library_filenames self.ir_arch = ir_arch self.arch_specific() + self.load_libraries() LLVMContext.__init__(self, name) self.vmcpu = {} - def new_module(self, name="mod"): - LLVMContext.new_module(self, name) + def load_libraries(self): + # Get LLVM specific functions + name = "libLLVM-%d.%d" % (llvm.llvm_version_info[0], + llvm.llvm_version_info[1], + ) + try: + # On Windows, no need to add ".dll" + self.add_shared_library(name) + except RuntimeError: + try: + # On Linux, ".so" is needed + self.add_shared_library("%s.so" % name) + except RuntimeError: + pass + + # Load additional libraries for lib_fname in self.library_filenames: self.add_shared_library(lib_fname) + + def new_module(self, name="mod"): + LLVMContext.new_module(self, name) self.add_memlookups() self.add_get_exceptionflag() self.add_op() @@ -512,7 +530,8 @@ class LLVMFunction(): Get or create a (LLVM module-)global constant with *name* or *value*. """ - module = self.mod + if name in self.mod.globals: + return self.mod.globals[name] data = llvm_ir.GlobalVariable(self.mod, value.type, name=name) data.global_constant = True data.initializer = value |