diff options
66 files changed, 382 insertions, 523 deletions
diff --git a/.travis.yml b/.travis.yml index 49d7eef2..e1b1debc 100644 --- a/.travis.yml +++ b/.travis.yml @@ -37,9 +37,12 @@ before_script: - "cd z3/build" - "make install" - "cd ../.." -# install miasm +# Miasm - "cd ..;" - "cd miasm;" +# turn deprecation warning into RuntimeError +- "find . -name '*.py' | xargs sed -i 's/warnings\\.warn(/raise RuntimeError(/g'" +# install - "python setup.py build build_ext -I$(pwd)/../virtualenv/include -L$(pwd)/../virtualenv/tinycc" - "python setup.py install" script: "python -c 'import z3; x = z3.BitVec(chr(0x41), 32)' && cd test && python test_all.py && git ls-files -o --exclude-standard" diff --git a/README.md b/README.md index 118051df..e3f16159 100644 --- a/README.md +++ b/README.md @@ -165,8 +165,8 @@ Disassembling the shellcode at address `0`: >>> from miasm2.analysis.machine import Machine >>> machine = Machine('x86_32') >>> mdis = machine.dis_engine(c.bin_stream) ->>> blocks = mdis.dis_multiblock(0) ->>> for block in blocks: +>>> asmcfg = mdis.dis_multiblock(0) +>>> for block in asmcfg.blocks: ... print block ... loc_0000000000000000:0x00000000 @@ -217,8 +217,7 @@ def code_sentinelle(jitter): Active logs: ``` ->>> jitter.jit.log_regs = True ->>> jitter.jit.log_mn = True +>>> jitter.set_trace_log() ``` Run at arbitrary address: @@ -269,7 +268,7 @@ Initializing the IR pool: ``` >>> ira = machine.ira() ->>> for block in blocks: +>>> for block in asmcfg.blocks: ... ira.add_block(block) ... ``` @@ -440,7 +439,7 @@ An auto-generated documentation is available [here](http://miasmdoc.ajax.re). Obtaining Miasm =============== -* Clone the repository: [Miasm on GitHub](https://github.com/serpilliere/miasm) +* Clone the repository: [Miasm on GitHub](https://github.com/cea-sec/miasm/) * Get one of the Docker images at [Docker Hub](https://registry.hub.docker.com/u/miasm/) Software requirements diff --git a/example/disasm/file.py b/example/disasm/file.py index 88ba6162..196e1b1a 100644 --- a/example/disasm/file.py +++ b/example/disasm/file.py @@ -13,6 +13,6 @@ cont = Container.from_stream(open(sys.argv[1])) mdis = dis_x86_32(cont.bin_stream) # Inform the engine to avoid disassembling null instructions mdis.dont_dis_nulstart_bloc = True -blocks = mdis.dis_multiblock(addr) +asmcfg = mdis.dis_multiblock(addr) -open('graph.dot', 'w').write(blocks.dot()) +open('graph.dot', 'w').write(asmcfg.dot()) diff --git a/example/expression/simplification_tools.py b/example/expression/simplification_tools.py index 7c15b3e7..cb062fb3 100644 --- a/example/expression/simplification_tools.py +++ b/example/expression/simplification_tools.py @@ -32,7 +32,6 @@ x = ExprMem(a + b + ExprInt(0x42, 32), 32) def replace_expr(e): - # print 'visit', e dct = {c + ExprInt(0x42, 32): d, a + b: c, } if e in dct: diff --git a/example/ida/ctype_propagation.py b/example/ida/ctype_propagation.py index f459022e..db324833 100644 --- a/example/ida/ctype_propagation.py +++ b/example/ida/ctype_propagation.py @@ -10,7 +10,7 @@ from miasm2.arch.x86.ctype import CTypeAMD64_unk, CTypeX86_unk from miasm2.arch.msp430.ctype import CTypeMSP430_unk from miasm2.core.objc import CTypesManagerNotPacked, ExprToAccessC, CHandler from miasm2.core.ctypesmngr import CAstTypes -from miasm2.expression.expression import ExprId, ExprInt, ExprOp, ExprAff +from miasm2.expression.expression import ExprLoc, ExprInt, ExprOp, ExprAff from miasm2.ir.symbexec_types import SymbExecCType from miasm2.expression.parser import str_to_expr from miasm2.analysis.cst_propag import add_state, propagate_cst_expr @@ -19,9 +19,7 @@ from utils import guess_machine class TypePropagationForm(ida_kernwin.Form): - def __init__(self, ira): - - self.ira = ira + def __init__(self): default_types_info = r"""ExprId("RDX", 64): char *""" archs = ["AMD64_unk", "X86_32_unk", "msp430_unk"] @@ -204,7 +202,6 @@ class SymbExecCTypeFix(SymbExecCType): expr = self.cst_propag_link.get((irb.loc_key, index), {}).get(expr, expr) offset2cmt.setdefault(instr.offset, set()).add( "\n%s: %s\n%s" % (expr, c_str, c_type)) - self.eval_updt_assignblk(assignblk) for offset, value in offset2cmt.iteritems(): idc.MakeComm(offset, '\n'.join(value)) @@ -243,38 +240,38 @@ def get_ira_call_fixer(ira): def analyse_function(): - - # Init - machine = guess_machine() - mn, dis_engine, ira = machine.mn, machine.dis_engine, machine.ira - - bs = bin_stream_ida() - mdis = dis_engine(bs, dont_dis_nulstart_bloc=True) - - - iraCallStackFixer = get_ira_call_fixer(ira) - ir_arch = iraCallStackFixer(mdis.symbol_pool) - - # Get settings - settings = TypePropagationForm(ir_arch) + settings = TypePropagationForm() ret = settings.Execute() if not ret: return + + end = None if settings.cScope.value == 0: addr = settings.functionAddr.value else: addr = settings.startAddr.value if settings.cScope.value == 2: end = settings.endAddr - mdis.dont_dis = [end] - blocks = mdis.dis_multiblock(addr) + # Init + machine = guess_machine(addr=addr) + mn, dis_engine, ira = machine.mn, machine.dis_engine, machine.ira + + bs = bin_stream_ida() + mdis = dis_engine(bs, dont_dis_nulstart_bloc=True) + if end is not None: + mdis.dont_dis = [end] + + + iraCallStackFixer = get_ira_call_fixer(ira) + ir_arch = iraCallStackFixer(mdis.symbol_pool) + + asmcfg = mdis.dis_multiblock(addr) # Generate IR - for block in blocks: + for block in asmcfg.blocks: ir_arch.add_block(block) - cst_propag_link = {} if settings.cUnalias.value: init_infos = {ir_arch.sp: ir_arch.arch.regs.regs_init[ir_arch.sp] } @@ -298,7 +295,8 @@ def analyse_function(): expr_str, ctype_str = expr_str.strip(), ctype_str.strip() expr = str_to_expr(expr_str) ast = mychandler.types_mngr.types_ast.parse_c_type( - ctype_str) + ctype_str + ) ctype = mychandler.types_mngr.types_ast.ast_parse_declaration(ast.ext[0]) objc = types_mngr.get_objc(ctype) print '=' * 20 @@ -309,12 +307,15 @@ def analyse_function(): lbl_real_start = ir_arch.symbol_pool.getby_offset(addr) lbl_head = ir_arch.symbol_pool.getby_name_create("start") - first_block = blocks.label2block(lbl_real_start) + first_block = asmcfg.label2block(lbl_real_start) - assignblk_head = AssignBlock([ExprAff(ir_arch.IRDst, ExprId(lbl_real_start, ir_arch.IRDst.size)), - ExprAff( - ir_arch.sp, ir_arch.arch.regs.regs_init[ir_arch.sp]) - ], first_block.lines[0]) + assignblk_head = AssignBlock( + [ + ExprAff(ir_arch.IRDst, ExprLoc(lbl_real_start, ir_arch.IRDst.size)), + ExprAff(ir_arch.sp, ir_arch.arch.regs.regs_init[ir_arch.sp]) + ], + first_block.lines[0] + ) irb_head = IRBlock(lbl_head, [assignblk_head]) ir_arch.blocks[lbl_head] = irb_head ir_arch.graph.add_uniq_edge(lbl_head, lbl_real_start) @@ -332,7 +333,6 @@ def analyse_function(): done.add((lbl, state)) if lbl not in ir_arch.blocks: continue - symbexec_engine = TypePropagationEngine(ir_arch, types_mngr, state) addr = symbexec_engine.run_block_at(lbl) symbexec_engine.del_mem_above_stack(ir_arch.sp) diff --git a/example/ida/depgraph.py b/example/ida/depgraph.py index ece02ad4..4320be8d 100644 --- a/example/ida/depgraph.py +++ b/example/ida/depgraph.py @@ -39,8 +39,8 @@ class depGraphSettingsForm(ida_kernwin.Form): if assignblk.instr.offset == self.address: break assert line_nb is not None - cur_label = str(cur_block.loc_key) - labels = sorted(map(str, ira.blocks.keys())) + cur_loc_key = str(cur_block.loc_key) + loc_keys = sorted(map(str, ira.blocks.keys())) regs = sorted(ira.arch.regs.all_regs_ids_byname.keys()) regs += self.stk_args.keys() reg_default = regs[0] @@ -86,21 +86,21 @@ Method to use: tp=ida_kernwin.Form.FT_RAWHEX, value=line_nb), 'cbBBL': ida_kernwin.Form.DropdownListControl( - items=labels, + items=loc_keys, readonly=False, - selval=cur_label), + selval=cur_loc_key), 'cColor': ida_kernwin.Form.ColorInput(value=0xc0c020), }) self.Compile() @property - def label(self): + def loc_key(self): value = self.cbBBL.value - for real_label in self.ira.blocks: - if str(real_label) == value: - return real_label - raise ValueError("Bad label") + for real_loc_key in self.ira.blocks: + if str(real_loc_key) == value: + return real_loc_key + raise ValueError("Bad loc_key") @property def line_nb(self): @@ -198,8 +198,12 @@ def next_element(): def launch_depgraph(): global graphs, comments, sol_nb, settings, addr, ir_arch + # Get the current function + addr = idc.ScreenEA() + func = ida_funcs.get_func(addr) + # Init - machine = guess_machine() + machine = guess_machine(addr=func.startEA) mn, dis_engine, ira = machine.mn, machine.dis_engine, machine.ira bs = bin_stream_ida() @@ -212,9 +216,6 @@ def launch_depgraph(): continue mdis.symbol_pool.add_location(name, ad) - # Get the current function - addr = idc.ScreenEA() - func = ida_funcs.get_func(addr) asmcfg = mdis.dis_multiblock(func.startEA) # Generate IR @@ -225,7 +226,7 @@ def launch_depgraph(): settings = depGraphSettingsForm(ir_arch) settings.Execute() - label, elements, line_nb = settings.loc_key, settings.elements, settings.line_nb + loc_key, elements, line_nb = settings.loc_key, settings.elements, settings.line_nb # Simplify affectations for irb in ir_arch.blocks.values(): irs = [] @@ -249,7 +250,7 @@ def launch_depgraph(): # Get dependency graphs dg = settings.depgraph - graphs = dg.get(label, elements, line_nb, + graphs = dg.get(loc_key, elements, line_nb, set([ir_arch.symbol_pool.getby_offset(func.startEA)])) # Display the result diff --git a/example/ida/graph_ir.py b/example/ida/graph_ir.py index 370500e5..dd842281 100644 --- a/example/ida/graph_ir.py +++ b/example/ida/graph_ir.py @@ -98,7 +98,9 @@ class GraphMiasmIR(idaapi.GraphViewer): def build_graph(verbose=False, simplify=False): - machine = guess_machine() + start_addr = idc.ScreenEA() + + machine = guess_machine(addr=start_addr) mn, dis_engine, ira = machine.mn, machine.dis_engine, machine.ira if verbose: @@ -114,7 +116,6 @@ def build_graph(verbose=False, simplify=False): # populate symbols with ida names for addr, name in idautils.Names(): - # print hex(ad), repr(name) if name is None: continue if (mdis.symbol_pool.getby_offset(addr) or @@ -125,17 +126,16 @@ def build_graph(verbose=False, simplify=False): if verbose: print "start disasm" - addr = idc.ScreenEA() if verbose: print hex(addr) - asmcfg = mdis.dis_multiblock(addr) + asmcfg = mdis.dis_multiblock(start_addr) if verbose: print "generating graph" open('asm_flow.dot', 'w').write(asmcfg.dot()) - print "generating IR... %x" % addr + print "generating IR... %x" % start_addr for block in asmcfg.blocks: if verbose: @@ -144,7 +144,7 @@ def build_graph(verbose=False, simplify=False): ir_arch.add_block(block) if verbose: - print "IR ok... %x" % addr + print "IR ok... %x" % start_addr for irb in ir_arch.blocks.itervalues(): irs = [] diff --git a/example/ida/symbol_exec.py b/example/ida/symbol_exec.py index 63014ece..0d8c63c2 100644 --- a/example/ida/symbol_exec.py +++ b/example/ida/symbol_exec.py @@ -128,11 +128,16 @@ def symbolic_exec(): from utils import guess_machine + start, end = idc.SelStart(), idc.SelEnd() + bs = bin_stream_ida() - machine = guess_machine() + machine = guess_machine(addr=start) mdis = machine.dis_engine(bs) - start, end = idc.SelStart(), idc.SelEnd() + + if start == idc.BADADDR and end == idc.BADADDR: + start = idc.ScreenEA() + end = idc.next_head(start) # Get next instruction address mdis.dont_dis = [end] asmcfg = mdis.dis_multiblock(start) @@ -151,7 +156,8 @@ def symbolic_exec(): view = symbolicexec_t() all_views.append(view) if not view.Create(modified, machine, mdis.symbol_pool, - "Symbolic Execution - 0x%x to 0x%x" % (start, end)): + "Symbolic Execution - 0x%x to 0x%x" + % (start, idc.prev_head(end))): return view.Show() diff --git a/example/ida/utils.py b/example/ida/utils.py index 481220a9..5620a881 100644 --- a/example/ida/utils.py +++ b/example/ida/utils.py @@ -5,7 +5,7 @@ from miasm2.analysis.machine import Machine from miasm2.ir.translators import Translator import miasm2.expression.expression as m2_expr -def guess_machine(): +def guess_machine(addr=None): "Return an instance of Machine corresponding to the IDA guessed processor" processor_name = GetLongPrm(INF_PROCNAME) @@ -39,7 +39,14 @@ def guess_machine(): (False, 64, True): "aarch64b", (False, 64, False): "aarch64l", } - is_armt = globals().get('armt', False) + + # Get T reg to detect arm/thumb function + # Default is arm + is_armt = False + if addr is not None: + t_reg = GetReg(addr, "T") + is_armt = t_reg == 1 + is_bigendian = info.is_be() infos = (is_armt, size, is_bigendian) if not infos in info2machine: diff --git a/example/jitter/mips32.py b/example/jitter/mips32.py index c5b2f7f5..31ab03c8 100755 --- a/example/jitter/mips32.py +++ b/example/jitter/mips32.py @@ -5,16 +5,11 @@ from miasm2.analysis import debugging from miasm2.jitter.csts import * from miasm2.analysis.machine import Machine -from pdb import pm - parser = ArgumentParser( description="""Sandbox raw binary with mips32 engine (ex: jit_mips32.py example/mips32_sc_l.bin 0)""") -parser.add_argument("-r", "--log-regs", - help="Log registers value for each instruction", - action="store_true") -parser.add_argument("-m", "--log-mn", - help="Log desassembly conversion for each instruction", +parser.add_argument("-t", "--trace", + help="Log instructions/registers values", action="store_true") parser.add_argument("-n", "--log-newbloc", help="Log basic blocks processed by the Jitter", @@ -43,9 +38,11 @@ def jit_mips32_binary(args): myjit.init_stack() # Log level (if available with jitter engine) - myjit.jit.log_regs = args.log_regs - myjit.jit.log_mn = args.log_mn - myjit.jit.log_newbloc = args.log_newbloc + myjit.set_trace_log( + trace_instr=args.trace, + trace_regs=args.trace, + trace_new_blocks=args.log_newbloc + ) myjit.vm.add_memory_page(0, PAGE_READ | PAGE_WRITE, open(filepath).read()) myjit.add_breakpoint(0x1337BEEF, code_sentinelle) diff --git a/example/jitter/msp430.py b/example/jitter/msp430.py index 6dd67542..2f9b8649 100755 --- a/example/jitter/msp430.py +++ b/example/jitter/msp430.py @@ -8,11 +8,8 @@ from miasm2.analysis.machine import Machine parser = ArgumentParser( description="""Sandbox raw binary with msp430 engine (ex: jit_msp430.py example/msp430_sc.bin 0)""") -parser.add_argument("-r", "--log-regs", - help="Log registers value for each instruction", - action="store_true") -parser.add_argument("-m", "--log-mn", - help="Log desassembly conversion for each instruction", +parser.add_argument("-t", "--trace", + help="Log instructions/registers values", action="store_true") parser.add_argument("-n", "--log-newbloc", help="Log basic blocks processed by the Jitter", @@ -36,9 +33,11 @@ def jit_msp430_binary(args): myjit.init_stack() # Log level (if available with jitter engine) - myjit.jit.log_regs = args.log_regs - myjit.jit.log_mn = args.log_mn - myjit.jit.log_newbloc = args.log_newbloc + myjit.set_trace_log( + trace_instr=args.trace, + trace_regs=args.trace, + trace_new_blocks=args.log_newbloc + ) myjit.vm.add_memory_page(0, PAGE_READ | PAGE_WRITE, open(filepath, "rb").read()) myjit.add_breakpoint(0x1337, lambda _: exit(0)) diff --git a/example/jitter/x86_32.py b/example/jitter/x86_32.py index 1409d7aa..5272f732 100644 --- a/example/jitter/x86_32.py +++ b/example/jitter/x86_32.py @@ -24,8 +24,7 @@ data = open(args.filename).read() run_addr = 0x40000000 myjit.vm.add_memory_page(run_addr, PAGE_READ | PAGE_WRITE, data) -myjit.jit.log_regs = True -myjit.jit.log_mn = True +myjit.set_trace_log() myjit.push_uint32_t(0x1337beef) myjit.add_breakpoint(0x1337beef, code_sentinelle) diff --git a/miasm2/analysis/binary.py b/miasm2/analysis/binary.py index f5a727d7..5d9374da 100644 --- a/miasm2/analysis/binary.py +++ b/miasm2/analysis/binary.py @@ -202,6 +202,8 @@ class ContainerELF(Container): offset = symb.value if offset == 0: continue + if not name: + continue try: self._symbol_pool.add_location(name, offset) except ValueError: diff --git a/miasm2/analysis/data_analysis.py b/miasm2/analysis/data_analysis.py index 5e88665e..0782c12c 100644 --- a/miasm2/analysis/data_analysis.py +++ b/miasm2/analysis/data_analysis.py @@ -4,7 +4,6 @@ from miasm2.ir.symbexec import SymbolicExecutionEngine def get_node_name(label, i, n): - # n_name = "%s_%d_%s"%(label.name, i, n) n_name = (label, i, n) return n_name @@ -69,18 +68,12 @@ def intra_block_flow_symbexec(ir_arch, flow_graph, irb, in_nodes, out_nodes): sb = SymbolicExecutionEngine(ir_arch, dict(symbols_init)) sb.emulbloc(irb) - # print "*"*40 - # print irb - # print sb.dump_id() - # print sb.dump_mem() for n_w in sb.symbols: - # print n_w v = sb.symbols[n_w] if n_w in symbols_init and symbols_init[n_w] == v: continue read_values = v.get_r(cst_read=True) - # print n_w, v, [str(x) for x in read_values] node_n_w = get_node_name(irb.loc_key, len(irb), n_w) for n_r in read_values: @@ -97,9 +90,6 @@ def intra_block_flow_symbexec(ir_arch, flow_graph, irb, in_nodes, out_nodes): def inter_block_flow_link(ir_arch, flow_graph, irb_in_nodes, irb_out_nodes, todo, link_exec_to_data): lbl, current_nodes, exec_nodes = todo - # print 'TODO' - # print lbl - # print [(str(x[0]), str(x[1])) for x in current_nodes] current_nodes = dict(current_nodes) # link current nodes to bloc in_nodes @@ -107,12 +97,10 @@ def inter_block_flow_link(ir_arch, flow_graph, irb_in_nodes, irb_out_nodes, todo print "cannot find bloc!!", lbl return set() irb = ir_arch.blocks[lbl] - # pp(('IN', lbl, [(str(x[0]), str(x[1])) for x in current_nodes.items()])) to_del = set() for n_r, node_n_r in irb_in_nodes[irb.loc_key].items(): if not n_r in current_nodes: continue - # print 'add link', current_nodes[n_r], node_n_r flow_graph.add_uniq_edge(current_nodes[n_r], node_n_r) to_del.add(n_r) @@ -137,8 +125,6 @@ def inter_block_flow_link(ir_arch, flow_graph, irb_in_nodes, irb_out_nodes, todo for lbl_dst in ir_arch.graph.successors(irb.loc_key): todo.add((lbl_dst, tuple(current_nodes.items()), x_nodes)) - # pp(('OUT', lbl, [(str(x[0]), str(x[1])) for x in current_nodes.items()])) - return todo @@ -161,18 +147,11 @@ def create_implicit_flow(ir_arch, flow_graph, irb_in_nodes, irb_out_ndes): if not isinstance(n_r, ExprId): continue - # print "###", n_r - # print "###", irb - # print "###", 'OUT', [str(x) for x in irb.out_nodes] - # print "###", irb_son - # print "###", 'IN', [str(x) for x in irb_son.in_nodes] - node_n_w = irb.loc_key, len(irb), n_r irb_out_nodes[irb.loc_key][n_r] = node_n_w if not n_r in irb_in_nodes[irb.loc_key]: irb_in_nodes[irb.loc_key][n_r] = irb.loc_key, 0, n_r node_n_r = irb_in_nodes[irb.loc_key][n_r] - # print "###", node_n_r for lbl_p in ir_arch.graph.predecessors(irb.loc_key): todo.add(lbl_p) @@ -220,46 +199,11 @@ class symb_exec_func: b = self.ir_arch.get_block(ad) if b is None: raise ValueError("unknown bloc! %s" % ad) - """ - dead = b.dead[0] - for d in dead: - if d in variables: - del(variables[d]) - """ variables = variables.items() s = parent, ad, tuple(sorted(variables)) - """ - state_var = s[1] - if s in self.states_var_done: - print 'skip state' - return - if not ad in self.stateby_ad: - self.stateby_ad[ad] = set() - self.stateby_ad[ad].add(state_var) - - """ self.todo.add(s) - """ - if not ad in self.cpt: - self.cpt[ad] = 0 - """ - """ - def get_next_min(self): - state_by_ad = {} - for state in self.todo: - ad = state[1] - if not ad in state_by_ad: - state_by_ad[ad] = [] - state_by_ad[ad].append(state) - print "XX", [len(x) for x in state_by_ad.values()] - state_by_ad = state_by_ad.items() - state_by_ad.sort(key=lambda x:len(x[1])) - state_by_ad.reverse() - return state_by_ad.pop()[1][0] - """ - def get_next_state(self): state = self.todo.pop() return state @@ -273,16 +217,10 @@ class symb_exec_func: self.total_done += 1 print 'CPT', self.total_done while self.todo: - # if self.total_done>20: - # self.get_next_min() - # state = self.todo.pop() state = self.get_next_state() parent, ad, s = state self.states_done.add(state) self.states_var_done.add(state) - # if s in self.states_var_done: - # print "state done" - # continue sb = SymbolicExecutionEngine(self.ir_arch, dict(s)) diff --git a/miasm2/analysis/debugging.py b/miasm2/analysis/debugging.py index fc03eb17..6b88f00a 100644 --- a/miasm2/analysis/debugging.py +++ b/miasm2/analysis/debugging.py @@ -273,7 +273,7 @@ class DebugCmd(cmd.Cmd, object): def add_breakpoints(self, bp_addr): for addr in bp_addr: - addr = int(addr, 0) + addr = int(addr, 0) good = True for i, dbg_obj in enumerate(self.dbg.bp_list): diff --git a/miasm2/analysis/disasm_cb.py b/miasm2/analysis/disasm_cb.py index e759e313..0dc482ac 100644 --- a/miasm2/analysis/disasm_cb.py +++ b/miasm2/analysis/disasm_cb.py @@ -5,7 +5,6 @@ from miasm2.expression.simplifications import expr_simp from miasm2.core.asmblock \ import AsmSymbolPool, AsmConstraintNext, AsmConstraintTo from miasm2.core.utils import upck32 -# from miasm2.core.graph import DiGraph def get_ira(mnemo, attrib): @@ -32,11 +31,8 @@ def arm_guess_subcall( ir_arch.add_block(cur_bloc) ir_blocks = ir_arch.blocks.values() - # flow_graph = DiGraph() to_add = set() for irblock in ir_blocks: - # print 'X'*40 - # print irblock pc_val = None lr_val = None for exprs in irblock: @@ -53,17 +49,13 @@ def arm_guess_subcall( l = cur_bloc.lines[-1] if lr_val.arg != l.offset + l.l: continue - # print 'IS CALL!' l = symbol_pool.getby_offset_create(int(lr_val)) c = AsmConstraintNext(l) to_add.add(c) offsets_to_dis.add(int(lr_val)) - # if to_add: - # print 'R'*70 for c in to_add: - # print c cur_bloc.addto(c) @@ -80,16 +72,11 @@ def arm_guess_jump_table( ir_blocks = ir_arch.blocks.values() for irblock in ir_blocks: - # print 'X'*40 - # print irblock pc_val = None - # lr_val = None for exprs in irblock: for e in exprs: if e.dst == ir_arch.pc: pc_val = e.src - # if e.dst == mnemo.regs.LR: - # lr_val = e.src if pc_val is None: continue if not isinstance(pc_val, ExprMem): diff --git a/miasm2/analysis/dse.py b/miasm2/analysis/dse.py index 6eaf1e91..1fd177bb 100644 --- a/miasm2/analysis/dse.py +++ b/miasm2/analysis/dse.py @@ -189,7 +189,7 @@ class DSEEngine(object): self.jitter.exec_cb = self.callback # Clean jit cache to avoid multi-line basic blocks already jitted - self.jitter.jit.loc_key_to_jit_block.clear() + self.jitter.jit.clear_jitted_blocks() def attach(self, emulator): """Attach the DSE to @emulator diff --git a/miasm2/analysis/sandbox.py b/miasm2/analysis/sandbox.py index e77b1669..b1147adb 100644 --- a/miasm2/analysis/sandbox.py +++ b/miasm2/analysis/sandbox.py @@ -57,16 +57,15 @@ class Sandbox(object): cls.__init__(self, **kwargs) # Logging options - if self.options.singlestep: - self.jitter.jit.log_mn = True - self.jitter.jit.log_regs = True + self.jitter.set_trace_log( + trace_instr=self.options.singlestep, + trace_regs=self.options.singlestep, + trace_new_blocks=self.options.dumpblocs + ) if not self.options.quiet_function_calls: log_func.setLevel(logging.INFO) - if self.options.dumpblocs: - self.jitter.jit.log_newbloc = True - @classmethod def parser(cls, *args, **kwargs): """ diff --git a/miasm2/arch/aarch64/jit.py b/miasm2/arch/aarch64/jit.py index 31570f52..b557a179 100644 --- a/miasm2/arch/aarch64/jit.py +++ b/miasm2/arch/aarch64/jit.py @@ -1,6 +1,6 @@ import logging -from miasm2.jitter.jitload import jitter, named_arguments +from miasm2.jitter.jitload import Jitter, named_arguments from miasm2.core import asmblock from miasm2.core.utils import pck64, upck64 from miasm2.arch.aarch64.sem import ir_aarch64b, ir_aarch64l @@ -11,12 +11,12 @@ hnd.setFormatter(logging.Formatter("[%(levelname)s]: %(message)s")) log.addHandler(hnd) log.setLevel(logging.CRITICAL) -class jitter_aarch64l(jitter): +class jitter_aarch64l(Jitter): max_reg_arg = 8 def __init__(self, *args, **kwargs): sp = asmblock.AsmSymbolPool() - jitter.__init__(self, ir_aarch64l(sp), *args, **kwargs) + Jitter.__init__(self, ir_aarch64l(sp), *args, **kwargs) self.vm.set_little_endian() def push_uint64_t(self, value): @@ -69,7 +69,7 @@ class jitter_aarch64l(jitter): func_prepare_systemv = func_prepare_stdcall def init_run(self, *args, **kwargs): - jitter.init_run(self, *args, **kwargs) + Jitter.init_run(self, *args, **kwargs) self.cpu.PC = self.pc @@ -77,5 +77,5 @@ class jitter_aarch64b(jitter_aarch64l): def __init__(self, *args, **kwargs): sp = asmblock.AsmSymbolPool() - jitter.__init__(self, ir_aarch64b(sp), *args, **kwargs) + Jitter.__init__(self, ir_aarch64b(sp), *args, **kwargs) self.vm.set_big_endian() diff --git a/miasm2/arch/aarch64/sem.py b/miasm2/arch/aarch64/sem.py index ad582878..c232e8dc 100644 --- a/miasm2/arch/aarch64/sem.py +++ b/miasm2/arch/aarch64/sem.py @@ -350,7 +350,36 @@ def csel(arg1, arg2, arg3, arg4): cond_expr = cond2expr[arg4.name] arg1 = arg2 if cond_expr else arg3 +def ccmp(ir, instr, arg1, arg2, arg3, arg4): + e = [] + if(arg2.is_int): + arg2=m2_expr.ExprInt(arg2.arg.arg,arg1.size) + default_nf = arg3[0:1] + default_zf = arg3[1:2] + default_cf = arg3[2:3] + default_of = arg3[3:4] + cond_expr = cond2expr[arg4.name] + res = arg1 - arg2 + new_nf = nf + new_zf = update_flag_zf(res)[0].src + new_cf = update_flag_sub_cf(arg1, arg2, res).src + new_of = update_flag_sub_of(arg1, arg2, res).src + + e.append(m2_expr.ExprAff(nf, m2_expr.ExprCond(cond_expr, + new_nf, + default_nf))) + e.append(m2_expr.ExprAff(zf, m2_expr.ExprCond(cond_expr, + new_zf, + default_zf))) + e.append(m2_expr.ExprAff(cf, m2_expr.ExprCond(cond_expr, + new_cf, + default_cf))) + e.append(m2_expr.ExprAff(of, m2_expr.ExprCond(cond_expr, + new_of, + default_of))) + return e, [] + def csinc(ir, instr, arg1, arg2, arg3, arg4): e = [] cond_expr = cond2expr[arg4.name] @@ -761,6 +790,7 @@ mnemo_func.update({ 'cmp': cmp, 'cmn': cmn, 'movk': movk, + 'ccmp': ccmp, 'csinc': csinc, 'csinv': csinv, 'csneg': csneg, diff --git a/miasm2/arch/arm/arch.py b/miasm2/arch/arm/arch.py index 17b57ba4..624642cf 100644 --- a/miasm2/arch/arm/arch.py +++ b/miasm2/arch/arm/arch.py @@ -1038,16 +1038,12 @@ class arm_op2(arm_arg): shift_kind = shift & 1 shift_type = (shift >> 1) & 3 shift >>= 3 - # print self.parent.immop.value, hex(shift), hex(shift_kind), - # hex(shift_type) if shift_kind: # shift kind is reg if shift & 1: - # log.debug('error in shift1') return False rs = shift >> 1 if rs == 0xf: - # log.debug('error in shift2') return False shift_op = regs_expr[rs] else: @@ -2155,12 +2151,10 @@ class armt_rlist_pclr(armt_rlist): reg_l = list(e.args) self.parent.pclr.value = 0 if self.parent.pp.value == 0: - # print 'push' if regs_expr[14] in reg_l: reg_l.remove(regs_expr[14]) self.parent.pclr.value = 1 else: - # print 'pop', if regs_expr[15] in reg_l: reg_l.remove(regs_expr[15]) self.parent.pclr.value = 1 diff --git a/miasm2/arch/arm/disasm.py b/miasm2/arch/arm/disasm.py index 205e2a17..8997fa2b 100644 --- a/miasm2/arch/arm/disasm.py +++ b/miasm2/arch/arm/disasm.py @@ -24,7 +24,7 @@ def cb_arm_fix_call(mn, cur_bloc, symbol_pool, offsets_to_dis, *args, **kwargs): return if not l2.args[1] in values: return - loc_key_cst = self.symbol_pool.getby_offset_create(l1.offset + 4) + loc_key_cst = symbol_pool.getby_offset_create(l1.offset + 4) cur_bloc.add_cst(loc_key_cst, AsmConstraint.c_next) offsets_to_dis.add(l1.offset + 4) diff --git a/miasm2/arch/arm/jit.py b/miasm2/arch/arm/jit.py index b92e2c32..ef2e14ae 100644 --- a/miasm2/arch/arm/jit.py +++ b/miasm2/arch/arm/jit.py @@ -1,6 +1,6 @@ import logging -from miasm2.jitter.jitload import jitter, named_arguments +from miasm2.jitter.jitload import Jitter, named_arguments from miasm2.core import asmblock from miasm2.core.utils import pck32, upck32 from miasm2.arch.arm.sem import ir_armb, ir_arml, ir_armtl, ir_armtb, cond_dct_inv, tab_cond @@ -51,12 +51,12 @@ class arm_CGen(CGen): return irblocks_list -class jitter_arml(jitter): +class jitter_arml(Jitter): C_Gen = arm_CGen def __init__(self, *args, **kwargs): sp = asmblock.AsmSymbolPool() - jitter.__init__(self, ir_arml(sp), *args, **kwargs) + Jitter.__init__(self, ir_arml(sp), *args, **kwargs) self.vm.set_little_endian() def push_uint32_t(self, value): @@ -107,7 +107,7 @@ class jitter_arml(jitter): get_arg_n_systemv = get_arg_n_stdcall def init_run(self, *args, **kwargs): - jitter.init_run(self, *args, **kwargs) + Jitter.init_run(self, *args, **kwargs) self.cpu.PC = self.pc @@ -116,7 +116,7 @@ class jitter_armb(jitter_arml): def __init__(self, *args, **kwargs): sp = asmblock.AsmSymbolPool() - jitter.__init__(self, ir_armb(sp), *args, **kwargs) + Jitter.__init__(self, ir_armb(sp), *args, **kwargs) self.vm.set_big_endian() @@ -125,5 +125,5 @@ class jitter_armtl(jitter_arml): def __init__(self, *args, **kwargs): sp = asmblock.AsmSymbolPool() - jitter.__init__(self, ir_armtl(sp), *args, **kwargs) + Jitter.__init__(self, ir_armtl(sp), *args, **kwargs) self.vm.set_little_endian() diff --git a/miasm2/arch/arm/sem.py b/miasm2/arch/arm/sem.py index c80e9826..ccd56e8f 100644 --- a/miasm2/arch/arm/sem.py +++ b/miasm2/arch/arm/sem.py @@ -635,7 +635,6 @@ def st_ld_r(ir, instr, a, a2, b, store=False, size=32, s_ext=False, z_ext=False) base, off = b.args[0], b.args[1] # ExprInt(size/8, 32) else: base, off = b, ExprInt(0, 32) - # print a, wb, base, off, postinc if postinc: ad = base else: @@ -734,13 +733,11 @@ def ldrsh(ir, instr, a, b): def st_ld_m(ir, instr, a, b, store=False, postinc=False, updown=False): e = [] wb = False - # sb = False dst = None if isinstance(a, ExprOp) and a.op == 'wback': wb = True a = a.args[0] if isinstance(b, ExprOp) and b.op == 'sbit': - # sb = True b = b.args[0] regs = b.args base = a diff --git a/miasm2/arch/mips32/jit.py b/miasm2/arch/mips32/jit.py index 180f8b0a..c637fb13 100644 --- a/miasm2/arch/mips32/jit.py +++ b/miasm2/arch/mips32/jit.py @@ -1,6 +1,6 @@ import logging -from miasm2.jitter.jitload import jitter, named_arguments +from miasm2.jitter.jitload import Jitter, named_arguments from miasm2.core import asmblock from miasm2.core.utils import pck32, upck32 from miasm2.arch.mips32.sem import ir_mips32l, ir_mips32b @@ -71,7 +71,7 @@ class mipsCGen(CGen): loc_key = self.get_block_post_label(block) offset = self.ir_arch.symbol_pool.loc_key_to_offset(loc_key) - out = (self.CODE_RETURN_NO_EXCEPTION % (self.loc_key_to_jitlabel(loc_key), + out = (self.CODE_RETURN_NO_EXCEPTION % (loc_key, self.C_PC, m2_expr.ExprId('branch_dst_irdst', 32), m2_expr.ExprId('branch_dst_irdst', 32), @@ -80,13 +80,13 @@ class mipsCGen(CGen): return out -class jitter_mips32l(jitter): +class jitter_mips32l(Jitter): C_Gen = mipsCGen def __init__(self, *args, **kwargs): sp = asmblock.AsmSymbolPool() - jitter.__init__(self, ir_mips32l(sp), *args, **kwargs) + Jitter.__init__(self, ir_mips32l(sp), *args, **kwargs) self.vm.set_little_endian() def push_uint32_t(self, value): @@ -102,7 +102,7 @@ class jitter_mips32l(jitter): return upck32(self.vm.get_mem(self.cpu.SP + 4 * index, 4)) def init_run(self, *args, **kwargs): - jitter.init_run(self, *args, **kwargs) + Jitter.init_run(self, *args, **kwargs) self.cpu.PC = self.pc # calling conventions @@ -146,5 +146,5 @@ class jitter_mips32b(jitter_mips32l): def __init__(self, *args, **kwargs): sp = asmblock.AsmSymbolPool() - jitter.__init__(self, ir_mips32b(sp), *args, **kwargs) + Jitter.__init__(self, ir_mips32b(sp), *args, **kwargs) self.vm.set_big_endian() diff --git a/miasm2/arch/msp430/jit.py b/miasm2/arch/msp430/jit.py index dd5fe94e..dcd7e91a 100644 --- a/miasm2/arch/msp430/jit.py +++ b/miasm2/arch/msp430/jit.py @@ -1,4 +1,4 @@ -from miasm2.jitter.jitload import jitter +from miasm2.jitter.jitload import Jitter from miasm2.core import asmblock from miasm2.core.utils import pck16, upck16 from miasm2.arch.msp430.sem import ir_msp430 @@ -11,11 +11,11 @@ hnd.setFormatter(logging.Formatter("[%(levelname)s]: %(message)s")) log.addHandler(hnd) log.setLevel(logging.CRITICAL) -class jitter_msp430(jitter): +class jitter_msp430(Jitter): def __init__(self, *args, **kwargs): sp = asmblock.AsmSymbolPool() - jitter.__init__(self, ir_msp430(sp), *args, **kwargs) + Jitter.__init__(self, ir_msp430(sp), *args, **kwargs) self.vm.set_little_endian() def push_uint16_t(self, value): @@ -37,6 +37,6 @@ class jitter_msp430(jitter): return value def init_run(self, *args, **kwargs): - jitter.init_run(self, *args, **kwargs) + Jitter.init_run(self, *args, **kwargs) self.cpu.PC = self.pc diff --git a/miasm2/arch/msp430/sem.py b/miasm2/arch/msp430/sem.py index 877c2a70..a3521fb5 100644 --- a/miasm2/arch/msp430/sem.py +++ b/miasm2/arch/msp430/sem.py @@ -434,7 +434,6 @@ class ir_msp430(IntermediateRepresentation): pass def get_ir(self, instr): - # print instr#, args args = instr.args instr_ir, extra_ir = mnemo_func[instr.name](self, instr, *args) self.mod_sr(instr, instr_ir, extra_ir) diff --git a/miasm2/arch/ppc/jit.py b/miasm2/arch/ppc/jit.py index 9134e032..e79faabd 100644 --- a/miasm2/arch/ppc/jit.py +++ b/miasm2/arch/ppc/jit.py @@ -1,4 +1,4 @@ -from miasm2.jitter.jitload import jitter, named_arguments +from miasm2.jitter.jitload import Jitter, named_arguments from miasm2.core import asmblock from miasm2.arch.ppc.sem import ir_ppc32b import struct @@ -11,7 +11,7 @@ hnd.setFormatter(logging.Formatter("[%(levelname)s]: %(message)s")) log.addHandler(hnd) log.setLevel(logging.CRITICAL) -class jitter_ppc32b(jitter): +class jitter_ppc32b(Jitter): max_reg_arg = 8 def __init__(self, *args, **kwargs): @@ -66,5 +66,5 @@ class jitter_ppc32b(jitter): def init_run(self, *args, **kwargs): - jitter.init_run(self, *args, **kwargs) + Jitter.init_run(self, *args, **kwargs) self.cpu.PC = self.pc diff --git a/miasm2/arch/x86/jit.py b/miasm2/arch/x86/jit.py index a12a66f5..5485ed85 100644 --- a/miasm2/arch/x86/jit.py +++ b/miasm2/arch/x86/jit.py @@ -1,6 +1,6 @@ import logging -from miasm2.jitter.jitload import jitter, named_arguments +from miasm2.jitter.jitload import Jitter, named_arguments from miasm2.core import asmblock from miasm2.core.utils import pck16, pck32, pck64, upck16, upck32, upck64 from miasm2.arch.x86.sem import ir_x86_16, ir_x86_32, ir_x86_64 @@ -34,13 +34,13 @@ class x86_64_CGen(x86_32_CGen): out.append('dump_gpregs_64(jitcpu->cpu);') return out -class jitter_x86_16(jitter): +class jitter_x86_16(Jitter): C_Gen = x86_32_CGen def __init__(self, *args, **kwargs): sp = asmblock.AsmSymbolPool() - jitter.__init__(self, ir_x86_16(sp), *args, **kwargs) + Jitter.__init__(self, ir_x86_16(sp), *args, **kwargs) self.vm.set_little_endian() self.ir_arch.do_stk_segm = False self.orig_irbloc_fix_regs_for_mode = self.ir_arch.irbloc_fix_regs_for_mode @@ -62,17 +62,17 @@ class jitter_x86_16(jitter): return upck16(self.vm.get_mem(self.cpu.SP + 4 * index, 4)) def init_run(self, *args, **kwargs): - jitter.init_run(self, *args, **kwargs) + Jitter.init_run(self, *args, **kwargs) self.cpu.IP = self.pc -class jitter_x86_32(jitter): +class jitter_x86_32(Jitter): C_Gen = x86_32_CGen def __init__(self, *args, **kwargs): sp = asmblock.AsmSymbolPool() - jitter.__init__(self, ir_x86_32(sp), *args, **kwargs) + Jitter.__init__(self, ir_x86_32(sp), *args, **kwargs) self.vm.set_little_endian() self.ir_arch.do_stk_segm = False @@ -104,7 +104,7 @@ class jitter_x86_32(jitter): return upck32(self.vm.get_mem(self.cpu.ESP + 4 * index, 4)) def init_run(self, *args, **kwargs): - jitter.init_run(self, *args, **kwargs) + Jitter.init_run(self, *args, **kwargs) self.cpu.EIP = self.pc # calling conventions @@ -180,7 +180,7 @@ class jitter_x86_32(jitter): -class jitter_x86_64(jitter): +class jitter_x86_64(Jitter): C_Gen = x86_64_CGen args_regs_systemv = ['RDI', 'RSI', 'RDX', 'RCX', 'R8', 'R9'] @@ -188,7 +188,7 @@ class jitter_x86_64(jitter): def __init__(self, *args, **kwargs): sp = asmblock.AsmSymbolPool() - jitter.__init__(self, ir_x86_64(sp), *args, **kwargs) + Jitter.__init__(self, ir_x86_64(sp), *args, **kwargs) self.vm.set_little_endian() self.ir_arch.do_stk_segm = False @@ -211,7 +211,7 @@ class jitter_x86_64(jitter): return upck64(self.vm.get_mem(self.cpu.RSP + 8 * index, 8)) def init_run(self, *args, **kwargs): - jitter.init_run(self, *args, **kwargs) + Jitter.init_run(self, *args, **kwargs) self.cpu.RIP = self.pc # calling conventions diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py index d53677be..5989a0b4 100644 --- a/miasm2/arch/x86/sem.py +++ b/miasm2/arch/x86/sem.py @@ -776,7 +776,7 @@ def pop_gen(ir, instr, src, size): e.append(m2_expr.ExprAff(sp, new_sp)) # XXX FIX XXX for pop [esp] if isinstance(src, m2_expr.ExprMem): - src = src.replace_expr({sp: new_sp}) + src = expr_simp(src.replace_expr({sp: new_sp})) result = sp if ir.do_stk_segm: result = ir.gen_segm_expr(SS, result) diff --git a/miasm2/core/asmblock.py b/miasm2/core/asmblock.py index 35b7e1db..08ff25e9 100644 --- a/miasm2/core/asmblock.py +++ b/miasm2/core/asmblock.py @@ -521,7 +521,7 @@ class AsmSymbolPool(object): return "".join("%s\n" % loc_key for loc_key in self._loc_keys) def __getitem__(self, item): - warnings.warn('DEPRECATION WARNING: use "offset_to_loc_key" or "name_to_loc_key"') + warnings.warn('DEPRECATION WARNING: use "getby_name" or "getby_offset"') if item in self._name_to_loc_key: return self._name_to_loc_key[item] if item in self._offset_to_loc_key: @@ -529,7 +529,7 @@ class AsmSymbolPool(object): raise KeyError('unknown symbol %r' % item) def __contains__(self, item): - warnings.warn('DEPRECATION WARNING: use "offset_to_loc_key" or "name_to_loc_key"') + warnings.warn('DEPRECATION WARNING: use "getby_name" or "getby_offset"') return item in self._name_to_loc_key or item in self._offset_to_loc_key def merge(self, symbol_pool): diff --git a/miasm2/core/cpu.py b/miasm2/core/cpu.py index a142ab77..80f81aff 100644 --- a/miasm2/core/cpu.py +++ b/miasm2/core/cpu.py @@ -1030,7 +1030,7 @@ class instruction(object): if name == '_': fixed_expr[exprloc] = self.get_asm_next_offset(exprloc) continue - if not name in symbols: + if symbols.getby_name(name) is None: raise ValueError('Unresolved symbol: %r' % exprloc) offset = symbols.loc_key_to_offset(loc_key) diff --git a/miasm2/core/parse_asm.py b/miasm2/core/parse_asm.py index 86871c37..3b97cbb6 100644 --- a/miasm2/core/parse_asm.py +++ b/miasm2/core/parse_asm.py @@ -73,30 +73,6 @@ def guess_next_new_label(symbol_pool): i += 1 -def replace_expr_labels(expr, symbol_pool, replace_id): - """Create LocKey of the expression @expr in the @symbol_pool - Update @replace_id""" - - if not expr.is_loc(): - return expr - - old_name = symbol_pool.loc_key_to_name(expr.loc_key) - new_lbl = symbol_pool.getby_name_create(old_name) - replace_id[expr] = ExprLoc(new_lbl, expr.size) - return replace_id[expr] - - -def replace_orphan_labels(instr, symbol_pool): - """Link orphan labels used by @instr to the @symbol_pool""" - - for i, arg in enumerate(instr.args): - replace_id = {} - arg.visit(lambda e: replace_expr_labels(e, - symbol_pool, - replace_id)) - instr.args[i] = instr.args[i].replace_expr(replace_id) - - STATE_NO_BLOC = 0 STATE_IN_BLOC = 1 @@ -224,8 +200,6 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None): line = line.strip(' ').strip('\t') instr = mnemo.fromstring(line, symbol_pool, attrib) - replace_orphan_labels(instr, symbol_pool) - if instr.dstflow(): instr.dstflow2label(symbol_pool) lines.append(instr) diff --git a/miasm2/expression/expression.py b/miasm2/expression/expression.py index 5ea596ae..b8266bf7 100644 --- a/miasm2/expression/expression.py +++ b/miasm2/expression/expression.py @@ -644,7 +644,7 @@ class ExprLoc(Expr): return Expr.get_object(cls, (loc_key, size)) def __str__(self): - return "label_%d" % self._loc_key.key + return "loc_%d" % self._loc_key.key def get_r(self, mem_read=False, cst_read=False): return set() diff --git a/miasm2/expression/expression_reduce.py b/miasm2/expression/expression_reduce.py index 45386ca2..22ac8d8d 100644 --- a/miasm2/expression/expression_reduce.py +++ b/miasm2/expression/expression_reduce.py @@ -4,8 +4,8 @@ Apply reduction rules to an Expression ast """ import logging -from miasm2.expression.expression import ExprInt, ExprId, ExprOp, ExprSlice,\ - ExprCompose, ExprMem, ExprCond +from miasm2.expression.expression import ExprInt, ExprId, ExprLoc, ExprOp, \ + ExprSlice, ExprCompose, ExprMem, ExprCond log_reduce = logging.getLogger("expr_reduce") console_handler = logging.StreamHandler() @@ -29,7 +29,7 @@ class ExprNode(object): expr = self.expr if self.info is not None: out = repr(self.info) - elif expr.is_int() or expr.is_id(): + elif expr.is_int() or expr.is_id() or expr.is_loc(): out = str(expr) elif expr.is_mem(): out = "@%d[%r]" % (self.expr.size, self.arg) @@ -76,7 +76,7 @@ class ExprReducer(object): @expr: Expression to analyze """ - if isinstance(expr, (ExprId, ExprInt)): + if isinstance(expr, (ExprId, ExprLoc, ExprInt)): node = ExprNode(expr) elif isinstance(expr, (ExprMem, ExprSlice)): son = self.expr2node(expr.arg) @@ -118,7 +118,7 @@ class ExprReducer(object): expr = node.expr log_reduce.debug("\t" * lvl + "Reduce...: %s", node.expr) - if isinstance(expr, (ExprId, ExprInt)): + if isinstance(expr, (ExprId, ExprInt, ExprLoc)): pass elif isinstance(expr, ExprMem): arg = self.categorize(node.arg, lvl=lvl + 1, **kwargs) diff --git a/miasm2/expression/simplifications_common.py b/miasm2/expression/simplifications_common.py index 13b25ce2..149c5b8d 100644 --- a/miasm2/expression/simplifications_common.py +++ b/miasm2/expression/simplifications_common.py @@ -250,6 +250,26 @@ def simp_cst_propagation(e_s, expr): e_s(Y.msb()) == ExprInt(0, 1)): args = [args[0].args[0], X + Y] + # ((var >> int1) << int1) => var & mask + # ((var << int1) >> int1) => var & mask + if (op_name in ['<<', '>>'] and + args[0].is_op() and + args[0].op in ['<<', '>>'] and + op_name != args[0]): + var = args[0].args[0] + int1 = args[0].args[1] + int2 = args[1] + if int1 == int2 and int1.is_int() and int(int1) < expr.size: + if op_name == '>>': + mask = ExprInt((1 << (expr.size - int(int1))) - 1, expr.size) + else: + mask = ExprInt( + ((1 << int(int1)) - 1) ^ ((1 << expr.size) - 1), + expr.size + ) + ret = var & mask + return ret + # ((A & A.mask) if op_name == "&" and args[-1] == expr.mask: return ExprOp('&', *args[:-1]) diff --git a/miasm2/ir/symbexec.py b/miasm2/ir/symbexec.py index 7ee55f97..c75bd9e8 100644 --- a/miasm2/ir/symbexec.py +++ b/miasm2/ir/symbexec.py @@ -1050,15 +1050,7 @@ class SymbolicExecutionEngine(object): print '_' * 80 dst = self.eval_expr(self.ir_arch.IRDst) - # Best effort to resolve destination as ExprLoc - if dst.is_loc(): - ret = dst - elif dst.is_int(): - label = self.ir_arch.symbol_pool.getby_offset_create(int(dst)) - ret = ExprLoc(label, dst.size) - else: - ret = dst - return ret + return dst def run_block_at(self, addr, step=False): """ diff --git a/miasm2/ir/translators/C.py b/miasm2/ir/translators/C.py index 2f354d47..b7821e85 100644 --- a/miasm2/ir/translators/C.py +++ b/miasm2/ir/translators/C.py @@ -56,10 +56,9 @@ class TranslatorC(Translator): return str(loc_key) offset = self.symbol_pool.loc_key_to_offset(loc_key) - name = self.symbol_pool.loc_key_to_name(loc_key) - if offset is None: - return name + return str(loc_key) + return "0x%x" % offset def from_ExprAff(self, expr): diff --git a/miasm2/jitter/Jitgcc.c b/miasm2/jitter/Jitgcc.c index 79274f24..329b7db4 100644 --- a/miasm2/jitter/Jitgcc.c +++ b/miasm2/jitter/Jitgcc.c @@ -10,13 +10,13 @@ typedef struct { typedef int (*jitted_func)(block_id*, PyObject*); -PyObject* gcc_exec_bloc(PyObject* self, PyObject* args) +PyObject* gcc_exec_block(PyObject* self, PyObject* args) { jitted_func func; PyObject* jitcpu; PyObject* func_py; PyObject* lbl2ptr; - PyObject* breakpoints; + PyObject* stop_offsets; PyObject* retaddr = NULL; int status; block_id BlockDst; @@ -26,7 +26,7 @@ PyObject* gcc_exec_bloc(PyObject* self, PyObject* args) if (!PyArg_ParseTuple(args, "OOOO|K", - &retaddr, &jitcpu, &lbl2ptr, &breakpoints, + &retaddr, &jitcpu, &lbl2ptr, &stop_offsets, &max_exec_per_call)) return NULL; @@ -73,8 +73,8 @@ PyObject* gcc_exec_bloc(PyObject* self, PyObject* args) if (status) return retaddr; - // Check breakpoint - if (PyDict_Contains(breakpoints, retaddr)) + // Check stop offsets + if (PySet_Contains(stop_offsets, retaddr)) return retaddr; } } @@ -85,8 +85,8 @@ static PyObject *GccError; static PyMethodDef GccMethods[] = { - {"gcc_exec_bloc", gcc_exec_bloc, METH_VARARGS, - "gcc exec bloc"}, + {"gcc_exec_block", gcc_exec_block, METH_VARARGS, + "gcc exec block"}, {NULL, NULL, 0, NULL} /* Sentinel */ }; diff --git a/miasm2/jitter/Jitllvm.c b/miasm2/jitter/Jitllvm.c index b46f88e3..6ecbd483 100644 --- a/miasm2/jitter/Jitllvm.c +++ b/miasm2/jitter/Jitllvm.c @@ -10,7 +10,7 @@ // Needed to get the JitCpu.cpu offset, arch independent #include "arch/JitCore_x86.h" -PyObject* llvm_exec_bloc(PyObject* self, PyObject* args) +PyObject* llvm_exec_block(PyObject* self, PyObject* args) { uint64_t (*func)(void*, void*, void*, uint8_t*); vm_cpu_t* cpu; @@ -20,14 +20,14 @@ PyObject* llvm_exec_bloc(PyObject* self, PyObject* args) uint8_t status; PyObject* func_py; PyObject* lbl2ptr; - PyObject* breakpoints; + PyObject* stop_offsets; PyObject* retaddr = NULL; uint64_t max_exec_per_call = 0; uint64_t cpt; int do_cpt; if (!PyArg_ParseTuple(args, "OOOO|K", - &retaddr, &jitcpu, &lbl2ptr, &breakpoints, + &retaddr, &jitcpu, &lbl2ptr, &stop_offsets, &max_exec_per_call)) return NULL; @@ -68,16 +68,16 @@ PyObject* llvm_exec_bloc(PyObject* self, PyObject* args) if (status) return retaddr; - // Check breakpoint - if (PyDict_Contains(breakpoints, retaddr)) + // Check stop offsets + if (PySet_Contains(stop_offsets, retaddr)) return retaddr; } } static PyMethodDef LLVMMethods[] = { - {"llvm_exec_bloc", llvm_exec_bloc, METH_VARARGS, - "llvm exec bloc"}, + {"llvm_exec_block", llvm_exec_block, METH_VARARGS, + "llvm exec block"}, {NULL, NULL, 0, NULL} /* Sentinel */ }; diff --git a/miasm2/jitter/codegen.py b/miasm2/jitter/codegen.py index 92af3259..c9e98d93 100644 --- a/miasm2/jitter/codegen.py +++ b/miasm2/jitter/codegen.py @@ -113,12 +113,6 @@ class CGen(object): self.C_PC = self.id_to_c(self.PC) - def loc_key_to_jitlabel(self, lbl): - """Convert LocKey to a jitter label name""" - offset = self.ir_arch.symbol_pool.loc_key_to_offset(lbl) - assert offset is not None - return "jitblock_%X" % offset - def dst_to_c(self, src): """Translate Expr @src into C code""" if not isinstance(src, Expr): @@ -241,7 +235,7 @@ class CGen(object): for dst, src in sorted(assignblk.iteritems()): src = src.replace_expr(prefetchers) - if dst is self.ir_arch.IRDst: + if dst == self.ir_arch.IRDst: pass elif isinstance(dst, ExprId): new_dst = self.add_local_var(dst_var, dst_index, dst) @@ -263,7 +257,7 @@ class CGen(object): raise ValueError("Unknown dst") for dst, new_dst in dst_var.iteritems(): - if dst is self.ir_arch.IRDst: + if dst == self.ir_arch.IRDst: continue c_updt.append('%s = %s;' % (self.id_to_c(dst), self.id_to_c(new_dst))) c_var.append("uint%d_t %s;" % (new_dst.size, new_dst)) @@ -376,15 +370,14 @@ class CGen(object): offset = self.ir_arch.symbol_pool.loc_key_to_offset(dst) if offset is None: # Generate goto for local labels - name = self.ir_arch.symbol_pool.loc_key_to_name(dst) - return ['goto %s;' % name] + return ['goto %s;' % dst] if (offset > attrib.instr.offset and offset in instr_offsets): # Only generate goto for next instructions. # (consecutive instructions) out += self.gen_post_code(attrib) out += self.gen_post_instr_checks(attrib) - out.append('goto %s;' % self.loc_key_to_jitlabel(dst)) + out.append('goto %s;' % dst) else: out += self.gen_post_code(attrib) out.append('BlockDst->address = DST_value;') @@ -537,8 +530,8 @@ class CGen(object): post_label = self.get_block_post_label(block) post_offset = self.ir_arch.symbol_pool.loc_key_to_offset(post_label) instr_offsets.append(post_offset) - lbl_start = self.ir_arch.symbol_pool.getby_offset_create(instr_offsets[0]) - return (self.CODE_INIT % self.loc_key_to_jitlabel(lbl_start)).split("\n"), instr_offsets + lbl_start = block.loc_key + return (self.CODE_INIT % lbl_start).split("\n"), instr_offsets def gen_irblock(self, instr_attrib, attributes, instr_offsets, irblock): """ @@ -570,10 +563,10 @@ class CGen(object): Generate the C code for the final block instruction """ - lbl = self.get_block_post_label(block) - offset = self.ir_arch.symbol_pool.loc_key_to_offset(lbl) + loc_key = self.get_block_post_label(block) + offset = self.ir_arch.symbol_pool.loc_key_to_offset(loc_key) dst = self.dst_to_c(offset) - code = self.CODE_RETURN_NO_EXCEPTION % (self.loc_key_to_jitlabel(lbl), self.C_PC, dst, dst) + code = self.CODE_RETURN_NO_EXCEPTION % (loc_key, self.C_PC, dst, dst) return code.split('\n') def gen_c(self, block, log_mn=False, log_regs=False): @@ -592,15 +585,9 @@ class CGen(object): instr_attrib, irblocks_attributes = self.get_attributes(instr, irblocks, log_mn, log_regs) for index, irblock in enumerate(irblocks): new_irblock = self.ir_arch.irbloc_fix_regs_for_mode(irblock, self.ir_arch.attrib) - label = new_irblock.loc_key - offset = self.ir_arch.symbol_pool.loc_key_to_offset(label) - if offset is None: - name = self.ir_arch.symbol_pool.loc_key_to_name(label) - out.append("%-40s // %.16X %s" % - (str(name) + ":", instr.offset, instr)) - else: - out.append("%-40s // %.16X %s" % - (self.loc_key_to_jitlabel(label) + ":", instr.offset, instr)) + label = str(new_irblock.loc_key) + out.append("%-40s // %.16X %s" % + (label + ":", instr.offset, instr)) if index == 0: out += self.gen_pre_code(instr_attrib) out += self.gen_irblock(instr_attrib, irblocks_attributes[index], instr_offsets, new_irblock) diff --git a/miasm2/jitter/jitcore.py b/miasm2/jitter/jitcore.py index bf56b9de..b636782d 100644 --- a/miasm2/jitter/jitcore.py +++ b/miasm2/jitter/jitcore.py @@ -16,6 +16,7 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # from hashlib import md5 +import warnings from miasm2.core.asmblock import disasmEngine, AsmBlockBad from miasm2.core.interval import interval @@ -28,57 +29,56 @@ class JitCore(object): "JiT management. This is an abstract class" + # Jitted function's name + FUNCNAME = "block_entry" + jitted_block_delete_cb = None jitted_block_max_size = 10000 - def __init__(self, ir_arch, bs=None): + def __init__(self, ir_arch, bin_stream): """Initialise a JitCore instance. @ir_arch: ir instance for current architecture - @bs: bitstream + @bin_stream: bin_stream instance """ - + # Arch related self.ir_arch = ir_arch self.arch_name = "%s%s" % (self.ir_arch.arch.name, self.ir_arch.attrib) - self.bs = bs - self.known_blocs = {} - self.loc_key_to_jit_block = BoundedDict(self.jitted_block_max_size, + + # Structures for block tracking + self.offset_to_jitted_func = BoundedDict(self.jitted_block_max_size, delete_cb=self.jitted_block_delete_cb) - self.lbl2bloc = {} + self.loc_key_to_block = {} + self.blocks_mem_interval = interval() + + # Logging & options self.log_mn = False self.log_regs = False self.log_newbloc = False - self.segm_to_do = set() - self.jitcount = 0 - self.addr2obj = {} - self.addr2objref = {} - self.blocs_mem_interval = interval() - self.disasm_cb = None - self.split_dis = set() self.options = {"jit_maxline": 50, # Maximum number of line jitted "max_exec_per_call": 0 # 0 means no limit } + # Disassembly Engine + self.split_dis = set() self.mdis = disasmEngine( - ir_arch.arch, ir_arch.attrib, bs, + ir_arch.arch, ir_arch.attrib, bin_stream, lines_wd=self.options["jit_maxline"], symbol_pool=ir_arch.symbol_pool, follow_call=False, dontdis_retcall=False, split_dis=self.split_dis, - dis_block_callback=self.disasm_cb ) def set_options(self, **kwargs): "Set options relative to the backend" - self.options.update(kwargs) def clear_jitted_blocks(self): "Reset all jitted blocks" - self.loc_key_to_jit_block.clear() - self.lbl2bloc.clear() - self.blocs_mem_interval = interval() + self.offset_to_jitted_func.clear() + self.loc_key_to_block.clear() + self.blocks_mem_interval = interval() def add_disassembly_splits(self, *args): """The disassembly engine will stop on address in args if they @@ -93,7 +93,7 @@ class JitCore(object): "Initialise the Jitter" raise NotImplementedError("Abstract class") - def get_bloc_min_max(self, cur_block): + def set_block_min_max(self, cur_block): "Update cur_block to set min/max address" if cur_block.lines: @@ -106,32 +106,31 @@ class JitCore(object): cur_block.ad_max = offset+1 - def add_bloc_to_mem_interval(self, vm, block): + def add_block_to_mem_interval(self, vm, block): "Update vm to include block addresses in its memory range" - - self.blocs_mem_interval += interval([(block.ad_min, block.ad_max - 1)]) + self.blocks_mem_interval += interval([(block.ad_min, block.ad_max - 1)]) vm.reset_code_bloc_pool() - for a, b in self.blocs_mem_interval: + for a, b in self.blocks_mem_interval: vm.add_code_bloc(a, b + 1) - def jitirblocs(self, label, irblocks): + def jit_irblocks(self, label, irblocks): """JiT a group of irblocks. @label: the label of the irblocks - @irblocks: a gorup of irblocks + @irblocks: a group of irblocks """ raise NotImplementedError("Abstract class") - def add_bloc(self, block): + def add_block(self, block): """Add a block to JiT and JiT it. @block: asm_bloc to add """ irblocks = self.ir_arch.add_block(block, gen_pc_updt = True) block.blocks = irblocks - self.jitirblocs(block.loc_key, irblocks) + self.jit_irblocks(block.loc_key, irblocks) - def disbloc(self, addr, vm): + def disasm_and_jit_block(self, addr, vm): """Disassemble a new block and JiT it @addr: address of the block to disassemble (LocKey or int) @vm: VmMngr instance @@ -145,7 +144,6 @@ class JitCore(object): # Prepare disassembler self.mdis.lines_wd = self.options["jit_maxline"] - self.mdis.dis_block_callback = self.disasm_cb # Disassemble it cur_block = self.mdis.dis_block(addr) @@ -156,30 +154,36 @@ class JitCore(object): print cur_block.to_string(self.mdis.symbol_pool) # Update label -> block - self.lbl2bloc[cur_block.loc_key] = cur_block + self.loc_key_to_block[cur_block.loc_key] = cur_block # Store min/max block address needed in jit automod code - self.get_bloc_min_max(cur_block) + self.set_block_min_max(cur_block) # JiT it - self.add_bloc(cur_block) + self.add_block(cur_block) # Update jitcode mem range - self.add_bloc_to_mem_interval(vm, cur_block) + self.add_block_to_mem_interval(vm, cur_block) return cur_block - def runbloc(self, cpu, lbl, breakpoints): - """Run the block starting at lbl. + def run_at(self, cpu, offset, stop_offsets): + """Run from the starting address @offset. + Execution will stop if: + - max_exec_per_call option is reached + - a new, yet unknown, block is reached after the execution of block at + address @offset + - an address in @stop_offsets is reached @cpu: JitCpu instance - @lbl: target label + @offset: starting address (int) + @stop_offsets: set of address on which the jitter must stop """ - if lbl is None: - lbl = getattr(cpu, self.ir_arch.pc.name) + if offset is None: + offset = getattr(cpu, self.ir_arch.pc.name) - if not lbl in self.loc_key_to_jit_block: + if offset not in self.offset_to_jitted_func: # Need to JiT the block - cur_block = self.disbloc(lbl, cpu.vmmngr) + cur_block = self.disasm_and_jit_block(offset, cpu.vmmngr) if isinstance(cur_block, AsmBlockBad): errno = cur_block.errno if errno == AsmBlockBad.ERROR_IO: @@ -188,15 +192,16 @@ class JitCore(object): cpu.set_exception(EXCEPT_UNK_MNEMO) else: raise RuntimeError("Unhandled disasm result %r" % errno) - return lbl + return offset # Run the block and update cpu/vmmngr state - return self.exec_wrapper(lbl, cpu, self.loc_key_to_jit_block.data, breakpoints, + return self.exec_wrapper(offset, cpu, self.offset_to_jitted_func.data, + stop_offsets, self.options["max_exec_per_call"]) - def blocs2memrange(self, blocks): + def blocks_to_memrange(self, blocks): """Return an interval instance standing for blocks addresses - @blocks: list of asm_bloc instances + @blocks: list of AsmBlock instances """ mem_range = interval() @@ -215,10 +220,10 @@ class JitCore(object): vm.reset_code_bloc_pool() # Add blocks in the pool - for start, stop in self.blocs_mem_interval: + for start, stop in self.blocks_mem_interval: vm.add_code_bloc(start, stop + 1) - def del_bloc_in_range(self, ad1, ad2): + def del_block_in_range(self, ad1, ad2): """Find and remove jitted block in range [ad1, ad2]. Return the list of block removed. @ad1: First address @@ -227,7 +232,7 @@ class JitCore(object): # Find concerned blocks modified_blocks = set() - for block in self.lbl2bloc.values(): + for block in self.loc_key_to_block.values(): if not block.lines: continue if block.ad_max <= ad1 or block.ad_min >= ad2: @@ -238,10 +243,10 @@ class JitCore(object): modified_blocks.add(block) # Generate interval to delete - del_interval = self.blocs2memrange(modified_blocks) + del_interval = self.blocks_to_memrange(modified_blocks) # Remove interval from monitored interval list - self.blocs_mem_interval -= del_interval + self.blocks_mem_interval -= del_interval # Remove modified blocks for block in modified_blocks: @@ -249,17 +254,17 @@ class JitCore(object): for irblock in block.blocks: # Remove offset -> jitted block link offset = self.ir_arch.symbol_pool.loc_key_to_offset(irblock.loc_key) - if offset in self.loc_key_to_jit_block: - del(self.loc_key_to_jit_block[offset]) + if offset in self.offset_to_jitted_func: + del(self.offset_to_jitted_func[offset]) except AttributeError: # The block has never been translated in IR offset = self.ir_arch.symbol_pool.loc_key_to_offset(block.loc_key) - if offset in self.loc_key_to_jit_block: - del(self.loc_key_to_jit_block[offset]) + if offset in self.offset_to_jitted_func: + del(self.offset_to_jitted_func[offset]) # Remove label -> block link - del(self.lbl2bloc[block.loc_key]) + del(self.loc_key_to_block[block.loc_key]) return modified_blocks @@ -269,7 +274,7 @@ class JitCore(object): @mem_range: list of start/stop addresses """ for addr_start, addr_stop in mem_range: - self.del_bloc_in_range(addr_start, addr_stop) + self.del_block_in_range(addr_start, addr_stop) self.__updt_jitcode_mem_range(vm) vm.reset_memory_access() @@ -295,3 +300,13 @@ class JitCore(object): self.log_regs, block_raw)).hexdigest() return block_hash + + @property + def disasm_cb(self): + warnings.warn("Deprecated API: use .mdis.dis_block_callback") + return self.mdis.dis_block_callback + + @disasm_cb.setter + def disasm_cb(self, value): + warnings.warn("Deprecated API: use .mdis.dis_block_callback") + self.mdis.dis_block_callback = value diff --git a/miasm2/jitter/jitcore_cc_base.py b/miasm2/jitter/jitcore_cc_base.py index f0a75cf4..bbf10a53 100644 --- a/miasm2/jitter/jitcore_cc_base.py +++ b/miasm2/jitter/jitcore_cc_base.py @@ -46,9 +46,9 @@ class resolver: class JitCore_Cc_Base(JitCore): "JiT management, abstract class using a C compiler as backend" - def __init__(self, ir_arch, bs=None): + def __init__(self, ir_arch, bin_stream): self.jitted_block_delete_cb = self.deleteCB - super(JitCore_Cc_Base, self).__init__(ir_arch, bs) + super(JitCore_Cc_Base, self).__init__(ir_arch, bin_stream) self.resolver = resolver() self.ir_arch = ir_arch self.states = {} @@ -85,21 +85,12 @@ class JitCore_Cc_Base(JitCore): """ self.codegen = codegen - def loc_key_to_filename(self, loc_key): - """ - Generate function name from @loc_key - @loc_key: LocKey instance - """ - return "block_%s" % self.codegen.loc_key_to_jitlabel(loc_key) - - def gen_c_code(self, loc_key, block): + def gen_c_code(self, block): """ Return the C code corresponding to the @irblocks - @loc_key: LocKey of the block to jit @irblocks: list of irblocks """ - f_name = self.loc_key_to_filename(loc_key) - f_declaration = 'int %s(block_id * BlockDst, JitCpu* jitcpu)' % f_name + f_declaration = 'int %s(block_id * BlockDst, JitCpu* jitcpu)' % self.FUNCNAME out = self.codegen.gen_c(block, log_mn=self.log_mn, log_regs=self.log_regs) out = [f_declaration + '{'] + out + ['}\n'] c_code = out diff --git a/miasm2/jitter/jitcore_gcc.py b/miasm2/jitter/jitcore_gcc.py index d9da3160..cd92bab1 100644 --- a/miasm2/jitter/jitcore_gcc.py +++ b/miasm2/jitter/jitcore_gcc.py @@ -13,9 +13,9 @@ from miasm2.jitter.jitcore_cc_base import JitCore_Cc_Base, gen_core class JitCore_Gcc(JitCore_Cc_Base): "JiT management, using a C compiler as backend" - def __init__(self, ir_arch, bs=None): - super(JitCore_Gcc, self).__init__(ir_arch, bs) - self.exec_wrapper = Jitgcc.gcc_exec_bloc + def __init__(self, ir_arch, bin_stream): + super(JitCore_Gcc, self).__init__(ir_arch, bin_stream) + self.exec_wrapper = Jitgcc.gcc_exec_block def deleteCB(self, offset): """Free the state associated to @offset and delete it @@ -25,15 +25,14 @@ class JitCore_Gcc(JitCore_Cc_Base): del self.states[offset] def load_code(self, label, fname_so): - f_name = self.loc_key_to_filename(label) lib = ctypes.cdll.LoadLibrary(fname_so) - func = getattr(lib, f_name) + func = getattr(lib, self.FUNCNAME) addr = ctypes.cast(func, ctypes.c_void_p).value offset = self.ir_arch.symbol_pool.loc_key_to_offset(label) - self.loc_key_to_jit_block[offset] = addr + self.offset_to_jitted_func[offset] = addr self.states[offset] = lib - def add_bloc(self, block): + def add_block(self, block): """Add a bloc to JiT and JiT it. @block: block to jit """ @@ -41,7 +40,7 @@ class JitCore_Gcc(JitCore_Cc_Base): fname_out = os.path.join(self.tempdir, "%s.so" % block_hash) if not os.access(fname_out, os.R_OK | os.X_OK): - func_code = self.gen_c_code(block.loc_key, block) + func_code = self.gen_c_code(block) # Create unique C file fdesc, fname_in = tempfile.mkstemp(suffix=".c") diff --git a/miasm2/jitter/jitcore_llvm.py b/miasm2/jitter/jitcore_llvm.py index 5152cf9e..bc921569 100644 --- a/miasm2/jitter/jitcore_llvm.py +++ b/miasm2/jitter/jitcore_llvm.py @@ -8,7 +8,6 @@ import Jitllvm class JitCore_LLVM(jitcore.JitCore): - "JiT management, using LLVM as backend" # Architecture dependant libraries @@ -20,8 +19,8 @@ class JitCore_LLVM(jitcore.JitCore): "ppc32": "JitCore_ppc32.so", } - def __init__(self, ir_arch, bs=None): - super(JitCore_LLVM, self).__init__(ir_arch, bs) + def __init__(self, ir_arch, bin_stream): + super(JitCore_LLVM, self).__init__(ir_arch, bin_stream) self.options.update({"safe_mode": True, # Verify each function "optimise": True, # Optimise functions @@ -29,7 +28,7 @@ class JitCore_LLVM(jitcore.JitCore): "log_assembly": False, # Print assembly executed }) - self.exec_wrapper = Jitllvm.llvm_exec_bloc + self.exec_wrapper = Jitllvm.llvm_exec_block self.ir_arch = ir_arch # Cache temporary dir @@ -74,7 +73,7 @@ class JitCore_LLVM(jitcore.JitCore): # Enable caching self.context.enable_cache() - def add_bloc(self, block): + def add_block(self, block): """Add a block to JiT and JiT it. @block: the block to add """ @@ -84,7 +83,7 @@ class JitCore_LLVM(jitcore.JitCore): if not os.access(fname_out, os.R_OK): # Build a function in the context - func = LLVMFunction(self.context, block.loc_key) + func = LLVMFunction(self.context, self.FUNCNAME) # Set log level func.log_regs = self.log_regs @@ -115,9 +114,9 @@ class JitCore_LLVM(jitcore.JitCore): else: # The cache file exists: function can be loaded from cache - ptr = self.context.get_ptr_from_cache(fname_out, block.loc_key) + ptr = self.context.get_ptr_from_cache(fname_out, self.FUNCNAME) # Store a pointer on the function jitted code loc_key = block.loc_key offset = self.ir_arch.symbol_pool.loc_key_to_offset(loc_key) - self.loc_key_to_jit_block[offset] = ptr + self.offset_to_jitted_func[offset] = ptr diff --git a/miasm2/jitter/jitcore_python.py b/miasm2/jitter/jitcore_python.py index 785e3fa1..45b418b5 100644 --- a/miasm2/jitter/jitcore_python.py +++ b/miasm2/jitter/jitcore_python.py @@ -15,8 +15,8 @@ class JitCore_Python(jitcore.JitCore): SymbExecClass = EmulatedSymbExec - def __init__(self, ir_arch, bs=None): - super(JitCore_Python, self).__init__(ir_arch, bs) + def __init__(self, ir_arch, bin_stream): + super(JitCore_Python, self).__init__(ir_arch, bin_stream) self.ir_arch = ir_arch # CPU & VM (None for now) will be set later @@ -34,10 +34,10 @@ class JitCore_Python(jitcore.JitCore): "Preload symbols according to current architecture" self.symbexec.reset_regs() - def jitirblocs(self, loc_key, irblocks): + def jit_irblocks(self, loc_key, irblocks): """Create a python function corresponding to an irblocks' group. @loc_key: the loc_key of the irblocks - @irblocks: a gorup of irblocks + @irblocks: a group of irblocks """ def myfunc(cpu): @@ -129,9 +129,9 @@ class JitCore_Python(jitcore.JitCore): # Associate myfunc with current loc_key offset = self.ir_arch.symbol_pool.loc_key_to_offset(loc_key) assert offset is not None - self.loc_key_to_jit_block[offset] = myfunc + self.offset_to_jitted_func[offset] = myfunc - def exec_wrapper(self, loc_key, cpu, _loc_key_to_jit_block, _breakpoints, + def exec_wrapper(self, loc_key, cpu, _offset_to_jitted_func, _stop_offsets, _max_exec_per_call): """Call the function @loc_key with @cpu @loc_key: function's loc_key @@ -139,7 +139,7 @@ class JitCore_Python(jitcore.JitCore): """ # Get Python function corresponding to @loc_key - fc_ptr = self.loc_key_to_jit_block[loc_key] + fc_ptr = self.offset_to_jitted_func[loc_key] # Execute the function return fc_ptr(cpu) diff --git a/miasm2/jitter/jitload.py b/miasm2/jitter/jitload.py index 28200997..5f8b4ad6 100644 --- a/miasm2/jitter/jitload.py +++ b/miasm2/jitter/jitload.py @@ -1,5 +1,6 @@ import logging +import warnings from functools import wraps from collections import Sequence, namedtuple, Iterator @@ -160,7 +161,7 @@ class ExceptionHandle(): return not self.__eq__(to_cmp) -class jitter(object): +class Jitter(object): "Main class for JIT handling" @@ -302,11 +303,14 @@ class jitter(object): """ self.exceptions_handler.add_callback(flag, callback) - def runbloc(self, pc): + def run_at(self, pc): """Wrapper on JiT backend. Run the code at PC and return the next PC. @pc: address of code to run""" - return self.jit.runbloc(self.cpu, pc, self.breakpoints_handler.callbacks) + return self.jit.run_at( + self.cpu, pc, + set(self.breakpoints_handler.callbacks.keys()) + ) def runiter_once(self, pc): """Iterator on callbacks results on code running from PC. @@ -348,7 +352,7 @@ class jitter(object): assert(self.get_exception() == 0) # Run the bloc at PC - self.pc = self.runbloc(self.pc) + self.pc = self.run_at(self.pc) # Check exceptions (raised by the execution of the block) exception_flag = self.get_exception() @@ -484,3 +488,33 @@ class jitter(object): self.symbexec.update_cpu_from_engine() return ret + + def set_trace_log(self, + trace_instr=True, trace_regs=True, + trace_new_blocks=False): + """ + Activate/Deactivate trace log options + + @trace_instr: activate instructions tracing log + @trace_regs: activate registers tracing log + @trace_new_blocks: dump new code blocks log + """ + + # As trace state changes, clear already jitted blocks + self.jit.clear_jitted_blocks() + + self.jit.log_mn = trace_instr + self.jit.log_regs = trace_regs + self.jit.log_newbloc = trace_new_blocks + + +class jitter(Jitter): + """ + DEPRECATED object + Use Jitter instead of jitter + """ + + + def __init__(self, *args, **kwargs): + warnings.warn("Deprecated API: use Jitter") + super(jitter, self).__init__(*args, **kwargs) diff --git a/miasm2/jitter/llvmconvert.py b/miasm2/jitter/llvmconvert.py index 2045f083..16b08cf1 100644 --- a/miasm2/jitter/llvmconvert.py +++ b/miasm2/jitter/llvmconvert.py @@ -75,16 +75,10 @@ class LLVMContext(): @label: str or asmlabel instance""" if isinstance(label, str): return label - if not isinstance(label, LocKey): - raise ValueError("label must either be str or LocKey") - - offset = self.ir_arch.symbol_pool.loc_key_to_offset(label) - - if offset is None: - name = self.ir_arch.symbol_pool.loc_key_to_name(label) - return "%s" % name + elif isinstance(label, LocKey): + return str(label) else: - return "label_off_%X" % offset + raise ValueError("label must either be str or LocKey") def optimise_level(self, level=2): """Set the optimisation level to @level from 0 to 2 @@ -341,7 +335,6 @@ class LLVMContext_JIT(LLVMContext): def get_ptr_from_cache(self, file_name, func_name): "Load @file_name and return a pointer on the jitter @func_name" # We use an empty module to avoid loosing time on function building - func_name = self.canonize_label_name(func_name) empty_module = llvm.parse_assembly("") empty_module.fname_out = file_name @@ -398,7 +391,6 @@ class LLVMFunction(): def __init__(self, llvm_context, name="fc", new_module=True): "Create a new function with name @name" self.llvm_context = llvm_context - name = self.llvm_context.canonize_label_name(name) if new_module: self.llvm_context.new_module() self.mod = self.llvm_context.get_module() diff --git a/miasm2/jitter/loader/elf.py b/miasm2/jitter/loader/elf.py index deaebd09..01dea647 100644 --- a/miasm2/jitter/loader/elf.py +++ b/miasm2/jitter/loader/elf.py @@ -32,7 +32,6 @@ def preload_elf(vm, e, runtime_lib, patch_vm_imp=True): # XXX quick hack fa = get_import_address_elf(e) dyn_funcs = {} - # log.debug('imported funcs: %s' % fa) for (libname, libfunc), ads in fa.items(): for ad in ads: ad_base_lib = runtime_lib.lib_get_add_base(libname) @@ -77,7 +76,6 @@ def vm_load_elf(vm, fdata, name="", **kargs): # -2: Trick to avoid merging 2 consecutive pages i += [(a_addr, b_addr - 2)] for a, b in i.intervals: - # print hex(a), hex(b) vm.add_memory_page(a, PAGE_READ | PAGE_WRITE, "\x00" * (b + 2 - a), repr(name)) diff --git a/test/analysis/data_flow.py b/test/analysis/data_flow.py index c3469109..d5b197d2 100644 --- a/test/analysis/data_flow.py +++ b/test/analysis/data_flow.py @@ -673,7 +673,6 @@ for test_nb, test in enumerate([(G1_IRA, G1_EXP_IRA), reaching_defs = ReachingDefinitions(g_ira) defuse = DiGraphDefUse(reaching_defs, deref_mem=True) - #open("defuse_%02d.dot" % (test_nb+1), "w").write(defuse.dot()) # # Simplify graph dead_simp(g_ira) diff --git a/test/analysis/depgraph.py b/test/analysis/depgraph.py index 4e023761..86857182 100644 --- a/test/analysis/depgraph.py +++ b/test/analysis/depgraph.py @@ -1058,7 +1058,7 @@ for test_nb, test in enumerate([(G1_IRA, G1_INPUT), all_results.add(unflatGraph(flatGraph(result.graph))) open("graph_test_%02d_%02d.dot" % (test_nb + 1, i), "w").write(dg2graph(result.graph)) - # print all_flat + if g_ind == 0: all_flat = sorted(all_flat) all_flats.append(all_flat) diff --git a/test/analysis/dse.py b/test/analysis/dse.py index 5a72db34..4367f6f7 100644 --- a/test/analysis/dse.py +++ b/test/analysis/dse.py @@ -34,8 +34,7 @@ class DSETest(object): self.myjit = jitter(jitter_engine) self.myjit.init_stack() - self.myjit.jit.log_regs = True - self.myjit.jit.log_mn = True + self.myjit.set_trace_log() self.dse = None self.assembly = None diff --git a/test/arch/aarch64/unit/asm_test.py b/test/arch/aarch64/unit/asm_test.py index ca27ef9d..437a8056 100644 --- a/test/arch/aarch64/unit/asm_test.py +++ b/test/arch/aarch64/unit/asm_test.py @@ -16,16 +16,11 @@ class Asm_Test(object): self.myjit = Machine("aarch64l").jitter(jitter) self.myjit.init_stack() - self.myjit.jit.log_regs = False - self.myjit.jit.log_mn = False - - def __call__(self): self.asm() self.run() self.check() - def asm(self): blocks, symbol_pool = parse_asm.parse_txt(mn_aarch64, 'l', self.TXT, symbol_pool = self.myjit.ir_arch.symbol_pool) diff --git a/test/arch/arm/arch.py b/test/arch/arm/arch.py index 90d137d0..f69bb104 100644 --- a/test/arch/arm/arch.py +++ b/test/arch/arm/arch.py @@ -5,41 +5,6 @@ from pdb import pm symbol_pool = AsmSymbolPool() -if 0: - a = bs('00') - b = bs('01') - c = bs(l=2) - d = bs(l=4, fname='rd') - e = bs_name(l=1, name={'ADD': 0, 'SUB': 1}) - assert(isinstance(e, bs_divert)) - scc = bs_mod_name(l=1, mn_mod=['', 'S']) - f = bs(l=1, cls=(arm_reg,)) - - class arm_mov(mn_arm): - fields = [bs('0000'), bs('0000'), bs('0000')] - - class arm_DATA(mn_arm): - fields = [bs('1111'), e, scc, f, bs('0')] - mn = mn_arm.dis(0xF000000) - - -if 0: - import cProfile - cProfile.run('mn_arm.dis("\xe1\xa0\xa0\x06", "l")') - # l = mn_arm.dis(bin_stream("\xe1\xa0\xa0\x06"), mode_arm) - # print l - """ - mode = 64 - l = mn_x86.fromstring("ADC DWORD PTR [RAX], 0x11223344", mode) - print 'xx' - #t= time.time() - import cProfile - def f(): - x = l.asm(mode) - print x - cProfile.run('f()') - """ - def h2i(s): return s.replace(' ', '').decode('hex') @@ -268,15 +233,11 @@ for s, l in reg_tests_arm: print s print mn assert(str(mn) == s) - # print hex(b) - # print [str(x.get()) for x in mn.args] l = mn_arm.fromstring(s, symbol_pool, 'l') - # print l assert(str(l) == s) a = mn_arm.asm(l) print [x for x in a] print repr(b) - # print mn.args assert(b in a) reg_tests_armt = [ @@ -720,36 +681,14 @@ for s, l in reg_tests_armt: print s print mn assert(str(mn) == s) - # print hex(b) - # print [str(x.get()) for x in mn.args] l = mn_armt.fromstring(s, symbol_pool, 'l') - # print l assert(str(l) == s) print 'Asm..', l a = mn_armt.asm(l) print [x for x in a] print repr(b) - # print mn.args assert(b in a) -""" -print "*"*30, "START SPECIAL PARSING", "*"*30 -parse_tests = [ - "MOV LR, toto", - "MOV LR, 1+toto", - "MOV LR, (lend-lstart)^toto<<<R1", - "MOV LR, R1 LSL (l_end-l_start)^toto<<<R1", - "MOV LR, R1 LSL (l_end-l_start)^toto<<<R1", - "EOR R0, R1, toto^titi+1", - ] - -for l in parse_tests: - print "-"*80 - l = mn_arm.fromstring(l, 'l') - print l.name, ", ".join([str(a) for a in l.args]) -""" - - print 'TEST time', time.time() - ts # speed test arm @@ -787,7 +726,6 @@ instr_num = 0 ts = time.time() while off < bs.getlen(): mn = mn_armt.dis(bs, 'l', off) - # print instr_num, off, str(mn) instr_num += 1 off += mn.l print 'instr per sec:', instr_num / (time.time() - ts) diff --git a/test/arch/mips32/arch.py b/test/arch/mips32/arch.py index c6b68c0c..6fc36d13 100644 --- a/test/arch/mips32/arch.py +++ b/test/arch/mips32/arch.py @@ -228,13 +228,9 @@ for s, l in reg_tests_mips32: print s print mn assert(str(mn) == s) - # print hex(b) - # print [str(x.get()) for x in mn.args] l = mn_mips32.fromstring(s, symbol_pool, 'b') - # print l assert(str(l) == s) a = mn_mips32.asm(l, 'b') print [x for x in a] print repr(b) - # print mn.args assert(b in a) diff --git a/test/arch/mips32/unit/asm_test.py b/test/arch/mips32/unit/asm_test.py index f03a32d7..a2203783 100644 --- a/test/arch/mips32/unit/asm_test.py +++ b/test/arch/mips32/unit/asm_test.py @@ -18,9 +18,6 @@ class Asm_Test(object): self.myjit = Machine("mips32l").jitter(jitter) self.myjit.init_stack() - self.myjit.jit.log_regs = False - self.myjit.jit.log_mn = False - def __call__(self): self.asm() self.run() diff --git a/test/arch/msp430/arch.py b/test/arch/msp430/arch.py index 3df2becb..08e5bdae 100644 --- a/test/arch/msp430/arch.py +++ b/test/arch/msp430/arch.py @@ -95,13 +95,9 @@ for s, l in reg_tests_msp: print s print mn assert(str(mn) == s) - # print hex(b) - # print [str(x.get()) for x in mn.args] l = mn_msp430.fromstring(s, symbol_pool, None) - # print l assert(str(l) == s) a = mn_msp430.asm(l) print [x for x in a] print repr(b) - # print mn.args assert(b in a) diff --git a/test/arch/sh4/arch.py b/test/arch/sh4/arch.py index 574dcf49..9162fdbc 100644 --- a/test/arch/sh4/arch.py +++ b/test/arch/sh4/arch.py @@ -398,15 +398,11 @@ for s, l in reg_tests_sh4: print s print mn assert(str(mn) == s) - # print hex(b) - # print [str(x.get()) for x in mn.args] l = mn_sh4.fromstring(s, symbol_pool, None) - # print l assert(str(l) == s) a = mn_sh4.asm(l) print [x for x in a] print repr(b) - # print mn.args assert(b in a) diff --git a/test/arch/x86/arch.py b/test/arch/x86/arch.py index 05b31815..1865ceba 100644 --- a/test/arch/x86/arch.py +++ b/test/arch/x86/arch.py @@ -3062,17 +3062,13 @@ for mode, s, l, in reg_tests: print s print mn assert(str(mn).strip() == s) - # print hex(b) - # print [str(x.get()) for x in mn.args] print 'fromstring', repr(s) l = mn_x86.fromstring(s, symbol_pool, mode) - # print l print 'str args', [(str(x), x.size) for x in l.args] assert(str(l).strip(' ') == s) a = mn_x86.asm(l) print 'asm result', [x for x in a] print repr(b) - # test_file[mode[0]].write(b) for x in a: print "BYTES", repr(x) @@ -3086,7 +3082,6 @@ for mode, s, l, in reg_tests: assert(str(rl).strip(' ') == s) print repr(b), a assert(b in a) - # print mn.args print 'TEST time', time.time() - ts @@ -3118,9 +3113,7 @@ def profile_dis(o): print 'instr per sec:', instr_num / (time.time() - ts) import cProfile -# cProfile.run(r'mn_x86.dis("\x81\x54\x18\xfe\x44\x33\x22\x11", m32)') cProfile.run('profile_dis(o)') -# profile_dis(o) # Test instruction representation with prefix instr_bytes = '\x65\xc7\x00\x09\x00\x00\x00' diff --git a/test/arch/x86/unit/asm_test.py b/test/arch/x86/unit/asm_test.py index 961967f9..4b802606 100644 --- a/test/arch/x86/unit/asm_test.py +++ b/test/arch/x86/unit/asm_test.py @@ -18,9 +18,6 @@ class Asm_Test(object): self.myjit = Machine(self.arch_name).jitter(jitter_engine) self.myjit.init_stack() - self.myjit.jit.log_regs = False - self.myjit.jit.log_mn = False - def test_init(self): pass @@ -81,10 +78,6 @@ class Asm_Test_16(Asm_Test): self.myjit.stack_size = 0x1000 self.myjit.init_stack() - self.myjit.jit.log_regs = False - self.myjit.jit.log_mn = False - - def init_machine(self): self.myjit.vm.add_memory_page(self.run_addr, PAGE_READ | PAGE_WRITE, self.assembly) self.myjit.push_uint16_t(self.ret_addr) diff --git a/test/core/asmblock.py b/test/core/asmblock.py index c4a97518..0e965bfd 100644 --- a/test/core/asmblock.py +++ b/test/core/asmblock.py @@ -247,7 +247,7 @@ assert len(entry_block.lines) == 4 assert map(str, entry_block.lines) == ['XOR EAX, EAX', 'XOR EBX, EBX', 'XOR ECX, ECX', - 'JNZ label_3'] + 'JNZ loc_3'] assert len(asmcfg.successors(entry_block.loc_key)) == 2 assert len(entry_block.bto) == 2 nextb = asmcfg.loc_key_to_block((cons.loc_key for cons in entry_block.bto @@ -258,11 +258,11 @@ assert len(nextb.lines) == 4 assert map(str, nextb.lines) == ['XOR EDX, EDX', 'XOR ESI, ESI', 'XOR EDI, EDI', - 'JMP label_4'] + 'JMP loc_4'] assert asmcfg.successors(nextb.loc_key) == [nextb.loc_key] assert len(tob.lines) == 2 assert map(str, tob.lines) == ['XOR EBP, EBP', - 'JMP label_3'] + 'JMP loc_3'] assert asmcfg.successors(tob.loc_key) == [tob.loc_key] # Check split_block @@ -289,7 +289,7 @@ lbl_newb = asmcfg.successors(entry_block.loc_key)[0] newb = asmcfg.loc_key_to_block(lbl_newb) assert len(newb.lines) == 2 assert map(str, newb.lines) == ['XOR ECX, ECX', - 'JNZ label_3'] + 'JNZ loc_3'] preds = asmcfg.predecessors(lbl_newb) assert len(preds) == 2 assert entry_block.loc_key in preds diff --git a/test/expression/simplifications.py b/test/expression/simplifications.py index a4e839cf..b2591a83 100644 --- a/test/expression/simplifications.py +++ b/test/expression/simplifications.py @@ -177,6 +177,10 @@ to_test = [(ExprInt(1, 32) - ExprInt(1, 32), ExprInt(0, 32)), (ExprInt(0x4142, 32)[:32], ExprInt(0x4142, 32)), (ExprInt(0x4142, 32)[:8], ExprInt(0x42, 8)), (ExprInt(0x4142, 32)[8:16], ExprInt(0x41, 8)), + (ExprOp('>>', ExprOp('<<', a, ExprInt(0x4, 32)), ExprInt(0x4, 32)), + ExprOp('&', a, ExprInt(0x0FFFFFFF, 32))), + (ExprOp('<<', ExprOp('>>', a, ExprInt(0x4, 32)), ExprInt(0x4, 32)), + ExprOp('&', a, ExprInt(0xFFFFFFF0, 32))), (a[:32], a), (a[:8][:8], a[:8]), (a[:16][:8], a[:8]), diff --git a/test/jitter/bad_block.py b/test/jitter/bad_block.py index 04c1f475..ae11e696 100644 --- a/test/jitter/bad_block.py +++ b/test/jitter/bad_block.py @@ -32,8 +32,7 @@ run_addr = 0x40000000 jitter.vm.add_memory_page(run_addr, PAGE_READ | PAGE_WRITE, data) -jitter.jit.log_regs = True -jitter.jit.log_mn = True +jitter.set_trace_log() jitter.push_uint32_t(0x1337beef) jitter.add_breakpoint(0x1337beef, code_sentinelle) diff --git a/test/jitter/jit_options.py b/test/jitter/jit_options.py index 4fe936d5..a0ddbc11 100644 --- a/test/jitter/jit_options.py +++ b/test/jitter/jit_options.py @@ -33,8 +33,7 @@ def init_jitter(): # Init jitter myjit.init_stack() - myjit.jit.log_regs = True - myjit.jit.log_mn = True + myjit.set_trace_log() myjit.push_uint32_t(0x1337beef) myjit.add_breakpoint(0x1337beef, code_sentinelle) diff --git a/test/jitter/jmp_out_mem.py b/test/jitter/jmp_out_mem.py index 49da16ad..93ae8304 100644 --- a/test/jitter/jmp_out_mem.py +++ b/test/jitter/jmp_out_mem.py @@ -35,8 +35,7 @@ run_addr = 0x40000000 jitter.vm.add_memory_page(run_addr, PAGE_READ | PAGE_WRITE, data) -jitter.jit.log_regs = True -jitter.jit.log_mn = True +jitter.set_trace_log() jitter.push_uint32_t(0x1337beef) jitter.add_breakpoint(0x1337beef, code_sentinelle) diff --git a/test/jitter/test_post_instr.py b/test/jitter/test_post_instr.py index edf86645..39e87616 100644 --- a/test/jitter/test_post_instr.py +++ b/test/jitter/test_post_instr.py @@ -23,8 +23,8 @@ jitter.vm.add_memory_page(0x1000, PAGE_READ|PAGE_WRITE, "\x00"*0x1000, "code pag # RET jitter.vm.set_mem(0x1000, "B844332211C3".decode('hex')) -jitter.jit.log_mn = True -jitter.jit.log_regs = True + +jitter.set_trace_log() def do_not_raise_me(jitter): raise ValueError("Should not be here") |